/*
  $Id$

  Copyright (C) 2008 by The Regents of the University of California
	
  Redistribution of this file is permitted under
  the terms of the BSD license
    
  Date: 02/04/2008
  Author: Alexander Behm <abehm (at) ics.uci.edu>
*/

#include "ftsearchermem.h"
#include "common/query.h"
#include "common/simmetric.h"
#include "listmerger/divideskipmerger.h"
#include "listmerger/scancountmerger.h"

// global collection of example strings;
// filled by initDictionary() and indexed by basicUsage1()
vector<string> dictionary;

// create a dummy dictionary
// (fills 'dictionary' and prints every generated string to stdout)
void initDictionary();

// usage examples, called in this order by main()
void basicUsage1(); // edit distance, strings taken from 'dictionary'
void basicUsage2(); // edit distance, strings read from data/dummy.txt
void basicUsage3(); // jaccard metric with a ScanCountMerger, strings read from data/dummy.txt

// Entry point: builds the dummy dictionary, then runs every usage example once.
int main() {
  // the dictionary has to be filled first, basicUsage1() indexes it
  initDictionary();

  // examples are executed in the order they are listed here
  typedef void (*ExampleFn)();
  const ExampleFn examples[] = { basicUsage1, basicUsage2, basicUsage3 };
  const unsigned numExamples = sizeof(examples) / sizeof(examples[0]);
  for(unsigned e = 0; e < numExamples; e++)
    examples[e]();

  return 0;
}

void initDictionary() {
  vector<string> prefixes;
  prefixes.push_back("string");
  prefixes.push_back("example");  
  prefixes.push_back("test");
  prefixes.push_back("hello");
  prefixes.push_back("world");
  prefixes.push_back("foo");
  prefixes.push_back("bar");

  vector<string> suffixes;
  suffixes.push_back("1");
  suffixes.push_back("10");
  suffixes.push_back("100");
  suffixes.push_back("2");
  suffixes.push_back("20");
  suffixes.push_back("200");
  suffixes.push_back("3");
  suffixes.push_back("30");
  suffixes.push_back("300");

  cout << "---------------------------------------" << endl;
  cout << "STRING DICTIONARY:" << endl;
  for(unsigned j = 0; j < prefixes.size(); j++)
    for(unsigned i = 0; i < suffixes.size(); i++) {
      dictionary.push_back(prefixes.at(j) + suffixes.at(i));
      cout << dictionary.at(dictionary.size()-1) << endl;
    }
  cout << "---------------------------------------" << endl << endl;
}

void basicUsage1() {  
  cout << "----- BASIC USAGE 1 ----" << endl;

  // create gramgenerator and similarity metric
  GramGenFixedLen gramGen(3); // using fixed-length grams
  SimMetricEd simMetric(gramGen); // using the edit distance
  //SimMetricJacc simMetric(gramGen); // using jaccard similarity
  //SimMetricCos simMetric(gramGen); // using cosine similarity
  //SimMetricDice simMetric(gramGen); // using dice similarity
  
  // create simple indexer with default template arguments
  // default: in-memory index using Array<unsigned> as an inverted list container
  // first create a string container and fill it with strings to index
  StringContainerVector strContainer;
  strContainer.fillContainer(dictionary); // fill the container from a vector<string>
  FtIndexerSimple<> indexer(&strContainer, &gramGen);
  indexer.addFilter(new LengthFilter(50)); // add length filtering with a maximum string length of 50
  indexer.buildIndex();
  
  // create merger
  DivideSkipMerger<> merger;
  // create searcher passing merger and indexer with default template arguments
  // default: same as indexer, i.e. assumed simple indexer with Array<unsigned> as inverted lists and DivideSkipMerger as merger type
  FtSearcherMem<> searcher(&merger, &indexer);
  
  vector<unsigned> resultStringIDs;
  Query query("xample", simMetric, 2.0f); // query string, similarity metric, similarity threshold
  searcher.search(query, resultStringIDs);  
  cout << "SIMILAR STRINGS: " << endl;
  for(unsigned i = 0; i < resultStringIDs.size(); i++) {
    string tmp;
    strContainer.retrieveString(tmp, resultStringIDs.at(i));
    cout << tmp << endl;
  }
  
  cout << "SAVING INDEX" << endl;
  indexer.saveIndex("ExampleIndex.ix");

  cout << "LOADING INDEX" << endl;
  FtIndexerSimple<> indexerLoaded(&strContainer);
  indexerLoaded.loadIndex("ExampleIndex.ix");
  
  resultStringIDs.clear();
  searcher.setFtIndexer(&indexerLoaded);
  searcher.search(query, resultStringIDs);  
  cout << "SIMILAR STRINGS: " << endl;
  for(unsigned i = 0; i < resultStringIDs.size(); i++) {
    string tmp;
    strContainer.retrieveString(tmp, resultStringIDs.at(i));
    cout << tmp << endl;
  }

  cout << "----------------------" << endl << endl;
}

void basicUsage2() {  
  cout << "----- BASIC USAGE 2 ----" << endl;

  // create gramgenerator and similarity metric
  GramGenFixedLen gramGen(3); // using fixed-length grams
  SimMetricEd simMetric(gramGen); // using the edit distance
  
  // create simple indexer with default template arguments
  // default: in-memory index using Array<unsigned> as an inverted list container
  // first create a string container and fill it with strings to index
  StringContainerVector strContainer;
  strContainer.fillContainer("data/dummy.txt", 180); // fill the container from a datafile and use the first 180 lines
  FtIndexerSimple<> indexer(&strContainer, &gramGen);
  indexer.addFilter(new LengthFilter(50)); // add length filtering with a maximum string length of 50
  indexer.buildIndex();
  
  // create merger
  DivideSkipMerger<> merger;
  // create searcher passing merger and indexer with default template arguments
  // default: same as indexer, i.e. assumed simple indexer with Array<unsigned> as inverted lists and DivideSkipMerger as merger type
  FtSearcherMem<> searcher(&merger, &indexer);
  
  vector<unsigned> resultStringIDs;
  Query query("elloorld", simMetric, 3.0f);
  searcher.search(query, resultStringIDs);  
  cout << "SIMILAR STRINGS: " << endl;
  for(unsigned i = 0; i < resultStringIDs.size(); i++) {
    string tmp;
    strContainer.retrieveString(tmp, resultStringIDs.at(i));
    cout << tmp << endl;
  }
  
  cout << "SAVING INDEX" << endl;
  indexer.saveIndex("ExampleIndex.ix");

  cout << "LOADING INDEX" << endl;
  FtIndexerSimple<> indexerLoaded(&strContainer);
  indexerLoaded.loadIndex("ExampleIndex.ix");
  
  resultStringIDs.clear();
  searcher.setFtIndexer(&indexerLoaded);
  searcher.search(query, resultStringIDs);  
  cout << "SIMILAR STRINGS: " << endl;
  for(unsigned i = 0; i < resultStringIDs.size(); i++) {
    string tmp;
    strContainer.retrieveString(tmp, resultStringIDs.at(i));
    cout << tmp << endl;
  }

  cout << "----------------------" << endl << endl;
}

void basicUsage3() {  
  cout << "----- BASIC USAGE 3 ----" << endl;

  // create gramgenerator and similarity metric
  GramGenFixedLen gramGen(3); // using fixed-length grams
  SimMetricJacc simMetric(gramGen); // using the jaccard distance (using set semantics)
  
  // create simple indexer with default template arguments
  // default: in-memory index using Array<unsigned> as an inverted list container
  // first create a string container and fill it with strings to index
  StringContainerVector strContainer;
  strContainer.fillContainer("data/dummy.txt", 180); // fill the container from a datafile and use the first 180 lines
  FtIndexerSimple<> indexer(&strContainer, &gramGen);
  indexer.addFilter(new ChecksumFilter(50)); // add checksum filtering with a maximum string length of 50
  indexer.buildIndex();
  
  // create merger
  ScanCountMerger<> merger(180);
  // create searcher, specifying a non-default merger
  FtSearcherMem<FtIndexerSimple<>, ScanCountMerger<> > searcher(&merger, &indexer);
  
  vector<unsigned> resultStringIDs;
  Query query("elloworld", simMetric, 0.4f);
  searcher.search(query, resultStringIDs);  
  cout << "SIMILAR STRINGS: " << endl;
  for(unsigned i = 0; i < resultStringIDs.size(); i++) {
    string tmp;
    strContainer.retrieveString(tmp, resultStringIDs.at(i));
    cout << tmp << endl;
  }
  
  cout << "SAVING INDEX" << endl;
  indexer.saveIndex("ExampleIndex.ix");

  cout << "LOADING INDEX" << endl;
  FtIndexerSimple<> indexerLoaded(&strContainer);
  indexerLoaded.loadIndex("ExampleIndex.ix");
  
  resultStringIDs.clear();
  searcher.setFtIndexer(&indexerLoaded);
  searcher.search(query, resultStringIDs);  
  cout << "SIMILAR STRINGS: " << endl;
  for(unsigned i = 0; i < resultStringIDs.size(); i++) {
    string tmp;
    strContainer.retrieveString(tmp, resultStringIDs.at(i));
    cout << tmp << endl;
  }

  cout << "----------------------" << endl << endl;
}
