/*
  $Id$

  Copyright (C) 2007 by The Regents of the University of California
	
  Redistribution of this file is permitted under
  the terms of the BSD license
    
  Date: 09/19/2007
  Author: Alexander Behm <abehm (at) ics.uci.edu>
*/

#include "ftindexersimple.h"
#include "ftsearchermem.h"
#include "statsgen.h"

// Configures a search-performance experiment and runs it; defined further
// down in this file and invoked once from main().
void generateStatsRun();

// Program entry point: runs the statistics-generation experiment once.
int main() {
  generateStatsRun();
  // Falling off the end of main is equivalent to `return 0;`.
}

void generateStatsRun() {
  GramGenFixedLen gramGen(3);
  SimMetricEd simMetric(gramGen); // using the edit distance
  //SimMetricJacc simMetric(gramGen); // using jaccard similarity
  //SimMetricCos simMetric(gramGen); // using cosine similarity
  //SimMetricDice simMetric(gramGen); // using dice similarity
  float ed = 1.0f; // this represents the similarity threshold, in this case edit distance 1
  DivideSkipMerger<> merger;

  typedef FtIndexerSimple<> indexer;
  typedef FtSearcherMem<indexer> searcher;
  StatsGenConfig config;
  config.setGramGen(&gramGen);
  config.setSimMetric(&simMetric, ed); // similarity metric, similarity threshold
  config.setDictSize(10, 180, 10); // the string dictionary size from 10 to 180 in a 10 step
  config.setFanout(50, 50, 50); // fanout from 50 to 50 in step 50
  config.setMaxStrLength(20);
  config.clearFilters();
  config.addFilter(new LengthFilter(20));
  config.setNumberQueries(100); // set total number of queries for the workload to run
  config.setDistinctQueries(100); // set distinct number of queries generated by randomly picking strings from the dictionary
  config.setQueriesDistribution(QD_UNIFORM); // can be QD_UNIFORM or QD_ZIPF
  //config.setZipfSkew(1); // zipf skew parameter if QD_ZIPF specified
  config.setNumberRuns(5); // number times to repeat running workload to stabilize numbers
  config.setRebuildIndexEveryRun(false); // rebuild the index for every run?
  config.overrideWorkload = false; // if set to true, the workload will be read from a file specified
  //config.workloadFile = "custom_workload.txt" // if overrideWorkload is set then read the workload from this file
  
  StatsGen<indexer, searcher> statsgen(&config); // create an instance of the stats generator
  statsgen.setMerger(&merger);
  
  config.setDictDataFile("data/dummy.txt"); // set the data file
  config.setOutputFile("perftest_search_stats.txt"); // set the outputfile for runtime statistics
  statsgen.generate(); // run the experiment

  cout << "SUCCESS!" << endl;
  cout << "Please refer to the documentation of the FilterTree module for more information" << endl;
}

