/*
  $Id: perftest.cc 4057 2008-10-10 22:05:50Z abehm $

  Copyright (C) 2007 by The Regents of the University of California

  Redistribution of this file is permitted under the terms of the
  BSD license

  Date: 09/19/2007
  Author: Alexander Behm
*/

#include "ftindexersimple.h"
#include "ftsearchermem.h"
#include "statsgen.h"

void generateStatsRun();

// Program entry point: runs one stats-generation experiment and exits with 0.
int main() {
  generateStatsRun();
  return 0;
}

// Configures a StatsGenConfig, hands it to a StatsGen instance together with
// a DivideSkipMerger, runs the experiment, and prints a completion message.
// The dictionary is read from "data/dummy.txt" and the runtime statistics are
// written to "perftest_search_stats.txt".
void generateStatsRun() {
  // Gram generator constructed with the argument 3.
  GramGenFixedLen trigramGen(3);

  // Similarity metric: edit distance. Alternatives left by the author:
  //   SimMetricJacc simMetric(gramGen);  // jaccard similarity
  //   SimMetricCos  simMetric(gramGen);  // cosine similarity
  //   SimMetricDice simMetric(gramGen);  // dice similarity
  SimMetricEd editDistance(trigramGen);

  // Similarity threshold; with the edit-distance metric this means distance 1.
  float edThreshold = 1.0f;

  DivideSkipMerger<> listMerger;

  // Local aliases for the indexer/searcher types (not referenced below).
  typedef FtIndexerSimple<> indexer;
  typedef FtSearcherMem searcher;

  StatsGenConfig runConfig;
  runConfig.setGramGen(&trigramGen);
  // Arguments: similarity metric, similarity threshold.
  runConfig.setSimMetric(&editDistance, edThreshold);
  // String dictionary size: from 10 to 180 in steps of 10.
  runConfig.setDictSize(10, 180, 10);
  // Fanout: from 50 to 50 in steps of 50 (i.e. a single value).
  runConfig.setFanout(50, 50, 50);
  runConfig.setMaxStrLength(20);

  // Drop whatever filters are currently configured, then add a length filter.
  runConfig.clearFilters();
  runConfig.addFilter(new LengthFilter(20));

  // Total number of queries in the workload.
  runConfig.setNumberQueries(100);
  // Number of distinct queries, generated by randomly picking strings from
  // the dictionary.
  runConfig.setDistinctQueries(100);
  // Query distribution: QD_UNIFORM or QD_ZIPF.
  runConfig.setQueriesDistribution(QD_UNIFORM);
  // Zipf skew parameter, only relevant when QD_ZIPF is selected:
  //   config.setZipfSkew(1);

  // Number of times the workload is repeated to stabilize the numbers.
  runConfig.setNumberRuns(5);
  // Whether the index is rebuilt for every run.
  runConfig.setRebuildIndexEveryRun(false);

  // When true, the workload is read from a file instead of being generated:
  //   config.workloadFile = "custom_workload.txt";
  runConfig.overrideWorkload = false;

  // Create the stats generator on top of the configuration.
  StatsGen generator(&runConfig);
  generator.setMerger(&listMerger);

  // Input data file and output file for the runtime statistics.
  runConfig.setDictDataFile("data/dummy.txt");
  runConfig.setOutputFile("perftest_search_stats.txt");

  // Run the experiment.
  generator.generate();

  cout << "SUCCESS!" << endl;
  cout << "Please refer to the documentation of the FilterTree module for more information" << endl;
}