/* $Id: utilities.h 4025 2008-10-01 00:01:14Z abehm $ Copyright (C) 2007 by The Regents of the University of California Redistribution of this file is permitted under the terms of the BSD license Author: Jiaheng Lu Date: 05/11/2007 */ #ifndef _utilities_h_ #define _utilities_h_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "util/array.h" using namespace std; void dataProcessing(); void tradeOffResearchInHybrid(const vector*> &arrays, const unsigned threshold, vector & results, unsigned & totalMergeTime, unsigned & totalSearchTime); unsigned max(unsigned s1, unsigned s2); void researchMergSkip(const vector*> &arrays, const unsigned threshold, vector &results, unsigned &elementsScanned); void researchMergOpt(const vector*> &arrays, const unsigned threshold, vector &results, unsigned &elementsScanned); void researchHybridMerger(const vector*> &arrays, const unsigned threshold, vector &results, unsigned &elementsScanned); void researchScanCount(const vector*> &arrays, const unsigned threshold, vector &results, unsigned &elementsScanned); void CountSkipNodes (const vector* > &lists, unsigned vectorIndexContainer[], unsigned containerSize, unsigned pivotData, unsigned pointersIndexList[], unsigned &elementScanned); void analyzeResults(vector &result1, vector &result2, const vector*> &lists); unsigned shorestStringSize(const vector &strings); void searchOptimalParameterInHybrid(const vector*> &arrays, const unsigned threshold, vector &results); void binaryOptimalSearch(const vector*> &arrays, const unsigned threshold); bool binarySearch(vector *v, unsigned value, unsigned start, unsigned end); bool testConsistent(const vector &result1, const vector &result2); void analyzeScanCount(const vector*> &arrays, const unsigned threshold, vector &results, unsigned &time); void separateTwoSets(vector*> *longLists, vector*> *shortLists, unsigned threshold, const vector*> &originalLists, unsigned sortedIndex[] ); unsigned getTotalSize(const vector*> &lists); void sortBySizeOfLists(const vector*> &allLists, unsigned sortedIndex [] ); // ADDED BY ALEX - needed for duplicate detection void sortByArrayAddress(const vector*> &allLists, unsigned sortedIndex [] ); void binarySearchSet(unsigned &count, vector* > *lists, unsigned data); void insertToHeaps(unsigned dataHeap[], unsigned indexHeap[], unsigned &heapSize, const vector< Array* > &lists, unsigned pointersIndexList[], unsigned vectorIndexContainer[], unsigned containerSize); void skipNodes (const vector* > &lists, unsigned vectorIndexContainer[], unsigned containerSize, unsigned pivotData, unsigned pointersIndexList[]); void mergeSkipShortLists(const vector*> &arrays, const unsigned threshold, vector &results, vector &counters); void mergeSkipShortListsWithDuplicate(const vector*> &arrays, const vector &weights, const unsigned threshold, vector &results, vector &counters); unsigned hamming(const string &s1, const string &s2, unsigned T); void splitTwoSets(vector*> *longLists, vector*> *shortLists, const unsigned threshold, const vector*> &originalLists, unsigned sortedIndex[], unsigned longListsSize); void splitTwoSizeSets(vector *longLists, vector *shortLists, const unsigned threshold, const vector &originalLists, unsigned sortedIndex[], unsigned longListsSize); void sortBySize(const vector &allLists, unsigned sortedIndex []); void splitTwoSetsWithDuplicates(vector*> &longLists, vector*> &shortLists, vector &longListsWeights, vector &shortListsWeights, const vector*> &originalLists, const vector &originalWeights, const unsigned shortListsSize); void getStatistics(const vector*> &arrays, unsigned threshold, unsigned longListsSize, const vector &partialResults, const vector &results); void detectDuplicateLists(const vector*> &arrays, vector*> &newArrays, vector &newWeights); #endif