/* $Id: ftsearcherabs.h 5146 2010-03-24 23:05:57Z abehm $ Copyright (C) 2010 by The Regents of the University of California Redistribution of this file is permitted under the terms of the BSD license. Date: 04/04/2008 Author: Alexander Behm */ #ifndef _ftsearcherabs_h_ #define _ftsearcherabs_h_ #include "ftindexerabs.h" #include "statsutil.h" #include "filtertreenode.h" #include "common/filtertypes.h" #include "common/query.h" template struct FtSearcherTraits; template class FtSearcherAbs { public: typedef FtSearcherTraits TraitsType; typedef typename TraitsType::FtIndexer FtIndexer; typedef typename TraitsType::Merger Merger; typedef FtIndexerTraits IndexerTraitsType; typedef typename IndexerTraitsType::InvList InvList; protected: #ifdef DEBUG_STAT QueryStats* queryStats; StatsUtil sutil; #endif unsigned mergeThreshold; GramGen* gramGen; StatsUtil sutilLocal; Merger* merger; FtIndexer* ftIndexer; void getAffectedLeavesRec(FilterTreeNode* node, const Query& query, const vector& queryGramCodes, const vector* filterTypes, const unsigned filterId, vector*>& leaves); void searchLeafNodePanic(FilterTreeNode* leaf, const Query& query, vector& resultStringIDs); void postProcess(const Query& query, const vector& candidates, vector& resultStringIDs); void processLeaves(const vector*>& leaves, const Query& query, const vector& queryGramCodes, vector& resultStringIDs) { static_cast(this)->processLeaves_Impl(leaves, query, queryGramCodes, resultStringIDs); } public: FtSearcherAbs(Merger* merger, FtIndexer* ftIndexer = NULL); void prepare() { static_cast(this)->prepare_Impl(); } void search(const Query& query, vector& resultStringIDs); void setFtIndexer(FtIndexer* ftIndexer) { this->ftIndexer = ftIndexer; if(ftIndexer) gramGen = ftIndexer->getGramGen(); } string getName() { return "FtSearcherAbs"; } #ifdef DEBUG_STAT QueryStats* getQueryStats() { return queryStats; } #endif virtual ~FtSearcherAbs() { #ifdef DEBUG_STAT if(queryStats) delete queryStats; #endif } }; template FtSearcherAbs:: FtSearcherAbs(Merger* merger, FtIndexer* ftIndexer) : merger(merger), ftIndexer(ftIndexer) { if(ftIndexer) gramGen = ftIndexer->getGramGen(); else gramGen = NULL; } template void FtSearcherAbs:: getAffectedLeavesRec(FilterTreeNode* node, const Query& query, const vector& queryGramCodes, const vector* filterTypes, const unsigned filterId, vector*>& leaves) { if(filterTypes->size() == 0) { leaves.push_back( node->children.at(0).node ); return; } if(node->isLeaf) leaves.push_back(node); else { AbstractFilter* filter = filterTypes->at(filterId); // get query bounds for this filter unsigned lbound, ubound; query.sim.getFilterBounds(query.str, query.threshold, filter, lbound, ubound); unsigned nodeLbound = filter->getFilterLbound(); // binary search to first affected child vector >& children = node->children; typename vector >::iterator iter; KeyNodePair lboundKnp(lbound, NULL); iter = lower_bound(children.begin(), children.end(), lboundKnp); // continue recursing into children as long as their keys overlap with lbound, ubound while(iter != children.end() && nodeLbound < ubound) { getAffectedLeavesRec(iter->node, query, queryGramCodes, filterTypes, filterId+1, leaves); nodeLbound = iter->key; iter++; } } } template void FtSearcherAbs:: searchLeafNodePanic(FilterTreeNode* leaf, const Query& query, vector& resultStringIDs) { #ifdef DEBUG_STAT this->sutil.startTimeMeasurement(); #endif if(ftIndexer->filterTypes.size() > 0) { // search the stringids in this leaf if there are any if(leaf->distinctStringIDs) { string dictString; Array* stringIDs = leaf->distinctStringIDs->getArray( this->ftIndexer->getInvListsFile() ); for(unsigned i = 0; i < stringIDs->size(); i++) { unsigned stringId = stringIDs->at(i); ftIndexer->strContainer->retrieveString(dictString, stringId); if ( query.sim(dictString, query.str, query.threshold)) resultStringIDs.push_back(stringId); } leaf->distinctStringIDs->clear(); } } else { // search all stringids string dictString; for(unsigned i = 0; i < ftIndexer->strContainer->size(); i++) { ftIndexer->strContainer->retrieveString(dictString, i); if ( query.sim(dictString, query.str, query.threshold)) resultStringIDs.push_back(i); } } #ifdef DEBUG_STAT this->sutil.stopTimeMeasurement(); this->queryStats->panicTime += this->sutil.getTimeMeasurement(); this->queryStats->panics++; #endif } template void FtSearcherAbs:: search(const Query& query, vector& resultStringIDs) { // create gram codes from query vector queryGramCodes; gramGen->decompose(query.str, queryGramCodes); vector* filterTypes = &ftIndexer->filterTypes; #ifdef DEBUG_STAT this->queryStats->reset(); this->sutil.startTimeMeasurement(); #endif mergeThreshold = query.sim.getMergeThreshold(query.str, queryGramCodes, query.threshold, ftIndexer->getCompressionArgs()); #ifdef DEBUG_STAT this->sutil.stopTimeMeasurement(); this->queryStats->threshTime += this->sutil.getTimeMeasurement(); #endif // get affected leaves and process them vector*> leaves; getAffectedLeavesRec(ftIndexer->filterTreeRoot, query, queryGramCodes, filterTypes, 0, leaves); processLeaves(leaves, query, queryGramCodes, resultStringIDs); #ifdef DEBUG_STAT this->queryStats->totalTime = this->queryStats->threshTime + this->queryStats->preprocTime + this->queryStats->mergeTime + this->queryStats->postprocTime + this->queryStats->panicTime; #endif } template void FtSearcherAbs:: postProcess(const Query& query, const vector& candidates, vector& resultStringIDs) { #ifdef DEBUG_STAT this->sutil.startTimeMeasurement(); #endif for(unsigned i = 0; i < candidates.size(); i++) { unsigned stringId = candidates.at(i); string dictString; this->ftIndexer->strContainer->retrieveString(dictString, stringId); if (query.sim(dictString, query.str, query.threshold)) resultStringIDs.push_back(stringId); } #ifdef DEBUG_STAT this->sutil.stopTimeMeasurement(); this->queryStats->postprocTime += this->sutil.getTimeMeasurement(); #endif } #endif