/* $Id$ Copyright (C) 2007 by The Regents of the University of California Redistribution of this file is permitted under the terms of the BSD license Date: 09/17/2007 Author: Alexander Behm */ #ifndef _ftindexerabs_h_ #define _ftindexerabs_h_ #include "filtertreenode.h" #include "filtertypes.h" #include "statsutil.h" #include "gramlist.h" #include "gramlistsimple.h" #include "stringcontainer.h" #include "common/gramgen.h" #include "util/looptimer.h" enum FtIndexerType { UNCOMPRESSED }; template struct FtIndexerTraits; template class FtIndexerAbs { public: typedef FtIndexerTraits TraitsType; typedef typename TraitsType::StringContainer StringContainer; typedef typename TraitsType::InvList InvList; protected: typedef typename tr1::unordered_map* > GramMap; FtIndexerType indexerType; GramGen* gramGen; unsigned maxStrLen; unsigned ftFanout; unsigned nextStringID; void buildHollowTreeRecursive(FilterTreeNode* node, const unsigned filterId); void deleteFilterTreeRec(FilterTreeNode* node); // for saving/loading indexes void saveBasicInfo(ofstream& fpOut); void loadBasicInfo(ifstream& fpIn); public: StringContainer* strContainer; vector filterTypes; FilterTreeNode* filterTreeRoot; FtIndexerAbs(StringContainer* strContainer); FtIndexerAbs(StringContainer* strContainer, GramGen* gramGenerator, const unsigned maxStrLen = 150, const unsigned ftFanout = 50); void buildIndex(const string& dataFile, const unsigned linesToRead = 0, bool showProgress = true, StatsUtil* sutil = NULL) { return static_cast(this)->buildIndex_Impl(dataFile, linesToRead, showProgress, sutil); } void buildIndex(bool showProgress = true, StatsUtil* sutil = NULL) { return static_cast(this)->buildIndex_Impl(showProgress, sutil); } void addFilter(const AbstractFilter* filter); void clearFilters(); GramGen* getGramGen() const { return gramGen; } void deleteFilterTree() { deleteFilterTreeRec(filterTreeRoot); filterTreeRoot = NULL; } ~FtIndexerAbs() { deleteFilterTree(); clearFilters(); } }; template void FtIndexerAbs:: buildHollowTreeRecursive(FilterTreeNode* node, const unsigned filterId) { // check for case when no filters were used if(filterTypes.size() == 0) { // just create one child FilterTreeNode* newNode = new FilterTreeNode(true); pair*> p(0, newNode); node->children.push_back(p); return; } unsigned filterLbound, filterUbound; AbstractFilter* filter = filterTypes.at(filterId); filterLbound = filter->getFilterLbound(); filterUbound = filter->getFilterUbound(); double step = (filterUbound - filterLbound) / (double)ftFanout; bool isLeaf = filterId >= filterTypes.size()-1; for(unsigned i = 0; i < ftFanout; i++) { FilterTreeNode* newNode = new FilterTreeNode(isLeaf); unsigned key = (unsigned)round((i+1)*step); pair*> p(key, newNode); node->children.push_back(p); if(!isLeaf) buildHollowTreeRecursive(newNode, filterId+1); } } // used for reading/writing index from/to file, // using logical separation into functions so compressed indexers can use these, too template void FtIndexerAbs:: saveBasicInfo(ofstream& fpOut) { unsigned u; // write primitive information fpOut.write((const char*)&this->maxStrLen, sizeof(unsigned)); fpOut.write((const char*)&this->ftFanout, sizeof(unsigned)); fpOut.write((const char*)&this->nextStringID, sizeof(unsigned)); // write gramgenerator this->gramGen->saveGramGenInstance(fpOut); // write filterTypes u = this->filterTypes.size(); fpOut.write((const char*)&u, sizeof(unsigned)); for(unsigned i = 0; i < this->filterTypes.size(); i++) { AbstractFilter* filter = this->filterTypes.at(i); filter->saveFilterInstance(fpOut); } } template void FtIndexerAbs:: loadBasicInfo(ifstream& fpIn) { unsigned nFilters; fpIn.read((char*)&this->maxStrLen, sizeof(unsigned)); fpIn.read((char*)&this->ftFanout, sizeof(unsigned)); fpIn.read((char*)&this->nextStringID, sizeof(unsigned)); // load gramgenerator this->gramGen = GramGen::loadGramGenInstance(fpIn); // load filters fpIn.read((char*)&nFilters, sizeof(unsigned)); for(unsigned i = 0; i < nFilters; i++) { AbstractFilter* newFilter = AbstractFilter::loadFilterInstance(fpIn); this->filterTypes.push_back(newFilter); } } template void FtIndexerAbs:: deleteFilterTreeRec(FilterTreeNode* node) { if(!node) return; if(!node->isLeaf) { vector*> >& children = node->children; for(unsigned i = 0; i < children.size(); i++) { deleteFilterTreeRec(children.at(i).second); } } delete node; } template FtIndexerAbs:: FtIndexerAbs(StringContainer* strContainer) { this->strContainer = strContainer; this->gramGen = NULL; this->maxStrLen = 0; this->ftFanout = 0; this->filterTreeRoot = new FilterTreeNode(false); this->nextStringID = 0; } template FtIndexerAbs:: FtIndexerAbs(StringContainer* strContainer, GramGen* gramGen, const unsigned maxStrLen, const unsigned ftFanout) { this->strContainer = strContainer; this->gramGen = gramGen; this->maxStrLen = maxStrLen; this->ftFanout = ftFanout; this->filterTreeRoot = new FilterTreeNode(false); this->nextStringID = 0; } template void FtIndexerAbs:: addFilter(const AbstractFilter* filter) { for(unsigned i = 0; i < filterTypes.size(); i++) { AbstractFilter* tmp = filterTypes.at(i); if(filter->getType() == tmp->getType()) return; } filterTypes.push_back((AbstractFilter*)filter); } template void FtIndexerAbs:: clearFilters() { for(unsigned i = 0; i < filterTypes.size(); i++) { AbstractFilter* tmp = filterTypes.at(i); if(tmp) delete tmp; } } #endif