/* $Id: ftindexersimple.h 4025 2008-10-01 00:01:14Z abehm $ Copyright (C) 2007 by The Regents of the University of California Redistribution of this file is permitted under the terms of the BSD license Date: 09/17/2007 Author: Alexander Behm */ #ifndef _ftindexersimple_h_ #define _ftindexersimple_h_ #include "ftindexermemabs.h" template class FtIndexerSimple; template struct FtIndexerTraits > { typedef TStringContainer StringContainer; typedef TInvList InvList; }; template > class FtIndexerSimple : public FtIndexerMemAbs > { private: typedef FtIndexerMemAbs > SuperClass; public: typedef typename SuperClass::GramMap GramMap; FtIndexerSimple(StringContainer* strContainer) : SuperClass(strContainer) {} FtIndexerSimple(StringContainer* strContainer, GramGen* gramGenerator, const unsigned maxStrLen = 150, const unsigned maxChildNodes = 50) : SuperClass(strContainer, gramGenerator, maxStrLen, maxChildNodes) {} void addStringId_Impl(FilterTreeNode* leaf, const vector& gramCodes, unsigned stringId); void saveIndex_Impl(const char* indexFileName); void loadIndex_Impl(const char* indexFileName); void insertString(const string& s); void clearInvertedLists(); }; template void FtIndexerSimple:: insertString(const string& s) { if(s.empty()) return; // traverse filtertree if there is one if(this->filterTypes.size() > 0 && this->ftFanout > 0) { FilterTreeNode* currentNode = this->filterTreeRoot; // position currentNode to appropriate leaf node in filter tree for(unsigned filterId = 0; filterId < this->filterTypes.size(); filterId++) { AbstractFilter* filter = this->filterTypes.at(filterId); unsigned key = filter->getKey(s); vector*> >& children = currentNode->children; // match key to node for(unsigned childId = 0; childId < children.size(); childId++) { pair*>& child = children.at(childId); if(child.first >= key) { currentNode = child.second; break; } } } this->addStringToNode(currentNode, s, this->nextStringID); } else { // no filters selected, so get the one leaf node FilterTreeNode* currentNode = this->filterTreeRoot->children.at(0).second; this->addStringToNode(currentNode, s, this->nextStringID); } this->nextStringID++; } template void FtIndexerSimple:: clearInvertedLists() { vector*> leaves; FilterTreeNode::getSubTreeLeaves(this->filterTreeRoot, leaves); for(unsigned i = 0; i < leaves.size(); i++) { GramMap& gramMap = leaves.at(i)->gramMap; typename GramMap::iterator iter; for(iter = gramMap.begin(); iter != gramMap.end(); iter++) delete iter->second; gramMap.clear(); } } template void FtIndexerSimple:: addStringId_Impl(FilterTreeNode* leaf, const vector& gramCodes, unsigned stringId) { typename SuperClass::GramMap& gramMap = leaf->gramMap; for(unsigned i = 0; i < gramCodes.size(); i++) { unsigned gramCode = gramCodes.at(i); if (gramMap.find(gramCode) == gramMap.end()) { // a new gram GramListSimple* newGramList = new GramListSimple(); gramMap[gramCode] = newGramList; InvList* array = newGramList->getArray(); array->append(stringId); } else { // an existing gram GramList* gramList = gramMap[gramCode]; InvList* array = gramList->getArray(); //avoid adding duplicate elements if(array->at(array->size()-1) != stringId) array->append(stringId); } } } template void FtIndexerSimple:: saveIndex_Impl(const char* indexFileName) { ofstream fpOut; fpOut.open(indexFileName, ios::out); if(fpOut.is_open()) { this->saveBasicInfo(fpOut); this->saveLeavesRec(this->filterTreeRoot, fpOut); fpOut.close(); } else cout << "ERROR: could not open file " << indexFileName << endl; } template void FtIndexerSimple:: loadIndex_Impl(const char* indexFileName) { ifstream fpIn; fpIn.open(indexFileName, ios::in); if(fpIn.is_open()) { this->filterTypes.clear(); this->loadBasicInfo(fpIn); this->buildHollowTreeRecursive(this->filterTreeRoot, 0); this->loadLeavesRec(this->filterTreeRoot, fpIn); fpIn.close(); } else cout << "ERROR: could not open file " << indexFileName << endl; } #endif