/* $Id: ftindexermem.h 5146 2010-03-24 23:05:57Z abehm $ Copyright (C) 2010 by The Regents of the University of California Redistribution of this file is permitted under the terms of the BSD license. Date: 04/04/2008 Author: Alexander Behm */ #ifndef _ftindexeruncompressed_h_ #define _ftindexeruncompressed_h_ #include "ftindexermemabs.h" template class FtIndexerMem; template struct FtIndexerTraits > { typedef TStringContainer StringContainer; typedef TInvList InvList; }; template > class FtIndexerMem : public FtIndexerMemAbs > { private: typedef FtIndexerMemAbs > SuperClass; public: typedef typename SuperClass::GramMap GramMap; #ifdef DEBUG_STAT typedef IndexStats IxStatsType; #endif FtIndexerMem(StringContainer* strContainer) : SuperClass(strContainer) { #ifdef DEBUG_STAT this->ixStats = new IndexStats(); #endif } FtIndexerMem(StringContainer* strContainer, GramGen* gramGenerator, unsigned maxStrLen = 150, unsigned maxChildNodes = 50) : SuperClass(strContainer, gramGenerator, maxStrLen, maxChildNodes) { #ifdef DEBUG_STAT this->ixStats = new IndexStats(); #endif } void prepare_Impl() {}; void addStringId_Impl(FilterTreeNode* leaf, const vector& gramCodes, unsigned stringId); CompressionArgs* getCompressionArgs_Impl() { return &(this->compressionArgs); } void saveIndex_Impl(const char* indexFileName); void loadIndex_Impl(const char* indexFileName); string getName() { return "FtIndexerMem"; } void insertString(const string& s); void clearInvertedLists(); }; template void FtIndexerMem:: insertString(const string& s) { if(s.empty()) { this->nextStringID++; // this is extremely important! caller might rely on increment return; } // traverse filtertree if there is one if(this->filterTypes.size() > 0 && this->ftFanout > 0) { FilterTreeNode* currentNode = this->filterTreeRoot; // position currentNode to appropriate leaf node in filter tree for(unsigned filterId = 0; filterId < this->filterTypes.size(); filterId++) { unsigned key = this->filterTypes.at(filterId)->getKey(s); KeyNodePair knp(key, NULL); typename vector >::iterator iter = lower_bound(currentNode->children.begin(), currentNode->children.end(), knp); if(iter != currentNode->children.end()) currentNode = iter->node; } this->addStringToNode(currentNode, s, this->nextStringID); } else { // no filters selected, so get the one leaf node FilterTreeNode* currentNode = this->filterTreeRoot->children.at(0).node; this->addStringToNode(currentNode, s, this->nextStringID); } this->nextStringID++; } template void FtIndexerMem:: clearInvertedLists() { vector*> leaves; FilterTreeNode::getSubTreeLeaves(this->filterTreeRoot, leaves); for(unsigned i = 0; i < leaves.size(); i++) { GramMap& gramMap = leaves.at(i)->gramMap; typename GramMap::iterator iter; for(iter = gramMap.begin(); iter != gramMap.end(); iter++) delete iter->second; gramMap.clear(); } } template void FtIndexerMem:: addStringId_Impl(FilterTreeNode* leaf, const vector& gramCodes, unsigned stringId) { typename SuperClass::GramMap& gramMap = leaf->gramMap; for(unsigned i = 0; i < gramCodes.size(); i++) { unsigned gramCode = gramCodes.at(i); if (gramMap.find(gramCode) == gramMap.end()) { // a new gram GramListSimple* newGramList = new GramListSimple(); gramMap[gramCode] = newGramList; InvList* array = newGramList->getArray(); array->append(stringId); } else { // an existing gram GramList* gramList = gramMap[gramCode]; InvList* array = gramList->getArray(); //avoid adding duplicate elements if(array->at(array->size()-1) != stringId) array->append(stringId); } } } template void FtIndexerMem:: saveIndex_Impl(const char* indexFileName) { ofstream fpOut; fpOut.open(indexFileName, ios::out); if(fpOut.is_open()) { this->saveBasicInfo(fpOut); this->saveLeavesRec(this->filterTreeRoot, fpOut); fpOut.close(); } else cout << "ERROR: could not open file " << indexFileName << endl; } template void FtIndexerMem:: loadIndex_Impl(const char* indexFileName) { ifstream fpIn; fpIn.open(indexFileName, ios::in); if(fpIn.is_open()) { this->filterTypes.clear(); this->loadBasicInfo(fpIn); this->buildHollowTreeRecursive(this->filterTreeRoot, 0); this->loadLeavesRec(this->filterTreeRoot, fpIn); fpIn.close(); } else cout << "ERROR: could not open file " << indexFileName << endl; } // partial specialization for inverted lists containing positional information, i.e. Array // PosID is defined in gramlist.h template class FtIndexerMem > : public FtIndexerMemAbs > > { private: typedef FtIndexerMemAbs > > SuperClass; public: typedef typename SuperClass::GramMap GramMap; FtIndexerMem(StringContainer* strContainer) : SuperClass(strContainer) {} FtIndexerMem(StringContainer* strContainer, GramGen* gramGenerator, const unsigned maxStrLen = 150, const unsigned maxChildNodes = 50) : SuperClass(strContainer, gramGenerator, maxStrLen, maxChildNodes) {} void prepare_Impl() {}; void addStringId_Impl(FilterTreeNode >* leaf, const vector& gramCodes, unsigned stringId); CompressionArgs* getCompressionArgs_Impl() { return &(this->compressionArgs); } void saveIndex_Impl(const char* indexFileName); void loadIndex_Impl(const char* indexFileName); void insertString(const string& s); void clearInvertedLists(); }; template void FtIndexerMem >:: insertString(const string& s) { if(s.empty()) { this->nextStringID++; // this is extremely important! caller might rely on increment return; } // traverse filtertree if there is one if(this->filterTypes.size() > 0 && this->ftFanout > 0) { FilterTreeNode >* currentNode = this->filterTreeRoot; // position currentNode to appropriate leaf node in filter tree for(unsigned filterId = 0; filterId < this->filterTypes.size(); filterId++) { unsigned key = this->filterTypes.at(filterId)->getKey(s); KeyNodePair > knp(key, NULL); typename vector > >::iterator iter = lower_bound(currentNode->children.begin(), currentNode->children.end(), knp); if(iter != currentNode->children.end()) currentNode = iter->node; } this->addStringToNode(currentNode, s, this->nextStringID); } else { // no filters selected, so get the one leaf node FilterTreeNode >* currentNode = this->filterTreeRoot->children.at(0).node; this->addStringToNode(currentNode, s, this->nextStringID); } this->nextStringID++; } template void FtIndexerMem >:: clearInvertedLists() { vector >*> leaves; FilterTreeNode >::getSubTreeLeaves(this->filterTreeRoot, leaves); for(unsigned i = 0; i < leaves.size(); i++) { GramMap& gramMap = leaves.at(i)->gramMap; typename GramMap::iterator iter; for(iter = gramMap.begin(); iter != gramMap.end(); iter++) delete iter->second; gramMap.clear(); } } template void FtIndexerMem >:: addStringId_Impl(FilterTreeNode >* leaf, const vector& gramCodes, unsigned stringId) { typename SuperClass::GramMap& gramMap = leaf->gramMap; for(unsigned i = 0; i < gramCodes.size(); i++) { unsigned gramCode = gramCodes.at(i); if (gramMap.find(gramCode) == gramMap.end()) { // a new gram GramListSimple >* newGramList = new GramListSimple >(); gramMap[gramCode] = newGramList; Array* array = newGramList->getArray(); PosID newPosID(stringId, i); array->append(newPosID); } else { // an existing gram GramList >* gramList = gramMap[gramCode]; Array* array = gramList->getArray(); PosID newPosID(stringId, i); array->append(newPosID); } } } template void FtIndexerMem >:: saveIndex_Impl(const char* indexFileName) { /* ofstream fpOut; fpOut.open(indexFileName, ios::out); if(fpOut.is_open()) { this->saveBasicInfo(fpOut); this->saveLeavesRec(this->filterTreeRoot, fpOut); fpOut.close(); } else cout << "ERROR: could not open file " << indexFileName << endl; */ cout << "ERROR: saveIndex_Impl not implemented" << endl; } template void FtIndexerMem >:: loadIndex_Impl(const char* indexFileName) { /* ifstream fpIn; fpIn.open(indexFileName, ios::in); if(fpIn.is_open()) { this->filterTypes.clear(); this->loadBasicInfo(fpIn); this->buildHollowTreeRecursive(this->filterTreeRoot, 0); this->loadLeavesRec(this->filterTreeRoot, fpIn); fpIn.close(); } else cout << "ERROR: could not open file " << indexFileName << endl; */ cout << "ERROR: loadIndex_Impl not implemented" << endl; } #endif