/* $Id: filtertreenode.h 5783 2010-10-21 23:12:04Z abehm $ Copyright (C) 2010 by The Regents of the University of California Redistribution of this file is permitted under the terms of the BSD license. Date: 04/04/2008 Author: Alexander Behm */ #ifndef _filtertreenode_h_ #define _filtertreenode_h_ #include #include #include #include "gramlist.h" #include "gramlistondisk.h" #include "common/src/filtertypes.h" #include "common/src/gramgen.h" #include "util/src/array.h" template class FilterTreeNode; template class KeyNodePair; template > class KeyNodePair { public: unsigned key; FilterTreeNode* node; KeyNodePair(unsigned key, FilterTreeNode* node) : key(key), node(node) {} bool operator<(const KeyNodePair& knp) { return key < knp.key; } }; template > class FilterTreeNode { public: typedef unordered_map* > GramMap; GramMap gramMap; GramList* distinctStringIDs; vector > children; // sorted by pair.first set gramBlackList; // set of hole grams for compressed index FilterTreeNode() : distinctStringIDs(NULL) {} void getQueryGramLists(const string& query, GramGen& gramGen, vector* >* gramLists); void getQueryGramLists(const vector& gramCodes, vector* >* gramLists); void getInvertedLists(const string& query, const GramGen& gramGen, vector &invertedLists); void getInvertedLists(const vector& gramCodes, vector &invertedLists); // place all leaves below input node into result vector static void getSubTreeLeaves(FilterTreeNode* node, vector* >& leaves); // get size of subtree (including input node), without counting inverted lists static unsigned getSubTreeSize(FilterTreeNode* node); // get size of inverted lists below subtree (including input node) static unsigned getSubTreeInvertedListsSize(FilterTreeNode* node); // get size of this node unsigned getSize(); // get size of inverted lists of this node unsigned getInvertedListsSize(); bool isLeaf() { return children.size() == 0; } ~FilterTreeNode(); }; template FilterTreeNode:: ~FilterTreeNode() { for(typename GramMap::iterator iter = gramMap.begin(); iter != gramMap.end(); iter++) { GramList* tmp = iter->second; if(tmp) tmp->free(); } if(distinctStringIDs) distinctStringIDs->free(); } template void FilterTreeNode:: getInvertedLists(const string& query, const GramGen& gramGen, vector &invertedLists) { vector gramCodes; gramGen.decompose(query, gramCodes); getInvertedLists(gramCodes, invertedLists); } template void FilterTreeNode:: getInvertedLists(const vector& gramCodes, vector &invertedLists) { for(unsigned i = 0; i < gramCodes.size(); i++) { unsigned gramCode = gramCodes[i]; if(gramMap.find(gramCode) != gramMap.end()) { InvList* tmp = gramMap[gramCode]->getArray(); invertedLists.push_back(tmp); } } } template void FilterTreeNode:: getQueryGramLists(const string& query, GramGen& gramGen, vector* >* queryGramLists) { vector gramCodes; gramGen.decompose(query, gramCodes); getGramLists(gramCodes, queryGramLists); } template void FilterTreeNode:: getQueryGramLists(const vector& gramCodes, vector* >* queryGramLists) { for(unsigned i = 0; i < gramCodes.size(); i++) { unsigned gramCode = gramCodes[i]; if(gramMap.find(gramCode) != gramMap.end()) { queryGramLists->push_back( new QueryGramList(gramCode, gramMap[gramCode]) ); } } } template void FilterTreeNode:: getSubTreeLeaves(FilterTreeNode* node, vector* >& leaves) { if(node->isLeaf()) leaves.push_back(node); else { vector >& children = node->children; for(unsigned i = 0; i < children.size(); i++) FilterTreeNode::getSubTreeLeaves(children[i].node, leaves); } } template unsigned FilterTreeNode:: getSubTreeSize(FilterTreeNode* node) { unsigned size = node->getSize(); vector >& children = node->children; for(unsigned i = 0; i < children.size(); i++) size += FilterTreeNode::getSubTreeSize(children[i].node); return size; } template unsigned FilterTreeNode:: getSubTreeInvertedListsSize(FilterTreeNode* node) { unsigned size = node->getInvertedListsSize(); vector >& children = node->children; for(unsigned i = 0; i < children.size(); i++) size += FilterTreeNode::getSubTreeInvertedListsSize(children[i].node); return size; } template unsigned FilterTreeNode:: getSize() { unsigned size = 0; size += sizeof(GramMap); size += sizeof(GramList >*); size += sizeof(vector >); size += sizeof(pair) * children.size(); size += sizeof(InvList); size += gramMap.size() * (sizeof(unsigned) + sizeof(GramListOnDisk)); return size; } template unsigned FilterTreeNode:: getInvertedListsSize() { if(!isLeaf()) return 0; unsigned size = 0; typename GramMap::const_iterator citer; for(citer = gramMap.begin(); citer != gramMap.end(); citer++) size += citer->second->getArray()->size(); size += sizeof(unsigned) * distinctStringIDs->getArray()->size(); return size; } #endif