/* $Id: heapmerger-pq.cc 4038 2008-10-03 19:34:25Z abehm $ Copyright (C) 2007 by The Regents of the University of California Redistribution of this file is permitted under the terms of the BSD license Date: 05/14/2007 Author: Chen Li */ #include #include #include #include #include "listmerger.h" #include "util/array.h" // http://support.microsoft.com/kb/837697 typedef pair*, unsigned> ArrayWithPosition; // Overload the < operator. bool operator< (const ArrayWithPosition &awp1, const ArrayWithPosition &awp2) { return awp1.first->at(awp1.second) < awp2.first->at(awp2.second); } // Overload the > operator. bool operator> (const ArrayWithPosition& awp1, const ArrayWithPosition &awp2) { return awp1.first->at(awp1.second) > awp2.first->at(awp2.second); } void HeapMerger::merge(const vector*> &arrays, const unsigned threshold, const unsigned maxObjectID, vector &results) { // build a heap. Each element is a list with its current position (0) priority_queue pq; for (unsigned i = 0; i < arrays.size(); i ++) pq.push((make_pair(arrays.at(i),0))); // remove any leftover in the results results.clear(); // iterate over the queue unsigned prevObjectId = 0; unsigned objFreqency = 0; while (!pq.empty()) { // remove the head ArrayWithPosition awp = pq.top(); pq.pop(); // check the object id of the current element //assert(awp.first != NULL); //assert(awp.first->size() > awp.second); unsigned newObjectId = awp.first->at(awp.second); if (newObjectId == prevObjectId) { objFreqency ++; } else { prevObjectId = newObjectId; // a new object id objFreqency = 1; } if (objFreqency >= threshold) // found a new candidate if (!results.empty() && results.back() != newObjectId) // ignore duplcates in the results results.push_back(newObjectId); // insert this candidate // increment the position of the list by one awp.second ++; if (awp.second < awp.first->size()) // the list has more elments pq.push(awp); } // return the results }