/* $Id: gramlistunion.h 5146 2010-03-24 23:05:57Z abehm $ Copyright (C) 2010 by The Regents of the University of California Redistribution of this file is permitted under the terms of the BSD license. Date: 04/04/2008 Author: Shengyue Ji */ #ifndef _gramlistunion_h_ #define _gramlistunion_h_ #include #include #include "gramlist.h" template > class GramListUnion : public GramList { private: GramListUnion *parent; public: unsigned rank; unsigned refCount; InvList list; GramListUnion(); GramListUnion *find(); GramListUnion *clean(); static void _union(GramListUnion *ga, GramListUnion *gb, unsigned unionSize); static unsigned testUnion(GramListUnion *ga, GramListUnion *gb, float th = 0.0, bool testOnly = false); InvList* getArray(fstream* invListFile = NULL) { return &list; } void free() { refCount--; if(refCount == 0)delete this; } void clear() {} }; template GramListUnion:: GramListUnion() : parent(0), rank(0), refCount(1) {} template GramListUnion* GramListUnion:: find() { if(parent == NULL)return this; parent = parent->find(); return parent; } template GramListUnion* GramListUnion:: clean() { if(parent == NULL)return this; GramListUnion *tmp; tmp = parent->find(); delete this; return tmp; } template void GramListUnion:: _union(GramListUnion *ga, GramListUnion *gb, unsigned unionSize) { if(ga->rank < gb->rank) { GramListUnion *gt = ga; ga = gb; gb = gt; } if(ga->rank == gb->rank) ga->rank++; ga->refCount += gb->refCount; gb->parent = ga; // merge list b into list a InvList *a = &ga->list; InvList *b = &gb->list; int i, j, k; i = a->size() - 1; j = b->size() - 1; k = unionSize - 1; a->setSize(unionSize); while(j >= 0 && i >= 0) { if((*a)[i] > (*b)[j]) (*a)[k--] = (*a)[i--]; else if((*a)[i] < (*b)[j]) (*a)[k--] = (*b)[j--]; else { (*a)[k--] = (*a)[i--]; j--; } } while(j >= 0 && k >= 0) { (*a)[k--] = (*b)[j--]; } } template unsigned GramListUnion:: testUnion(GramListUnion *ga, GramListUnion *gb, float th, bool testOnly) { ga = ga->find(); gb = gb->find(); if(ga == gb)return 0; unsigned unionSize = 0; unsigned x = 0, y = 0; InvList &a = ga->list; InvList &b = gb->list; while(x < a.size() && y < b.size()) { unionSize++; if(a[x] == b[y]) { x++; y++; } else if(a[x] < b[y]) { x++; } else { y++; } } unionSize += a.size() - x; unionSize += b.size() - y; unsigned intersection = a.size() + b.size() - unionSize; // jaccard similarity if(intersection >= (unsigned)(unionSize * th)) { // do the union here, merge one into the other if(!testOnly)_union(ga, gb, unionSize); return intersection; } return 0; } #endif