/*
  $Id: gramgen.h Tue Apr 05 10:20:24 PDT 2008 abehm$

  Copyright (C) 2007 by The Regents of the University of California

  Redistribution of this file is permitted under the terms of the BSD
  license

  Date: 04/05/2008
  Author: Rares Vernica
          Alexander Behm
*/

#include "gramgen.h"

#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

using namespace std;
using namespace tr1;

const hash<string> GramGen::hashString = hash<string>();

namespace {

  // Sink that appends every value it receives to a sequence container.
  template<class Container>
  struct PushBackSink {
    Container& out;
    explicit PushBackSink(Container& c): out(c) {}
    void operator()(const typename Container::value_type& v) const {
      out.push_back(v);
    }
  };

  template<class Container>
  PushBackSink<Container> pushBackInto(Container& c) {
    return PushBackSink<Container>(c);
  }

  // Sink that inserts every value it receives into a set/multiset.
  template<class Container>
  struct InsertSink {
    Container& out;
    explicit InsertSink(Container& c): out(c) {}
    void operator()(const typename Container::value_type& v) const {
      out.insert(v);
    }
  };

  template<class Container>
  InsertSink<Container> insertInto(Container& c) {
    return InsertSink<Container>(c);
  }

  // Sink that increments the counter stored under the received key.
  template<class Container>
  struct CountSink {
    Container& out;
    explicit CountSink(Container& c): out(c) {}
    void operator()(const typename Container::key_type& k) const {
      ++out[k];
    }
  };

  template<class Container>
  CountSink<Container> countInto(Container& c) {
    return CountSink<Container>(c);
  }

  // Adapter that hashes each gram and forwards the hash code to another sink.
  // The hasher is held by reference, so it must outlive the adapter.
  template<class Sink, class Hasher>
  struct HashedSink {
    Sink sink;
    const Hasher& hasher;
    HashedSink(const Sink& s, const Hasher& h): sink(s), hasher(h) {}
    void operator()(const string& gram) const {
      sink(hasher(gram));
    }
  };

  template<class Sink, class Hasher>
  HashedSink<Sink, Hasher> hashedInto(const Sink& s, const Hasher& h) {
    return HashedSink<Sink, Hasher>(s, h);
  }

  // Enumerates the grams of length q of s, left to right, and hands each one
  // to sink.
  //
  //   prePost  when true, s is first extended with q - 1 copies of st in
  //            front and q - 1 copies of en behind
  //   noSpace  when true, grams containing a ' ' character are skipped
  //
  // A text shorter than q yields no grams. The original loops computed
  // "length - q + 1" in unsigned arithmetic, which wrapped around for such
  // inputs and ended in substr() throwing std::out_of_range.
  // NOTE(review): q is expected to be >= 1; q == 0 is not handled specially.
  template<class Sink>
  void forEachGram(const string& s,
                   uint q,
                   bool prePost,
                   bool noSpace,
                   uchar st,
                   uchar en,
                   Sink sink)
  {
    string padded; // stays empty unless prePost is set
    const string* text = &s;
    if(prePost) {
      const string::size_type padLen = q - 1;
      padded.reserve(s.length() + 2 * padLen);
      padded.append(padLen, static_cast<char>(st));
      padded += s;
      padded.append(padLen, static_cast<char>(en));
      text = &padded;
    }

    const string::size_type gramLen = q;
    const string::size_type numGrams =
      text->length() >= gramLen ? text->length() - gramLen + 1 : 0;

    for(string::size_type i = 0; i < numGrams; ++i) {
      const string gram = text->substr(i, gramLen);
      if(noSpace && gram.find(' ') != string::npos) continue;
      sink(gram);
    }
  }

}

// Reads a GramGenType tag from fpIn and builds the matching gram generator
// from the data that follows it.
// Returns NULL (after printing an error to cout) if the tag cannot be read or
// names an unsupported type. The returned object is allocated with new;
// presumably the caller is responsible for deleting it.
GramGen* GramGen::loadGramGenInstance(ifstream& fpIn)
{
  GramGenType ggt = GGT_FIXED;
  // ggt is pre-set to GGT_FIXED, so a failed read must be detected explicitly;
  // otherwise a GramGenFixedLen would be built from a failed stream.
  if(!fpIn.read((char*)&ggt, sizeof(GramGenType))) {
    cout << "ERROR: could not read gramgentype from file!" << endl;
    return NULL;
  }

  switch(ggt) {
  case GGT_FIXED:
    return new GramGenFixedLen(fpIn);

    // TODO: implement others!

  default:
    cout << "ERROR: unknown gramgentype loaded from file!" << endl;
    return NULL;
  }
}

// Restores q and prePost from fpIn, in the order saveGramGenInstance() writes
// them (the leading type tag is consumed by loadGramGenInstance()).
// NOTE(review): noSpace and gramGenType are not set here -- confirm they are
// initialized elsewhere. The reads are not checked for failure.
GramGenFixedLen::GramGenFixedLen(ifstream& fpIn)
{
  fpIn.read((char*)&q, sizeof(uint));
  fpIn.read((char*)&prePost, sizeof(bool));
}

// Returns true if s contains at least one ' ' character.
bool GramGenFixedLen::containsSpace(string& s) const
{
  return s.find(' ') != string::npos;
}

// Appends the grams of s to res, in order of position.
void GramGenFixedLen::decompose(
  const string &s,
  vector<string> &res,
  uchar st,
  uchar en)
  const
{
  forEachGram(s, q, prePost, noSpace, st, en, pushBackInto(res));
}

// Appends the hash codes of the grams of s to res, in order of position.
void GramGenFixedLen::decompose(
  const string &s,
  vector<uint> &res,
  uchar st,
  uchar en)
  const
{
  forEachGram(s, q, prePost, noSpace, st, en,
              hashedInto(pushBackInto(res), hashString));
}

// Inserts the grams of s into res, keeping duplicates.
void GramGenFixedLen::decompose(
  const string &s,
  multiset<string> &res,
  uchar st,
  uchar en)
  const
{
  forEachGram(s, q, prePost, noSpace, st, en, insertInto(res));
}

// Inserts the hash codes of the grams of s into res, keeping duplicates.
void GramGenFixedLen::decompose(
  const string &s,
  multiset<uint> &res,
  uchar st,
  uchar en)
  const
{
  forEachGram(s, q, prePost, noSpace, st, en,
              hashedInto(insertInto(res), hashString));
}

// Inserts the distinct grams of s into res.
void GramGenFixedLen::decompose(
  const string &s,
  set<string> &res,
  uchar st,
  uchar en)
  const
{
  forEachGram(s, q, prePost, noSpace, st, en, insertInto(res));
}

// Inserts the distinct hash codes of the grams of s into res.
void GramGenFixedLen::decompose(
  const string &s,
  set<uint> &res,
  uchar st,
  uchar en)
  const
{
  forEachGram(s, q, prePost, noSpace, st, en,
              hashedInto(insertInto(res), hashString));
}

// Increments res[h] once for every gram of s whose hash code is h.
void GramGenFixedLen::decompose(
  const string &s,
  map<uint, uint> &res,
  uchar st,
  uchar en)
  const
{
  forEachGram(s, q, prePost, noSpace, st, en,
              hashedInto(countInto(res), hashString));
}

// Returns the number of gram positions of s: length + q - 1 with pre/post
// padding, otherwise length - q + 1, or 0 when s is shorter than q (the
// unguarded subtraction used to wrap around).
// The noSpace filter is not taken into account.
uint GramGenFixedLen::getNumGrams(const string& s) const
{
  if(prePost) return s.length() + q - 1;
  return s.length() >= q ? s.length() - q + 1 : 0;
}

// Writes the type tag, q and prePost to fpOut as raw bytes, in the order
// loadGramGenInstance() and the stream constructor read them back.
void GramGenFixedLen::saveGramGenInstance(ofstream& fpOut)
{
  fpOut.write((const char*)&gramGenType, sizeof(gramGenType));
  fpOut.write((const char*)&q, sizeof(uint));
  fpOut.write((const char*)&prePost, sizeof(bool));
}