/*
  $Id: gramgen.h Tue Apr 05 10:20:24 PDT 2008 abehm$

  Copyright (C) 2007 by The Regents of the University of California
	
  Redistribution of this file is permitted under
  the terms of BSD license
    
  Date: 04/05/2008
  Author: Alexander Behm <abehm (at) ics.uci.edu>
          Rares Vernica <rares (at) ics.uci.edu> 
*/

#ifndef _gramgen_h_
#define _gramgen_h_

#include <map>
#include <set>
#include <vector>
#include <fstream>
#include <tr1/functional>

#include "typedef.h"

// Identifies the concrete kind of a gram generator (see GramGen::getType()).
// Declared as a plain enum: the former leading "typedef" had no declarator,
// so compilers ignored it and emitted a warning.
enum GramGenType
{
  GGT_FIXED // fixed-length gram generator (set by GramGenFixedLen)
};

// gram generator interface
class GramGen
{
 protected:
  static const std::tr1::hash<std::string> hashString;
  static const uchar PREFIXCHAR = 156; // pound
  static const uchar SUFFIXCHAR = 190; // yen

  GramGenType gramGenType;
  
 public:
  // pre-and postfix string when generating grams?
  // needs to be accessible by simmetric for calculating filter bounds (e.g. by jacc and cos)
  bool prePost;

  GramGen(bool usePrePost = true):
    prePost(usePrePost)
    {}

  virtual ~GramGen(){}

  // convert a string to a BAG of grams
  virtual void decompose(
    const std::string &s, 
    std::vector<std::string> &res,	 
    uchar st = PREFIXCHAR, 
    uchar en = SUFFIXCHAR) 
    const = 0;
  
  // convert a string to a BAG of hashed grams
  virtual void decompose(
    const std::string &s, 
    std::vector<uint> &res,	 
    uchar st = PREFIXCHAR, 
    uchar en = SUFFIXCHAR) 
    const = 0;
  
  // convert a string to a BAG (multiset) of grams
  virtual void decompose(
    const std::string &s, 
    std::multiset<std::string> &res,	 
    uchar st = PREFIXCHAR, 
    uchar en = SUFFIXCHAR) 
    const = 0;
  
  // convert a string to a BAG (multiset) of hashed grams
  virtual void decompose(
    const std::string &s, 
    std::multiset<uint> &res, 
    uchar st = PREFIXCHAR, 
    uchar en = SUFFIXCHAR) 
    const = 0;
  
  // convert a string to a SET of grams
  virtual void decompose(
    const std::string &s, 
    std::set<std::string> &res, 
    uchar st = PREFIXCHAR, 
    uchar en = SUFFIXCHAR) 
    const = 0;
  
  // convert a string to a SET of hashed grams
  virtual void decompose(
    const std::string &s, 
    std::set<uint> &res, 
    uchar st = PREFIXCHAR, 
    uchar en = SUFFIXCHAR) 
    const = 0;
  
  // convert a string to a SET of hashed grams with count
  virtual void decompose(
    const std::string &s, 
    std::map<uint, uint> &res,
    uchar st = PREFIXCHAR, 
    uchar en = SUFFIXCHAR) 
    const = 0;  
  
  virtual uint getNumGrams(const std::string& s) const = 0;

  virtual uint getGramLength() const = 0;

  virtual void saveGramGenInstance(std::ofstream& fpOut) = 0;

  GramGenType getType() const { return gramGenType; }

  static GramGen* loadGramGenInstance(std::ifstream& fpIn);
};


// Gram generator with a single, fixed gram length (type GGT_FIXED).
// Implements every pure virtual member of GramGen; all members except the
// inline constructor and getGramLength() are defined out of line.
class GramGenFixedLen: public GramGen
{
 private:
  uint q;       // gram length, see getGramLength()
  bool noSpace; // set from the ignoreSpaces constructor argument

  // helper defined out of line; by its name it tests whether s contains a space
  bool containsSpace(std::string& s) const;

 public:
  // gramLength:   gram length q
  // usePrePost:   forwarded to GramGen (initial value of prePost)
  // ignoreSpaces: stored in noSpace
  GramGenFixedLen(uint gramLength = 3,
                  bool usePrePost = true,
                  bool ignoreSpaces = false)
    : GramGen(usePrePost), q(gramLength), noSpace(ignoreSpaces)
  {
    gramGenType = GGT_FIXED;
  }

  // construct from an input stream; defined out of line
  // NOTE(review): presumably reads what saveGramGenInstance() wrote -- confirm
  GramGenFixedLen(std::ifstream& fpIn);

  // string -> BAG of grams
  virtual void decompose(const std::string &s, std::vector<std::string> &res,
                         uchar st = PREFIXCHAR, uchar en = SUFFIXCHAR) const;

  // string -> BAG of hashed grams
  virtual void decompose(const std::string &s, std::vector<uint> &res,
                         uchar st = PREFIXCHAR, uchar en = SUFFIXCHAR) const;

  // string -> BAG (multiset) of grams
  virtual void decompose(const std::string &s, std::multiset<std::string> &res,
                         uchar st = PREFIXCHAR, uchar en = SUFFIXCHAR) const;

  // string -> BAG (multiset) of hashed grams
  virtual void decompose(const std::string &s, std::multiset<uint> &res,
                         uchar st = PREFIXCHAR, uchar en = SUFFIXCHAR) const;

  // string -> SET of grams
  virtual void decompose(const std::string &s, std::set<std::string> &res,
                         uchar st = PREFIXCHAR, uchar en = SUFFIXCHAR) const;

  // string -> SET of hashed grams
  virtual void decompose(const std::string &s, std::set<uint> &res,
                         uchar st = PREFIXCHAR, uchar en = SUFFIXCHAR) const;

  // string -> SET of hashed grams with count
  virtual void decompose(const std::string &s, std::map<uint, uint> &res,
                         uchar st = PREFIXCHAR, uchar en = SUFFIXCHAR) const;

  // number of grams for the string s (defined out of line)
  virtual uint getNumGrams(const std::string& s) const;

  // the fixed gram length q
  virtual uint getGramLength() const
  {
    return q;
  }

  // write this generator to fpOut (defined out of line)
  virtual void saveGramGenInstance(std::ofstream& fpOut);
};

#endif

