/*
  $Id: simmetric.h 4025 2008-10-01 00:01:14Z abehm $

  Copyright (C) 2007 by The Regents of the University of California

  Redistribution of this file is permitted under the terms of the 
  BSD license

  Date: 04/15/2008
  Author: Rares Vernica <rares (at) ics.uci.edu>, Alexander Behm
*/

#ifndef _simmetric_h_
#define _simmetric_h_

#include "typedef.h"
#include "gramgen.h"

// TODO: discuss this with Ray!
// Preliminary location of the filter-tree types, just to make everything work.
// Selects the kind of filter for which bounds are requested; passed as the
// filterType argument of SimMetric::getFilterBounds().
enum FilterType
{
  FT_NONE     = 0,
  FT_LENGTH   = 1,
  FT_CHECKSUM = 2
};

// Constant associated (by name) with the checksum filter.
// NOTE(review): presumably the largest 7-bit ASCII character code -- confirm
// against the code that implements FT_CHECKSUM.
#define CHECKSUM_ASCII_MAX 127

// Identifiers for the supported distance measures.
// NOTE(review): not referenced anywhere in this header; the mapping to the
// SimMetric subclasses is presumably done by the users of this enum.
enum DistanceMeasure
{
  DM_EDIT_DISTANCE = 0,
  DM_JACCARD       = 1,
  DM_COSINE        = 2
};

// Abstract base class of all similarity metrics.
//
// A metric is a function object: operator() evaluates it on a pair of
// strings.  The other pure-virtual members expose the bounds (merge
// threshold, filter bounds, similarity bounds, common-gram bound) that every
// concrete metric has to supply.
//
// Lifetime: gramGen is stored as a reference, not a copy, so the GramGen
// handed to the constructor must outlive the metric object.  Because of the
// reference and const data members the class is not copy-assignable.
class SimMetric                 // abstract class
{
public:
  const GramGen& gramGen;       // gram generator borrowed from the caller
  const std::string name;       // label supplied at construction

  // name is taken by const reference so that the string is copied only once
  // (into the member) instead of once per argument and once per member.
  SimMetric(const GramGen &gramGen, const std::string &name): 
    gramGen(gramGen), name(name) 
  {}

  virtual ~SimMetric() 
  {}

  // Evaluate the metric on s1 and s2.
  // NOTE(review): whether the result is a distance or a similarity depends
  // on the concrete subclass -- confirm in the implementation file.
  virtual float operator()(const std::string &s1, const std::string &s2)
    const = 0;

  // Threshold test on s1 and s2.  Not pure: the default implementation is
  // defined out of line (not visible in this header).
  virtual bool operator()(
    const std::string &s1, 
    const std::string &s2, 
    float threshold)
    const;

  // compute merging threshold
  virtual uint getMergeThreshold(
    const std::string& query, 
    const std::vector<uint>& queryGramCodes,
    const float simThreshold) 
    const = 0;

  // get filter bounds; the result is written to lbound and ubound
  virtual void getFilterBounds(
    const std::string& query,
    const float simThreshold,
    const FilterType filterType,
    uint& lbound,
    uint& ubound)
    const = 0;  

  // bounds on similarity
  virtual float getSimMin(
    uint noGramsQuery, 
    uint noGramsData, 
    uint noGramsCommon) 
    const = 0;
 
  virtual float getSimMax(
    uint lenQuery, 
    uint noGramsQuery, 
    uint noGramsData, 
    uint noGramsCommon) 
    const = 0;
  
  // bound on common grams
  virtual uint getNoGramsMin(
    uint lenQuery, 
    uint noGramsMin, 
    uint noGramsQuery, 
    float sim)
    const = 0;
};

// Concrete metric with the default name "ed".
// NOTE(review): presumably plain edit distance -- confirm in the
// implementation file.
//
// Declares an override for every pure-virtual member of SimMetric as well as
// for the threshold form of operator(); all bodies are defined out of line.
class SimMetricEd: public SimMetric 
{
public:
  // gramGen is forwarded to SimMetric, which stores it by reference; it must
  // outlive this object.  name is passed by const reference to avoid an
  // extra string copy.
  SimMetricEd(const GramGen &gramGen, const std::string &name = "ed"): 
    SimMetric(gramGen, name) 
  {}

  // metric value for the pair (s1, s2)
  virtual float operator()(const std::string &s1, const std::string &s2)
    const;

  // threshold test for the pair (s1, s2)
  virtual bool operator()(
    const std::string &s1, 
    const std::string &s2, 
    float threshold)
    const;

  // compute merging threshold
  virtual uint getMergeThreshold(
    const std::string& query, 
    const std::vector<uint>& queryGramCodes,
    const float simThreshold) 
    const;

  // get filter bounds; the result is written to lbound and ubound
  virtual void getFilterBounds(
    const std::string& query,
    const float simThreshold,
    const FilterType filterType,
    uint& lbound,
    uint& ubound)
    const;  
  
  // bounds on similarity
  virtual float getSimMin(
    uint noGramsQuery, 
    uint noGramsData, 
    uint noGramsCommon) 
    const;

  virtual float getSimMax(
    uint lenQuery, 
    uint noGramsQuery, 
    uint noGramsData, 
    uint noGramsCommon) 
    const;

  // bound on common grams
  virtual uint getNoGramsMin(
    uint lenQuery, 
    uint noGramsMin, 
    uint noGramsQuery, 
    float sim)
    const;
};

// Specialisation of SimMetricEd with the default name "ednorm".
// NOTE(review): presumably a normalised variant of edit distance -- confirm
// in the implementation file.
//
// Re-declares every virtual member of SimMetricEd, so nothing is inherited
// unchanged from it; all bodies are defined out of line.
class SimMetricEdNorm: public SimMetricEd
{
public:
  // gramGen ends up stored by reference in the SimMetric base and must
  // outlive this object.  name is passed by const reference to avoid an
  // extra string copy.
  SimMetricEdNorm(const GramGen &gramGen, const std::string &name = "ednorm"): 
    SimMetricEd(gramGen, name) 
  {}

  // metric value for the pair (s1, s2)
  virtual float operator()(const std::string &s1, const std::string &s2)
    const;

  // threshold test for the pair (s1, s2)
  virtual bool operator()(
    const std::string &s1, 
    const std::string &s2, 
    float threshold)
    const;

  // compute merging threshold
  virtual uint getMergeThreshold(
    const std::string& query, 
    const std::vector<uint>& queryGramCodes,
    const float simThreshold) 
    const;

  // get filter bounds; the result is written to lbound and ubound
  virtual void getFilterBounds(
    const std::string& query,
    const float simThreshold,
    const FilterType filterType,
    uint& lbound,
    uint& ubound)
    const;  
  
  // bounds on similarity
  virtual float getSimMin(
    uint noGramsQuery, 
    uint noGramsData, 
    uint noGramsCommon) 
    const;

  virtual float getSimMax(
    uint lenQuery, 
    uint noGramsQuery, 
    uint noGramsData, 
    uint noGramsCommon) 
    const;

  // bound on common grams
  virtual uint getNoGramsMin(
    uint lenQuery, 
    uint noGramsMin, 
    uint noGramsQuery, 
    float sim)
    const;
};

// Concrete metric with the default name "edswap"; derives directly from
// SimMetric, not from SimMetricEd.
// NOTE(review): presumably edit distance that also counts swaps of
// characters -- confirm in the implementation file.
//
// Declares an override for every pure-virtual member of SimMetric as well as
// for the threshold form of operator(); all bodies are defined out of line.
class SimMetricEdSwap: public SimMetric 
{
public:
  // gramGen is forwarded to SimMetric, which stores it by reference; it must
  // outlive this object.  name is passed by const reference to avoid an
  // extra string copy.
  SimMetricEdSwap(const GramGen &gramGen, const std::string &name = "edswap"): 
    SimMetric(gramGen, name) 
  {}

  // metric value for the pair (s1, s2)
  virtual float operator()(const std::string &s1, const std::string &s2)
    const;

  // threshold test for the pair (s1, s2)
  virtual bool operator()(
    const std::string &s1, const std::string &s2, float threshold) 
    const;

  // compute merging threshold
  virtual uint getMergeThreshold(
    const std::string& query, 
    const std::vector<uint>& queryGramCodes,
    const float simThreshold) 
    const;

  // get filter bounds; the result is written to lbound and ubound
  virtual void getFilterBounds(
    const std::string& query,
    const float simThreshold,
    const FilterType filterType,
    uint& lbound,
    uint& ubound)
    const;  
  
  // bounds on similarity
  virtual float getSimMin(
    uint noGramsQuery, 
    uint noGramsData, 
    uint noGramsCommon) 
    const;

  virtual float getSimMax(
    uint lenQuery, 
    uint noGramsQuery, 
    uint noGramsData, 
    uint noGramsCommon) 
    const;

  // bound on common grams
  virtual uint getNoGramsMin(
    uint lenQuery, 
    uint noGramsMin, 
    uint noGramsQuery, 
    float sim)
    const;
};

// Abstract intermediate class for metrics that can be evaluated from gram
// counts alone (grams in the data string, grams in the query, grams in
// common).
//
// Subclasses implement the three-count operator(); the threshold test and
// both similarity bounds are expressed here in terms of it.
//
// NOTE: the operator() overloads declared in this class hide the string
// based operator() overloads inherited from SimMetric for calls made through
// a SimMetricGram-typed expression; call through a SimMetric reference or
// pointer to reach those.
class SimMetricGram: public SimMetric // abstract class
{
public:
  // gramGen is forwarded to SimMetric, which stores it by reference; it must
  // outlive this object.  name is passed by const reference to avoid an
  // extra string copy.
  SimMetricGram(const GramGen &gramGen, const std::string &name): 
    SimMetric(gramGen, name) 
  {}

  // metric value computed from the three gram counts
  virtual float operator()(
    uint noGramsData, 
    uint noGramsQuery, 
    uint noGramsCommon) 
    const = 0;

  // true iff the metric value for the given counts is at least threshold
  virtual bool operator()(
    uint noGramsData, 
    uint noGramsQuery, 
    uint noGramsCommon, 
    float threshold)
    const 
  { return operator()(noGramsData, noGramsQuery, noGramsCommon) >= threshold; }

  // Lower similarity bound: simply the metric value for the given counts.
  // Note the argument order: this function takes the query count first,
  // operator() takes the data count first.
  virtual float getSimMin(
    uint noGramsQuery, 
    uint noGramsData, 
    uint noGramsCommon) 
    const 
  { return operator()(noGramsData, noGramsQuery, noGramsCommon); }

  // Upper similarity bound: identical to getSimMin(); lenQuery is not used.
  virtual float getSimMax(
    uint lenQuery, 
    uint noGramsQuery, 
    uint noGramsData, 
    uint noGramsCommon) 
    const
  { return operator()(noGramsData, noGramsQuery, noGramsCommon); }
};

// Gram-based metric with the default name "jc".
// NOTE(review): presumably the Jaccard coefficient over gram sets --
// confirm in the implementation file.
//
// getSimMin(), getSimMax() and the four-argument threshold operator() are
// inherited from SimMetricGram; everything declared here is defined out of
// line.
class SimMetricJacc: public SimMetricGram // set semantics !
{
public:
  // gramGen ends up stored by reference in the SimMetric base and must
  // outlive this object.  name is passed by const reference to avoid an
  // extra string copy.
  SimMetricJacc(const GramGen &gramGen, const std::string &name = "jc"): 
    SimMetricGram(gramGen, name) 
  {}

  // metric value for the pair (s1, s2)
  virtual float operator()(const std::string &s1, const std::string &s2)
    const;

  // metric value computed from the three gram counts
  virtual float operator()(
    uint noGramsData, 
    uint noGramsQuery, 
    uint noGramsCommon) 
    const;

  // TODO: talk to Ray!
  // compute merging threshold
  virtual uint getMergeThreshold(
    const std::string& query, 
    const std::vector<uint>& queryGramCodes,
    const float simThreshold) 
    const; 

  // TODO: talk to Ray!
  // get filter bounds; the result is written to lbound and ubound
  virtual void getFilterBounds(
    const std::string& query,
    const float simThreshold,
    const FilterType filterType,
    uint& lbound,
    uint& ubound)
    const;

  // bound on common grams
  virtual uint getNoGramsMin(
    uint lenQuery, 
    uint noGramsMin, 
    uint noGramsQuery, 
    float sim)
    const;
};

// Gram-based metric with the default name "cos".
// NOTE(review): presumably cosine similarity over gram sets -- confirm in
// the implementation file.
//
// getSimMin(), getSimMax() and the four-argument threshold operator() are
// inherited from SimMetricGram; everything declared here is defined out of
// line.
class SimMetricCos: public SimMetricGram // set semantics !
{
public:
  // gramGen ends up stored by reference in the SimMetric base and must
  // outlive this object.  name is passed by const reference to avoid an
  // extra string copy.
  SimMetricCos(const GramGen &gramGen, const std::string &name = "cos"): 
    SimMetricGram(gramGen, name) 
  {}

  // metric value for the pair (s1, s2)
  virtual float operator()(const std::string &s1, const std::string &s2)
    const;

  // metric value computed from the three gram counts
  virtual float operator()(
    uint noGramsData, 
    uint noGramsQuery, 
    uint noGramsCommon) 
    const;

  // compute merging threshold
  virtual uint getMergeThreshold(
    const std::string& query, 
    const std::vector<uint>& queryGramCodes,
    const float simThreshold) 
    const;
  
  // get filter bounds; the result is written to lbound and ubound
  virtual void getFilterBounds(
    const std::string& query,
    const float simThreshold,
    const FilterType filterType,
    uint& lbound,
    uint& ubound)
    const;

  // bound on common grams
  virtual uint getNoGramsMin(
    uint lenQuery, 
    uint noGramsMin, 
    uint noGramsQuery, 
    float sim)
    const;
};

// Gram-based metric with the default name "dice".
// NOTE(review): presumably the Dice coefficient over gram sets -- confirm
// in the implementation file.
//
// getSimMin(), getSimMax() and the four-argument threshold operator() are
// inherited from SimMetricGram; everything declared here is defined out of
// line.
class SimMetricDice: public SimMetricGram // set semantics !
{
public:
  // gramGen ends up stored by reference in the SimMetric base and must
  // outlive this object.  name is passed by const reference to avoid an
  // extra string copy.
  SimMetricDice(const GramGen &gramGen, const std::string &name = "dice"): 
    SimMetricGram(gramGen, name) 
  {}

  // metric value for the pair (s1, s2)
  virtual float operator()(const std::string &s1, const std::string &s2)
    const;

  // metric value computed from the three gram counts
  virtual float operator()(
    uint noGramsData, 
    uint noGramsQuery, 
    uint noGramsCommon) 
    const;

  // compute merging threshold
  virtual uint getMergeThreshold(
    const std::string& query, 
    const std::vector<uint>& queryGramCodes,
    const float simThreshold)
    const;

  // get filter bounds; the result is written to lbound and ubound
  virtual void getFilterBounds(
    const std::string& query,
    const float simThreshold,
    const FilterType filterType,
    uint& lbound,
    uint& ubound)
    const;

  // bound on common grams
  virtual uint getNoGramsMin(
    uint lenQuery, 
    uint noGramsMin, 
    uint noGramsQuery, 
    float sim)
    const;
};

// Gram-based metric with the default name "cnt".
// NOTE(review): presumably based on the raw number of common grams --
// confirm in the implementation file.
//
// getSimMin(), getSimMax() and the four-argument threshold operator() are
// inherited from SimMetricGram; everything declared here is defined out of
// line.
class SimMetricGramCount: public SimMetricGram 
{
public:
  // gramGen ends up stored by reference in the SimMetric base and must
  // outlive this object.  name is passed by const reference to avoid an
  // extra string copy.
  SimMetricGramCount(const GramGen &gramGen, const std::string &name = "cnt"): 
    SimMetricGram(gramGen, name) 
  {}
 
  // metric value for the pair (s1, s2)
  virtual float operator()(const std::string &s1, const std::string &s2)
    const;

  // metric value computed from the three gram counts
  virtual float operator()(
    uint noGramsData, 
    uint noGramsQuery, 
    uint noGramsCommon) 
    const;

  // compute merging threshold
  virtual uint getMergeThreshold(
    const std::string& query, 
    const std::vector<uint>& queryGramCodes,
    const float simThreshold)
    const;

  // get filter bounds; the result is written to lbound and ubound
  virtual void getFilterBounds(
    const std::string& query,
    const float simThreshold,
    const FilterType filterType,
    uint& lbound,
    uint& ubound)
    const;

  // bound on common grams
  virtual uint getNoGramsMin(
    uint lenQuery, 
    uint noGramsMin, 
    uint noGramsQuery, 
    float sim)
    const;
};

#endif
