/*
  $Id: ftindexermemabs.h 4025 2008-10-01 00:01:14Z abehm $

  Copyright (C) 2007 by The Regents of the University of California
	
  Redistribution of this file is permitted under
  the terms of the BSD license
    
  Date: 09/17/2007
  Author: Alexander Behm <abehm (at) ics.uci.edu>
*/

#ifndef _ftindexermemabs_h_
#define _ftindexermemabs_h_

#include "ftindexerabs.h"
#include "gramlistsimple.h"

template<class FtIndexerConcrete>
class FtIndexerMemAbs;

template<class FtIndexerConcrete>
struct FtIndexerTraits<FtIndexerMemAbs<FtIndexerConcrete> > {
  typedef FtIndexerTraits<FtIndexerConcrete> TraitsType;
  typedef typename TraitsType::StringContainer StringContainer;
  typedef typename TraitsType::InvList InvList;
};

template<class FtIndexerConcrete>
class FtIndexerMemAbs 
  : public FtIndexerAbs<FtIndexerMemAbs<FtIndexerConcrete> > {
  
public:
  typedef FtIndexerTraits<FtIndexerConcrete> TraitsType;
  typedef typename TraitsType::StringContainer StringContainer;
  typedef typename TraitsType::InvList InvList;
  
protected:  
  void addStringToNode(FilterTreeNode<InvList>* node, const string& s, const unsigned stringId);

  typedef 
  FtIndexerAbs<FtIndexerMemAbs<FtIndexerConcrete> >
  SuperClass;  
  
  // used for reading/writing index from/to file, 
  // using logical separation into functions so compressed indexers can use these, too
  void saveLeavesRec(FilterTreeNode<InvList>* node, ofstream& fpOut);
  void loadLeavesRec(FilterTreeNode<InvList>* node, ifstream& fpIn);
    
public:
  typedef typename SuperClass::GramMap GramMap;

  FtIndexerMemAbs(StringContainer* strContainer) : SuperClass(strContainer) {}
  FtIndexerMemAbs(StringContainer* strContainer, 
		  GramGen* gramGenerator, 
		  const unsigned maxStrLen = 150, 
		  const unsigned ftFanout = 50) 
    : SuperClass(strContainer, gramGenerator, maxStrLen, ftFanout) {}

  void addStringId(FilterTreeNode<InvList>* leaf, const vector<unsigned>& gramCodes, unsigned stringId) {
    static_cast<FtIndexerConcrete*>(this)->addStringId_Impl(leaf, gramCodes, stringId);
  }

  void saveIndex(const char* indexFileName) {
    static_cast<FtIndexerConcrete*>(this)->saveIndex_Impl(indexFileName);
  }
  
  void loadIndex(const char* indexFileName) {
    static_cast<FtIndexerConcrete*>(this)->loadIndex_Impl(indexFileName);
  }  
  
  void buildIndex_Impl(const string& dataFile, const unsigned linesToRead = 0, bool showProgress = true, StatsUtil* sutil = NULL);
  void buildIndex_Impl(bool showProgress = true, StatsUtil* sutil = NULL);

  void printFilterTree(FilterTreeNode<InvList>* node = NULL);
};

template<class FtIndexerConcrete> 
void 
FtIndexerMemAbs<FtIndexerConcrete>::
addStringToNode(FilterTreeNode<InvList>* node, const string& s, const unsigned stringId) {
  // add stringId to distinctStringIDs and create the GramListSimple instance if neccessary
  if(!node->distinctStringIDs) node->distinctStringIDs = new GramListSimple<InvList>();
  node->distinctStringIDs->getArray()->append(stringId);
  
  // add stringId to gramMap of leaf
  vector<unsigned> gramCodes;
  this->gramGen->decompose(s, gramCodes);    
  addStringId(node, gramCodes, stringId);
}

template<class FtIndexerConcrete> 
void
FtIndexerMemAbs<FtIndexerConcrete>:: 
buildIndex_Impl(const string& dataFile, const unsigned linesToRead, bool showProgress, StatsUtil* sutil) {  
  this->strContainer->fillContainer(dataFile.c_str(), linesToRead, this->maxStrLen);
  this->buildIndex(showProgress, sutil);
}

template<class FtIndexerConcrete> 
void 
FtIndexerMemAbs<FtIndexerConcrete>::
buildIndex_Impl(bool showProgress, StatsUtil* sutil) {
  // do we want to gather stats?
  if(sutil) {
    // fill stats information known at this time
    sutil->filterTreeStats.dictionarySize = this->strContainer->size();
    sutil->filterTreeStats.maxStrLength = this->maxStrLen;
    sutil->filterTreeStats.numberFilters = this->filterTypes.size();    
    sutil->startTimeMeasurement();
  }
  
  LoopTimer loopTimer;

  // build hollow tree, i.e. create nodes and leaves without filling inverted lists  
  if(this->filterTypes.size() > 0 && this->ftFanout > 0) {
    this->buildHollowTreeRecursive(this->filterTreeRoot, 0);
    
    // add stringIds to tree
    if(showProgress) loopTimer.begin("INSERTING INTO INDEX", this->strContainer->size());
    for(unsigned stringId = 0; stringId < this->strContainer->size(); stringId++) {
      FilterTreeNode<InvList>* currentNode = this->filterTreeRoot;
      string currentString;
      this->strContainer->retrieveString(currentString, stringId);
      if(currentString.empty()) continue;
      
      // position currentNode to appropriate leaf node in filter tree      
      for(unsigned filterId = 0; filterId < this->filterTypes.size(); filterId++) {
	AbstractFilter* filter = this->filterTypes.at(filterId);
	unsigned key = filter->getKey(currentString);
	vector<pair<unsigned, FilterTreeNode<InvList>*> >& children = currentNode->children;
	
	// match key to node
	for(unsigned childId = 0; childId < children.size(); childId++) {
	  pair<unsigned, FilterTreeNode<InvList>*>& child = children.at(childId);
	  if(child.first >= key) {
	    currentNode = child.second;
	    break;
	  }
	}
      }      
      addStringToNode(currentNode, currentString, stringId);
      if(showProgress) loopTimer.next();
    }  
    if(showProgress) loopTimer.end();
  }
  else {
    // no filters selected, so just create one leaf node to store all inverted lists
    FilterTreeNode<InvList>* n = new FilterTreeNode<InvList>(true);
    pair<unsigned, FilterTreeNode<InvList>*> p(0, n);
    this->filterTreeRoot->children.push_back(p);
    if(showProgress) loopTimer.begin("INSERTING INTO INDEX", this->strContainer->size());    
    for(unsigned stringId = 0; stringId < this->strContainer->size(); stringId++) {
      string currentString;
      this->strContainer->retrieveString(currentString, stringId);
      if(currentString.empty()) continue;      
      addStringToNode(n, currentString, stringId);
      if(showProgress) loopTimer.next();
    }
    if(showProgress) loopTimer.end();
  } 
  
  // set next stringID
  this->nextStringID = this->strContainer->size();

  // do we want to gather stats?
  if(sutil) {
    sutil->stopTimeMeasurement();
    sutil->filterTreeStats.buildFilterTreeTime = sutil->getTimeMeasurement();        
  }
}

template<class FtIndexerConcrete> 
void 
FtIndexerMemAbs<FtIndexerConcrete>::
saveLeavesRec(FilterTreeNode<InvList>* node, ofstream& fpOut) {
  unsigned u;
  
  if(node->isLeaf) {
    if(node->distinctStringIDs) {
      InvList* distinctArr = node->distinctStringIDs->getArray();
      // write distinct string ids
      u = distinctArr->size();
      fpOut.write((const char*)&u, sizeof(unsigned));
      for(unsigned i = 0; i < distinctArr->size(); i++) {
	u = distinctArr->at(i);
	fpOut.write((const char*)&u, sizeof(unsigned));      
      }      
    }
    else {
      u = 0;
      fpOut.write((const char*)&u, sizeof(unsigned));
    }
    
    // write gram map
    GramMap& gramMap = node->gramMap;
    u = gramMap.size();
    fpOut.write((const char*)&u, sizeof(unsigned));
    for(typename GramMap::iterator iter = gramMap.begin();     
	iter != gramMap.end();
	iter++) {
      
      u = iter->first;
      fpOut.write((const char*)&u, sizeof(unsigned));
      
      Array<unsigned>* arr = iter->second->getArray();
      if(arr) {
	u = arr->size();
	fpOut.write((const char*)&u, sizeof(unsigned));
	for(unsigned i = 0; i < arr->size(); i++) {
	  u = arr->at(i);
	  fpOut.write((const char*)&u, sizeof(unsigned));
	}
      }
      else {
	u = 0;
	fpOut.write((const char*)&u, sizeof(unsigned));
      }      
    }    
  }
  else {
    vector<pair<unsigned, FilterTreeNode<InvList>*> >& children = node->children;
    for(unsigned i = 0; i < children.size(); i++)
      saveLeavesRec(children.at(i).second, fpOut);
  }
}

template<class FtIndexerConcrete> 
void 
FtIndexerMemAbs<FtIndexerConcrete>::
loadLeavesRec(FilterTreeNode<InvList>* node, ifstream& fpIn) {
  if(node->isLeaf) {
    if(node->distinctStringIDs) delete node->distinctStringIDs;
    unsigned distinctStrings;
    fpIn.read((char*)&distinctStrings, sizeof(unsigned));
    if(distinctStrings > 0) node->distinctStringIDs = new GramListSimple<InvList>();
    for(unsigned i = 0; i < distinctStrings; i++) {
      unsigned tmp;
      fpIn.read((char*)&tmp, sizeof(unsigned));
      node->distinctStringIDs->getArray()->append(tmp);
    }    
    
    GramMap& gramMap = node->gramMap;
    unsigned gramMapSize;
    unsigned gramCode;
    unsigned arrSize;
    fpIn.read((char*)&gramMapSize, sizeof(unsigned));    
    for(unsigned j = 0; j < gramMapSize; j++) {
      fpIn.read((char*)&gramCode, sizeof(unsigned));
      fpIn.read((char*)&arrSize, sizeof(unsigned));
      if(arrSize != 0) {	
	GramListSimple<InvList>* gramList = new GramListSimple<InvList>();
	gramMap[gramCode] = gramList;
	InvList* newArr = gramList->getArray();
	for(unsigned i = 0; i < arrSize; i++) {
	  unsigned tmp;
	  fpIn.read((char*)&tmp, sizeof(unsigned));
	  newArr->append(tmp);		
	}
      }
    }
  }
  else {
    vector<pair<unsigned, FilterTreeNode<InvList>*> >& children = node->children;
    for(unsigned i = 0; i < children.size(); i++)
      loadLeavesRec(children.at(i).second, fpIn);       
  } 
}

template<class FtIndexerConcrete> 
void
FtIndexerMemAbs<FtIndexerConcrete>::
printFilterTree(FilterTreeNode<InvList>* node) {
  if(!node) {
    printFilterTree(this->filterTreeRoot);
    return;
  }
  
  if(node->isLeaf()) {
    GramMap& gramMap = node->gramMap;
    
    for(typename GramMap::iterator iter = gramMap.begin();
	iter != gramMap.end();
	iter++) {
      
      Array<unsigned>* arr = iter->second->getArray();
      cout << iter->first << " ";
      if(arr) {
	for(unsigned i = 0; i < arr->size(); i++)
	  cout << arr->at(i) << " ";
      }
      cout << endl;
    }
  }
  else {
    vector<pair<unsigned, FilterTreeNode<InvList>*> >& children = node->children;
    for(unsigned i = 0; i < children.size(); i++) {
      cout << children.at(i).first << endl;
      printFilterTree(children.at(i).second);
    }
  }
}

#endif
