/*
 *  Copyright (c) 2012 Shirou Maruyama
 * 
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 * 
 *   1. Redistributions of source code must retain the above Copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above Copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 *   3. Neither the name of the authors nor the names of its contributors
 *      may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 */

#include <vector>
#include <string>
#include <iostream>
#include <algorithm>
#include <stdint.h>
#include "CompPermIdx.hpp"
#include "HufWatTree.hpp"
#include "MultikeyQsort.hpp"
#include "sais.hxx"

using namespace std;

namespace cpi00 {

  // Appends every character of `str`, converted to uint8_t, to the end of
  // `ret`. Whatever `ret` already holds is kept in front of the new bytes.
  void String2Uint8_Ts(const string& str, vector<uint8_t>& ret) {
    for (string::size_type idx = 0; idx < str.size(); ++idx) {
      const uint8_t byte = static_cast<uint8_t>(str[idx]);
      ret.push_back(byte);
    }
  }

  // Rebuilds `ret` from scratch as
  //   kDelimiter kw[0] kDelimiter kw[1] ... kDelimiter kw[n-1] kDelimiter kTerminator
  // Any previous contents (and storage) of `ret` are discarded first. With an
  // empty keyword list the result is just kDelimiter followed by kTerminator.
  void ConcatKeywords(const vector<string>& keywords,
                      vector<uint8_t>& ret) {
    {
      // Swap with an empty vector so the old buffer is released right here.
      vector<uint8_t> fresh;
      ret.swap(fresh);
    }
    for (vector<string>::size_type k = 0; k < keywords.size(); ++k) {
      // Every keyword is preceded by a delimiter.
      ret.push_back(kDelimiter);
      String2Uint8_Ts(keywords[k], ret);
    }
    // Close the last keyword, then mark the end of the whole text.
    ret.push_back(kDelimiter);
    ret.push_back(kTerminator);
  }

  // Fills `c` with cumulative symbol counts of `str`: after the call `c` has
  // kMaxAlphabetSize entries and c[s] is the number of bytes in `str` whose
  // value is strictly smaller than s (so c[0] is always 0).
  // Previous contents of `c` are discarded.
  void ComputeC(const vector<uint8_t>& str, vector<uint64_t>& c) {
    // Histogram of symbol occurrences.
    vector<uint64_t> freq(kMaxAlphabetSize, 0);
    for (vector<uint8_t>::size_type p = 0; p < str.size(); ++p) {
      freq[str[p]] += 1;
    }

    {
      vector<uint64_t> fresh;
      c.swap(fresh);
    }

    // Exclusive prefix sum over the histogram; the count of the very last
    // symbol is never added because nothing sorts after it.
    uint64_t running = 0;
    c.push_back(running);
    for (uint64_t sym = 0; sym != kMaxAlphabetSize - 1; ++sym) {
      running += freq[sym];
      c.push_back(running);
    }
  }

  // Builds the index from `keywords`.
  //
  // Steps, in order:
  //   1. run MultikeyQsort over the whole keyword list (presumably an
  //      in-place sort -- confirm against MultikeyQsort.hpp),
  //   2. concatenate the keywords into one delimiter-separated byte text,
  //   3. derive the cumulative symbol counts C_ from that text,
  //   4. compute the BWT of the text with saisxx_bwt,
  //   5. hand the BWT to WT_.Build.
  // Each temporary buffer is released as soon as it is no longer needed.
  // NOTE(review): the text length is narrowed to int for saisxx_bwt and the
  // value it returns is ignored.
  void CompPermIdx::Build(vector<string>& keywords) {
    MultikeyQsort(keywords, 0, keywords.size(), 0);

    vector<uint8_t> text;
    ConcatKeywords(keywords, text);
    ComputeC(text, C_);
    NumKeys_ = keywords.size();

    const vector<uint8_t>::size_type length = text.size();
    vector<uint8_t> bwt(length);
    vector<uint32_t> suffix_array(length);
    saisxx_bwt(text.begin(), bwt.begin(),
               suffix_array.begin(), static_cast<int>(length));

    // Only the BWT is needed from here on; free the other two buffers before
    // WT_ is built.
    {
      vector<uint8_t> drop;
      text.swap(drop);
    }
    {
      vector<uint32_t> drop;
      suffix_array.swap(drop);
    }

    WT_.Build(bwt);

    {
      vector<uint8_t> drop;
      bwt.swap(drop);
    }
  }
  
  // Returns pos + 1 when pos is strictly below C_[1] - 1, otherwise returns
  // pos unchanged. C_[1] is the number of occurrences of symbol value 0 in
  // the indexed text when C_ was produced by ComputeC.
  uint64_t CompPermIdx::Jump2End(const uint64_t pos) const {
    const uint64_t limit = C_[1] - 1;
    return (pos < limit) ? pos + 1 : pos;
  }

  // One backward step from row `pos`: reads the symbol WT_ stores at `pos`
  // and returns C_[symbol] + WT_.Rank(symbol, pos) - 1.
  // NOTE(review): the "- 1" implies WT_.Rank counts position `pos` itself --
  // confirm against HufWatTree.hpp.
  uint64_t CompPermIdx::BackStep(const uint64_t pos) const {
    const uint64_t symbol = WT_.Access(pos);
    const uint64_t occurrences = WT_.Rank(symbol, pos);
    return C_[symbol] + occurrences - 1;
  }

  // Reconstructs into `ret` the keyword reached from row `pos`.
  //
  // The row is first walked backwards with BackStep until it drops below
  // C_[1], then moved with Jump2End. From there symbols are read from WT_
  // and collected, stepping backwards, until kDelimiter is read. Symbols
  // come out last-to-first, so the collected string is reversed at the end.
  // Previous contents of `ret` are discarded.
  void CompPermIdx::GetString(const uint64_t pos, string& ret) const {
    {
      string fresh;
      ret.swap(fresh);
    }

    uint64_t row = pos;
    for (; row >= C_[1]; row = BackStep(row)) {
      // keep stepping back until the row index is below C_[1]
    }
    row = Jump2End(row);

    for (;;) {
      const uint64_t symbol = WT_.Access(row);
      if (symbol == kDelimiter) break;
      ret.push_back(static_cast<char>(symbol));
      row = BackStep(row);
    }
    reverse(ret.begin(), ret.end());
  }

  // Appends to `ret` the string obtained by GetString for every row in the
  // inclusive range [first, last], in increasing row order.
  // `ret` is not cleared here; callers that want a fresh result clear it.
  void CompPermIdx::GetStringSet(const uint64_t first, const uint64_t last,
                                 vector<string>& ret) const {
    uint64_t row = first;
    while (row <= last) {
      string keyword;
      GetString(row, keyword);
      ret.push_back(keyword);
      ++row;
    }
  }

  // Tracks which rows of the inclusive range [first, last] were visited.
  // `marks` holds one flag per row of the range, indexed by pos - first.
  //
  // Returns false if `pos` lies inside the range and was already marked.
  // Otherwise returns true, marking `pos` first when it lies inside the
  // range. Rows outside the range are never recorded.
  bool CheckMarks(const uint64_t pos, const uint64_t first, const uint64_t last,
                  vector<bool>& marks) {
    const bool outside = (pos < first) || (last < pos);
    if (outside) {
      return true;
    }
    const uint64_t slot = pos - first;
    if (marks[slot]) {
      return false;
    }
    marks[slot] = true;
    return true;
  }

  // Same reconstruction as GetString, but every row visited while walking
  // back to a row below C_[1] is checked against `marks` via CheckMarks.
  //
  // Returns false, leaving `ret` empty, as soon as a visited row inside
  // [first, last] turns out to be marked already. Otherwise the visited rows
  // inside the range get marked, the keyword is written to `ret`, and true
  // is returned.
  bool CompPermIdx::GetStringWithFilter(const uint64_t pos, string& ret,
                                        const uint64_t first, const uint64_t last,
                                        vector<bool>& marks) const {
    {
      string fresh;
      ret.swap(fresh);
    }

    uint64_t row = pos;
    for (; row >= C_[1]; row = BackStep(row)) {
      const bool unseen = CheckMarks(row, first, last, marks);
      if (!unseen) {
        return false;
      }
    }
    row = Jump2End(row);

    // Symbols are produced last-to-first; reversed once the delimiter is hit.
    for (;;) {
      const uint64_t symbol = WT_.Access(row);
      if (symbol == kDelimiter) break;
      ret.push_back(static_cast<char>(symbol));
      row = BackStep(row);
    }
    reverse(ret.begin(), ret.end());
    return true;
  }

  // Appends to `ret` the strings for rows in the inclusive range
  // [first, last], skipping any row for which GetStringWithFilter reports
  // that its backward walk ran into a row of the range that was already
  // visited. One flag per row of the range is kept for that purpose.
  // `ret` is not cleared here.
  void CompPermIdx::GetStringSetWithFilter(const uint64_t first, const uint64_t last,
                                           vector<string>& ret) const {
    vector<bool> marks(last - first + 1, false);
    uint64_t row = first;
    while (row <= last) {
      string keyword;
      const bool accepted = GetStringWithFilter(row, keyword, first, last, marks);
      if (accepted) {
        ret.push_back(keyword);
      }
      ++row;
    }
  }

  // Backward search for `query` over the index.
  //
  // The query is consumed from its last symbol to its first. The row
  // interval starts as the rows bounded by C_[c] and C_[c + 1] for the last
  // symbol c, and is narrowed once per remaining symbol after both ends have
  // been passed through Jump2End.
  //
  // Returns the number of matching rows and stores the inclusive interval in
  // [first, last]. Returns 0 when nothing matches. On the early exits below,
  // first/last are set to the empty interval (first = 1, last = 0).
  //
  // Guards added over the straightforward formulation:
  //   * an empty query returns 0 instead of indexing query[size() - 1] after
  //     the unsigned subtraction wrapped;
  //   * a last symbol for which C_ has no c + 1 entry (including an index
  //     that was never built) returns 0 instead of reading past C_;
  //   * an interval that becomes empty is detected from the two rank values
  //     before "- 1" is applied. With C_[c] == 0 and a rank of 0 the
  //     subtraction would wrap to UINT64_MAX, making an empty interval look
  //     full and sending out-of-range rows into the next step;
  //   * Rank(c, first - 1) is not evaluated for first == 0. Rank is used here
  //     as a count that includes the given position, so the count before
  //     row 0 is 0.
  uint64_t CompPermIdx::BackPermSearch(const vector<uint8_t>& query,
                                       uint64_t& first,
                                       uint64_t& last) const {
    if (query.empty()) {
      first = 1;
      last = 0;
      return 0;
    }

    uint64_t i = query.size() - 1;
    uint64_t c = query[i];
    if (c + 1 >= C_.size() || C_[c + 1] <= C_[c]) {
      // Either C_ cannot bound this symbol, or the symbol does not occur.
      first = 1;
      last = 0;
      return 0;
    }
    first = C_[c];
    last  = C_[c + 1] - 1;

    while (first <= last && i > 0) {
      c = query[i - 1];
      first = Jump2End(first);
      last = Jump2End(last);
      const uint64_t before = (first == 0) ? 0 : WT_.Rank(c, first - 1);
      const uint64_t through = WT_.Rank(c, last);
      if (through <= before) {
        // No occurrence of c inside the current interval.
        first = 1;
        last = 0;
        return 0;
      }
      first = C_[c] + before;
      last  = C_[c] + through - 1;
      --i;
    }

    if (first <= last) {
      return last - first + 1;
    }
    return 0;
  }

  // Looks up `str` as a whole keyword by searching for
  // kDelimiter + str + kDelimiter.
  // Returns the first matching row plus one, or UINT64_MAX when the search
  // finds nothing.
  uint64_t CompPermIdx::Rank(const string& str) {
    vector<uint8_t> pattern;
    pattern.push_back(kDelimiter);
    String2Uint8_Ts(str, pattern);
    pattern.push_back(kDelimiter);

    uint64_t first, last;
    const uint64_t hits = BackPermSearch(pattern, first, last);
    if (hits == 0) {
      return UINT64_MAX;
    }
    return first + 1;
  }

  // Writes to `ret` the string GetString produces for row i - 1, i.e. `i` is
  // the one-based value that Rank returns.
  //
  // i == 0 is not a valid one-based rank: i - 1 would wrap around to
  // UINT64_MAX and be used as a row index. In that case `ret` is set to the
  // empty string and nothing is looked up.
  // NOTE(review): values of `i` above the number of stored keys are not
  // validated here.
  void CompPermIdx::Select(const uint64_t i, string& ret) {
    if (i == 0) {
      string().swap(ret);
      return;
    }
    GetString(i - 1, ret);
  }

  // True when Rank finds `str`, i.e. when Rank returns anything other than
  // its UINT64_MAX "not found" value.
  bool CompPermIdx::Membership(const string& str) {
    return Rank(str) != UINT64_MAX;
  }

  // Replaces `ret` with the strings for all rows matching the pattern
  // kDelimiter + prefix. `ret` is left empty when nothing matches.
  void CompPermIdx::PrefixSearch(const string& prefix,
                                 vector<string>& ret) const {
    {
      vector<string> fresh;
      ret.swap(fresh);
    }

    vector<uint8_t> pattern;
    pattern.push_back(kDelimiter);
    String2Uint8_Ts(prefix, pattern);

    uint64_t first, last;
    if (BackPermSearch(pattern, first, last) == 0) {
      return;
    }
    GetStringSet(first, last, ret);
  }

  // Replaces `ret` with the strings for all rows matching the pattern
  // suffix + kDelimiter. `ret` is left empty when nothing matches.
  void CompPermIdx::SuffixSearch(const string& suffix,
                                 vector<string>& ret) const {
    {
      vector<string> fresh;
      ret.swap(fresh);
    }

    vector<uint8_t> pattern;
    String2Uint8_Ts(suffix, pattern);
    pattern.push_back(kDelimiter);

    uint64_t first, last;
    if (BackPermSearch(pattern, first, last) == 0) {
      return;
    }
    GetStringSet(first, last, ret);
  }

  // Replaces `ret` with the strings for all rows matching the pattern
  // suffix + kDelimiter + prefix. `ret` is left empty when nothing matches.
  void CompPermIdx::PrefixSuffixSearch(const string& prefix,
                                       const string& suffix,
                                       vector<string>& ret) const {
    {
      vector<string> fresh;
      ret.swap(fresh);
    }

    // The suffix goes first, then the delimiter, then the prefix.
    vector<uint8_t> pattern;
    String2Uint8_Ts(suffix, pattern);
    pattern.push_back(kDelimiter);
    String2Uint8_Ts(prefix, pattern);

    uint64_t first, last;
    if (BackPermSearch(pattern, first, last) == 0) {
      return;
    }
    GetStringSet(first, last, ret);
  }

  // Replaces `ret` with the strings for all rows matching the bare pattern
  // `substr` (no delimiter added). Results are collected through
  // GetStringSetWithFilter, which skips rows whose backward walk runs into
  // an already visited row of the match range. `ret` is left empty when
  // nothing matches.
  void CompPermIdx::SubstringSearch(const string& substr,
                                    vector<string>& ret) const {
    {
      vector<string> fresh;
      ret.swap(fresh);
    }

    vector<uint8_t> pattern;
    String2Uint8_Ts(substr, pattern);

    uint64_t first, last;
    if (BackPermSearch(pattern, first, last) == 0) {
      return;
    }
    GetStringSetWithFilter(first, last, ret);
  }

  // Loads the index from `is`, in this order: kMaxAlphabetSize raw entries
  // of C_, the raw bytes of NumKeys_, then whatever WT_.Read consumes.
  // Returns the byte count reported by WT_.Read plus the sizes of the C_
  // table and NumKeys_.
  // NOTE(review): the stream state is not checked after the reads.
  uint64_t CompPermIdx::Read(istream& is) {
    {
      // Give C_ exactly kMaxAlphabetSize entries and drop its old storage.
      vector<uint64_t> table(kMaxAlphabetSize);
      C_.swap(table);
    }
    is.read(reinterpret_cast<char*>(&C_[0]), sizeof(C_[0]) * kMaxAlphabetSize);
    is.read(reinterpret_cast<char*>(&NumKeys_), sizeof(NumKeys_));

    uint64_t total = 0;
    total += WT_.Read(is);
    total += sizeof(C_[0]) * C_.size();
    total += sizeof(NumKeys_);
    return total;
  }

  // Stores the index to `os`, in this order: all entries of C_ as raw
  // bytes, the raw bytes of NumKeys_, then whatever WT_.Write emits.
  // Returns the byte count reported by WT_.Write plus the sizes of the C_
  // table and NumKeys_.
  // NOTE(review): the stream state is not checked after the writes.
  uint64_t CompPermIdx::Write(ostream& os) const {
    const uint64_t table_bytes = sizeof(C_[0]) * C_.size();
    os.write(reinterpret_cast<const char*>(&C_[0]), table_bytes);
    os.write(reinterpret_cast<const char*>(&NumKeys_), sizeof(NumKeys_));

    uint64_t total = 0;
    total += WT_.Write(os);
    total += table_bytes;
    total += sizeof(NumKeys_);
    return total;
  }

  // Resets the index: C_ is emptied and its storage released, WT_ is
  // cleared, and the key count is set to zero.
  void CompPermIdx::Clear() {
    {
      vector<uint64_t> fresh;
      C_.swap(fresh);
    }
    WT_.Clear();
    NumKeys_ = 0;
  }

} // namespace cpi00
