/*
 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
 *
 * This file is part of Bowtie 2.
 *
 * Bowtie 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef EBWT_H_
#define EBWT_H_

#include <stdint.h>
#include <string.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <memory>
#include <fcntl.h>
#include <math.h>
#include <errno.h>
#include <stdexcept>
#include <sys/stat.h>
#include <map>
#include <set>
#ifdef BOWTIE_MM
#include <sys/mman.h>
#include <sys/shm.h>
#endif
#include "shmem.h"
#include "alphabet.h"
#include "assert_helpers.h"
#include "bitpack.h"
#include "blockwise_sa.h"
#include "endian_swap.h"
#include "word_io.h"
#include "random_source.h"
#include "ref_read.h"
#include "threading.h"
#include "str_util.h"
#include "mm.h"
#include "timer.h"
#include "reference.h"
#include "search_globals.h"
#include "ds.h"
#include "random_source.h"
#include "mem_ids.h"
#include "btypes.h"
#include "taxonomy.h"

#ifdef POPCNT_CAPABILITY
#include "processor_support.h"
#endif

using namespace std;

// From ccnt_lut.cpp, automatically generated by gen_lookup_tables.pl
extern uint8_t cCntLUT_4[4][4][256];
extern uint8_t cCntLUT_4_rev[4][4][256];

// XOR masks consumed by countInU64(): entry c is the bitwise complement of
// the two-bit code c replicated over all 32 bit-pairs of a 64-bit word, so
// XOR-ing a word with c_table[c] leaves both bits set exactly in those
// bit-pairs that were equal to c.
static const uint64_t c_table[4] = {
    0xFFFFFFFFFFFFFFFFllu, // c == 0 (bit-pair 00)
    0xAAAAAAAAAAAAAAAAllu, // c == 1 (bit-pair 01)
    0x5555555555555555llu, // c == 2 (bit-pair 10)
    0x0000000000000000llu  // c == 3 (bit-pair 11)
};

// VMSG_NL(...): verbose-mode message followed by a newline.
// Must be expanded inside a member function of a class that provides
// verbose() and verbose(string): when this->verbose() is true, the
// arguments are streamed (followed by endl) into a temporary stringstream
// whose contents are then handed to this->verbose(...).
// NOTE(review): this expands to a bare if-block rather than
// do { ... } while(0), so it is not safe as the unbraced body of an
// if/else at the call site.
#ifndef VMSG_NL
#define VMSG_NL(...) \
if(this->verbose()) { \
	stringstream tmp; \
	tmp << __VA_ARGS__ << endl; \
	this->verbose(tmp.str()); \
}
#endif

// VMSG(...): verbose-mode message without a trailing newline.
// Must be expanded inside a member function of a class that provides
// verbose() and verbose(string): when this->verbose() is true, the
// arguments are streamed into a temporary stringstream whose contents are
// then handed to this->verbose(...).
// NOTE(review): this expands to a bare if-block rather than
// do { ... } while(0), so it is not safe as the unbraced body of an
// if/else at the call site.
#ifndef VMSG
#define VMSG(...) \
if(this->verbose()) { \
	stringstream tmp; \
	tmp << __VA_ARGS__; \
	this->verbose(tmp.str()); \
}
#endif

/**
 * Bit flags describing the type of an Ebwt.
 */
enum EBWT_FLAGS {
	// Set -> Ebwt is colorspace
	EBWT_COLOR      = 1 << 1,
	// Set -> reverse Ebwt is the whole concatenated string reversed,
	// rather than each stretch reversed
	EBWT_ENTIRE_REV = 1 << 2
};

/**
 * Extended Burrows-Wheeler transform header.  This together with the
 * actual data arrays and other text-specific parameters defined in
 * class Ebwt constitute the entire Ebwt.
 *
 * All derived quantities (lengths and byte sizes of the BWT, ftab,
 * eftab, SA sample and side layout) are computed by init() from the
 * text length and the three rate/width parameters.
 */
template <typename index_t = uint32_t>
class EbwtParams {

public:
	/// Default-construct without initializing any field; init() must be
	/// called before the object is used.
	EbwtParams() { }

	/// Construct and derive all fields; see init() for the parameters.
	EbwtParams(
		index_t len,
		int32_t lineRate,
		int32_t offRate,
		int32_t ftabChars,
		bool color,
		bool entireReverse)
	{
		init(len, lineRate, offRate, ftabChars, color, entireReverse);
	}

	/// Copy by re-deriving every field from the primary parameters of
	/// 'eh' (its current offRate becomes this object's origOffRate).
	EbwtParams(const EbwtParams& eh) {
		init(eh._len, eh._lineRate, eh._offRate,
		     eh._ftabChars, eh._color, eh._entireReverse);
	}

	/**
	 * (Re)initialize every field.
	 *
	 * @param len            length of the indexed text
	 * @param lineRate       log2 of the line size in bytes
	 * @param offRate        log2 of the suffix-array sampling interval
	 * @param ftabChars      number of characters used to index the ftab
	 * @param color          true iff the index is colorspace
	 * @param entireReverse  true iff the reverse index is of the whole
	 *                       concatenated string reversed
	 */
	void init(
		index_t len,
		int32_t lineRate,
		int32_t offRate,
		int32_t ftabChars,
		bool color,
		bool entireReverse)
	{
		_color = color;
		_entireReverse = entireReverse;
		_len = len;
		_bwtLen = _len + 1;
		_sz = (len+3)/4;
		_bwtSz = (len/4 + 1);
		_lineRate = lineRate;
		_origOffRate = offRate;
		_offRate = offRate;
		_offMask = std::numeric_limits<index_t>::max() << _offRate;
		_ftabChars = ftabChars;
		_eftabLen = _ftabChars*2;
		_eftabSz = _eftabLen*sizeof(index_t);
		// Shift in 64 bits: with ftabChars == 16 (permitted by repOk())
		// a plain int shift of 1 << 32 would be undefined behaviour.
		_ftabLen = (index_t)((((uint64_t)1) << (_ftabChars*2)) + 1);
		_ftabSz = _ftabLen*sizeof(index_t);
		// Shift a value of index_t width so that large rates do not
		// overflow a 32-bit int when index_t is 64 bits wide.
		_offsLen = (_bwtLen + (((index_t)1) << _offRate) - 1) >> _offRate;
		_offsSz = _offsLen*sizeof(index_t);
		_lineSz = ((index_t)1) << _lineRate;
		_sideSz = _lineSz * 1 /* lines per side */;
		// Four index_t-sized slots of every side do not hold BWT chars
		_sideBwtSz = _sideSz - (sizeof(index_t) * 4);
		_sideBwtLen = _sideBwtSz*4;
		_numSides = (_bwtSz+(_sideBwtSz)-1)/(_sideBwtSz);
		_numLines = _numSides * 1 /* lines per side */;
		_ebwtTotLen = _numSides * _sideSz;
		_ebwtTotSz = _ebwtTotLen;
		assert(repOk());
	}

	index_t len() const           { return _len; }
	index_t lenNucs() const       { return _len + (_color ? 1 : 0); }
	index_t bwtLen() const        { return _bwtLen; }
	index_t sz() const            { return _sz; }
	index_t bwtSz() const         { return _bwtSz; }
	int32_t lineRate() const      { return _lineRate; }
	int32_t origOffRate() const   { return _origOffRate; }
	int32_t offRate() const       { return _offRate; }
	index_t offMask() const       { return _offMask; }
	int32_t ftabChars() const     { return _ftabChars; }
	index_t eftabLen() const      { return _eftabLen; }
	index_t eftabSz() const       { return _eftabSz; }
	index_t ftabLen() const       { return _ftabLen; }
	index_t ftabSz() const        { return _ftabSz; }
	index_t offsLen() const       { return _offsLen; }
	index_t offsSz() const        { return _offsSz; }
	index_t lineSz() const        { return _lineSz; }
	index_t sideSz() const        { return _sideSz; }
	index_t sideBwtSz() const     { return _sideBwtSz; }
	index_t sideBwtLen() const    { return _sideBwtLen; }
	index_t numSides() const      { return _numSides; }
	index_t numLines() const      { return _numLines; }
	index_t ebwtTotLen() const    { return _ebwtTotLen; }
	index_t ebwtTotSz() const     { return _ebwtTotSz; }
	bool color() const            { return _color; }
	bool entireReverse() const    { return _entireReverse; }

	/**
	 * Set a new suffix-array sampling rate, which involves updating
	 * rate, mask, sample length, and sample size.  origOffRate is left
	 * untouched.
	 */
	void setOffRate(int newOffRate) {
		_offRate = newOffRate;
		_offMask = std::numeric_limits<index_t>::max() << _offRate;
		_offsLen = (_bwtLen + (((index_t)1) << _offRate) - 1) >> _offRate;
		_offsSz = _offsLen*sizeof(index_t);
	}

#ifndef NDEBUG
	/// Check that this EbwtParams is internally consistent
	bool repOk() const {
		// assert_gt(_len, 0);
		assert_gt(_lineRate, 3);
		assert_geq(_offRate, 0);
		assert_leq(_ftabChars, 16);
		assert_geq(_ftabChars, 1);
		assert_lt(_lineRate, 32);
		assert_lt(_ftabChars, 32);
		assert_eq(0, _ebwtTotSz % _lineSz);
		return true;
	}
#endif

	/**
	 * Pretty-print the header contents to the given output stream.
	 */
	void print(ostream& out) const {
		out << "Headers:" << endl
		    << "    len: "          << _len << endl
		    << "    bwtLen: "       << _bwtLen << endl
		    << "    sz: "           << _sz << endl
		    << "    bwtSz: "        << _bwtSz << endl
		    << "    lineRate: "     << _lineRate << endl
		    << "    offRate: "      << _offRate << endl
		    << "    offMask: 0x"    << hex << _offMask << dec << endl
		    << "    ftabChars: "    << _ftabChars << endl
		    << "    eftabLen: "     << _eftabLen << endl
		    << "    eftabSz: "      << _eftabSz << endl
		    << "    ftabLen: "      << _ftabLen << endl
		    << "    ftabSz: "       << _ftabSz << endl
		    << "    offsLen: "      << _offsLen << endl
		    << "    offsSz: "       << _offsSz << endl
		    << "    lineSz: "       << _lineSz << endl
		    << "    sideSz: "       << _sideSz << endl
		    << "    sideBwtSz: "    << _sideBwtSz << endl
		    << "    sideBwtLen: "   << _sideBwtLen << endl
		    << "    numSides: "     << _numSides << endl
		    << "    numLines: "     << _numLines << endl
		    << "    ebwtTotLen: "   << _ebwtTotLen << endl
		    << "    ebwtTotSz: "    << _ebwtTotSz << endl
		    << "    color: "        << _color << endl
		    << "    reverse: "      << _entireReverse << endl;
	}

	index_t _len;          // length of the text
	index_t _bwtLen;       // _len + 1
	index_t _sz;           // ceil(_len / 4)
	index_t _bwtSz;        // _len / 4 + 1
	int32_t _lineRate;     // log2 of the line size
	int32_t _origOffRate;  // offRate as passed to init()
	int32_t _offRate;      // current log2 of the SA sampling interval
	index_t _offMask;      // all-ones shifted left by _offRate
	int32_t _ftabChars;    // characters used to index the ftab
	index_t _eftabLen;     // 2 * _ftabChars
	index_t _eftabSz;      // _eftabLen * sizeof(index_t)
	index_t _ftabLen;      // 4^_ftabChars + 1
	index_t _ftabSz;       // _ftabLen * sizeof(index_t)
	index_t _offsLen;      // ceil(_bwtLen / 2^_offRate)
	index_t _offsSz;       // _offsLen * sizeof(index_t)
	index_t _lineSz;       // 2^_lineRate
	index_t _sideSz;       // one line per side
	index_t _sideBwtSz;    // _sideSz minus four index_t-sized slots
	index_t _sideBwtLen;   // 4 * _sideBwtSz
	index_t _numSides;     // ceil(_bwtSz / _sideBwtSz)
	index_t _numLines;     // equals _numSides (one line per side)
	index_t _ebwtTotLen;   // _numSides * _sideSz
	index_t _ebwtTotSz;    // same as _ebwtTotLen
	bool     _color;
	bool     _entireReverse;
};

/**
 * Exception to throw when a file-related error occurs.  The optional
 * message is forwarded to std::runtime_error and is available through
 * what().
 */
class EbwtFileOpenException : public std::runtime_error {
public:
	EbwtFileOpenException(const std::string& msg = "")
		: std::runtime_error(msg)
	{
	}
};

/**
 * Return the size in bytes of the file with the given name, or 0 when
 * the file cannot be opened for reading.
 */
static inline int64_t fileSize(const char* name) {
	std::ifstream in(name, std::ios_base::in | std::ios_base::binary);
	const bool usable = in.is_open() && in.good() && !in.eof();
	if(!usable) {
		return 0;
	}
	// Size is the distance between the first and the end position
	in.seekg(0, std::ios_base::beg);
	const std::ifstream::pos_type first = in.tellg();
	in.seekg(0, std::ios_base::end);
	const std::ifstream::pos_type last = in.tellg();
	return static_cast<int64_t>(last - first);
}

/**
 * Encapsulates a location in the bwt text in terms of the side it
 * occurs in and its offset within the side.
 */
template <typename index_t = uint32_t>
struct SideLocus {
	SideLocus() :
	_sideByteOff(0),
	_sideNum(0),
	_charOff(0),
	_by(-1),
	_bp(-1) { }

	/**
	 * Construct from row and other relevant information about the Ebwt.
	 */
	SideLocus(index_t row, const EbwtParams<index_t>& ep, const uint8_t* ebwt) {
		initFromRow(row, ep, ebwt);
	}

	/**
	 * Init two SideLocus objects from a top/bot pair, using the result
	 * from one call to initFromRow to possibly avoid a second call.
	 */
	static void initFromTopBot(
		index_t top,
		index_t bot,
		const EbwtParams<index_t>& ep,
		const uint8_t* ebwt,
		SideLocus& ltop,
		SideLocus& lbot)
	{
		const index_t sideBwtLen = ep._sideBwtLen;
		assert_gt(bot, top);
		ltop.initFromRow(top, ep, ebwt);
		index_t spread = bot - top;
		// Many cache misses on the following lines
		if(ltop._charOff + spread < sideBwtLen) {
			lbot._charOff = ltop._charOff + spread;
			lbot._sideNum = ltop._sideNum;
			lbot._sideByteOff = ltop._sideByteOff;
			lbot._by = (int)(lbot._charOff >> 2);
			assert_lt(lbot._by, (int)ep._sideBwtSz);
			lbot._bp = lbot._charOff & 3;
		} else {
			lbot.initFromRow(bot, ep, ebwt);
		}
	}

	/**
	 * Calculate SideLocus based on a row and other relevant
	 * information about the shape of the Ebwt.
	 */
	void initFromRow(index_t row, const EbwtParams<index_t>& ep, const uint8_t* ebwt) {
		const index_t sideSz      = ep._sideSz;
		// Side length is hard-coded for now; this allows the compiler
		// to do clever things to accelerate / and %.
		_sideNum                  = row / ep._sideBwtLen;
		assert_lt(_sideNum, ep._numSides);
		_charOff                  = row % ep._sideBwtLen;
		_sideByteOff              = _sideNum * sideSz;
		assert_leq(row, ep._len);
		assert_leq(_sideByteOff + sideSz, ep._ebwtTotSz);
		// Tons of cache misses on the next line
		_by = (int)(_charOff >> 2); // byte within side
		assert_lt(_by, (int)ep._sideBwtSz);
		_bp = _charOff & 3;  // bit-pair within byte
	}
	
	/**
	 * Transform this SideLocus to refer to the next side (i.e. the one
	 * corresponding to the next side downstream).  Set all cursors to
	 * point to the beginning of the side.
	 */
	void nextSide(const EbwtParams<index_t>& ep) {
		assert(valid());
		_sideByteOff += ep.sideSz();
		_sideNum++;
		_by = _bp = _charOff = 0;
		assert(valid());
	}

	/**
	 * Return true iff this is an initialized SideLocus
	 */
	bool valid() const {
		if(_bp != -1) {
			return true;
		}
		return false;
	}
	
	/**
	 * Convert locus to BW row it corresponds to.
	 */
    index_t toBWRow() const;
	
#ifndef NDEBUG
	/**
	 * Check that SideLocus is internally consistent and consistent
	 * with the (provided) EbwtParams.
	 */
	bool repOk(const EbwtParams<index_t>& ep) const {
		ASSERT_ONLY(index_t row = toBWRow());
		assert_leq(row, ep._len);
		assert_range(-1, 3, _bp);
		assert_range(0, (int)ep._sideBwtSz, _by);
		return true;
	}
#endif

	/// Make this look like an invalid SideLocus
	void invalidate() {
		_bp = -1;
	}

	/**
	 * Return a read-only pointer to the beginning of the top side.
	 */
	const uint8_t *side(const uint8_t* ebwt) const {
		return ebwt + _sideByteOff;
	}
    
    /**
	 * Return a read-only pointer to the beginning of the top side.
	 */
	const uint8_t *next_side(const EbwtParams<index_t>& ep, const uint8_t* ebwt) const {
        if(_sideByteOff + ep._sideSz < ep._ebwtTotSz) {
            return ebwt + _sideByteOff + ep._sideSz;
        } else {
            return NULL;
        }
	}
    
	index_t _sideByteOff; // offset of top side within ebwt[]
	index_t _sideNum;     // index of side
	index_t _charOff;     // character offset within side
	int32_t _by;          // byte within side (not adjusted for bw sides)
	int32_t _bp;          // bitpair within byte (not adjusted for bw sides)
};

/**
 * Convert locus to the BW row it corresponds to.  The number of BWT
 * characters per side is hard-coded: 512 for 8-byte index types and 256
 * otherwise, less 16 per byte of index_t.
 */
template <typename index_t>
inline index_t SideLocus<index_t>::toBWRow() const {
    const size_t charsPerLine = (sizeof(index_t) == 8) ? 512 : 256;
    const size_t charsPerSide = charsPerLine - 16 * sizeof(index_t);
    return _sideNum * charsPerSide + _charOff;
}

// 64-bit index: 512 characters per line, less 16 per byte of the index type
template <>
inline uint64_t SideLocus<uint64_t>::toBWRow() const {
    const size_t charsPerSide = 512 - 16 * sizeof(uint64_t);
    return _sideNum * charsPerSide + _charOff;
}

// 32-bit index: 256 characters per line, less 16 per byte of the index type
template <>
inline uint32_t SideLocus<uint32_t>::toBWRow() const {
    const size_t charsPerSide = 256 - 16 * sizeof(uint32_t);
    return _sideNum * charsPerSide + _charOff;
}

// 16-bit index: 256 characters per line, less 16 per byte of the index type
template <>
inline uint16_t SideLocus<uint16_t>::toBWRow() const {
    const size_t charsPerSide = 256 - 16 * sizeof(uint16_t);
    return _sideNum * charsPerSide + _charOff;
}

#ifdef POPCNT_CAPABILITY   // wrapping of "struct"
struct USE_POPCNT_GENERIC {
#endif
    /**
     * Standard bit-bashing population count: return the number of set
     * bits (0..64) in x.
     *
     * The partial sums are folded into the low byte, which can reach 64
     * when every bit is set, so seven bits must be kept in the final
     * mask (a six-bit mask would report 0 for an all-ones argument).
     */
    inline static int pop64(uint64_t x) {
        // Lots of cache misses on following lines (>10K)
        x = x - ((x >> 1) & 0x5555555555555555llu);                           // 2-bit sums
        x = (x & 0x3333333333333333llu) + ((x >> 2) & 0x3333333333333333llu); // 4-bit sums
        x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0Fllu;                           // per-byte sums
        x = x + (x >> 8);
        x = x + (x >> 16);
        x = x + (x >> 32);
        return (int)(x & 0x7Fllu);
    }
#ifdef POPCNT_CAPABILITY  // wrapping a "struct"
};
#endif

#ifdef POPCNT_CAPABILITY
// Population count implemented with the popcntq instruction via inline
// assembly.  Only compiled when POPCNT_CAPABILITY is defined; it exposes
// the same static pop64() interface as USE_POPCNT_GENERIC so either can be
// supplied as the Operation template argument of countInU64().
struct USE_POPCNT_INSTRUCTION {
    // Return the number of set bits in x.
    inline static int pop64(uint64_t x) {
        int64_t count;
        // 'count' is an early-clobber output register, 'x' an input register
        asm ("popcntq %[x],%[count]\n": [count] "=&r" (count): [x] "r" (x));
        return (int)count;
    }
};
#endif

/**
 * Count how many of the 32 bit-pairs of the 64-bit word dw hold the
 * two-bit value c (0-3).
 */
#ifdef POPCNT_CAPABILITY
template<typename Operation>
#endif
inline static int countInU64(int c, uint64_t dw) {
    // XOR with the mask for c turns every bit-pair equal to c into 11
    const uint64_t diff = dw ^ c_table[c];
    // Keep the low bit of each pair only where both of its bits are set
    const uint64_t hits = diff & ((diff >> 1) & (0x5555555555555555));
#ifdef POPCNT_CAPABILITY
    const uint64_t n = Operation().pop64(hits);
#else
    const uint64_t n = pop64(hits);
#endif
    return (int) n;
}

// Forward declarations for Ebwt class
class EbwtSearchParams;

/**
 * Extended Burrows-Wheeler transform data.
 *
 * An Ebwt may be transferred to and from RAM with calls to
 * evictFromMemory() and loadIntoMemory().  By default, a newly-created
 * Ebwt is not loaded into memory; if the user would like to use a
 * newly-created Ebwt to answer queries, they must first call
 * loadIntoMemory().
 */
template <class index_t = uint32_t>
class Ebwt {
public:
	// Member-initializer list shared by the Ebwt constructors.  It is
	// expanded directly in a constructor's initializer position and refers
	// to the constructor parameters 'overrideOffRate', 'verbose',
	// 'passMemExc', 'sanityCheck' and 'fw' by name, so every constructor
	// using it must declare parameters with exactly those names.
	#define Ebwt_INITS \
	    _toBigEndian(currentlyBigEndian()), \
	    _overrideOffRate(overrideOffRate), \
	    _verbose(verbose), \
	    _passMemExc(passMemExc), \
	    _sanity(sanityCheck), \
	    fw_(fw), \
	    _in1(NULL), \
	    _in2(NULL), \
	    _zOff(std::numeric_limits<index_t>::max()), \
	    _zEbwtByteOff(std::numeric_limits<index_t>::max()), \
	    _zEbwtBpOff(-1), \
	    _nPat(0), \
	    _nFrag(0), \
	    _plen(EBWT_CAT), \
	    _rstarts(EBWT_CAT), \
	    _fchr(EBWT_CAT), \
	    _ftab(EBWT_CAT), \
	    _eftab(EBWT_CAT), \
        _offw(false), \
	    _offs(EBWT_CAT), \
        _offsw(EBWT_CAT), \
	    _ebwt(EBWT_CAT), \
	    _useMm(false), \
	    useShmem_(false), \
	    _refnames(EBWT_CAT), \
	    mmFile1_(NULL), \
	    mmFile2_(NULL), \
        _compressed(false), \
	   _boundaryCheck( 1 ) 

	/**
	 * Construct an Ebwt from the index files sharing the base name 'in'.
	 *
	 * Unless skipLoading is set, the header of the primary index is read
	 * through readIntoMemory() and the effective SA sampling rate is
	 * raised according to overrideOffRate / offRatePlus.  Afterwards the
	 * ".3" file (sequence-id to taxonomy-id table, taxonomy tree, names
	 * and sizes) and the optional ".4" file (SA coordinates of genome
	 * boundaries) are parsed.
	 *
	 * A ".3" file that cannot be opened is reported on cerr and leaves
	 * all taxonomy tables empty; a missing ".4" file is only reported in
	 * verbose mode and leaves the boundary data at its defaults.
	 *
	 * @param in                 base name of the index files
	 * @param color              passed to readIntoMemory(): expect a
	 *                           colorspace index?
	 * @param needEntireReverse  passed to readIntoMemory() when !fw
	 * @param fw                 true iff this is the forward index
	 * @param overrideOffRate    requested offRate, or -1 for none
	 * @param offRatePlus        when > 0 and no override was given, added
	 *                           to the offRate stored in the index
	 * @param useMm              stored in _useMm
	 * @param useShmem           stored in useShmem_; must not be combined
	 *                           with useMm
	 * @param mmSweep            passed to readIntoMemory()
	 * @param loadNames          passed to readIntoMemory()
	 * @param loadSASamp         load the SA sample portion?
	 * @param loadFtab           load the ftab & eftab?
	 * @param loadRstarts        load the rstarts array?
	 * @param verbose            print progress messages to cerr
	 * @param startVerbose       print start-up messages to cerr
	 * @param passMemExc         stored in _passMemExc
	 * @param sanityCheck        stored in _sanity
	 * @param skipLoading        skip reading the primary index header
	 */
	Ebwt(const string& in,
	     int color,
	     int needEntireReverse,
	     bool fw,
	     int32_t overrideOffRate, // = -1,
	     int32_t offRatePlus, // = -1,
	     bool useMm, // = false,
	     bool useShmem, // = false,
	     bool mmSweep, // = false,
	     bool loadNames, // = false,
	     bool loadSASamp, // = true,
	     bool loadFtab, // = true,
	     bool loadRstarts, // = true,
	     bool verbose, // = false,
	     bool startVerbose, // = false,
	     bool passMemExc, // = false,
	     bool sanityCheck, // = false)
	     bool skipLoading = false) :
	     Ebwt_INITS
	{
		assert(!useMm || !useShmem);

#ifdef POPCNT_CAPABILITY
		ProcessorSupport ps;
		_usePOPCNTinstruction = ps.POPCNTenabled();
#endif

		packed_ = false;
		_useMm = useMm;
		useShmem_ = useShmem;
		_in1Str = in + ".1." + gEbwt_ext;
		_in2Str = in + ".2." + gEbwt_ext;

		if(!skipLoading) {
			readIntoMemory(
					color,       // expect index to be colorspace?
					fw ? -1 : needEntireReverse, // need REF_READ_REVERSE
					loadSASamp,  // load the SA sample portion?
					loadFtab,    // load the ftab & eftab?
					loadRstarts, // load the rstarts array?
					true,        // stop after loading the header portion?
					&_eh,        // params
					mmSweep,     // mmSweep
					loadNames,   // loadNames
					startVerbose); // startVerbose
			// If the offRate has been overridden, reflect that in the
			// _eh._offRate field
			if(offRatePlus > 0 && _overrideOffRate == -1) {
				_overrideOffRate = _eh._offRate + offRatePlus;
			}
			if(_overrideOffRate > _eh._offRate) {
				_eh.setOffRate(_overrideOffRate);
				assert_eq(_overrideOffRate, _eh._offRate);
			}
			assert(repOk());
		}

		// Read conversion table, genome size table, and taxonomy tree
		string in3Str = in + ".3." + gEbwt_ext;
		if(verbose || startVerbose) cerr << "Opening \"" << in3Str.c_str() << "\"" << endl;
		ifstream in3(in3Str.c_str(), ios::binary);
		if(!in3.good()) {
			cerr << "Could not open index file " << in3Str.c_str() << endl;
		}

		initial_tax_rank_num();

		// Every table count below is read only while the stream is still
		// usable, and every record loop stops as soon as the stream fails.
		// Testing eof() alone is not enough: a stream that failed to open
		// has failbit but not eofbit set, which would otherwise make the
		// loops run on garbage indefinitely.

		set<uint64_t> leaves;
		size_t num_cids = 0; // number of compressed sequences
		_uid_to_tid.clear();
		if(in3.good()) {
			readU32(in3, this->toBe());
		}
		uint64_t nref = in3.good() ? readIndex<uint64_t>(in3, this->toBe()) : 0;
		if(nref > 0) {
			while(in3.good()) {
				string uid;
				uint64_t tid;
				while(true) {
					char c = '\0';
					in3 >> c;
					if(c == '\0' || c == '\n') break;
					uid.push_back(c);
				}
				if(uid.find("cid") == 0) {
					num_cids++;
				}
				tid = readIndex<uint64_t>(in3, this->toBe());
				_uid_to_tid.expand();
				_uid_to_tid.back().first = uid;
				_uid_to_tid.back().second = tid;
				leaves.insert(tid);
				if(nref == _uid_to_tid.size()) break;
			}
			assert_eq(nref, _uid_to_tid.size());
		}

		if(num_cids >= 10) {
			this->_compressed = true;
		}

		_tree.clear();
		uint64_t ntid = in3.good() ? readIndex<uint64_t>(in3, this->toBe()) : 0;
		if(ntid > 0) {
			while(in3.good()) {
				TaxonomyNode node;
				uint64_t tid = readIndex<uint64_t>(in3, this->toBe());
				node.parent_tid = readIndex<uint64_t>(in3, this->toBe());
				node.rank = readIndex<uint16_t>(in3, this->toBe());
				node.leaf = (leaves.find(tid) != leaves.end());
				_tree[tid] = node;
				if(ntid == _tree.size()) break;
			}
			assert_eq(ntid, _tree.size());
		}

		_name.clear();
		uint64_t nname = in3.good() ? readIndex<uint64_t>(in3, this->toBe()) : 0;
		if(nname > 0) {
			string name;
			while(in3.good()) {
				uint64_t tid = readIndex<uint64_t>(in3, this->toBe());
				in3 >> name;
				in3.seekg(1, ios_base::cur);
				assert(_name.find(tid) == _name.end());
				// '@' stands in for a space in the stored names
				std::replace(name.begin(), name.end(), '@', ' ');
				_name[tid] = name;
				if(_name.size() == nname)
					break;
			}
		}

		_size.clear();
		uint64_t nsize = in3.good() ? readIndex<uint64_t>(in3, this->toBe()) : 0;
		if(nsize > 0) {
			while(in3.good()) {
				uint64_t tid = readIndex<uint64_t>(in3, this->toBe());
				uint64_t size = readIndex<uint64_t>(in3, this->toBe());
				assert(_size.find(tid) == _size.end());
				_size[tid] = size;
				if(_size.size() == nsize)
					break;
			}
		}

		// Calculate average genome size.  This is always computed, even
		// when there are many sequences.
		{
			map<uint64_t, uint64_t> tid_count; // contributing entries per ancestor
			map<uint64_t, uint64_t> new_size;  // summed sizes per ancestor

			for(map<uint64_t, uint64_t>::const_iterator size_itr = _size.begin(); size_itr != _size.end(); ++size_itr) {
				uint64_t c_tid = size_itr->first;
				map<uint64_t, TaxonomyNode>::const_iterator start_itr = _tree.find(c_tid);
				// Skip entries missing from the tree and self-parented nodes
				if(start_itr == _tree.end() || start_itr->second.parent_tid == c_tid) {
					continue;
				}
				const TaxonomyNode& size_node = start_itr->second;
				// Only unranked leaves and nodes ranked below species
				// contribute their size to their ancestors
				const bool contributes =
					(size_node.rank == RANK_UNKNOWN && size_node.leaf) ||
					tax_rank_num[size_node.rank] < tax_rank_num[RANK_SPECIES];
				if(!contributes) {
					continue;
				}
				const uint64_t add_size = size_itr->second;
				// Walk up the tree, crediting every major-rank ancestor
				c_tid = size_node.parent_tid;
				while(true) {
					map<uint64_t, TaxonomyNode>::const_iterator tree_itr = _tree.find(c_tid);
					if(tree_itr == _tree.end())
						break;
					const TaxonomyNode& node = tree_itr->second;
					if(node.rank == RANK_SPECIES || node.rank == RANK_GENUS || node.rank == RANK_FAMILY ||
							node.rank == RANK_ORDER || node.rank == RANK_CLASS || node.rank == RANK_PHYLUM) {
						new_size[c_tid] += add_size;
						++tid_count[c_tid];
					}
					if(c_tid == node.parent_tid)
						break;
					c_tid = node.parent_tid;
				}
			}
			// Replace each credited ancestor's size by the average
			for(map<uint64_t, uint64_t>::const_iterator count_itr = tid_count.begin(); count_itr != tid_count.end(); ++count_itr) {
				_size[count_itr->first] = new_size[count_itr->first] / count_itr->second;
			}
		}
		_paths.buildPaths(_uid_to_tid, _tree);

		in3.close();

		// Give the boundary bookkeeping defined values even when the ".4"
		// file is missing or holds no entries.
		_lastGenomeBoundary = 0;
		_boundaryCheckShift = 8;

		// Read in the information provided by Li. The SA coordinate that corresponds to the start of genome.
		string in4Str = in + ".4." + gEbwt_ext;
		if(verbose || startVerbose) cerr << "Opening \"" << in4Str.c_str() << "\"" << endl;
		ifstream in4(in4Str.c_str(), ios::binary);
		if(!in4.good()) {
			if(verbose || startVerbose) cerr << "Could not open index file " << in4Str.c_str() << endl;
		} else {
			readU32(in4, this->toBe());

			_saGenomeBoundary.clear();
			nsize = readIndex<uint64_t>(in4, this->toBe());

			if(nsize > 0) {
				// Stop early if the file turns out to be truncated
				for(uint64_t t = 0; t < nsize && in4.good(); ++t) {
					uint64_t saCoord = readIndex<uint64_t>(in4, this->toBe());
					uint32_t refIdx = readIndex<uint32_t>(in4, this->toBe());
					_saGenomeBoundary[saCoord] = refIdx;

					if(saCoord > _lastGenomeBoundary)
						_lastGenomeBoundary = saCoord;
				}

				// Choose the block size (as a shift, at least 8) of the
				// boundary bit vector: grow the shift until the block
				// count drops below 100 * nsize or one block covers all
				// coordinates, then step back by one if it grew.
				while(true) {
					uint64_t blockSize = ((uint64_t)1) << _boundaryCheckShift;
					if(blockSize > _lastGenomeBoundary + 1)
						break;
					uint64_t blockCnt = (_lastGenomeBoundary + 1) / blockSize + 1;
					if(blockCnt < 100 * nsize) {
						if(_boundaryCheckShift > 8)
							--_boundaryCheckShift;
						break;
					}

					++_boundaryCheckShift;
				}

				// Mark every block that contains at least one boundary
				_boundaryCheck.resize(((_lastGenomeBoundary + 1) >> _boundaryCheckShift) + 1);
				_boundaryCheck.reset();
				for(std::map<uint64_t, uint32_t>::iterator it = _saGenomeBoundary.begin(); it != _saGenomeBoundary.end(); ++it) {
					_boundaryCheck.set((it->first) >> _boundaryCheckShift);
				}
			}
		}
		in4.close();
	}
	
	/// Construct an Ebwt that only sets up its header parameters: _eh is
	/// initialized from joinedLen(szs), lineRate, offRate, ftabChars,
	/// color and whether refparams.reverse == REF_READ_REVERSE, and the
	/// 'packed' flag is recorded.  No index files are opened and nothing
	/// is built here.  The parameters needEntireReverse, file, dcv, sztot
	/// and seed are not referenced by this constructor itself; fw,
	/// overrideOffRate, verbose, passMemExc and sanityCheck are consumed
	/// by the Ebwt_INITS initializer list.
	Ebwt(
		 bool packed,
		 int color,
		 int needEntireReverse,
		 int32_t lineRate,
		 int32_t offRate,
		 int32_t ftabChars,
		 const string& file,   // base filename for EBWT files
		 bool fw,
		 int dcv,
		 EList<RefRecord>& szs,
		 index_t sztot,
		 const RefReadInParams& refparams,
		 uint32_t seed,
		 int32_t overrideOffRate = -1,
		 bool verbose = false,
		 bool passMemExc = false,
		 bool sanityCheck = false) :
	Ebwt_INITS,
	_eh(
		joinedLen(szs),
		lineRate,
		offRate,
		ftabChars,
		color,
		refparams.reverse == REF_READ_REVERSE)
	{
#ifdef POPCNT_CAPABILITY
        // Detect at run time whether the POPCNT instruction may be used
        ProcessorSupport ps;
        _usePOPCNTinstruction = ps.POPCNTenabled();
#endif
		packed_ = packed;
	}

	/// Construct an Ebwt from the given header parameters and string
	/// vector, optionally using a blockwise suffix sorter with the
	/// given 'bmax' and 'dcv' parameters.  The string vector is
	/// ultimately joined and the joined string is passed to buildToDisk().
	template<typename TStr>
	Ebwt(
         TStr& s,
         bool packed,
         int color,
         int needEntireReverse,
         int32_t lineRate,
         int32_t offRate,
         int32_t ftabChars,
         const string& file,   // base filename for EBWT files
         bool fw,
         bool useBlockwise,
         index_t bmax,
         index_t bmaxSqrtMult,
         index_t bmaxDivN,
         int dcv,
         int nthreads,
         EList<FileBuf*>& is,
         EList<RefRecord>& szs,
         index_t sztot,
         const string& conversion_table_fname,
         const string& taxonomy_fname,
         const string& name_table_fname,
         const string& size_table_fname,
         const RefReadInParams& refparams,
         uint32_t seed,
         int32_t overrideOffRate = -1,
         bool doSaFile = false,
         bool doBwtFile = false,
         int kmer_size = 0,
         bool verbose = false,
         bool passMemExc = false,
         bool sanityCheck = false) :
    Ebwt_INITS,
    _eh(
        joinedLen(szs),
        lineRate,
        offRate,
        ftabChars,
        color,
        refparams.reverse == REF_READ_REVERSE)
	{
#ifdef POPCNT_CAPABILITY
        ProcessorSupport ps;
        _usePOPCNTinstruction = ps.POPCNTenabled();
#endif
		_in1Str = file + ".1." + gEbwt_ext;
		_in2Str = file + ".2." + gEbwt_ext;
		packed_ = packed;
		// Open output files
		ofstream fout1(_in1Str.c_str(), ios::binary);
		if(!fout1.good()) {
			cerr << "Could not open index file for writing: \"" << _in1Str.c_str() << "\"" << endl
			     << "Please make sure the directory exists and that permissions allow writing by" << endl
			     << "Bowtie." << endl;
			throw 1;
		}
		ofstream fout2(_in2Str.c_str(), ios::binary);
		if(!fout2.good()) {
			cerr << "Could not open index file for writing: \"" << _in2Str.c_str() << "\"" << endl
			     << "Please make sure the directory exists and that permissions allow writing by" << endl
			     << "Bowtie." << endl;
			throw 1;
		}
        _inSaStr = file + ".sa";
        _inBwtStr = file + ".bwt";
        ofstream *saOut = NULL, *bwtOut = NULL;
        if(doSaFile) {
            saOut = new ofstream(_inSaStr.c_str(), ios::binary);
            if(!saOut->good()) {
                cerr << "Could not open suffix-array file for writing: \"" << _inSaStr.c_str() << "\"" << endl
                << "Please make sure the directory exists and that permissions allow writing by" << endl
                << "Bowtie." << endl;
                throw 1;
            }
        }
        if(doBwtFile) {
            bwtOut = new ofstream(_inBwtStr.c_str(), ios::binary);
            if(!bwtOut->good()) {
                cerr << "Could not open suffix-array file for writing: \"" << _inBwtStr.c_str() << "\"" << endl
                << "Please make sure the directory exists and that permissions allow writing by" << endl
                << "Bowtie." << endl;
                throw 1;
            }
        }
		// Build
		initFromVector<TStr>(
							 s,
							 is,
							 szs,
							 sztot,
							 refparams,
							 fout1,
							 fout2,
                             saOut,
                             bwtOut,
                             kmer_size,
                             file,
                             conversion_table_fname,
                             taxonomy_fname,
                             name_table_fname,
                             size_table_fname,
							 useBlockwise,
							 bmax,
							 bmaxSqrtMult,
							 bmaxDivN,
							 dcv,
                             nthreads,
							 seed,
							 verbose);
		// Close output files
		fout1.flush();
		int64_t tellpSz1 = (int64_t)fout1.tellp();
		VMSG_NL("Wrote " << fout1.tellp() << " bytes to primary EBWT file: " << _in1Str.c_str());
		fout1.close();
		bool err = false;
		if(tellpSz1 > fileSize(_in1Str.c_str())) {
			err = true;
			cerr << "Index is corrupt: File size for " << _in1Str.c_str() << " should have been " << tellpSz1
			     << " but is actually " << fileSize(_in1Str.c_str()) << "." << endl;
		}
		fout2.flush();
		int64_t tellpSz2 = (int64_t)fout2.tellp();
		VMSG_NL("Wrote " << fout2.tellp() << " bytes to secondary EBWT file: " << _in2Str.c_str());
		fout2.close();
		if(tellpSz2 > fileSize(_in2Str.c_str())) {
			err = true;
			cerr << "Index is corrupt: File size for " << _in2Str.c_str() << " should have been " << tellpSz2
			     << " but is actually " << fileSize(_in2Str.c_str()) << "." << endl;
		}
        if(saOut != NULL) {
            // Check on suffix array output file size
            int64_t tellpSzSa = (int64_t)saOut->tellp();
            VMSG_NL("Wrote " << tellpSzSa << " bytes to suffix-array file: " << _inSaStr.c_str());
            saOut->close();
            if(tellpSzSa > fileSize(_inSaStr.c_str())) {
                err = true;
                cerr << "Index is corrupt: File size for " << _inSaStr.c_str() << " should have been " << tellpSzSa
                << " but is actually " << fileSize(_inSaStr.c_str()) << "." << endl;
            }
        }
        if(bwtOut != NULL) {
            // Check on suffix array output file size
            int64_t tellpSzBwt = (int64_t)bwtOut->tellp();
            VMSG_NL("Wrote " << tellpSzBwt << " bytes to BWT file: " << _inBwtStr.c_str());
            bwtOut->close();
            if(tellpSzBwt > fileSize(_inBwtStr.c_str())) {
                err = true;
                cerr << "Index is corrupt: File size for " << _inBwtStr.c_str() << " should have been " << tellpSzBwt
                << " but is actually " << fileSize(_inBwtStr.c_str()) << "." << endl;
            }
        }
		if(err) {
			cerr << "Please check if there is a problem with the disk or if disk is full." << endl;
			throw 1;
		}
		// Reopen as input streams
		VMSG_NL("Re-opening _in1 and _in2 as input streams");
		if(_sanity) {
			VMSG_NL("Sanity-checking Bt2");
			assert(!isInMemory());
			readIntoMemory(
				color,                       // colorspace?
				fw ? -1 : needEntireReverse, // 1 -> need the reverse to be reverse-of-concat
				true,                        // load SA sample (_offs[])?
				true,                        // load ftab (_ftab[] & _eftab[])?
				true,                        // load r-starts (_rstarts[])?
				false,                       // just load header?
				NULL,                        // Params object to fill
				false,                       // mm sweep?
				true,                        // load names?
				false);                      // verbose startup?
			// sanityCheckAll(refparams.reverse);
			evictFromMemory();
			assert(!isInMemory());
		}
		VMSG_NL("Returning from Ebwt constructor");
	}
	
	/**
	 * Static factory: build the forward/reverse index pair for one
	 * reference sequence given as a C string.  The sequence is placed in
	 * a one-element list and every other argument is handed to
	 * fromStrings() untouched.
	 */
	template<typename TStr>
	static pair<Ebwt*, Ebwt*>
	fromString(
		const char* str,
		bool packed,
		int color,
		int reverse,
		bool bigEndian,
		int32_t lineRate,
		int32_t offRate,
		int32_t ftabChars,
		const string& file,
		bool useBlockwise,
		index_t bmax,
		index_t bmaxSqrtMult,
		index_t bmaxDivN,
		int dcv,
		uint32_t seed,
		bool verbose,
		bool autoMem,
		bool sanity)
	{
		// One-element list holding a copy of the caller's sequence
		const std::string sequence(str);
		EList<std::string> singleton(EBWT_CAT);
		singleton.push_back(sequence);
		return fromStrings<TStr>(
			singleton, packed, color, reverse, bigEndian,
			lineRate, offRate, ftabChars, file,
			useBlockwise, bmax, bmaxSqrtMult, bmaxDivN, dcv,
			seed, verbose, autoMem, sanity);
	}
	
	/**
	 * Static constructor for a pair of forward/reverse indexes for the
	 * given list of reference strings.
	 *
	 * The strings are rendered into an in-memory FASTA stream (record i is
	 * named ">i"), scanned with BitPairReference::szsFromFasta() and
	 * indexed twice: once forward under basename 'file', once with
	 * refparams.reverse set to 'reverse' under basename 'file + ".rev"'.
	 *
	 * Returns (forward index, mirror index); the caller owns both
	 * pointers.  If building the mirror index throws, the already-built
	 * forward index is destroyed instead of being leaked.
	 *
	 * NOTE(review): the argument lists below carry neither 'nthreads' nor
	 * the taxonomy file names and pass a temporary TStr(), so they do not
	 * match the building constructor visible in this part of the file -
	 * confirm which overload they are meant to select.
	 */
	template<typename TStr>
	static pair<Ebwt*, Ebwt*>
	fromStrings(
		const EList<std::string>& strs,
		bool packed,
		int color,
		int reverse,
		bool bigEndian,
		int32_t lineRate,
		int32_t offRate,
		int32_t ftabChars,
		const string& file,
		bool useBlockwise,
		index_t bmax,
		index_t bmaxSqrtMult,
		index_t bmaxDivN,
		int dcv,
		uint32_t seed,
		bool verbose,
		bool autoMem,
		bool sanity)
	{
		assert(!strs.empty());
		EList<FileBuf*> is(EBWT_CAT);
		RefReadInParams refparams(color, REF_READ_FORWARD, false, false);
		// Adapt sequence strings to stringstreams open for input
		unique_ptr<stringstream> ss(new stringstream());
		for(index_t i = 0; i < strs.size(); i++) {
			(*ss) << ">" << i << endl << strs[i] << endl;
		}
		unique_ptr<FileBuf> fb(new FileBuf(ss.get()));
		assert(!fb->eof());
		assert(fb->get() == '>');
		ASSERT_ONLY(fb->reset());
		assert(!fb->eof());
		is.push_back(fb.get());
		// Vector for the ordered list of "records" comprising the input
		// sequences.  A record represents a stretch of unambiguous
		// characters in one of the input sequences.
		EList<RefRecord> szs(EBWT_CAT);
		std::pair<index_t, index_t> sztot;
		sztot = BitPairReference::szsFromFasta(is, file, bigEndian, refparams, szs, sanity);
		// Construct the forward Ebwt.  It stays owned by a unique_ptr until
		// the mirror index exists, so an exception thrown while building the
		// second index cannot leak the first one.
		unique_ptr<Ebwt<index_t> > ebwtFw(new Ebwt<index_t>(
			TStr(),
			packed,
			refparams.color ? 1 : 0,
			-1,           // fw
			lineRate,
			offRate,      // suffix-array sampling rate
			ftabChars,    // number of chars in initial arrow-pair calc
			file,         // basename for .?.ebwt files
			true,         // fw?
			useBlockwise, // useBlockwise
			bmax,         // block size for blockwise SA builder
			bmaxSqrtMult, // block size as multiplier of sqrt(len)
			bmaxDivN,     // block size as divisor of len
			dcv,          // difference-cover period
			is,           // list of input streams
			szs,          // list of reference sizes
			sztot.first,  // total size of all unambiguous ref chars
			refparams,    // reference read-in parameters
			seed,         // pseudo-random number generator seed
			-1,           // override offRate
			verbose,      // be talkative
			autoMem,      // pass exceptions up to the toplevel so that we can adjust memory settings automatically
			sanity));     // verify results and internal consistency
		// Second pass over the same input with the requested orientation
		refparams.reverse = reverse;
		szs.clear();
		sztot = BitPairReference::szsFromFasta(is, file, bigEndian, refparams, szs, sanity);
		// Construct the mirror Ebwt
		Ebwt<index_t> *ebwtBw = new Ebwt<index_t>(
			TStr(),
			packed,
			refparams.color ? 1 : 0,
			reverse == REF_READ_REVERSE,
			lineRate,
			offRate,      // suffix-array sampling rate
			ftabChars,    // number of chars in initial arrow-pair calc
			file + ".rev",// basename for .?.ebwt files
			false,        // fw?
			useBlockwise, // useBlockwise
			bmax,         // block size for blockwise SA builder
			bmaxSqrtMult, // block size as multiplier of sqrt(len)
			bmaxDivN,     // block size as divisor of len
			dcv,          // difference-cover period
			is,           // list of input streams
			szs,          // list of reference sizes
			sztot.first,  // total size of all unambiguous ref chars
			refparams,    // reference read-in parameters
			seed,         // pseudo-random number generator seed
			-1,           // override offRate
			verbose,      // be talkative
			autoMem,      // pass exceptions up to the toplevel so that we can adjust memory settings automatically
			sanity);      // verify results and internal consistency
		// Both indexes exist: hand ownership of the forward one to the caller
		Ebwt<index_t> *ebwtFwRaw = ebwtFw.release();
		return make_pair(ebwtFwRaw, ebwtBw);
	}
	
	/// Return true iff the Ebwt is packed
	bool isPacked() { return packed_; }

	/**
	 * Write the rstarts array given the szs array for the reference.
	 *
	 * Declared here, defined elsewhere.  initFromVector() calls it with
	 * the primary index stream as 'os' and refparams.reverse as 'reverse';
	 * for a reversed reference it passes the reversed record list instead
	 * of the original one.
	 */
	void szsToDisk(const EList<RefRecord>& szs, ostream& os, int reverse);
	
	/**
	 * Helper for the constructors above.  Takes a vector of text
	 * strings and joins them into a single string with a call to
	 * joinToDisk, which does a join (with padding) and writes some of
	 * the resulting data directly to disk rather than keep it in
	 * memory.  It then constructs a suffix-array producer for the
	 * resulting sequence and streams the index image to disk with
	 * buildToDisk().
	 *
	 * Steps, in order:
	 *   1. write the header to out1/out2 and join the reference into 's'
	 *      (reversing it when refparams.reverse == REF_READ_REVERSE);
	 *   2. write "<base_fname>.3.<ext>": uid -> taxonomy-id table, the part
	 *      of the taxonomy tree reachable from those ids, the scientific
	 *      names (optional name table) and the per-taxon sizes (computed
	 *      from the reference, overridden by the optional size table);
	 *   3. pick 'bmax' and loop over bmax/dcv settings until a build fits
	 *      in memory, writing out1/out2, "<base_fname>.4.<ext>" and, when
	 *      non-NULL, saOut/bwtOut;
	 *   4. append the reference names to out1.
	 *
	 * saOut / bwtOut may be NULL.  An empty name_table_fname or
	 * size_table_fname skips the corresponding input file.
	 *
	 * NOTE(review): size_table_fname precedes name_table_fname in this
	 * signature; make sure callers pass them in this order.
	 *
	 * Throws the int value 1 on I/O or input-file errors and when no
	 * workable parameters are found; throws/rethrows bad_alloc when
	 * _passMemExc is set and the index is not packed, so that the caller
	 * can retry with a packed representation.
	 */
	template <typename TStr>
	void initFromVector(TStr& s,
						EList<FileBuf*>& is,
	                    EList<RefRecord>& szs,
	                    index_t sztot,
	                    const RefReadInParams& refparams,
	                    ofstream& out1,
	                    ofstream& out2,
                        ofstream* saOut,
                        ofstream* bwtOut,
                        int kmer_size,
                        const string& base_fname,
                        const string& conversion_table_fname,
                        const string& taxonomy_fname,
                        const string& size_table_fname,
                        const string& name_table_fname,
	                    bool useBlockwise,
	                    index_t bmax,
	                    index_t bmaxSqrtMult,
	                    index_t bmaxDivN,
	                    int dcv,
                        int nthreads,
	                    uint32_t seed,
						bool verbose)
	{
		// Compose text strings into single string
		VMSG_NL("Calculating joined length");
		index_t jlen;
		jlen = joinedLen(szs);
		assert_geq(jlen, sztot);
		VMSG_NL("Writing header");
		writeFromMemory(true, out1, out2);
		try {
			VMSG_NL("Reserving space for joined string");
			s.resize(jlen);
			VMSG_NL("Joining reference sequences");
			if(refparams.reverse == REF_READ_REVERSE) {
				{
					Timer timer(cout, "  Time to join reference sequences: ", _verbose);
					joinToDisk(is, szs, sztot, refparams, s, out1, out2);
				} {
					Timer timer(cout, "  Time to reverse reference sequence: ", _verbose);
					EList<RefRecord> tmp(EBWT_CAT);
					s.reverse();
					reverseRefRecords(szs, tmp, false, verbose);
					szsToDisk(tmp, out1, refparams.reverse);
				}
			} else {
				Timer timer(cout, "  Time to join reference sequences: ", _verbose);
				joinToDisk(is, szs, sztot, refparams, s, out1, out2);
				szsToDisk(szs, out1, refparams.reverse);
			}
			// Joined reference sequence now in 's'
		} catch(bad_alloc&) {
			// If we throw an allocation exception in the try block,
			// that means that the joined version of the reference
			// string itself is too large to fit in memory.  The only
			// alternatives are to tell the user to give us more memory
			// or to try again with a packed representation of the
			// reference (if we haven't tried that already).
			cerr << "Could not allocate space for a joined string of " << jlen << " elements." << endl;
			if(!isPacked() && _passMemExc) {
				// Pass the exception up so that we can retry using a
				// packed string representation.  A bare 'throw' rethrows
				// the original exception object rather than a copy.
				throw;
			}
			// There's no point passing this exception on.  The fact
			// that we couldn't allocate the joined string means that
			// --bmax is irrelevant - the user should re-run with
			// ebwt-build-packed
			if(isPacked()) {
				cerr << "Please try running centrifuge-build on a computer with more memory." << endl;
			} else {
				cerr << "Please try running centrifuge-build in packed mode (-p/--packed) or in automatic" << endl
				     << "mode (-a/--auto), or try again on a computer with more memory." << endl;
			}
			if(sizeof(void*) == 4) {
				cerr << "If this computer has more than 4 GB of memory, try using a 64-bit executable;" << endl
				     << "this executable is 32-bit." << endl;
			}
			throw 1;
		}

		// Record whether the number of reference sequences exceeds the
		// 16-bit range
		this->_offw = this->_nPat > std::numeric_limits<uint16_t>::max();

		// Unique ids of all reference sequences
		std::set<string> uids;
		for(size_t i = 0; i < _refnames.size(); i++) {
			const string& refname = _refnames[i];
			string uid = get_uid(refname);
			uids.insert(uid);
		}

		std::map<string, uint64_t> uid_to_tid; // map from unique id to taxonomy id
		{
			ifstream table_file(conversion_table_fname.c_str(), ios::in);
			if(!table_file.is_open()) {
				cerr << "Error: " << conversion_table_fname << " doesn't exist!" << endl;
				throw 1;
			}
			// Extraction-driven loop: it ends as soon as a read fails, so
			// neither a trailing newline nor a stream error can make it spin.
			string uid;
			while(table_file >> uid) {
				if(uid[0] == '#') {
					// Comment: discard the remainder of the line so that
					// its words are not parsed as table entries
					table_file.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
					continue;
				}
				string stid;
				if(!(table_file >> stid)) break; // dangling uid without a taxonomy id
				uint64_t tid = get_tid(stid);
				if(uids.find(uid) == uids.end()) continue; // not one of our sequences
				std::map<string, uint64_t>::const_iterator known = uid_to_tid.find(uid);
				if(known != uid_to_tid.end()) {
					if(known->second != tid) {
						cerr << "Warning: Diverging taxonomy IDs for " << uid << " in " << conversion_table_fname << ": "
						     << known->second << " and " << tid << ". Taking first. " << endl;
					}
					continue;
				}
				uid_to_tid[uid] = tid;
			}
			table_file.close();
		}
		// Open output stream for the '.3.cf' file which will hold conversion table and taxonomy tree
		string fname3 = base_fname + ".3." + gEbwt_ext;
		ofstream fout3(fname3.c_str(), ios::binary);
		if(!fout3.good()) {
			cerr << "Could not open index file for writing: \"" << fname3 << "\"" << endl
			     << "Please make sure the directory exists and that permissions allow writing by Centrifuge" << endl;
			throw 1;
		}
		std::set<uint64_t> tids;
		writeIndex<int32_t>(fout3, 1, this->toBe()); // endianness sentinel
		writeIndex<uint64_t>(fout3, _refnames.size(), this->toBe());
		for(size_t i = 0; i < _refnames.size(); i++) {
			const string& refname = _refnames[i];
			string uid = get_uid(refname);
			// NUL-terminated uid followed by its taxonomy id (0 when unknown)
			for(size_t c = 0; c < uid.length(); c++) {
				fout3 << uid[c];
			}
			fout3 << '\0';
			std::map<string, uint64_t>::const_iterator hit = uid_to_tid.find(uid);
			if(hit != uid_to_tid.end()) {
				uint64_t tid = hit->second;
				writeIndex<uint64_t>(fout3, tid, this->toBe());
				tids.insert(tid);
			} else {
				cerr << "Warning: taxonomy id doesn't exists for " << uid << "!" << endl;
				writeIndex<uint64_t>(fout3, 0, this->toBe());
			}
		}

		// Read taxonomy
		{
			TaxonomyTree tree = read_taxonomy_tree(taxonomy_fname);
			std::set<uint64_t> tree_color; // every taxon on a path from a used id towards the root

			for(std::set<uint64_t>::iterator itr = tids.begin(); itr != tids.end(); itr++) {
				uint64_t tid = *itr;
				if(tree.find(tid) == tree.end()) {
					cerr << "Warning: Taxonomy ID " << tid << " is not in the provided taxonomy tree (" << taxonomy_fname << ")!" << endl;
				}
				while(tree.find(tid) != tree.end()) {
					uint64_t parent_tid = tree[tid].parent_tid;
					// A node that is already recorded had all of its
					// ancestors recorded when it was first reached, so the
					// walk can stop here.  This also terminates on a
					// malformed tree that contains a cycle.
					if(!tree_color.insert(tid).second) break;
					if(parent_tid == tid) break; // root points at itself
					tid = parent_tid;
				}
			}
			writeIndex<uint64_t>(fout3, tree_color.size(), this->toBe());
			for(std::set<uint64_t>::iterator itr = tree_color.begin(); itr != tree_color.end(); itr++) {
				uint64_t tid = *itr;
				writeIndex<uint64_t>(fout3, tid, this->toBe());
				assert(tree.find(tid) != tree.end());
				const TaxonomyNode& node = tree[tid];
				writeIndex<uint64_t>(fout3, node.parent_tid, this->toBe());
				writeIndex<uint16_t>(fout3, node.rank, this->toBe());
			}

			// Read name table
			_name.clear();
			if(name_table_fname != "") {
				ifstream table_file(name_table_fname.c_str(), ios::in);
				if(!table_file.is_open()) {
					cerr << "Error: " << name_table_fname << " doesn't exist!" << endl;
					throw 1;
				}
				// std::getline into a string has no line-length limit; a
				// fixed char buffer would put the stream into a fail state
				// on an over-long line.
				string line;
				while(std::getline(table_file, line)) {
					if(line.empty() || line[0] == '#') continue;
					if(line.find("scientific name") == string::npos) continue;
					istringstream cline(line);
					uint64_t tid = 0;
					char dummy;
					string scientific_name;
					// Expected tokens: <tid> <separator char> <first word> ... |
					if(!(cline >> tid >> dummy >> scientific_name)) continue;
					if(tree_color.find(tid) == tree_color.end()) continue;
					// Remaining words of the name are joined with '@' up to
					// the next "|" token.  Also stop when the line runs out,
					// otherwise a row without a closing "|" never ends.
					string temp;
					while(cline >> temp && temp != "|") {
						scientific_name.push_back('@');
						scientific_name += temp;
					}
					_name[tid] = scientific_name;
				}
				table_file.close();
			}

			writeIndex<uint64_t>(fout3, _name.size(), this->toBe());
			for(std::map<uint64_t, string>::const_iterator itr = _name.begin(); itr != _name.end(); itr++) {
				writeIndex<uint64_t>(fout3, itr->first, this->toBe());
				fout3 << itr->second << endl;
			}
		}

		// Read size table
		{
			_size.clear();

			// Calculate contig (or genome) sizes corresponding to each taxonomic ID
			for(size_t i = 0; i < _refnames.size(); i++) {
				string uid = get_uid(_refnames[i]);
				std::map<string, uint64_t>::const_iterator hit = uid_to_tid.find(uid);
				if(hit == uid_to_tid.end())
					continue;
				uint64_t contig_size = plen()[i];
				// operator[] value-initialises a missing entry to 0
				_size[hit->second] += contig_size;
			}

			// Entries of the optional size table override the computed sizes
			if(size_table_fname != "") {
				ifstream table_file(size_table_fname.c_str(), ios::in);
				if(!table_file.is_open()) {
					cerr << "Error: " << size_table_fname << " doesn't exist!" << endl;
					throw 1;
				}
				string stid;
				while(table_file >> stid) {
					if(stid[0] == '#') {
						// Comment: discard the remainder of the line
						table_file.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
						continue;
					}
					uint64_t tid = get_tid(stid);
					uint64_t size;
					if(!(table_file >> size)) {
						// A missing or non-numeric size leaves the stream
						// failed; stop instead of looping on it forever.
						cerr << "Warning: malformed entry for " << stid << " in " << size_table_fname
						     << "; ignoring the rest of the file." << endl;
						break;
					}
					_size[tid] = size;
				}
				table_file.close();
			}

			writeIndex<uint64_t>(fout3, _size.size(), this->toBe());
			for(std::map<uint64_t, uint64_t>::const_iterator itr = _size.begin(); itr != _size.end(); itr++) {
				writeIndex<uint64_t>(fout3, itr->first, this->toBe());
				writeIndex<uint64_t>(fout3, itr->second, this->toBe());
			}
		}

		fout3.close();
		if(fout3.fail()) {
			cerr << "An error occurred writing the index to disk.  Please check if the disk is full." << endl;
			throw 1;
		}

		// Successfully obtained joined reference string
		assert_geq(s.length(), jlen);
		// Choose bmax.  The three settings are mutually exclusive; a value
		// of OFF_MASK means "not set".
		if(bmax != (index_t)OFF_MASK) {
			VMSG_NL("bmax according to bmax setting: " << bmax);
		}
		else if(bmaxSqrtMult != (index_t)OFF_MASK) {
			// bmax is the OFF_MASK sentinel on this path, so it has to be
			// derived from sqrt(len) rather than multiplied in place
			bmax = (index_t)sqrt((double)s.length()) * bmaxSqrtMult;
			VMSG_NL("bmax according to bmaxSqrtMult setting: " << bmax);
		}
		else if(bmaxDivN != (index_t)OFF_MASK) {
			// Computed in index_t so that large references are not truncated to 32 bits
			bmax = max<index_t>((index_t)(jlen / bmaxDivN), 1);
			VMSG_NL("bmax according to bmaxDivN setting: " << bmax);
		}
		else {
			bmax = (index_t)sqrt((double)s.length());
			VMSG_NL("bmax defaulted to: " << bmax);
		}
		int iter = 0;
		bool first = true;
		// Remember where the index image starts so a failed attempt can be overwritten
		streampos out1pos = out1.tellp();
		streampos out2pos = out2.tellp();
		// Look for bmax/dcv parameters that work.
		while(true) {
			if(!first && bmax < 40 && _passMemExc) {
				cerr << "Could not find approrpiate bmax/dcv settings for building this index." << endl;
				if(!isPacked()) {
					// Throw an exception so that we can
					// retry using a packed string representation
					throw bad_alloc();
				} else {
					cerr << "Already tried a packed string representation." << endl;
				}
				cerr << "Please try indexing this reference on a computer with more memory." << endl;
				if(sizeof(void*) == 4) {
					cerr << "If this computer has more than 4 GB of memory, try using a 64-bit executable;" << endl
						 << "this executable is 32-bit." << endl;
				}
				throw 1;
			}
			if(!first) {
				out1.seekp(out1pos);
				out2.seekp(out2pos);
			}
			if(dcv > 4096) dcv = 4096;
			if((iter % 6) == 5 && dcv < 4096 && dcv != 0) {
				dcv <<= 1; // double difference-cover period
			} else {
				bmax -= (bmax >> 2); // reduce by 25%
			}
			VMSG("Using parameters --bmax " << bmax);
			if(dcv == 0) {
				VMSG_NL(" and *no difference cover*");
			} else {
				VMSG_NL(" --dcv " << dcv);
			}
			iter++;
			try {
				{
					VMSG_NL("  Doing ahead-of-time memory usage test");
					// Make a quick-and-dirty attempt to force a bad_alloc iff
					// we would have thrown one eventually as part of
					// constructing the DifferenceCoverSample
					dcv <<= 1;
					index_t sz = (index_t)DifferenceCoverSample<TStr>::simulateAllocs(s, dcv >> 1);
					AutoArray<uint8_t> tmp(sz, EBWT_CAT);
					dcv >>= 1;
					// Likewise with the KarkkainenBlockwiseSA
					sz = (index_t)KarkkainenBlockwiseSA<TStr>::simulateAllocs(s, bmax);
					if(nthreads > 1) sz *= (nthreads + 1);
					AutoArray<uint8_t> tmp2(sz, EBWT_CAT);
					// Now throw in the 'ftab' and 'isaSample' structures
					// that we'll eventually allocate in buildToDisk
					AutoArray<index_t> ftab(_eh._ftabLen * 2, EBWT_CAT);
					AutoArray<uint8_t> side(_eh._sideSz, EBWT_CAT);
					// Grab another 20 MB out of caution
					AutoArray<uint32_t> extra(20*1024*1024, EBWT_CAT);
					// If we made it here without throwing bad_alloc, then we
					// passed the memory-usage stress test
					VMSG("  Passed!  Constructing with these parameters: --bmax " << bmax << " --dcv " << dcv);
					if(isPacked()) {
						VMSG(" --packed");
					}
					VMSG_NL("");
				}
				VMSG_NL("Constructing suffix-array element generator");
				KarkkainenBlockwiseSA<TStr> bsa(s, bmax, nthreads, dcv, seed, _sanity, _passMemExc, _verbose, base_fname);
				assert(bsa.suffixItrIsReset());
				assert_eq(bsa.size(), s.length()+1);
				VMSG_NL("Converting suffix-array elements to index image");

				string fname4 = base_fname + ".4." + gEbwt_ext;
				ofstream fout4(fname4.c_str(), ios::binary);
				if(!fout4.good()) {
					cerr << "Could not open index file for writing: \"" << fname4 << "\"" << endl
						<< "Please make sure the directory exists and that permissions allow writing by Centrifuge" << endl;
					throw 1;
				}
				buildToDisk(bsa, s, out1, out2, saOut, bwtOut, fout4, szs, kmer_size);
				fout4.close();
				out1.flush(); out2.flush();
				bool failed = out1.fail() || out2.fail() || fout4.fail();
				if(saOut != NULL) {
					saOut->flush();
					failed = failed || saOut->fail();
				}
				if(bwtOut != NULL) {
					bwtOut->flush();
					failed = failed || bwtOut->fail();
				}
				// Act on the collected stream states: a short write must
				// not be reported as a successful build
				if(failed) {
					cerr << "An error occurred writing the index to disk.  Please check if the disk is full." << endl;
					throw 1;
				}
				break;
			} catch(bad_alloc&) {
				if(_passMemExc) {
					VMSG_NL("  Ran out of memory; automatically trying more memory-economical parameters.");
				} else {
					cerr << "Out of memory while constructing suffix array.  Please try using a smaller" << endl
						 << "number of blocks by specifying a smaller --bmax or a larger --bmaxdivn" << endl;
					throw 1;
				}
			}
			first = false;
		}
		assert(repOk());
		// Now write reference sequence names on the end
		assert_eq(this->_refnames.size(), this->_nPat);
		for(index_t i = 0; i < this->_refnames.size(); i++) {
			out1 << this->_refnames[i].c_str() << endl;
		}
		out1 << '\0';
		out1.flush(); out2.flush();
		if(out1.fail() || out2.fail()) {
			cerr << "An error occurred writing the index to disk.  Please check if the disk is full." << endl;
			throw 1;
		}
		VMSG_NL("Returning from initFromVector");
	}
	
	/**
	 * Return the length that the joined string of the given string
	 * list will have.  Note that this is indifferent to how the text
	 * fragments correspond to input sequences - it just cares about
	 * the lengths of the fragments.
	 *
	 * The result is the sum of the 'len' field over all records, each
	 * converted to index_t; an empty list yields 0.
	 */
	index_t joinedLen(EList<RefRecord>& szs) {
		index_t ret = 0;
		// size_t index: same width as the container's size(), so the
		// counter cannot wrap before reaching the end of a very long list
		for(size_t i = 0; i < szs.size(); i++) {
			ret += (index_t)szs[i].len;
		}
		return ret;
	}

	/// Destruct an Ebwt: reset every array wrapper, run FREE_SHARED on the
	/// SA-sample and BWT arrays when useShmem_ is set, and close the
	/// _in1/_in2 file handles if they are open.
	~Ebwt() {
		_fchr.reset();
		_ftab.reset();
		_eftab.reset();
		_plen.reset();
		_rstarts.reset();
		_offs.reset();
        _offsw.reset();
		_ebwt.reset();
		// NOTE(review): offs(), offsw() and ebwt() read the wrappers that
		// were reset just above.  If reset() leaves them holding NULL, the
		// three FREE_SHARED branches below can never execute - confirm the
		// semantics of reset() before relying on (or reordering) this.
		if(offs() != NULL && useShmem_) {
			FREE_SHARED(offs());
		}
        if(offsw() != NULL && useShmem_) {
            FREE_SHARED(offsw());
        }
		if(ebwt() != NULL && useShmem_) {
			FREE_SHARED(ebwt());
		}
		// Close the underlying index files, if any are open
		if (_in1 != NULL) fclose(_in1);
		if (_in2 != NULL) fclose(_in2);
	}

	/// Accessors
	inline const EbwtParams<index_t>& eh() const     { return _eh; }
	index_t    zOff() const         { return _zOff; }
	index_t    zEbwtByteOff() const { return _zEbwtByteOff; }
	int        zEbwtBpOff() const   { return _zEbwtBpOff; }
	index_t    nPat() const        { return _nPat; }
	index_t    nFrag() const       { return _nFrag; }
	inline index_t*   fchr()              { return _fchr.get(); }
	inline index_t*   ftab()              { return _ftab.get(); }
	inline index_t*   eftab()             { return _eftab.get(); }
	inline uint16_t*   offs()              { return _offs.get(); }
    inline uint32_t*   offsw()             { return _offsw.get(); }
	inline index_t*   plen()              { return _plen.get(); }
	inline index_t*   rstarts()           { return _rstarts.get(); }
	inline uint8_t*    ebwt()              { return _ebwt.get(); }
	inline const index_t* fchr() const    { return _fchr.get(); }
	inline const index_t* ftab() const    { return _ftab.get(); }
	inline const index_t* eftab() const   { return _eftab.get(); }
    inline const uint16_t* offs() const    { return _offs.get(); }
    inline const uint32_t* offsw() const    { return _offsw.get(); }
	inline const index_t* plen() const    { return _plen.get(); }
	inline const index_t* rstarts() const { return _rstarts.get(); }
	inline const uint8_t*  ebwt() const    { return _ebwt.get(); }
	bool        toBe() const         { return _toBigEndian; }
	bool        verbose() const      { return _verbose; }
	bool        sanityCheck() const  { return _sanity; }
	EList<string>& refnames()        { return _refnames; }
	bool        fw() const           { return fw_; }
    
    const EList<pair<string, uint64_t> >&   uid_to_tid() const { return _uid_to_tid; }
    const TaxonomyTree& tree() const { return _tree; }
    const TaxonomyPathTable&                paths() const { return _paths; }
    const std::map<uint64_t, string>&       name() const { return _name; }
    const std::map<uint64_t, uint64_t>&     size() const { return _size; }
    inline const bool 	    saGenomeBoundaryHas( uint64_t key ) const { return _saGenomeBoundary.find( key ) != _saGenomeBoundary.end() ; }
    inline const uint32_t saGenomeBoundaryVal( uint64_t key ) const { return _saGenomeBoundary.at(key) ; }
    bool                                    compressed() const { return _compressed; }
    
    
#ifdef POPCNT_CAPABILITY
    // Set by the constructors from ProcessorSupport::POPCNTenabled();
    // only present when the build defines POPCNT_CAPABILITY.
    bool _usePOPCNTinstruction;
#endif

	/**
	 * Returns true iff the index contains the given string (exactly).  The
	 * given string must contain only unambiguous characters.  TODO:
	 * support skipping of ambiguous characters.
	 *
	 * Declaration only; the definition is outside this part of the file.
	 * 'top' and 'bot' are optional out-pointers that default to NULL.
	 * NOTE(review): presumably they receive the matching BW range when
	 * non-NULL -- confirm against the definition.
	 */
	bool contains(
		const BTDnaString& str,
		index_t *top = NULL,
		index_t *bot = NULL) const;

	/**
	 * Returns true iff the index contains the given string (exactly).  The
	 * given string must contain only unambiguous characters.  TODO:
	 * support skipping of ambiguous characters.
	 */
	bool contains(
		const char *str,
		index_t *top = NULL,
		index_t *bot = NULL) const
	{
		return contains(BTDnaString(str, true), top, bot);
	}
	
	/**
	 * Return true iff the Ebwt is currently in memory, which is decided
	 * solely by whether ebwt() is non-NULL.  In debug builds the other
	 * members are asserted to be consistent with that answer.
	 */
	bool isInMemory() const {
		const bool resident = (ebwt() != NULL);
		if(!resident) {
			// Evicted: the other arrays must be gone as well and the
			// '$' position must hold its "unset" sentinels.
			assert(ftab() == NULL);
			assert(eftab() == NULL);
			assert(fchr() == NULL);
			assert(offs() == NULL);
			assert(offsw() == NULL);
			// rstarts() is deliberately not checked.
			// FIXME FB: asserting rstarts() == NULL fails when calling
			// centrifuge-build-bin-debug
			assert_eq(_zEbwtByteOff, (index_t)OFF_MASK);
			assert_eq(_zEbwtBpOff, -1);
			return false;
		}
		// Resident.  Note: loading of _offs, _ftab, _eftab and _rstarts
		// might have been skipped depending on whether this is the
		// reverse index and what algorithm is being used, so only fchr
		// and the '$' position are checked here.
		assert(_eh.repOk());
		assert(fchr() != NULL);
		assert_neq(_zEbwtByteOff, (index_t)OFF_MASK);
		assert_neq(_zEbwtBpOff, -1);
		return true;
	}

	/// Return true iff the Ebwt is not currently in memory, i.e. the
	/// negation of isInMemory().
	bool isEvicted() const {
		const bool resident = isInMemory();
		return !resident;
	}

	/**
	 * Load this Ebwt into memory by reading it in from the _in1 and
	 * _in2 streams.
	 */
	void loadIntoMemory(
		int color,
		int needEntireReverse,
		bool loadSASamp,
		bool loadFtab,
		bool loadRstarts,
		bool loadNames,
		bool verbose)
	{
		readIntoMemory(
			color,       // expect index to be colorspace?
			needEntireReverse, // require reverse index to be concatenated reference reversed
			loadSASamp,  // load the SA sample portion?
			loadFtab,    // load the ftab (_ftab[] and _eftab[])?
			loadRstarts, // load the r-starts (_rstarts[])?
			false,       // stop after loading the header portion?
			NULL,        // params
			false,       // mmSweep
			loadNames,   // loadNames
			verbose);    // startVerbose
	}

	/**
	 * Frees memory associated with the Ebwt.
	 *
	 * Requires isInMemory() (asserted).  Calls free() on _fchr, _ftab,
	 * _eftab, _rstarts, _offs, _offsw and _ebwt, then resets
	 * _zEbwtByteOff and _zEbwtBpOff to the sentinel values that
	 * isInMemory() expects for an evicted index.  _plen is left alone.
	 */
	void evictFromMemory() {
		assert(isInMemory());
		_fchr.free();
		_ftab.free();
		_eftab.free();
		_rstarts.free();
		_offs.free(); // might not be under control of APtrWrap
        _offsw.free(); // might not be under control of APtrWrap
		_ebwt.free(); // might not be under control of APtrWrap
		// Keep plen; it's small and the client may want to seq it
		// even when the others are evicted.
		//_plen  = NULL;
		// Restore the "no '$' position" sentinels checked by isInMemory().
		_zEbwtByteOff = (index_t)OFF_MASK;
		_zEbwtBpOff = -1;
	}

	/**
	 * Pack the '_eh._ftabChars' characters of 'seq' beginning at offset
	 * 'off' into an integer, two bits per character, suitable for
	 * indexing the ftab array.
	 *
	 * Characters are consumed left-to-right when fw() (flipped by 'rev')
	 * is true and right-to-left otherwise; each one is shifted into the
	 * least significant bit-pair.  If any character is greater than 3,
	 * std::numeric_limits<index_t>::max() is returned instead.
	 */
	index_t ftabSeqToInt(
		const BTDnaString& seq,
		index_t off,
		bool rev) const
	{
		const int nchars = _eh._ftabChars;
		const index_t first = off;
		const index_t last = first + nchars;
		assert_leq(last, seq.length());
		// We add characters in the order they would have been consumed in
		// a normal search.  For BWT, this means right-to-left order; for
		// BWT' it's left-to-right.  The direction does not change while
		// packing, so it is decided once up front.
		const bool leftToRight = (rev ? !fw() : fw());
		index_t packed = 0;
		for(int k = 0; k < nchars; k++) {
			int ch;
			if(leftToRight) {
				ch = seq[first + k];
			} else {
				ch = seq[last - k - 1];
			}
			if(ch > 3) {
				// Character cannot be encoded in two bits
				return std::numeric_limits<index_t>::max();
			}
			assert_range(0, 3, ch);
			packed <<= 2;
			packed |= ch;
		}
		return packed;
	}
	
	/**
	 * Member convenience wrapper around the static ftabHi(): supplies
	 * this index's ftab/eftab arrays and the relevant lengths from _eh.
	 */
	index_t ftabHi(index_t i) const {
		const index_t *tab = ftab();
		const index_t *ext = eftab();
		return Ebwt<index_t>::ftabHi(
			tab, ext, _eh._len, _eh._ftabLen, _eh._eftabLen, i);
	}

	/**
	 * Get the "high interpretation" of ftab entry i.  An entry that is
	 * <= len is a regular entry and is returned unchanged.  A larger
	 * entry is an extended entry: XOR-ing it with OFF_MASK gives an
	 * index efIdx, and the result is eftab[efIdx*2+1], i.e. the second
	 * element of the corresponding eftab pair.
	 *
	 * Static so that it can be used before the Ebwt is fully
	 * initialized.
	 */
	static index_t ftabHi(
		const index_t *ftab,
		const index_t *eftab,
		index_t len,
		index_t ftabLen,
		index_t eftabLen,
		index_t i)
	{
		assert_lt(i, ftabLen);
		const index_t entry = ftab[i];
		if(entry > len) {
			// Extended entry: decode the eftab pair index
			const index_t efIdx = entry ^ (index_t)OFF_MASK;
			assert_lt(efIdx*2+1, eftabLen);
			return eftab[efIdx*2+1];
		}
		return entry;
	}

	/**
	 * Member convenience wrapper around the static ftabLo(): supplies
	 * this index's ftab/eftab arrays and the relevant lengths from _eh.
	 */
	index_t ftabLo(index_t i) const {
		const index_t *tab = ftab();
		const index_t *ext = eftab();
		return Ebwt<index_t>::ftabLo(
			tab, ext, _eh._len, _eh._ftabLen, _eh._eftabLen, i);
	}
	
	/**
	 * Get the low interpretation of the ftab entry selected by the
	 * characters of 'seq' starting at 'off' (packed with rev == false).
	 */
	index_t ftabLo(const BTDnaString& seq, index_t off) const {
		const index_t key = ftabSeqToInt(seq, off, false);
		return ftabLo(key);
	}

	/**
	 * Get the high interpretation of the ftab entry selected by the
	 * characters of 'seq' starting at 'off' (packed with rev == false).
	 */
	index_t ftabHi(const BTDnaString& seq, index_t off) const {
		const index_t key = ftabSeqToInt(seq, off, false);
		return ftabHi(key);
	}
	
	/**
	 * Extract characters from seq starting at offset 'off' and going either
	 * forward or backward, depending on 'rev'.  Order matters when compiling
	 * the integer that gets looked up in the ftab.  Each successive character
	 * is ORed into the least significant bit-pair, and characters are
	 * integrated in the direction of the search.
	 */
	bool
	ftabLoHi(
		const BTDnaString& seq, // sequence to extract from
		index_t off,             // offset into seq to begin extracting
		bool rev,               // reverse while extracting
		index_t& top,
		index_t& bot) const
	{
		index_t fi = ftabSeqToInt(seq, off, rev);
		if(fi == std::numeric_limits<index_t>::max()) {
			return false;
		}
		top = ftabHi(fi);
		bot = ftabLo(fi+1);
		assert_geq(bot, top);
		return true;
	}
	
	/**
	 * Get the "low interpretation" of ftab entry i.  An entry that is
	 * <= len is a regular entry and is returned unchanged.  A larger
	 * entry is an extended entry: XOR-ing it with OFF_MASK gives an
	 * index efIdx, and the result is eftab[efIdx*2], i.e. the first
	 * element of the corresponding eftab pair.
	 *
	 * Static so that it can be used before the Ebwt is fully
	 * initialized.
	 */
	static index_t ftabLo(
		const index_t *ftab,
		const index_t *eftab,
		index_t len,
		index_t ftabLen,
		index_t eftabLen,
		index_t i)
	{
		assert_lt(i, ftabLen);
		const index_t entry = ftab[i];
		if(entry > len) {
			// Extended entry: decode the eftab pair index
			const index_t efIdx = entry ^ (index_t)OFF_MASK;
			assert_lt(efIdx*2+1, eftabLen);
			return eftab[efIdx*2];
		}
		return entry;
	}

	/**
	 * Try to resolve the reference offset of the BW element 'elt'
	 * without walking the index.
	 *
	 *  - If elt is _zOff, the offset is 0.
	 *  - If elt has no bits outside _eh._offMask, the offset is read from
	 *    the sample array (offsw() when _offw is set, offs() otherwise)
	 *    at position elt >> _eh._offRate.
	 *  - Otherwise, if elt is recorded in the genome-boundary table, the
	 *    stored value is returned (narrowed to 16 bits when _offw is not
	 *    set).
	 *  - Otherwise (index_t)OFF_MASK is returned to signal "cannot be
	 *    resolved immediately".
	 */
	index_t tryOffset(index_t elt) const {
#ifndef NDEBUG
		// The sample array matching the configured entry width must exist
		assert(this->_offw ? (offsw() != NULL) : (offs() != NULL));
#endif
		if(elt == _zOff) {
			return 0;
		}
		const bool sampled = ((elt & _eh._offMask) == elt);
		if(sampled) {
			const index_t slot = elt >> _eh._offRate;
			assert_lt(slot, _eh._offsLen);
			index_t off;
			if(this->_offw) {
				off = offsw()[slot];
			} else {
				off = offs()[slot];
			}
			assert_neq((index_t)OFF_MASK, off);
			return off;
		}
		// Not a sampled row; consult the genome-boundary table.  The
		// "_lastGenomeBoundary > 0" test comes first so that the lookup
		// only happens when the .4 index has been loaded.
		const bool atBoundary =
			_lastGenomeBoundary > 0 &&
			elt <= _lastGenomeBoundary &&
			_boundaryCheck.test( elt >> _boundaryCheckShift ) &&
			saGenomeBoundaryHas( elt );
		if(!atBoundary) {
			return (index_t)OFF_MASK;
		}
		const uint32_t val = (index_t)saGenomeBoundaryVal( elt );
		if(this->_offw) {
			return val;
		}
		return (uint16_t)val;
	}

	/**
	 * Try to resolve the reference offset of the BW element 'elt' such
	 * that the offset returned is at the right-hand side of the forward
	 * reference substring involved in the hit.
	 *
	 * The single-argument tryOffset() result is returned unchanged when
	 * it is (index_t)OFF_MASK or when 'fw' is true.  Otherwise it is
	 * mirrored as _eh._len - off - 1 and then moved back by hitlen - 1.
	 */
	index_t tryOffset(
		index_t elt,
		bool fw,
		index_t hitlen) const
	{
		index_t off = tryOffset(elt);
		const bool resolved = (off != (index_t)OFF_MASK);
		if(!resolved || fw) {
			return off;
		}
		assert_lt(off, _eh._len);
		off = _eh._len - off - 1;
		assert_geq(off, hitlen-1);
		off -= (hitlen-1);
		assert_lt(off, _eh._len);
		return off;
	}

	/**
	 * Walk 'steps' steps to the left starting from 'row' and return the
	 * row arrived at.  (Declaration only; defined outside this part of
	 * the file.)
	 */
	index_t walkLeft(index_t row, index_t steps) const;

	/**
	 * Resolve the reference offset of the BW row 'row'.  (Declaration
	 * only; defined outside this part of the file.)
	 */
	index_t getOffset(index_t row) const;

	/**
	 * Resolve the reference offset of the BW element 'elt' such that
	 * the offset returned is at the right-hand side of the forward
	 * reference substring involved in the hit.  Takes the same
	 * arguments as the three-argument tryOffset().  (Declaration only;
	 * defined outside this part of the file.)
	 */
	index_t getOffset(
		index_t elt,
		bool fw,
		index_t hitlen) const;

	/**
	 * Finish initialization after read(): derive the fields that are
	 * neither constructor parameters nor stored in the index file.  At
	 * present that means locating '$' inside the packed BWT, i.e.
	 * computing _zEbwtByteOff and _zEbwtBpOff from _zOff.
	 */
	void postReadInit(EbwtParams<index_t>& eh) {
		// Which side holds _zOff, and where inside that side it falls
		const index_t sideIdx      = _zOff / eh._sideBwtLen;
		const index_t charInSide   = _zOff % eh._sideBwtLen;
		const index_t sideStart    = sideIdx * eh._sideSz;
		// Four characters per byte: byte within the side first...
		_zEbwtByteOff = charInSide >> 2;
		assert_lt(_zEbwtByteOff, eh._sideBwtSz);
		// ...then the bit-pair within that byte
		_zEbwtBpOff = charInSide & 3;
		assert_lt(_zEbwtBpOff, 4);
		// Make the byte offset relative to the start of the whole array
		_zEbwtByteOff += sideStart;
		assert(repOk(eh)); // Ebwt should be fully initialized now
	}

	/**
	 * Given basename of an Ebwt index, read and return its flags.
	 * (Declaration only; defined outside this part of the file.)
	 */
	static int32_t readFlags(const string& instr);

	/**
	 * Pretty-print the Ebwt to the given output stream, using this
	 * index's own parameters (_eh).
	 */
	void print(ostream& out) const {
		const EbwtParams<index_t>& own = _eh;
		print(out, own);
	}
	
	/**
	 * Pretty-print the given EbwtParams to the given output stream.
	 *
	 * Only the parameters are printed.  The previous body also contained
	 * a per-member dump (zOff, zEbwtByteOff, zEbwtBpOff, nPat, plen,
	 * rstarts, ebwt, fchr, ftab, eftab, offs), but it sat after an
	 * unconditional 'return' and could never execute.  That unreachable
	 * code has been removed; the output is unchanged.
	 */
	void print(ostream& out, const EbwtParams<index_t>& eh) const {
		eh.print(out); // print params
	}

	// The members below are declarations only; their definitions are
	// outside this part of the file.

	// Building
	template <typename TStr> static TStr join(EList<TStr>& l, uint32_t seed);
	template <typename TStr> static TStr join(EList<FileBuf*>& l, EList<RefRecord>& szs, index_t sztot, const RefReadInParams& refparams, uint32_t seed);
	template <typename TStr> void joinToDisk(EList<FileBuf*>& l, EList<RefRecord>& szs, index_t sztot, const RefReadInParams& refparams, TStr& ret, ostream& out1, ostream& out2);
	template <typename TStr> void buildToDisk(InorderBlockwiseSA<TStr>& sa, const TStr& s, ostream& out1, ostream& out2, ostream* saOut, ostream* bwtOut, ostream& out4, const EList<RefRecord>& szs, int kmer_size );

	// I/O
	// readIntoMemory() is the worker behind loadIntoMemory(), which passes
	// justHeader = false, params = NULL and mmSweep = false.
	void readIntoMemory(int color, int needEntireRev, bool loadSASamp, bool loadFtab, bool loadRstarts, bool justHeader, EbwtParams<index_t> *params, bool mmSweep, bool loadNames, bool startVerbose);
	void writeFromMemory(bool justHeader, ostream& out1, ostream& out2) const;
	void writeFromMemory(bool justHeader, const string& out1, const string& out2) const;

	// Sanity checking
	void sanityCheckUpToSide(int upToSide) const;
	void sanityCheckAll(int reverse) const;
	void restore(SString<char>& s) const;
	void checkOrigs(const EList<SString<char> >& os, bool color, bool mirror) const;

	// Searching and reporting
	void joinedToTextOff(index_t qlen, index_t off, index_t& tidx, index_t& textoff, index_t& tlen, bool rejectStraddle, bool& straddled) const;

// Debug helpers for four-element A/C/G/T count arrays.  Each expands to
// four assert_leq statements and relies on 'this' being an Ebwt.

// Assert that each of x[0..3] is at most the number of BWT characters in
// one side (_eh._sideBwtLen).
#define WITHIN_BWT_LEN(x) \
	assert_leq(x[0], this->_eh._sideBwtLen); \
	assert_leq(x[1], this->_eh._sideBwtLen); \
	assert_leq(x[2], this->_eh._sideBwtLen); \
	assert_leq(x[3], this->_eh._sideBwtLen)

// Assert that x[c] does not exceed fchr()[c+1] for c = 0..3.
#define WITHIN_FCHR(x) \
	assert_leq(x[0], this->fchr()[1]); \
	assert_leq(x[1], this->fchr()[2]); \
	assert_leq(x[2], this->fchr()[3]); \
	assert_leq(x[3], this->fchr()[4])

// Same as WITHIN_FCHR, except that x[0] is allowed one extra count
// (callers use this where '$' may still be tallied as an 'A').
#define WITHIN_FCHR_DOLLARA(x) \
	assert_leq(x[0], this->fchr()[1]+1); \
	assert_leq(x[1], this->fchr()[2]); \
	assert_leq(x[2], this->fchr()[3]); \
	assert_leq(x[3], this->fchr()[4])

	/**
	 * Count all occurrences of character c from the beginning of the
	 * side to <by,bp>, then add the occ[] count stored with the side and
	 * fchr()[c], yielding the overall count for the locus.
	 *
	 * A Bowtie 2 side is shaped like:
	 *
	 * XXXXXXXXXXXXXXXX [A] [C] [G] [T]
	 *
	 * i.e. _eh._sideBwtSz bytes of packed BWT characters followed by four
	 * index_t-sized counters, which is where the occ[] values are read
	 * from below.
	 */
	inline index_t countBt2Side(const SideLocus<index_t>& l, int c) const {
		assert_range(0, 3, c);
		assert_range(0, (int)this->_eh._sideBwtSz-1, (int)l._by);
		assert_range(0, 3, (int)l._bp);
		const uint8_t *side = l.side(this->ebwt());
		index_t inSide = countUpTo(l, c);
		assert_leq(inSide, l.toBWRow());
		assert_leq(inSide, this->_eh._sideBwtLen);
		// '$' is stored as an 'A'.  If we are counting A's, '$' lives in
		// this side and the locus lies strictly past it, then the tally
		// above includes it and must be corrected.
		if(c == 0 &&
		   l._sideByteOff <= _zEbwtByteOff &&
		   ((l._sideByteOff + l._by > _zEbwtByteOff) ||
		    (l._sideByteOff + l._by == _zEbwtByteOff && l._bp > _zEbwtBpOff)))
		{
			inSide--; // Adjust for '$' looking like an 'A'
		}
		// The occ[] counters sit right after the packed characters
		const index_t *occ = reinterpret_cast<const index_t*>(side + _eh._sideBwtSz);
		assert_leq(occ[0], this->_eh._numSides * this->_eh._sideBwtLen); // b/c it's used as padding
		assert_leq(occ[1], this->_eh._len);
		assert_leq(occ[2], this->_eh._len);
		assert_leq(occ[3], this->_eh._len);
		const index_t total = occ[c] + inSide + this->fchr()[c];
#ifndef NDEBUG
		assert_leq(total, this->fchr()[c+1]); // can't have jumped into next char's section
		if(c == 0) {
			assert_leq(inSide, this->_eh._sideBwtLen);
		} else {
			assert_leq(total, this->_eh._bwtLen);
		}
#endif
		return total;
	}

	/**
	 * Count all occurrences of all four nucleotides up to the starting
	 * point given by 'l', storing the result in 'cntsUpto', then count
	 * nucleotide occurrences within the range of length 'num' that starts
	 * at 'l', storing the result in 'cntsIn'.  Also, keep track of the
	 * characters occurring within the range by setting 'masks'
	 * accordingly (masks[1][10] == true -> 11th character is a 'C', and
	 * masks[0][10] == masks[2][10] == masks[3][10] == false).
	 *
	 * 'masks' points at four EList<bool> objects (indices 0..3), each of
	 * which is resized to 'num'.  'cntsIn' is accumulated into, not
	 * reset.  The range may extend beyond the side containing 'l', in
	 * which case subsequent sides are visited via a copy of the locus.
	 */
	inline void countBt2SideRange(
		SideLocus<index_t>& l,        // top locus
		index_t num,        // number of elts in range to tally
		index_t* cntsUpto,  // A/C/G/T counts up to top
		index_t* cntsIn,    // A/C/G/T counts within range
		EList<bool> *masks) const // masks indicating which range elts = A/C/G/T
	{
		assert_gt(num, 0);
		assert_range(0, (int)this->_eh._sideBwtSz-1, (int)l._by);
		assert_range(0, 3, (int)l._bp);
		// Tally of each character within this side, up to the locus
		countUpToEx(l, cntsUpto);
		WITHIN_FCHR_DOLLARA(cntsUpto);
		WITHIN_BWT_LEN(cntsUpto);
		const uint8_t *side = l.side(this->ebwt());
		if(l._sideByteOff <= _zEbwtByteOff && l._sideByteOff + l._by >= _zEbwtByteOff) {
			// Adjust for the fact that we represented $ with an 'A', but
			// shouldn't count it as an 'A' here
			if((l._sideByteOff + l._by > _zEbwtByteOff) ||
			   (l._sideByteOff + l._by == _zEbwtByteOff && l._bp > _zEbwtBpOff))
			{
				cntsUpto[0]--; // Adjust for '$' looking like an 'A'
			}
		}
		// Now factor in the occ[] count at the side break; the four
		// counters are stored immediately after the packed characters
		const index_t *acgt = reinterpret_cast<const index_t*>(side + _eh._sideBwtSz);
		assert_leq(acgt[0], this->fchr()[1] + this->_eh.sideBwtLen());
		assert_leq(acgt[1], this->fchr()[2]-this->fchr()[1]);
		assert_leq(acgt[2], this->fchr()[3]-this->fchr()[2]);
		assert_leq(acgt[3], this->fchr()[4]-this->fchr()[3]);
		assert_leq(acgt[0], this->_eh._len + this->_eh.sideBwtLen());
		assert_leq(acgt[1], this->_eh._len);
		assert_leq(acgt[2], this->_eh._len);
		assert_leq(acgt[3], this->_eh._len);
		cntsUpto[0] += (acgt[0] + this->fchr()[0]);
		cntsUpto[1] += (acgt[1] + this->fchr()[1]);
		cntsUpto[2] += (acgt[2] + this->fchr()[2]);
		cntsUpto[3] += (acgt[3] + this->fchr()[3]);
		// One mask slot per element of the range, for each character
		masks[0].resize(num);
		masks[1].resize(num);
		masks[2].resize(num);
		masks[3].resize(num);
		WITHIN_FCHR_DOLLARA(cntsUpto);
		WITHIN_FCHR_DOLLARA(cntsIn);
		// 'cntsUpto' is complete now.
		// Walk forward until we've tallied the entire 'In' range
		index_t nm = 0;
		// Rest of this side
		nm += countBt2SideRange2(l, true, num - nm, cntsIn, masks, nm);
		assert_eq(nm, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]);
		assert_leq(nm, num);
		// Advance a copy so that the caller's locus is left where it was
		SideLocus<index_t> lcopy = l;
		while(nm < num) {
			// Subsequent sides, if necessary
			lcopy.nextSide(this->_eh);
			nm += countBt2SideRange2(lcopy, false, num - nm, cntsIn, masks, nm);
			WITHIN_FCHR_DOLLARA(cntsIn);
			assert_leq(nm, num);
			assert_eq(nm, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]);
		}
		assert_eq(num, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]);
		WITHIN_FCHR_DOLLARA(cntsIn);
	}

	/**
	 * Count all occurrences of character c from the beginning of the
	 * forward side to <by,bp> and add in the occ[] count up to the side
	 * break just prior to the side.
	 *
	 * A forward side is shaped like:
	 *
	 * [A] [C] XXXXXXXXXXXXXXXX
	 * -4- -4- --------56------ (numbers in bytes)
	 *         ^
	 *         Side ptr (result from SideLocus.side())
	 *
	 * And following it is a reverse side shaped like:
	 * 
	 * [G] [T] XXXXXXXXXXXXXXXX
	 * -4- -4- --------56------ (numbers in bytes)
	 *         ^
	 *         Side ptr (result from SideLocus.side())
	 *
	 */
	inline void countBt2SideEx(const SideLocus<index_t>& l, index_t* arrs) const {
		assert_range(0, (int)this->_eh._sideBwtSz-1, (int)l._by);
		assert_range(0, 3, (int)l._bp);
		countUpToEx(l, arrs);
		if(l._sideByteOff <= _zEbwtByteOff && l._sideByteOff + l._by >= _zEbwtByteOff) {
			// Adjust for the fact that we represented $ with an 'A', but
			// shouldn't count it as an 'A' here
			if((l._sideByteOff + l._by > _zEbwtByteOff) ||
			   (l._sideByteOff + l._by == _zEbwtByteOff && l._bp > _zEbwtBpOff))
			{
				arrs[0]--; // Adjust for '$' looking like an 'A'
			}
		}
		WITHIN_FCHR(arrs);
		WITHIN_BWT_LEN(arrs);
		// Now factor in the occ[] count at the side break
		const uint8_t *side = l.side(this->ebwt());
		const uint8_t *acgt16 = side + this->_eh._sideSz - sizeof(index_t) * 4;
		const index_t *acgt = reinterpret_cast<const index_t*>(acgt16);
		assert_leq(acgt[0], this->fchr()[1] + this->_eh.sideBwtLen());
		assert_leq(acgt[1], this->fchr()[2]-this->fchr()[1]);
		assert_leq(acgt[2], this->fchr()[3]-this->fchr()[2]);
		assert_leq(acgt[3], this->fchr()[4]-this->fchr()[3]);
		assert_leq(acgt[0], this->_eh._len + this->_eh.sideBwtLen());
		assert_leq(acgt[1], this->_eh._len);
		assert_leq(acgt[2], this->_eh._len);
		assert_leq(acgt[3], this->_eh._len);
		arrs[0] += (acgt[0] + this->fchr()[0]);
		arrs[1] += (acgt[1] + this->fchr()[1]);
		arrs[2] += (acgt[2] + this->fchr()[2]);
		arrs[3] += (acgt[3] + this->fchr()[3]);
		WITHIN_FCHR(arrs);
	}

    /**
	 * Counts the number of occurrences of character 'c' in the given Ebwt
	 * side up to (but not including) the given byte/bitpair (by/bp).
	 *
	 * Two strategies are used.  With POPCNT_CAPABILITY and
	 * _usePOPCNTinstruction set, the whole prefix is counted in 64-bit
	 * words, with the final (partial) word shifted so that characters at
	 * or beyond the locus drop out.  Otherwise whole 64-bit words are
	 * counted first and the remaining bytes / bit-pairs are counted with
	 * the cCntLUT_4 lookup table.
	 *
	 * This is a performance-critical function.  This is the top search-
	 * related hit in the time profile.
	 *
	 * Function gets 11.09% in profile
	 */
	inline index_t countUpTo(const SideLocus<index_t>& l, int c) const {
		// Count occurrences of c in each 64-bit (using bit trickery);
		// Someday countInU64() and pop() functions should be
		// vectorized/SSE-ized in case that helps.
        bool usePOPCNT = false;
		index_t cCnt = 0;
		const uint8_t *side = l.side(this->ebwt());
		int i = 0;
#ifdef POPCNT_CAPABILITY
        if(_usePOPCNTinstruction) {
            usePOPCNT = true;
            // Number of bytes touched by the prefix, including a partial one
            int by = l._by + (l._bp > 0 ? 1 : 0);
            for(; i < by; i += 8) {
                if(i + 8 < by) {
                    // Word lies entirely inside the prefix
                    cCnt += countInU64<USE_POPCNT_INSTRUCTION>(c, *(uint64_t*)&side[i]);
                } else {
                    // Last word: shift out the bytes and bit-pairs that lie
                    // at or beyond the locus before counting
                    index_t by_shift = 8 - (by - i);
                    index_t bp_shift = (l._bp > 0 ? 4 - l._bp : 0);
                    index_t shift = (by_shift << 3) + (bp_shift << 1);
                    uint64_t side_i = *(uint64_t*)&side[i];
                    side_i = (_toBigEndian ? side_i >> shift : side_i << shift);
                    index_t cCnt_add = countInU64<USE_POPCNT_INSTRUCTION>(c, side_i);
                    // NOTE(review): presumably the shift >> 1 zero bit-pairs
                    // shifted in would otherwise be counted as c == 0
                    if(c == 0) cCnt_add -= (shift >> 1);
#ifndef NDEBUG
                    // Cross-check the shifted count against the LUT
                    index_t cCnt_temp = 0;
                    for(int j = i; j < l._by; j++) {
                        cCnt_temp += cCntLUT_4[0][c][side[j]];
                    }
                    if(l._bp > 0) {
                        cCnt_temp += cCntLUT_4[(int)l._bp][c][side[l._by]];
                    }
                    assert_eq(cCnt_add, cCnt_temp);
#endif
                    cCnt += cCnt_add;
                    break;
                }
            }
        } else {
            for(; i + 7 < l._by; i += 8) {
                cCnt += countInU64<USE_POPCNT_GENERIC>(c, *(uint64_t*)&side[i]);
            }
        }
#else
        for(; i + 7 < l._by; i += 8) {
            cCnt += countInU64(c, *(uint64_t*)&side[i]);
        }
#endif
        
        // The POPCNT-instruction path has already handled the tail
        if(!usePOPCNT) {
            // Count occurrences of c in the rest of the side (using LUT)
            for(; i < l._by; i++) {
                cCnt += cCntLUT_4[0][c][side[i]];
            }
            
            // Count occurrences of c in the rest of the byte
            if(l._bp > 0) {
                cCnt += cCntLUT_4[(int)l._bp][c][side[i]];
            }
        }
        
		return cCnt;
	}
    
    /**
	 * Counts the number of occurrences of character 'c' in the given Ebwt
	 * side from the last BWT byte of the side down to the given
	 * byte/bitpair (l._by/l._bp).
	 *
	 * Whole 64-bit words [i-7, i] are counted only while they lie
	 * strictly above l._by; the remaining whole bytes above l._by are
	 * then counted one at a time with the reversed LUT, and finally the
	 * byte at l._by itself is counted with the LUT row selected by l._bp.
	 *
	 * All word loops use the bound "i - 7 > l._by".  The non-POPCNT
	 * loops previously tested "i + 7 > l._by", which consumed words
	 * reaching at or below l._by, could index before the start of the
	 * side, and left i below l._by for the final byte lookup.
	 */
	inline index_t countDownTo(const SideLocus<index_t>& l, int c) const {
		// Count occurrences of c in each 64-bit (using bit trickery);
		// Someday countInU64() and pop() functions should be
		// vectorized/SSE-ized in case that helps.
		index_t cCnt = 0;
		const uint8_t *side = l.side(this->ebwt());
		// Index of the last BWT byte.  NOTE(review): hard-codes a 64-byte
		// side ending in four index_t counters; presumably equal to
		// _eh._sideBwtSz - 1 -- confirm.
		int i = 64 - 4 * sizeof(index_t) - 1;
#ifdef POPCNT_CAPABILITY
		if(_usePOPCNTinstruction) {
			for(; i - 7 > l._by; i -= 8) {
				cCnt += countInU64<USE_POPCNT_INSTRUCTION>(c, *(uint64_t*)&side[i-7]);
			}
		} else {
			for(; i - 7 > l._by; i -= 8) {
				cCnt += countInU64<USE_POPCNT_GENERIC>(c, *(uint64_t*)&side[i-7]);
			}
		}
#else
		for(; i - 7 > l._by; i -= 8) {
			cCnt += countInU64(c, *(uint64_t*)&side[i-7]);
		}
#endif
		// Count occurrences of c in the rest of the side (using LUT)
		for(; i > l._by; i--) {
			cCnt += cCntLUT_4_rev[0][c][side[i]];
		}
		// Count occurrences of c in the byte at the locus itself
		if(l._bp > 0) {
			cCnt += cCntLUT_4_rev[4-(int)l._bp][c][side[i]];
		} else {
			cCnt += cCntLUT_4_rev[0][c][side[i]];
		}
		return cCnt;
	}

    /**
     * Bit-parallel tally of all four two-bit values (0-3) within a 64-bit
     * argument.  For each value c, the number of bit-pairs of 'dw' that
     * match is added to arrs[c].
     *
     * For one value: XOR 'dw' with c_table[c], then keep the low bit of
     * every bit-pair whose two bits are both set after the XOR, and
     * population-count what is left.
     *
     * Function gets 2.32% in profile
     */
#ifdef POPCNT_CAPABILITY
    template<typename Operation>
#endif
    inline static void countInU64Ex(uint64_t dw, index_t* arrs) {
        // Selects the low bit of every bit-pair
        const uint64_t pairLowBits = 0x5555555555555555llu;
        for(int c = 0; c < 4; c++) {
            const uint64_t flipped = dw ^ c_table[c];
            const uint64_t matches = flipped & ((flipped >> 1) & pairLowBits);
#ifdef POPCNT_CAPABILITY
            const uint64_t tally = Operation().pop64(matches);
#else
            const uint64_t tally = pop64(matches);
#endif
            arrs[c] += (uint32_t) tally;
        }
    }

	/**
	 * Counts the number of occurrences of all four nucleotides in the
	 * given side up to (but not including) the given byte/bitpair (by/bp).
	 * The count for 'a' is added to arrs[0], 'c' to arrs[1], etc.; the
	 * array is accumulated into, not reset.
	 */
	inline void countUpToEx(const SideLocus<index_t>& l, index_t* arrs) const {
		const uint8_t *side = l.side(this->ebwt());
		int pos = 0;
		// Phase 1: whole 64-bit words, using bit trickery.  Note: this
		// does not seem to lend a significant boost to performance in
		// practice.  Skipping this phase would not affect correctness
		// (the byte loop below would simply take up the slack) and runtime
		// does not change noticeably.  Someday the countInU64() and pop()
		// functions should be vectorized/SSE-ized in case that helps.
#ifdef POPCNT_CAPABILITY
		if(_usePOPCNTinstruction) {
			while(pos + 7 < l._by) {
				countInU64Ex<USE_POPCNT_INSTRUCTION>(*(uint64_t*)&side[pos], arrs);
				pos += 8;
			}
		} else {
			while(pos + 7 < l._by) {
				countInU64Ex<USE_POPCNT_GENERIC>(*(uint64_t*)&side[pos], arrs);
				pos += 8;
			}
		}
#else
		while(pos + 7 < l._by) {
			countInU64Ex(*(uint64_t*)&side[pos], arrs);
			pos += 8;
		}
#endif
		// Phase 2: remaining whole bytes, via the lookup table.
		// Many cache misses on following lines (~20K)
		while(pos < l._by) {
			const uint8_t packed = side[pos];
			for(int c = 0; c < 4; c++) {
				arrs[c] += cCntLUT_4[0][c][packed];
			}
			pos++;
		}
		// Phase 3: the bit-pairs of the final byte that precede the locus
		if(l._bp > 0) {
			const uint8_t packed = side[pos];
			for(int c = 0; c < 4; c++) {
				arrs[c] += cCntLUT_4[(int)l._bp][c][packed];
			}
		}
	}

#ifndef NDEBUG
	/**
	 * Given a single locus, calculate counts of all four DNA chars up to
	 * that locus and accumulate them into 'arrs', which must arrive
	 * zeroed.  Only compiled in debug builds.
	 */
	inline void mapLFEx(
		const SideLocus<index_t>& l,
		index_t *arrs
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		assert_eq(0, arrs[0]);
		assert_eq(0, arrs[1]);
		assert_eq(0, arrs[2]);
		assert_eq(0, arrs[3]);
		countBt2SideEx(l, arrs);
		if(!_sanity || overrideSanity) {
			return;
		}
		// Make sure results match up with individual calls to mapLF;
		// sanity-checking is overridden in the callee, or we'd have
		// infinite recursion
		assert_eq(mapLF(l, 0, true), arrs[0]);
		assert_eq(mapLF(l, 1, true), arrs[1]);
		assert_eq(mapLF(l, 2, true), arrs[2]);
		assert_eq(mapLF(l, 3, true), arrs[3]);
	}
#endif

	/**
	 * Given top and bot rows, calculate counts of all four DNA chars up to
	 * those rows.  The rows are converted to loci with
	 * SideLocus::initFromTopBot() and the work is delegated to the
	 * locus-based overload.
	 */
	inline void mapLFEx(
		index_t top,
		index_t bot,
		index_t *tops,
		index_t *bots
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		SideLocus<index_t> topLocus;
		SideLocus<index_t> botLocus;
		SideLocus<index_t>::initFromTopBot(
			top, bot, _eh, ebwt(), topLocus, botLocus);
		mapLFEx(topLocus, botLocus, tops, bots ASSERT_ONLY(, overrideSanity));
	}

	/**
	 * Given top and bot loci, calculate counts of all four DNA chars up to
	 * those loci, accumulating into 'tops' and 'bots' respectively (both
	 * must arrive zeroed).  Used for more advanced backtracking-search.
	 */
	inline void mapLFEx(
		const SideLocus<index_t>& ltop,
		const SideLocus<index_t>& lbot,
		index_t *tops,
		index_t *bots
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		assert(ltop.repOk(this->eh()));
		assert(lbot.repOk(this->eh()));
		assert_eq(0, tops[0]); assert_eq(0, bots[0]);
		assert_eq(0, tops[1]); assert_eq(0, bots[1]);
		assert_eq(0, tops[2]); assert_eq(0, bots[2]);
		assert_eq(0, tops[3]); assert_eq(0, bots[3]);
		countBt2SideEx(ltop, tops);
		countBt2SideEx(lbot, bots);
#ifndef NDEBUG
		if(!_sanity || overrideSanity) {
			return;
		}
		// Make sure results match up with individual calls to mapLF;
		// sanity-checking is overridden in the callee, or we'd have
		// infinite recursion
		assert_eq(mapLF(ltop, 0, true), tops[0]);
		assert_eq(mapLF(ltop, 1, true), tops[1]);
		assert_eq(mapLF(ltop, 2, true), tops[2]);
		assert_eq(mapLF(ltop, 3, true), tops[3]);
		assert_eq(mapLF(lbot, 0, true), bots[0]);
		assert_eq(mapLF(lbot, 1, true), bots[1]);
		assert_eq(mapLF(lbot, 2, true), bots[2]);
		assert_eq(mapLF(lbot, 3, true), bots[3]);
#endif
	}

	/**
	 * Tallies up to 'num' characters of the side addressed by 'l',
	 * starting at the locus (l._by/l._bp) when 'startAtLocus' is true and
	 * at the beginning of the side otherwise.  The count for 'a' is
	 * added to arrs[0], 'c' to arrs[1], etc.
	 *
	 * For each character tallied, the corresponding slot of 'masks'
	 * (index maskOff + position) is set to true for the character seen
	 * and false for the other three.  'masks' points at four EList<bool>
	 * objects that must already be sized.
	 *
	 * Stops early when the end of the side's BWT bytes is reached and
	 * returns the number of characters actually tallied.
	 *
	 * Note: $ is tallied just like an 'A' here.
	 */
	inline index_t countBt2SideRange2(
		const SideLocus<index_t>& l,
		bool startAtLocus,
		index_t num,
		index_t* arrs,
		EList<bool> *masks,
		index_t maskOff) const
	{
		assert(!masks[0].empty());
		assert_eq(masks[0].size(), masks[1].size());
		assert_eq(masks[0].size(), masks[2].size());
		assert_eq(masks[0].size(), masks[3].size());
		// Private tally used only for the sanity check at the end
		ASSERT_ONLY(index_t myarrs[4] = {0, 0, 0, 0});
		index_t nm = 0; // number of nucleotides tallied so far
		int iby = 0;      // initial byte offset
		int ibp = 0;      // initial base-pair offset
		if(startAtLocus) {
			iby = l._by;
			ibp = l._bp;
		} else {
			// Start at beginning
		}
		int by = iby, bp = ibp;
		assert_lt(bp, 4);
		assert_lt(by, (int)this->_eh._sideBwtSz);
		const uint8_t *side = l.side(this->ebwt());
		while(nm < num) {
			// Extract the two-bit character at <by,bp>
			int c = (side[by] >> (bp * 2)) & 3;
			assert_lt(maskOff + nm, masks[c].size());
			masks[0][maskOff + nm] = masks[1][maskOff + nm] =
			masks[2][maskOff + nm] = masks[3][maskOff + nm] = false;
			assert_range(0, 3, c);
			// Note: we tally $ just like an A
			arrs[c]++; // tally it
			ASSERT_ONLY(myarrs[c]++);
			masks[c][maskOff + nm] = true; // not dead
			nm++;
			// Advance to the next bit-pair, rolling over into the next byte
			if(++bp == 4) {
				bp = 0;
				by++;
				assert_leq(by, (int)this->_eh._sideBwtSz);
				if(by == (int)this->_eh._sideBwtSz) {
					// Fell off the end of the side
					break;
				}
			}
		}
		WITHIN_FCHR_DOLLARA(arrs);
#ifndef NDEBUG
		if(_sanity) {
			// Make sure results match up with a call to mapLFEx.
			index_t tops[4] = {0, 0, 0, 0};
			index_t bots[4] = {0, 0, 0, 0};
			index_t top = l.toBWRow();
			index_t bot = top + nm;
			mapLFEx(top, bot, tops, bots, false);
			// The A tally may be one higher because $ was counted as an A
			assert(myarrs[0] == (bots[0] - tops[0]) || myarrs[0] == (bots[0] - tops[0])+1);
			assert_eq(myarrs[1], bots[1] - tops[1]);
			assert_eq(myarrs[2], bots[2] - tops[2]);
			assert_eq(myarrs[3], bots[3] - tops[3]);
		}
#endif
		return nm;
	}

	/**
	 * Return the final character in row i (i.e. the i'th character in the
	 * BWT transform).  Note that the 'L' in the name of the function
	 * stands for 'last', as in the literature.
	 */
	inline int rowL(const SideLocus<index_t>& l) const {
		// Extract and return appropriate bit-pair
		return unpack_2b_from_8b(l.side(this->ebwt())[l._by], l._bp);
	}

	/**
	 * Return the final character in row i (i.e. the i'th character in the
	 * BWT transform).  Note that the 'L' in the name of the function
	 * stands for 'last', as in the literature.
	 */
	inline int rowL(index_t i) const {
		// Build a locus for row i, then defer to the locus-based overload
		SideLocus<index_t> loc;
		loc.initFromRow(i, _eh, ebwt());
		return rowL(loc);
	}

	/**
	 * Given top and bot loci, calculate counts of all four DNA chars up to
	 * those loci.  Used for more advanced backtracking-search.
	 */
	inline void mapLFRange(
		SideLocus<index_t>& ltop,
		SideLocus<index_t>& lbot,
		index_t num,        // Number of elts
		index_t* cntsUpto,  // A/C/G/T counts up to top
		index_t* cntsIn,    // A/C/G/T counts within range
		EList<bool> *masks
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		assert(ltop.repOk(this->eh()));
		assert(lbot.repOk(this->eh()));
		// 'num' must equal the number of rows between the two loci
		assert_eq(num, lbot.toBWRow() - ltop.toBWRow());
#ifndef NDEBUG
		// Both output arrays must arrive zeroed
		for(int k = 0; k < 4; k++) {
			assert_eq(0, cntsUpto[k]);
			assert_eq(0, cntsIn[k]);
		}
#endif
		countBt2SideRange(ltop, num, cntsUpto, cntsIn, masks);
		// Every row in the range is tallied under exactly one character
		assert_eq(num, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Compare against the ranges mapLFEx produces for the same loci
			index_t tops[4] = {0, 0, 0, 0};
			index_t bots[4] = {0, 0, 0, 0};
			assert(ltop.repOk(this->eh()));
			assert(lbot.repOk(this->eh()));
			mapLFEx(ltop, lbot, tops, bots, false);
			for(int i = 0; i < 4; i++) {
				const index_t span = bots[i] - tops[i];
				assert(cntsUpto[i] == tops[i] || tops[i] == bots[i]);
				if(i == 0) {
					// The 'A' tally may exceed the mapLFEx range by one
					assert(cntsIn[i] == span ||
						   cntsIn[i] == span+1);
				} else {
					assert_eq(cntsIn[i], span);
				}
			}
		}
#endif
	}

	/**
	 * Given row i, return the row that the LF mapping maps i to.
	 */
	inline index_t mapLF(
		const SideLocus<index_t>& l
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		ASSERT_ONLY(index_t srcrow = l.toBWRow());
		assert(l.side(this->ebwt()) != NULL);
		// Character in the last column of the row at this locus
		const int c = rowL(l);
		assert_lt(c, 4);
		assert_geq(c, 0);
		// Follow the LF mapping on that character
		const index_t ret = countBt2Side(l, c);
		assert_lt(ret, this->_eh._bwtLen);
		assert_neq(srcrow, ret);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Cross-check against mapLFEx; the 'true' argument overrides
			// sanity-checking in the callee so the checks don't recurse
			// forever
			index_t arrs[4] = { 0, 0, 0, 0 };
			mapLFEx(l, arrs, true);
			assert_eq(arrs[c], ret);
		}
#endif
		return ret;
	}

	/**
	 * Given row i and character c, return the row that the LF mapping maps
	 * i to on character c.
	 */
	inline index_t mapLF(
		const SideLocus<index_t>& l, int c
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		assert_lt(c, 4);
		assert_geq(c, 0);
		// Follow the LF mapping from this locus on the caller-chosen
		// character
		const index_t ret = countBt2Side(l, c);
		assert_lt(ret, this->_eh._bwtLen);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Cross-check against mapLFEx; the 'true' argument overrides
			// sanity-checking in the callee so the checks don't recurse
			// forever
			index_t arrs[4] = { 0, 0, 0, 0 };
			mapLFEx(l, arrs, true);
			assert_eq(arrs[c], ret);
		}
#endif
		return ret;
	}

	/**
	 * Given top and bot loci, calculate counts of all four DNA chars up to
	 * those loci.  Also, update a set of tops and bots for the reverse
	 * index/direction using the idea from the bi-directional BWT paper.
	 */
	inline void mapBiLFEx(
		const SideLocus<index_t>& ltop,
		const SideLocus<index_t>& lbot,
		index_t *tops,
		index_t *bots,
		index_t *topsP, // topsP[0] = top
		index_t *botsP
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
#ifndef NDEBUG
		// All four entries of both output arrays are expected to arrive
		// zeroed (the sibling mapLFRange asserts the same of its outputs).
		// Index with 'i' so that entries 1..3 are actually checked, not
		// just entry 0 four times over.
		for(int i = 0; i < 4; i++) {
			assert_eq(0, tops[i]);  assert_eq(0, bots[i]);
		}
#endif
		// Fill tops[] / bots[] for the top and bottom loci
		countBt2SideEx(ltop, tops);
		countBt2SideEx(lbot, bots);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with individual calls to mapLF;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			assert_eq(mapLF(ltop, 0, true), tops[0]);
			assert_eq(mapLF(ltop, 1, true), tops[1]);
			assert_eq(mapLF(ltop, 2, true), tops[2]);
			assert_eq(mapLF(ltop, 3, true), tops[3]);
			assert_eq(mapLF(lbot, 0, true), bots[0]);
			assert_eq(mapLF(lbot, 1, true), bots[1]);
			assert_eq(mapLF(lbot, 2, true), bots[2]);
			assert_eq(mapLF(lbot, 3, true), bots[3]);
		}
#endif
		// bots[0..3] - tops[0..3] = # of ways to extend the suffix with an
		// A, C, G, T.  Starting from the caller-supplied topsP[0], lay the
		// four sub-ranges out back to back: each range begins where the
		// previous one ended.
		botsP[0] = topsP[0] + (bots[0] - tops[0]);
		topsP[1] = botsP[0];
		botsP[1] = topsP[1] + (bots[1] - tops[1]);
		topsP[2] = botsP[1];
		botsP[2] = topsP[2] + (bots[2] - tops[2]);
		topsP[3] = botsP[2];
		botsP[3] = topsP[3] + (bots[3] - tops[3]);
	}

	/**
	 * Given row and its locus information, proceed on the given character
	 * and return the next row, or all-fs if we can't proceed on that
	 * character.  Returns 0xffffffff if this row ends in $.
	 */
	inline index_t mapLF1(
		index_t row,       // starting row
		const SideLocus<index_t>& l, // locus for starting row
		int c               // character to proceed on
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		// Can't proceed if the row's last-column character isn't c ...
		if(rowL(l) != c) {
			return (index_t)OFF_MASK;
		}
		// ... or if this is the row recorded in _zOff
		if(row == _zOff) {
			return (index_t)OFF_MASK;
		}
		assert_lt(c, 4);
		assert_geq(c, 0);
		const index_t ret = countBt2Side(l, c);
		assert_lt(ret, this->_eh._bwtLen);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Cross-check against mapLFEx; the 'true' argument overrides
			// sanity-checking in the callee so the checks don't recurse
			// forever
			index_t arrs[4] = { 0, 0, 0, 0 };
			mapLFEx(l, arrs, true);
			assert_eq(arrs[c], ret);
		}
#endif
		return ret;
	}


	/**
	 * Given row and its locus information, set the row to LF(row) and
	 * return the character that was in the final column.
	 */
	inline int mapLF1(
		index_t& row,      // starting row
		const SideLocus<index_t>& l  // locus for starting row
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		// The row recorded in _zOff has no character to step on; report
		// -1 and leave 'row' untouched
		if(row == _zOff) {
			return -1;
		}
		const int lastChar = rowL(l);
		assert_range(0, 3, lastChar);
		// Advance the caller's row via the LF mapping on that character
		row = countBt2Side(l, lastChar);
		assert_lt(row, this->_eh._bwtLen);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Cross-check against mapLFEx; the 'true' argument overrides
			// sanity-checking in the callee so the checks don't recurse
			// forever
			index_t arrs[4] = { 0, 0, 0, 0 };
			mapLFEx(l, arrs, true);
			assert_eq(arrs[lastChar], row);
		}
#endif
		return lastChar;
	}

#ifndef NDEBUG
	/// Check that in-memory Ebwt is internally consistent with respect
	/// to given EbwtParams; assert if not
	bool inMemoryRepOk(const EbwtParams<index_t>& eh) const {
		assert_geq(_zEbwtBpOff, 0);
		assert_lt(_zEbwtBpOff, 4);
		assert_lt(_zEbwtByteOff, eh._ebwtTotSz);
		assert_lt(_zOff, eh._bwtLen);
		assert_geq(_nFrag, _nPat);
		return true;
	}

	/// Check that in-memory Ebwt is internally consistent; assert if
	/// not
	bool inMemoryRepOk() const {
		return repOk(_eh);
	}

	/// Check that Ebwt is internally consistent with respect to given
	/// EbwtParams; assert if not
	bool repOk(const EbwtParams<index_t>& eh) const {
		// NOTE(review): this checks the member _eh, not the 'eh' argument;
		// 'eh' is only forwarded to inMemoryRepOk below
		assert(_eh.repOk());
		// Nothing further to verify unless the index is resident in memory
		return isInMemory() ? inMemoryRepOk(eh) : true;
	}

	/// Check that Ebwt is internally consistent; assert if not
	bool repOk() const {
		return repOk(_eh);
	}
#endif
    
    string get_uid(const string& header) {
        // The uid is the leading part of the header, cut at whichever comes
        // first: a space, or the second '|' delimiter (the cut character
        // itself is not included).  If neither occurs, the whole header is
        // the uid.
        size_t bars = 0; // '|' delimiters seen so far
        size_t cut = 0;  // length of the uid
        while(cut < header.length()) {
            const char ch = header[cut];
            if(ch == ' ') break;
            if(ch == '|' && ++bars == 2) break;
            cut++;
        }
        return header.substr(0, cut);
    }
    
    uint64_t get_tid(const string& stid) {
        // Parse "<major>[.<minor>]" as two decimal numbers.  parts[0]
        // accumulates the characters before the first '.', parts[1] those
        // after it; any further '.' is skipped without switching back.
        // Characters are not validated: each is taken as (ch - '0').
        uint64_t parts[2] = {0, 0};
        size_t which = 0;
        for(size_t i = 0; i < stid.length(); i++) {
            const char ch = stid[i];
            if(ch == '.') {
                which = 1;
                continue;
            }
            const uint32_t digit = ch - '0';
            parts[which] = parts[which] * 10 + digit;
        }
        // Major number in the low bits, minor number shifted up by 32
        return parts[0] | (parts[1] << 32);
    }

	bool       _toBigEndian;
	int32_t    _overrideOffRate;
	bool       _verbose;
	bool       _passMemExc;
	bool       _sanity; // when set, debug builds run extra cross-checks (see the mapLF* methods)
	bool       fw_;     // true iff this is a forward index
	FILE    *_in1;    // input FILE stream for primary index file
	FILE    *_in2;    // input FILE stream for secondary index file
	string     _in1Str; // filename for primary index file
	string     _in2Str; // filename for secondary index file
    string     _inSaStr;  // filename for suffix-array file
    string     _inBwtStr; // filename for BWT file
	index_t    _zOff; // BWT row whose suffix starts at text offset 0 (see buildToDisk)
	index_t    _zEbwtByteOff;
	int        _zEbwtBpOff;
	index_t    _nPat;  /// number of reference texts
	index_t    _nFrag; /// number of fragments
	APtrWrap<index_t> _plen;
	APtrWrap<index_t> _rstarts; // starting offset of fragments / text indexes
	// _fchr, _ftab and _eftab are expected to be relatively small
	// (usually < 1MB, perhaps a few MB if _fchr is particularly large
	// - like, say, 11).  For this reason, we don't bother with writing
	// them to disk through separate output streams.
	// NOTE(review): the "11" presumably refers to the ftabChars setting
	// rather than to _fchr itself - confirm.
	APtrWrap<index_t> _fchr;
	APtrWrap<index_t> _ftab;
	APtrWrap<index_t> _eftab; // "extended" entries for _ftab
	// _offs may be extremely large.  E.g. for DNA w/ offRate=4 (one
	// offset every 16 rows), the total size of _offs is the same as
	// the total size of the input sequence
    bool _offw; // true: offsets are written as 32-bit values; false: 16-bit (see buildToDisk)
	APtrWrap<uint16_t> _offs;  // offset when # of seq. is less than 2^16
    APtrWrap<uint32_t> _offsw; // offset when # of seq. is more than 2^16
	// _ebwt is the Extended Burrows-Wheeler Transform itself, and thus
	// is at least as large as the input sequence.
	APtrWrap<uint8_t> _ebwt;
	bool       _useMm;        /// use memory-mapped files to hold the index
	bool       useShmem_;     /// use shared memory to hold large parts of the index
	EList<string> _refnames; /// names of the reference sequences
	char *mmFile1_;
	char *mmFile2_;
    
    bool _compressed; // compressed index?
	bool packed_;
    
    EList<pair<string, uint64_t> >   _uid_to_tid; // table that converts uid to tid
    TaxonomyTree _tree;
    TaxonomyPathTable                _paths;
    std::map<uint64_t, string>       _name;
    std::map<uint64_t, uint64_t>     _size;
    std::map<uint64_t, uint32_t> _saGenomeBoundary ; // marks SA coordinates that correspond to the start of a reference genome
    uint64_t _lastGenomeBoundary ;
    uint64_t _boundaryCheckShift ;
    EBitList<128> _boundaryCheck ;

	EbwtParams<index_t> _eh;

	// Compile-time default values for index-building parameters
	static const uint64_t default_bmax = OFF_MASK;
	static const uint64_t default_bmaxMultSqrt = OFF_MASK;
	static const uint64_t default_bmaxDivN = 4;
	static const int      default_dcv = 1024;
	static const bool     default_noDc = false;
	static const bool     default_useBlockwise = true;
	static const uint32_t default_seed = 0;
	// The default line rate is one larger when building with 64-bit
	// index support
#ifdef BOWTIE_64BIT_INDEX
	static const int      default_lineRate = 7;
#else
	static const int      default_lineRate = 6;
#endif
	static const int      default_offRate = 5;
	static const int      default_offRatePlus = 0;
	static const int      default_ftabChars = 10;
	static const bool     default_bigEndian = false;

protected:

	ostream& log() const {
		// Messages currently always go to standard output
		// TODO: turn this into a parameter
		return std::cout;
	}

	/// Print a verbose message and flush (flushing is helpful for
	/// debugging)
	void verbose(const string& s) const {
		// Stay silent unless verbose mode is on
		if(!this->verbose()) {
			return;
		}
		ostream& sink = this->log();
		sink << s.c_str();
		// Flush right away so the message is visible immediately
		sink.flush();
	}
};

/**
 * Read reference names from an input stream 'in' for an Ebwt primary
 * file and store them in 'refnames'.
 */
template <typename index_t>
void readEbwtRefnames(istream& in, EList<string>& refnames);

/**
 * Read reference names from the index with basename 'in' and store
 * them in 'refnames'.
 */
template <typename index_t>
void readEbwtRefnames(const string& instr, EList<string>& refnames);

/**
 * Read just enough of the Ebwt's header to determine whether it's
 * colorspace.
 */
bool readEbwtColor(const string& instr);

/**
 * Read just enough of the Ebwt's header to determine whether it's
 * entirely reversed.
 */
bool readEntireReverse(const string& instr);

///////////////////////////////////////////////////////////////////////
//
// Functions for building Ebwts
//
///////////////////////////////////////////////////////////////////////

/**
 * Join several text strings together in a way that's compatible with
 * the text-chunking scheme dictated by chunkRate parameter.
 *
 * The non-static member Ebwt::join additionally builds auxilliary
 * arrays that maintain a mapping between chunks in the joined string
 * and the original text strings.
 */
template <typename index_t>
template <typename TStr>
TStr Ebwt<index_t>::join(EList<TStr>& l, uint32_t seed) {
	// A seeded random source is set up here but not otherwise consulted
	// in this function
	RandomSource rand; // reproducible given same seed
	rand.init(seed);
	// Pass 1: add up the lengths so the result can be sized in one go
	index_t total = 0;
	for(index_t k = 0; k < l.size(); k++) {
		total += length(l[k]);
	}
	TStr joined;
	joined.resize(total);
	// Pass 2: copy each input string, in list order, into the result
	index_t dst = 0;
	for(size_t k = 0; k < l.size(); k++) {
		TStr& cur = l[k];
		assert_gt(cur.length(), 0);
		for(size_t j = 0; j < cur.size(); j++, dst++) {
			joined.set(cur[j], dst);
		}
	}
	return joined;
}

/**
 * Join several text strings together in a way that's compatible with
 * the text-chunking scheme dictated by chunkRate parameter.
 *
 * The non-static member Ebwt::join additionally builds auxilliary
 * arrays that maintain a mapping between chunks in the joined string
 * and the original text strings.
 */
template <typename index_t>
template <typename TStr>
TStr Ebwt<index_t>::join(EList<FileBuf*>& l,
                EList<RefRecord>& szs,
                index_t sztot,
                const RefReadInParams& refparams,
                uint32_t seed)
{
	// A seeded random source is set up here but not otherwise consulted
	// in this function
	RandomSource rand; // reproducible given same seed
	rand.init(seed);
	// Work on a private copy of the read-in parameters
	RefReadInParams rpcp = refparams;
	// Size the destination up front from the caller-supplied total
	TStr joined;
	index_t guessLen = sztot;
	joined.resize(guessLen);
	ASSERT_ONLY(index_t szsi = 0);
	TIndexOffU dstoff = 0; // write cursor into 'joined'
	for(index_t fi = 0; fi < l.size(); fi++) {
		// Drain every record from this input, appending its bases
		assert(!l[fi]->eof());
		for(bool first = true; !l[fi]->eof(); first = false) {
			RefRecord rec = fastaRefReadAppend(*l[fi], first, joined, dstoff, rpcp);
			index_t bases = (index_t)rec.len;
			// Each record read must agree with the precomputed list in szs
			assert_eq(rec.off, szs[szsi].off);
			assert_eq(rec.len, szs[szsi].len);
			assert_eq(rec.first, szs[szsi].first);
			ASSERT_ONLY(szsi++);
			// Last statement of the loop body, so this has no effect
			if(bases == 0) continue;
		}
	}
	return joined;
}

/**
 * Join several text strings together according to the text-chunking
 * scheme specified in the EbwtParams.  Ebwt fields calculated in this
 * function are written directly to disk.
 *
 * It is assumed, but not required, that the header values have already
 * been written to 'out1' before this function is called.
 *
 * The static member Ebwt::join just returns a joined version of a
 * list of strings without building any of the auxilliary arrays.
 */
template <typename index_t>
template <typename TStr>
void Ebwt<index_t>::joinToDisk(
	EList<FileBuf*>& l,
	EList<RefRecord>& szs,
	index_t sztot,
	const RefReadInParams& refparams,
	TStr& ret,
	ostream& out1,
	ostream& out2)
{
	// Work on a private copy of the read-in parameters
	RefReadInParams rpcp = refparams;
	assert_gt(szs.size(), 0);
	assert_gt(l.size(), 0);
	assert_gt(sztot, 0);
	// Not every fragment represents a distinct sequence - many
	// fragments may correspond to a single sequence.  Count the
	// number of sequences here by counting the number of "first"
	// fragments.
	this->_nPat = 0;
	this->_nFrag = 0;
	for(index_t i = 0; i < szs.size(); i++) {
		if(szs[i].len > 0) this->_nFrag++;
		if(szs[i].first && szs[i].len > 0) this->_nPat++;
	}
	assert_gt(this->_nPat, 0);
	assert_geq(this->_nFrag, this->_nPat);
	_rstarts.reset();
	writeIndex<index_t>(out1, this->_nPat, this->toBe());
	// Allocate plen[]
	try {
		this->_plen.init(new index_t[this->_nPat], this->_nPat);
	} catch(bad_alloc&) {
		cerr << "Out of memory allocating plen[] in Ebwt::join()"
		     << " at " << __FILE__ << ":" << __LINE__ << endl;
		// Rethrow the in-flight exception rather than a copy of it
		throw;
	}
	// For each pattern, set plen.  A pattern's length is the sum of
	// (len + off) over all of its fragments; the length of pattern k is
	// written out once pattern k+1 begins, and the last one after the loop.
	int npat = -1;
	for(index_t i = 0; i < szs.size(); i++) {
		if(szs[i].first && szs[i].len > 0) {
			if(npat >= 0) {
				writeIndex<index_t>(out1, this->plen()[npat], this->toBe());
			}
			npat++;
			this->plen()[npat] = (szs[i].len + szs[i].off);
		} else if(npat >= 0) {
			this->plen()[npat] += (szs[i].len + szs[i].off);
		}
		// Otherwise no pattern has been opened yet (e.g. the list begins
		// with a record that has len == 0); there is no plen slot to
		// accumulate into, and indexing plen()[-1] would write out of
		// bounds, so such leading records are skipped.
	}
	assert_eq((index_t)npat, this->_nPat-1);
	if(npat >= 0) {
		// Length of the final pattern
		writeIndex<index_t>(out1, this->plen()[npat], this->toBe());
	}
	// Write the number of fragments
	writeIndex<index_t>(out1, this->_nFrag, this->toBe());
	index_t seqsRead = 0;
	ASSERT_ONLY(index_t szsi = 0);
	ASSERT_ONLY(index_t entsWritten = 0);
	index_t dstoff = 0;
	// For each filebuf
	for(unsigned int i = 0; i < l.size(); i++) {
		assert(!l[i]->eof());
		bool first = true;
		index_t patoff = 0;
		// For each *fragment* (not necessary an entire sequence) we
		// can pull out of istream l[i]...
		while(!l[i]->eof()) {
			// Push a new name onto our vector; the reader fills it in
			_refnames.push_back("");
			RefRecord rec = fastaRefReadAppend(
				*l[i], first, ret, dstoff, rpcp, &_refnames.back());
			first = false;
			index_t bases = rec.len;
			if(rec.first && rec.len > 0) {
				if(_refnames.back().length() == 0) {
					// If name was empty, replace with an index
					ostringstream stm;
					stm << seqsRead;
					_refnames.back() = stm.str();
				}
			} else {
				// This record didn't actually start a new sequence so
				// no need to add a name
				//assert_eq(0, _refnames.back().length());
				_refnames.pop_back();
			}
			// Each record read must agree with the precomputed list in szs
			assert_lt(szsi, szs.size());
			assert_eq(rec.off, szs[szsi].off);
			assert_eq(rec.len, szs[szsi].len);
			assert_eq(rec.first, szs[szsi].first);
			assert(rec.first || rec.off > 0);
			ASSERT_ONLY(szsi++);
			// Increment seqsRead if this is the first fragment
			if(rec.first && rec.len > 0) seqsRead++;
			if(bases == 0) continue;
			assert_leq(bases, this->plen()[seqsRead-1]);
			// Reset the patoff if this is the first fragment
			if(rec.first) patoff = 0;
			patoff += rec.off; // add fragment's offset from end of last frag.
			// Adjust rpcps
			//index_t seq = seqsRead-1;
			ASSERT_ONLY(entsWritten++);
			// This is where rstarts elements are written to the output stream
			//writeU32(out1, oldRetLen, this->toBe()); // offset from beginning of joined string
			//writeU32(out1, seq,       this->toBe()); // sequence id
			//writeU32(out1, patoff,    this->toBe()); // offset into sequence
			patoff += (index_t)bases;
		}
		assert_gt(szsi, 0);
		// Rewind the input so it can be read again later
		l[i]->reset();
		assert(!l[i]->eof());
#ifndef NDEBUG
		// After the rewind the first character must be a '>' header
		// marker; rewind once more after peeking at it
		int c = l[i]->get();
		assert_eq('>', c);
		assert(!l[i]->eof());
		l[i]->reset();
		assert(!l[i]->eof());
#endif
	}
	assert_eq(entsWritten, this->_nFrag);
}

/**
 * Build an Ebwt from a string 's' and its suffix array 'sa' (which
 * might actually be a suffix array *builder* that builds blocks of the
 * array on demand).  The bulk of the Ebwt, i.e. the ebwt and offs
 * arrays, is written directly to disk.  This is by design: keeping
 * those arrays in memory needlessly increases the footprint of the
 * building process.  Instead, we prefer to build the Ebwt directly
 * "to disk" and then read it back into memory later as necessary.
 *
 * It is assumed that the header values and join-related values (nPat,
 * plen) have already been written to 'out1' before this function
 * is called.  When this function is finished, it will have
 * additionally written ebwt, zOff, fchr, ftab and eftab to the primary
 * file and offs to the secondary file.
 *
 * Assume DNA/RNA/any alphabet with 4 or fewer elements.
 * Assume occ array entries are 32 bits each.
 *
 * @param sa            the suffix array to convert to a Ebwt
 * @param s             the original string
 * @param out
 */
template <typename index_t>
template <typename TStr>
void Ebwt<index_t>::buildToDisk(
                                InorderBlockwiseSA<TStr>& sa,
                                const TStr& s,
                                ostream& out1,
                                ostream& out2,
                                ostream* saOut,
                                ostream* bwtOut,
				ostream& out4,
                                const EList<RefRecord>& szs,
                                int kmer_size)
{
	const EbwtParams<index_t>& eh = this->_eh;

	assert(eh.repOk());
	assert_eq(s.length()+1, sa.size());
	assert_eq(s.length(), eh._len);
	assert_gt(eh._lineRate, 3);
	assert(sa.suffixItrIsReset());

	index_t  len = eh._len;
	index_t  ftabLen = eh._ftabLen;
	index_t  sideSz = eh._sideSz;
	index_t  ebwtTotSz = eh._ebwtTotSz;
	index_t  fchr[] = {0, 0, 0, 0, 0};
	EList<index_t> ftab(EBWT_CAT);
	index_t  zOff = (index_t)OFF_MASK;

	// Save # of occurrences of each character as we walk along the bwt
	index_t occ[4] = {0, 0, 0, 0};
	index_t occSave[4] = {0, 0, 0, 0};
    
	// Record rows that should "absorb" adjacent rows in the ftab.
	// The absorbed rows represent suffixes shorter than the ftabChars
	// cutoff.
	uint8_t absorbCnt = 0;
	EList<uint8_t> absorbFtab(EBWT_CAT);
	try {
		VMSG_NL("Allocating ftab, absorbFtab");
		ftab.resize(ftabLen);
		ftab.fillZero();
		absorbFtab.resize(ftabLen);
		absorbFtab.fillZero();
	} catch(bad_alloc &e) {
		cerr << "Out of memory allocating ftab[] or absorbFtab[] "
		     << "in Ebwt::buildToDisk() at " << __FILE__ << ":"
		     << __LINE__ << endl;
		throw e;
	}

	// Allocate the side buffer; holds a single side as its being
	// constructed and then written to disk.  Reused across all sides.
#ifdef SIXTY4_FORMAT
	EList<uint64_t> ebwtSide(EBWT_CAT);
#else
	EList<uint8_t> ebwtSide(EBWT_CAT);
#endif
	try {
#ifdef SIXTY4_FORMAT
		ebwtSide.resize(sideSz >> 3);
#else
		ebwtSide.resize(sideSz);
#endif
	} catch(bad_alloc &e) {
		cerr << "Out of memory allocating ebwtSide[] in "
		     << "Ebwt::buildToDisk() at " << __FILE__ << ":"
		     << __LINE__ << endl;
		throw e;
	}

	// Points to the base offset within ebwt for the side currently
	// being written
	index_t side = 0;

	// Whether we're assembling a forward or a reverse bucket
	bool fw;
	int sideCur = 0;
	fw = true;

	// Have we skipped the '$' in the last column yet?
	ASSERT_ONLY(bool dollarSkipped = false);

	index_t si = 0;   // string offset (chars)
	ASSERT_ONLY(index_t lastSufInt = 0);
	ASSERT_ONLY(bool inSA = true); // true iff saI still points inside suffix
	                               // array (as opposed to the padding at the
	                               // end)
	// Iterate over packed bwt bytes
	VMSG_NL("Entering Ebwt loop");
	ASSERT_ONLY(index_t beforeEbwtOff = (index_t)out1.tellp());
    
    // First integer in the suffix-array output file is the length of the
    // array, including $
    if(saOut != NULL) {
        // Write length word
        writeIndex<index_t>(*saOut, len+1, this->toBe());
    }
    
    // First integer in the BWT output file is the length of BWT(T), including $
    if(bwtOut != NULL) {
        // Write length word
        writeIndex<index_t>(*bwtOut, len+1, this->toBe());
    }
    
    // Count the number of distinct k-mers if kmer_size is non-zero
    EList<uint8_t> kmer;
    EList<size_t> kmer_count;
    EList<size_t> acc_szs;
    if(kmer_size > 0) {
        kmer.resize(kmer_size);
        kmer.fillZero();
	kmer_count.resize(kmer_size);
	kmer_count.fillZero();
        for(size_t i = 0; i < szs.size(); i++) {
            if(szs[i].first) {
                size_t size = 0;
                if(acc_szs.size() > 0) {
                    size = acc_szs.back();
                }
                acc_szs.expand();
                acc_szs.back() = size;
            }
            acc_szs.back() += szs[i].len;
        }
    }

	// Add by Li. Collect the boundary information for each reference sequence.
	EBitList<128> refOffsetMark( len + 1 ) ;
	std::map<uint64_t, uint32_t> refOffsetMap ;
	std::map<uint64_t, uint32_t> saBoundaryMap ;
	const uint64_t refOverlap = 11 ; // the last refOverlap bp of a ref sequence will be classified to the next ref sequence.
	{
		index_t refOffset = 0 ;
		size_t refNameIdx = 0 ;
		for (size_t i = 0 ; i < szs.size() ; ++i )
		{
			//cout<<szs[i].off<<" "<<szs[i].len<<" "<<szs[i].first<<endl ;
			if ( szs[i].first && szs[i].len > 0 )
			{
				//cout<<_refnames[ refNameIdx ]<<" "<<refOffset<<endl ;
				uint64_t o = refOffset - refOverlap ;
				if ( refOffset < refOverlap )
					o = 0 ;
				refOffsetMark.set( o ) ;
				/*std::string uid = get_uid( _refnames[ refNameIdx ] ) ;
				if ( uid_to_tid.find( uid ) != uid_to_tid.end() )
					refOffsetMap[o] = uid_to_tid[ uid ] ;
				else
					refOffsetMap[o] = 0 ;*/

				refOffsetMap[o] = refNameIdx ;
				++refNameIdx ;
			}

			refOffset += szs[i].len ;
		}

	}
	writeIndex<int32_t>(out4, 1, this->toBe()); // endianness sentinel


	while(side < ebwtTotSz) {
		// Sanity-check our cursor into the side buffer
		assert_geq(sideCur, 0);
		assert_lt(sideCur, (int)eh._sideBwtSz);
		assert_eq(0, side % sideSz); // 'side' must be on side boundary
		ebwtSide[sideCur] = 0; // clear
		assert_lt(side + sideCur, ebwtTotSz);
		// Iterate over bit-pairs in the si'th character of the BWT
#ifdef SIXTY4_FORMAT
		for(int bpi = 0; bpi < 32; bpi++, si++)
#else
			for(int bpi = 0; bpi < 4; bpi++, si++)
#endif
			{
				int bwtChar;
				bool count = true;
				if(si <= len) {
					// Still in the SA; extract the bwtChar
					index_t saElt = sa.nextSuffix();
					if(saOut != NULL) {
						writeIndex<index_t>(*saOut, saElt, this->toBe());
					}

					//if ( refOffsetMap.find( saElt ) != refOffsetMap.end() )
					if ( refOffsetMark.test( saElt ) )
					{
						saBoundaryMap[ si ] = refOffsetMap[ saElt ] ;
						//cout<<saElt<<" "<<uid<<endl ;
					}

					// (that might have triggered sa to calc next suf block)
					if(saElt == 0) {
						// Don't add the '$' in the last column to the BWT
						// transform; we can't encode a $ (only A C T or G)
						// and counting it as, say, an A, will mess up the
						// LR mapping
						bwtChar = 0; count = false;
						ASSERT_ONLY(dollarSkipped = true);
						zOff = si; // remember the SA row that
						// corresponds to the 0th suffix
					} else {
						bwtChar = (int)(s[saElt-1]);
						assert_lt(bwtChar, 4);
						// Update the fchr
						fchr[bwtChar]++;
					}
					// Update ftab
					if((len-saElt) >= (index_t)eh._ftabChars) {
						// Turn the first ftabChars characters of the
						// suffix into an integer index into ftab.  The
						// leftmost (lowest index) character of the suffix
						// goes in the most significant bit pair if the
						// integer.
						index_t sufInt = 0;
						for(int i = 0; i < eh._ftabChars; i++) {
							sufInt <<= 2;
							assert_lt((index_t)i, len-saElt);
							sufInt |= (unsigned char)(s[saElt+i]);
						}
						// Assert that this prefix-of-suffix is greater
						// than or equal to the last one (true b/c the
						// suffix array is sorted)
#ifndef NDEBUG
						if(lastSufInt > 0) assert_geq(sufInt, lastSufInt);
						lastSufInt = sufInt;
#endif
						// Update ftab
						assert_lt(sufInt+1, ftabLen);
						ftab[sufInt+1]++;
						if(absorbCnt > 0) {
							// Absorb all short suffixes since the last
							// transition into this transition
							absorbFtab[sufInt] = absorbCnt;
							absorbCnt = 0;
						}
					} else {
						// Otherwise if suffix is fewer than ftabChars
						// characters long, then add it to the 'absorbCnt';
						// it will be absorbed into the next transition
						assert_lt(absorbCnt, 255);
						absorbCnt++;
					}
					// Update the number of distinct k-mers
					if(kmer_size > 0) {
						size_t idx = acc_szs.bsearchLoBound(saElt);
						assert_lt(idx, acc_szs.size());
						bool different = false;
						for(size_t k = 0; k < (size_t)kmer_size; k++) {
							if((acc_szs[idx]-saElt) > k) {
								uint8_t bp = s[saElt+k];
								if(kmer[k] != bp || kmer_count[k] <= 0 || different) {
									kmer_count[k]++;
									different = true;
								}
								kmer[k] = bp;
							}
							else {
								break;
							}
						}
					}
					// Suffix array offset boundary? - update offset array
					if((si & eh._offMask) == si) {
						assert_lt((si >> eh._offRate), eh._offsLen);
						// Write offsets directly to the secondary output
						// stream, thereby avoiding keeping them in memory
						index_t tidx = 0, toff = 0, tlen = 0;
						bool straddled2 = false;
						if(saElt > 0) {
							index_t adjustSaElt = saElt + refOverlap ;
							if ( adjustSaElt >= len )
								adjustSaElt = saElt ;
							if ( adjustSaElt >= len )
								--adjustSaElt ;
								
							joinedToTextOff(
									0,
									adjustSaElt,
									tidx,
									toff,
									tlen,
									false,        // reject straddlers?
									straddled2);  // straddled?
						}
						if(this->_offw) {
							writeIndex<uint32_t>(out2, (uint32_t)tidx, this->toBe());
						} else {
							assert_lt(tidx, std::numeric_limits<uint16_t>::max());
							writeIndex<uint16_t>(out2, (uint16_t)tidx, this->toBe());
						}
					}
				} else {
					// Strayed off the end of the SA, now we're just
					// padding out a bucket
#ifndef NDEBUG
					if(inSA) {
						// Assert that we wrote all the characters in the
						// string before now
						assert_eq(si, len+1);
						inSA = false;
					}
#endif
					// 'A' used for padding; important that padding be
					// counted in the occ[] array
					bwtChar = 0;
				}
				if(count) occ[bwtChar]++;
				// Append BWT char to bwt section of current side
				if(fw) {
					// Forward bucket: fill from least to most
#ifdef SIXTY4_FORMAT
					ebwtSide[sideCur] |= ((uint64_t)bwtChar << (bpi << 1));
					if(bwtChar > 0) assert_gt(ebwtSide[sideCur], 0);
#else
					pack_2b_in_8b(bwtChar, ebwtSide[sideCur], bpi);
					assert_eq((ebwtSide[sideCur] >> (bpi*2)) & 3, bwtChar);
#endif
				} else {
					// Backward bucket: fill from most to least
#ifdef SIXTY4_FORMAT
					ebwtSide[sideCur] |= ((uint64_t)bwtChar << ((31 - bpi) << 1));
					if(bwtChar > 0) assert_gt(ebwtSide[sideCur], 0);
#else
					pack_2b_in_8b(bwtChar, ebwtSide[sideCur], 3-bpi);
					assert_eq((ebwtSide[sideCur] >> ((3-bpi)*2)) & 3, bwtChar);
#endif
				}
			} // end loop over bit-pairs
		assert_eq(dollarSkipped ? 3 : 0, (occ[0] + occ[1] + occ[2] + occ[3]) & 3);
#ifdef SIXTY4_FORMAT
		assert_eq(0, si & 31);
#else
		assert_eq(0, si & 3);
#endif

		sideCur++;
		if(sideCur == (int)eh._sideBwtSz) {
			sideCur = 0;
			index_t *uside = reinterpret_cast<index_t*>(ebwtSide.ptr());
			// Write 'A', 'C', 'G' and 'T' tallies
			side += sideSz;
			assert_leq(side, eh._ebwtTotSz);
			uside[(sideSz / sizeof(index_t))-4] = endianizeIndex(occSave[0], this->toBe());
			uside[(sideSz / sizeof(index_t))-3] = endianizeIndex(occSave[1], this->toBe());
			uside[(sideSz / sizeof(index_t))-2] = endianizeIndex(occSave[2], this->toBe());
			uside[(sideSz / sizeof(index_t))-1] = endianizeIndex(occSave[3], this->toBe());
			occSave[0] = occ[0];
			occSave[1] = occ[1];
			occSave[2] = occ[2];
			occSave[3] = occ[3];
			// Write backward side to primary file
			out1.write((const char *)ebwtSide.ptr(), sideSz);
		}
	}
	VMSG_NL("Exited Ebwt loop");
	assert_neq(zOff, (index_t)OFF_MASK);
	if(absorbCnt > 0) {
		// Absorb any trailing, as-yet-unabsorbed short suffixes into
		// the last element of ftab
		absorbFtab[ftabLen-1] = absorbCnt;
	}
	// Assert that our loop counter got incremented right to the end
	assert_eq(side, eh._ebwtTotSz);
	// Assert that we wrote the expected amount to out1
	assert_eq(((index_t)out1.tellp() - beforeEbwtOff), eh._ebwtTotSz);
	// assert that the last thing we did was write a forward bucket
	
	// Denote the end for the information of boundary of reference genomes.
	writeIndex<uint64_t>( out4, saBoundaryMap.size(), this->toBe() ) ;
	for ( std::map<uint64_t, uint32_t>::iterator it = saBoundaryMap.begin() ; it != saBoundaryMap.end() ; ++it )
	{
		writeIndex<uint64_t>( out4, it->first, this->toBe() ) ;
		writeIndex<uint32_t>( out4, it->second, this->toBe() ) ;
	}

	//
	// Write zOff to primary stream
	//
	writeIndex<index_t>(out1, zOff, this->toBe());

	//
	// Finish building fchr
	//
	// Exclusive prefix sum on fchr
	for(int i = 1; i < 4; i++) {
		fchr[i] += fchr[i-1];
	}
	assert_eq(fchr[3], len);
	// Shift everybody up by one
	for(int i = 4; i >= 1; i--) {
		fchr[i] = fchr[i-1];
	}
	fchr[0] = 0;
	if(_verbose) {
		for(int i = 0; i < 5; i++)
			cout << "fchr[" << "ACGT$"[i] << "]: " << fchr[i] << endl;
	}
	// Write fchr to primary file
	for(int i = 0; i < 5; i++) {
		writeIndex<index_t>(out1, fchr[i], this->toBe());
	}

	//
	// Finish building ftab and build eftab
	//
	// Prefix sum on ftable
	index_t eftabLen = 0;
	assert_eq(0, absorbFtab[0]);
	for(index_t i = 1; i < ftabLen; i++) {
		if(absorbFtab[i] > 0) eftabLen += 2;
	}
	assert_leq(eftabLen, (index_t)eh._ftabChars*2);
	eftabLen = eh._ftabChars*2;
	EList<index_t> eftab(EBWT_CAT);
	try {
		eftab.resize(eftabLen);
		eftab.fillZero();
	} catch(bad_alloc &e) {
		cerr << "Out of memory allocating eftab[] "
		     << "in Ebwt::buildToDisk() at " << __FILE__ << ":"
		     << __LINE__ << endl;
		throw e;
	}
	index_t eftabCur = 0;
	for(index_t i = 1; i < ftabLen; i++) {
		index_t lo = ftab[i] + Ebwt<index_t>::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i-1);
		if(absorbFtab[i] > 0) {
			// Skip a number of short pattern indicated by absorbFtab[i]
			index_t hi = lo + absorbFtab[i];
			assert_lt(eftabCur*2+1, eftabLen);
			eftab[eftabCur*2] = lo;
			eftab[eftabCur*2+1] = hi;
			ftab[i] = (eftabCur++) ^ (index_t)OFF_MASK; // insert pointer into eftab
			assert_eq(lo, Ebwt<index_t>::ftabLo(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i));
			assert_eq(hi, Ebwt<index_t>::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i));
		} else {
			ftab[i] = lo;
		}
	}
	assert_eq(Ebwt<index_t>::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, ftabLen-1), len+1);
	// Write ftab to primary file
	for(index_t i = 0; i < ftabLen; i++) {
		writeIndex<index_t>(out1, ftab[i], this->toBe());
	}
	// Write eftab to primary file
	for(index_t i = 0; i < eftabLen; i++) {
		writeIndex<index_t>(out1, eftab[i], this->toBe());
	}
    
    if(kmer_size > 0) {
      for(size_t k = 0; k < (size_t)kmer_size; k++) {
        cerr << "Number of distinct " << k+1 << "-mers is " << kmer_count[k] << endl;
      }
    }
    
    

	// Note: if you'd like to sanity-check the Ebwt, you'll have to
	// read it back into memory first!
	assert(!isInMemory());
	VMSG_NL("Exiting Ebwt::buildToDisk()");
}

/**
 * Try to find the Bowtie index specified by the user.  First try the
 * exact path given by the user.  Then try the user-provided string
 * appended onto the path of the "indexes" subdirectory below this
 * executable, then try the provided string appended onto
 * "$BOWTIE2_INDEXES/".
 *
 * Declaration only: the body is not part of this section of the file,
 * so the search order above is as stated by the original author.
 *
 * NOTE(review): judging by the names, 'cmdline' is presumably the
 * invocation string used to locate the executable's directory,
 * 'ebwtFileBase' the user-supplied index basename, and 'verbose' a
 * switch for progress messages -- confirm against the definition.
 * The returned string is presumably the resolved index basename.
 */
string adjustEbwtBase(const string& cmdline,
					  const string& ebwtFileBase,
					  bool verbose);


// Description of the most recent I/O failure; written by is_read_err()
// and is_fread_err().  Declared here only -- storage is defined outside
// this part of the file.
extern string gLastIOErrMsg;

/**
 * Report whether a read() call failed.
 *
 * 'ret' is the value that read() returned.  A negative value is treated
 * as failure: errno and its strerror() text are formatted into
 * gLastIOErrMsg and true is returned.  Any non-negative value yields
 * false and leaves gLastIOErrMsg untouched.  'fdesc' and 'count' are
 * accepted but not examined.
 */
inline bool is_read_err(int fdesc, ssize_t ret, size_t count) {
	if(ret >= 0) {
		// read() reported no error
		return false;
	}
	std::stringstream report;
	report << "ERRNO: " << errno << " ERR Msg:" << strerror(errno) << std::endl;
	gLastIOErrMsg = report.str();
	return true;
}

/**
 * Report whether an fread() call left the stream in an error state.
 *
 * The decision is based solely on ferror(file_hd).  When the error
 * indicator is set, a fixed message is stored in gLastIOErrMsg and true
 * is returned; otherwise false is returned.  'ret' and 'count' are
 * accepted but not examined.
 */
inline bool is_fread_err(FILE* file_hd, size_t ret, size_t count) {
	const bool streamFailed = (ferror(file_hd) != 0);
	if(streamFailed) {
		gLastIOErrMsg = "Error Reading File!";
	}
	return streamFailed;
}


///////////////////////////////////////////////////////////////////////
//
// Functions for searching Ebwts
// (But most of them are defined in the header)
//
///////////////////////////////////////////////////////////////////////

/**
 * Translate an offset into the joined text into reference coordinates.
 *
 * A binary search over the fragment table returned by rstarts() (three
 * entries per fragment: joined-text start, reference index, offset of
 * the fragment within that reference) locates the fragment containing
 * 'off'.
 *
 * @param qlen            length of the span starting at 'off'; used to
 *                        detect spans that run past the fragment's end
 * @param off             offset into the joined text
 * @param tidx            out: reference index of the fragment, or
 *                        (index_t)OFF_MASK if the span straddles the
 *                        fragment end and 'rejectStraddle' is set
 * @param textoff         out: offset within the reference; for a
 *                        non-forward index (!fw_) the position is
 *                        mirrored within the fragment first
 * @param tlen            out: plen()[tidx]; not written when the span
 *                        is rejected as a straddler
 * @param rejectStraddle  if true, return early on a straddling span
 * @param straddled       out: set to true when the span straddles; it
 *                        is never reset to false here
 */
template <typename index_t>
void Ebwt<index_t>::joinedToTextOff(
	index_t qlen,
	index_t off,
	index_t& tidx,
	index_t& textoff,
	index_t& tlen,
	bool rejectStraddle,
	bool& straddled) const
{
	assert(rstarts() != NULL); // must have loaded rstarts
	index_t lo = 0;
	index_t hi = _nFrag; // one past the largest addressable fragment
	index_t mid = (index_t)OFF_MASK;
	for(;;) {
		ASSERT_ONLY(index_t prevMid = mid);
		mid = lo + ((hi - lo) >> 1);
		assert_neq(prevMid, mid); // the search must make progress
		// Joined-text extent [fragBegin, fragEnd) of the probed fragment;
		// the last fragment ends where the joined text ends
		const index_t fragBegin = rstarts()[mid*3];
		index_t fragEnd;
		if(mid == _nFrag-1) {
			fragEnd = _eh._len;
		} else {
			fragEnd = rstarts()[((mid+1)*3)];
		}
		assert_gt(fragEnd, fragBegin);
		const index_t fraglen = fragEnd - fragBegin;
		if(fragBegin > off) {
			// 'off' lies somewhere before the probed fragment
			hi = mid;
			continue;
		}
		if(fragEnd <= off) {
			// 'off' lies somewhere after the probed fragment
			lo = mid;
			continue;
		}
		// 'off' is inside this fragment; does the span run off its end?
		if(off + qlen > fragEnd) {
			straddled = true;
			if(rejectStraddle) {
				// Signal no-go to the caller and bail out
				tidx = (index_t)OFF_MASK;
				assert_lt(mid, _nFrag-1);
				return;
			}
		}
		// Reference index is the same for forward and reverse indexes
		tidx = rstarts()[(mid*3)+1];
		assert_lt(tidx, this->_nPat);
		assert_leq(fraglen, this->plen()[tidx]);
		// Number of characters preceding the span within the fragment
		index_t withinFrag = off - rstarts()[(mid*3)];
		if(!this->fw_) {
			// Mirror the position within the fragment, then move to the
			// other end of the span
			withinFrag = fraglen - withinFrag - 1;
			withinFrag -= (qlen-1);
		}
		// Add the fragment's own offset within its reference
		textoff = withinFrag + rstarts()[(mid*3)+2];
		assert_lt(textoff, this->plen()[tidx]);
		break;
	}
	tlen = this->plen()[tidx];
}

/**
 * Walk 'steps' LF-mapping steps to the left starting from 'row' and
 * return the row arrived at.  If a step would have to be taken from the
 * row equal to _zOff, (index_t)OFF_MASK is returned instead.  With
 * 'steps' == 0 the input row is returned unchanged.
 */
template <typename index_t>
index_t Ebwt<index_t>::walkLeft(index_t row, index_t steps) const {
#ifndef NDEBUG
	// The offset array matching the configured width must be loaded
	assert(this->_offw ? (offsw() != NULL) : (offs() != NULL));
#endif
	assert_neq((index_t)OFF_MASK, row);
	SideLocus<index_t> locus;
	for(index_t remaining = steps; remaining > 0; remaining--) {
		// Position the locus on the current row before stepping from it
		locus.initFromRow(row, _eh, ebwt());
		if(row == _zOff) {
			return (index_t)OFF_MASK;
		}
		const index_t next = this->mapLF(locus ASSERT_ONLY(, false));
		assert_neq((index_t)OFF_MASK, next);
		assert_neq(next, row);
		row = next;
	}
	return row;
}

/**
 * Resolve the value associated with BW row 'row'.
 *
 * Resolution order:
 *  - the row equal to _zOff resolves to 0;
 *  - a row left unchanged by masking with _eh._offMask is looked up
 *    directly in the sampled array (offsw() when _offw is set, offs()
 *    otherwise) at index row >> _eh._offRate;
 *  - a row for which saGenomeBoundaryHas() is true resolves to
 *    saGenomeBoundaryVal();
 *  - otherwise LF-mapping steps are taken until _zOff or a sampled row
 *    is reached, and the number of steps is added to the value found
 *    there (0 for _zOff).
 *
 * The genome-boundary lookup is consulted for the starting row only,
 * not for rows visited during the walk.
 */
template <typename index_t>
index_t Ebwt<index_t>::getOffset(index_t row) const {
#ifndef NDEBUG
	// The offset array matching the configured width must be loaded
	assert(this->_offw ? (offsw() != NULL) : (offs() != NULL));
#endif
	assert_neq((index_t)OFF_MASK, row);
	if(row == _zOff) {
		return 0;
	}
	if((row & _eh._offMask) == row) {
		// Starting row is itself sampled
		if(this->_offw) {
			return this->offsw()[row >> _eh._offRate];
		}
		return this->offs()[row >> _eh._offRate];
	}
	if(saGenomeBoundaryHas((uint64_t)row)) {
		return saGenomeBoundaryVal((uint64_t)row);
	}

	SideLocus<index_t> locus;
	// 'jumps' counts the LF steps taken so far, including the current one
	for(index_t jumps = 1; ; jumps++) {
		locus.initFromRow(row, _eh, ebwt());
		const index_t next = this->mapLF(locus ASSERT_ONLY(, false));
		assert_neq((index_t)OFF_MASK, next);
		assert_neq(next, row);
		row = next;
		if(row == _zOff) {
			return jumps;
		}
		if((row & _eh._offMask) == row) {
			if(this->_offw) {
				return jumps + this->offsw()[row >> _eh._offRate];
			}
			return jumps + this->offs()[row >> _eh._offRate];
		}
	}
}

/**
 * Resolve the offset of BW element 'elt' for a hit of length 'hitlen'.
 *
 * When 'fw' is true the value of getOffset(elt) is returned as is.
 * Otherwise the offset is mirrored within the joined text
 * (_eh._len - off - 1) and then moved back by hitlen-1 so that it
 * refers to the opposite end of the hit.
 */
template <typename index_t>
index_t Ebwt<index_t>::getOffset(
	index_t elt,
	bool fw,
	index_t hitlen) const
{
	const index_t raw = getOffset(elt);
	assert_neq((index_t)OFF_MASK, raw);
	if(fw) {
		return raw;
	}
	assert_lt(raw, _eh._len);
	index_t mirrored = _eh._len - raw - 1;
	assert_geq(mirrored, hitlen-1);
	mirrored -= (hitlen-1);
	assert_lt(mirrored, _eh._len);
	return mirrored;
}

/**
 * Returns true iff the index contains the given string (exactly).
 *
 * The string is matched right-to-left by backward search over the BW
 * range [top, bot).  Characters 0-3 are unambiguous nucleotides.  A
 * character value of 4 (ambiguous) is only resolved when there is a
 * single possible substitution; otherwise the function returns false.
 * TODO: support ambiguous characters in 'str' in general.
 *
 * @param str   query string; an empty string is reported as contained
 * @param otop  if both 'otop' and 'obot' are non-NULL, receives the top
 *              of the final BW range (0 for an empty query)
 * @param obot  likewise receives the bottom of the final BW range
 * @return      true iff the final range is non-empty
 *
 * The output pointers are not written on the early 'return false'
 * paths.  The index must be in memory.
 */
template <typename index_t>
bool Ebwt<index_t>::contains(
							 const BTDnaString& str,
							 index_t *otop,
							 index_t *obot) const
{
	assert(isInMemory());
	SideLocus<index_t> tloc, bloc;
	if(str.empty()) {
		if(otop != NULL && obot != NULL) *otop = *obot = 0;
		return true;
	}
	// Seed the range from the last character of the query
	int c = str[str.length()-1];
	assert_range(0, 4, c);
	index_t top = 0, bot = 0;
	if(c < 4) {
		top = fchr()[c];
		bot = fchr()[c+1];
	} else {
		// Ambiguous last character: usable only if exactly one of the
		// four nucleotides occurs in the index.  Scan every nucleotide
		// 'i' (not 'c', which is 4 here and would index past the five
		// fchr entries).
		bool set = false;
		for(int i = 0; i < 4; i++) {
			if(fchr()[i] < fchr()[i+1]) {
				if(set) {
					// More than one candidate nucleotide
					return false;
				} else {
					set = true;
					top = fchr()[i];
					bot = fchr()[i+1];
				}
			}
		}
	}
	assert_geq(bot, top);
	tloc.initFromRow(top, eh(), ebwt());
	bloc.initFromRow(bot, eh(), ebwt());
	ASSERT_ONLY(index_t lastDiff = bot - top);
	// Extend the match leftward one character at a time
	for(int64_t i = (int64_t)str.length()-2; i >= 0; i--) {
		c = str[i];
		assert_range(0, 4, c);
		if(c <= 3) {
			top = mapLF(tloc, c);
			bot = mapLF(bloc, c);
		} else {
			index_t sz = bot - top;
			// NOTE(review): mapLF1 presumably advances 'top' in place and
			// returns the character it stepped through -- confirm against
			// its definition.
			int c1 = mapLF1(top, tloc ASSERT_ONLY(, false));
			bot = mapLF(bloc, c1);
			assert_leq(bot - top, sz);
			if(bot - top < sz) {
				// Encountered an N and could not proceed through it because
				// there was more than one possible nucleotide we could replace
				// it with
				return false;
			}
		}
		assert_geq(bot, top);
		assert_leq(bot-top, lastDiff);
		ASSERT_ONLY(lastDiff = bot-top);
		if(i > 0) {
			// Re-anchor both loci for the next extension step
			tloc.initFromRow(top, eh(), ebwt());
			bloc.initFromRow(bot, eh(), ebwt());
		}
	}
	if(otop != NULL && obot != NULL) {
		*otop = top; *obot = bot;
	}
	return bot > top;
}

#endif /*EBWT_H_*/
