/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.0 (JUN 2003)
 *
 * Copyright (C) 2000-2003 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/


/* seqio.c
 *
 * E. Rivas [St. Louis]
 * 
 * 9 april 1999.
 * 
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <time.h>

#include "funcs.h"
#include "globals.h"
#include "squid.h"
#include "structs.h"
#include "version.h"

#ifdef MEMDEBUG
#include "dbmalloc.h"
#endif


/* Function: AdjustCT()
 * Date:     ER, Thu Dec  5 11:17:23 CST 2002 [St. Louis]
 *
 * Purpose:  Reconcile a ct[] pairing array with the sequence it annotates.
 *           Every entry whose own position, or whose partner position,
 *           holds the '.' character in seq is reset to -1 (unpaired).
 *
 * Args:     ofp    - not used by this function
 *           sqinfo - only sqinfo->len is read (number of positions scanned)
 *           seq    - character sequence, indexed like ct
 *           ct     - ct[i] is the partner of position i, or -1; edited in place
 *
 * Returns:  void.
 */
void
AdjustCT(FILE *ofp, SQINFO *sqinfo, char *seq, int *ct)
{
  int pos;
  int npos = sqinfo->len;

  for (pos = 0; pos < npos; pos++) {
    if (ct[pos] == -1) continue;                 /* already unpaired */

    if (seq[pos] == '.' || seq[ct[pos]] == '.')  /* either end of the pair is a gap */
      ct[pos] = -1;
  }
}

/* Function: AliIdStats()
 *
 * Purpose:  Mean and standard deviation of the percent identity measured
 *           on windows of the aligned pair iseqX/iseqY restricted to
 *           positions j-d..j.
 *
 *           id[kmod*win+mid] (resp. empty[kmod*win+mid]) holds the number
 *           of identical columns (resp. columns where both sequences carry
 *           symbol 4) among the mid+1 columns ending at column k, with
 *           kmod = k%win. Rows are reused circularly, so only win rows
 *           are kept.
 *
 *           A window contributes when IsFullWindow() says so for column k;
 *           its length is range+1 with range given by IndexForwardWindow().
 *           Both helpers are defined elsewhere.
 *
 *           A window consisting only of common gaps has no comparable
 *           column; it is scored as 0% identity instead of dividing by
 *           zero. The variance is clamped at zero before the square root
 *           so that rounding error cannot produce a NaN.
 *
 * Args:     iseqX, iseqY     - aligned sequences in integer form (4 is a gap)
 *           j, d             - last position and span of the segment
 *           win, slide       - window parameters, forwarded to the helpers
 *           ret_id_win_mean  - RETURN: mean percent identity over windows
 *           ret_id_win_sd    - RETURN: sample standard deviation
 *
 * Returns:  void.
 */
void
AliIdStats(int *iseqX, int *iseqY, int j, int d, int win, int slide, double *ret_id_win_mean, double *ret_id_win_sd)
{
  int     i;
  int     k, kmod, kprv;
  int     mid;
  int     range;
  int     len;
  int     len_total;
  int    *id;
  int    *empty;
  int     add_empty;
  int     add_id;
  int     num = 0;
  double  mean;
  double  id_win_mean = 0.0;
  double  id_win_sd   = 0.0;

  len_total = d+1;
  i = j - d;

  /* dimension matrices id and empty
   */
  id    = (int *) MallocOrDie (sizeof(int) * win * win);
  empty = (int *) MallocOrDie (sizeof(int) * win * win);
  for (kmod = 0; kmod < win; kmod ++) 
    for (mid = 0; mid < win; mid++) {
      id[kmod*win+mid]    = 0;
      empty[kmod*win+mid] = 0;
    }

  for (k = 0; k < len_total; k ++) {
  
    range = IndexForwardWindow (len_total, win, slide, k);

    kmod  = k%win;
    kprv  = (kmod-1 < 0)? kmod-1+win : kmod-1;   /* previous row of the circular table */

    add_empty = 0;
    add_id    = 0;
    if      (iseqX[i+k] == 4 && iseqY[i+k] == 4) add_empty = 1;
    else if (iseqX[i+k] == iseqY[i+k])           add_id    = 1;

    for (mid = 0; mid <= range; mid++) {
      if (mid == 0) empty[kmod*win+mid] = add_empty;
      else          empty[kmod*win+mid] = empty[kprv*win+mid-1] + add_empty; /* common gaps do not count */
      
      if (mid == 0) id[kmod*win+mid] = add_id;
      else          id[kmod*win+mid] = id[kprv*win+mid-1] + add_id;
    }

    if (IsFullWindow(0, len_total, win, slide, k)) {
      num ++;

      len = range + 1 - empty[kmod*win+range];

      /* nothing to compare in an all-common-gap window: score it as 0% */
      mean = (len > 0)? (double)id[kmod*win+range]/(double)len*100.0 : 0.0;
      id_win_mean += mean;
      id_win_sd   += mean*mean;
    }
    
  }
  
  /* calculate mean and standard deviation
   */
  if (num > 0) id_win_mean /= num;
  else         id_win_mean = 0.0;

  id_win_sd   -= id_win_mean*id_win_mean*num;
  if (num > 1) id_win_sd   /= (num-1); 
  else         id_win_sd   = 0.0;
  if (id_win_sd < 0.0) id_win_sd = 0.0;   /* rounding can push a zero variance below zero */
  id_win_sd = sqrt(id_win_sd);
  
  *ret_id_win_mean  = id_win_mean;
  *ret_id_win_sd    = id_win_sd;

  free(id);
  free(empty);
}

/* Function: AlignStat()
 * 
 * ER, Mon Jun  5 10:14:38 CDT 2000 [STL]
 * 
 * Purpose:  Classify every column of one pairwise alignment and ADD the
 *           resulting fractions to the caller's running totals.
 *           Columns where both symbols are 4 are discarded and shorten
 *           the effective length; a column where exactly one symbol is 4
 *           (and the other is below 4) is a gap; equal symbols are an
 *           identity; everything else is a mutation.
 *           
 * Args:     ofp     - not used by this function
 *           sqinfoX - not used by this function
 *           seqX    - seqX in int form
 *           sqinfoY - not used by this function
 *           seqY    - seqY in int form
 *           len     - length of alignment
 *           ret_id  - fraction of identities is added to *ret_id
 *           ret_gap - fraction of gaps       is added to *ret_gap
 *           ret_mut - fraction of mutations  is added to *ret_mut
 *           
 * Return:   (void)
 */
void
AlignStat(FILE *ofp, SQINFO *sqinfoX, int *seqX, SQINFO *sqinfoY, int *seqY, int len, 
	  double *ret_id, double *ret_gap, double *ret_mut)
{
  int    col;
  int    a, b;
  int    ncols = len;     /* effective length: columns that are not double gaps */
  int    nid   = 0;
  int    ngap  = 0;
  int    nmut  = 0;
  double fid, fgap, fmut;

  for (col = 0; col < len; col++) {
    a = seqX[col];
    b = seqY[col];

    if      (a == 4 && b == 4)                         ncols --;
    else if ((a == 4 && b < 4) || (b == 4 && a < 4))   ngap  ++;
    else if (a == b)                                   nid   ++;
    else                                               nmut  ++;
  }

  /* the ratios are formed in single precision, as callers have always seen them */
  fid  = (ncols > 0)? (float)nid/ncols  : 0.0;
  fgap = (ncols > 0)? (float)ngap/ncols : 0.0;
  fmut = (ncols > 0)? (float)nmut/ncols : 0.0;

  *ret_id  += fid;
  *ret_gap += fgap;
  *ret_mut += fmut;
}

/* Function: AllocBaseFreq()
 *
 * ER, Tue Jun 22 13:37:47 CDT 1999 [STL]
 * 
 * Purpose:  Allocate a vector of 4 doubles (one slot per nucleotide)
 *           with every entry set to zero.
 *
 * Args:     (none)
 *
 * Return:   basefreq - the new vector; the caller frees it.
 */
double *
AllocBaseFreq(void)
{
  double *freq = (double *) MallocOrDie(sizeof(double) * 4);
  int     slot = 0;

  /* start from an all-zero distribution */
  while (slot < 4) {
    freq[slot] = 0.;
    slot++;
  }

  return freq;
}

/* Function: AllocCharSeqs()
 * Date:     ER, Mon Sep 13 17:02:03 CDT 1999 [St. Louis]
 *
 * Purpose:  Allocate two character arrays of Lmax entries each and fill
 *           both with the '.' character. No terminating NUL is stored.
 *
 * Args:     Lmax      --  number of entries to allocate per array
 *           ret_seqX  --  RETURN: array for sequence X
 *           ret_seqY  --  RETURN: array for sequence Y
 *
 * Returns:  void
 *           *ret_seqX and *ret_seqY are allocated here; the caller frees them.
 *
 */
void
AllocCharSeqs(int Lmax, char **ret_seqX, char **ret_seqY)
{
  char *bufX = (char *) MallocOrDie(sizeof(char) * Lmax);
  char *bufY = (char *) MallocOrDie(sizeof(char) * Lmax);
  int   pos;

  /* every position starts out as '.' in both arrays */
  for (pos = 0; pos < Lmax; pos++)
    bufX[pos] = bufY[pos] = '.';

  *ret_seqX = bufX;
  *ret_seqY = bufY;
}

/* Function: AllocCharSeq()
 * Date:     ER, Tue Mar 21 14:30:36 CST 2000 [St. Louis]
 *
 * Purpose:  Allocate one character array of Lmax entries, all set to '\0'.
 *
 * Args:     Lmax     --  number of entries to allocate
 *           ret_seq  --  RETURN: the new array
 *
 * Returns:  void
 *           *ret_seq is allocated here; the caller frees it.
 *
 */
void
AllocCharSeq(int Lmax, char **ret_seq)
{
  char *buf = (char *) MallocOrDie(sizeof(char) * Lmax);
  int   pos = 0;

  /* clear every entry */
  while (pos < Lmax) {
    buf[pos] = '\0';
    pos++;
  }

  *ret_seq = buf;
}

/* Function: AllocIntSeqs()
 * Date:     ER, Mon Sep 13 17:02:03 CDT 1999 [St. Louis]
 *
 * Purpose:  Allocate two integer arrays of Lmax entries each and fill
 *           both with -1.
 *
 * Args:     Lmax       --  number of entries to allocate per array
 *           ret_iseqX  --  RETURN: array for sequence X
 *           ret_iseqY  --  RETURN: array for sequence Y
 *
 * Returns:  void
 *           *ret_iseqX and *ret_iseqY are allocated here; the caller frees them.
 *
 */
void
AllocIntSeqs(int Lmax, int **ret_iseqX, int **ret_iseqY)
{
  int *bufX = (int *) MallocOrDie(sizeof(int) * Lmax);
  int *bufY = (int *) MallocOrDie(sizeof(int) * Lmax);
  int  pos;

  /* -1 marks a position that has not been assigned a symbol */
  for (pos = 0; pos < Lmax; pos++)
    bufX[pos] = bufY[pos] = -1;

  *ret_iseqX = bufX;
  *ret_iseqY = bufY;
}
/* Function: AllocIntSeq()
 *
 * Purpose:  Allocate one integer array of Lmax entries, all set to -1.
 *
 * Args:     Lmax      --  number of entries to allocate
 *           ret_iseq  --  RETURN: the new array
 *
 * Returns:  void
 *           *ret_iseq is allocated here; the caller frees it.
 */
void
AllocIntSeq(int Lmax, int **ret_iseq)
{
  int *buf = (int *) MallocOrDie(sizeof(int) * Lmax);
  int  pos = 0;

  while (pos < Lmax) {
    buf[pos] = -1;
    pos++;
  }

  *ret_iseq = buf;
}

/* Function: AllocCt()
 *
 * Purpose:  Allocate a ct[] array of Lmax entries, all set to -1.
 *
 * Args:     Lmax    --  number of entries to allocate
 *           ret_ct  --  RETURN: the new array
 *
 * Returns:  void
 *           *ret_ct is allocated here; the caller frees it.
 */
void
AllocCt(int Lmax, int **ret_ct)
{
  int *pairs = (int *) MallocOrDie(sizeof(int) * Lmax);
  int  pos;

  for (pos = 0; pos < Lmax; pos++) {
    pairs[pos] = -1;
  }

  *ret_ct = pairs;
}

/* Function: ArrangeSequence()
 * Date:     ER, Mon 12 April 1999 [St. Louis]
 *
 * Purpose:  Prepare a freshly read sequence:
 *             - for kPearson input, free any structure string attached
 *               to sqinfo;
 *             - for kSquid/kSelex input, validate the structure string
 *               with VerifyKHS() and convert it with KHS2ct(); each
 *               failure only produces a warning;
 *             - upper-case seq with s2upper();
 *             - run StripDegeneracy(), which receives &sqinfo->len and may
 *               therefore change it (presumably when gaps are removed --
 *               TODO confirm against StripDegeneracy()).
 *
 * Args:     ofp               - where verbose output goes
 *           sqinfo            - info about seq (flags, ss, len, name)
 *           format            - kPearson, kSquid or kSelex
 *           seq               - sequence, modified in place
 *           ret_ct            - RETURN: ct array from KHS2ct(); NULL when
 *                               no conversion was performed
 *           allow_pseudoknots - forwarded to KHS2ct()
 *           verbose           - if set, print the processed sequence
 *
 * Returns:  void.
 */
void
ArrangeSequence(FILE *ofp, SQINFO *sqinfo, int format, char *seq, int **ret_ct, 
		int allow_pseudoknots, int verbose)
{
  int *ct = NULL;   /* stays NULL unless KHS2ct() fills it in */

  if (format == kPearson && (sqinfo->flags & SQINFO_SS)) free(sqinfo->ss);
  else if (format == kSquid || format == kSelex) {
    if(!(sqinfo->flags & SQINFO_SS))
      Warn("  Sequence %s has no structure given", sqinfo->name);
    else if (!VerifyKHS(sqinfo->name, sqinfo->ss, sqinfo->len, FALSE))
      Warn("  Sequence %s structure fails validation VerifyKHS() ", sqinfo->name);
    else if (! KHS2ct(sqinfo->ss, sqinfo->len, allow_pseudoknots, &ct)) 
      Warn("  Sequence %s structure fails validation KHS2ct()", sqinfo->name);
  }
  
  s2upper(seq);
  StripDegeneracy(seq, &sqinfo->len);
  
  if (verbose) {
    /* without a ct array there is no structure to print */
    if ((format == kSquid || format == kSelex) && ct != NULL) WriteSeqkn(ofp, seq, sqinfo, ct);
    if (format == kPearson) fprintf(ofp, "seq = %s\n%s\n\n", sqinfo->name, seq);
  }
  
  *ret_ct = ct;
}


/* Function: ArrangeGapSequence()
 * Date:     ER, Fri Jul 23 19:52:43 CDT 1999 [St. Louis]
 *
 * Purpose:  Prepare a freshly read sequence that may contain gaps:
 *             - for kPearson input, free any structure string attached
 *               to sqinfo;
 *             - for kSquid/kSelex input with a structure, validate it
 *               with VerifyKHS(), convert it with KHS2ct() (both fatal on
 *               failure) and drop pairs that touch a gap with AdjustCT();
 *               a missing structure only produces a warning and the
 *               structure string is then not touched at all;
 *             - upper-case seq with s2upper();
 *             - run StripSomeDegeneracy() over sqinfo->len positions.
 *
 * Args:     ofp               - where verbose output goes
 *           sqinfo            - info about seq (flags, ss, len, name)
 *           format            - kPearson, kSquid or kSelex
 *           seq               - sequence, modified in place
 *           ret_ct            - RETURN: ct array from KHS2ct(); NULL when
 *                               no conversion was performed
 *           allow_pseudoknots - forwarded to KHS2ct()
 *           verbose           - if set, print the processed sequence
 *
 * Returns:  void.
 */
void
ArrangeGapSequence(FILE *ofp, SQINFO *sqinfo, int format, char *seq, int **ret_ct, 
		   int allow_pseudoknots, int verbose)
{
  int *ct = NULL;   /* stays NULL unless KHS2ct() fills it in */

  if (format == kPearson && (sqinfo->flags & SQINFO_SS)) free(sqinfo->ss);
  else if (format == kSquid || format == kSelex) {
    if(!(sqinfo->flags & SQINFO_SS))
      Warn("  Sequence %s has no structure given", sqinfo->name);
    else {
      /* sqinfo->ss is only looked at when the flag says it is present */
      if (!VerifyKHS(sqinfo->name, sqinfo->ss, sqinfo->len, TRUE))
        Die("  Sequence %s structure fails validation VerifyKHS()", sqinfo->name);
      if (!KHS2ct(sqinfo->ss, sqinfo->len, allow_pseudoknots, &ct)) 
        Die("  Sequence %s structure fails validation KHS2ct()", sqinfo->name);
      AdjustCT(ofp, sqinfo, seq, ct);
    }
  }
  
  s2upper(seq);
  StripSomeDegeneracy(seq, sqinfo->len);
  
  if (verbose) {
    PrintSeq(ofp, sqinfo, sqinfo->len, seq);
    /* without a ct array there is no structure to print */
    if ((format == kSquid || format == kSelex) && ct != NULL) WriteSeqkn(ofp, seq, sqinfo, ct);
  }
 
  *ret_ct = ct;
}


/* Function: BaseComp() 
 *
 * ER, Tue Jun 22 13:31:03 CDT 1999 [STL]
 *
 * Purpose:  Calculate the base composition of s[j-d..j] as frequencies.
 *           basefreq[] has to be allocated first (4 entries); it is
 *           reset to zero here before counting.
 *           Symbol 4 (gap) is skipped. Any other symbol outside 0..3,
 *           including negative values, is a fatal error; a negative
 *           symbol must never be used as an index into basefreq[].
 *           If no nucleotide is found, basefreq[] stays all zero.
 *           
 * Args:     ofp          - not used by this function
 *           s            - sequence in integer form (0,1,2,3, 4 = gap)
 *           j            - end position                 
 *           d            - span: positions j-d..j are read
 *           basefreq     - vector of frequencies (filled here)
 *                  
 * Return:  (void) basefreq are filled up.                
 */
void
BaseComp(FILE *ofp, int *s, int j, int d, double *basefreq)
{
  int    nt;
  int    mid;
  int    sym;
  double sumnt   = 0.;
  double sumfreq = 0.;

  for (nt = 0; nt < 4; nt++) basefreq[nt] = 0.0;

  for (mid = 0; mid <= d; mid++) {
    sym = s[j-mid];

    if      (sym >= 0 && sym < 4) { basefreq[sym] += 1.; sumnt += 1.; }
    else if (sym == 4)            ; /* do not count gaps */
    else Die("unrecognized character (%d, pos = %d) in sequence\n", sym, j-mid);
  }

  if (sumnt > 0.)
    for (nt = 0; nt < 4; nt++) {
      basefreq[nt] /= sumnt;             /* normalize            */  
      sumfreq += basefreq[nt];           /* add for sumcheck     */
    }

  if (sumfreq != 0.0 && (sumfreq > 1.01 || sumfreq < 0.99)) 
    Die ("BaseComp(): freqs of nts do not add up to one (sumfreq = %f)", sumfreq);
}

/* Function: BaseCount() 
 *
 * ER, Sat Sep 23 11:29:32 CDT 2000 [STL]
 *
 * Purpose:  Add the base composition of s[j-d..j] to basecount[], as
 *           counts. basecount[] has to be allocated first (4 entries)
 *           and is NOT reset here, so repeated calls accumulate.
 *           Symbol 4 (gap) is skipped. Any other symbol outside 0..3,
 *           including negative values, is a fatal error; a negative
 *           symbol must never be used as an index into basecount[].
 *           
 * Args:     ofp            - not used by this function
 *           s              - sequence in integer form (0,1,2,3,4)
 *           j              - end position                 
 *           d              - span: positions j-d..j are read
 *           basecount      - vector of counts (incremented here)
 *                  
 * Return:  (void) basecount are filled up.                
 */
void
BaseCount(FILE *ofp, int *s, int j, int d, double *basecount)
{
  int mid;
  int sym;

  for (mid = 0; mid <= d; mid++) {
    sym = s[j-mid];

    if      (sym >= 0 && sym < 4) basecount[sym] += 1.;
    else if (sym == 4)            ; /* do not count gaps */
    else Die("unrecognized character (%d, pos = %d) in sequence\n", sym, j-mid);
  }
}

/* Function: Count2Freq() 
 *
 * Purpose:  Convert a vector of counts into frequencies, in place, by
 *           dividing every entry by the sum of all entries.
 *           When the counts add up to exactly zero there is nothing to
 *           normalize: the vector is left untouched instead of being
 *           filled with NaN by a division by zero (BaseComp() treats an
 *           empty composition the same way).
 *           
 * Args:     ofp       - not used by this function
 *           dim       - number of entries in basecount
 *           basecount - counts on entry, frequencies on return
 *                  
 * Return:  (void) Dies if the resulting frequencies do not add up to one.
 */
void
Count2Freq(FILE *ofp, int dim, double *basecount)
{
  int    i;
  double sumcount = 0.;
  double sumfreq  = 0.;

  for (i = 0; i < dim; i++) 
    sumcount += basecount[i];

  if (sumcount == 0.) return;   /* no counts at all: avoid 0/0 */

  for (i = 0; i < dim; i++) {
    basecount[i] /= sumcount; /* normalize            */
    sumfreq += basecount[i];  /* add for sumcheck     */
  }

  if (sumfreq > 1.01 || sumfreq < 0.99) 
    Die ("freqs of nts do not add up to one (sumfreq = %f)", sumfreq);

}

/* Function: BaseCompChar() 
 *
 * Purpose:  Add the base composition of the character sequence
 *           s[j-d..j] to basefreq[], as counts (A->0, C->1, G->2, U->3).
 *           basefreq[] has to be allocated first (4 entries) and is NOT
 *           reset or normalized here.
 *           Any character other than 'A', 'C', 'G', 'U' is a fatal error.
 *           
 * Args:     ofp      - not used by this function
 *           s        - sequence in character form
 *           j        - end position                 
 *           d        - span: positions j-d..j are read
 *           basefreq - vector of counts (incremented here)
 *                  
 * Return:  (void)
 */
void
BaseCompChar(FILE *ofp, char *s, int j, int d, double *basefreq)
{
  int mid;

  for (mid = 0; mid <= d; mid++) {
    if      (s[j-mid] == 'A') basefreq[0] += 1.;
    else if (s[j-mid] == 'C') basefreq[1] += 1.;
    else if (s[j-mid] == 'G') basefreq[2] += 1.;
    else if (s[j-mid] == 'U') basefreq[3] += 1.;
    /* the symbol is a character and the position an integer: %c then %d */
    else Die("unrecognized character (%c, pos = %d) in sequence\n", s[j-mid], j-mid);
  }

}

/* Function: BaseCompNorm() 
 *
 * ER, Tue Jun 22 13:31:03 CDT 1999 [STL]
 *
 * Purpose:  Turn the 4 nucleotide counts held in basefreq[] into
 *           frequencies, in place, by dividing each one by nbases.
 *           
 * Args:     nbases   - total number of counts (the divisor)
 *           basefreq - counts on entry, frequencies on return
 *                  
 * Return:  (void) Dies if the 4 frequencies do not add up to one.
 */
void
BaseCompNorm(int nbases, double *basefreq)
{
  double total = (double)nbases;
  double check = 0.;
  int    slot  = 0;

  while (slot < 4) {
    basefreq[slot] /= total;    /* count -> frequency */
    check += basefreq[slot];    /* running sum for the sanity check */
    slot++;
  }

  if (check > 1.01 || check < 0.99) 
    Die ("freqs of nts do not add up to one (sum = %f)", check);
}

/* Function: CalculatePairs() 
 *
 * Purpose:  Tabulate the base pairs of a sequence and print the table.
 *           Each pair (i, ss[i]) with i < ss[i] is counted once in
 *           pairs[isymi + 4*isymj], where the symbols are coded
 *           A=0, C=1, G=2, U=3. Pairs in which either end is a gap
 *           ('-' or '.') are ignored. A pair with an unrecognized
 *           symbol produces a warning and is ignored too, rather than
 *           being counted with an undefined symbol code.
 *           
 * Args:     outf   - where the table is printed
 *           seq    - sequence in character form
 *           sqinfo - info about seq (len and name are used)
 *           ss     - ss[i] is the partner of position i, or -1
 *                  
 * Return:  (void)
 */
void
CalculatePairs(FILE *outf, char *seq, SQINFO sqinfo, int *ss)
{
  int   i = 0;
  int   x, y;
  char  symi, symj;
  int   isymi, isymj;
  int   pairs[16];
  int   tpairs = 0;
  int   aupairs, cgpairs, gupairs;

  
  for (x = 0; x < 16; x++)
    pairs[x] = 0;
  
  for (i = 0; i < sqinfo.len; i++) 
    if (ss[i] != -1 && i < ss[i]) {
      symi = seq[i];
      symj = seq[ss[i]];

      /* code 4 means "do not count"; it is also the fallback for bad symbols */
      isymi = 4;
      isymj = 4;

      switch (symi) {
      case 'A': isymi = 0; break;
      case 'C': isymi = 1; break;
      case 'G': isymi = 2; break;
      case 'U': isymi = 3; break;
      case '-': isymi = 4; break; /* gaps */
      case '.': isymi = 4; break; /* gaps */
      default: Warn("CalculatePairs: bad sym (%c), seq: %s", symi, sqinfo.name);
      }
      
      switch (symj) {
      case 'A': isymj = 0; break;
      case 'C': isymj = 1; break;
      case 'G': isymj = 2; break;
      case 'U': isymj = 3; break;
      case '-': isymj = 4; break; /* gaps */
      case '.': isymj = 4; break; /* gaps */
      default: Warn("CalculatePairs: bad sym (%c), seq: %s", symj, sqinfo.name);
      }
      
      if (isymi < 4 && isymj < 4)
        pairs[isymi+4*isymj] += 1;
    }
  
  for (x = 0; x < 16; x++) 
    tpairs += pairs[x];
  
  /* index = isymi + 4*isymj, so e.g. 12 is A paired with U and 3 is U paired with A */
  aupairs = pairs[12] + pairs[3];
  cgpairs = pairs[6]  + pairs[9];
  gupairs = pairs[11] + pairs[14];

  fprintf(outf, "\nPairs of sequence %s: %d\n\n", sqinfo.name, tpairs);
  fprintf(outf, "  | \t%d\t%d\t%d\t%d\n", 0, 1, 2, 3);
  fprintf(outf, "----------------------------------------\n");
  for (x = 0; x < 4; x++) {
    fprintf(outf, "%d | \t", x);
    for (y = 0; y < 4; y++)
      fprintf(outf, "%d\t", pairs[x+4*y]);
    fprintf(outf, "\n");
  }
  fprintf(outf, "----------------------------------------\n");
  fprintf(outf, "\n");

  fprintf(outf, "A-U/U-A Pairs: %4d\n", aupairs);
  fprintf(outf, "C-G/G-C Pairs: %4d\n", cgpairs);
  fprintf(outf, "U-G/G-U Pairs: %4d\n", gupairs);
  fprintf(outf, "other   Pairs: %4d\n", tpairs-aupairs-cgpairs-gupairs);
  fprintf(outf, "\n");
}

/* Function: CleanUpAliCt() 
 *
 * ER, Sun Mar 16 12:43:53 CST 2003 [STL]
 *
 * Purpose:  Compact the window j-d..j of an alignment and of its ct[]
 *           array. A column is dropped when both characters are '.' and
 *           the column is unpaired (ct == -1); every other column is
 *           shifted left to the next free slot. When a paired column
 *           moves, its partner's ct entry is redirected to the new
 *           position. The slots freed at the end of the window are set
 *           to '.' / -1.
 *           ct entries are used as direct indices into ct[], i.e. they
 *           are absolute positions, not offsets into the window.
 *
 *           The window is printed to stdout before and after compaction.
 *           NOTE(review): CleanUpCharSeqsCt() hides the same dump behind
 *           a verbose flag; confirm whether it should be silenced here.
 *           
 * Args:     ali - alignment (charX and charY are modified in place)
 *           ct  - pairing array, modified in place
 *           j   - last position of the window
 *           d   - span of the window (d+1 columns)
 *                  
 * Return:   number of columns kept.
 */
int
CleanUpAliCt(struct ali_s *ali, int *ct, int j, int d)
{
  struct ali_s *ali_cp;
  int           len;
  int           i, mid;
  int           idx = 0;

  AllocAli(d+1, &ali_cp);

  PatternAli(d+1, ali_cp);

  i = j-d;

  for (mid = 0; mid <= d; mid++) {   
    printf("%c ", ali->charX[i+mid]);
  }
  printf ("\n");
  for (mid = 0; mid <= d; mid++) {   
    printf("%c ", ali->charY[i+mid]);
  }
  printf ("\n");
  for (mid = 0; mid <= d; mid++) {   
    printf("%d ", ct[i+mid]);
  }
  printf ("\n");

  /* work from a copy so that shifting columns left never reads a slot
   * that has already been overwritten */
  for (mid = 0; mid <= d; mid++) {   
    ali_cp->charX[mid] = ali->charX[i+mid];
    ali_cp->charY[mid] = ali->charY[i+mid];
  }

  for (mid = 0; mid <= d; mid++) {   
    
    if  ((ali_cp->charX[mid] != '.' || ali_cp->charY[mid] != '.')                   ||
         (ali_cp->charX[mid] == '.' && ali_cp->charY[mid] == '.' && ct[i+mid] != -1)  )  {
      ali->charX[i+idx] = ali_cp->charX[mid];
      ali->charY[i+idx] = ali_cp->charY[mid];

      /* only a paired column has a partner to redirect; an unpaired one
       * must not be used as an index (that would write ct[-1]) */
      if (ct[i+mid] != -1) {
        ct[i+idx]     = ct[i+mid]; 
        ct[ct[i+mid]] = i+idx;
      }
      else ct[i+idx] = -1;
      
      idx ++; 
    }
    else {
      ali->charX[i+idx] = '.';
      ali->charY[i+idx] = '.';
      ct[i+idx] = -1;
    }
  }
  
  len = idx;

  for (mid = len; mid <= d; mid++) {   
      ali->charX[i+mid] = '.';
      ali->charY[i+mid] = '.';
      ct[i+mid] = -1;
  }

  for (mid = 0; mid <= d; mid++) {   
    printf("%c ", ali->charX[i+mid]);
  }
  printf ("\n");
  for (mid = 0; mid <= d; mid++) {   
    printf("%c ", ali->charY[i+mid]);
  }
  printf ("\n");
  for (mid = 0; mid <= d; mid++) {   
    printf("%d ", ct[i+mid]);
  }
  printf ("\n");

  FreeAli(ali_cp);

  return len;
}

/* Function: CleanUpCharSeqsCt() 
 *
 * Purpose:  Compact the window j-d..j of two aligned character sequences
 *           and of their ct[] array. A column is dropped when both
 *           characters are '.' and the column is unpaired (ct == -1);
 *           every other column is shifted left to the next free slot.
 *           When a paired column moves, its partner's ct entry is
 *           redirected to the new position. The slots freed at the end
 *           of the window are set to '.' / -1.
 *           
 * Args:     charX, charY - aligned sequences, modified in place
 *           ct           - pairing array, modified in place
 *           j            - last position of the window
 *           d            - span of the window (d+1 columns)
 *                  
 * Return:   number of columns kept.
 */
int
CleanUpCharSeqsCt(char *charX, char *charY, int *ct, int j, int d)
{
  char *saveX;
  char *saveY;
  int   debug = FALSE;   /* flip by hand to dump the window before/after */
  int   start = j-d;     /* first position of the window */
  int   col;
  int   kept  = 0;       /* columns written so far == next free slot */
  int   partner;

  AllocCharSeqs(d+1, &saveX, &saveY);

  if (debug) {
    for (col = 0; col <= d; col++) printf("%c ", charX[start+col]);
    printf ("\n");
    for (col = 0; col <= d; col++) printf("%c ", charY[start+col]);
    printf ("\n");
    for (col = 0; col <= d; col++) printf("%d ", ct[start+col]);
    printf ("\n");
  }

  /* snapshot of the window: the compaction below overwrites it in place */
  for (col = 0; col <= d; col++) {
    saveX[col] = charX[start+col];
    saveY[col] = charY[start+col];
  }

  for (col = 0; col <= d; col++) {
    partner = ct[start+col];

    /* an unpaired column made of two '.' carries nothing: drop it */
    if (saveX[col] == '.' && saveY[col] == '.' && partner == -1) continue;

    charX[start+kept] = saveX[col];
    charY[start+kept] = saveY[col];

    if (partner == -1) ct[start+kept] = -1;
    else {
      ct[start+kept] = partner;
      ct[partner]    = start+kept;   /* partner now points at the new slot */
    }

    kept ++;
  }

  /* blank out whatever is left behind the compacted columns */
  for (col = kept; col <= d; col++) {
    charX[start+col] = '.';
    charY[start+col] = '.';
    ct[start+col]    = -1;
  }

  if (debug) {
    for (col = 0; col <= d; col++) printf("%c ", charX[start+col]);
    printf ("\n");
    for (col = 0; col <= d; col++) printf("%c ", charY[start+col]);
    printf ("\n");
    for (col = 0; col <= d; col++) printf("%d ", ct[start+col]);
    printf ("\n");
  }

  free(saveX);
  free(saveY);

  return kept;
}

/* Function: CleanUpSeqs() 
 *
 * ER, Sun Mar 16 12:43:53 CST 2003 [STL]
 *
 * Purpose:  Compact the window j-d..j of two aligned integer sequences.
 *           Columns in which both symbols are 4, or both are -1, are
 *           dropped; the remaining columns are shifted left and the
 *           slots freed at the end of the window are set to -1.
 *           
 * Args:     iseqX, iseqY - aligned sequences, modified in place
 *           j            - last position of the window
 *           d            - span of the window (d+1 columns)
 *           ret_len      - RETURN: number of columns kept
 *                  
 * Return:   (void)
 */
void
CleanUpSeqs(int *iseqX, int *iseqY, int j, int d, int *ret_len)
{
  int  *saveX;
  int  *saveY;
  int   start = j-d;   /* first position of the window */
  int   col;
  int   kept  = 0;     /* columns written so far == next free slot */
  int   a, b;

  AllocIntSeqs(d+1, &saveX, &saveY);
  PatternIntSeqs(d+1, saveX, saveY);

  /* snapshot of the window: the compaction below overwrites it in place */
  for (col = 0; col <= d; col++) {
    saveX[col] = iseqX[start+col];
    saveY[col] = iseqY[start+col];
  }

  for (col = 0; col <= d; col++) {
    a = saveX[col];
    b = saveY[col];

    if ((a ==  4 && b ==  4) ||
        (a == -1 && b == -1)   ) continue;   /* nothing in this column */

    iseqX[start+kept] = a;
    iseqY[start+kept] = b;
    kept ++;
  }

  /* blank out whatever is left behind the compacted columns */
  for (col = kept; col <= d; col++) {
    iseqX[start+col] = -1;
    iseqY[start+col] = -1;
  }

  free(saveX);
  free(saveY);

  *ret_len = kept;
}

/* Function: CheckMaxLength()
 * 
 * Date:     ER, Mon Sep 13 15:25:12 CDT 1999 [St. Louis]
 * 
 * Purpose:  Read every sequence of an open file with ReadSeq() and report
 *           how many there are and the length of the longest one.
 *           Sequences longer than maxlenhit trigger a warning and do not
 *           take part in the maximum.
 *           This is done to overcome SGI "feature" with malloc and free
 *
 * Args:     sqfp      - open sequence database file pointer          
 *           format    - format of the file (previously determined
 *                        by call to SeqfileFormat())
 *           ret_Lmax  - RETURN: longest length not exceeding maxlenhit
 *           ret_nseqs - RETURN: number of sequences read
 *           maxlenhit - lengths above this value are only warned about
 *
 * Return:   void. 
 *
 */
void
CheckMaxLength(SQFILE *sqfp, int format, long int *ret_Lmax, int *ret_nseqs, long int maxlenhit)
{  
  SQINFO    sqinfo;
  char     *seq;
  long int  longest = 0;
  int       count   = 0;

  for (;;) {
    if (!ReadSeq(sqfp, format, &seq, &sqinfo)) break;   /* no more sequences */

    count++;

    if (sqinfo.len > maxlenhit) 
      Warn("Long seq # %d %s (%d)\n", count, sqinfo.name, sqinfo.len); 
    else if (sqinfo.len > longest) 
      longest = sqinfo.len;

    FreeSequence(seq, &sqinfo);
  }

  *ret_Lmax  = longest;
  *ret_nseqs = count;

}

/* Function: DedigitizeSequence()
 * Date:     SRE, Tue Dec 16 10:39:19 1997 [StL]
 * 
 * Purpose:  Returns a 0..L-1 character string, converting
 *           dsq[start..start+L-1] back to the real alphabet
 *           (0->'A', 1->'C', 2->'G', 3->'T', 4->'-').
 *           The result is NUL-terminated so that it really can be used
 *           as a C string; one extra byte is allocated for that. This
 *           also keeps the allocation non-empty when L is 0.
 *
 * Args:     dsq   - digitized sequence
 *           start - first position of dsq to convert
 *           L     - number of symbols to convert
 *
 * Return:   the new string; allocated here, must be free'd by caller.
 *           Dies on allocation failure or on a symbol outside 0..4.
 */
char *
DedigitizeSequence(int *dsq, int start, int L)
{
  char *seq;
  int   n;
  int   i;

  n = (L > 0)? L : 0;   /* number of characters actually produced */

  if ((seq = (char *) malloc ((n + 1) * sizeof(char))) == NULL)
    Die("malloc failed");

  for (i = 0; i < L; i++)
    if (dsq[i+start] == 0) seq[i] = 'A';
    else if (dsq[i+start] == 1) seq[i] = 'C';
    else if (dsq[i+start] == 2) seq[i] = 'G';
    else if (dsq[i+start] == 3) seq[i] = 'T';
    else if (dsq[i+start] == 4) seq[i] = '-';
    else Die ("ilegal digit symbol in sequence %d\n", dsq[i+start]);

  seq[n] = '\0';

  return seq;
}

/* Function: DigitizeSequence()
 * 
 * Purpose:  Convert a character sequence into an array of integer
 *           codes: 'A'->0, 'C'->1, 'G'->2, 'U'->3. Any other character
 *           is a fatal error.
 *           
 * Args:     seq - sequence to be digitized (0..L-1)
 *           L   - length of sequence      
 *           
 * Return:   digitized sequence, dsq.
 *           dsq is allocated here and must be free'd by caller.
 */
int *
DigitizeSequence(char *seq, int L)
{
  int *codes;
  int  pos;

  codes = (int *) malloc (L * sizeof(int));
  if (codes == NULL)
    Die("malloc failed");

  for (pos = 0; pos < L; pos++) {
    switch (seq[pos]) 
      {
      case 'A': codes[pos] = 0; break;
      case 'C': codes[pos] = 1; break;
      case 'G': codes[pos] = 2; break;
      case 'U': codes[pos] = 3; break;
      default:  Die ("ilegal character in sequence\n");
      }
  }

  return codes;
}

/* Function: DupIntAseq()
 *
 * Purpose:  Copy the first len entries of seq into copyseq, front to back.
 *
 * Args:     seq     - source array
 *           copyseq - destination array, allocated by the caller
 *           len     - number of entries to copy
 *
 * Return:   (void)
 */
void 
DupIntAseq(int *seq, int *copyseq, int len)
{
  int pos = 0;

  while (pos < len) {
    copyseq[pos] = seq[pos];
    pos++;
  }
}

/* Function: DupIntSeq()
 *
 * Purpose:  Copy the window iseq[j-d..j] into copyseq[0..d].
 *
 * Args:     iseq    - source array
 *           copyseq - destination array, allocated by the caller
 *                     (d+1 entries are written)
 *           j       - last position of the window
 *           d       - span of the window
 *
 * Return:   (void)
 */
void
DupIntSeq(int *iseq, int *copyseq, int j, int d)
{
  int *from = iseq + (j-d);   /* first entry of the window */
  int  off;

  for (off = 0; off <= d; off++) {
    copyseq[off] = from[off];
  }
}



/* Function: DupSeq()
 *
 * Purpose:  Return a newly allocated copy of the window seq[j-d..j]
 *           (d+1 characters). The copy is NUL-terminated so that it can
 *           safely be handled as a C string; one extra byte is allocated
 *           for the terminator.
 *
 * Args:     seq - source sequence
 *           j   - last position of the window
 *           d   - span of the window
 *
 * Return:   the copy; allocated here, must be free'd by caller.
 *           Dies on allocation failure.
 */
char *
DupSeq(char *seq, int j, int d)
{
  char *copy;
  int   n;
  int   mid;

  n = (d >= 0)? d+1 : 0;   /* number of characters actually copied */

  if ((copy = (char *) malloc ((n + 1) * sizeof(char))) == NULL)
    Die("malloc failed");
  for (mid = 0; mid <= d; mid++)
    copy[mid] = seq[j-d+mid];
  copy[n] = '\0';

  return copy;
}

void
FillAliStruct(int *iseqX, int *iseqY, int j, int d, struct ali_s *ali)
{ 
  int           mid;

  for (mid = 0; mid <= d; mid++) {
     if      (iseqX[j-mid] == 4) { ali->charX[d-mid] = '.';}
    else if (iseqX[j-mid] == 3) { ali->charX[d-mid] = 'U'; }
    else if (iseqX[j-mid] == 2) { ali->charX[d-mid] = 'G'; }
    else if (iseqX[j-mid] == 1) { ali->charX[d-mid] = 'C'; }
    else if (iseqX[j-mid] == 0) { ali->charX[d-mid] = 'A'; }
    else                        { Die ("MakeAliStruct() error seqX[%d] =  %d\n", j-mid, iseqX[j-mid]); } 

    if      (iseqY[j-mid] == 4) { ali->charY[d-mid] = '.'; }
    else if (iseqY[j-mid] == 3) { ali->charY[d-mid] = 'U'; }
    else if (iseqY[j-mid] == 2) { ali->charY[d-mid] = 'G'; }
    else if (iseqY[j-mid] == 1) { ali->charY[d-mid] = 'C'; }
    else if (iseqY[j-mid] == 0) { ali->charY[d-mid] = 'A'; }
    else                        { Die ("MakeAliStruct() error seqY[%d] = %d\n", j-mid, iseqY[j-mid]); }                                 
  }
                             
}


/* Function: IntizeGapAsequence()
 *
 * Date: ER, Mon Sep 13 17:36:11 CDT 1999 [STL]
 * 
 * Purpose:  Convert seq[start..start+len-1] into integer codes stored in
 *           iseq[0..len-1]: A/a->0, C/c->1, G/g->2, U/u/T/t->3, and the
 *           gap characters '-' and '.' ->4. Any other character is a
 *           fatal error.
 *           
 * Args:     seq      - sequence in character form
 *           start    - first position of seq to convert
 *           len      - number of characters to convert
 *           iseq     - RETURN: integer-ized sequence
 *           verbose  - if set, print the codes to stdout
 *           
 * Return:   (void)
 *           iseq is alloc'ed elsewhere, must be free'd by caller.
 */
void
IntizeGapAsequence(char *seq, int start, int len, int *iseq, int verbose)
{
  int off;   /* position in iseq */
  int pos;   /* position in seq  */

  for (off = 0; off < len; off++) {
    pos = start + off;
   
    switch (seq[pos]) 
      {
      case 'A': case 'a':
        iseq[off] = 0; break;
      case 'C': case 'c':
        iseq[off] = 1; break;
      case 'G': case 'g':
        iseq[off] = 2; break;
      case 'U': case 'u':
      case 'T': case 't':
        iseq[off] = 3; break;
      case '-': case '.':
        iseq[off] = 4; break;
      default: 
        Die("unrecognized character (%c, pos = %d) in sequence\n", seq[pos], pos);
      }
  }

  if (!verbose) return;

  printf("\n");
  for (off = 0; off < len; off++) 
    printf("%d ", iseq[off]);
  printf("\n");
}

/* Function: IntizeGapSequence()
 *
 * Date: ER, Fri Jul 23 19:49:25 CDT 1999 [STL]
 * 
 * Purpose:  Convert a sequence of A,C,G,U and gap characters into a
 *           newly allocated array of integer codes: 'A'->0, 'C'->1,
 *           'G'->2, 'U'->3, '-' and '.' ->4. Any other character is a
 *           fatal error.
 *           
 * Args:     seq      - sequence (0..len-1)
 *           len      - length of seq
 *           ret_iseq - RETURN: integer-ized sequence
 *           
 * Return:   (void)
 *           ret_iseq is alloc'ed here, must be free'd by caller.
 */
void
IntizeGapSequence(char *seq, int len, int **ret_iseq)
{
  int *codes;
  int  pos;

  codes = (int *) malloc (len * sizeof(int));
  if (codes == NULL)
    Die("malloc failed");

  for (pos = 0; pos < len; pos++) {
    switch (seq[pos]) 
      {
      case 'A': codes[pos] = 0; break;
      case 'C': codes[pos] = 1; break;
      case 'G': codes[pos] = 2; break;
      case 'U': codes[pos] = 3; break;
      case '-': 
      case '.': codes[pos] = 4; break;   /* both spellings of a gap */
      default:  Die("unrecognized character (%c, pos = %d) in sequence\n", seq[pos], pos);
      }
  }

  *ret_iseq = codes;
}



/* Function: IntizeSequence()
 * 
 * Purpose:  Convert a sequence of A,C,G,U into a newly allocated array
 *           of integer codes 0,1,2,3. Any other character, gaps
 *           included, is a fatal error.
 *           
 * Args:     seq      - sequence (0..len-1) only A,C,G,U allowed
 *           len      - length of seq
 *           ret_iseq - RETURN: integer-ized sequence
 *           
 * Return:   (void)
 *           ret_iseq is alloc'ed here, must be free'd by caller.
 */
void
IntizeSequence(char *seq, int len, int **ret_iseq)
{
  int  *codes;
  int   pos;
  char  sym;

  codes = (int *) malloc (len * sizeof(int));
  if (codes == NULL)
    Die("malloc failed");

  for (pos = 0; pos < len; pos++) {
    sym = seq[pos];

    if      (sym == 'A') codes[pos] = 0;
    else if (sym == 'C') codes[pos] = 1;
    else if (sym == 'G') codes[pos] = 2;
    else if (sym == 'U') codes[pos] = 3;
    else Die("unrecognized character (%c, pos = %d) in sequence\n", sym, pos);
  }

  *ret_iseq = codes;
}

/* Function: PatternIntSeqs()
 *
 * Purpose:  Reset the first Lmax entries of two integer sequences to -1.
 *
 * Args:     Lmax         - number of entries to reset
 *           iseqX, iseqY - arrays to reset (allocated by the caller)
 *
 * Return:   (void)
 */
void
PatternIntSeqs(int Lmax, int *iseqX, int *iseqY)
{
  int pos = 0;

  while (pos < Lmax) {
    iseqX[pos] = -1;
    iseqY[pos] = -1;
    pos++;
  }
}
/* Function: PatternCharSeqs()
 *
 * Purpose:  Reset the first Lmax entries of two character sequences
 *           to '.'.
 *
 * Args:     Lmax       - number of entries to reset
 *           seqX, seqY - arrays to reset (allocated by the caller)
 *
 * Return:   (void)
 */
void
PatternCharSeqs(int Lmax, char *seqX, char *seqY)
{
  int pos = 0;

  while (pos < Lmax) {
    seqX[pos] = '.';
    seqY[pos] = '.';
    pos++;
  }
}
/* Function: PatternCt()
 *
 * Purpose:  Reset the first Lmax entries of a ct[] array to -1.
 *
 * Args:     Lmax - number of entries to reset
 *           ct   - array to reset (allocated by the caller)
 *
 * Return:   (void)
 */
void
PatternCt(int Lmax, int *ct)
{
  int pos = 0;

  while (pos < Lmax) {
    ct[pos] = -1;
    pos++;
  }
}


/* Function: LenNoGaps()
 *
 * Purpose:  Count the non-gap positions of the window
 *           seq[j-d..j] (d+1 positions, walked from j backwards).
 *           A value of 4 is a gap; any value below 4 counts as a
 *           residue; values above 4 are reported through Die().
 *
 * Args:     seq - digitized sequence
 *           j   - last position of the window
 *           d   - window length minus one
 *
 * Returns:  number of positions in the window that are not gaps.
 */
int 
LenNoGaps(int *seq, int j, int d)
{
  int off;
  int nres = 0;

  for (off = 0; off <= d; off++) 
    {
      int sym = seq[j-off];

      if (sym > 4)
	Die ("LenNoGaps() error seqX[%d] =  %d\n", j-off, seq[j-off]);
      else if (sym != 4)
	nres ++;
    }

  return nres;
}



/* Function: PercIdSeqs()
 *
 * Purpose:  Classify every column of the aligned window [j-d..j]
 *           as identity, mutation, single gap or double gap
 *           (code 4 is a gap) and report the first three as
 *           percentages of the columns that are not double gaps.
 *           All three percentages are 0 when no such column exists.
 *
 * Args:     iseqX, iseqY - digitized aligned sequences
 *           j            - last position of the window
 *           d            - window length minus one
 *           ret_id       - RETURN: % identical columns
 *           ret_gap      - RETURN: % columns with exactly one gap
 *           ret_mut      - RETURN: % mismatched columns
 *
 * Returns:  void.
 */
void
PercIdSeqs(int *iseqX, int *iseqY, int j, int d, double *ret_id, double *ret_gap, double *ret_mut)
{
  int    n_id   = 0;
  int    n_gap  = 0;
  int    n_mut  = 0;
  int    n_both = 0;      /* columns where both sequences have a gap */
  int    total  = d + 1;
  int    cols;            /* columns that take part in the percentages */
  int    off;
  double pid  = 0.0;
  double pgap = 0.0;
  double pmut = 0.0;

  for (off = 0; off <= d; off++) {   
    int x = iseqX[j-off];
    int y = iseqY[j-off];

    if (x == 4 || y == 4) {
      if (x == 4 && y == 4) n_both ++;   /* common gaps do not count */
      else                  n_gap  ++;
    }
    else if (x == y) n_id  ++;
    else             n_mut ++;
  }

  if (n_id + n_mut + n_gap + n_both != total) Die ("PercIdSeqs() has problems\n");

  cols = total - n_both;
  if (cols > 0) {
    pid  = (double)n_id  / (double)cols;
    pgap = (double)n_gap / (double)cols;
    pmut = (double)n_mut / (double)cols;
  }

  *ret_id  = pid  * 100.0;
  *ret_gap = pgap * 100.0;
  *ret_mut = pmut * 100.0;
}

/* Function: PosNoGaps()
 * Date:     ER, Mon Mar 26 15:45:16 CST 2001 [St. Louis]
 *
 * Purpose:  Map a position of a gapped sequence onto the
 *           corresponding position of the same sequence with the
 *           gaps (code 4) taken out. When every symbol in
 *           seq[0..pos] is a gap the answer is 0.
 *
 * Args:     seq -- digitized sequence with gaps
 *           pos -- position in seq (0-based)
 *
 * Returns:  position in the ungapped sequence (0-based).
 */
int
PosNoGaps(int *seq, int pos)
{
  int k;
  int residues = 0;   /* non-gap symbols seen in seq[0..pos] */
  int gaps     = 0;   /* gap symbols seen in seq[0..pos]     */

  for (k = 0; k <= pos; k++)
    {
      int sym = seq[k];

      if      (sym <  4) residues ++;
      else if (sym == 4) gaps ++;
      else Die("unrecognized symbol %d in sequence\n", sym);
    }

  /* a bit of paranoia: (residues-1) is the 0-based ungapped position
   */
  if ((residues - 1) + gaps != pos) Die ("something went wrong removing gaps in PosNoGaps()");

  /* the seq starts with gaps, and we have not got past them yet */
  if (gaps - 1 == pos) return 0;

  return residues - 1;
}

/* Function: PrintAlign()
 *
 * Purpose:  Print the two rows of an alignment (ali->charX, ali->charY)
 *           in rows of 50 columns, each row prefixed by the sequence
 *           name. Printing starts at column `start`; columns are
 *           printed while their index is below L. The first column of
 *           every row is printed unconditionally.
 *
 * Args:     ofp              - output stream
 *           sqinfoX, sqinfoY - only the name fields are used
 *           start            - first column to print
 *           L                - upper bound (exclusive) on column index
 *           ali              - alignment holding charX and charY
 *
 * Returns:  void.
 */
void
PrintAlign(FILE *ofp, SQINFO sqinfoX, SQINFO sqinfoY, int start, int L, struct ali_s *ali)
{
  int width = 50;
  int col;
  int k;

  fprintf (ofp, "\n");

  for (col = start; L/(col+1) != 0; col += width) {
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s %c", sqinfoX.name, ali->charX[col]);
    for (k = 1; k < width && col+k < L; k++)
      fprintf (ofp, "%c", ali->charX[col+k]);
    
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s %c", sqinfoY.name, ali->charY[col]);
    for (k = 1; k < width && col+k < L; k++)
      fprintf (ofp, "%c", ali->charY[col+k]);
    
    fprintf (ofp, "\n");
  }
  
  fprintf (ofp, "\n");
}

/* append_block_length()
 *
 * Append "<value>/" to the NUL-terminated string in buf without ever
 * writing past buf[size-1]. *used tracks the current string length.
 * If the entry does not fit completely it is dropped, and the buffer
 * is marked full so later entries are dropped as well.
 */
static void
append_block_length(char *buf, size_t size, size_t *used, int value)
{
  int n;

  if (*used + 1 >= size) return;                 /* no room left */

  n = snprintf(buf + *used, size - *used, "%d/", value);
  if (n < 0 || (size_t) n >= size - *used) {
    buf[*used] = '\0';                           /* undo the partial entry */
    *used      = size;                           /* mark as full           */
    return;
  }
  *used += (size_t) n;
}

/* Function: PrintAlignSequences()
 *
 * Purpose:  Print two digitized aligned sequences, each preceded by
 *           a "> name(len:blocks)-(t=...) [alignment ID/MUT/GAP]" header.
 *           The "blocks" label lists the lengths of the runs of
 *           ali->charX separated by '*' characters. Codes 0..3 are
 *           printed through DNAAlphabet, 4 as '.', -1 is skipped.
 *
 *           Side effect: the name field of BOTH sqinfoX and sqinfoY
 *           is overwritten with string_name.
 *
 * Args:     ofp         - output stream
 *           sqinfoX/Y   - sequence info; names are overwritten
 *           string_name - name to print for both sequences
 *           tfactor     - value printed as "t=" in the header
 *           start       - first position of the window
 *           len         - number of positions in the window
 *           seqX, seqY  - digitized sequences
 *           ali         - alignment; only charX is read (for '*')
 *
 * Returns:  void.
 */
void
PrintAlignSequences(FILE *ofp, SQINFO *sqinfoX, SQINFO *sqinfoY, char *string_name, double tfactor,
		    int start, int len, int *seqX, int *seqY, struct ali_s *ali)
{
  char    charblock[500];   /* "n1/n2/.../" label; entries that do not fit are dropped */
  size_t  used  = 0;
  int     block = 0;
  int     i;
  int     x, y;
  double  id, gap, mut;

  charblock[0] = '\0';

  /* NOTE(review): assumes the name field holds at least 80 bytes, and
   * strncpy() does not NUL-terminate when string_name is 80 chars or
   * longer -- TODO confirm against the SQINFO declaration.
   */
  strncpy(sqinfoX->name, string_name, 80);
  strncpy(sqinfoY->name, string_name, 80);

  PercIdSeqs(seqX, seqY, start+len-1, len-1, &id, &gap, &mut);  

  for (i = 0; i < len; i++) {
    if (ali->charX[i+start] != '*') block ++;
    else {  
      append_block_length(charblock, sizeof(charblock), &used, block+1);
      block = 0; 
    }
  }
  append_block_length(charblock, sizeof(charblock), &used, block);
  
  fprintf(ofp, "> %s(%d:%s)-(t=%.4f) [alignment ID = %.2f MUT = %.2f GAP = %.2f] \n", sqinfoX->name, len, charblock, tfactor, id, mut, gap);
  for (i = 0; i < len; i++) {
    x = seqX[i+start];

    if (x == -1) ;                       /* empty position: print nothing */
    else if (x < 4) 
      fprintf(ofp, "%c",  DNAAlphabet[x]);
    else  if (x == 4)
      fprintf(ofp, ".");
    else Die("wrong nucleotide (%d, pos X:%d) in PrintAlignSequences()", x, i);
  }

  fprintf(ofp, "\n");
  fprintf(ofp, "> %s(%d:%s)-(t=%.4f) [alignment ID = %.2f MUT = %.2f GAP = %.2f] \n", sqinfoY->name, len, charblock, tfactor, id, mut, gap);
  for (i = 0; i < len; i++) {
    y = seqY[i+start];
    
    if (y == -1) ;                       /* empty position: print nothing */
    else if (y < 4) 
      fprintf(ofp, "%c",  DNAAlphabet[y]);
    else  if (y == 4)
      fprintf(ofp, ".");
    else Die("wrong nucleotide (%d, pos Y:%d) in PrintAlignSequences()", y, i);
  }
  fprintf(ofp, "\n\n");
}

/* Function: PrintAlignIntSeqs()
 *
 * Purpose:  Print two digitized aligned sequences, each preceded by
 *           a "> name(len)-(t=...) [alignment ID/MUT/GAP]" header.
 *           Codes 0..3 are printed through DNAAlphabet, 4 as '.',
 *           -1 is skipped; anything else is reported through Die().
 *
 *           Side effect: the name field of BOTH sqinfoX and sqinfoY
 *           is overwritten with string_name.
 *
 * Args:     ofp         - output stream
 *           sqinfoX/Y   - sequence info; names are overwritten
 *           string_name - name to print for both sequences
 *           tfactor     - value printed as "t=" in the header
 *           start       - first position of the window
 *           len         - number of positions in the window
 *           seqX, seqY  - digitized sequences
 *
 * Returns:  void.
 */
void
PrintAlignIntSeqs(FILE *ofp, SQINFO *sqinfoX, SQINFO *sqinfoY, char *string_name, double tfactor,
		    int start, int len, int *seqX, int *seqY)
{
  int     i;
  int     x, y;
  double  id, gap, mut;

  /* NOTE(review): assumes the name field holds at least 80 bytes, and
   * strncpy() does not NUL-terminate when string_name is 80 chars or
   * longer -- TODO confirm against the SQINFO declaration.
   */
  strncpy(sqinfoX->name, string_name, 80);
  strncpy(sqinfoY->name, string_name, 80);

  PercIdSeqs(seqX, seqY, start+len-1, len-1, &id, &gap, &mut);  
  
  fprintf(ofp, "> %s(%d)-(t=%.4f) [alignment ID = %.2f MUT = %.2f GAP = %.2f] \n", sqinfoX->name, len, tfactor, id, mut, gap);
  for (i = 0; i < len; i++) {
    x = seqX[i+start];

    if (x == -1) ;                       /* empty position: print nothing */
    else if (x < 4) 
      fprintf(ofp, "%c",  DNAAlphabet[x]);
    else  if (x == 4)
      fprintf(ofp, ".");
    else Die("wrong nucleotide (%d, pos X:%d) in PrintAlignIntSeqs()", x, i);
  }

  fprintf(ofp, "\n");
  fprintf(ofp, "> %s(%d)-(t=%.4f) [alignment ID = %.2f MUT = %.2f GAP = %.2f] \n", sqinfoY->name, len, tfactor, id, mut, gap);
  for (i = 0; i < len; i++) {
    y = seqY[i+start];
    
    if (y == -1) ;                       /* empty position: print nothing */
    else if (y < 4) 
      fprintf(ofp, "%c",  DNAAlphabet[y]);
    else  if (y == 4)
      fprintf(ofp, ".");
    else Die("wrong nucleotide (%d, pos Y:%d) in PrintAlignIntSeqs()", y, i);
  }
  fprintf(ofp, "\n\n");
}

/* Function: PrintNewAlign()
 *
 * Purpose:  Print columns [start, start+L) of an alignment
 *           (ali->charX, ali->charY) in rows of 50 columns, each row
 *           prefixed by the sequence name.
 *
 *           Side effect: the name field of BOTH sqinfoX and sqinfoY
 *           is overwritten with string_name.
 *
 * Args:     ofp         - output stream
 *           sqinfoX/Y   - sequence info; names are overwritten
 *           start       - first column to print
 *           L           - number of columns to print
 *           ali         - alignment holding charX and charY
 *           string_name - name to print for both sequences
 *
 * Returns:  void.
 */
void
PrintNewAlign(FILE *ofp, SQINFO *sqinfoX, SQINFO *sqinfoY, int start, int L, struct ali_s *ali, 
	      char *string_name)
{
  int line = 50;
  int end  = start + L;     /* one past the last column to print */
  int pos;
  int i;

  /* NOTE(review): strncpy() does not NUL-terminate when string_name is
   * 40 chars or longer -- TODO confirm the name field is terminated elsewhere.
   */
  strncpy(sqinfoX->name, string_name, 40);
  strncpy(sqinfoY->name, string_name, 40);

  fprintf (ofp, "\n");

  /* Rows are bounded by start+L, the same bound used for the columns
   * within a row, so the whole window is printed even when start > 0.
   */
  for (pos = start; pos < end; pos += line) {
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoX->name, ali->charX[pos]);
    for (i = 1; i < line && pos+i < end; i++)
      fprintf (ofp, "%c", ali->charX[pos+i]);
    
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoY->name, ali->charY[pos]);
    for (i = 1; i < line && pos+i < end; i++)
      fprintf (ofp, "%c", ali->charY[pos+i]);
    
    fprintf (ofp, "\n");
  }
  
  fprintf (ofp, "\n");
}

/* Function: PrintNewCharSeqs()
 *
 * Purpose:  Print columns [start, start+L) of two aligned character
 *           sequences in rows of 50 columns, each row prefixed by the
 *           sequence name.
 *
 *           Side effect: the name field of BOTH sqinfoX and sqinfoY
 *           is overwritten with string_name.
 *
 * Args:     ofp          - output stream
 *           sqinfoX/Y    - sequence info; names are overwritten
 *           start        - first column to print
 *           L            - number of columns to print
 *           charX, charY - aligned character sequences
 *           string_name  - name to print for both sequences
 *
 * Returns:  void.
 */
void
PrintNewCharSeqs(FILE *ofp, SQINFO *sqinfoX, SQINFO *sqinfoY, int start, int L, char *charX, char *charY,
		 char *string_name)
{
  int line = 50;
  int end  = start + L;     /* one past the last column to print */
  int pos;
  int i;

  /* NOTE(review): strncpy() does not NUL-terminate when string_name is
   * 40 chars or longer -- TODO confirm the name field is terminated elsewhere.
   */
  strncpy(sqinfoX->name, string_name, 40);
  strncpy(sqinfoY->name, string_name, 40);

  fprintf (ofp, "\n");

  /* Rows are bounded by start+L, the same bound used for the columns
   * within a row, so the whole window is printed even when start > 0.
   */
  for (pos = start; pos < end; pos += line) {
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoX->name, charX[pos]);
    for (i = 1; i < line && pos+i < end; i++)
      fprintf (ofp, "%c", charX[pos+i]);
    
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoY->name, charY[pos]);
    for (i = 1; i < line && pos+i < end; i++)
      fprintf (ofp, "%c", charY[pos+i]);
    
    fprintf (ofp, "\n");
  }
  
  fprintf (ofp, "\n");
}

/* Function: PrintCtAlign()
 *
 * Purpose:  Print columns [start, start+L) of an alignment together
 *           with a structure line derived from ct, in rows of 50
 *           columns. In the structure line a pair (i,j), i<j, with
 *           ct[i]==j and ct[j]==i and both ends inside the window is
 *           drawn as '>' at i and '<' at j; everything else is '.'.
 *           The structure row is labelled with sqinfoX's name.
 *
 *           Side effect: the name field of BOTH sqinfoX and sqinfoY
 *           is overwritten with string_name.
 *
 * Args:     ofp         - output stream
 *           sqinfoX/Y   - sequence info; names are overwritten
 *           start       - first column to print
 *           L           - number of columns to print
 *           ali         - alignment holding charX and charY
 *           ct          - pairing array indexed by absolute column
 *           string_name - name to print for both sequences
 *
 * Returns:  void.
 */
void
PrintCtAlign(FILE *ofp, SQINFO *sqinfoX, SQINFO *sqinfoY, int start, int L, struct ali_s *ali, 
	     int *ct, char *string_name)
{
  char *ss;                  /* structure line; ss[k] describes column start+k */
  int   line = 50;
  int   end  = start + L;    /* one past the last column to print */
  int   i, j, pos;

  /* NOTE(review): strncpy() does not NUL-terminate when string_name is
   * 40 chars or longer -- TODO confirm the name field is terminated elsewhere.
   */
  strncpy(sqinfoX->name, string_name, 40);
  strncpy(sqinfoY->name, string_name, 40);

  /* always request at least one byte so an empty window is not
   * mistaken for an allocation failure
   */
  if ((ss = (char *) malloc (sizeof(char) * (L > 0 ? L : 1))) == NULL)
    Die("malloc failed");
  if (L > 0) memset(ss, '.', L);

  /* each position has at most one partner, so look it up directly */
  for (i = 0; i < L; i++) {
    j = ct[i+start] - start;
    if (j > i && j < L && ct[j+start] == i+start) 
      {
	ss[i] = '>';
	ss[j] = '<';
      }
  }
 
  fprintf (ofp, "\n");

  /* ss is indexed relative to start; the alignment rows use absolute columns */
  for (pos = start; pos < end; pos += line) {
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoX->name, ss[pos-start]);
    for (i = 1; i < line && pos+i < end; i++)
      fprintf (ofp, "%c", ss[pos+i-start]);
     
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoX->name, ali->charX[pos]);
    for (i = 1; i < line && pos+i < end; i++)
      fprintf (ofp, "%c", ali->charX[pos+i]);
     
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoY->name, ali->charY[pos]);
    for (i = 1; i < line && pos+i < end; i++)
      fprintf (ofp, "%c", ali->charY[pos+i]);
     
    fprintf (ofp, "\n");
  }
   
  fprintf (ofp, "\n");
  free(ss);
}


/* Function: PrintCtCharSeqs()
 *
 * Purpose:  Print columns [start, start+L) of two aligned character
 *           sequences together with a structure line derived from ct,
 *           in rows of 50 columns. In the structure line a pair
 *           (i,j), i<j, with ct[i]==j and ct[j]==i and both ends
 *           inside the window is drawn as '>' at i and '<' at j;
 *           everything else is '.'. The structure row is labelled
 *           with sqinfoX's name.
 *
 *           Side effect: the name field of BOTH sqinfoX and sqinfoY
 *           is overwritten with string_name.
 *
 * Args:     ofp          - output stream
 *           sqinfoX/Y    - sequence info; names are overwritten
 *           start        - first column to print
 *           L            - number of columns to print
 *           charX, charY - aligned character sequences
 *           ct           - pairing array indexed by absolute column
 *           string_name  - name to print for both sequences
 *
 * Returns:  void.
 */
void
PrintCtCharSeqs(FILE *ofp, SQINFO *sqinfoX, SQINFO *sqinfoY, int start, int L, char *charX, char *charY, 
		int *ct, char *string_name)
{
  char *ss;                  /* structure line; ss[k] describes column start+k */
  int   line = 50;
  int   end  = start + L;    /* one past the last column to print */
  int   i, j, pos;

  /* NOTE(review): strncpy() does not NUL-terminate when string_name is
   * 40 chars or longer -- TODO confirm the name field is terminated elsewhere.
   */
  strncpy(sqinfoX->name, string_name, 40);
  strncpy(sqinfoY->name, string_name, 40);

  /* always request at least one byte so an empty window is not
   * mistaken for an allocation failure
   */
  if ((ss = (char *) malloc (sizeof(char) * (L > 0 ? L : 1))) == NULL)
    Die("malloc failed");
  if (L > 0) memset(ss, '.', L);

  /* each position has at most one partner, so look it up directly */
  for (i = 0; i < L; i++) {
    j = ct[i+start] - start;
    if (j > i && j < L && ct[j+start] == i+start) 
      {
	ss[i] = '>';
	ss[j] = '<';
      }
  }
 
  fprintf (ofp, "\n");

  /* ss is indexed relative to start; the sequence rows use absolute columns */
  for (pos = start; pos < end; pos += line) {
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoX->name, ss[pos-start]);
    for (i = 1; i < line && pos+i < end; i++)
      fprintf (ofp, "%c", ss[pos+i-start]);
     
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoX->name, charX[pos]);
    for (i = 1; i < line && pos+i < end; i++)
      fprintf (ofp, "%c", charX[pos+i]);
     
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoY->name, charY[pos]);
    for (i = 1; i < line && pos+i < end; i++)
      fprintf (ofp, "%c", charY[pos+i]);
     
    fprintf (ofp, "\n");
  }
   
  fprintf (ofp, "\n");
  free(ss);
}

/* Function: PrintCtSeqs()
 *
 * Purpose:  Print a structure string and two sequences, obtained
 *           from sX and sY through DedigitizeSequence(), in rows of
 *           50 columns. Rows start at column `start`; columns are
 *           printed while their index is below L. The first column
 *           of every row is printed unconditionally.
 *
 * Args:     ofp     - output stream
 *           sqinfoX - name used to label the X row
 *           sX      - digitized sequence X
 *           sqinfoY - name used to label the Y row
 *           sY      - digitized sequence Y
 *           start   - first column to print
 *           L       - upper bound (exclusive) on column index
 *           ss      - structure string, labelled "SS"
 *
 * Returns:  void.
 */
void
PrintCtSeqs(FILE *ofp, SQINFO *sqinfoX, int *sX, SQINFO *sqinfoY, int *sY, int start, int L, char *ss)
{
  int   width = 50;
  int   col;
  int   k;
  char *textX;
  char *textY;

  /* NOTE(review): the texts below are indexed with absolute columns,
   * whereas PrintQfile() indexes its DedigitizeSequence() result
   * relative to start -- confirm callers only pass start == 0.
   */
  textX = DedigitizeSequence(sX, start, L);
  textY = DedigitizeSequence(sY, start, L);

  fprintf (ofp, "\n");
  
  for (col = start; L/(col+1) != 0; col += width) {
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", "SS", ss[col]);
    for (k = 1; k < width && col+k < L; k++)
      fprintf (ofp, "%c", ss[col+k]);
     
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoX->name, textX[col]);
    for (k = 1; k < width && col+k < L; k++)
      fprintf (ofp, "%c", textX[col+k]);
     
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoY->name, textY[col]);
    for (k = 1; k < width && col+k < L; k++)
      fprintf (ofp, "%c", textY[col+k]);
     
    fprintf (ofp, "\n");
  }
   
  fprintf (ofp, "\n");

  free(textX);
  free(textY);
}

/* Function: PrintCtSeq()
 *
 * Purpose:  Print a structure string and one sequence, obtained from
 *           iseq through DedigitizeSequence(), in rows of 50 columns.
 *           Rows start at column `start`; columns are printed while
 *           their index is below L. The first column of every row is
 *           printed unconditionally.
 *
 * Args:     ofp    - output stream
 *           sqinfo - name used to label the sequence row
 *           iseq   - digitized sequence
 *           start  - first column to print
 *           L      - upper bound (exclusive) on column index
 *           ss     - structure string, labelled "SS"
 *
 * Returns:  void.
 */
void
PrintCtSeq(FILE *ofp, SQINFO *sqinfo, int *iseq, int start, int L, char *ss)
{
  int   width = 50;
  int   col;
  int   k;
  char *text;

  /* NOTE(review): the text below is indexed with absolute columns,
   * whereas PrintQfile() indexes its DedigitizeSequence() result
   * relative to start -- confirm callers only pass start == 0.
   */
  text = DedigitizeSequence(iseq, start, L);
 
  fprintf (ofp, "\n");
  
  for (col = start; L/(col+1) != 0; col += width) {
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", "SS", ss[col]);
    for (k = 1; k < width && col+k < L; k++)
      fprintf (ofp, "%c", ss[col+k]);
     
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfo->name, text[col]);
    for (k = 1; k < width && col+k < L; k++)
      fprintf (ofp, "%c", text[col+k]);
     
    fprintf (ofp, "\n");
  }
   
  fprintf (ofp, "\n");

  free(text);
}

/* Function: PrintQfile()
 *
 * Purpose:  Write the window [start, start+L) of two digitized
 *           sequences as two records, each made of a
 *           ">name-from-to (L)" header followed by the residues in
 *           rows of 50. Nothing is written when qfile is NULL.
 *
 * Args:     ofp       - output stream
 *           qfile     - only tested against NULL
 *           sqinfoX   - name used in the first header
 *           isegX     - digitized sequence X
 *           sqinfoY   - name used in the second header
 *           isegY     - digitized sequence Y
 *           start     - first position of the window
 *           L         - window length
 *           abs_shift - offset added to the coordinates in the headers
 *
 * Returns:  void.
 */
void 
PrintQfile (FILE *ofp, char *qfile, SQINFO sqinfoX, int *isegX, SQINFO sqinfoY, int *isegY, int start, int L, int abs_shift)
{
  int   width = 50;
  int   stop  = start + L;   /* one past the last position of the window */
  int   col;
  int   k;
  char *textX;
  char *textY;

  if (qfile == NULL) 
    return;

  /* the dedigitized texts are indexed relative to start */
  textX = DedigitizeSequence(isegX, start, L);
  textY = DedigitizeSequence(isegY, start, L);
      
  fprintf(ofp, ">%s-%d-%d (%d)\n", sqinfoX.name, abs_shift+start+1, abs_shift+start+L, L);
  for (col = start; L/(col+1-start) != 0; col += width) {
    for (k = 0; k < width && col+k < stop; k++)
      fprintf (ofp, "%c", textX[col+k-start]);
    fprintf (ofp, "\n");
  }
      
  fprintf(ofp, ">%s-%d-%d (%d)\n", sqinfoY.name, abs_shift+start+1, abs_shift+start+L, L);
  for (col = start; L/(col+1-start) != 0; col += width) {
    for (k = 0; k < width && col+k < stop; k++)
      fprintf (ofp, "%c", textY[col+k-start]);
    fprintf (ofp, "\n");
  }     

  free(textX);
  free(textY);
}

/* Function: PrintSSseqs()
 *
 * Purpose:  Print two annotation strings (cc first, then ss, both
 *           labelled "SS") and two sequences, obtained from sX and sY
 *           through DedigitizeSequence(), in rows of 50 columns.
 *           Rows start at column `start`; columns are printed while
 *           their index is below L. The first column of every row is
 *           printed unconditionally.
 *
 * Args:     ofp     - output stream
 *           sqinfoX - name used to label the X row
 *           sX      - digitized sequence X
 *           sqinfoY - name used to label the Y row
 *           sY      - digitized sequence Y
 *           start   - first column to print
 *           L       - upper bound (exclusive) on column index
 *           ss      - second annotation row
 *           cc      - first annotation row
 *
 * Returns:  void.
 */
void
PrintSSseqs(FILE *ofp, SQINFO *sqinfoX, int *sX, SQINFO *sqinfoY, int *sY, int start, int L, char *ss, char *cc)
{
  int   width = 50;
  int   col;
  int   k;
  char *textX;
  char *textY;

  /* NOTE(review): the texts below are indexed with absolute columns,
   * whereas PrintQfile() indexes its DedigitizeSequence() result
   * relative to start -- confirm callers only pass start == 0.
   */
  textX = DedigitizeSequence(sX, start, L);
  textY = DedigitizeSequence(sY, start, L);

  fprintf (ofp, "\n");
  
  for (col = start; L/(col+1) != 0; col += width) {
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", "SS", cc[col]);
    for (k = 1; k < width && col+k < L; k++)
      fprintf (ofp, "%c", cc[col+k]);
     
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", "SS", ss[col]);
    for (k = 1; k < width && col+k < L; k++)
      fprintf (ofp, "%c", ss[col+k]);
     
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoX->name, textX[col]);
    for (k = 1; k < width && col+k < L; k++)
      fprintf (ofp, "%c", textX[col+k]);
     
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoY->name, textY[col]);
    for (k = 1; k < width && col+k < L; k++)
      fprintf (ofp, "%c", textY[col+k]);
     
    fprintf (ofp, "\n");
  }
   
  fprintf (ofp, "\n");

  free(textX);
  free(textY);
}

/* Function: Print2Seqs()
 *
 * Purpose:  Print two sequences, obtained from sX and sY through
 *           DedigitizeSequence(), in rows of 50 columns. Rows start
 *           at column `start`; columns are printed while their index
 *           is below L. The first column of every row is printed
 *           unconditionally.
 *
 * Args:     ofp     - output stream
 *           sqinfoX - name used to label the X row
 *           sX      - digitized sequence X
 *           sqinfoY - name used to label the Y row
 *           sY      - digitized sequence Y
 *           start   - first column to print
 *           L       - upper bound (exclusive) on column index
 *
 * Returns:  void.
 */
void
Print2Seqs(FILE *ofp, SQINFO *sqinfoX, int *sX, SQINFO *sqinfoY, int *sY, int start, int L)
{
  int   width = 50;
  int   col;
  int   k;
  char *textX;
  char *textY;

  /* NOTE(review): the texts below are indexed with absolute columns,
   * whereas PrintQfile() indexes its DedigitizeSequence() result
   * relative to start -- confirm callers only pass start == 0.
   */
  textX = DedigitizeSequence(sX, start, L);
  textY = DedigitizeSequence(sY, start, L);

  fprintf (ofp, "\n");
  
  for (col = start; L/(col+1) != 0; col += width) {
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoX->name, textX[col]);
    for (k = 1; k < width && col+k < L; k++)
      fprintf (ofp, "%c", textX[col+k]);
     
    fprintf (ofp, "\n\t");
    fprintf (ofp, "%15.15s  %c", sqinfoY->name, textY[col]);
    for (k = 1; k < width && col+k < L; k++)
      fprintf (ofp, "%c", textY[col+k]);
     
    fprintf (ofp, "\n");
  }
   
  fprintf (ofp, "\n");

  free(textX);
  free(textY);
}


/* Function: PrintBanner()
 * Date:     ER, Mon 12 April 1999 [St. Louis]
 *
 * Purpose:  Print the run banner: program and squid versions, the
 *           model files in use, the sequence file(s) and the
 *           scanning mode with its length range.
 *
 * Args:     ofp               - output stream
 *           pamfile           - printed as the PAM model
 *           rnafile           - printed when use_ribo_approx is set
 *           ribofile          - printed when use_ribo_approx is not set
 *           seqfile           - first sequence file
 *           seqfile_b         - second sequence file (skipped if NULL)
 *           allow_pseudoknots - not used here
 *           nseqs, Lmax       - printed together when nseqs > 0
 *           maxhitlen         - upper end of the printed length range
 *           minhitlen         - lower end of the printed length range
 *           use_ribo_approx   - selects the rnafile/ribofile line
 *           sweep, window,
 *           slide, scan       - select and fill the "version" line
 *
 * Returns:  void.
 */
void
PrintBanner(FILE *ofp, char *pamfile, char *rnafile, char *ribofile, char *seqfile, char *seqfile_b, 
	    int allow_pseudoknots, int nseqs, long int Lmax, long int maxhitlen, int minhitlen, 
	    int use_ribo_approx, int sweep, int window, int slide, int scan)
{
  const char *rule = "#---------------------------------------------------------------------------------\n";

  fputs(rule, ofp);
  fprintf(ofp, "#      qrna %s (%s)", RELEASE, RELEASEDATE);
  fprintf(ofp, " using squid %s (%s)\n", squid_version, squid_date);
  fputs(rule, ofp);
  fprintf(ofp, "#      Rate-generating PAM model =  %s \n", pamfile);
  fputs(rule, ofp);

  if (use_ribo_approx)
    fprintf(ofp, "#      Approx RNA model =  %s\n", rnafile);
  else
    fprintf(ofp, "#      Rate-generating RIBOPROB matrix =  %s\n", ribofile);
  fputs(rule, ofp);

  fprintf(ofp, "#      seq file  =  %s\n", seqfile);
  if (seqfile_b != NULL)
    fprintf(ofp, "#      seq file  =  %s\n", seqfile_b);
  if (nseqs > 0) 
    fprintf(ofp, "#                   #seqs: %d (max_len = %ld)\n", nseqs, Lmax);
  fputs(rule, ofp);

  /* mode line; the length range is appended to whichever variant is chosen */
  if (window == -1 && !scan) {
    fprintf(ofp, "#      full length version: ");
  }
  else if (sweep == TRUE) {
    fprintf(ofp, "#      scanning-motif version: window = %d   slide = %d", window, slide);
  }
  else if (scan) {
    if (window != -1)
      fprintf(ofp, "#      scanning version: window = %d   slide = %d", window, slide);
    else
      fprintf(ofp, "#      scanning version: full length");
  }
  else {
    fprintf(ofp, "#      window version: window = %d   slide = %d", window, slide);
  }
  fprintf(ofp, " -- length range = [%d,%ld]\n", minhitlen, maxhitlen);

  fputs(rule, ofp);
}

/* Function: PrintSeq()
 * Date:     ER, Tue Aug 24 17:00:22 CDT 1999 [St. Louis]
 *
 * Purpose:  Print a character sequence (gaps included) in upper
 *           case, preceded by a "> name(len)" header and, when the
 *           corresponding flags are set in sqinfo, by SRC and DES
 *           lines.
 *
 * Args:     ofp    - output stream
 *           sqinfo - sequence info (name, flags, id, acc, start,
 *                    stop, olen, desc are read)
 *           len    - number of characters of seq to print
 *           seq    - sequence, positions 0..len-1
 *
 * Returns:  void.
 */
void
PrintSeq(FILE *ofp, SQINFO *sqinfo, int len, char *seq)
{
  int i;

  fprintf(ofp, "> %s(%d)\n", sqinfo->name, len);

  /* fields whose flag is not set are printed as "-" or 0 */
  if (sqinfo->flags & (SQINFO_ID | SQINFO_ACC | SQINFO_START | SQINFO_STOP | SQINFO_OLEN))
    fprintf(ofp, "SRC  %s %s %d..%d::%d\n",
	    (sqinfo->flags & SQINFO_ID)    ? sqinfo->id     : "-",
	    (sqinfo->flags & SQINFO_ACC)   ? sqinfo->acc    : "-",
	    (sqinfo->flags & SQINFO_START) ? sqinfo->start  : 0,
	    (sqinfo->flags & SQINFO_STOP)  ? sqinfo->stop   : 0,
	    (sqinfo->flags & SQINFO_OLEN)  ? sqinfo->olen   : 0);

  if (sqinfo->flags & SQINFO_DESC)
    fprintf(ofp, "DES  %s\n", sqinfo->desc);

  /* toupper() is only defined for values representable as unsigned
   * char (or EOF), so a plain char must be converted first
   */
  for (i = 0; i < len; i++)
    fprintf(ofp, "%c", toupper((unsigned char) seq[i]));
  fprintf(ofp, "\n");
}
/* Function: PrintIntSeq()
 *
 * Purpose:  Print a digitized sequence as a run of decimal codes
 *           (no separators), preceded by a "> name(len)" header and,
 *           when the corresponding flags are set in sqinfo, by SRC
 *           and DES lines.
 *
 * Args:     ofp    - output stream
 *           sqinfo - sequence info (name, flags, id, acc, start,
 *                    stop, olen, desc are read)
 *           len    - number of codes of seq to print
 *           seq    - digitized sequence, positions 0..len-1
 *
 * Returns:  void.
 */
void
PrintIntSeq(FILE *ofp, SQINFO *sqinfo, int len, int *seq)
{
  int src_mask = SQINFO_ID | SQINFO_ACC | SQINFO_START | SQINFO_STOP | SQINFO_OLEN;
  int k;

  fprintf(ofp, "> %s(%d)\n", sqinfo->name, len);

  /* fields whose flag is not set are printed as "-" or 0 */
  if (sqinfo->flags & src_mask)
    fprintf(ofp, "SRC  %s %s %d..%d::%d\n",
	    (sqinfo->flags & SQINFO_ID)    ? sqinfo->id     : "-",
	    (sqinfo->flags & SQINFO_ACC)   ? sqinfo->acc    : "-",
	    (sqinfo->flags & SQINFO_START) ? sqinfo->start  : 0,
	    (sqinfo->flags & SQINFO_STOP)  ? sqinfo->stop   : 0,
	    (sqinfo->flags & SQINFO_OLEN)  ? sqinfo->olen   : 0);

  if (sqinfo->flags & SQINFO_DESC)
    fprintf(ofp, "DES  %s\n", sqinfo->desc);

  k = 0;
  while (k < len) {
    fprintf(ofp, "%d", seq[k]);
    k++;
  }
  fprintf(ofp, "\n");
}
/* Function: PrintIntSequence()
 *
 * Purpose:  Print len codes of a digitized sequence, starting at
 *           position start, as a run of decimal numbers preceded by
 *           a "> name(len)" header.
 *
 *           Side effect: sqinfo's name field is overwritten with
 *           string_name.
 *
 * Args:     ofp         - output stream
 *           sqinfo      - sequence info; name is overwritten
 *           string_name - name to print
 *           start       - first position to print
 *           len         - number of positions to print
 *           seq         - digitized sequence
 *
 * Returns:  void.
 */
void
PrintIntSequence(FILE *ofp, SQINFO *sqinfo, char *string_name, int start, int len, int *seq)
{
  int k = 0;

  /* NOTE(review): strncpy() does not NUL-terminate when string_name is
   * 40 chars or longer -- TODO confirm the name field is terminated elsewhere.
   */
  strncpy(sqinfo->name, string_name, 40);

  fprintf(ofp, "> %s(%d)\n", sqinfo->name, len);
  while (k < len) {
    fprintf(ofp, "%d", seq[start + k]);
    k++;
  }
  fprintf(ofp, "\n");
}


/* Function: PrintHitBanner()
 * Date:     ER, Tue Sep 28 17:30:48 CDT 1999 [St. Louis]
 *
 * Purpose:  Print the header of one comparison: the running number,
 *           the strand and shuffling mode, and the name and length
 *           of both sequences. When regressionfile is not NULL the
 *           running number is also written to regressfp.
 *
 * Args:     ofp            - output stream
 *           regressfp      - regression output stream
 *           regressionfile - only tested against NULL
 *           sqinfoX/Y      - name and len are printed
 *           nseqs          - running number printed after '#'
 *           ones           - nonzero: "[given strand]", else "[both strands]"
 *           shuffle, sre_shuffle, con_shuffle
 *                          - first nonzero one selects the shuffle tag
 *           strand         - not used here
 *
 * Returns:  void.
 */
void
PrintHitBanner(FILE *ofp, FILE *regressfp, char *regressionfile, 
	       SQINFO sqinfoX, SQINFO sqinfoY, int nseqs, int ones, 
	       int shuffle, int sre_shuffle, int con_shuffle, int strand)
{
  const char *strand_tag;
  const char *shuffle_tag;

  strand_tag = ones ? " [given strand]" : " [both strands]";

  if      (shuffle)     shuffle_tag = " (shuffled)\n";
  else if (sre_shuffle) shuffle_tag = " (sre_shuffled)\n";
  else if (con_shuffle) shuffle_tag = " (con_shuffled)\n";
  else                  shuffle_tag = " \n";

  fprintf(ofp, "# %d ", nseqs);
  fputs(strand_tag, ofp);
  fputs(shuffle_tag, ofp);
  
  fprintf(ofp, ">%s (%d)\n", sqinfoX.name, sqinfoX.len);
  fprintf(ofp, ">%s (%d)\n\n", sqinfoY.name, sqinfoY.len);

  if (regressionfile != NULL)  
    fprintf(regressfp, "# %d ", nseqs);
}


/* Function: PrintNull()
 * Date:     ER, Mon Sep 27 14:27:26 CDT 1999 [St. Louis]
 *
 * Purpose:  Print the null-model scores for both strands, followed
 *           by 2*null->eta minus each score (the offset that turns
 *           a log_2 score into a logodds score, as the printed
 *           legend states).
 *
 * Args:     ofp       - output stream
 *           null      - null model; only eta is read
 *           nullsc    - null score
 *           nullscrev - null score of the reverse strand
 *
 * Returns:  void.
 */
void
PrintNull(FILE *ofp, struct nullmodel_s *null, double nullsc, double nullscrev)
{
  double twice_eta = 2.*null->eta;

  fprintf(ofp, "NULL              = %8.3f   NULLrev              = %8.3f \n", nullsc, nullscrev);
  fprintf(ofp, "2*log_2(eta)-NULL = %8.3f   2*log_2(eta)-NULLrev = %8.3f [logodds_sc = log_2_sc + 2.*null->eta - nullsc]\n\n", 
	  twice_eta-nullsc, twice_eta-nullscrev);
}



/* Function: RemoveGaps()
 *
 * Purpose:  Remove the gaps (code 4) from a digitized sequence in
 *           place: residues (codes below 4) are moved to the front,
 *           and the freed positions at the end, up to Lmax, are
 *           filled with 4. Codes above 4 are reported through Die().
 *
 * Args:     seq     - digitized sequence, positions 0..Lmax-1; modified
 *           Lmax    - number of positions in seq
 *           len     - RETURN: number of residues left after removing gaps
 *           verbose - if nonzero, print the ungapped codes to stdout
 *
 * Returns:  void.
 */
void
RemoveGaps(int *seq, int Lmax, int *len, int verbose)
{
  int i;
  int kept    = 0;   /* residues written so far; also the write index */
  int scanned = 0;   /* positions read                                */

  for (i = 0; i < Lmax; i++)
    {
      scanned ++;
      if      (seq[i] <  4) seq[kept++] = seq[i];
      else if (seq[i] == 4) continue;               /* is gap. do not allow gaps */
      else Die("unrecognized symbol %d in sequence\n", seq[i]);
    }

  /* a bit of paranoia. Report the value actually compared (Lmax);
   * *len is an output and need not be initialized on entry.
   */
  if (scanned != Lmax) Die ("something went wrong when reading the sequence (%d, %d)", scanned, Lmax);

  for (i = kept; i < Lmax; i++) seq[i] = 4;  /* add blanks at the end */
 
  *len = kept;

  if (verbose) 
    {
      for (i = 0; i < kept; i++)
	printf("%d ", seq[i]);
      printf ("\n");
    }
}

/* Function: RemoveJointGaps()
 * Date:     ER, Tue Aug 24 17:49:42 CDT 1999[St. Louis]
 *
 * Purpose:  Given two sequences that form an alignment, remove the
 *           columns in which both have a gap (code 4):
 *
 *                 x1 -- x3 -- x5         x1 x3 -- x5
 *                 y1 -- y3 y4 --   to    y1 y3 y4 --
 *
 *           Both arrays are compacted in place. When sqinfoX.flags
 *           has SQINFO_SS set, ss is compacted in the same way.
 *           If the two lengths differ the longer one is used (with
 *           a warning unless sweep is set). Positions past the new
 *           length are left as they were.
 *
 * Args:     sqinfoX - len, flags and name of sequence X
 *           seqX    - digitized sequence X; modified
 *           sqinfoY - len and name of sequence Y
 *           seqY    - digitized sequence Y; modified
 *           ret_leg - RETURN: alignment length after the removal
 *           sweep   - if nonzero, do not warn about unequal lengths
 *           ss      - structure string; touched only with SQINFO_SS
 *
 * Returns:  void.
 */
void
RemoveJointGaps(SQINFO sqinfoX, int *seqX, SQINFO sqinfoY, int *seqY, int *ret_leg, int sweep, char *ss)
{
  int has_ss;          /* nonzero if ss must be compacted too      */
  int leg;             /* length of the alignment                  */
  int rd;              /* read  index                              */
  int wr      = 0;     /* write index                              */
  int scanned = 0;     /* columns read                             */
  int dropped = 0;     /* double-gap columns found                 */

  has_ss = (sqinfoX.flags & SQINFO_SS) != 0;

  if (sqinfoX.len == sqinfoY.len) 
    leg = sqinfoX.len;
  else
    {
      if (!sweep) 
	Warn("not an alignment (seqX = %s lenX = %d; seqY = %s lenY = %d). We'll add blanks to the shorter sequence", 
	     sqinfoX.name, sqinfoX.len, sqinfoY.name, sqinfoY.len); 

      leg = (sqinfoX.len > sqinfoY.len)? sqinfoX.len : sqinfoY.len;
    }

  for (rd = 0; rd < leg; rd++)
    {
      int x = seqX[rd];
      int y = seqY[rd];

      scanned ++;

      if (x < 4 || y < 4) {              /* at least one residue: keep the column */
	seqX[wr] = x;
	seqY[wr] = y;
	if (has_ss) ss[wr] = ss[rd];
	wr ++;
      }
      else if (x == 4 && y == 4)         /* it's a double gap, remove */
	dropped ++;
      else 
	Die("unrecognized symbols %d %d in sequences (pos = %d/%d)\n", x, y, rd, leg);
    }

  /* a bit of paranoia 
   */
  if (scanned != leg) Die ("something went wrong when reading the sequences (%d, %d)", scanned, leg);

  *ret_leg = scanned - dropped;
}

/* Function: RevComp()
 *
 * Purpose:  Write the reverse complement of a digitized sequence
 *           into rev: seq is read from its last position to its
 *           first, codes 0..3 become 3-code, and gaps (4) are
 *           copied. For any other code a warning is issued and the
 *           corresponding position of rev is left unchanged.
 *           Nothing is done if rev or seq is NULL.
 *
 * Args:     rev - output, at least L ints
 *           seq - digitized sequence, positions 0..L-1
 *           L   - length of seq
 *
 * Returns:  void.
 */
void 
RevComp (int *rev, int *seq, int L)
{
  long k;

  if (rev == NULL || seq == NULL) return;

  for (k = 0; k < L; k++)
    {
      int sym = seq[L - 1 - k];

      if      (sym == 4)             rev[k] = sym;      /* leave gaps as they are */
      else if (sym >= 0 && sym <= 3) rev[k] = 3 - sym;
      else  Warn("Can't reverse complement an %d, pal. Using N.", sym);
    }
}

/* Function: CompNoRev()
 *
 * Purpose:  Write the complement of a digitized sequence into rev
 *           without reversing it: codes 0..3 become 3-code, and
 *           gaps (4) are copied. For any other code a warning is
 *           issued and the corresponding position of rev is left
 *           unchanged. Nothing is done if rev or seq is NULL.
 *
 * Args:     rev - output, at least L ints
 *           seq - digitized sequence, positions 0..L-1
 *           L   - length of seq
 *
 * Returns:  void.
 */
void 
CompNoRev (int *rev, int *seq, int L)
{
  long k;

  if (rev == NULL || seq == NULL) return;

  for (k = 0; k < L; k++)
    {
      int sym = seq[k];

      if      (sym == 4)             rev[k] = sym;      /* leave gaps as they are */
      else if (sym >= 0 && sym <= 3) rev[k] = 3 - sym;
      else  Warn("Can't reverse complement an %d, pal. Using N.", sym);
    }
}

/* Function: RevCompChar()
 *
 * Purpose:  Fill rev with the reverse complement of a character
 *           sequence: rev[k] is derived from seq[L-1-k].
 *           A/a -> U, C/c -> G, G/g -> C, T/t -> A, U/u -> A.
 *           Gap characters '-' and '.' are copied unchanged.
 *           Any other character triggers a warning and is written
 *           out as 'N', as the warning text announces.
 *
 * Args:     rev - output buffer, written at positions 0..L-1
 *                 (no terminating '\0' is appended)
 *           seq - input sequence, read at positions 0..L-1
 *           L   - number of positions to process
 *
 * Returns:  void. Nothing is done when rev or seq is NULL.
 */
void 
RevCompChar (char *rev, char *seq, int L)
{
  int  k;
  char c;
  char out;

  if (rev == NULL || seq == NULL) return;

  for (k = 0; k < L; k++)
    {
      c = seq[L - 1 - k];

      switch (c) {
      case 'A': case 'a': out = 'U'; break;
      case 'C': case 'c': out = 'G'; break;
      case 'G': case 'g': out = 'C'; break;
      case 'T': case 't': out = 'A'; break;
      case 'U': case 'u': out = 'A'; break;
      case '-': case '.': out = c;   break;   /* leave gaps as they are */
      default:
	Warn("Can't reverse complement an %c, pal. Using N.", c);
	out = 'N';   /* actually store the N, so rev[k] is never left unset */
	break;
      }

      rev[k] = out;
    }
}

/* Function: ShuffleSequence()
 * 
 * Purpose:  Shuffle in place the fragment seq[endpos-len+1 .. endpos]
 *           of a character sequence. The composition of the fragment
 *           is counted before and after shuffling and the two counts
 *           must agree.
 *           
 * Args:     seq      - sequence; the fragment may contain only A,C,G,U
 *           len      - length of the fragment to shuffle
 *           endpos   - index in seq of the last position of the fragment
 *           verbose  - if nonzero, print the fragment to stdout before
 *                      and after shuffling
 *           
 * Return:   (void)
 *           Calls Die() on allocation failure, on a character other
 *           than A,C,G,U inside the fragment, or if the composition
 *           changes.
 */
void
ShuffleSequence(char *seq, int len, int endpos, int verbose)
{
  int  *before;        /* composition of the fragment before shuffling */
  int  *after;         /* composition of the fragment after shuffling  */
  int   first;         /* index in seq of the first fragment position  */
  int   round;         /* number of shuffling passes done so far       */
  int   d, k;
  int   here, there;
  char  held;

  if ((before = (int *) calloc (4, sizeof(int))) == NULL ||
      (after  = (int *) calloc (4, sizeof(int))) == NULL)
    Die("malloc failed");

  /* the generator is re-seeded from the clock on every call */
  sre_srandom((int) time ((time_t *) NULL)); 

  first = endpos - len + 1;

  /* composition of the original fragment */
  for (d = 0; d < len; d++) 
    switch (seq[first+d]) {
    case 'A': before[0] += 1; break;
    case 'C': before[1] += 1; break;
    case 'G': before[2] += 1; break;
    case 'U': before[3] += 1; break;
    default:  Die("wrong character (%c) in ShuffleSequence()", seq[first+d]);
    }
  
  if (verbose) {
    printf("\n\n");
    for (d = 0; d < len; d++) 
      printf("%c ", seq[first+d]);
    printf("\n\n");
  }
  
  /* 5000 passes; in each pass every position d is exchanged with a
   * randomly drawn offset k = (int)(sre_random()*(len-1))
   */
  for (round = 0; round < 5000; round++) 
    for (d = 0; d < len; d++) {
      k = (int)(sre_random()*(len-1));

      here  = first + d;
      there = first + k;

      held       = seq[there];
      seq[there] = seq[here];
      seq[here]  = held;
    }

  /* composition of the shuffled fragment */
  for (d = 0; d < len; d++) 
    switch (seq[first+d]) {
    case 'A': after[0] += 1; break;
    case 'C': after[1] += 1; break;
    case 'G': after[2] += 1; break;
    case 'U': after[3] += 1; break;
    default:  Die("wrong character (%c) in ShuffleSequence()", seq[first+d]);
    }

  /* paranoia: shuffling must not change the composition */
  for (k = 0; k < 4; k++)
    if (before[k] != after[k])
      Die("something went wrong shuffling the sequence in ShuffleSequence()");
   
  if (verbose) {
    printf("\n\n");
    for (d = 0; d < len; d++) 
      printf("%c ", seq[first+d]);
    printf("\n\n");
  }

  free(before);
  free(after);
}

/* Function: ShuffleIntSequence()
 * 
 * Purpose:  Shuffle in place the fragment iseq[endpos-len+1 .. endpos]
 *           of an integer-coded sequence. The composition of the
 *           fragment is counted before and after shuffling and the
 *           two counts must agree.
 *           
 * Args:     iseq     - sequence; the fragment may contain only 0,1,2,3
 *           len      - length of the fragment to shuffle
 *           endpos   - index in iseq of the last position of the fragment
 *           verbose  - if nonzero, print the fragment to stdout before
 *                      and after shuffling
 *           
 * Return:   (void)
 *           Calls Die() on allocation failure, on a value outside 0..3
 *           inside the fragment, or if the composition changes.
 */
void
ShuffleIntSequence(int *iseq, int len, int endpos, int verbose)
{
  int  *before;        /* composition of the fragment before shuffling */
  int  *after;         /* composition of the fragment after shuffling  */
  int   first;         /* index in iseq of the first fragment position */
  int   round;         /* number of shuffling passes done so far       */
  int   d, k;
  int   here, there;
  int   val;
  int   held;

  if ((before = (int *) calloc (4, sizeof(int))) == NULL ||
      (after  = (int *) calloc (4, sizeof(int))) == NULL)
    Die("malloc failed");

  /* the generator is re-seeded from the clock on every call */
  sre_srandom((int) time ((time_t *) NULL)); 

  first = endpos - len + 1;

  /* composition of the original fragment */
  for (d = 0; d < len; d++) {
    val = iseq[first+d];
    if (val >= 0 && val <= 3) 
      before[val] += 1;
    else 
      Die("wrong integer (pos = %d, %d) in ShuffleIntSequence()", 
	  first+d, val);
  }
  
  if (verbose) {
    printf("\n\n"); 
    printf("%d-%d\n", first, endpos);
    for (d = 0; d < len; d++) 
      printf("%d ", iseq[first+d]);
    printf("\n\n");
  }
  
  /* 5000 passes; in each pass every position d is exchanged with a
   * randomly drawn offset k = (int)(sre_random()*(len-1))
   */
  for (round = 0; round < 5000; round++) 
    for (d = 0; d < len; d++) {
      k = (int)(sre_random()*(len-1));

      here  = first + d;
      there = first + k;

      held        = iseq[there];
      iseq[there] = iseq[here];
      iseq[here]  = held;
    }

  /* composition of the shuffled fragment */
  for (d = 0; d < len; d++) {
    val = iseq[first+d];
    if (val >= 0 && val <= 3) 
      after[val] += 1;
    else 
      Die("wrong integer (%d) in ShuffleIntSequence()", val);
  }

  /* paranoia: shuffling must not change the composition */
  for (k = 0; k < 4; k++)
    if (before[k] != after[k])
      Die("something went wrong shuffling the sequence in ShuffleIntSequence()");
   
  if (verbose) {
    printf("\n\n");
    for (d = 0; d < len; d++) 
      printf("%d ", iseq[first+d]);
    printf("\n\n");
  }

  free(before);
  free(after);
}

/* Function: Shuffle2IntSequences()
 * 
 * Purpose:  Shuffle two integer-coded fragments of equal length
 *           simultaneously and in place: whenever offsets d and s are
 *           exchanged in X they are exchanged in Y too, so that the
 *           pairing of X and Y symbols at each offset (matches,
 *           mismatches and gaps) is kept.
 *           
 *           The fragments are iseqX[endposX-len+1 .. endposX] and
 *           iseqY[endposY-len+1 .. endposY].
 *           
 * Args:     iseqX      - sequence; fragment may contain only 0,1,2,3,4
 *           iseqY      - sequence; fragment may contain only 0,1,2,3,4
 *           len        - length of both fragments
 *           endposX    - index in iseqX of the last fragment position
 *           endposY    - index in iseqY of the last fragment position
 *           verbose    - if nonzero, print both fragments to stdout
 *                        before and after shuffling
 *           
 * Return:   (void)
 *           Calls Die() on a value outside 0..4 inside a fragment, or
 *           if the composition of either fragment changes.
 */
void
Shuffle2IntSequences(int *iseqX, int *iseqY, int len, int endposX, int endposY, int verbose)
{
  int  countsX[5]     = { 0, 0, 0, 0, 0 };  /* composition before shuffling */
  int  countsY[5]     = { 0, 0, 0, 0, 0 };
  int  shu_countsX[5] = { 0, 0, 0, 0, 0 };  /* composition after shuffling  */
  int  shu_countsY[5] = { 0, 0, 0, 0, 0 };
  int  i;
  int  d, s;
  int  intposX, posX, posX_s;
  int  intposY, posY, posY_s;
  int  seqX_s, seqY_s;
  int  valX, valY;
  int  count;          /* number of times we shuffle */
  int  seed;

  seed = (int) time ((time_t *) NULL);
  sre_srandom(seed); /* reinit sre_random each time you shuffle a sequence */

  intposX = endposX - len + 1;
  intposY = endposY - len + 1;

  /* calculate original counts of fragment
   */
  for (d = 0; d < len; d++) {
    valX = iseqX[intposX+d];
    if (valX >= 0 && valX <= 4) 
      countsX[valX] += 1;
    else 
      Die("wrong integer (pos = %d, %d) in Shuffle2IntSequences()", 
	  intposX+d, valX);

    valY = iseqY[intposY+d];
    if (valY >= 0 && valY <= 4) 
      countsY[valY] += 1;
    else 
      Die("wrong integer (pos = %d, %d) in Shuffle2IntSequences()", 
	  intposY+d, valY);
  }

  if (verbose) {
    printf("\n\n"); printf("%d-%d\n", intposX, endposX);
    for (d = 0; d < len; d++) printf("%d ", iseqX[intposX+d]);
    printf("\n\n");
    printf("\n\n"); printf("%d-%d\n", intposY, endposY);
    for (d = 0; d < len; d++) printf("%d ", iseqY[intposY+d]);
    printf("\n\n");
  }
  
  /* 5000 passes; the same random offset s is used for X and Y so
   * that whole alignment columns are exchanged
   */
  for (count = 0; count < 5000; count++) {
    for (d = 0; d < len; d++) {
      s = (int)(sre_random()*(len-1));

      posX   = intposX + d;
      posY   = intposY + d;

      posX_s = intposX + s;
      posY_s = intposY + s;

      seqX_s = iseqX[posX_s];
      seqY_s = iseqY[posY_s];

      iseqX[posX_s] = iseqX[posX];
      iseqY[posY_s] = iseqY[posY];

      iseqX[posX] = seqX_s;
      iseqY[posY] = seqY_s;
    }
  }

  /* calculate counts of shuffled fragment
   */
  for (d = 0; d < len; d++) {
    valX = iseqX[intposX+d];
    if (valX >= 0 && valX <= 4) 
      shu_countsX[valX] += 1;
    else 
      Die("wrong integer (%d) in Shuffle2IntSequences()", valX);

    valY = iseqY[intposY+d];
    if (valY >= 0 && valY <= 4) 
      shu_countsY[valY] += 1;
    else 
      Die("wrong integer (%d) in Shuffle2IntSequences()", valY);
  }

  /* paranoia: shuffling must not change either composition
   */
  for (i = 0; i < 5; i++)
    if ((countsX[i] != shu_countsX[i]) || (countsY[i] != shu_countsY[i]))
      Die("something went wrong shuffling the sequence in Shuffle2IntSequences()");
   
  if (verbose) {
    printf("\n\n");
    for (d = 0; d < len; d++) printf("%d ", iseqX[intposX+d]);
    printf("\n\n");
    for (d = 0; d < len; d++) printf("%d ", iseqY[intposY+d]);
    printf("\n\n");
  }
}

/* Function: QRNAShuffle()
 * Date:     SRE, Mon Dec 10 10:14:12 2001 [St. Louis]
 *
 * Purpose:  Shuffle a pairwise alignment x,y while preserving the
 *           position of gaps; the shuffled alignment is left in
 *           xs, ys.
 *           
 *           Columns are sorted into three classes which are shuffled
 *           independently of each other:
 *             (1) residue in both x and y,
 *             (2) residue in x, gap in y,
 *             (3) gap in x, residue in y.
 *           Columns with a gap in both x and y are never moved.
 *           
 *           xs,x and ys,y may be identical: to shuffle an alignment
 *           "in place", destroying the original alignment, call
 *              QRNAShuffle(x,y,x,y);
 *
 * Args:     xs, ys: allocated space for shuffled pairwise ali of x,y [L+1]
 *           x, y: pairwise alignment to be shuffled [0..L-1];
 *                 L is taken from strlen(x)
 *
 * Returns:  1 (the only value this function returns).
 *           The shuffled alignment is returned in xs, ys.
 */
int
QRNAShuffle(char *xs, char *ys, char *x, char *y)
{
  int  *pool[3];       /* column lists: [0] both, [1] x only, [2] y only */
  int   npool[3];      /* number of columns in each list                 */
  int   L;
  int   col;
  int   t;             /* which list is being shuffled                   */
  int   n;             /* columns of the current list still unplaced     */
  int   pick;
  int   a, b;          /* the two alignment columns being exchanged      */
  int   xgap, ygap;
  char  keepx, keepy;

  if (xs != x) strcpy(xs, x);
  if (ys != y) strcpy(ys, y);

  /* First, collect the column positions of each of the three classes.
   */
  L = strlen(x);
  pool[0] = MallocOrDie(sizeof(int) * L);
  pool[1] = MallocOrDie(sizeof(int) * L);
  pool[2] = MallocOrDie(sizeof(int) * L);
  npool[0] = npool[1] = npool[2] = 0;

  for (col = 0; col < L; col++)
    {
      xgap = isgap(x[col]);
      ygap = isgap(y[col]);

      if (xgap && ygap) continue;                       /* double gap stays put */

      if      (!xgap && !ygap) pool[0][npool[0]++] = col;
      else if (xgap)           pool[2][npool[2]++] = col;
      else                     pool[1][npool[1]++] = col;
    }

  /* Second, shuffle each class through its column list. At every step
   * a random entry of the list is exchanged (in xs and ys) with the
   * column stored in the last entry; the list then shrinks by one,
   * keeping the former last column in the slot that was drawn.
   * The classes are processed in the order both, x only, y only.
   */
  for (t = 0; t < 3; t++)
    for (n = npool[t]; n > 1; n--) 
      {
	pick = CHOOSE(n);
	a    = pool[t][pick];
	b    = pool[t][n-1];

	keepx = xs[a];   keepy = ys[a];
	xs[a] = xs[b];   ys[a] = ys[b];
	xs[b] = keepx;   ys[b] = keepy;

	pool[t][pick] = b;
      }

  free(pool[0]); free(pool[1]); free(pool[2]);
  return 1;
}

/* Function: QRNAIntShuffle()
 * Date:     ER, Tue Dec 18 14:42:18 CST 2001 [St. Louis]
 *
 *           Modification of SRE's QRNAShuffle()
 *
 * Purpose:  Shuffle in place an integer-coded pairwise alignment
 *           xs,ys (4 = gap, values below 4 = residues) while
 *           preserving the position of gaps.
 *           
 *           Columns are sorted into three classes which are shuffled
 *           independently of each other:
 *             (1) residue in both xs and ys,
 *             (2) residue in xs, gap in ys,
 *             (3) gap in xs, residue in ys.
 *
 *           Every column must fall into one of the three classes.
 *           Columns with a gap in both sequences are skipped and not
 *           counted, so an alignment containing such a column (or a
 *           value above 4) fails the counting check and Die()s.
 *
 * Args:     xs, ys: pairwise alignment [0..L-1], shuffled in place
 *           L:      number of alignment columns
 *
 * Returns:  1 (the only value this function returns).
 */
int
QRNAIntShuffle(int *xs, int *ys, int L)
{
  int *pool[3];        /* column lists: [0] both, [1] xs only, [2] ys only */
  int  npool[3];       /* number of columns in each list                   */
  int  i;
  int  t;              /* which list is being shuffled                     */
  int  n;              /* columns of the current list still unplaced       */
  int  pos;
  int  a, b;           /* the two alignment columns being exchanged        */
  int  xsym, ysym;

  /* First, collect the column positions of each of the three classes.
   */
  pool[0] = MallocOrDie(sizeof(int) * L);
  pool[1] = MallocOrDie(sizeof(int) * L);
  pool[2] = MallocOrDie(sizeof(int) * L);
  npool[0] = npool[1] = npool[2] = 0;

  for (i = 0; i < L; i++)
    {
      if      (xs[i] == 4 && ys[i] == 4) { continue; }
      else if (xs[i] <  4 && ys[i] <  4) { pool[0][npool[0]++] = i; }
      else if (xs[i] == 4)               { pool[2][npool[2]++] = i; }
      else if (ys[i] == 4)               { pool[1][npool[1]++] = i; }
    }

  /* the values are passed as arguments, not spelled inside the format */
  if (npool[0]+npool[1]+npool[2] != L) 
    Die("QRNAIntShuffle(): bad counting L=%d total=%d", L, npool[0]+npool[1]+npool[2]);

  /* Second, shuffle each class through its column list. At every step
   * a random entry of the list is exchanged (in xs and ys) with the
   * column stored in the last entry; the list then shrinks by one,
   * keeping the former last column in the slot that was drawn.
   * The classes are processed in the order both, xs only, ys only.
   */
  for (t = 0; t < 3; t++)
    for (n = npool[t]; n > 1; n--) 
      {
	pos = CHOOSE(n);
	a   = pool[t][pos];
	b   = pool[t][n-1];

	xsym  = xs[a];   ysym  = ys[a];
	xs[a] = xs[b];   ys[a] = ys[b];
	xs[b] = xsym;    ys[b] = ysym;

	pool[t][pos] = b;
      }

  free(pool[0]); free(pool[1]); free(pool[2]);
  return 1;
}


/* Function: QRNAIntConservedShuffle()
 * Date:     ER, Wed Dec  8 09:12:18 CST 2004 [St. Louis]
 *
 *           Modification of QRNAIntShuffle()
 *
 * Purpose:  Shuffle in place an integer-coded pairwise alignment
 *           xs,ys (4 = gap, values below 4 = residues) while
 *           preserving the position of gaps and the position of
 *           conserved and mutated columns.
 *           
 *           Columns are sorted into four classes which are shuffled
 *           independently of each other:
 *             (1) the same residue in xs and ys,
 *             (2) different residues in xs and ys,
 *             (3) residue in xs, gap in ys,
 *             (4) gap in xs, residue in ys.
 *
 *           Every column must fall into one of the four classes.
 *           Columns with a gap in both sequences are skipped and not
 *           counted, so an alignment containing such a column (or a
 *           value above 4) fails the counting check and Die()s.
 *
 * Args:     xs, ys: pairwise alignment [0..L-1], shuffled in place
 *           L:      number of alignment columns
 *
 * Returns:  1 (the only value this function returns).
 */
int
QRNAIntConservedShuffle(int *xs, int *ys, int L)
{
  int *pool[4];        /* column lists: [0] same residue, [1] different
			* residues, [2] xs only, [3] ys only             */
  int  npool[4];       /* number of columns in each list                 */
  int  total;
  int  i;
  int  t;              /* which list is being shuffled                   */
  int  n;              /* columns of the current list still unplaced     */
  int  pos;
  int  a, b;           /* the two alignment columns being exchanged      */
  int  xsym, ysym;

  /* First, collect the column positions of each of the four classes.
   */
  for (t = 0; t < 4; t++) {
    pool[t]  = MallocOrDie(sizeof(int) * L);
    npool[t] = 0;
  }

  for (i = 0; i < L; i++)
    {
      if (xs[i] == 4 && ys[i] == 4) continue; 

      if (xs[i] < 4 && ys[i] < 4) {
	if (xs[i] == ys[i]) pool[0][npool[0]++] = i; 
	else                pool[1][npool[1]++] = i; 
      }
      else if (xs[i] == 4)  pool[3][npool[3]++] = i; 
      else if (ys[i] == 4)  pool[2][npool[2]++] = i; 
    }

  /* the values are passed as arguments, not spelled inside the format */
  total = npool[0] + npool[1] + npool[2] + npool[3];
  if (total != L) 
    Die("QRNAIntConservedShuffle(): bad counting L=%d total=%d", L, total);

  /* Second, shuffle each class through its column list. At every step
   * a random entry of the list is exchanged (in xs and ys) with the
   * column stored in the last entry; the list then shrinks by one,
   * keeping the former last column in the slot that was drawn.
   * The classes are processed in the order same, different, xs only,
   * ys only.
   */
  for (t = 0; t < 4; t++)
    for (n = npool[t]; n > 1; n--) 
      {
	pos = CHOOSE(n);
	a   = pool[t][pos];
	b   = pool[t][n-1];

	xsym  = xs[a];   ysym  = ys[a];
	xs[a] = xs[b];   ys[a] = ys[b];
	xs[b] = xsym;    ys[b] = ysym;

	pool[t][pos] = b;
      }

  for (t = 0; t < 4; t++) 
    free(pool[t]);

  return 1;
}


/* Function: StripDegeneracy()
 * 
 * Purpose:  Convert degenerate nucleotides into a random choice
 *           of ACGU, compacting the string in place. Gaps are
 *           removed. On return the string is null-terminated right
 *           after the last residue written and *len holds the new
 *           length.
 *           
 *           Only upper-case symbols are accepted: anything that is
 *           not a gap, one of ACGU, or one of the degenerate codes
 *           handled in the switch below ends in Die().
 *           
 * Args:     seq    - sequence to strip (null-terminated), modified
 *                    in place
 *           len    - in:  expected length of seq (checked against the
 *                         number of characters actually read);
 *                    out: length after gap removal
 *           
 * Return:   (void)
 */
void
StripDegeneracy(char *seq, int *len)
{
  char       *wp;      /* write pointer (where we're writing seq)       */
  char       *rp;      /* read pointer (where we're reading seq)        */
  const char *pool;    /* residues a degenerate symbol may stand for    */
  int         length = 0;

  for (wp = rp = seq; *rp != '\0'; rp++)
    {
      length += 1;
      if (isgap(*rp)) continue; /* do not allow gaps */
      if (strchr("ACGU", *rp)) { *wp++ = *rp; continue; }

      /* then it's a degenerate symbol: choose a single residue to
       * represent it. T and Z map to a fixed residue and draw no
       * random number.
       */
      pool = NULL;
      switch (*rp) {
      case 'B': pool = "CGU";  break;
      case 'D': pool = "AGU";  break;
      case 'H': pool = "ACU";  break;
      case 'K': pool = "GU";   break;
      case 'M': pool = "AC";   break;
      case 'N': pool = "ACGU"; break;
      case 'R': pool = "AG";   break;
      case 'S': pool = "CG";   break;
      case 'T': *wp++ = 'U';   break;
      case 'V': pool = "ACG";  break;
      case 'W': pool = "AU";   break;
      case 'X': pool = "ACGU"; break;
      case 'Y': pool = "CU";   break;
      case 'Z': *wp++ = 'A';   break;
      default: Die("unrecognized character %c in sequence\n", *rp);
      }
      if (pool != NULL) *wp++ = pool[random() % (int) strlen(pool)];
    }

  /* a bit of paranoia 
   */
  if (length != *len) Die ("something is wrong when reading the sequence (%d, %d)", length, *len);

  /* rp now sits on the original terminator and wp just past the last
   * residue written; the distance between them is the number of
   * characters (gaps) that were dropped.
   */
  *len -= (int) (rp - wp);

  /* Terminate the compacted string so that no stale tail is left
   * behind. The buffer is deliberately not reallocated: this function
   * has no way to hand a moved pointer back to the caller.
   */
  *wp = '\0';
}

/* Function: StripSomeDegeneracy()
 * 
 * Purpose:  Convert degenerate nucleotides into a random choice
 *           of ACGU, in place, over the first len characters of seq.
 *           Gap characters and A,C,G,U are kept as they are
 *           (gaps are not removed).
 *           
 *           Only upper-case symbols are accepted: anything that is
 *           not a gap, one of ACGU, or one of the degenerate codes
 *           handled in the switch below ends in Die().
 *           
 * Args:     seq    - sequence to strip, modified in place
 *           len    - number of characters of seq to process
 *           
 * Return:   (void)
 */
void
StripSomeDegeneracy(char *seq, int len)
{
  char       *dst = seq;   /* where the next symbol is written        */
  char       *src = seq;   /* where the next symbol is read           */
  const char *pool;        /* residues a degenerate symbol stands for */
  char        sym;
  int         k;

  for (k = 0; k < len; k++, src++)
    {
      sym = *src;

      /* gaps and plain residues are copied through */
      if (isgap(sym) || strchr("ACGU", sym)) { 
	*dst++ = sym; 
	continue; 
      }

      /* degenerate symbol: pick one of the residues it represents.
       * T and Z map to a fixed residue and draw no random number.
       */
      pool = NULL;
      switch (sym) {
      case 'B': pool = "CGU";  break;
      case 'D': pool = "AGU";  break;
      case 'H': pool = "ACU";  break;
      case 'K': pool = "GU";   break;
      case 'M': pool = "AC";   break;
      case 'N': pool = "ACGU"; break;
      case 'R': pool = "AG";   break;
      case 'S': pool = "CG";   break;
      case 'T': *dst++ = 'U';  break;
      case 'V': pool = "ACG";  break;
      case 'W': pool = "AU";   break;
      case 'X': pool = "ACGU"; break;
      case 'Y': pool = "CU";   break;
      case 'Z': *dst++ = 'A';  break;
      default:  Die("unrecognized character %c in sequence\n", sym);
      }

      if (pool != NULL) 
	*dst++ = pool[random() % (int) strlen(pool)];
    }
}

/* Function: ToFold()
 * Date:     ER, Mon 12 April 1999 [St. Louis]
 *
 * Purpose:  Work out the first and last position (and the length) of
 *           the fragment of a sequence to operate on, given the
 *           requested limits.
 *
 *           The fragment defaults to the whole sequence,
 *           0 .. sqinfo.len-1. A positive pos_init moves the start;
 *           a nonzero pos_end smaller than sqinfo.len-1 moves the end.
 *
 * Args:     sqinfo    - sequence info; only len and name are used
 *           pos_init  - requested first position (0 = from the start)
 *           pos_end   - requested last position (0 = to the end)
 *           ret_begin - RETURN: first position of the fragment
 *           ret_end   - RETURN: last position of the fragment
 *           ret_len   - RETURN: ret_end - ret_begin + 1
 *
 * Returns:  void (fills ret_begin, ret_end, ret_len).
 *           Die()s if pos_init is negative or if the end falls before
 *           the start.
 *           NOTE(review): when pos_init >= sqinfo.len only a warning
 *           is issued and the function returns WITHOUT writing any of
 *           the three outputs - confirm callers initialize them.
 */
void
ToFold(SQINFO sqinfo, int pos_init, int pos_end, int *ret_begin, int *ret_end, int *ret_len)
{
  int first;
  int last;

  /* check the requested start 
   */
  if (pos_init < 0) 
    Die ("Sorry, starting position has to be positive or zero.");
  else if (pos_init >= sqinfo.len) 
    { 
      Warn("Starting limit, %d,  does not apply for sequence %s (L = %d)", 
	   pos_init, sqinfo.name, sqinfo.len); 
      return;       /* outputs are left untouched */
    }  

  /* whole sequence unless the limits say otherwise 
   */
  first = (pos_init > 0) ? pos_init : 0;
  last  = sqinfo.len - 1;
  if (pos_end != 0 && pos_end < last) 
    last = pos_end;
  
  /* consistency check for fragment ends
   */
  if (last < first) Die("Sorry, the sequence has to have positive length.");
  
  *ret_len   = last - first + 1;
  *ret_begin = first;
  *ret_end   = last;
}

/* Function: WriteSeqinfo()
 * 
 * Purpose:  Write to outf a short header describing the sequence
 *           being processed: its name, the running base count, its
 *           1-based number, its description (only when the
 *           SQINFO_DESC flag is set) and its length.
 *           
 *           Constructed for the one-hole algorithm.
 *
 * Args:     outf    - open output stream
 *           sqinfo  - sequence info (name, desc, len, flags are read)
 *           nfile   - not used by this function
 *           nseq    - 0-based sequence index; printed as nseq+1
 *           nbases  - value printed on the SOFAR_BASES line
 */
void
WriteSeqinfo(FILE *outf, SQINFO *sqinfo, int nfile, int nseq, int nbases)
{
  int k;

  /* two separator lines open the header */
  for (k = 0; k < 2; k++)
    fputs("---------------------------------------------------\n", outf);

  fprintf(outf, "DBASE_NAM  %s\n", sqinfo->name);
  fprintf(outf, "SOFAR_BASES  %d\n", nbases);
  fprintf(outf, "SEQ_NUMBER  %d\n", nseq+1);

  /* the description line is optional ... */
  if (sqinfo->flags & SQINFO_DESC) {
    fprintf(outf, "SEQ_NAM  %s \n", sqinfo->desc);
  }

  /* ... the length line is always written */
  fprintf(outf, "SEQ_BASES %d\n", sqinfo->len);

  fputs("---------------------------------------------------\n", outf);
}

/* Function: WriteSeqkn()
 * 
 * Purpose:  Writes to ofp a header for the sequence (NAM, optional
 *           SRC and DES, then SEQ or "SEQ  +SS") followed by the
 *           sequence itself in lines of 15 residues, each line
 *           prefixed with the 1-based coordinate of its first residue.
 *
 *           When the SQINFO_SS flag is set, every sequence line is
 *           followed by a line with the 0-based position of each
 *           residue and a line with the ret_ss[] value of each
 *           residue.
 *
 *           "." are used for single-stranded stuff.
 *           Note that structure is defined by pairwise emissions,
 *           not by Watson-Crick-isms and stacking rules.
 *
 *           A final line "number pairs = ..." reports half the number
 *           of positions whose sqinfo->ss[] character is not '.'.
 *
 * Args:     ofp     - open output stream
 *           seq     - sequence to print
 *           sqinfo  - sequence info (name, flags, len, ss, and the
 *                     optional id/acc/start/stop/olen/desc fields)
 *           ret_ss  - per-position integers printed on the structure
 *                     line for positions where sqinfo->ss[] != '.'
 *
 * Returns:  number of sequence lines written.
 *
 * NOTE(review): sqinfo->ss[i] is read for every position even when
 *           the SQINFO_SS flag is not set - confirm that callers
 *           always provide it.
 */

int
WriteSeqkn(FILE *ofp, char *seq, SQINFO *sqinfo, int *ret_ss)
{
  int   numline = 0;
  int   lines = 0, spacer = 5, width = 15, tab = 0;
  int   i, j, l, l1, ibase, m;
  char  endstr[10]; 
  char  s[2000];		  /* line buffer: residues and blank filler slots */
  int   pos[2000];		  /* 0-based sequence position of each residue slot */
  int   ss[2000];		  /* structure value, or a sentinel, for each slot */
  int   seqlen; 
  int   checksum;
  int   dostruc;		  /* TRUE to print position and structure lines */
  int   pairs = 0;                /* positions with sqinfo->ss[] != '.' */

  dostruc    = FALSE;		
  /* trust sqinfo->len only when the SQINFO_LEN flag says it is valid */
  seqlen     = (sqinfo->flags & SQINFO_LEN) ? sqinfo->len : strlen(seq);

  strcpy( endstr,"");
  l1 = 0;

  /* 10Nov91: write this out in all possible formats: */
  /* (the checksum is computed but not used again in this function) */
  checksum = GCGchecksum(seq, seqlen);

  fprintf(ofp, "NAM  %s\n", sqinfo->name);

  /* SRC line: written if any of its fields is available; missing
   * fields are shown as "-" or 0 */
  if (sqinfo->flags & (SQINFO_ID | SQINFO_ACC | SQINFO_START | SQINFO_STOP | SQINFO_OLEN))
    fprintf(ofp, "SRC  %s %s %d..%d::%d\n",
	    (sqinfo->flags & SQINFO_ID)    ? sqinfo->id     : "-",
	    (sqinfo->flags & SQINFO_ACC)   ? sqinfo->acc    : "-",
	    (sqinfo->flags & SQINFO_START) ? sqinfo->start  : 0,
	    (sqinfo->flags & SQINFO_STOP)  ? sqinfo->stop   : 0,
	    (sqinfo->flags & SQINFO_OLEN)  ? sqinfo->olen   : 0);

  if (sqinfo->flags & SQINFO_DESC)
    fprintf(ofp, "DES  %s\n", sqinfo->desc);

  if (sqinfo->flags & SQINFO_SS) {
    fprintf(ofp, "SEQ  +SS\n");
    dostruc = TRUE;	/* print structure lines too */
  }
  else
    fprintf(ofp, "SEQ\n");

  numline = 1;                /* number seq lines w/ coords  */
  strcpy(endstr, "\n");       /* adds an empty line after each block */

  /* i walks the sequence, l the line buffers, l1 counts the residues
   * (not the filler slots) placed on the current line */
  for (i=0, l=0, ibase = 1, lines = 0; i < seqlen; ) {
    /* l1 starts at 0 and is only incremented or reset to 0, so the
     * first branch is not taken; l1 == 0 marks the start of a line */
    if (l1 < 0) 
      l1 = 0;
    else if (l1 == 0) {
      if (numline) 
	fprintf(ofp,"%8d ",ibase);
      for (j=0; j<tab; j++) 
	fputc(' ',ofp);
    }
      
    /* Pad up to the next multiple of spacer: after a residue at slot
     * 0, 5, 10, ... the four tests below fire one after another and
     * insert four blank slots. 1234 marks a filler slot in ss[].
     */
    if (spacer != 0 && l%spacer == 1) {
      s[l] = ' '; 
      ss[l] = 1234; 
      l++;
    }
    
    if (spacer != 0 && l%spacer == 2) {
      s[l] = ' '; 
      ss[l] = 1234; 
      l++;
    }
      
    if (spacer != 0 && l%spacer == 3) {
      s[l] = ' '; 
      ss[l] = 1234; 
      l++;
    }

    if (spacer != 0 && l%spacer == 4) {
      s[l] = ' '; 
      ss[l] = 1234; 
      l++;
    }

    /* store the residue and its position */
    pos[l] = i;
    s[l]   = *(seq+i);
     
    /* 56789 is the sentinel for a '.' position; it is printed as "." */
    if (sqinfo->ss[i] != '.') {
      ss[l]  = ret_ss[i];
      pairs += 1;
    }
    else 
      ss[l] = 56789;
    
    l++; i++;
    l1++;                 /* don't count spaces for width*/
    /* flush when the line is full or the sequence is exhausted */
    if (l1 == width || i == seqlen) {
      s[l]  = '\0';
      ss[l] = 888888;     /* end marker; lies beyond the slots printed below */
      
      if (dostruc) {
	fprintf(ofp, "%s\n", s);
	
	/* position line: blank prefix as wide as the "%8d " coordinate */
	if (numline) 
	  fprintf(ofp,"         ");
	
	for (j=0; j<tab; j++) 
	  fputc(' ',ofp);
	
	/* one number per residue slot (filler slots are skipped);
	 * numbers of up to 3 digits are padded to 5 columns, larger
	 * ones get a single trailing blank */
	for (m=0; m<l; m++)
	  if (s[m] != ' '  &&  pos[m] <= 9) 
	    fprintf(ofp,"%d    ", *(pos+m));
	  else if (s[m] != ' '  && pos[m] > 9 && pos[m] <= 99) 
	    fprintf(ofp,"%d   ", *(pos+m));
	  else if (s[m] != ' '  && pos[m] > 99 && pos[m] <= 999) 
	    fprintf(ofp,"%d  ", *(pos+m));
	  else if (s[m] != ' ') 
	    fprintf(ofp,"%d ", *(pos+m));
	
	fprintf(ofp,"\n");
	
	/* structure line, laid out like the position line */
	if (numline) 
	  fprintf(ofp,"         ");
	
	for (j = 0; j < tab; j++) 
	  fputc(' ',ofp);
	
	/* same padding scheme; slots holding the 56789 sentinel fall
	 * through to the last branch and are printed as "." */
	for (m = 0; m < l; m++)
	  if (s[m] != ' '  && ss[m] <= 9 && ss[m] != 56789)
	    fprintf(ofp,"%d    ", *(ss+m));
	  else if (s[m] != ' '  && ss[m] > 9 && ss[m] <= 99 && ss[m] != 56789)
	    fprintf(ofp,"%d   ", *(ss+m));
	  else if (s[m] != ' '  && ss[m] > 99 && ss[m] <= 999 && ss[m] != 56789)
	    fprintf(ofp,"%d  ", *(ss+m));
	  else if (s[m] != ' ' && ss[m] != 56789)
	    fprintf(ofp,"%d ", *(ss+m));
	  else if (s[m] != ' ') 
	    fprintf(ofp, ".    ");
	
	fprintf(ofp,"%s\n",endstr);
      }
      else {
	/* sequence only; endstr is appended after the last line */
	if (i == seqlen) fprintf(ofp,"%s%s\n",s,endstr);
	else fprintf(ofp,"%s\n",s);
      }
      /* start a new line */
      l = 0; l1 = 0;
      lines++;
      ibase = i+1;
    }
  }

  /* every counted position contributes half a pair */
  fprintf(ofp, "number pairs = %f\n", pairs/2.0);
  return lines;
} 



