/*-----------------------------------------------------------------------------

   QUASAR - q-gram Alignment based on Suffix ARrays

   Copyright (C) 1998 Stefan Burkhardt
   Author: Stefan Burkhardt <stburk@mpi-sb.mpg.de>
   This file is part of the QUASAR package.

   QUASAR is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   QUASAR is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the QUASAR package; see the file copying.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  or contact the author. 

-------------------------------------------------------------------------------

  sequence set module
  
  $File$
  $Revision: 1.6 $
  $Date: Wed, 29 Mar 2000 11:07:45 +0200 $

-----------------------------------------------------------------------------*/

#include <string.h>
#include "q_misc.h"
#include "q_options.h"
#include "q_defines.h"
#ifndef _Q_SSET_H
#define _Q_SSET_H

/* String buffer length used for reading Fasta files, filename manipulation
   and system commands.
   TODO: this is a fixed limit; dynamic allocation would be better. */
#define STRING_BUF	2048	

/* Basic quasar datastructure for sequences.
 *
 *   size     number of sequences stored in the set
 *   offsets  offsets[i] is the offset of sequence i inside data;
 *            offsets[size] is the position of the last SEP_CHAR + 1
 *   data     the sequences, each one terminated with SEP_CHAR
 */
struct q_sset {
  int   size;
  int  *offsets;
  char *data;
};

typedef struct q_sset Q_SSet;


/* Accessor macros for a sequence set.  sset is a pointer to a Q_SSet, seq is
   a sequence number.  All arguments and all expansions are parenthesized so
   the macros can be used safely inside larger expressions (e.g.
   2 * Bases(s, i) or *Sequence(s, i)) and with arbitrary argument
   expressions (e.g. Start(&set, i + 1)).
   Bases and Qgrams expand sset and seq twice and Sequence expands sset
   twice, so do not pass arguments with side effects. */
#define Start(sset, seq)	\
		((sset)->offsets[(seq)])
		/* returns index of first character in sequence seq */
#define End(sset, seq)		\
		((sset)->offsets[(seq) + 1] - 1)
		/* returns index of SEP_CHAR in sequence seq */
#define	Bases(sset, seq)	\
		(End(sset, seq) - Start(sset, seq))
		/* returns number of bases in sequence seq */
#define Qgrams(sset, seq, q)	\
		(Bases(sset, seq) - (q) + 1)
		/* returns number of q-grams in sequence seq */

#define Sequence(sset, seq)	\
		((sset)->data + Start(sset, seq))
		/* returns a pointer to the first character of seq */


Q_SSet *NewSSet(void);
/*  Creates and initializes a sequence set datastructure.
    Takes no arguments; declared with (void) so that the compiler
    rejects calls that pass arguments. */

void DeleteSSet(Q_SSet *sset);
/*  Frees the memory allocated for the sequence set sset */

Q_SSet *ReadSSet(char *infile);
/*  Reads a sequence set from the file named infile.sset.
    NOTE(review): presumably the returned set is released with
    DeleteSSet - confirm against the implementation. */

char *GetReverseSSetSequence(Q_SSet *sset, int seq);
/*  Returns the reverse complement of sequence seq in sset, terminated
    by \0. The result is allocated inside this function.
    NOTE(review): presumably the caller has to free it - confirm. */

LONG *GetSSetQgrams(Q_SSet *sset, int seq, Q_Options *op);
/*  Writes the i-gram numbers of all i-grams in the sequence with number
    seq in sset to an integer array and returns that array. The number of
    tuples is written to the first array position.
    i is the length of the q-grams used in the index; q is the real length
    of the q-grams used in the search.
    NOTE(review): i and q are not parameters here; presumably both are
    taken from op - confirm. */

LONG *GetReverseSSetQgrams(Q_SSet *sset, int seq, Q_Options *op);
/*  Writes the i-gram numbers of all i-grams in the reverse complement of
    the sequence with number seq in sset to an integer array and returns
    that array. The number of tuples is written to the first array position.
    i is the length of the q-grams used in the index; q is the real length
    of the q-grams used in the search.
    NOTE(review): i and q are not parameters here; presumably both are
    taken from op - confirm. */

void FindSSetEntries(Q_SSet *sset, int *zones);
/*  Finds the sset entries contained in the areas of the db listed
    in zones. zones is overwritten with the numbers of those sset
    entries. */

void PrintSSetInfo(Q_SSet *sset);
/*  Prints the size and the offsets stored in sset */

void PrintSSet(Q_SSet *sset);
/*  Prints the complete sequence set sset */

void WriteSSet(Q_SSet *sset, char *outfile);
/*  Writes all sequences in sset out to outfile in ASCII format */

void PrintSSetEntry(Q_SSet *sset, int seq);
/*  Prints the sequence with number seq in sset */

void PrintSSetEntryQgrams(Q_SSet *sset, int seq, int q);
/*  Prints all q-grams of length q in the sequence with number seq in sset */

void FastaToSSet(char *fastafile);
/*  Reads the Fasta file named fastafile and produces the following files:
    name.sset    :	sequence set
    name.raw     :	raw data (sequences in ASCII, separated by SEP_CHAR)
    name.headers :	headers (headers in ASCII)
    NOTE(review): how "name" is derived from fastafile is not visible
    here - confirm against the implementation. */

#endif  /* _Q_SSET_H */
