/*-----------------------------------------------------------------------------

   QUASAR - q-gram Alignment based on Suffix ARrays

   Copyright (C) 1998 Stefan Burkhardt
   Author: Stefan Burkhardt <stburk@mpi-sb.mpg.de>
   This file is part of the QUASAR package.

   QUASAR is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   QUASAR is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the QUASAR package; see the file copying.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  or contact the author. 

-------------------------------------------------------------------------------

  blast interface/filter output module
  
  $File$
  $Revision: 1.15 $
  $Date: Wed, 29 Mar 2000 11:07:45 +0200 $

-----------------------------------------------------------------------------*/

#include "q_out.h"

void WriteFilteredToDisk(int *fb, int query_num, Q_Headers *headers, Q_SSet *db)
  /* Dumps every database entry selected by fb into the text file
     "quasar_hits_q<query_num>".  fb[0] is the upper index limit of fb;
     fb[1],fb[2] / fb[3],fb[4] / ... are inclusive (first, last) sequence
     number pairs.  For each selected entry the stored header bytes are
     written, followed by the sequence string and a newline.  Sequence
     numbers that are not below db->size are reported on stdout and
     skipped. */
{
  char  filename[1000];         /* fixed prefix plus one int: cannot overflow */
  FILE  *out;
  int   blk;
  int   seq;

  sprintf(filename, "quasar_hits_q%d", query_num);
  out = MyFopen(filename, "w");

  for (blk = 1; blk < fb[0]; blk += 2) {
    for (seq = fb[blk]; seq <= fb[blk+1]; seq++) {
      if (seq >= db->size) {
        /* complain, but keep going with the remaining entries */
        printf("ERROR: sequence out of db-range (%d)\n", seq);
        printf("LAST SEQUENCE IN db-sset: %d\n", db->size);
        continue;
      }
      fwrite(Header(headers, seq), sizeof(char), Length(headers, seq), out);
      fprintf(out, "%s\n", Sequence(db, seq));
    }
  }

  fclose(out);
}

int WriteNINHeader(int *nin, int lmax, int seq_num, int bases)
  /* Fills in the fixed leading part of the .nin image and returns the index
     of the first slot after it (always 15).  nin[0] is left untouched.
     The 24 byte text field is copied to the bytes starting at nin[5]; the
     next slot written is nin[11], so the layout assumes sizeof(int) == 4. */
{
  static const char banner[] = "Commercials here!!!!!!!!";
  const int     banner_len = (int)(sizeof banner - 1);  /* 24, NUL excluded */

  nin[1] = 3;                   /* formatdb version */
  nin[2] = 0;                   /* seq_type */
  nin[3] = 0;                   /* title_length */
  nin[4] = banner_len;          /* date_length */

  /* header message (usually the date) */
  memcpy(nin + 5, banner, banner_len);

  nin[11] = seq_num;            /* number of sequences */
  nin[12] = bases;              /* number of bases in db */
  nin[13] = lmax;               /* length of longest seq in db */
  nin[14] = 0;                  /* offset of first header in nhr */

  return 15;
}


int WriteHeader(int *nin, char *nhr, int db_num, int hdr_num, Q_Headers *headers)
  /* Writes one .nhr record at nhr: the text "gnl|BL_ORD_ID|<db_num> "
     followed by the stored header hdr_num without its first byte.
     nin[0] receives the offset following this record, computed from the
     previous offset in nin[-1], so nin must point past at least one valid
     entry.  Returns the number of bytes the caller has to advance in nhr.
     NOTE: Length-1 bytes of the stored header are copied but the returned
     advance only counts Length-2 of them, so the last copied byte is
     overwritten by whatever is written next (presumably this drops a
     trailing newline, and the skipped first byte is the '>' -- confirm
     against Q_Headers). */
{
  char  prefix[100];            /* literal plus one int: far below 100 chars */
  int   prefix_len;
  int   advance;

  sprintf(prefix, "gnl|BL_ORD_ID|%d ", db_num);
  prefix_len = strlen(prefix);
  advance = prefix_len + Length(headers, hdr_num) - 2;

  memcpy(nhr, prefix, prefix_len * sizeof(char));
  memcpy(nhr + prefix_len,
         Header(headers, hdr_num) + 1,
         (Length(headers, hdr_num) - 1) * sizeof(char));

  nin[0] = nin[-1] + advance;
  return advance;
}

static int BlastBaseCode(int base)
  /* Maps one sequence byte to its 2-bit code: 'C' -> 1, 'G' -> 2, 'T' -> 3,
     every other value (including 'A') -> 0.  Unlike a lookup table this is
     defined for every possible byte value, including negative chars. */
{
  switch (base) {
  case 'C': return 1;
  case 'G': return 2;
  case 'T': return 3;
  default:  return 0;
  }
}

int WriteSequence(int *nin, char *nsq, int seq_num, Q_SSet *db)
  /* Writes the sequence with number seq_num to nsq, packed four bases per
     byte (first base in the two most significant bits), and its end offset
     to nin[0] (previous offset nin[-1] plus the bytes written, so nin must
     point past at least one valid entry).  A final byte is always appended:
     its two low bits hold the number of leftover bases (0..3) and the
     leftover bases themselves occupy the high bits.
     Returns the number of bytes written to nsq, i.e. Bases/4 + 1.

     The former 85-entry conversion table was indexed with the raw sequence
     byte, so any byte above 'T' (e.g. 'U', 'W', 'Y', lowercase letters) or
     a negative char read outside the table.  BlastBaseCode() yields the
     same codes for every byte the table covered and 0 for all others. */
{
  int   n_bases = Bases(db, seq_num);
  int   full_bytes = n_bases >> 2;      /* bytes holding four bases each */
  int   leftover = n_bases % 4;         /* bases that go into the last byte */
  int   i;
  int   packed;

  for (i = 0; i < full_bytes; i++) {
    packed  = BlastBaseCode(db->data[Start(db, seq_num) + (i<<2)])   << 6;
    packed += BlastBaseCode(db->data[Start(db, seq_num) + (i<<2)+1]) << 4;
    packed += BlastBaseCode(db->data[Start(db, seq_num) + (i<<2)+2]) << 2;
    packed += BlastBaseCode(db->data[Start(db, seq_num) + (i<<2)+3]);
    nsq[i] = (char)packed;
  }
#ifdef DEBUG
  printf("Sequence(%d): %s\n", Bases(db, seq_num), Sequence(db, seq_num));
#endif

  /* Last byte: leftover count in the low bits, leftover bases above it.
     With no leftover bases this is simply a zero byte. */
  packed = leftover;
  if (leftover != 0) {
    packed += BlastBaseCode(db->data[Start(db, seq_num) + (full_bytes<<2)]) << 6;
    if (leftover > 1)
      packed += BlastBaseCode(db->data[Start(db, seq_num) + (full_bytes<<2)+1]) << 4;
    if (leftover > 2)
      packed += BlastBaseCode(db->data[Start(db, seq_num) + (full_bytes<<2)+2]) << 2;
  }
  nsq[full_bytes] = (char)packed;

  nin[0] = nin[-1] + full_bytes + 1;
  return full_bytes + 1;
}


int **CreateBlastDB(Q_SSet *db, int *fb, Q_Headers *headers)
  /*  Creates 3 memory blocks (pointed to by blastdb[0], blastdb[1] and
      blastdb[2]) that contain the NCBI-BLAST database templates for
      the files .nin, .nhr and .nsq

      fb[0] is the upper index limit of fb; fb[1],fb[2] / fb[3],fb[4] / ...
      are inclusive (first, last) sequence number pairs.  A sequence number
      that is not below db->size terminates the program with exit(-1).

      The first int of every block is a length slot: blastdb[0][0] is the
      number of ints that follow in the .nin image, blastdb[1][0] and
      blastdb[2][0] are the number of payload bytes that follow in the .nhr
      and .nsq images.  The code assumes sizeof(int) == 4 for these slots.

      All four allocations come from MyMalloc; the caller owns them and
      releases blastdb[0], blastdb[1], blastdb[2] and blastdb itself.

      The .nhr and .nsq buffers are sized to hard upper bounds.  The earlier
      "some extra space" sizes (4*hlen and 4*(bases/4) bytes) were too small
      for very short headers or sequences and for an empty selection, where
      the length slot and lead byte were written into a zero-sized block. */
{
  enum {
    LEN_SLOT = 4,               /* bytes reserved in front for the length */
    HDR_PREFIX_MAX = 32         /* >= "gnl|BL_ORD_ID|" + 10 digits + ' ' (25) */
  };
  int   i, j;
  int   db_num = 0;
  int   written = 0;
  int   bases = 0;
  int   hlen = 0;
  int   lmax = 0;
  int   seq_num = 0;
  int   nin_ct;
  int   nhr_ct;
  int   nsq_ct;
  int   *nin;
  char  *nhr;
  char  *nsq;
  int   **blastdb;

#ifdef DEBUG
  printf("starting CreateBlastDB\n");
#endif

  /* Pass 1: count sequences, bases and header bytes; find longest sequence */
  for(i=1; i<fb[0]; i+=2) {
    for(j=fb[i]; j<=fb[i+1]; j++) {
      if(j<db->size) {
        seq_num++;
        if (Bases(db,j) > lmax) {
          lmax = Bases(db,j);
        }
        bases += Bases(db, j);
        hlen += Length(headers, j);
      }
      else {
        printf("ERROR: sequence out of db-range (%d)\n",j);
        printf("LAST SEQUENCE IN db-sset: %d\n",db->size);
        exit(-1);
      }
    }
  }

#ifdef DEBUG
  printf("sequences: %d\nbases: %d\nheader_length: %d\nlmax:%d\n", seq_num, bases, hlen, lmax);
#endif

  /* .nhr needs at most LEN_SLOT + hlen + 23*seq_num + 1 bytes: every record
     is prefix (<= 25) + Length - 2 bytes, plus one trailing byte that the
     last record copies but does not count.  The size below is never smaller
     than that and never smaller than the former 4*hlen. */
  nhr = (char *)MyMalloc(sizeof(int) * hlen
                         + sizeof(char) * HDR_PREFIX_MAX * seq_num
                         + 2 * sizeof(int), ".nhr map");

  /* .nsq needs LEN_SLOT + 1 lead byte + sum(Bases/4 + 1) bytes, which is at
     most 5 + bases/4 + seq_num. */
  nsq = (char *)MyMalloc(sizeof(int) * (bases/4)
                         + sizeof(char) * seq_num
                         + 2 * sizeof(int), ".nsq map");

  /* .nin uses 17 + 3*seq_num ints in total */
  nin = (int *)MyMalloc(sizeof(int) * (100 + seq_num * 4), ".nin map");

  nhr_ct = LEN_SLOT;
  nsq_ct = LEN_SLOT;
  nin_ct = WriteNINHeader(nin, lmax, seq_num, bases);

  /* Header records; each call appends the running .nhr offset to nin */
  for(i=1; i<fb[0]; i+=2) {
    for(j=fb[i]; j<=fb[i+1]; j++) {
      written = WriteHeader(nin + nin_ct, nhr + nhr_ct, db_num, j, headers);
      db_num++;
      nhr_ct += written;
      nin_ct++;
    }
  }

  /* The sequence data starts with a single zero byte, so the first
     sequence offset is 1 */
  nin[nin_ct] = 1;
  nin_ct++;
  nsq[nsq_ct] = '\0';
  nsq_ct++;

  /* Packed sequences; each call appends the running .nsq offset to nin */
  for(i=1; i<fb[0]; i+=2) {
    for(j=fb[i]; j<=fb[i+1]; j++) {
      written = WriteSequence(nin + nin_ct, nsq + nsq_ct, j, db);
      nsq_ct += written;
      nin_ct++;
    }
  }

  /* Third offset table: one entry per selected sequence (pass 1 counted
     exactly seq_num of them), each a copy of the entry seq_num slots
     earlier, followed by a repeat of the last value.  Presumably this is
     the ambiguity-offset table of the .nin format -- confirm. */
  for(i=0; i<seq_num; i++) {
    nin[nin_ct] = nin[nin_ct-seq_num];
    nin_ct++;
  }
  nin[nin_ct] = nin[nin_ct-1];
  nin_ct++;

  blastdb = (int **)MyMalloc(sizeof(int *)*3, "ptrs to .nin, .nsq and .nhr");
  blastdb[0] = (int *)nin;
  blastdb[0][0] = nin_ct - 1;           /* ints following the length slot */
  blastdb[1] = (int *)nhr;
  blastdb[1][0] = nhr_ct - LEN_SLOT;    /* payload bytes in .nhr */
  blastdb[2] = (int *)nsq;
  blastdb[2][0] = nsq_ct - LEN_SLOT;    /* payload bytes in .nsq */
#ifdef DEBUG
  printf("nin: %d bytes\nnhr: %d bytes\nnsq: %d bytes\n", blastdb[0][0], blastdb[1][0], blastdb[2][0]);
#endif
  return blastdb;
}

void WriteBlastDB(int **blastdb)
  /* Writes the three in-memory images to "filtered_db.nin",
     "filtered_db.nhr" and "filtered_db.nsq".  In each image the first int
     is a length slot that is not written itself: for .nin it is the number
     of ints that follow, for .nhr and .nsq the number of bytes that
     follow. */
{
  int   *nin = blastdb[0];
  int   *nhr = blastdb[1];
  int   *nsq = blastdb[2];
  FILE  *fp;

  fp = MyFopen("filtered_db.nin", "wb");
  fwrite(nin + 1, sizeof(int), nin[0], fp);
  fclose(fp);

  fp = MyFopen("filtered_db.nhr", "wb");
  fwrite(nhr + 1, sizeof(char), nhr[0], fp);
  fclose(fp);

  fp = MyFopen("filtered_db.nsq", "wb");
  fwrite(nsq + 1, sizeof(char), nsq[0], fp);
  fclose(fp);
}


void CallBlastBySystem(Q_Headers *headers, Q_SSet *queries,int query_num, Q_SSet *db, int *fb)
  /* Runs the external NCBI tools on the filtered data, passing everything
     via disk: the query goes to "blast_query", the entries selected by fb
     go to "filtered_db" (header bytes, sequence string, newline), then
     formatdb and blastall are started with system().  blastall writes to
     "correct_hits_q<query_num>" and is given db->offsets[db->size] as -z.
     Wall-clock timestamps taken before and after each phase are printed
     to stdout.  Sequence numbers that are not below db->size are reported
     and skipped. */
{
  char  cmd[1000];              /* only holds the short blastall command */
  FILE  *fp;
  struct timeval        now;
  long  start_sec, start_usec;
  long  wrote_sec, wrote_usec;
  long  fmt_sec, fmt_usec;
  long  blast_sec, blast_usec;
  int   blk;
  int   seq;

  gettimeofday(&now, NULL);
  start_sec = now.tv_sec;
  start_usec = now.tv_usec;
  printf("Started processing filtered data  at: %ld / %ld (%ld sec %ld microsec)\n", start_sec, start_usec, start_sec, start_usec%1000000);

  /* query file */
  fp = MyFopen("blast_query", "w");
  fprintf(fp, ">query\n%s", queries->data+Start(queries, query_num));
  fclose(fp);

  /* filtered database as plain text */
  fp = MyFopen("filtered_db", "w");
  for (blk = 1; blk < fb[0]; blk += 2) {
    for (seq = fb[blk]; seq <= fb[blk+1]; seq++) {
      if (seq >= db->size) {
        printf("ERROR: sequence out of db-range (%d)\n", seq);
        printf("LAST SEQUENCE IN db-sset: %d\n", db->size);
        continue;
      }
      fwrite(Header(headers, seq), sizeof(char), Length(headers, seq), fp);
      fprintf(fp, "%s\n", db->data+Start(db, seq));
    }
  }
  fclose(fp);

  gettimeofday(&now, NULL);
  wrote_sec = now.tv_sec;
  wrote_usec = now.tv_usec;
  printf("Time for write: %ld / %ld (%ld sec %ld microsec)\n", wrote_sec-start_sec, wrote_usec-start_usec, wrote_sec, wrote_usec%1000000);

  /* The two system() calls below run NCBI BLAST; data is passed via disk */
  system("formatdb -i filtered_db -p F");

  gettimeofday(&now, NULL);
  fmt_sec = now.tv_sec;
  fmt_usec = now.tv_usec;
  printf("Time for formatdb: %ld / %ld (%ld sec %ld microsec)\n", fmt_sec-wrote_sec, fmt_usec-wrote_usec, fmt_sec, fmt_usec%1000000);

  sprintf(cmd, "blastall -p blastn -i blast_query -d filtered_db -o correct_hits_q%d -z %d", query_num, db->offsets[db->size]);
  system(cmd);

  gettimeofday(&now, NULL);
  blast_sec = now.tv_sec;
  blast_usec = now.tv_usec;
  printf("Time for blast: %ld / %ld (%ld sec %ld microsec)\n", blast_sec-fmt_sec, blast_usec-fmt_usec, blast_sec, blast_usec%1000000);
}

static void WriteBlastQueryFile(Q_SSet *queries, int query_num)
  /* Writes query query_num as a one-record file named "blast_query". */
{
  FILE  *fp = MyFopen("blast_query", "w");

  fprintf(fp, ">query\n%s", queries->data+Start(queries, query_num));
  fclose(fp);
}

static void ReleaseBlastDB(int **blastdb)
  /* Frees the three images and the pointer block made by CreateBlastDB. */
{
  int   k;

  for (k = 0; k < 3; k++)
    free(blastdb[k]);
  free(blastdb);
}

void ProcessHits(Q_Headers *headers, Q_SSet *queries,int query_num, Q_SSet *db, int *fb, int mode)
  /* Hands the filter result fb for query query_num to the back end selected
     by mode:
       DISK_FILTERED   write the selected entries to a text file
       DISK_FMT_BLAST  run formatdb and blastall via system()
       DISK_BLAST      build the BLAST db images in memory, write them to
                       disk and run blastall via system(); output goes to
                       "quasar_hits_q<query_num>"
       MEM_BLAST       build the images in memory and pass them to
                       CallBlast(); a non-zero result ends the program
     Each mode is tested independently.  In the last two modes the query is
     still passed via the file "blast_query". */
{
  int   **blastdb;
  char  cmd[1000];              /* command line or output name: short, fixed */
  FILE  *outfile = NULL;

#ifdef QUERY_WITH_HEADER
  char  *query;
  query = (char *)MyMalloc(sizeof(char) * (100 + Bases(queries, query_num)),
                           "query sequence for NCBI-BLAST");
  sprintf(query, ">query %d\n%s", query_num, Sequence(queries, query_num));
#endif

  if (mode == DISK_FILTERED) {
    WriteFilteredToDisk(fb, query_num, headers, db);
  }

  if (mode == DISK_FMT_BLAST) {
    /* formatdb and blastall are run through system(), data goes via disk */
    CallBlastBySystem(headers, queries, query_num, db, fb);
  }

  if (mode == DISK_BLAST) {
    /* build the three database images in ram, then put them on disk */
    blastdb = CreateBlastDB(db, fb, headers);
    WriteBlastDB(blastdb);
    /* temporary query file */
    WriteBlastQueryFile(queries, query_num);
    /* run blast through system() */
    sprintf(cmd, "blastall -p blastn -i blast_query -d filtered_db -o quasar_hits_q%d", query_num);
    system(cmd);
    /* give the images back */
    ReleaseBlastDB(blastdb);
  }

  if (mode == MEM_BLAST) {
    /* build the three database images in ram */
    blastdb = CreateBlastDB(db, fb, headers);
    /* the query still travels via disk */
    WriteBlastQueryFile(queries, query_num);

#ifdef OUTFILE
    /* this will (some day) be the output file */
    sprintf(cmd, "mblast_hits_q%d", query_num);
    outfile = MyFopen(cmd, "w");
#endif

#ifdef DEBUG
    printf("Calling Blast\n");
#endif
    /* hand everything to the in-process blast search engine wrapper */
    if (CallBlast(Sequence(queries, query_num), fb, db, headers, blastdb, outfile) != 0) {
      printf("ERROR IN BLAST\n");
      exit(-1);
    }
#ifdef OUTFILE
    fclose(outfile);
#endif
    ReleaseBlastDB(blastdb);
  }

#ifdef QUERY_WITH_HEADER
  free(query);
#endif
}
