/*-----------------------------------------------------------------------------

   QUASAR - q-gram Alignment based on Suffix ARrays

   Copyright (C) 1998 Stefan Burkhardt
   Author: Stefan Burkhardt <stburk@mpi-sb.mpg.de>
   This file is part of the QUASAR package.

   QUASAR is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   QUASAR is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the QUASAR package; see the file copying.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  or contact the author. 

-------------------------------------------------------------------------------

  block array module
  
  $File$
  $Revision: 1.4 $
  $Date: Wed, 16 Feb 2000 16:16:22 +0100 $

-----------------------------------------------------------------------------*/

#include "q_blocks.h"

Q_Blocks *NewBlocks()
/*  Creates and initializes a block array */
{
  Q_Blocks *blocks;
  
  blocks = (Q_Blocks *)MyMalloc(sizeof(Q_Blocks), "block array structure");
  blocks->n = 0;
  blocks->b = 0;
  blocks->t = 0;
  blocks->ba = NULL;

  return blocks;
}

void DeleteBlocks(Q_Blocks *blocks)
/*  Frees the counter array of blocks (if any) and then the descriptor
    itself.  Passing NULL is allowed and does nothing.  The pointer must
    not be used after this call. */
{
  if(blocks == NULL)
    return;

  /* free(NULL) is a no-op, so an unallocated counter array needs no test */
  free(blocks->ba);
  free(blocks);
}

Q_Blocks *InitBlocks(int n, int b, int t)
/*  Creates a block array for a database with n letters, a block size of
    b and a block threshold of t, prints its parameters and returns a
    pointer to it.

    The array gets 2 * (n/b) + 1 counters, all starting at 0.  Behind the
    counters, 3 sentinel entries (0, 0, t) are stored for use in
    GetFullBlocks.

    b must not be 0 (n/b is computed here); AddHits and DelHits divide by
    b>>1, so they additionally need b >= 2.

    a block array covers a db as follows:

db:  ------------------------------------
     0   2   4   6   8   ....           n-1 
ba:|---|---|---|---|---|---|---|---|---|---|
     |---|---|---|---|---|---|---|---|---|
       1   3   5   7   9   ...
*/
{
  Q_Blocks	*blocks;
  int		i;

  printf("\nINITIALIZING BLOCK ARRAY\n");
  
  blocks = NewBlocks();
  
  blocks->n = 2 * (n/b) + 1;
  blocks->b = b;
  blocks->t = t;
  /* allocate 3 more entries for the Sentinels 0 0 t */
  blocks->ba = (int *)MyMalloc(sizeof(int) * (blocks->n+3), "block array");
  /* start every counter at 0 so the array is usable without a
     preceding ClearBlocks call */
  for(i=0; i<blocks->n; i++)
    blocks->ba[i] = 0;
  /* set the 3 Sentinel entries */
  blocks->ba[blocks->n] = 0;
  blocks->ba[blocks->n+1] = 0;
  blocks->ba[blocks->n+2] = t;


  PrintBlocksInfo(blocks);

  printf("\n");

  return blocks;
}

void ClearBlocks(Q_Blocks *blocks)
/*  Zeroes the blocks->n counters of the block array.  The entries stored
    behind them are not touched. */
{
  int		left;

  /* walk from the last counter down to the first */
  for(left = blocks->n; left > 0; left--)
    blocks->ba[left-1] = 0;
}

void ResetBlocks(Q_Blocks *blocks)
/*  Zeroes every counter that has not reached the threshold blocks->t;
    counters at or above the threshold keep their value. */
{
  int		pos = 0;
  int		limit = blocks->t;

  while(pos < blocks->n) {
    if(blocks->ba[pos] < limit)
      blocks->ba[pos] = 0;
    pos++;
  }
}

void AddHits(Q_Blocks *blocks, int n, int *hits)
/*  Counts the n positions in hits[0..n-1] in the block array.  A position
    p maps to index p / (b/2); the counter at that index and the one
    directly after it are both incremented.  The index is not range
    checked. */
{
  int		k = 0;
  int		half = blocks->b>>1;	/* half a block width */
  int		target;
  int		*slot;

  while(k < n) {
    target = hits[k]/half;
#ifdef DEBUG
    printf("Target: %d\n", target);
    getchar();
#endif
    /* a position lies in two overlapping blocks */
    slot = blocks->ba + target;
    slot[0]++;
    slot[1]++;
    k++;
  }
}

void DelHits(Q_Blocks *blocks, int n, int *hits)
/*  Takes the n positions in hits[0..n-1] out of the block array again.
    A position p maps to index p / (b/2); the counter at that index and
    the one directly after it are each decremented, but only while the
    counter is still below the threshold t.  Counters that have reached t
    are left alone.  The index is not range checked. */
{
  int		k = 0;
  int		half = blocks->b>>1;	/* half a block width */
  int		limit = blocks->t;
  int		*slot;

  while(k < n) {
    slot = blocks->ba + hits[k]/half;
    if(slot[0] < limit)
      slot[0]--;
    if(slot[1] < limit)
      slot[1]--;
    k++;
  }
}

void AddSequence(Q_SSet *sset, char *seq, Q_Blocks *blocks, Q_Index *index, LONG *qgrams, Q_Options *op)
/*  Feeds the q-grams of one query sequence into the block array.

    sset, seq  passed on to GetLongHits (sset->data is also read by the
               debug output)
    blocks     block array whose counters are updated through AddHits
               and DelHits
    index      handed to GetHits and GetLongHits to look up hit lists
    qgrams     qgrams[0] holds the number of q-grams, the q-grams
               themselves follow in qgrams[1] .. qgrams[qgrams[0]]
    op         options; q, i, w, rep_cutoff and filter_mode are read

    For every q-gram position i the hit list is fetched and, if it holds
    fewer than op->rep_cutoff hits, added to the block array.  As soon as
    i has reached the window length op->w, the hit list of the q-gram at
    position i - op->w is fetched again and handed to DelHits.

    The key given to GetHits is the q-gram shifted right by
    2 * (op->q - op->i) bits; presumably op->i is the q-gram length the
    index was built for and a letter takes 2 bits -- TODO confirm.  When
    op->q > op->i the list is narrowed by GetLongHits, which returns an
    offset into the list and updates the hit count.

    op->filter_mode == 0 selects the standard exact q-gram filtration;
    any other value selects the 1-neighbourhood q-gram filtration, where
    each q-gram is first expanded by CreateHood and every q-gram of the
    neighbourhood is looked up. */
{
  int		i, j, n;
  int		hood_grams = 0;
  LONG		*hood;
  int		*hits;

  if(op->filter_mode == 0) {	/* EXACT Q-GRAM SEARCH */
    for(i=0; i<qgrams[0]; i++) {
#ifdef BLOCKS_DEBUG
      printf("Get Hits for qgram %lld \n", qgrams[i+1]>>(2 * (op->q - op->i)));
      getchar();
#endif
      /* hit list for the q-gram at query position i, n receives its length */
      hits = GetHits(index, qgrams[i+1]>>(2 * (op->q - op->i)), &n);
      if(op->q > op->i)
	hits += GetLongHits(sset, seq+i, index, op->q, qgrams[i+1], hits, &n);
#ifdef BLOCKS_DEBUG
      /* NOTE(review): reads hits[-1] and hits[n], i.e. one entry before and
         one behind the list -- assumes those are readable; confirm */
      for(j = -1; j<=n; j++)
	printf("%d: %13.13s\n", j, sset->data+hits[j]);
      printf("Adding %d Hits for position %d \n", n, i);
      getchar();
#endif
      /* hit lists with rep_cutoff or more entries are not counted */
      if(n<op->rep_cutoff)	
	AddHits(blocks, n, hits);
#ifdef BLOCKS_DEBUG
      if(n != 1) {
	printf("Number of hits: %d\n", n);
	getchar();
      }
#endif
      /* the q-gram at position i - w leaves the window: take its hits out */
      if(i>=op->w) {
#ifdef BLOCKS_DEBUG
      printf("Removing Hits for position %d\n", i-op->w);
#endif
	hits = GetHits(index, qgrams[i+1-op->w]>>(2 * (op->q - op->i)), &n);

	/* NOTE(review): seq+i is passed although the q-gram removed here
	   belongs to position i - op->w -- confirm this is intended */
	if(op->q > op->i)
	  hits += GetLongHits(sset, seq+i, index, op->q, qgrams[i+1-op->w], hits, &n);
	if(n<op->rep_cutoff)	
	  DelHits(blocks, n, hits);
      }
    }
  }
  else {		/* 1-NEIGHBOURHOOD SEARCH */
    /* buffer for the neighbourhood q-grams, released at the end of this branch */
    hood = NewHood(op->q);
    for(i=0; i<qgrams[0]; i++) {
#ifdef DEBUG
      printf("Iteration %d \n", i);
      printf("Getting Neighbourhood for qgram %lld \n", qgrams[i+1]);
#endif
      /* CreateHood fills hood and returns the number of q-grams stored; its
         third argument is the following q-gram, or the negated preceding
         entry qgrams[i] when there is no following one */
      if(i+1 == qgrams[0])	/* SPECIAL CASE: LAST QGRAM IN QUERY */
	hood_grams = CreateHood(op->q, qgrams[i+1], -qgrams[i], hood);
      else
	hood_grams = CreateHood(op->q, qgrams[i+1], qgrams[i+2], hood);
#ifdef DEBUG
      printf("Adding %d Neighbourhood qgrams\n", hood_grams);
#endif
      /* look up and count every q-gram of the neighbourhood */
      for(j=0; j<hood_grams; j++) {
	hits = GetHits(index, hood[j]>>(2 * (op->q - op->i)), &n);

#ifdef DEBUG
	/* NOTE(review): hits is a pointer but is printed with %d */
	printf("qgram %llx, hits: %d len: %d\n", hood[j], hits, n);
#endif
	if(op->q > op->i) {
	  hits += GetLongHits(sset, seq+i, index, op->q, hood[j], hits, &n);
#ifdef DEBUG
	  /* NOTE(review): hits is a pointer but is printed with %d; hits[-1],
	     hits[n] and hits[n+1] lie outside the list -- confirm readable */
	  printf("hits: %d len: %d\n", hits, n);
	  printf("-1 :%20.20s\n",sset->data+hits[-1]);
	  printf("0  :%20.20s\n",sset->data+hits[0]);
	  printf("n-1:%20.20s\n",sset->data+hits[n-1]);
	  printf("n  :%20.20s\n",sset->data+hits[n]);
	  printf("n+1:%20.20s\n",sset->data+hits[n+1]);
#endif
	}
	/* hit lists with rep_cutoff or more entries are not counted */
	if(n<op->rep_cutoff)	
	  AddHits(blocks, n, hits);
      }
      /* the q-gram at position i - w leaves the window: rebuild its
         neighbourhood and take the hits out again */
      if(i>=op->w) {
	hood_grams=CreateHood(op->q,qgrams[i+1-op->w],qgrams[i+2-op->w],hood);
#ifdef DEBUG
	printf("Removing %d Neighbourhood qgrams\n", hood_grams);
#endif
	for(j=0; j<hood_grams; j++) {
	  hits = GetHits(index, hood[j]>>(2 * (op->q - op->i)), &n);
	  /* NOTE(review): seq+i is passed here as well, not seq+i-op->w --
	     confirm this is intended */
	  if(op->q > op->i)
	    hits += GetLongHits(sset, seq+i, index, op->q, hood[j], hits, &n);
	  if(n<op->rep_cutoff)	
	    DelHits(blocks, n, hits);
	}
      }
    }
    free(hood);
  }
}

int *GetFullBlocks(Q_Blocks *blocks)
/*  Collects the interesting zones of the block array, i.e. runs of full
    blocks (counter >= t) in which full blocks are separated by at most
    one block that is not full.

    Returns an integer array fb obtained from MyMalloc with the following
    structure:
    fb[0]                 number of used array cells including fb[0]
                          itself, i.e. 1 + 2 * number of zones
    fb[1] .. fb[fb[0]-1]  pairs of integers (first, last db position)
                          delimiting the zones, in ascending order

    The array is handed to the caller; nothing in here releases it.

    The scan relies on the 3 sentinel entries (0, 0, t) that InitBlocks
    stores behind the counters: the t stops the search for the next full
    block, the two 0 entries stop the extension of a zone. */
{
  int		i=0;
  int		j=1;
  int		capacity;
  int		*fb;

  /* Two zones are at least 3 blocks apart, so n blocks yield at most
     (n+2)/3 zones and 1 + 2*((n+2)/3) <= n + 2 cells.  Allocating only n
     cells was too small for n == 1 with a full block. */
  capacity = blocks->n + 2;
  fb = MyMalloc(sizeof(int) * capacity, "list of full blocks");

  while(i<blocks->n) {
    while(blocks->ba[i] < blocks->t) {	/* Find next full block */
      i++;				/* i contains start     */
#ifdef DEBUG
      printf("First block: %d\n", i);
#endif
    }
    if(i < blocks->n) {
#ifdef DEBUG
      if(j >= capacity) {
	printf("ERROR:j >= capacity\n");
	getchar();
      }
#endif
      /* block i starts half a block width before position i*b/2 */
      fb[j] = (i-1)*blocks->b>>1;
#ifdef DEBUG
      printf("fb[%d]: %d\n", j, fb[j]);
#endif
      j++;
      /* extend the zone while one of the next two blocks is full */
      while((blocks->ba[i+1] >= blocks->t) || (blocks->ba[i+2] >= blocks->t)) {
	i++;
	if(blocks->ba[i+2] >= blocks->t)
	  i++;
      }
      i++;				/* first block behind the zone */
#ifdef DEBUG
      if(j >= capacity) {
	printf("ERROR:j >= capacity\n");
	getchar();
      }
#endif
      fb[j]=(i*blocks->b>>1)-1;
      j++;
    }
  }
  
  /* correct the start of the first zone (if its -b/2); fb[1] only holds
     a value when at least one zone was found */
  if(j>1 && fb[1]<0)
    fb[1] = 0;

  /* save the number of used array cells */
  fb[0] = j;
  return fb;
}

void PrintBlocksInfo(Q_Blocks *blocks)
/*  Writes the three parameters of the block array (number of blocks,
    block size, block threshold) to stdout, one per line. */
{
  int		count = blocks->n;
  int		size = blocks->b;
  int		threshold = blocks->t;

  printf("number of blocks: %d\n", count);
  printf("block size:       %d\n", size);
  printf("block threshold:  %d\n", threshold);
}

void PrintBlocks(Q_Blocks *blocks)
/*  Writes the block array parameters followed by one line per counter
    ("index: value") to stdout.  Counters that have reached the
    threshold t are marked with " FULL". */
{
  int		pos = 0;
  int		count;

  PrintBlocksInfo(blocks);

  while(pos < blocks->n) {
    count = blocks->ba[pos];
    printf("%d: %d", pos, count);
    if(count >= blocks->t)
      printf(" FULL");
    printf("\n");
    pos++;
  }
}

void PrintBlocksStatistics(Q_Blocks *blocks)
/*  Prints block array statistics to stdout: the number of blocks, the
    number of full blocks (counter >= t) and their share in percent.

    When compiled with DEBUG, a histogram of the counter values is printed
    as well.  Values above 100 share the last bucket.  Negative counters
    (DelHits decrements without a lower bound) are tallied separately
    instead of being used as an index into the histogram. */
{
  int		i;
  int		count;
  int		full = 0;
  int		negative = 0;
  int		values[101];

  for(i=0; i<101; i++)
    values[i]=0;

  for(i=0; i<blocks->n; i++)
    {
      count = blocks->ba[i];
      if(count>100)
	values[100]++;
      else if(count<0)
	negative++;		/* would index in front of values[] */
      else
	values[count]++;
      if(count >= blocks->t)
	full++;
    }


  printf("BLOCK ARRAY STATISTICS\n");
  printf("number of blocks: %d\n", blocks->n);
  printf("full blocks:      %d\n", full);
  printf("filtration rate:  %f percent\n", 100*(float)full/(float)blocks->n);
#ifdef DEBUG  
  if(negative>0)
    printf("negative counters: %d blocks\n", negative);

  for(i=0; i<100; i++)
    if(values[i] != 0)
      printf("%4d: %d\n",i, values[i]);
  
  if(values[100]>0)
    printf("100 or more hits: %d blocks\n", values[100]);
  printf("\n");
#endif
}
