/*-----------------------------------------------------------------------------

   QUASAR - q-gram Alignment based on Suffix ARrays

   Copyright (C) 1998 Stefan Burkhardt
   Author: Stefan Burkhardt <stburk@mpi-sb.mpg.de>
   This file is part of the QUASAR package.

   QUASAR is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   QUASAR is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the QUASAR package; see the file copying.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  or contact the author. 

-------------------------------------------------------------------------------

  neighbourhood generation module
  
  $File$
  $Revision: 1.3 $
  $Date: Wed, 16 Feb 2000 16:16:22 +0100 $

-----------------------------------------------------------------------------*/

#include "q_hood.h"

LONG *NewHood(int q)
/* requests an array with room for 20 LONG entries per letter of a string
   of length q from MyMalloc and returns the pointer MyMalloc hands back.
   The array is meant to receive the 1-neighbourhood written by
   CreateHood. */
{
  enum { ENTRIES_PER_LETTER = 20 };

  return MyMalloc(sizeof(LONG) * q * ENTRIES_PER_LETTER,
                  "neighbourhood array");
}

void print_qgram(int q, LONG qgram)
/* prints the q letters of the integer encoded string qgram to stdout,
   followed by a newline. Every letter occupies one bit pair (0=A, 1=C,
   2=G, 3=T); the letter held in the highest of the q bit pairs is printed
   first. */
{
  static const char	alphabet[] = "ACGT";
  int			i;

  for(i=q-1; i>=0; i--) {
    /* The bit pair is extracted with a mask rather than with %4: in C the
       remainder of a negative value is negative, which would index in
       front of the alphabet table. For non-negative values both forms
       select the same letter. */
    printf("%c", alphabet[(qgram>>(2*i)) & 3]);
  }
  printf("\n");
}


int GenerateReplace(int q, LONG qgram, LONG *hood)
/* writes to hood every string obtained from qgram by replacing exactly one
   of its q letters (one bit pair each) with a different letter. Positions
   are visited from the lowest bit pair upwards and, per position, the
   replacement letters in increasing order. Returns the number of strings
   written. */
{
  int	count = 0;
  LONG	pos, letter;

  for(pos=0; pos<q; pos++) {
    /* weight of one letter step at this position */
    const LONG	unit    = (LONG)1<<2*pos;
    /* qgram with the letter at this position set to 0 */
    const LONG	blanked = qgram & (~((LONG)3<<2*pos));

    for(letter=0; letter<4; letter++) {
      const LONG	candidate = blanked + letter * unit;

      /* putting the original letter back is not a replacement */
      if(candidate == qgram) {
        continue;
      }
#ifdef DEBUG
      print_qgram(q, candidate);
#endif
      hood[count] = candidate;
      count++;
    }
  }
  return count;
}

int GenerateInsert(int q, LONG qgram, LONG *hood)
/* writes to hood the strings obtained by inserting one letter into qgram
   and dropping the letter in the lowest bit pair so that the length stays
   q. For every split point i = q-2 down to 2 the bit pairs i and above
   stay in place, the pairs below i move down by one pair and the new
   letter is put into pair i-1. Insertions before the last letter as well
   as before and after the first letter are not generated here because
   they are covered by the replacements. Returns the number of strings
   written. */
{
  int	count = 0;
  LONG	split, letter;

  for(split=q-2; split>1; split--) {
    /* the pairs below the split, shifted down by one letter */
    const LONG	tail = (qgram%((LONG)1<<(2*split)))>>2;
    /* the pairs from the split upwards, left where they are */
    const LONG	head = (qgram >> (2*split)) << (2*split);
    /* weight of the pair that receives the inserted letter */
    const LONG	slot = (LONG)1<<(2*(split-1));
    /* letter that ends up directly below the inserted one */
    const LONG	below = tail>>(2*(split-2));

    for(letter=0; letter<4; letter++) {
      LONG	candidate;

      /* a letter equal to the one directly below it is skipped;
         presumably this keeps identical strings from being produced
         more than once */
      if(below == letter) {
        continue;
      }
      candidate = tail + head + letter * slot;
#ifdef DEBUG
      print_qgram(q, candidate);
#endif
      hood[count] = candidate;
      count++;
    }
  }
  return count;
}

int GenerateDelete(int q, LONG qgram, LONG next, LONG *hood)
/* writes to hood the strings that combine the upper part of qgram with the
   lower part of next: for every cut i = q-1 down to 2 the bit pairs i and
   above are taken from qgram and the i pairs below from next. Deletion of
   the first/last character is not generated here (handled by
   replacements/original). Returns the number of strings written. */
{
  int	count = 0;
  int	first = 1;		/* the first candidate is always stored */
  LONG	cut;
  LONG	previous = 0;		/* only read once first has been cleared */

  for(cut=q-1; cut>1; cut--) {
    const LONG	low  = next%((LONG)1<<(2*cut));
    const LONG	high = (qgram >> (2*cut)) << (2*cut);
    const LONG	candidate = low + high;

    /* a run of equal letters yields the same string for neighbouring
       cuts; comparing with the previous candidate keeps such repeats
       out of the array */
    if(first || (candidate != previous)) {
      hood[count] = candidate;
      count++;
#ifdef DEBUG
      print_qgram(q, candidate);
#endif
    }
    previous = candidate;
    first = 0;
  }
  return count;
}

int GenerateLastDelete(int q, LONG qgram, LONG prev, LONG *hood)
/* generates the deletes for qgram by adding characters from the previous
   q-gram: for every cut i = q-1 down to 0 the bit pairs i and above are
   taken from prev and the i pairs below from qgram (for i = 0 the result
   is prev itself). Returns the number of strings written to hood. */
{
  int	count = 0;
  int	first = 1;		/* the first candidate is always stored */
  LONG	cut;
  LONG	previous = 0;		/* only read once first has been cleared */

  for(cut=q-1; cut>=0; cut--) {
    const LONG	low  = qgram%((LONG)1<<(2*cut));
    const LONG	high = (prev >> (2*cut)) << (2*cut);
    const LONG	candidate = low + high;

    /* a run of equal letters yields the same string for neighbouring
       cuts; comparing with the previous candidate keeps such repeats
       out of the array */
    if(first || (candidate != previous)) {
      hood[count] = candidate;
      count++;
#ifdef DEBUG
      print_qgram(q, candidate);
#endif
    }
    previous = candidate;
    first = 0;
  }
  return count;
}

void PrintHood(int q, LONG *hood)
/* prints a size line followed by the first (q-2)*4 + (q-1)*4 + 1 entries
   of hood, each as its index (width 3) immediately followed by the decoded
   string.
   NOTE(review): both the reported size (q*4) and the number of entries
   printed depend only on q; they match neither the q*20 entries allocated
   by NewHood nor the count returned by CreateHood, so entries that were
   never written may be printed - confirm the intended bound. */
{
  const int	shown = (q-2)*4 + (q-1)*4 + 1;
  int		idx = 0;

  printf("Size of neighbourhood array is: %d\n", q * 4);
  while(idx < shown) {
    printf("%3d", idx);
    print_qgram(q, hood[idx]);
    idx++;
  }
}

int CreateHood(int q, LONG qgram, LONG next, LONG *hood)
/* generates the 1-neighbourhood for a string of length q that is integer
   encoded and passed in qgram, where the following (overlapping) q-gram is
   given in next. If qgram is the last q-gram of a query, this has to be
   indicated by passing next = -prev (i.e. the negative value of the
   preceding q-gram).
   hood receives qgram itself first, then the replacements, the insertions
   and finally the deletions. Returns the number of q-grams written.
   NOTE(review): a preceding q-gram with value 0 cannot be marked this way,
   because -0 passes the next >= 0 test and is treated as a regular
   following q-gram. */
{
  int	total;

#ifdef DEBUG
  printf("Original q-gram:\n");
  print_qgram(q, qgram);
#endif

  /* slot 0 always holds the unmodified q-gram */
  hood[0] = qgram;
  total = 1;

  total += GenerateReplace(q, qgram, hood+total);
  total += GenerateInsert(q, qgram, hood+total);

  if(next < 0) {
    /* last q-gram of the query: the deletions are filled up from the
       preceding q-gram, which arrives negated */
    total += GenerateLastDelete(q, qgram, -next, hood+total);
    return total;
  }

  total += GenerateDelete(q, qgram, next, hood+total);
#ifdef DEBUG
  printf("CREATED A TOTAL OF %d NEIGHBOURHOOD %d-GRAMS\n", total, q);
  getchar();
#endif
  return total;
}

