
static char const rcsid [] = "$Id: facim.c,v 1.1 1998/04/21 10:41:36 stojanov Exp $";


/*****************************************************************************/
/*                                                                           */
/* Program: facim (matching set of sequences against IMD motifs)             */
/*                                                                           */
/* Author: Nikola Stojanovic                                                 */
/*                                                                           */
/* Revision:    21 MAR 97   Version 1.0                                      */
/*                                                                           */
/*                                                                           */
/*   Given a file of ranges with represented sequences, program matches them */
/* against factors given by matrices of the Information Matrix Database,     */
/* i.e. it's specified file(s); program sends it's output, in extended plain */
/* (landmark) format, to the standard output                                 */
/*                                                                           */
/*****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ntl.h"

/*****************************************************************************/
/*                                                                           */
/* Definitions section                                                       */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/* Definitions of the constants of the program unit                          */
/*****************************************************************************/


#define DEFAULT_DIRECTORY              "."
#define DEFAULT_STRAND                 BOTH_MATCH
#define DEFAULT_AMBIGUITY              FALSE
#define DEFAULT_HIGH                   FALSE
#define DEFAULT_TRUNCATION             FALSE

#define UNKNOWN_SYMBOL                 '?'


/*****************************************************************************/
/* Types used in the program unit                                            */
/*****************************************************************************/



/*****************************************************************************/
/* Prototypes of locally used functions of the program unit                  */
/*****************************************************************************/


int facim_collect_data (strlist_ptr directories, char *input_file,
                        strlist_ptr matrix_files, bool high,
			plain_ptr *sequences, imd_ptr *imd_list);
void facim_comments (char *input_file, strlist_ptr matrix_files, int strand,
                     bool high, bool ambiguity, bool truncation);
int facim_match (plain_ptr sequences, imd_ptr matrices,
                 int strand, bool ambiguity, bool truncation);
int facim_matrix (long int start, long int pos, long int stop, char *text,
                  imd_ptr matrix, int strand, bool ambiguity);
int facim_score (long int pos, char *text, double **matrix,
                 long int end, bool ambiguity, double *score);
int facim_inverse (long int pos, char *text, long int length, double **matrix,
                   long int end, bool ambiguity, double *score);
void facim_output_line (long int start, long int pos, long int end_index,
                        char *text, imd_ptr matrix, char code, double ratio);
int facim_ambiguous (char target, double *column, double *score);
char facim_complement (char symbol);


/*****************************************************************************/
/* Static variables of the program unit                                      */
/*****************************************************************************/


int Alphabet_Rows [26];
double Alphabet_Frequences [26];

/*****************************************************************************/
/*                                                                           */
/* Code section                                                              */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/*                                                                           */
/* Procedure: main                                                           */
/*                                                                           */
/* "main" procedure of the program. Receives and analyses the command line   */
/*   parameters, sets the control variables of the program, checks their     */
/*   consistency and passes control to internal procedures which actually    */
/*   process the input data; returns 0 if everything is OK, non-zero status  */
/*   in case of any errors                                                   */

int main (int argc, char **argv)
{
 char *input_file; int arg_count, strand, match_count; bool truncation;
 bool strand_set, ambiguity_set, ambiguity, high_set, high, truncation_set;
 strlist_ptr data_directory, new_path, path_scan;
 strlist_ptr matrix_files, new_file, file_scan;
 plain_ptr sequences; imd_ptr imd_motifs;
 
 input_file = NULL;
 data_directory = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct));
 data_directory -> string = NTL0_strsave (DEFAULT_DIRECTORY);
 data_directory -> next = NULL;
 matrix_files = NULL;
 strand_set = FALSE;
 strand = DEFAULT_STRAND;
 ambiguity_set = FALSE;
 ambiguity = DEFAULT_AMBIGUITY;
 high_set = FALSE;
 high = DEFAULT_HIGH;
 truncation_set = FALSE;
 truncation = DEFAULT_TRUNCATION;

 if (argc < 2) {                       /* Display instructions and terminate */
 
  fprintf (stderr, "usage: %s <input_file>\n", argv [0]);
  fprintf (stderr, "             [-I <directory_path>]*\n");
  fprintf (stderr, "             [-f <matrix_file>]+\n");
  fprintf (stderr, "             [-d | -i | -b]\n");
  fprintf (stderr, "             [-y | -n]\n");
  fprintf (stderr, "             [-a | -x]\n");
  fprintf (stderr, "             [-t | -c]\n");
  exit (1);
 }
 else {                           /* Some parameters provided - process them */

  /* Proceed to extract the command line parameters and create the settings  */

  arg_count = 1; while (arg_count < argc) {
 
   if (argv [arg_count] [0] != '-') {       /* Not an "-" option - file name */
    if (input_file != NULL) {
     fprintf (stderr, "Repeated file name '%s'.\n", argv [arg_count]);
     exit (1);
    }
    else {
     input_file = NTL0_strsave (argv [arg_count]);
     arg_count++;
    }
   }
   else if (!strcmp (argv [arg_count], "-I")) {        /* New "include" path */
    arg_count++;
    if (arg_count == argc) {
     fprintf (stderr, "Missing directory path for inclusion.\n"); exit (1);
    }
    else {
     new_path = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct));
     new_path -> string = NTL0_strsave (argv [arg_count]);
     new_path -> next = NULL;

     if (data_directory == NULL) data_directory = new_path;
     else {
      path_scan = data_directory;
      while (path_scan -> next != NULL) path_scan = path_scan -> next;
      path_scan -> next = new_path;
     }
     arg_count++;
    }
   }
   else if (!strcmp (argv [arg_count], "-f")) {           /* New matrix file */
    arg_count++;
    if (arg_count == argc) {
     fprintf (stderr, "Missing matrix file name.\n"); exit (1);
    }
    else {
     new_file = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct));
     new_file -> string = NTL0_strsave (argv [arg_count]);
     new_file -> next = NULL;

     if (matrix_files == NULL) matrix_files = new_file;
     else {
      file_scan = matrix_files;
      while (file_scan -> next != NULL) file_scan = file_scan -> next;
      file_scan -> next = new_file;
     }
     arg_count++;
    }
   }
   else if (!strcmp (argv [arg_count], "-d")) {   /* Direct matching request */
    if (strand_set) {
     fprintf (stderr, "Strand for matching already set.\n"); exit (1);
    }
    else { strand = LITERAL_MATCH; strand_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-i")) {  /* Inverse complement match */
    if (strand_set) {
     fprintf (stderr, "Strand for matching already set.\n"); exit (1);
    }
    else { strand = INVERSE_MATCH; strand_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-b")) {        /* Match both strands */
    if (strand_set) {
     fprintf (stderr, "Strand for matching already set.\n"); exit (1);
    }
    else { strand = BOTH_MATCH; strand_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-y")) {       /* Include all request */
    if (high_set) {
     fprintf (stderr, "Matrix selection already set.\n"); exit (1);
    }
    else { high = TRUE; high_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-n")) {    /* Exclude high positives */
    if (high_set) {
     fprintf (stderr, "Matrix selection already set.\n"); exit (1);
    }
    else { high = FALSE; high_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-a")) {    /* Permit ambiguity codes */
    if (ambiguity_set) {
     fprintf (stderr, "Ambiguity codes treatment already set.\n"); exit (1);
    }
    else { ambiguity = TRUE; ambiguity_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-x")) {        /* No ambiguity codes */
    if (ambiguity_set) {
     fprintf (stderr, "Ambiguity codes treatment already set.\n"); exit (1);
    }
    else { ambiguity = FALSE; ambiguity_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-t")) {    /* Permit site truncation */
    if (truncation_set) {
     fprintf (stderr, "Truncation criterion already set.\n"); exit (1);
    }
    else { truncation = TRUE; truncation_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-c")) {       /* Disallow truncation */
    if (truncation_set) {
     fprintf (stderr, "Truncation criterion already set.\n"); exit (1);
    }
    else { truncation = FALSE; truncation_set = TRUE; arg_count++; }
   }
   else {                                     /* Unknown command-line option */
    fprintf (stderr, "Illegal option (%s).\n", argv [arg_count]);
    fprintf (stderr, "usage: %s <input_file>\n", argv [0]);
    fprintf (stderr, "             [-I <directory_path>]*\n");
    fprintf (stderr, "             [-f <matrix_file>]+\n");
    fprintf (stderr, "             [-d | -i | -b]\n");
    fprintf (stderr, "             [-y | -n]\n");
    fprintf (stderr, "             [-a | -x]\n");
    fprintf (stderr, "             [-t | -c]\n");
    exit (1);
   }
  }
  /* Now check whether all necessary parameters have been provided           */

  if (input_file == NULL) {
   fprintf (stderr, "Must have sequences file to process.\n"); exit (1);
  }
  if (matrix_files == NULL) {
   fprintf (stderr, "Must have at least one matrix data file to match.\n");
   exit (1);
  }

  /* If everything is OK so far, proceed to collect data from the files      */

  if (facim_collect_data (data_directory, input_file, matrix_files, high,
                          &sequences, &imd_motifs) < 0) exit (1);
  else {
   facim_comments (input_file, matrix_files, strand, high, ambiguity,
                   truncation);
   if ((match_count = facim_match (sequences, imd_motifs,
                                   strand, ambiguity, truncation)) < 0)
    exit (1);
   else {
    printf ("\n# Found %d matching sites.\n", match_count);
    exit (0);
   }
  }
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: facim_collect_data                                             */
/*                                                                           */

int facim_collect_data (strlist_ptr directories, char *input_file,
                        strlist_ptr matrix_files, bool high,
                        plain_ptr *sequences, imd_ptr *imd_list)
{
 errind report; char *new_path; strlist_ptr current_path, current_file;
 imd_ptr last_matrix, temp_list, old_record; int index; char *suffix;

 *sequences = NULL; *imd_list = NULL; last_matrix = NULL;     /* Initialize */
 
 /* Open the sequences input file, check existence and load the data         */

 if ((directories == NULL) ||
     (input_file [0] == '/') || (input_file [0] == '~')) {
  if ((report = NTL2_Load_Plain (input_file, sequences)) != NULL) {
   fprintf (stderr, "Can't open sequences file '%s' (%s).\n",
                    input_file, report -> message);
   return -1;
  }
  else if (*sequences == NULL) {        /* File found, but nothing retrieved */
   fprintf (stderr, "No sequences to match.\n"); return -2;
  }
 }
 else {                     /* Directory path(s) provided, try them in order */
  current_path = directories;
  while ((*sequences == NULL) && (current_path != NULL)) {
   new_path = NTL0_ckalloc ((strlen (current_path -> string) +
                             strlen (input_file) + 2) * sizeof (char));
   strcpy (new_path, current_path -> string);
   if (input_file [0] != '/') strcat (new_path, "/");
   strcat (new_path, input_file);
   
   if ((report = NTL2_Load_Plain (new_path, sequences)) != NULL) {
    *sequences = NULL; current_path = current_path -> next;
   }
   free (new_path);
  }
  if (report != NULL) {                        /* File not found on any path */
   fprintf (stderr, "Can't open sequences file '%s' (%s).\n",
                    input_file, report -> message);
   return -3;
  }
  else if (*sequences == NULL) {        /* File found, but nothing retrieved */
   fprintf (stderr, "No sequences to match.\n"); return -4;
  }
 }
 /* If sequences are correctly obtained, get the requested matrices          */
 /* Load the alphabet data first, necessary for processing raw matrices read */
 
 NTL2_Get_Alphabet (Alphabet_Rows, Alphabet_Frequences);

 /* Loop for all requested files to load, convert and connect the data       */
 
 for (current_file = matrix_files; current_file != NULL;
      current_file = current_file -> next) {

  /* Load matrices from any file to temporary list first, then connect       */
  
  temp_list = NULL;
  if ((directories == NULL) ||                 /* No directory path provided */
      ((current_file -> string) [0] == '/') ||
      ((current_file -> string) [0] == '~')) {
   if ((report = NTL2_Load_IMDfile (current_file -> string, Alphabet_Rows,
                                    Alphabet_Frequences, &temp_list)) != NULL)
   {
    fprintf (stderr, "Can't open IMD file '%s' (%s).\n",
                     current_file -> string, report -> message);
    return -5;
   }
   else if (temp_list == NULL) {       /* File found, but no matrices loaded */
    fprintf (stderr, "No matrices in '%s'.\n", current_file -> string);
    return -6;
   }
  }
  else {                    /* Directory path(s) provided, try them in order */
   current_path = directories;
   while ((temp_list == NULL) && (current_path != NULL)) {
    new_path = NTL0_ckalloc ((strlen (current_path -> string) +
                      strlen (current_file -> string) + 2) * sizeof (char));
    strcpy (new_path, current_path -> string);
    if ((current_file -> string) [0] != '/') strcat (new_path, "/");
    strcat (new_path, current_file -> string);
  
    if ((report = NTL2_Load_IMDfile (new_path, Alphabet_Rows,
                                     Alphabet_Frequences, &temp_list)) != NULL)
    {
     temp_list = NULL; current_path = current_path -> next;
    }
    free (new_path);
   }
   if (report != NULL) {                       /* File not found on any path */
    fprintf (stderr, "Can't open IMD file '%s' (%s).\n",
                     current_file -> string, report -> message);
    return -7;
   }
   else if (temp_list == NULL) {        /* File found, but no matrices in it */
    fprintf (stderr, "No matrices in '%s'.\n", current_file -> string);
    return -8;
   }
  }
  /* If loading matrices from the current file was successful, connect data  */

  if (*imd_list == NULL) *imd_list = temp_list;
  else last_matrix -> next = temp_list;

  if (last_matrix == NULL) last_matrix = *imd_list;
  while (last_matrix -> next != NULL) last_matrix = last_matrix -> next;
 }
 if (!high) {           /* If there are matrices to be excluded, remove them */
  suffix =
        &(((*imd_list) -> file_name) [strlen ((*imd_list) -> file_name) - 1]);
  while ((suffix != (*imd_list) -> file_name) && (*suffix != '/')) suffix--;
  if (*suffix == '/') suffix++;
  while ((*imd_list != NULL) &&
         (((!strcmp (suffix, "matrix.mam")) &&
	   ((!strcmp ((*imd_list) -> site_name, "TCF-1")) ||
	    (!strcmp ((*imd_list) -> site_name, "AP-2")) ||
	    (!strcmp ((*imd_list) -> site_name, "AP-3")) ||
	    (!strcmp ((*imd_list) -> site_name, "GATA-1")) ||
	    (!strcmp ((*imd_list) -> site_name, "HNF-3")) ||
	    (!strcmp ((*imd_list) -> site_name, "LBP-1")) ||
	    (!strcmp ((*imd_list) -> site_name, "NF-IL6")) ||
	    (!strcmp ((*imd_list) -> site_name, "T-Ag")) ||
	    (!strcmp ((*imd_list) -> site_name, "TEF")))) ||
	  ((!strcmp (suffix, "matrix.ins")) &&
	   ((!strcmp ((*imd_list) -> site_name, "Ftz.2")) ||
	    (!strcmp ((*imd_list) -> site_name, "Tll")))) ||
	  ((!strcmp (suffix, "matrix.bir")) &&
	   ((!strcmp ((*imd_list) -> site_name, "Sp1")) ||
	    (!strcmp ((*imd_list) -> site_name, "EFII")) ||
	    (!strcmp ((*imd_list) -> site_name, "NF1")))))) {
   old_record = *imd_list; *imd_list = (*imd_list) -> next;
   if (old_record -> file_name != NULL) free (old_record -> file_name);
   if (old_record -> site_name != NULL) free (old_record -> site_name);
   if (old_record -> bind_factor != NULL) free (old_record -> bind_factor);
   if (old_record -> sequence != NULL) free (old_record -> sequence);
   for (index = 0; index < old_record -> seq_length; index++) 
    free ((old_record -> matrix) [index]);
   free (old_record -> matrix);
   if (old_record -> reference != NULL) free (old_record -> reference);
   free (old_record);
   
   suffix =
        &(((*imd_list) -> file_name) [strlen ((*imd_list) -> file_name) - 1]);
   while ((suffix != (*imd_list) -> file_name) && (*suffix != '/')) suffix--;
   if (*suffix == '/') suffix++;
  }
  if (*imd_list != NULL) {
   if ((*imd_list) -> next != NULL) {
    suffix = &((((*imd_list) -> next) -> file_name) [
                          strlen (((*imd_list) -> next) -> file_name) - 1]);
    while ((suffix != ((*imd_list) -> next) -> file_name) && (*suffix != '/'))
     suffix--;
    if (*suffix == '/') suffix++;
   }
   last_matrix = *imd_list; while (last_matrix -> next != NULL) {
    if ((last_matrix -> next != NULL) &&
        (((!strcmp (suffix, "matrix.mam")) &&
          ((!strcmp ((last_matrix -> next) -> site_name, "TCF-1")) ||
           (!strcmp ((last_matrix -> next) -> site_name, "AP-2")) ||
           (!strcmp ((last_matrix -> next) -> site_name, "AP-3")) ||
           (!strcmp ((last_matrix -> next) -> site_name, "GATA-1")) ||
           (!strcmp ((last_matrix -> next) -> site_name, "HNF-3")) ||
           (!strcmp ((last_matrix -> next) -> site_name, "LBP-1")) ||
           (!strcmp ((last_matrix -> next) -> site_name, "NF-IL6")) ||
           (!strcmp ((last_matrix -> next) -> site_name, "T-Ag")) ||
           (!strcmp ((last_matrix -> next) -> site_name, "TEF")))) ||
         ((!strcmp (suffix, "matrix.ins")) &&
          ((!strcmp ((last_matrix -> next) -> site_name, "Ftz.2")) ||
           (!strcmp ((last_matrix -> next) -> site_name, "Tll")))) ||
         ((!strcmp (suffix, "matrix.bir")) &&
          ((!strcmp ((last_matrix -> next) -> site_name, "Sp1")) ||
           (!strcmp ((last_matrix -> next) -> site_name, "EFII")) ||
           (!strcmp ((last_matrix -> next) -> site_name, "NF1")))))) {
     old_record = last_matrix -> next; last_matrix -> next = old_record -> next;
     if (old_record -> file_name != NULL) free (old_record -> file_name);
     if (old_record -> site_name != NULL) free (old_record -> site_name);
     if (old_record -> bind_factor != NULL) free (old_record -> bind_factor);
     if (old_record -> sequence != NULL) free (old_record -> sequence);
     for (index = 0; index < old_record -> seq_length; index++) 
      free ((old_record -> matrix) [index]);
     free (old_record -> matrix);
     if (old_record -> reference != NULL) free (old_record -> reference);
     free (old_record);
    }
    else last_matrix = last_matrix -> next;

    if (last_matrix -> next != NULL) {
     suffix = &(((last_matrix -> next) -> file_name) [
                           strlen ((last_matrix -> next) -> file_name) - 1]);
     while ((suffix != (last_matrix -> next) -> file_name) && (*suffix != '/'))
      suffix--;
     if (*suffix == '/') suffix++;
    }
   }
  }
  if (*imd_list == NULL) {
   fprintf (stderr,
            "No matrices left after elimination of high false positives.");
   return -9;
  }
 }
 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: facim_comments                                                 */
/*                                                                           */

void facim_comments (char *input_file, strlist_ptr matrix_files, int strand,
                     bool high, bool ambiguity, bool truncation)
{
 strlist_ptr data_scan;

 printf ("\n");        /* Separate output from whatever was on screen before */
 printf ("#:plain:\n\n");       /* Data ID, for utilities to process further */
 
 printf ("# Information matrix matching sites within sequences from: '%s'\n",
         input_file);
 printf ("# Information Matrix Database (IMD) files used:  ");
 for (data_scan = matrix_files;
      data_scan != NULL; data_scan = data_scan -> next) {
  printf ("%s", data_scan -> string);
  if (data_scan -> next == NULL) printf ("\n"); else printf (", ");
 }
 printf ("# Matching done ");
 if (strand == LITERAL_MATCH) printf ("directly.\n");
 else if (strand == INVERSE_MATCH) printf ("in inverse complement.\n");
 else printf ("in both strands.\n");
 if (high)
  printf ("# Matching data includes matrices with high false positive ratio");
 else printf (
   "# Matching data does not include matrices with high false positive ratio");
 printf (".\n");
 if (ambiguity)
  printf (
      "# Ambiguity codes in sequences permitted - conservative approach.\n");
 else
  printf ("# Only characters of matrix alphabet permitted at match sites.\n");
 if (truncation)
  printf (
       "# Partially matched (above threshold) truncated sites permitted.\n\n");
 else
  printf ("# Only matches of full length permitted.\n\n");
 printf ("# Output structure:\n");
 printf (
     "#  from to seq_text ratio consensus site_name strand IMD file ref\n\n");
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: facim_match                                                    */
/*                                                                           */

int facim_match (plain_ptr sequences, imd_ptr matrices,
                 int strand, bool ambiguity, bool truncation)
{
 plain_ptr current_seq; long int p, match_count, ind; imd_ptr current_matrix;

 match_count = 0;

 /* Loop for all given sequences to try to find matching sites in them       */

 for (current_seq = sequences;
      current_seq != NULL; current_seq = current_seq -> next) {
  if (current_seq -> text != NULL) {

   /* Loop for each position in the sequence - search for matches on it      */

   for (p = 0; p < (current_seq -> stop) - (current_seq -> start) + 1; p++) {
    for (current_matrix = matrices; current_matrix != NULL;
         current_matrix = current_matrix -> next) {
     if ((truncation) ||
         (current_matrix -> seq_length <=
                   (current_seq -> stop) - (current_seq -> start) + 1 - p)) {
      if ((ind = facim_matrix (current_seq -> start, p, current_seq -> stop,
                               current_seq -> text, current_matrix,
                               strand, ambiguity)) < 0) return -21;
      else if (ind) match_count += ind;
     }
    }
   }
  }
 }
 return match_count;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: facim_matrix                                                   */
/*                                                                           */

int facim_matrix (long int start, long int pos, long int stop, char *text,
                  imd_ptr matrix, int strand, bool ambiguity)
{
 double literal_score, inverse_score, ratio; long int end_pos, end_index;
 bool full; char match_code; int match_ind;

 /* Determine the position where matching should stop, compare to seq. end   */ 

 end_pos = start + pos + (matrix -> seq_length) - 1;
 if (end_pos > stop) {                /* Matching can be only partially done */
  full = FALSE; end_index = stop - start - pos;
 }
 else { full = TRUE; end_index = (matrix -> seq_length) - 1; } /* Full match */
 
 if ((strand == LITERAL_MATCH) || (strand == BOTH_MATCH)) {
  if (facim_score (pos, text, matrix -> matrix,
                   end_index, ambiguity, &literal_score) < 0)
   return -31;
 }
 else literal_score = 0.0;
 
 if ((strand == INVERSE_MATCH) || (strand == BOTH_MATCH)) {
  if (facim_inverse (pos, text, (long int) (matrix -> seq_length),
                     matrix -> matrix, end_index, ambiguity,
                     &inverse_score) < 0) return -32;
 }
 else inverse_score = 0.0;

 /* If the score is above the threshold for matching, report the site        */

 match_ind = 0;
 if (literal_score > (matrix -> cutoff) - EPSILON) {
  ratio = literal_score / (matrix -> max_score);
  if (full) match_code = LITERAL_CODE;
  else match_code = TRUNCATED_LITERAL_CODE;
  facim_output_line (start, pos, end_index, text, matrix, match_code, ratio);
  match_ind++;
 }
 if (inverse_score > (matrix -> cutoff) - EPSILON) {
  ratio = inverse_score / (matrix -> max_score);
  if (full) match_code = INVERSE_CODE;
  else match_code = TRUNCATED_INVERSE_CODE;
  facim_output_line (start, pos, end_index, text, matrix, match_code, ratio);
  match_ind++;
 }
 return match_ind;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: facim_score                                                    */
/*                                                                           */

int facim_score (long int pos, char *text, double **matrix, long int end,
                 bool ambiguity, double *score)
{
 double add_val; long int index; int letter_row;
 
 *score = 0.0;
 for (index = 0; index <= end; index++) {
  if ((text [pos + index] != GAP_SYMBOL) &&
      ((text [pos + index] < 'A') || (text [pos + index] > 'Z'))) {
   fprintf (stderr, "Can't process character '%c' found in sequence.\n",
                    text [pos + index]);
   return -101;
  }
  if (text [pos + index] != GAP_SYMBOL) {
   letter_row = Alphabet_Rows [(int) (text [pos + index]) - (int) 'A'];
   if (letter_row < 0) {
    if (ambiguity) {
     if (facim_ambiguous (text [pos + index], matrix [index], &add_val) < 0) {
      fprintf (stderr, "Illegal ambiguity code '%c' found in sequence.\n",
                       text [pos + index]);
      return -102;
     }
     else *score += add_val;
    }
    else { *score = 0.0; return 0; }
   }
   else *score += matrix [index] [letter_row];
  }
  else if (!ambiguity) { *score = 0.0; return 0; }
 }
 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: facim_inverse                                                  */
/*                                                                           */

int facim_inverse (long int pos, char *text, long int length, double **matrix,
                   long int end, bool ambiguity, double *score)
{
 double add_val; long int start_index, index; int letter_row;
 char target_letter;
 
 *score = 0.0;
 start_index = length - end - 1;
 for (index = start_index; index < length; index++) {
  target_letter = facim_complement (text [pos + length - index - 1]);
  if ((target_letter != GAP_SYMBOL) &&
      ((target_letter < 'A') || (target_letter > 'Z'))) {
   if (target_letter == UNKNOWN_SYMBOL)
    fprintf (stderr, "Unknown complement letter for '%c' in sequence.\n",
                     text [pos + length - index - 1]);
   else
    fprintf (stderr, "Can't process complement letter '%c'.\n", target_letter);
   return -201;
  }
  if (target_letter != GAP_SYMBOL) {
   letter_row = Alphabet_Rows [(int) target_letter - (int) 'A'];
   if (letter_row < 0) {
    if (ambiguity) {
     if (facim_ambiguous (target_letter, matrix [index], &add_val) < 0) {
      fprintf (stderr, "Illegal ambiguity code '%c' in inverse complement.\n",
                       target_letter);
      return -102;
     }
     else *score += add_val;
    }
    else { *score = 0.0; return 0; }
   }
   else *score += matrix [index] [letter_row];
  }
  else if (!ambiguity) { *score = 0.0; return 0; }
 }
 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: facim_output_line                                              */
/*                                                                           */

void facim_output_line (long int start, long int pos, long int end_index,
                        char *text, imd_ptr matrix, char code, double ratio)
{
 int index; char *suffix;
 
 printf ("%ld %ld ", start + pos, start + pos + end_index);
 for (index = pos; index <= pos + end_index; index++)
  printf ("%c", text [index]);
 printf (" %4.2f ", ratio);
 if (matrix -> sequence == NULL) printf ("null ");
 else printf ("\"%s\" ", matrix -> sequence);
 if (matrix -> site_name == NULL) printf ("null ");
 else printf ("\"%s\" ", matrix -> site_name);
 printf ("%c IMD ", code);
 suffix = &((matrix -> file_name) [strlen (matrix -> file_name) - 1]);
 while ((suffix != matrix -> file_name) && (*suffix != '/')) suffix--;
 if (*suffix == '/') suffix++;
 printf ("%s %s\n", suffix, matrix -> reference);
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: facim_ambiguous                                                */
/*                                                                           */

int facim_ambiguous (char target, double *column, double *score)
{
 int a, c, g, t; double selection;
 
 if (target == GAP_SYMBOL) { *score = 0.0; return 0; }
 else if ((target < 'A') || (target > 'Z')) return -301;
 else if (Alphabet_Rows [(int) (target - 'A')] >= 0) return -302;
 else {
  a = Alphabet_Rows [(int) ('A' - 'A')];
  c = Alphabet_Rows [(int) ('C' - 'A')];
  g = Alphabet_Rows [(int) ('G' - 'A')];
  t = Alphabet_Rows [(int) ('T' - 'A')];

  if ((a < 0) || (c < 0) || (g < 0) || (t < 0)) return -303;

  if (target == 'W') {
   if (column [a] < column [t]) *score = column [a]; else *score = column [t];
   return 0;
  }
  else if (target == 'Y') {
   if (column [c] < column [t]) *score = column [c]; else *score = column [t];
   return 0;
  }
  else if (target == 'S') {
   if (column [c] < column [g]) *score = column [c]; else *score = column [g];
   return 0;
  }
  else if (target == 'R') {
   if (column [a] < column [g]) *score = column [a]; else *score = column [g];
   return 0;
  }
  else if (target == 'M') {
   if (column [a] < column [c]) *score = column [a]; else *score = column [c];
   return 0;
  }
  else if (target == 'K') {
   if (column [g] < column [t]) *score = column [g]; else *score = column [t];
   return 0;
  }
  else if (target == 'B') {
   selection = column [c];
   if (selection > column [g]) selection = column [g];
   if (selection > column [t]) selection = column [t];
   if (Alphabet_Rows [(int) ('K' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('K' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('K' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('S' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('S' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('S' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('Y' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('Y' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('Y' - 'A')]];
   }
   *score = selection; return 0;
  }
  else if (target == 'D') {
   selection = column [a];
   if (selection > column [g]) selection = column [g];
   if (selection > column [t]) selection = column [t];
   if (Alphabet_Rows [(int) ('K' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('K' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('K' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('R' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('R' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('R' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('W' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('W' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('W' - 'A')]];
   }
   *score = selection; return 0;
  } 
  else if (target == 'H') {
   selection = column [a];
   if (selection > column [c]) selection = column [c];
   if (selection > column [t]) selection = column [t];
   if (Alphabet_Rows [(int) ('M' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('M' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('M' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('W' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('W' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('W' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('Y' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('Y' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('Y' - 'A')]];
   }
   *score = selection; return 0;
  }
  else if (target == 'V') {
   selection = column [a];
   if (selection > column [c]) selection = column [c];
   if (selection > column [g]) selection = column [g];
   if (Alphabet_Rows [(int) ('M' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('M' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('M' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('R' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('R' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('R' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('S' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('S' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('S' - 'A')]];
   }
   *score = selection; return 0;
  }
  else if ((target == 'N') || (target == 'X')) {
   selection = column [a];
   if (selection > column [c]) selection = column [c];
   if (selection > column [g]) selection = column [g];
   if (selection > column [t]) selection = column [t];
   if (Alphabet_Rows [(int) ('W' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('W' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('W' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('Y' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('Y' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('Y' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('S' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('S' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('S' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('R' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('R' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('R' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('M' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('M' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('M' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('K' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('K' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('K' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('B' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('B' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('B' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('D' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('D' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('D' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('H' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('H' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('H' - 'A')]];
   }
   if (Alphabet_Rows [(int) ('V' - 'A')] >= 0) {
    if (selection > column [Alphabet_Rows [(int) ('V' - 'A')]])
     selection = column [Alphabet_Rows [(int) ('V' - 'A')]];
   }
   *score = selection; return 0;
  }
  else return -304;
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: facim_complement                                               */
/*                                                                           */

char facim_complement (char symbol)
{
 if (symbol == 'A') return 'T';
 else if (symbol == 'C') return 'G';
 else if (symbol == 'G') return 'C';
 else if (symbol == 'T') return 'A';
 else if (symbol == 'W') return 'W';
 else if (symbol == 'Y') return 'R';
 else if (symbol == 'S') return 'S';
 else if (symbol == 'R') return 'Y';
 else if (symbol == 'M') return 'K';
 else if (symbol == 'K') return 'M';
 else if (symbol == 'B') return 'V';
 else if (symbol == 'D') return 'H';
 else if (symbol == 'H') return 'D';
 else if (symbol == 'V') return 'B';
 else if (symbol == 'N') return 'N';
 else if (symbol == 'X') return 'X';
 else if (symbol == GAP_SYMBOL) return GAP_SYMBOL;
 else return UNKNOWN_SYMBOL;
}


