
static char const rcsid [] = "$Id: factra.c,v 1.1 1998/04/21 10:39:42 stojanov Exp $";


/*****************************************************************************/
/*                                                                           */
/* Program: factra (matching set of sequences against TRANSFAC motifs)       */
/*                                                                           */
/* Author: Nikola Stojanovic                                                 */
/*                                                                           */
/* Revision:    20 MAR 97   Version 1.0                                      */
/*                                                                           */
/*                                                                           */
/*   Given a file of ranges with represented sequences, program matches them */
/* against patterns in the specified TRANSFAC (Wingender) Database file(s);  */
/* program sends its output, in extended plain (landmark) file format        */
/*                                                                           */
/*****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ntl.h"

/*****************************************************************************/
/*                                                                           */
/* Definitions section                                                       */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/* Definitions of the constants of the program unit                          */
/*****************************************************************************/


/* Defaults applied by "main" when the matching option is not given          */

#define DEFAULT_DIRECTORY              "."
#define DEFAULT_PRECISION              1.0
#define DEFAULT_STRAND                 BOTH_MATCH
#define DEFAULT_GAP                    TRUE

/* Limits on the digit counts accepted by "factra_is_float", and on the      */
/*   precision value accepted by "main"; the precision bounds are expression */
/*   macros, so they are parenthesized to expand safely in any context       */

#define MAX_WHOLE                      4
#define MAX_FRAC                       5
#define MIN_PRECISION                  (0.05-EPSILON)
#define MAX_PRECISION                  (1.0+EPSILON)


/*****************************************************************************/
/* Types used in the program unit                                            */
/*****************************************************************************/


/* Motifs loaded from Wingender database files are expanded into format more */
/*   suitable for processing by this program and kept in a linked list       */

/* "factra_expand_motifs" builds one of these records for every loaded       */
/*   TRANSFAC entry; the string fields are taken over from the loaded entry, */
/*   while "inverse", "coded" and "inverted" are allocated there, the last   */
/*   two holding "length" entries each                                       */

typedef struct tmotifs_fields {         /* A single TRANSFAC motif, expanded */

 char *file; /* Wingender database file from which the motif has been loaded */
 char *name;                                            /* Motif's site name */
 char *bind_factor;                        /* Motif's binding factor, if any */
 int length;                  /* Length of the motif (number of nucleotides) */
 char *text;                                                 /* Motif itself */
 char *inverse;                           /* Inverse complement of the motif */
 long int *coded;     /* Bit codes to represent letters of the motif (array) */
 long int *inverted;   /* Bit codes represent the motif's inverse complement */
 char *reference;                   /* TRANSFAC reference code for the motif */
 struct tmotifs_fields *next;              /* ... and link to the next motif */
} Tmotifs_Struct;

typedef Tmotifs_Struct *tmotifs_ptr;    /* ... pointer type, for convenience */


/*****************************************************************************/
/* Prototypes of locally used functions of the program unit                  */
/*****************************************************************************/


/* Loading and preparation of the input data                                 */

int factra_collect_data (strlist_ptr directories, char *input_file,
                         strlist_ptr motif_files, plain_ptr *sequences,
		                 win_ptr *win_motifs);
int factra_expand_motifs (win_ptr win_motifs, tmotifs_ptr *motifs);

/* Output header; note that the last parameter is named "gaps" here but      */
/*   "gap" in the definition                                                 */

void factra_comments (char *input_file, strlist_ptr motif_files,
                      float precision, int strand, bool gaps);

/* Matching, from the whole sequence list down to a single motif placement;  */
/*   each level returns a negative value on error                            */

int factra_match (plain_ptr sequences, tmotifs_ptr motifs,
                  float precision, int strand, bool gap);
int factra_single_sequence (bool acgt, long int start, long int stop,
                            char *seq_text, long int *sequence,
                            tmotifs_ptr motifs, float precision,
                            int strand, bool gap);
int factra_motif (bool acgt, long int start, int pos, char *seq_text,
                  long int *sequence, tmotifs_ptr motif, float precision,
                  int strand, bool gap);

/* Scoring of one motif placement: bit-code based and character based        */

int factra_acgt_score (int pos, long int *sequence, int length,
                       long int *motif);
int factra_universal_score (int pos, char *sequence,
                            int length, char *motif, bool gap);

/* Output of a single match and small helper routines                        */

void factra_output_line (long int start, int pos, char *text,
                         tmotifs_ptr motif, char code);
char factra_inverse (char symbol);
bool factra_subsumed (char minor, char major);
bool factra_is_float (char *string, float *value);


/*****************************************************************************/
/* Static variables of the program unit                                      */
/*****************************************************************************/


char **codes;
char **inverses;
long int Bit_Codes [26];
long int Bit_Inverses [26];

/*****************************************************************************/
/*                                                                           */
/* Code section                                                              */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/*                                                                           */
/* Procedure: main                                                           */
/*                                                                           */
/* "main" procedure of the program. Receives and analyses the command line   */
/*   parameters, sets the control variables of the program, checks their     */
/*   consistency and passes control to internal procedures which actually    */
/*   process the input data; returns 0 if everything is OK, non-zero status  */
/*   in case of any errors                                                   */

int main (int argc, char **argv)
{
 char *input_file; int arg_count, strand, match_count; float precision;
 bool precision_set, strand_set, gap_set, gap;
 strlist_ptr data_directory, new_path, path_scan;
 strlist_ptr motif_files, new_file, file_scan;
 plain_ptr sequences; win_ptr win_motifs; tmotifs_ptr motifs;
 
 /* Set the defaults; the directory list always starts with the default     */
 /*   directory, so paths given with "-I" are searched after it             */

 input_file = NULL;
 data_directory = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct));
 data_directory -> string = NTL0_strsave (DEFAULT_DIRECTORY);
 data_directory -> next = NULL;
 motif_files = NULL;
 precision_set = FALSE;
 precision = DEFAULT_PRECISION;
 strand_set = FALSE;
 strand = DEFAULT_STRAND;
 gap_set = FALSE;
 gap = DEFAULT_GAP;

 if (argc < 2) {                       /* Display instructions and terminate */
 
  fprintf (stderr, "usage: %s <input_file>\n", argv [0]);
  fprintf (stderr, "              [-I <directory_path>]*\n");
  fprintf (stderr, "              [-f <motif_file>]+\n");
  fprintf (stderr, "              [-p <matching_precision>]\n");
  fprintf (stderr, "              [-d | -i | -b]\n");
  fprintf (stderr, "              [-g | -x]\n");
  exit (1);
 }
 else {                           /* Some parameters provided - process them */

  /* Proceed to extract the command line parameters and create the settings  */

  /* Every branch below either advances "arg_count" or terminates the        */
  /*   program, so the loop always makes progress                            */

  arg_count = 1; while (arg_count < argc) {
 
   if (argv [arg_count] [0] != '-') {       /* Not an "-" option - file name */
    if (input_file != NULL) {
     fprintf (stderr, "Repeated file name '%s'.\n", argv [arg_count]);
     exit (1);
    }
    else {
     input_file = NTL0_strsave (argv [arg_count]);
     arg_count++;
    }
   }
   else if (!strcmp (argv [arg_count], "-I")) {        /* New "include" path */
    arg_count++;
    if (arg_count == argc) {
     fprintf (stderr, "Missing directory path for inclusion.\n"); exit (1);
    }
    else {
     new_path = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct));
     new_path -> string = NTL0_strsave (argv [arg_count]);
     new_path -> next = NULL;

     /* The list is seeded with the default directory above, so the NULL    */
     /*   case is not reached here; new paths are appended at the tail      */

     if (data_directory == NULL) data_directory = new_path;
     else {
      path_scan = data_directory;
      while (path_scan -> next != NULL) path_scan = path_scan -> next;
      path_scan -> next = new_path;
     }
     arg_count++;
    }
   }
   else if (!strcmp (argv [arg_count], "-f")) {   /* New TRANSFAC motif file */
    arg_count++;
    if (arg_count == argc) {
     fprintf (stderr, "Missing Wingender database motif file name.\n");
     exit (1);
    }
    else {
     new_file = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct));
     new_file -> string = NTL0_strsave (argv [arg_count]);
     new_file -> next = NULL;

     /* Motif files are kept in the order given on the command line         */

     if (motif_files == NULL) motif_files = new_file;
     else {
      file_scan = motif_files;
      while (file_scan -> next != NULL) file_scan = file_scan -> next;
      file_scan -> next = new_file;
     }
     arg_count++;
    }
   }
   else if (!strcmp (argv [arg_count], "-p")) {     /* Precision set request */

    /* The value follows as a separate argument; it must parse as a number  */
    /*   and lie between MIN_PRECISION and MAX_PRECISION                    */

    if (precision_set) {
     fprintf (stderr, "Match precision already set to %f.\n", precision);
     exit (1);
    }
    else if (arg_count > argc - 2) {
     fprintf (stderr, "Incomplete precision specification.\n"); exit (1);
    }
    else if (!factra_is_float (argv [arg_count + 1], &precision)) {
     fprintf (stderr, "Illegal match precision request.\n"); exit (1);
    }
    else if ((precision < MIN_PRECISION) || (precision > MAX_PRECISION)) {
     fprintf (stderr, "Illegal value for match precision.\n");
     exit (1);
    }
    else {
     precision_set = TRUE; arg_count += 2;
    }
   }

   /* "-d", "-i" and "-b" are mutually exclusive; only one may be given     */

   else if (!strcmp (argv [arg_count], "-d")) {   /* Direct matching request */
    if (strand_set) {
     fprintf (stderr, "Strand for matching already set.\n"); exit (1);
    }
    else { strand = LITERAL_MATCH; strand_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-i")) {  /* Inverse complement match */
    if (strand_set) {
     fprintf (stderr, "Strand for matching already set.\n"); exit (1);
    }
    else { strand = INVERSE_MATCH; strand_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-b")) {        /* Match both strands */
    if (strand_set) {
     fprintf (stderr, "Strand for matching already set.\n"); exit (1);
    }
    else { strand = BOTH_MATCH; strand_set = TRUE; arg_count++; }
   }

   /* "-g" and "-x" are mutually exclusive: "-g" clears the gap flag (gaps  */
   /*   are scored as ordinary characters), "-x" sets it (a match may not   */
   /*   contain a gap)                                                      */

   else if (!strcmp (argv [arg_count], "-g")) {       /* Ignore gaps request */
    if (gap_set) {
     fprintf (stderr, "Gap treatment already set.\n"); exit (1);
    }
    else { gap = FALSE; gap_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-x")) {      /* Exclude gaps request */
    if (gap_set) {
     fprintf (stderr, "Gap treatment already set.\n"); exit (1);
    }
    else { gap = TRUE; gap_set = TRUE; arg_count++; }
   }
   else {                                     /* Unknown command-line option */
    fprintf (stderr, "Illegal option (%s).\n", argv [arg_count]);
    fprintf (stderr, "usage: %s <input_file>\n", argv [0]);
    fprintf (stderr, "              [-I <directory_path>]*\n");
    fprintf (stderr, "              [-f <motif_file>]+\n");
    fprintf (stderr, "              [-p <matching_precision>]\n");
    fprintf (stderr, "              [-d | -i | -b]\n");
    fprintf (stderr, "              [-g | -x]\n");
    exit (1);
   }
  }
  /* Now check whether all necessary parameters have been provided           */

  if (input_file == NULL) {
   fprintf (stderr, "Must have sequences file to process.\n"); exit (1);
  }
  if (motif_files == NULL) {
   fprintf (stderr,
            "Must have at least one TRANSFAC data file to get motifs.\n");
   exit (1);
  }

  /* If everything is OK so far, proceed to collect data from the files      */

  /* Each stage reports a failure with a negative value; the stage itself    */
  /*   prints the diagnostic, so only the exit status is set here. Every     */
  /*   path ends in "exit", so control never reaches the closing brace       */

  if (factra_collect_data (data_directory, input_file, motif_files,
                           &sequences, &win_motifs) < 0) exit (1);
  else if (factra_expand_motifs (win_motifs, &motifs) < 0) exit (1);
  else {
   factra_comments (input_file, motif_files, precision, strand, gap);
   if ((match_count = factra_match (sequences, motifs,
                                    precision, strand, gap)) < 0) exit (1);
   else {
    printf ("\n# Found %d matching sites.\n", match_count);
    exit (0);
   }
  }
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: factra_collect_data                                            */
/*                                                                           */

/* Loads the sequences from "input_file" and the motifs from every file in   */
/*   "motif_files". A name starting with '/' or '~', or an empty directory   */
/*   list, is opened as given; otherwise the directories are tried in order  */
/*   and the first one in which the file can be opened is used. Motifs from  */
/*   all files are chained into a single list, in command-line order.        */
/*                                                                           */
/*   directories  list of directories to search (may be NULL)                */
/*   input_file   name of the sequences file                                 */
/*   motif_files  list of TRANSFAC (Wingender) database file names           */
/*   sequences    receives the loaded sequences                              */
/*   win_motifs   receives the combined list of loaded motifs                */
/*                                                                           */
/*   Returns 0 on success; -1/-3 when the sequences file cannot be opened,   */
/*   -2/-4 when it holds no sequences, -5/-7 when a motif file cannot be     */
/*   opened and -6/-8 when a motif file holds no motifs (the first code of   */
/*   each pair is for direct access, the second for the directory search).   */
/*   A diagnostic is written to stderr before any error return               */

int factra_collect_data (strlist_ptr directories, char *input_file,
                         strlist_ptr motif_files, plain_ptr *sequences,
                         win_ptr *win_motifs)
{
 errind report; char *new_path; strlist_ptr current_path, current_file;
 win_ptr last_motif, temp_list;

 *sequences = NULL; *win_motifs = NULL; last_motif = NULL;     /* Initialize */

 /* Open the sequences input file, check existence and load the data         */

 if ((directories == NULL) ||
     (input_file [0] == '/') || (input_file [0] == '~')) {
  if ((report = NTL2_Load_Plain (input_file, sequences)) != NULL) {
   fprintf (stderr, "Can't open sequences file '%s' (%s).\n",
                    input_file, report -> message);
   return -1;
  }
  else if (*sequences == NULL) {        /* File found, but nothing retrieved */
   fprintf (stderr, "No sequences to match.\n"); return -2;
  }
 }
 else {                     /* Directory path(s) provided, try them in order */
  report = NULL;
  for (current_path = directories; current_path != NULL;
       current_path = current_path -> next) {

   /* Room for the directory, the separating '/', the name and the '\0'      */

   new_path = NTL0_ckalloc ((strlen (current_path -> string) +
                             strlen (input_file) + 2) * sizeof (char));
   strcpy (new_path, current_path -> string);
   strcat (new_path, "/");        /* The name is known not to start with '/' */
   strcat (new_path, input_file);

   report = NTL2_Load_Plain (new_path, sequences);
   free (new_path);

   /* Stop at the first directory in which the file could be opened, even    */
   /*   if nothing was retrieved from it; retrying the same path forever     */
   /*   would never change the outcome                                       */

   if (report == NULL) break;
   *sequences = NULL;
  }
  if (report != NULL) {                        /* File not found on any path */
   fprintf (stderr, "Can't open sequences file '%s' (%s).\n",
                    input_file, report -> message);
   return -3;
  }
  else if (*sequences == NULL) {        /* File found, but nothing retrieved */
   fprintf (stderr, "No sequences to match.\n"); return -4;
  }
 }
 /* If sequences are correctly obtained, get the TRANSFAC motifs             */

 for (current_file = motif_files; current_file != NULL;
      current_file = current_file -> next) {

  /* Load motifs from any file to temporary list first, then connect others  */

  temp_list = NULL;
  if ((directories == NULL) ||                 /* No directory path provided */
      ((current_file -> string) [0] == '/') ||
      ((current_file -> string) [0] == '~')) {
   if ((report = NTL2_Load_WinFile (current_file -> string,
                                    &temp_list)) != NULL) {
    fprintf (stderr, "Can't open Wingender database file '%s' (%s).\n",
                     current_file -> string, report -> message);
    return -5;
   }
   else if (temp_list == NULL) {         /* File found, but no motifs loaded */
    fprintf (stderr, "No motifs in '%s'.\n", current_file -> string);
    return -6;
   }
  }
  else {                    /* Directory path(s) provided, try them in order */
   report = NULL;
   for (current_path = directories; current_path != NULL;
        current_path = current_path -> next) {
    new_path = NTL0_ckalloc ((strlen (current_path -> string) +
                      strlen (current_file -> string) + 2) * sizeof (char));
    strcpy (new_path, current_path -> string);
    strcat (new_path, "/");       /* The name is known not to start with '/' */
    strcat (new_path, current_file -> string);

    report = NTL2_Load_WinFile (new_path, &temp_list);
    free (new_path);

    /* As above: an opened file ends the search, loaded motifs or not        */

    if (report == NULL) break;
    temp_list = NULL;
   }
   if (report != NULL) {                       /* File not found on any path */
    fprintf (stderr, "Can't open Wingender database file '%s' (%s).\n",
                     current_file -> string, report -> message);
    return -7;
   }
   else if (temp_list == NULL) {          /* File found, but no motifs in it */
    fprintf (stderr, "No motifs in '%s'.\n", current_file -> string);
    return -8;
   }
  }
  /* If loading motifs from the current file was successful, connect data    */

  if (*win_motifs == NULL) *win_motifs = temp_list;
  else last_motif -> next = temp_list;

  /* Move the tail pointer to the end of the list which was just attached    */

  if (last_motif == NULL) last_motif = *win_motifs;
  while (last_motif -> next != NULL) last_motif = last_motif -> next;
 }
 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: factra_expand_motifs                                           */
/*                                                                           */

/* Converts the list of loaded TRANSFAC entries into the internal motif      */
/*   list used for matching. For each entry the string fields are taken over */
/*   as they are, and the inverse complement text and the bit-coded forms of */
/*   the motif and of its inverse complement are built. Each consumed entry  */
/*   record is released, its strings now being owned by the new motif.       */
/*                                                                           */
/*   win_motifs  list of loaded entries (consumed)                           */
/*   motifs      receives the list of expanded motifs, in the same order     */
/*                                                                           */
/*   Returns 0 on success, -11 (after a message on stderr) when the recorded */
/*   length of an entry does not agree with the length of its sequence       */

int factra_expand_motifs (win_ptr win_motifs, tmotifs_ptr *motifs)
{
 tmotifs_ptr new_motif, last_motif; win_ptr current_win; int index, letter;
 int table_size;

 /* Get the ambiguity codes and bit strings for all meaningful letters       */

 NTL1_Predefined_Codes (&codes, &inverses);
 NTL1_Integer_Codes (codes, Bit_Codes);
 NTL1_Integer_Codes (inverses, Bit_Inverses);

 table_size = (int) (sizeof (Bit_Codes) / sizeof (Bit_Codes [0]));

 *motifs = NULL; last_motif = NULL;

 /* Now convert all TRANSFAC motif records into internal format for matching */

 while (win_motifs != NULL) {
  current_win = win_motifs; win_motifs = win_motifs -> next;

  /* Validate the entry before anything is allocated for it, so that the     */
  /*   error return does not leave a half-built motif behind                 */

  if (current_win -> seq_length != strlen (current_win -> sequence)) {
   fprintf (stderr, "Incorrect sequence length for motif '%s' in file '%s'.\n",
                    current_win -> site_name, current_win -> file_name);
   return -11;
  }

  new_motif = (tmotifs_ptr) NTL0_ckalloc (sizeof (Tmotifs_Struct));

  new_motif -> file = current_win -> file_name;
  new_motif -> name = current_win -> site_name;
  new_motif -> bind_factor = current_win -> bind_factor;
  new_motif -> length = current_win -> seq_length;
  new_motif -> text = current_win -> sequence;

  /* Inverse complement: complemented letters, in reverse order              */

  new_motif -> inverse = (char *)
                    NTL0_ckalloc (((new_motif -> length) + 1) * sizeof (char));
  for (index = 0; index < new_motif -> length; index++) {
   (new_motif -> inverse) [index] =
      factra_inverse ((new_motif -> text) [(new_motif -> length) - index - 1]);
  }
  (new_motif -> inverse) [new_motif -> length] = '\0';

  /* The bit tables only cover the letters 'A' to 'Z'. Any other character   */
  /*   gets the code 0, which shares no bit with any sequence letter and so  */
  /*   can never score - the tables are not read outside their bounds        */

  new_motif -> coded = (long int *)
                    NTL0_ckalloc ((new_motif -> length) * sizeof (long int));
  for (index = 0; index < new_motif -> length; index++) {
   letter = (int) ((new_motif -> text) [index]) - (int) 'A';
   (new_motif -> coded) [index] =
          ((letter >= 0) && (letter < table_size)) ? Bit_Codes [letter] : 0;
  }
  new_motif -> inverted = (long int *)
                    NTL0_ckalloc ((new_motif -> length) * sizeof (long int));
  for (index = 0; index < new_motif -> length; index++) {
   letter = (int) ((new_motif -> text)
                   [(new_motif -> length) - index - 1]) - (int) 'A';
   (new_motif -> inverted) [index] =
       ((letter >= 0) && (letter < table_size)) ? Bit_Inverses [letter] : 0;
  }

  new_motif -> reference = current_win -> reference;
  new_motif -> next = NULL;

  free (current_win);         /* Only the record; its strings are still used */

  /* Connect the expanded TRANSFAC entry to the list of known motifs         */

  if (*motifs == NULL) *motifs = new_motif;
  else last_motif -> next = new_motif;
  last_motif = new_motif;
 }
 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: factra_comments                                                */
/*                                                                           */

/* Writes the header of the output to stdout: the data identifier line, the  */
/*   name of the sequences file, the motif files used, the matching settings */
/*   and a description of the layout of the result lines which follow        */

void factra_comments (char *input_file, strlist_ptr motif_files,
                      float precision, int strand, bool gap)
{
 strlist_ptr entry;

 /* Blank line to separate from earlier screen content, then the data ID     */

 printf ("\n");
 printf ("#:plain:\n\n");

 printf (
   "# TRANSFAC (Wingender) motif matching sites within sequences from: '%s'\n",
         input_file);

 /* Comma separated list of the motif files, closed by a new line            */

 printf ("# TRANSFAC data files used:  ");
 entry = motif_files;
 while (entry != NULL) {
  printf ("%s", entry -> string);
  entry = entry -> next;
  if (entry != NULL) printf (", ");
  else printf ("\n");
 }

 /* Matching settings: precision, strand(s) and treatment of gaps            */

 printf ("# Matching with precision %4.2f, ", precision);
 if (strand == LITERAL_MATCH) {
  printf ("directly.\n");
 }
 else {
  if (strand == INVERSE_MATCH) printf ("in inverse complement.\n");
  else printf ("both strands.\n");
 }

 if (!gap) {
  printf (
       "# Gaps (if any) in sequences are treated as any other character.\n\n");
 }
 else {
  printf ("# No match in a sequence can contain a gap (if any).\n\n");
 }

 /* Finally, the legend for the columns of the result lines                  */

 printf ("# Output structure:\n");
 printf (
  "#  from to seq_text motif_text site_name bind_fac strand WIN file ref\n\n");
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: factra_match                                                   */
/*                                                                           */

/* Searches every sequence of the list which has text for sites matching any */
/*   of the motifs. A sequence consisting only of the letters A, C, G and T  */
/*   is also converted to bit codes, so that the faster bit-code scoring can */
/*   be used for it; any other sequence is matched character by character.   */
/*                                                                           */
/*   sequences  list of sequences to search                                  */
/*   motifs     list of expanded motifs                                      */
/*   precision  fraction of motif positions which must match                 */
/*   strand     LITERAL_MATCH, INVERSE_MATCH or BOTH_MATCH                   */
/*   gap        TRUE if a match may not contain a gap                        */
/*                                                                           */
/*   Returns the total number of matching sites found, or -21 if matching    */
/*   of any sequence failed                                                  */

int factra_match (plain_ptr sequences, tmotifs_ptr motifs,
                  float precision, int strand, bool gap)
{
 plain_ptr current_seq; bool acgt_only; char *seq; long int *sequence_codes;
 int codes_length, seq_len, index, match_add, match_count;

 /* Initialize the buffer to hold integer codes of sequence letters          */

 sequence_codes = (long int *) NTL0_ckalloc (sizeof (long int));
 *sequence_codes = 0; codes_length = 1;

 match_count = 0;

 /* Loop for all given sequences to try to find matching sites in them       */

 for (current_seq = sequences;
      current_seq != NULL; current_seq = current_seq -> next) {
  if (current_seq -> text != NULL) {
   acgt_only = TRUE; seq = current_seq -> text; seq_len = strlen (seq);

   /* The buffer is shared by all sequences and only ever grows              */

   if (seq_len > codes_length) {
    sequence_codes = (long int *) NTL0_ckrealloc ((char *) sequence_codes,
                                                  seq_len * sizeof (long int));
    codes_length = seq_len;
   }
   for (index = 0; index < seq_len; index++) {
    if ((seq [index] == 'A') || (seq [index] == 'C') ||
        (seq [index] == 'G') || (seq [index] == 'T'))
     sequence_codes [index] = Bit_Codes [(int) (seq [index]) - (int) 'A'];
    else {

     /* The codes are only consulted for pure ACGT sequences, so there is    */
     /*   no point in encoding the rest of this one                          */

     acgt_only = FALSE; break;
    }
   }
   match_add = factra_single_sequence (acgt_only, current_seq -> start,
                                       current_seq -> stop, seq,
                                       sequence_codes, motifs,
                                       precision, strand, gap);
   if (match_add < 0) {
    free (sequence_codes);          /* Do not leak the buffer on the failure */
    return -21;
   }
   match_count += match_add;
  }
 }
 free (sequence_codes); return match_count;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: factra_single_sequence                                         */
/*                                                                           */

/* Tries every motif at every position of one sequence. The number of        */
/*   positions is taken from the range "start".."stop"; a motif is tried at  */
/*   a position only if it fits between that position and the end of range.  */
/*   Returns the number of placements reported as matches by "factra_motif", */
/*   or -31 as soon as that routine reports an error                         */

int factra_single_sequence (bool acgt, long int start, long int stop,
                            char *seq_text, long int *sequence,
                            tmotifs_ptr motifs, float precision,
                            int strand, bool gap)
{
 tmotifs_ptr candidate; int offset, found, outcome;

 found = 0; offset = 0;

 while (offset < stop - start + 1) {
  candidate = motifs;
  while (candidate != NULL) {

   /* Skip the motifs which would run past the end of the range              */

   if (candidate -> length <= stop - start - offset + 1) {
    outcome = factra_motif (acgt, start, offset, seq_text, sequence,
                            candidate, precision, strand, gap);
    if (outcome < 0) return -31;
    if (outcome != 0) found++;
   }
   candidate = candidate -> next;
  }
  offset++;
 }
 return found;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: factra_motif                                                   */
/*                                                                           */

/* Scores one motif placed at offset "pos" of a sequence, on the requested   */
/*   strand(s). Bit-code scoring is used when "acgt" is set, character       */
/*   scoring otherwise. A strand matches when its score divided by the motif */
/*   length is not below "precision" - EPSILON; a strand which was not       */
/*   requested gets the score 0. For a match an output line is written.      */
/*   Returns 1 for a match, 0 for no match and -41 to -44 if a scoring       */
/*   routine returned a negative value                                       */

int factra_motif (bool acgt, long int start, int pos, char *seq_text,
                  long int *sequence, tmotifs_ptr motif, float precision,
                  int strand, bool gap)
{
 int direct_hits, reverse_hits, direct_ok, reverse_ok;
 float direct_ratio, reverse_ratio; char verdict;

 /* Score of the motif as given                                              */

 direct_hits = 0;
 if ((strand == LITERAL_MATCH) || (strand == BOTH_MATCH)) {
  if (acgt) {
   direct_hits = factra_acgt_score (pos, sequence, motif -> length,
                                    motif -> coded);
   if (direct_hits < 0) return -41;
  }
  else {
   direct_hits = factra_universal_score (pos, seq_text, motif -> length,
                                         motif -> text, gap);
   if (direct_hits < 0) return -42;
  }
 }

 /* Score of the inverse complement of the motif                             */

 reverse_hits = 0;
 if ((strand == INVERSE_MATCH) || (strand == BOTH_MATCH)) {
  if (acgt) {
   reverse_hits = factra_acgt_score (pos, sequence, motif -> length,
                                     motif -> inverted);
   if (reverse_hits < 0) return -43;
  }
  else {
   reverse_hits = factra_universal_score (pos, seq_text, motif -> length,
                                          motif -> inverse, gap);
   if (reverse_hits < 0) return -44;
  }
 }

 direct_ratio = ((float) direct_hits) / ((float) (motif -> length));
 reverse_ratio = ((float) reverse_hits) / ((float) (motif -> length));

 /* A strand qualifies unless its ratio is below the required precision      */

 direct_ok = !(direct_ratio < precision - EPSILON);
 reverse_ok = !(reverse_ratio < precision - EPSILON);

 if (direct_ok) verdict = reverse_ok ? BOTH_STRAND_CODE : LITERAL_CODE;
 else verdict = reverse_ok ? INVERSE_CODE : MISMATCH_CODE;

 if (verdict == MISMATCH_CODE) return 0;

 factra_output_line (start, pos, seq_text, motif, verdict);
 return 1;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: factra_acgt_score                                              */
/*                                                                           */

/* Counts the positions at which the bit code of the sequence letter and the */
/*   bit code of the motif letter have at least one bit in common, for the   */
/*   motif of "length" codes placed at offset "pos" of the sequence          */

int factra_acgt_score (int pos, long int *sequence,
                       int length, long int *motif)
{
 int hits = 0, k = 0;

 while (k < length) {
  if (((sequence [pos + k]) & (motif [k])) != 0) hits++;
  k++;
 }
 return hits;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: factra_universal_score                                         */
/*                                                                           */

/* Character based score of the motif of "length" letters placed at offset   */
/*   "pos" of the sequence: a position counts when the two letters are the   */
/*   same or when "factra_subsumed" accepts the pair. If "gap" is set, the   */
/*   score is 0 as soon as a gap symbol is met in the sequence               */

int factra_universal_score (int pos, char *sequence,
                            int length, char *motif, bool gap)
{
 int hits = 0, k; char residue, wanted;

 for (k = 0; k < length; k++) {
  residue = sequence [pos + k]; wanted = motif [k];

  if ((gap) && (residue == GAP_SYMBOL)) return 0;
  if ((residue == wanted) || factra_subsumed (residue, wanted)) hits++;
 }
 return hits;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: factra_output_line                                             */
/*                                                                           */

/* Writes one result line to stdout for a motif matched at offset "pos" of a */
/*   sequence whose first position is numbered "start". The fields are: the  */
/*   first and last position of the site, the matched sequence text, the     */
/*   motif text, the quoted site name and binding factor, the strand code,   */
/*   the word WIN, the motif file name without its directory part and the    */
/*   reference code. A missing (NULL) string is written as the word "null"   */

void factra_output_line (long int start, int pos, char *text,
                         tmotifs_ptr motif, char code)
{
 int index; char *suffix, *slash, *reference;

 printf ("%ld %ld ", start + pos, start + pos + (motif -> length) - 1);
 for (index = pos; index < pos + (motif -> length); index++)
  printf ("%c", text [index]);
 printf (" %s ", motif -> text);
 if (motif -> name == NULL) printf ("null ");
 else printf ("\"%s\" ", motif -> name);
 if (motif -> bind_factor == NULL) printf ("null ");
 else printf ("\"%s\" ", motif -> bind_factor);
 printf ("%c WIN ", code);

 /* Strip the directory part: keep what follows the last '/', if there is    */
 /*   one. "strrchr" copes with an empty name and with a '/' in the first    */
 /*   position, neither of which a backward scan from the last character     */
 /*   handles correctly                                                      */

 suffix = motif -> file;
 if (suffix == NULL) suffix = "null";
 else if ((slash = strrchr (suffix, '/')) != NULL) suffix = slash + 1;

 reference = motif -> reference;
 if (reference == NULL) reference = "null";

 printf ("%s %s\n", suffix, reference);
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: factra_inverse                                                 */
/*                                                                           */

/* Returns the complement of a nucleotide or ambiguity letter. The letters   */
/*   W, S, N and X and the gap symbol are returned unchanged; any symbol     */
/*   which is not recognized gives 'N'                                       */

char factra_inverse (char symbol)
{
 switch (symbol) {

  /* Single nucleotides                                                      */

  case 'A': return 'T';
  case 'C': return 'G';
  case 'G': return 'C';
  case 'T': return 'A';

  /* Ambiguity letters which are their own complement                        */

  case 'W': return 'W';
  case 'S': return 'S';
  case 'N': return 'N';
  case 'X': return 'X';

  /* Ambiguity letters which come in complementary pairs                     */

  case 'Y': return 'R';
  case 'R': return 'Y';
  case 'M': return 'K';
  case 'K': return 'M';
  case 'B': return 'V';
  case 'V': return 'B';
  case 'D': return 'H';
  case 'H': return 'D';

  default: break;
 }
 if (symbol == GAP_SYMBOL) return GAP_SYMBOL;
 return 'N';
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: factra_subsumed                                                */
/*                                                                           */

/* Tells whether the symbol "minor" is covered by the symbol "major". Equal  */
/*   symbols always are; a gap is covered by nothing else; N and X cover     */
/*   everything but a gap; A, C, G and T cover only themselves. Otherwise    */
/*   the answer comes from the tables below: which ambiguity letters cover   */
/*   each nucleotide, and which two-way letters each of B, D, H, V covers    */

bool factra_subsumed (char minor, char major)
{
 if (minor == major) return TRUE;
 if (minor == GAP_SYMBOL) return FALSE;

 switch (major) {
  case 'N': case 'X':
   return TRUE;
  case 'A': case 'C': case 'G': case 'T':
   return FALSE;
  default:
   break;
 }

 /* A single nucleotide against an ambiguity letter                          */

 switch (minor) {
  case 'A':
   switch (major) {
    case 'W': case 'R': case 'M': case 'D': case 'H': case 'V': return TRUE;
    default: return FALSE;
   }
  case 'C':
   switch (major) {
    case 'Y': case 'S': case 'M': case 'B': case 'H': case 'V': return TRUE;
    default: return FALSE;
   }
  case 'G':
   switch (major) {
    case 'S': case 'R': case 'K': case 'B': case 'D': case 'V': return TRUE;
    default: return FALSE;
   }
  case 'T':
   switch (major) {
    case 'W': case 'Y': case 'K': case 'B': case 'D': case 'H': return TRUE;
    default: return FALSE;
   }
  default:
   break;
 }

 /* A two-way ambiguity letter against a three-way one                       */

 switch (major) {
  case 'B':
   if ((minor == 'K') || (minor == 'S') || (minor == 'Y')) return TRUE;
   break;
  case 'D':
   if ((minor == 'K') || (minor == 'R') || (minor == 'W')) return TRUE;
   break;
  case 'H':
   if ((minor == 'M') || (minor == 'W') || (minor == 'Y')) return TRUE;
   break;
  case 'V':
   if ((minor == 'M') || (minor == 'R') || (minor == 'S')) return TRUE;
   break;
  default:
   break;
 }
 return FALSE;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: factra_is_float                                                */
/*                                                                           */
/* Procedure determines whether the input string represents a floating point */
/*   (real) number, and places its value into a reference parameter; returns */
/*   TRUE if a correct number has been extracted, FALSE otherwise            */

/* Accepted form: an optional '-', up to MAX_WHOLE digits of the whole part, */
/*   and optionally a '.' followed by up to MAX_FRAC digits of the fraction. */
/*   Either part may be left out ("5.", ".5"), but at least one digit must   */
/*   be present. The whole part may start with '0' only if that is its sole  */
/*   digit. "value" is written only when TRUE is returned                    */

bool factra_is_float (char *string, float *value)
{
 char *s; bool negative; int whole, frac, whole_digits, frac_digits;
 float real_frac;

 s = string; whole = 0; frac = 0; whole_digits = 0; frac_digits = 0;

 if (*s == '-') { negative = TRUE; s++; }
 else negative = FALSE;

 /* No leading zeros in the whole part. The test is made before the decimal  */
 /*   point is consumed, so that zeros which open the fraction (as in ".05") */
 /*   are not mistaken for them                                              */

 if ((*s == '0') && (s [1] != '\0') && (s [1] != '.')) return FALSE;

 while ((*s >= '0') && (*s <= '9')) {                          /* Whole part */
  whole = whole * 10 + (int) (*s - '0'); s++;
  if (++whole_digits > MAX_WHOLE) return FALSE;
 }
 if (*s == '.') {                                         /* Fraction part */
  s++;
  while ((*s >= '0') && (*s <= '9')) {
   frac = frac * 10 + (int) (*s - '0'); s++;
   if (++frac_digits > MAX_FRAC) return FALSE;
  }
 }

 /* Anything left over (a letter, a second '.', ...) makes the string wrong, */
 /*   and so does the absence of any digit ("", "-", ".", "-.")              */

 if (*s != '\0') return FALSE;
 if (whole_digits + frac_digits == 0) return FALSE;

 /* Scale the fraction digits down to their decimal weight and combine       */

 real_frac = (float) frac;
 while (frac_digits > 0) { real_frac = real_frac / 10; frac_digits--; }

 *value = (float) whole; *value += real_frac;
 if (negative) *value = -(*value);
 return TRUE;
}


