
static char const rcsid [] = "$Id: agree.c,v 1.1 1998/04/21 10:27:17 stojanov Exp $";


/*****************************************************************************/
/*                                                                           */
/* Program: agree (utility for locating regions of good agreement in algns.) */
/*                                                                           */
/* Author: Nikola Stojanovic                                                 */
/*                                                                           */
/* Revision:    22 JUL 97   Version 1.0                                      */
/*                                                                           */
/*   Given the alignment (file and the sequences) this program determines    */
/* ...                                                                       */
/*                                                                           */
/*****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ntl.h"

/*****************************************************************************/
/*                                                                           */
/* Definitions section                                                       */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/* Definitions of the constants of the program unit                          */
/*****************************************************************************/



#define UNDEFINED                        -31847

#define DEFAULT_PATH                     "."

#define DEFAULT_RANGE_SEQUENCE           "human"
#define DEFAULT_SEQUENCE                 NULL
#define DEFAULT_RANGE_START              0
#define DEFAULT_RANGE_STOP               0

#define DEFAULT_COUNTER_NUMBER           1

#define MAX_LONG_LEN                     9
#define MININT_VALUE                     -32000
#define MAXINT_VALUE                     32000

#define PERCENTAGE_AGREEMENT             1
#define EXCLUSION_AGREEMENT              2
#define DEFAULT_AGREEMENT                PERCENTAGE_AGREEMENT

#define DEFAULT_PERCENTAGE               75
#define DEFAULT_EXCLUSION                UNDEFINED

#define DEFAULT_MINIMAL_LENGTH           6
#define DEFAULT_MINIMAL_ACTIVE           3
#define DEFAULT_GAP_TOLERANCE            FALSE

#define INITIAL_CONS_LEN                 66


/*****************************************************************************/
/* Types used in the program unit                                            */
/*****************************************************************************/



/*****************************************************************************/
/* Prototypes of locally used functions of the program unit                  */
/*****************************************************************************/


int agree_load_data (strlist_ptr paths, char *file_name, char *sequence,
                     long int start, long int stop, restricted_ptr *alignment,
                     long int *range_len, char **counter_seq, int *counter,
                     long int *track);
void agree_output_header (char *file_name, char *sequence, long int start,
                          long int stop, char *counter_seq, int counter,
                          int agreement, int percentage, int exclusion,
                          int length, int active, bool gaps);
int agree_process_data (long int range_len, char *counter_seq, long int track,
                        restricted_ptr alignment, int agreement,
                        int percentage, int exclusion, int length, int active,
                        bool gaps);
int agree_next_region (long int range_len, char *counter_seq,
                       restricted_ptr alignment, int agreement,
                       int percentage, int exclusion, int active, bool gaps,
                       long int *index, long int *track_report,
                       long int *start, long int *stop, long int *low,
                       long int *high, char **consensus, bool *accepted);
int agree_accept_column (restricted_ptr alignment, int index, int agreement,
                         int percentage, int exclusion, int active, bool gaps,
                         char *ch, bool *accepted);
bool agree_is_long (char *string, long int *value);
bool agree_is_int (char *string, int *value);

/*****************************************************************************/
/*                                                                           */
/* Code section                                                              */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/*                                                                           */
/* Procedure: main                                                           */
/*                                                                           */
/* "main" procedure of the program. Receives and analyses the command line   */
/*   parameters, sets the control variables of the program, checks their     */
/*   consistency and passes control to internal procedures which actually    */
/*   process the input data; returns 0 if everything is OK, non-zero status  */
/*   in case of any errors                                                   */

int main (int argc, char **argv)
{
 char *file_name, *sequence, *counter_seq, *save_count;
 bool range_set, counter_locked, strictness_set, gaps;
 long int start, stop, range_len, track;
 strlist_ptr registered_paths, new_path, path_scan;
 int counter, agreement, percentage, exclusion, length, active, arg_count;
 restricted_ptr alignment;

 /* Initialize the settings and prepare for the processing of the arguments  */
 
 file_name = NULL;
 range_set = FALSE; start = stop = UNDEFINED; sequence = NULL;
 if (DEFAULT_PATH == NULL) registered_paths = NULL;
 else {
  registered_paths = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct));
  registered_paths -> string = DEFAULT_PATH;
  registered_paths -> next = NULL;
 }
 counter_locked = FALSE; counter_seq = NULL; counter = UNDEFINED;
 agreement = UNDEFINED; percentage = UNDEFINED; exclusion = UNDEFINED;
 length = UNDEFINED; active = UNDEFINED;
 strictness_set = FALSE; gaps = FALSE;
 
 if (argc < 2) {
  fprintf (stderr, "usage: %s <alignment_file>\n", argv [0]);
  fprintf (stderr, "             [-I <directory_path>]*\n");
  fprintf (stderr, "             [-r <from> <to> [-s <sequence>]]\n");
  fprintf (stderr, "             [-c <counter_sequence> | -n <seq_number>]\n");
  fprintf (stderr, "             [-p <percentage> | -e <exclusion>]\n");
  fprintf (stderr, "             [-l <min_length_of_conserved_region]\n");
  fprintf (stderr, "             [-a <min_active_sequences>]\n");
  fprintf (stderr, "             [-g | -x]\n");
  exit (1);
 }
 else {                           /* Some parameters provided - process them */
 
  /* Proceed to extract the command line parameters and create the settings  */

  arg_count = 1; while (arg_count < argc) {
  
   if (argv [arg_count] [0] != '-') {       /* Not an "-" option - file name */
    if (file_name != NULL) {
     fprintf (stderr, "Repeated alignment file name ('%s').\n",
                      argv [arg_count]);
     exit (1);
    }
    else { file_name = NTL0_strsave (argv [arg_count]); arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-I")) {        /* New "include" path */
    arg_count++;
    if (arg_count == argc) {
     fprintf (stderr, "Missing directory path for inclusion.\n"); exit (1);
    }
    else {
     new_path = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct));
     new_path -> string = NTL0_strsave (argv [arg_count]);
     new_path -> next = NULL;
     
     if (registered_paths == NULL) registered_paths = new_path;
     else {
      path_scan = registered_paths;
      while (path_scan -> next != NULL) path_scan = path_scan -> next;
      path_scan -> next = new_path;
     }
     arg_count++;
    }
   }
   else if (!strcmp (argv [arg_count], "-r")) {             /* Range request */
    if (range_set) {
     fprintf (stderr,
              "Range for alignment loading already set to [%ld-%ld].\n",
              start, stop);
     exit (1);
    }
    else if (arg_count > argc - 3) {
     fprintf (stderr, "Incomplete range specification.\n"); exit (1);
    }
    else if ((!agree_is_long (argv [arg_count + 1], &start)) ||
             (!agree_is_long (argv [arg_count + 2], &stop))) {
     fprintf (stderr, "Illegal specification for the range.\n"); exit (1);
    }
    else if ((start < 0) || (stop < 0)) {
     fprintf (stderr, "Negative values not permitted for the range.\n");
     exit (1);
    }
    else if (start > stop) {
     fprintf (stderr, "Range start (%ld) greater than range end (%ld).\n",
                      start, stop);
     exit (1);
    }
    else { range_set = TRUE; arg_count += 3; }
   }
   else if (!strcmp (argv [arg_count], "-s")) {    /* Range sequence specif. */
    arg_count++;
    if (sequence != NULL) {
     fprintf (stderr, "Sequence for alignment range already set.\n");
     exit (1);
    }
    else { sequence = NTL0_strsave (argv [arg_count]); arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-c")) {     /* Counter sequence name */
    arg_count++;
    if (counter_locked) {
     fprintf (stderr, "Counter sequence in alignment already set.\n");
     exit (1);
    }
    else {
     counter_seq = NTL0_strsave (argv [arg_count]); counter_locked = TRUE;
     arg_count++;
    }
   }
   else if (!strcmp (argv [arg_count], "-n")) {   /* Counter sequence number */
    if (counter_locked) {
     fprintf (stderr, "Counter sequence in alignment already set.\n");
     exit (1);
    }
    else if (arg_count > argc - 2) {
     fprintf (stderr, "Incomplete sequence number specification.\n"); exit (1);
    }
    else if (!agree_is_int (argv [arg_count + 1], &counter)) {
     fprintf (stderr, "Illegal sequence number (%s).\n", argv [arg_count + 1]);
     exit (1);
    }
    else if (counter < 0) {
     fprintf (stderr, "Negative values not permitted for sequence number.\n");
     exit (1);
    }
    else { counter_locked = TRUE; arg_count += 2; }
   }
   else if (!strcmp (argv [arg_count], "-p")) { /* Percentage agreement req. */
    if (agreement != UNDEFINED) {
     fprintf (stderr, "Kind of the agreement already set.\n"); exit (1);
    }
    else if (arg_count > argc - 2) {
     fprintf (stderr, "Incomplete agreement kind specification.\n"); exit (1);
    }
    else if (!agree_is_int (argv [arg_count + 1], &percentage)) {
     fprintf (stderr, "Illegal percentage agreement (%s).\n",
                      argv [arg_count + 1]);
     exit (1);
    }
    else if ((percentage < 1) || (percentage > 100)) {
     fprintf (stderr, "Illegal percentage agreement (%d).\n", percentage);
     exit (1);
    }
    else { agreement = PERCENTAGE_AGREEMENT; arg_count += 2; }
   }
   else if (!strcmp (argv [arg_count], "-e")) {  /* Exclusion agreement req. */
    if (agreement != UNDEFINED) {
     fprintf (stderr, "Kind of the agreement already set.\n");
     exit (1);
    }
    else if (arg_count > argc - 2) {
     fprintf (stderr, "Incomplete agreement kind specification.\n"); exit (1);
    }
    else if (!agree_is_int (argv [arg_count + 1], &exclusion)) {
     fprintf (stderr, "Illegal exclusion agreement (%s).\n",
                      argv [arg_count + 1]);
     exit (1);
    }
    else if (exclusion < 0) {
     fprintf (stderr, "Negative number of exclusion sequences (%d).\n",
                      exclusion);
     exit (1);
    }
    else { agreement = EXCLUSION_AGREEMENT; arg_count += 2; }
   }
   else if (!strcmp (argv [arg_count], "-l")) {     /* Region length request */
    if (length != UNDEFINED) {
     fprintf (stderr,
              "Minimal length of conserved region already set to %d.\n",
              length);
     exit (1);
    }
    else if (arg_count > argc - 2) {
     fprintf (stderr, "Incomplete length specification.\n"); exit (1);
    }
    else if (!agree_is_int (argv [arg_count + 1], &length)) {
     fprintf (stderr, "Illegal minimal region length (%s).\n",
                      argv [arg_count + 1]);
     exit (1);
    }
    else if (length <= 0) {
     fprintf (stderr, "Positive length required for conserved regions.");
     exit (1);
    }
    else arg_count += 2;
   }
   else if (!strcmp (argv [arg_count], "-a")) {  /* Active sequences request */
    if (active != UNDEFINED) {
     fprintf (stderr,
              "Minimal number of active sequences already set to %d.\n",
              active);
     exit (1);
    }
    else if (arg_count > argc - 2) {
     fprintf (stderr, "Incomplete active sequences specification.\n");
     exit (1);
    }
    else if (!agree_is_int (argv [arg_count + 1], &active)) {
     fprintf (stderr, "Illegal minimal number of active sequences (%s).\n",
                      argv [arg_count + 1]);
     exit (1);
    }
    else if (active <= 0) {
     fprintf (stderr, "Positive number of active sequences required.");
     exit (1);
    }
    else arg_count += 2;
   }
   else if (!strcmp (argv [arg_count], "-g")) {         /* Tolerate the gaps */
    if (strictness_set) {
     fprintf (stderr, "Kind of requested regions already set.\n"); exit (1);
    }
    else { gaps = TRUE; strictness_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-x")) {        /* Gaps not permitted */
    if (strictness_set) {
     fprintf (stderr, "Kind of requested regions already set.\n"); exit (1);
    }
    else { gaps = FALSE; strictness_set = TRUE; arg_count++; }
   }
   else {                                              /* Unknown "-" option */
    fprintf (stderr, "usage: %s <alignment_file>\n", argv [0]);
    fprintf (stderr, "             [-I <directory_path>]*\n");
    fprintf (stderr, "             [-r <from> <to> [-s <sequence>]]\n");
    fprintf (stderr,
                  "             [-c <counter_sequence> | -n <seq_number>]\n");
    fprintf (stderr, "             [-p <percentage> | -e <exclusion>]\n");
    fprintf (stderr, "             [-l <min_length_of_conserved_region]\n");
    fprintf (stderr, "             [-a <min_active_sequences>]\n");
    fprintf (stderr, "             [-g | -x]\n");
    exit (1);
   }
  }
  /* Check whether all necessary data have been collected and consistent     */

  if (file_name == NULL) {
   fprintf (stderr, "Must have an alignment to process.\n"); exit (1);
  }
  else if ((range_set) && (sequence == NULL)) {
   sequence = NTL0_strsave (DEFAULT_RANGE_SEQUENCE);
  }
  else if (!range_set) {
   if (sequence != NULL) {
    fprintf (stderr, "Sequence '%s' provided without range.\n", sequence);
    exit (1);
   }
   else {
    sequence = NTL0_strsave (DEFAULT_SEQUENCE);
    start = DEFAULT_RANGE_START; stop = DEFAULT_RANGE_STOP;
   }
  }

  if (!counter_locked) counter = DEFAULT_COUNTER_NUMBER;
  if (agreement == UNDEFINED) {
   agreement = DEFAULT_AGREEMENT;
   percentage = DEFAULT_PERCENTAGE; exclusion = DEFAULT_EXCLUSION;
  }
  if (length == UNDEFINED) length = DEFAULT_MINIMAL_LENGTH;
  if (active == UNDEFINED) active = DEFAULT_MINIMAL_ACTIVE;

  if (!strictness_set) gaps = DEFAULT_GAP_TOLERANCE;

  save_count = NTL0_strsave (counter_seq);
  
  /* Proceed to collect (load) the data to be processed by the algorithm     */

  if (agree_load_data (registered_paths, file_name, sequence, start, stop,
                       &alignment, &range_len, &counter_seq, &counter,
                       &track) < 0)
   exit (1);

  agree_output_header (file_name, sequence, start, stop, save_count, counter,
                       agreement, percentage, exclusion, length, active, gaps);
                       
  if (agree_process_data (range_len, counter_seq, track, alignment, agreement,
                          percentage, exclusion, length, active, gaps) < 0)
   exit (1);

  exit (0);
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: agree_load_data                                                 */
/*                                                                           */
/* Procedure collects alignment data with respect to     */
/*   provided settings and returns them within its reference parameters;     */
/*   returns 0 if everything is OK, negative status otherwise                */

int agree_load_data (strlist_ptr paths, char *file_name, char *sequence,
                     long int start, long int stop, restricted_ptr *alignment,
                     long int *range_len, char **counter_seq, int *counter,
                     long int *track)
{
 strlist_ptr current_path; char *new_path; int index, count;
 header_ptr file_info; align_ptr packed; unpacked_ptr expanded;
 line_ptr line_al, last_line; errind err_stat; bool found;
 
 /* Load the alignment into an internal structure first                      */

 if ((paths == NULL) ||
     (file_name [0] == '/') || (file_name [0] == '~')) {
  if ((err_stat = NTL2_Load_AlignFile (file_name, &file_info)) != NULL) {
   fprintf (stderr, "Can't open alignment file '%s' (%s).\n",
                    file_name, err_stat -> message);
   return -1;
  }
  else if (file_info == NULL) {        /* File found, but nothing retrieved */
   fprintf (stderr, "No alignment to process.\n"); return -2;
  }
 }
 else {                     /* Directory path(s) provided, try them in order */
  current_path = paths; file_info = NULL;
  while ((file_info == NULL) && (current_path != NULL)) {
   new_path = NTL0_ckalloc ((strlen (current_path -> string) +
                             strlen (file_name) + 2) * sizeof (char));
   strcpy (new_path, current_path -> string);
   if (new_path [strlen (current_path -> string) - 1] != '/')
    strcat (new_path, "/");
   strcat (new_path, file_name);
   
   if ((err_stat = NTL2_Load_AlignFile (new_path, &file_info)) != NULL) {
    file_info = NULL; current_path = current_path -> next;
   }
   free (new_path);
  }
  if (err_stat != NULL) {                      /* File not found on any path */
   fprintf (stderr, "Can't open alignment file '%s' (%s).\n",
                    file_name, err_stat -> message);
   return -3;
  }
  else if (file_info == NULL) {         /* File found, but nothing retrieved */
   fprintf (stderr, "No alignment to process.\n"); return -4;
  }
 }
 /* Now proceed to partially unpack the information from the loaded file     */
 
 if ((err_stat = NTL1_Load_Alignment (1, file_info, &packed)) != NULL) {
  fprintf (stderr, "Can't extract the alignment from the file (%s).\n",
                   err_stat -> message);
  return -5;
 }
 else {
  if ((err_stat = NTL2_Unpack_Alignment (1, file_info, packed, paths,
                                         &expanded)) != NULL) {
   fprintf (stderr, "Can't expand the alignment (%s).\n", err_stat -> message);
   return -6;
  }
  else {   /* Expansion of the full alignment done OK, check for restriction */

   if (sequence != NULL) {
    if ((err_stat = NTL1_Cut_Alignment (file_info, expanded, sequence,
                                        start, stop)) != NULL) {
     if (err_stat -> kind == WARNING) {
      fprintf (stderr, "WARNING:  %s.\n", err_stat -> message);
     }
     else {
      fprintf (stderr, "Can't get the range from the alignment (%s).\n",
                       err_stat -> message);
      return -7;
     }
    }
   }
  }
 }
 /* Locate the sequence in the alignment that sets the counter for output    */

 if (*counter != UNDEFINED) {            /* Selection by the sequence number */
  if (*counter > expanded -> dimension) {
   fprintf (stderr,
            "Counter sequence (%d) not in the alignment (of dimension %d).\n",
            *counter, expanded -> dimension);
   return -8;
  }
  else if ((expanded -> segment_code) [(*counter) - 1] != VALID_SEGMENT) {
   fprintf (stderr, "Counter sequence (%d) not active in the range.\n",
                    *counter);
   return -9;
  }
  else {                         /* Existing and valid sequence in the range */
   *counter_seq = (expanded -> texts) [(*counter) - 1];
   *range_len = expanded -> size;
   *track = (expanded -> begin) [(*counter) - 1];
  }
 }
 else if (*counter_seq == NULL) {
  fprintf (stderr, "Must have a way to locate the counter sequence.\n");
  return -10;
 }
 else {                                /* Counter sequence given by its name */
  index = 0; found = FALSE;
  while ((!found) && (index < expanded -> dimension)) {
   if ((expanded -> segment_code) [index] == VALID_SEGMENT) {
    if (!strcmp (((file_info -> sequences) [index]).seq_name, *counter_seq))
     found = TRUE;
    else index++;
   }
   else index++;
  }
  if (!found) {
   fprintf (stderr, "Counter sequence '%s' not found in the selected range.\n",
                    *counter_seq);
   return -11;
  }
  index++; *counter = index; found = FALSE;
  while ((!found) && (index < expanded -> dimension)) {
   if ((expanded -> segment_code) [index] == VALID_SEGMENT) {
    if (!strcmp (((file_info -> sequences) [index]).seq_name, *counter_seq))
     found = TRUE;
    else index++;
   }
   else index++;
  }
  if (found) {
   fprintf (stderr, "Ambiguos counter sequence '%s' in the selected range.\n",
            *counter_seq);
   return -12;
  }
  else {                        /* Unique sequence found with the given name */
   free (*counter_seq);
   *counter_seq = (expanded -> texts) [(*counter) - 1];
   *range_len = expanded -> size;
   *track = (expanded -> begin) [(*counter) - 1];
  }
 }
 /* Now copy expanded alignment into more compact structure for processing   */
 
 *alignment = (restricted_ptr) NTL0_ckalloc (sizeof (Restricted_Struct));
 
 count = 0;
 for (index = 0; index < expanded -> dimension; index++) {
  if ((expanded -> segment_code) [index] == VALID_SEGMENT) count++;
 }
 (*alignment) -> dimension = count;
 (*alignment) -> size = expanded -> size;
 
 (*alignment) -> texts = (char **) NTL0_ckalloc (count * sizeof (char *));
 (*alignment) -> begin = (long int *)
                         NTL0_ckalloc (count * sizeof (long int *));
 (*alignment) -> end = (long int *)
                       NTL0_ckalloc (count * sizeof (long int *));
 (*alignment) -> starts = (long int *)
                          NTL0_ckalloc (count * sizeof (long int *));
 (*alignment) -> stops = (long int *)
                         NTL0_ckalloc (count * sizeof (long int *));

 count = 0;
 for (index = 0; index < expanded -> dimension; index++) {
  if ((expanded -> segment_code) [index] == VALID_SEGMENT) {
   ((*alignment) -> texts) [count] = (expanded -> texts) [index];
   ((*alignment) -> begin) [count] = (expanded -> begin) [index];
   ((*alignment) -> end) [count] = (expanded -> end) [index];
   ((*alignment) -> starts) [count] = (expanded -> starts) [index];
   ((*alignment) -> stops) [count] = (expanded -> stops) [index];
   count++;
  }
 }
 /* Destroy the "file_info" structure, since its contents are not needed     */

 /* ### To be defined ###################################################### */
    
 /* Destroy "partially unpacked" structure used in the expansion - done      */
    
 if (packed -> begin != NULL) free (packed -> begin);
 if (packed -> end != NULL) free (packed -> end);
    
 if (packed -> lines != NULL) {
  for (index = 0; index < expanded -> dimension; index++) {
   line_al = (packed -> lines) [index]; while (line_al != NULL) {
    last_line = line_al; line_al = line_al -> next; free (last_line);
   }
  }
 }
 free (packed);

 /* Destroy the expanded alignment info now when the restricted copy made    */
    
 /* ### To be defined ###################################################### */

 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: agree_output_header                                            */
/*                                                                           */

void agree_output_header (char *file_name, char *sequence, long int start,
                          long int stop, char *counter_seq, int counter,
                          int agreement, int percentage, int exclusion,
                          int length, int active, bool gaps)
{
 printf ("#:plain:\n\n");
 printf ("# Output of the column agreement region location utility.\n");
 printf ("# Alignment file:  '%s'\n", file_name);
 if (sequence != NULL)
  printf ("#   restricted to the range [%ld,%ld] in \"%s\".\n",
          start, stop, sequence);
 if (counter_seq != NULL)
  printf ("# Counting done in '%s' (sequence %d in the alignment).\n",
          counter_seq, counter);
 else printf ("# Counting done in sequence %d of the alignment.\n", counter);
 if (agreement == PERCENTAGE_AGREEMENT) {
  if (percentage == 100)
   printf (
   "# All rows in a column must have the same character (full agreement).\n");
  else
   printf ("# Rows within a column must be in at least %d percent agreement.\n",
           percentage);
 }
 else {
  if (exclusion == 0)
   printf (
   "# All rows in a column must have the same character (full agreement).\n");
  else
   printf (
 "# At most %d rows in a column can have character different from majority.\n",
           exclusion);
 }
 printf ("# Minimal length of the region required:  %d\n", length);
 printf ("# Minimal number of active sequences required in regions:  %d\n",
         active);
 printf ("# Kind of regions requested:  ");
 if (gaps)
  printf ("general (no special treatment of gaps).\n\n\n");
 else
  printf ("strict (no gaps permitted within regions).\n\n\n");
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: agree_process_data                                             */
/*                                                                           */

int agree_process_data (long int range_len, char *counter_seq, long int track,
                        restricted_ptr alignment, int agreement,
                        int percentage, int exclusion, int length, int active,
                        bool gaps)
{
 long int lines_in_output, index, track_report, start, stop, low, high;
 char *consensus; bool accepted;
 
 lines_in_output = 0;
 
 if (alignment -> dimension < active) {
  fprintf (stderr,
           "WARNING: The number of active sequences within the region (%d)\n",
           alignment -> dimension);
  fprintf (stderr, "         is less than minimum required (%d).\n", active);
 }
 else if (range_len < length) {
  fprintf (stderr, "WARNING: The total length of the segment (%ld)\n",
           range_len);
  fprintf (stderr, "         is less than minimal length of a region (%d).\n",
           length);
 }
 else {
  index = 0; track_report = track; while (index < range_len - length + 1) {
   if (agree_next_region (range_len, counter_seq, alignment, agreement,
                          percentage, exclusion, active, gaps, &index,
                          &track_report, &start, &stop, &low, &high,
                          &consensus, &accepted) < 0) {
    fprintf (stderr, "General error locating a region.\n");
    return -101;
   }
   else {
    if (accepted) {
     if (stop - start + 1 >= length) {
      printf ("%ld %ld %s\n", low, high, consensus); lines_in_output++;
     }
     if (consensus != NULL) free (consensus);
    }
   }
  }
 }
 printf ("\n\n# Total number of regions detected:  %ld\n", lines_in_output);
 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: agree_next_region                                              */
/*                                                                           */

int agree_next_region (long int range_len, char *counter_seq,
                       restricted_ptr alignment, int agreement,
                       int percentage, int exclusion, int active, bool gaps,
                       long int *index, long int *track_report,
                       long int *start, long int *stop, long int *low,
                       long int *high, char **consensus, bool *accepted)
{
 long int extension, cons_size; char ch;
 
 extension = 0; *stop = *start = *index; *high = *low = *track_report;
 cons_size = INITIAL_CONS_LEN;
 *consensus = (char *) NTL0_ckalloc (cons_size); (*consensus) [0] = '\0';

 do {
  if (agree_accept_column (alignment, *index, agreement, percentage,
                           exclusion, active, gaps, &ch, accepted) < 0)
   return -201;
  if (*accepted) {
   extension++; if (counter_seq [*index] != GAP_SYMBOL) (*high)++; (*stop)++;
   if (extension > cons_size - 2) {
    cons_size *= 2;
    *consensus = (char *) NTL0_ckrealloc (*consensus, cons_size);
   }
   (*consensus) [extension - 1] = ch; (*consensus) [extension] = '\0';
  }
  else {
   if (extension > 0) { if (*high > *low) (*high)--; (*stop)--; }
  }
  if (counter_seq [*index] != GAP_SYMBOL) (*track_report)++; (*index)++;
  
 } while ((*accepted) && (*index < range_len));
 
 if (*accepted) { if (*high > *low) (*high)--; (*stop)--; (*index)++; }
 if (extension > 0) *accepted = TRUE; else *accepted = FALSE;
 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: agree_accept_column                                            */
/*                                                                           */

int agree_accept_column (restricted_ptr alignment, int index, int agreement,
                         int percentage, int exclusion, int active, bool gaps,
                         char *ch, bool *accepted)
{
 int total, na, nc, ng, nt, no, ngap, row, major; float ratio;

 total = na = nc = ng = nt = no = ngap = 0;

 for (row = 0; row < alignment -> dimension; row++) {
  if (((alignment -> starts) [row] <= index) &&
      ((alignment -> stops) [row] >= index)) {
   total++;
   if ((alignment -> texts) [row] [index] == GAP_SYMBOL) ngap++;
   else if ((alignment -> texts) [row] [index] == 'A') na++;
   else if ((alignment -> texts) [row] [index] == 'C') nc++;
   else if ((alignment -> texts) [row] [index] == 'G') ng++;
   else if ((alignment -> texts) [row] [index] == 'T') nt++;
   else no++;
  }
 }
 if (total < active) { *ch = '\0'; *accepted = FALSE; return 0; }
 else if ((!gaps) && (ngap > 0)) { *ch = '\0'; *accepted = FALSE; return 0; }
 else {
  major = na; *ch = 'A';
  if (nc > major) { major = nc; *ch = 'C'; }
  if (ng > major) { major = ng; *ch = 'G'; }
  if (nt > major) { major = nt; *ch = 'T'; }
  if (ngap > major) { major = ngap; *ch = GAP_SYMBOL; }
  if (no > major) { major = no; *ch = 'N'; }

  if (*ch == 'N') {
   fprintf (stderr,
    "WARNING: Cannot estimate agreement with ambiguity codes in majority.\n");
   *accepted = FALSE; return 0;
  }
  else {
   if (agreement == PERCENTAGE_AGREEMENT) {
    ratio = (((float) major) / ((float) total)) * 100.0;
    if ((int) ratio >= percentage) *accepted = TRUE; else *accepted = FALSE;
   }
   else {
    if (total - major <= exclusion) *accepted = TRUE; else *accepted = FALSE;
   }
   return 0;
  }
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: agree_is_long                                                  */
/*                                                                           */

bool agree_is_long (char *string, long int *value)
{
 char *scan, *num_start, saver; int count; long int long_val; bool negative;
 
 scan = string; *value = 0;
 while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++;
 if (*scan == '\0') return FALSE;
 else {
  if (*scan == '-') {
   negative = TRUE; scan++;
   while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++;
  }
  else negative = FALSE;

  if (*scan == '\0') return FALSE;
  else if (*scan == '0') {
   if (negative) return FALSE;
   else {
    scan++;
    while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++;
    *value = 0;
    if (*scan == '\0') return TRUE; else return FALSE;
   }
  }
  else {
   count = 0; num_start = scan; while ((*scan >= '0') && (*scan <= '9')) {
    count++; scan++;
   }
   if ((count < 1) || (count > MAX_LONG_LEN)) return FALSE;
   else {
    if ((*scan != '\0') && (*scan != ' ') &&
        (*scan != '\t') && (*scan != '\n')) return FALSE;
    else {
     saver = *scan; *scan = '\0'; long_val = atol (num_start);
     *scan = saver;
     
     while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++;
     if (*scan != '\0') return FALSE;
     else {
      if (negative) *value = -long_val; else *value = long_val; return TRUE;
     }
    }
   }
  }
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: agree_is_int                                                   */
/*                                                                           */

bool agree_is_int (char *string, int *value)
{
 long int long_val;

 if (!agree_is_long (string, &long_val)) { *value = 0; return FALSE; }
 else if ((long_val < MININT_VALUE) || (long_val > MAXINT_VALUE)) {
  *value = 0; return FALSE;
 }
 else { *value = (int) long_val; return TRUE; }
}


