
static char const rcsid [] = "$Id: kunk.c,v 1.3 1999/08/10 04:07:24 cathy Exp $";


/*****************************************************************************/
/*                                                                           */
/* Program: kunk (implementation of the k-mismatch algorithm)                */
/*               (formerly "kcon")                                           */
/*                                                                           */
/* Author: Nikola Stojanovic                                                 */
/*                                                                           */
/* Revision:    08 SEP 96   Version 1.0                                      */
/*              20 SEP 96   Version 2.0                                      */
/*              12 JAN 98   Version 3.0                                      */
/*                                                                           */
/*   Given an alignment (file and the sequences), program determines the     */
/* conserved regions within it, using the maximal k-region rule, and the     */
/* parameters provided in the command line                                   */
/*                                                                           */
/*****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ntl.h"

/*****************************************************************************/
/*                                                                           */
/* Definitions section                                                       */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/* Definitions of the constants of the program unit                          */
/*****************************************************************************/


#define ALPHABET_SIZE                    128

#define DEFAULT_REFERENCE                "human"

#define DEFAULT_PATH                     "."

#define DEFAULT_RANGE_ROW                NULL
#define DEFAULT_LOW                      0
#define DEFAULT_HIGH                     0

#define GENERAL                          0
#define STRICT                           1
#define WILDCARD                         2
#define ENFORCED                         3
#define PROMOTED                         4
#define PROHIBITED                       5
#define MASKED                           6

#define DEFAULT_MAX_MISMATCHES           1
#define DEFAULT_MIN_LENGTH               6
#define DEFAULT_MIN_ACTIVE               3
#define DEFAULT_STRICTNESS               STRICT

#define MAX_LONG_LEN                     9
#define MININT_VALUE                     -32000
#define MAXINT_VALUE                     32000


/*****************************************************************************/
/* Prototypes of locally used functions of the program unit                  */
/*****************************************************************************/


int kcon_load_data (strlist_ptr paths, char *alignment_file, char *range_row,
                    long int start, long int stop, restricted_ptr *alignment,
                    int *track_seq, long int *track);
void kcon_output_header (char *alignment_file, char *range_row, bool range_set,
                         long int start, long int stop, int max_mismatches,
                         int min_length, int min_active, int strictness);
int kcon_process_data (restricted_ptr alignment, int max_mismatches,
                       int min_length, int min_active, int strictness,
                       int track_seq, long int start_track);
int kcon_all_regions (restricted_ptr alignment, int max_mismatches,
                      int min_length, int strictness, long int count,
                      int track_seq, long int *track, long int *track_scan,
                      long int *reported);
void kcon_max_wildcard (restricted_ptr alignment, int max_mismatches,
                        long int count, long int pos, long int offset,
                        int *mis);
void kcon_max_enforced (restricted_ptr alignment, int max_mismatches,
                        long int count, long int pos, long int offset,
                        int *mis);
void kcon_max_prohibited (restricted_ptr alignment, int max_mismatches,
                          long int count, long int pos, long int offset,
                          int *mis);
void kcon_max_center (restricted_ptr alignment, int max_mismatches,
                      long int count, long int pos, long int offset, int *mis);
void kcon_report_region (restricted_ptr alignment, long int track_seq,
                         long int *track, long int *track_scan, int strictness,
                         long int left, long int right);
void kcon_bits (void);
bool kcon_subsumes (char set, char subset);
bool kcon_is_long (char *string, long int *value);
bool kcon_is_int (char *string, int *value);


/*****************************************************************************/
/* Global variables used in the program unit                                 */
/*****************************************************************************/


static long int *diverse;  /* Array keeping the positions of non-trivial columns */

static char *current;             /* Current center sequence for examination */
static char *best;       /* Best-so-far center examined for a current region */
static long int best_len;          /* The length of the best-so-far sequence */
static long int best_dist;  /* Distance between the center and the alignment */

static bool **apool;      /* Simulated stack of arrays to speed-up recursion */
static int **mpool;       /* Simulated stack of arrays to speed-up recursion */

static long int *bits;      /* Bit codes of alphabet characters, to speed up */

/*****************************************************************************/
/*                                                                           */
/* Code section                                                              */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/*                                                                           */
/* Procedure: main                                                           */
/*                                                                           */
/* "main" procedure of the program. Receives and analyses the command line   */
/*   parameters, sets the control variables of the program, checks their     */
/*   consistency and passes control to internal procedures which actually    */
/*   process the input data; returns 0 if everything is OK, non-zero status  */
/*   in case of any errors                                                   */

int main (int argc, char **argv)
{
 int max_mismatches, min_length, min_active, strictness, arg_count, track_seq;
 char *alignment_file, *range_row; bool range_set, mismatches_set, length_set;
 bool active_set, strictness_set, range_row_set; long int start, stop, track;
 strlist_ptr registered_paths, new_path, path_scan; restricted_ptr alignment;

 /* Initialize the default settings and prepare for processing of arguments  */

 alignment_file = NULL; range_row_set = FALSE; range_row = DEFAULT_RANGE_ROW;
 range_set = FALSE; start = DEFAULT_LOW; stop = DEFAULT_HIGH;
 if (DEFAULT_PATH == NULL) registered_paths = NULL;
 else {
  registered_paths = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct));
  registered_paths -> string = DEFAULT_PATH;
  registered_paths -> next = NULL;
 }
 mismatches_set = FALSE; max_mismatches = DEFAULT_MAX_MISMATCHES;
 length_set = FALSE; min_length = DEFAULT_MIN_LENGTH;
 active_set = FALSE; min_active = DEFAULT_MIN_ACTIVE;
 strictness_set = FALSE; strictness = DEFAULT_STRICTNESS;

 if (argc < 2) {
  fprintf (stderr, "usage: %s <alignment_file>\n", argv [0]);
  fprintf (stderr, "            [-I <directory_path>]*\n");
  fprintf (stderr, "            [-r <from> <to>]\n");
  fprintf (stderr, "            [-p <pivot_sequence>]\n");
  fprintf (stderr, "            [-k <max_mismatches_per_sequence>]\n");
  fprintf (stderr, "            [-l <min_length_of_conserved_region>]\n");
  fprintf (stderr, "            [-a <min_active_sequences>]\n");
  fprintf (stderr, "            [-g | -s | -w | -f | -o | -c | -m]\n");
  exit (1);
 }
 else {                           /* Some parameters provided - process them */

  /* Proceed to extract the command line parameters and create the settings  */

  arg_count = 1; while (arg_count < argc) {

   if (argv [arg_count] [0] != '-') {       /* Not an "-" option - file name */
    if (alignment_file != NULL) {
     fprintf (stderr, "Repeated alignment file name ('%s').\n",
                      argv [arg_count]);
     exit (1);
    }
    else { alignment_file = NTL0_strsave (argv [arg_count]); arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-I")) {        /* New "include" path */
    arg_count++;
    if (arg_count == argc) {
     fprintf (stderr, "Missing directory path for inclusion.\n"); exit (1);
    }
    else {
     new_path = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct));
     new_path -> string = NTL0_strsave (argv [arg_count]);
     new_path -> next = NULL;

     if (registered_paths == NULL) registered_paths = new_path;
     else {
      path_scan = registered_paths;
      while (path_scan -> next != NULL) path_scan = path_scan -> next;
      path_scan -> next = new_path;
     }
     arg_count++;
    }
   }
   else if (!strcmp (argv [arg_count], "-r")) {             /* Range request */
    if (range_set) {
     fprintf (stderr,
              "Range for alignment loading already set to [%ld-%ld].\n",
              start, stop);
     exit (1);
    }
    else if (arg_count > argc - 3) {
     fprintf (stderr, "Incomplete range specification.\n"); exit (1);
    }
    else if ((!kcon_is_long (argv [arg_count + 1], &start)) ||
             (!kcon_is_long (argv [arg_count + 2], &stop))) {
     fprintf (stderr, "Illegal specification for the range.\n"); exit (1);
    }
    else if ((start < 0) || (stop < 0)) {
     fprintf (stderr, "Negative values not permitted for the range.\n");
     exit (1);
    }
    else if (start > stop) {
     fprintf (stderr, "Range start (%ld) greater than range end (%ld).\n",
                      start, stop);
     exit (1);
    }
    else { range_set = TRUE; arg_count += 3; }
   }
   else if (!strcmp (argv [arg_count], "-p")) {      /* Pivot sequence spec. */
    arg_count++;
    if (range_row_set) {
     fprintf (stderr, "Pivot sequence for alignment range already set.\n");
     exit (1);
    }
    else {
     range_row_set = TRUE; range_row = NTL0_strsave (argv [arg_count]);
     arg_count++;
    }
   }
   else if (!strcmp (argv [arg_count], "-k")) {  /* Maximal mismatch request */
    if (mismatches_set) {
     fprintf (stderr,
              "Maximal mismatch number per sequence already set to %d.\n",
              max_mismatches);
     exit (1);
    }
    else if (arg_count > argc - 2) {
     fprintf (stderr, "Incomplete mismatch specification.\n"); exit (1);
    }
    else if (!kcon_is_int (argv [arg_count + 1], &max_mismatches)) {
     fprintf (stderr, "Illegal number of permitted mismatches (%s).\n",
              argv [arg_count + 1]);
     exit (1);
    }
    else if (max_mismatches < 0) {
     fprintf (stderr, "Negative values not permitted for mismatches.\n");
     exit (1);
    }
    else { mismatches_set = TRUE; arg_count += 2; }
   }
   else if (!strcmp (argv [arg_count], "-l")) {     /* Region length request */
    if (length_set) {
     fprintf (stderr,
              "Minimal length of conserved region already set to %d.\n",
              min_length);
     exit (1);
    }
    else if (arg_count > argc - 2) {
     fprintf (stderr, "Incomplete length specification.\n"); exit (1);
    }
    else if (!kcon_is_int (argv [arg_count + 1], &min_length)) {
     fprintf (stderr, "Illegal minimal region length (%s).\n",
                      argv [arg_count + 1]);
     exit (1);
    }
    else if (min_length <= 0) {
     fprintf (stderr, "Positive length required for conserved regions.");
     exit (1);
    }
    else { length_set = TRUE; arg_count += 2; }
   }
   else if (!strcmp (argv [arg_count], "-a")) {  /* Active sequences request */
    if (active_set) {
     fprintf (stderr,
              "Minimal number of active sequences already set to %d.\n",
              min_active);
     exit (1);
    }
    else if (arg_count > argc - 2) {
     fprintf (stderr, "Incomplete active sequences specification.\n");
     exit (1);
    }
    else if (!kcon_is_int (argv [arg_count + 1], &min_active)) {
     fprintf (stderr, "Illegal minimal number of active sequences (%s).\n",
                      argv [arg_count + 1]);
     exit (1);
    }
    else if (min_active <= 0) {
     fprintf (stderr, "Positive number of active sequences required.");
     exit (1);
    }
    else { active_set = TRUE; arg_count += 2; }
   }
   else if (!strcmp (argv [arg_count], "-g")) {    /* Search general regions */
    if (strictness_set) {
     fprintf (stderr, "Kind of requested regions already set.\n"); exit (1);
    }
    else { strictness = GENERAL; strictness_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-s")) {     /* Search strict regions */
    if (strictness_set) {
     fprintf (stderr, "Kind of requested regions already set.\n"); exit (1);
    }
    else {
     strictness = STRICT; strictness_set = TRUE; arg_count++;
    }
   }
   else if (!strcmp (argv [arg_count], "-w")) {   /* Search wildcard regions */
    if (strictness_set) {
     fprintf (stderr, "Kind of requested regions already set.\n"); exit (1);
    }
    else { strictness = WILDCARD; strictness_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-f")) {   /* Search enforced regions */
    if (strictness_set) {
     fprintf (stderr, "Kind of requested regions already set.\n"); exit (1);
    }
    else { strictness = ENFORCED; strictness_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-o")) {   /* Search promoted regions */
    if (strictness_set) {
     fprintf (stderr, "Kind of requested regions already set.\n"); exit (1);
    }
    else { strictness = PROMOTED; strictness_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-c")) { /* Search prohibited regions */
    if (strictness_set) {
     fprintf (stderr, "Kind of requested regions already set.\n"); exit (1);
    }
    else { strictness = PROHIBITED; strictness_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-m")) {     /* Search masked regions */
    if (strictness_set) {
     fprintf (stderr, "Kind of requested regions already set.\n"); exit (1);
    }
    else { strictness = MASKED; strictness_set = TRUE; arg_count++; }
   }
   else {                                              /* Unknown "-" option */
    fprintf (stderr, "Illegal command line option (%s).\n", argv [arg_count]);
    fprintf (stderr, "usage: %s <alignment_file>\n", argv [0]);
    fprintf (stderr, "            [-I <directory_path>]*\n");
    fprintf (stderr, "            [-r <from> <to>]\n");
    fprintf (stderr, "            [-p <pivot_sequence>]\n");
    fprintf (stderr, "            [-k <max_mismatches_per_sequence>]\n");
    fprintf (stderr, "            [-l <min_length_of_conserved_region>]\n");
    fprintf (stderr, "            [-a <min_active_sequences>]\n");
    fprintf (stderr, "            [-g | -s | -w | -f | -o | -c | -m]\n");
    exit (1);
   }
  }
  /* Check whether all necessary data have been collected and consistent     */

  if (alignment_file == NULL) {
   fprintf (stderr, "Must have an alignment to process.\n"); exit (1);
  }
  else if ((range_set) && (range_row == NULL))
   range_row = DEFAULT_REFERENCE;
  else if ((range_row_set) && (!range_set)) {
   fprintf (stderr, "Must have range in requested '%s' in alignment.\n",
                    range_row);
   exit (1);
  }
  /* Proceed to collect (load) the data to be processed by the algorithm     */

  if (kcon_load_data (registered_paths, alignment_file, range_row, start,
                      stop, &alignment, &track_seq, &track) < 0)
   exit (1);

  /* Apply the backtracking k-mismatch algorithm to the data, output results */

  kcon_output_header (alignment_file, range_row, range_set, start, stop,
                      max_mismatches, min_length, min_active, strictness);
  if (kcon_process_data (alignment, max_mismatches, min_length, min_active,
                         strictness, track_seq, track) < 0)
   exit (1);

  exit (0);
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: kcon_load_data                                                 */
/*                                                                           */
/* Procedure collects alignment data with respect to the provided settings   */
/*   and returns the information within its reference parameters; returns 0  */
/*   if everything is OK, negative status otherwise                          */

int kcon_load_data (strlist_ptr paths, char *alignment_file, char *range_row,
                    long int start, long int stop, restricted_ptr *alignment,
                    int *track_seq, long int *track)
{
 bool found, duplicate; int index, count;
 char *new_path, *track_row; strlist_ptr current_path;
 header_ptr file_info; align_ptr packed; unpacked_ptr expanded;
 line_ptr line_al, last_line; errind err_stat;

 /* Load the alignment into an internal structure first                      */

 if ((paths == NULL) ||
     (alignment_file [0] == '/') || (alignment_file [0] == '~')) {
  if ((err_stat = NTL2_Load_AlignFile (alignment_file, &file_info)) != NULL) {
   fprintf (stderr, "Can't open alignment file '%s' (%s).\n",
                    alignment_file, err_stat -> message);
   return -1;
  }
  else if (file_info == NULL) {        /* File found, but nothing retrieved */
   fprintf (stderr, "No alignment to process.\n"); return -2;
  }
 }
 else {                     /* Directory path(s) provided, try them in order */
  current_path = paths; file_info = NULL;
  while ((file_info == NULL) && (current_path != NULL)) {
   new_path = NTL0_ckalloc ((strlen (current_path -> string) +
                             strlen (alignment_file) + 2) * sizeof (char));
   strcpy (new_path, current_path -> string);
   if (alignment_file [0] != '/') strcat (new_path, "/");
   strcat (new_path, alignment_file);

   if ((err_stat = NTL2_Load_AlignFile (new_path, &file_info)) != NULL) {
    file_info = NULL; current_path = current_path -> next;
   }
   free (new_path);
  }
  if (err_stat != NULL) {                      /* File not found on any path */
   fprintf (stderr, "Can't open alignment file '%s' (%s).\n",
                    alignment_file, err_stat -> message);
   return -3;
  }
  else if (file_info == NULL) {         /* File found, but nothing retrieved */
   fprintf (stderr, "No alignment to process.\n"); return -4;
  }
 }
 /* Now proceed to partially unpack the information from the loaded file     */

 if ((err_stat = NTL1_Load_Alignment (1, file_info, &packed)) != NULL) {
  fprintf (stderr, "Can't extract the alignment from the file (%s).\n",
                   err_stat -> message);
  return -5;
 }
 else {              /* Row file information collected, handle the alignment */

  if ((err_stat = NTL2_Unpack_Alignment (1, file_info, packed, paths,
                                         &expanded)) != NULL) {
   fprintf (stderr, "Can't expand the alignment (%s).\n", err_stat -> message);
   return -6;
  }
  else {   /* Expansion of the full alignment done OK, check for restriction */

   if (range_row != NULL) {
    if ((err_stat = NTL1_Cut_Alignment (file_info, expanded, range_row,
                                        start, stop)) != NULL) {
     if (err_stat -> kind == WARNING) {
      fprintf (stderr, "WARNING:  %s.\n", err_stat -> message);
     }
     else {
      fprintf (stderr, "Can't get the range from the alignment (%s).\n",
                       err_stat -> message);
      return -7;
     }
    }
   }
  }
 }
 /* Determine the "tracking" sequence and the starting position for counting */

 if (range_row == NULL) track_row = DEFAULT_REFERENCE;
 else track_row = range_row;

 found = FALSE; duplicate = FALSE;
 for (index = 0; index < expanded -> dimension; index++) {
  if ((!(strcmp (((file_info -> sequences) [index]).seq_name, track_row))) &&
      ((expanded -> segment_code) [index] == VALID_SEGMENT)) {
   if (range_row == NULL) {
    if ((found) && (!duplicate)) {
     fprintf (stderr,
     "WARNING: Ambiguous counter sequence '%s' - using the first occurence.\n",
              track_row);
     duplicate = TRUE;
    }
    else if (!duplicate) {
     *track_seq = index; *track = (expanded -> begin) [index]; found = TRUE;
    }
   }
   else if (((expanded -> begin) [index] >= start) &&
            ((expanded -> end) [index] <= stop)) {
    if (found) {
     fprintf (stderr, "Ambiguos counter sequence '%s'.\n", track_row);
     return -8;
    }
    else {
     *track_seq = index; *track = (expanded -> begin) [index]; found = TRUE;
    }
   }
  }
 }
 if (!found) {
  fprintf (stderr, "Can't find counter sequence '%s'.\n", track_row);
  return -9;
 }
 /* Now copy expanded alignment into more compact structure for processing   */

 *alignment = (restricted_ptr) NTL0_ckalloc (sizeof (Restricted_Struct));

 count = 0; for (index = 0; index < expanded -> dimension; index++) {
  if ((expanded -> segment_code) [index] == VALID_SEGMENT) count++;
 }
 (*alignment) -> dimension = count; (*alignment) -> size = expanded -> size;

 (*alignment) -> texts = (char **) NTL0_ckalloc (count * sizeof (char *));
 (*alignment) -> begin = (long int *)
                         NTL0_ckalloc (count * sizeof (long int *));
 (*alignment) -> end = (long int *)
                       NTL0_ckalloc (count * sizeof (long int *));
 (*alignment) -> starts = (long int *)
                          NTL0_ckalloc (count * sizeof (long int *));
 (*alignment) -> stops = (long int *)
                         NTL0_ckalloc (count * sizeof (long int *));

 count = 0;
 for (index = 0; index < expanded -> dimension; index++) {
  if ((expanded -> segment_code) [index] == VALID_SEGMENT) {
   ((*alignment) -> texts) [count] = (expanded -> texts) [index];
   ((*alignment) -> begin) [count] = (expanded -> begin) [index];
   ((*alignment) -> end) [count] = (expanded -> end) [index];
   ((*alignment) -> starts) [count] = (expanded -> starts) [index];
   ((*alignment) -> stops) [count] = (expanded -> stops) [index];
   if (index == *track_seq) *track_seq = count;
   count++;
  }
 }
 /* Destroy the "file_info" structure, since its contents are not needed     */

 /* ### To be defined ###################################################### */

 /* Destroy "partially unpacked" structure used in the expansion - done      */

 if (packed -> begin != NULL) free (packed -> begin);
 if (packed -> end != NULL) free (packed -> end);

 if (packed -> lines != NULL) {
  for (index = 0; index < expanded -> dimension; index++) {
   line_al = (packed -> lines) [index]; while (line_al != NULL) {
    last_line = line_al; line_al = line_al -> next; free (last_line);
   }
  }
 }
 free (packed);

 /* Destroy the expanded alignment info now when the restricted copy made    */

 /* ### To be defined ###################################################### */

 /* Set up the bit codes of the alphabet characters, to speed up comparisons */

 kcon_bits ();

 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: kcon_output_header                                             */
/*                                                                           */
/* Procedure writes a number of comment lines in the output - output type    */
/*   identification and data about the settings under which it was           */
/*   generated, alignment file name and requested range, counter data,       */
/*   maximal number of mismatches per sequence permitted, minimal number of  */
/*   active sequences required and the kind of requested regions             */

void kcon_output_header (char *alignment_file, char *range_row, bool range_set,
                         long int start, long int stop, int max_mismatches,
                         int min_length, int min_active, int strictness)
{
 printf ("#:plain:\n\n");
 printf ("# Output of the maximal k-regions location utility.\n");
 printf ("# Alignment file:  '%s'\n", alignment_file);
 if (range_set) {
  printf ("#   restricted to the range [%ld,%ld] in \"%s\".\n",
          start, stop, range_row);
  printf ("# Counting done in the located sequence \"%s\".\n", range_row);
 }
 else printf ("# Counting done in the first occurence of sequence:  \"%s\".\n",
              DEFAULT_REFERENCE);
 printf (
     "# Maximal number of mismatches (per row) with center permitted:  %d\n",
         max_mismatches);
 printf ("# Minimal length of the region required:  %d\n", min_length);
 printf ("# Minimal number of active sequences required in regions:  %d\n",
         min_active);
 printf ("# Kind of regions requested:  ");
 switch (strictness) {
  case GENERAL: {
   printf ("general (no special treatments of gaps).\n\n"); break;
  }
  case STRICT: {
   printf ("strict (no gaps permitted within regions).\n\n"); break;
  }
  case WILDCARD: {
   printf (
     "wildcard (gaps matching other characters, but can't be centers).\n\n");
   break;
  }
  case ENFORCED: {
   printf ("gaps enforced to be in the center of a containing column.\n\n");
   break;
  }
  case PROMOTED: {
   printf (
         "gaps masking the center character of gap-containing columns.\n\n");
   break;
  }
  case PROHIBITED: {
   printf ("gaps prohibited from being part of center sequence.\n\n"); break;
  }
  case MASKED: {
   printf ("gaps occuring in the center masked by another character.\n\n");
   break;
  }
  default: {
   fprintf (stderr, "PROBLEM: Unknown strictness request (%d).\n", strictness);
   exit (1);
  }
 }
 printf ("# Output format:   start  stop  center_sequence\n\n\n");
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: kcon_process_data                                              */
/*                                                                           */
/* Procedure does the preliminary separation of legal alignment segments     */
/*   (with respect to the settings) and non-trivial columns, before passing  */
/*   control to determining the k-regions, iterating for each legal part;    */
/*   returns 0 if everything is OK, negative status otherwise                */

int kcon_process_data (restricted_ptr alignment, int max_mismatches,
                       int min_length, int min_active, int strictness,
                       int track_seq, long int start_track)
{
 bool legal, trivial, gap; char pivot; int active, col;
 long int track, track_scan, reported, index, count, scan;

 /* Initialize the tracking position and the number of reported regions      */

 track = start_track; track_scan = 0; reported = 0;

 /* Allocate space to hold the non-trivial columns of current part of algn.  */

 diverse = (long int *)
       NTL0_ckalloc (((alignment -> size) + 3) * sizeof (long int));

 /* Allocate initial space for global arrays used by the k-mismatch algor.   */

 current = (char *)
           NTL0_ckalloc ((((alignment -> dimension) * max_mismatches) + 1) *
                         sizeof (char));
 best = (char *)
        NTL0_ckalloc ((((alignment -> dimension) * max_mismatches) + 1) *
                      sizeof (char));

 /* Reserve the space for stack simulation for local arrays used by the      */
 /*   algorithm, in order to speed up the execution of recursive routine     */

 apool = (bool **)
         NTL0_ckalloc ((((alignment -> dimension) * max_mismatches) + 1) *
                       sizeof (bool *));
 mpool = (int **)
         NTL0_ckalloc ((((alignment -> dimension) * max_mismatches) + 2) *
                       sizeof (int *));

 for (index = 0; index <= (alignment -> dimension) * max_mismatches; index++) {
  apool [index] = (bool *)
                  NTL0_ckalloc ((alignment -> dimension) * sizeof (bool));
  mpool [index] = (int *)
                  NTL0_ckalloc ((alignment -> dimension) * sizeof (int));
 }
 mpool [(alignment -> dimension) * max_mismatches + 1] =
                (int *) NTL0_ckalloc ((alignment -> dimension) * sizeof (int));

 /* Process the alignment in parts that satisfy the constraints - minimal    */
 /*   number of active sequences, gaps or non-trivial columns as separators  */

 index = 0; while (index < alignment -> size) {  /* Scan the whole alignment */

  /* Skip positions which cannot be a part of a k-region, under conditions   */

  do {
   active = 0; pivot = '\0'; legal = TRUE; trivial = TRUE; gap = FALSE;

   /* Examine the next full column of the alignment for conditions and type  */

   col = 0; while ((legal) && (col < alignment -> dimension)) {

    if (((alignment -> starts) [col] <= index) &&
        ((alignment -> stops) [col] >= index)) {     /* If the row is active */
     active++;
     if (strictness == WILDCARD) {
      if ((alignment -> texts) [col] [index] != GAP_SYMBOL) {
       if (pivot == '\0') pivot = (alignment -> texts) [col] [index];
       else if (pivot != (alignment -> texts) [col] [index]) trivial = FALSE;
      }
      else gap = TRUE;
     }
     else {
      if (pivot == '\0') pivot = (alignment -> texts) [col] [index];
      else if (pivot != (alignment -> texts) [col] [index]) trivial = FALSE;
      if ((alignment -> texts) [col] [index] == GAP_SYMBOL) gap = TRUE;
      if ((gap) && ((strictness == STRICT) || (max_mismatches == 0)))
       legal = FALSE;
     }
     if ((!trivial) && (max_mismatches == 0)) legal = FALSE;
    }
    col++;
   }
   if ((legal) && (active < min_active)) legal = FALSE;
   if (!legal) index++;
  } while ((index < alignment -> size) && (!legal));

  if (legal) {               /* First column of the next legal segment found */

   /* Place a boundary column at 0 position, and possibly first non-trivial  */

   diverse [0] = index - 1; count = 1; if (!trivial) diverse [count++] = index;
   index++;                               /* Next column to examine (if any) */

   /* If not alignment end, copy legal non-trivial columns in diverse align. */

   while ((legal) && (index < alignment -> size)) {

    legal = TRUE; active = 0; pivot = '\0'; trivial = TRUE; gap = FALSE;

    /* Examine the current column for the conditions and its type            */

    col = 0; while ((legal) && (col < alignment -> dimension)) {
     if (((alignment -> starts) [col] <= index) &&
         ((alignment -> stops) [col] >= index)) {     /* Another actiive row */
      active++;
      if (strictness == WILDCARD) {
       if ((alignment -> texts) [col] [index] != GAP_SYMBOL) {
        if (pivot == '\0') pivot = (alignment -> texts) [col] [index];
        else if (pivot != (alignment -> texts) [col] [index]) trivial = FALSE;
       }
       else gap = TRUE;
      }
      else {
       if (pivot == '\0') pivot = (alignment -> texts) [col] [index];
       else if (pivot != (alignment -> texts) [col] [index]) trivial = FALSE;
       if ((alignment -> texts) [col] [index] == GAP_SYMBOL) gap = TRUE;
       if ((gap) && ((strictness == STRICT) || (max_mismatches == 0)))
        legal = FALSE;
      }
      if ((!trivial) && (max_mismatches == 0)) legal = FALSE;
     }
     col++;
    }
    if ((legal) && (active < min_active)) legal = FALSE;
    if ((legal) && (!trivial)) diverse [count++] = index;
    if (legal) index++;
   }
   diverse [count++] = index;            /* First non-legal (or end) as boundary */

   /* If the legal area is of sufficient length, proceed to determine k-reg. */

   if (diverse [count - 1] - diverse [0] > min_length) {
    if (count == 2) {      /* Only trivial columns contained in legal region */

     /* Set tracking to the right starting position in track sequence        */

     while (track_scan <= diverse [0]) {
      if ((alignment -> texts) [track_seq] [track_scan] != GAP_SYMBOL) track++;
      track_scan++;
     }
     printf ("%ld ", track);

     /* Set tracking to the right ending position in track sequence          */

     while (track_scan < diverse [count - 1] - 1) {
      if ((alignment -> texts) [track_seq] [track_scan] != GAP_SYMBOL) track++;
      track_scan++;
     }
     printf ("%ld ", track);

     /* Print all characters of the reported region                          */

     for (scan = diverse [0] + 1; scan < diverse [count - 1]; scan++) {
      pivot = '\0'; col = 0;
      while ((pivot == '\0') && (col < alignment -> dimension)) {
       if (((alignment -> starts) [col] <= scan) &&
           ((alignment -> stops) [col] >= scan)) {
        if ((strictness != WILDCARD) ||
            ((alignment -> texts) [col] [scan] != GAP_SYMBOL)) {
         pivot = (alignment -> texts) [col] [scan];
         printf ("%c", pivot);
        }
       }
       col++;
      }
     }
     printf ("\n"); reported++;
    }
    else { /* There are some non-trivial columns in the segment - process it */

     if (kcon_all_regions (alignment, max_mismatches, min_length, strictness,
            count, track_seq, &track, &track_scan, &reported) < 0) return -11;
    }
   }
  }
  index++;        /* Set for the next column to examine (legal area, if any) */
 }
 printf ("\n\n# Number of regions reported:  %ld\n", reported);
 free (diverse); free (current); free (best); return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: kcon_all_regions                                               */
/*                                                                           */
/* Procedure implements the k-mismatch algorithm for locating conserved      */
/*   regions in a given part of an alignment; returns 0 if the location of   */
/*   regions was successful, negative status otherwise                       */

int kcon_all_regions (restricted_ptr alignment, int max_mismatches,
                      int min_length, int strictness, long int count,
                      int track_seq, long int *track, long int *track_scan,
                      long int *reported)
{
 int index; long int pos, last_reported, offset;

 pos = 0; last_reported = diverse [0];
 while (last_reported < diverse [count - 1] - 1) {
  current [0] = '\0'; best [0] = '\0'; best_len = 0; best_dist = 0;
  for (index = 0; index < alignment -> dimension; index++)
   mpool [0] [index] = 0;
  offset = 0;

  if (strictness == WILDCARD) {
   kcon_max_wildcard (alignment, max_mismatches, count, pos, offset,
                      mpool [0]);
  }
  else if (strictness == ENFORCED) {
   kcon_max_enforced (alignment, max_mismatches, count, pos, offset,
                      mpool [0]);
  }
  else if (strictness == PROHIBITED) {
   kcon_max_prohibited (alignment, max_mismatches, count, pos, offset,
                        mpool [0]);
  }
  else {
   kcon_max_center (alignment, max_mismatches, count, pos, offset, mpool [0]);
  }
  /* Check if the located maximal k-region should be reported and do, if so  */

  if (diverse [pos + best_len] > last_reported) {         /* If maximal k-region */
   if (diverse [pos + best_len + 1] - diverse [pos] > min_length) {   /* long enough */
    kcon_report_region (alignment, track_seq, track, track_scan, strictness,
                        pos, pos + best_len + 1);
    (*reported)++;
   }
   last_reported = diverse [pos + best_len + 1] - 1;  /* Last encountered column */
  }
  pos++;
 }
 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: kcon_max_wildcard                                              */
/*                                                                           */
/* Procedure finds the longest and best center sequence starting at the      */
/*   given position plus offset, under the "wildcard" condition - a gap in a */
/*   columns is considered to match every character, but it must not itself  */
/*   be selected as the center character for the column; it recurses for     */
/*   increased offsets                                                       */

void kcon_max_wildcard (restricted_ptr alignment, int max_mismatches,
                        long int count, long int pos, long int offset,
                        int *mis)
{
 bool *appears, shut; char pivot, candidate; int *mvec, row;
 long int col, distance;

 if (pos + offset + 1 < count - 1) {     /* Check for the end of the segment */

  /* Get the next column to be examined from the diverse alignment array     */

  col = diverse [pos + offset + 1];

  /* Prepare the supplementary arrays to keep track of this invocation       */

  mvec = mpool [offset + 1]; appears = apool [offset];
  for (row = 0; row < alignment -> dimension; row++) {   /* Discard the gaps */
   if ((alignment -> texts) [row] [col] == GAP_SYMBOL) appears [row] = TRUE;
   else appears [row] = FALSE;
  }

  /* Find a valid character to start with, as a prospective column center    */

  pivot = '\0'; row = 0;
  while ((pivot == '\0') && (row < alignment -> dimension)) {
   if (!(appears [row])) pivot = (alignment -> texts) [row] [col];
   row++;
  }
  while (pivot != NULL) { /* Loop for all characters appearing in the column */

   shut = FALSE; candidate = '\0'; row = 0;

   /* Examine the column for possible continuation by the current pivot      */

   while ((!shut) && (row < alignment -> dimension)) {
    if ((alignment -> texts) [row] [col] != GAP_SYMBOL) {
     if ((alignment -> texts) [row] [col] == pivot) {
      appears [row] = TRUE; mvec [row] = mis [row];
     }
     else if (((bits [(alignment -> texts) [row] [col]]) & (bits [pivot])) &&
              (kcon_subsumes (pivot, (alignment -> texts) [row] [col]))) {
      mvec [row] = mis [row];
      if ((!(appears [row])) && (candidate == '\0'))
       candidate = (alignment -> texts) [row] [col];
     }
     else {                                 /* Character mismatch in the row */
      mvec [row] = mis [row] + 1;
      if (mvec [row] > max_mismatches) shut = TRUE;
      if ((!(appears [row])) && (candidate == '\0'))
       candidate = (alignment -> texts) [row] [col];
     }
    }
    else mvec [row] = mis [row];                           /* Wildcard match */

    row++;
   }
   if (!shut) {   /* If the continuation with the current letter is possible */

    current [offset] = pivot; current [offset + 1] = '\0';
    kcon_max_wildcard (alignment, max_mismatches, count, pos, offset + 1,
                       mvec);
   }
   pivot = candidate;         /* Select a new current character, if any left */
  }
 }
 /* Examine whether the current branch gives a longer center for the region  */

 if (offset > best_len) {
  strcpy (best, current); best_len = offset;
  best_dist = 0; for (row = 0; row < alignment -> dimension; row++)
   best_dist += (mis [row]) * (mis [row]);
 }
 else if (offset == best_len) {          /* If equal, is this center better? */
  distance = 0; for (row = 0; row < alignment -> dimension; row++)
   distance += (mis [row]) * (mis [row]);
  if (distance < best_dist) { strcpy (best, current); best_dist = distance; }
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: kcon_max_enforced                                              */
/*                                                                           */
/* Procedure finds the longest and best center sequence starting at the      */
/*   given position plus offset, under the restriction that if a column      */
/*   contains a gap it must be selected as its center character - it         */
/*   recurses for increased offsets                                          */

void kcon_max_enforced (restricted_ptr alignment, int max_mismatches,
                        long int count, long int pos, long int offset,
                        int *mis)
{
 bool *appears, gap, shut; char pivot, candidate; int *mvec, row;
 long int col, distance;

 if (pos + offset + 1 < count - 1) {     /* Check for the end of the segment */

  /* Get the next column to be examined from the diverse alignment array     */

  col = diverse [pos + offset + 1];

  /* Prepare the supplementary arrays to keep track of this invocation       */

  mvec = mpool [offset + 1]; appears = apool [offset];
  for (row = 0; row < alignment -> dimension; row++) appears [row] = FALSE;

  /* Find a valid character to start with and examine if there is a gap -    */
  /*   prepare mismatch vector with it, in case that there is                */

  pivot = '\0'; gap = FALSE; shut = FALSE;
  for (row = 0; row < alignment -> dimension; row++) {
   if (((alignment -> starts) [row] <= col) &&
        ((alignment -> stops) [row] >= col)) {       /* If the row is active */
    if (pivot == '\0') pivot = (alignment -> texts) [row] [col];
    if ((alignment -> texts) [row] [col] == GAP_SYMBOL) {
     gap = TRUE; mvec [row] = mis [row];
    }
    else {                                        /* Gap mismatch in the row */
     mvec [row] = mis [row] + 1;
     if (mvec [row] > max_mismatches) shut = TRUE;
    }
   }
   else mvec [row] = mis [row];                  /* Inactive row - ignore it */
  }
  if (gap) {
   if (!shut) {       /* If the continuation with the gap center is possible */

    current [offset] = GAP_SYMBOL; current [offset + 1] = '\0';
    kcon_max_enforced (alignment, max_mismatches, count, pos, offset + 1,
                       mvec);
   }
  }
  else { /* Column does not contain a gap - examine the character candidates */

   while (pivot != NULL) {    /* Loop for characters appearing in the column */

    shut = FALSE; candidate = '\0'; row = 0;

    /* Examine the column for possible continuation by the current pivot     */

    while ((!shut) && (row < alignment -> dimension)) {
     if (((alignment -> starts) [row] <= col) &&
          ((alignment -> stops) [row] >= col)) {     /* If the row is active */
      if ((alignment -> texts) [row] [col] == pivot) {
       appears [row] = TRUE; mvec [row] = mis [row];
      }
      else if (((bits [(alignment -> texts) [row] [col]]) & (bits [pivot])) &&
               (kcon_subsumes (pivot, (alignment -> texts) [row] [col]))) {
       mvec [row] = mis [row];
       if ((!(appears [row])) && (candidate == '\0'))
        candidate = (alignment -> texts) [row] [col];
      }
      else {                                /* Character mismatch in the row */
       mvec [row] = mis [row] + 1;
       if (mvec [row] > max_mismatches) shut = TRUE;
       if ((!(appears [row])) && (candidate == '\0'))
        candidate = (alignment -> texts) [row] [col];
      }
     }
     else mvec [row] = mis [row]; /* In case of row active in part of region */

     row++;
    }
    if (!shut) {  /* If the continuation with the current letter is possible */

     current [offset] = pivot; current [offset + 1] = '\0';
     kcon_max_enforced (alignment, max_mismatches, count, pos, offset + 1,
                        mvec);
    }
    pivot = candidate;        /* Select a new current character, if any left */
   }
  }
 }
 /* Examine whether the current branch gives a longer center for the region  */

 if (offset > best_len) {
  strcpy (best, current); best_len = offset;
  best_dist = 0; for (row = 0; row < alignment -> dimension; row++)
   best_dist += (mis [row]) * (mis [row]);
 }
 else if (offset == best_len) {          /* If equal, is this center better? */
  distance = 0; for (row = 0; row < alignment -> dimension; row++)
   distance += (mis [row]) * (mis [row]);
  if (distance < best_dist) { strcpy (best, current); best_dist = distance; }
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: kcon_max_prohibited                                            */
/*                                                                           */
/* Procedure finds the longest and best center sequence starting at the      */
/*   given position plus offset, under the restriction that a gap in a       */
/*   column can never be selected as its center character - it recurses for  */
/*   increased offsets                                                       */

void kcon_max_prohibited (restricted_ptr alignment, int max_mismatches,
                          long int count, long int pos, long int offset,
                          int *mis)
{
 bool *appears, shut; char pivot, candidate; int *mvec, row;
 long int col, distance;

 if (pos + offset + 1 < count - 1) {     /* Check for the end of the segment */

  /* Get the next column to be examined from the diverse alignment array     */

  col = diverse [pos + offset + 1];

  /* Prepare the supplementary arrays to keep track of this invocation       */

  mvec = mpool [offset + 1]; appears = apool [offset];
  for (row = 0; row < alignment -> dimension; row++) appears [row] = FALSE;

  /* Find a valid character to start with, as a prospective column center    */

  pivot = '\0'; row = 0;
  while ((pivot == '\0') && (row < alignment -> dimension)) {
   if (((alignment -> starts) [row] <= col) &&
        ((alignment -> stops) [row] >= col)) {       /* If the row is active */
    pivot = (alignment -> texts) [row] [col];
   }
   row++;
  }
  while (pivot != NULL) { /* Loop for all characters appearing in the column */

   shut = FALSE; candidate = '\0'; row = 0;

   /* Examine the column for possible continuation by the current pivot      */

   while ((!shut) && (row < alignment -> dimension)) {
    if (((alignment -> starts) [row] <= col) &&
         ((alignment -> stops) [row] >= col)) {      /* If the row is active */
     if ((alignment -> texts) [row] [col] == pivot) {
      appears [row] = TRUE; mvec [row] = mis [row];
     }
     else if (((bits [(alignment -> texts) [row] [col]]) & (bits [pivot])) &&
              (kcon_subsumes (pivot, (alignment -> texts) [row] [col]))) {
      mvec [row] = mis [row];
      if ((!(appears [row])) && (candidate == '\0'))
       candidate = (alignment -> texts) [row] [col];
     }
     else {                                 /* Character mismatch in the row */
      mvec [row] = mis [row] + 1;
      if (mvec [row] > max_mismatches) shut = TRUE;
      if ((!(appears [row])) && (candidate == '\0'))
       candidate = (alignment -> texts) [row] [col];
     }
    }
    else mvec [row] = mis [row];  /* In case of row active in part of region */

    row++;
   }
   if ((!shut) && (pivot != GAP_SYMBOL)) {       /* If continuation possible */

    current [offset] = pivot; current [offset + 1] = '\0';
    kcon_max_prohibited (alignment, max_mismatches, count, pos, offset + 1,
                         mvec);
   }
   pivot = candidate;         /* Select a new current character, if any left */
  }
 }
 /* Examine whether the current branch gives a longer center for the region  */

 if (offset > best_len) {
  strcpy (best, current); best_len = offset;
  best_dist = 0; for (row = 0; row < alignment -> dimension; row++)
   best_dist += (mis [row]) * (mis [row]);
 }
 else if (offset == best_len) {          /* If equal, is this center better? */
  distance = 0; for (row = 0; row < alignment -> dimension; row++)
   distance += (mis [row]) * (mis [row]);
  if (distance < best_dist) { strcpy (best, current); best_dist = distance; }
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: kcon_max_center                                                */
/*                                                                           */
/* Procedure finds the longest and best center sequence starting at the      */
/*   given position plus offset - it recurses for increased offsets          */

void kcon_max_center (restricted_ptr alignment, int max_mismatches,
                      long int count, long int pos, long int offset, int *mis)
{
 bool *appears, shut; char pivot, candidate; int *mvec, row;
 long int col, distance;

 if (pos + offset + 1 < count - 1) {     /* Check for the end of the segment */

  /* Get the next column to be examined from the diverse alignment array     */

  col = diverse [pos + offset + 1];

  /* Prepare the supplementary arrays to keep track of this invocation       */

  mvec = mpool [offset + 1]; appears = apool [offset];
  for (row = 0; row < alignment -> dimension; row++) appears [row] = FALSE;

  /* Find a valid character to start with, as a prospective column center    */

  pivot = '\0'; row = 0;
  while ((pivot == '\0') && (row < alignment -> dimension)) {
   if (((alignment -> starts) [row] <= col) &&
        ((alignment -> stops) [row] >= col)) {       /* If the row is active */
    pivot = (alignment -> texts) [row] [col];
   }
   row++;
  }
  while (pivot != NULL) { /* Loop for all characters appearing in the column */

   shut = FALSE; candidate = '\0'; row = 0;

   /* Examine the column for possible continuation by the current pivot      */

   while ((!shut) && (row < alignment -> dimension)) {
    if (((alignment -> starts) [row] <= col) &&
         ((alignment -> stops) [row] >= col)) {      /* If the row is active */
     if ((alignment -> texts) [row] [col] == pivot) {
      appears [row] = TRUE; mvec [row] = mis [row];
     }
     else if (((bits [(alignment -> texts) [row] [col]]) & (bits [pivot])) &&
              (kcon_subsumes (pivot, (alignment -> texts) [row] [col]))) {
      mvec [row] = mis [row];
      if ((!(appears [row])) && (candidate == '\0'))
       candidate = (alignment -> texts) [row] [col];
     }
     else {                                 /* Character mismatch in the row */
      mvec [row] = mis [row] + 1;
      if (mvec [row] > max_mismatches) shut = TRUE;
      if ((!(appears [row])) && (candidate == '\0'))
       candidate = (alignment -> texts) [row] [col];
     }
    }
    else mvec [row] = mis [row];  /* In case of row active in part of region */

    row++;
   }
   if (!shut) {   /* If the continuation with the current letter is possible */

    current [offset] = pivot; current [offset + 1] = '\0';
    kcon_max_center (alignment, max_mismatches, count, pos, offset + 1, mvec);
   }
   pivot = candidate;         /* Select a new current character, if any left */
  }
 }
 /* Examine whether the current branch gives a longer center for the region  */

 if (offset > best_len) {
  strcpy (best, current); best_len = offset;
  best_dist = 0; for (row = 0; row < alignment -> dimension; row++)
   best_dist += (mis [row]) * (mis [row]);
 }
 else if (offset == best_len) {          /* If equal, is this center better? */
  distance = 0; for (row = 0; row < alignment -> dimension; row++)
   distance += (mis [row]) * (mis [row]);
  if (distance < best_dist) { strcpy (best, current); best_dist = distance; }
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: kcon_report_region                                             */
/*                                                                           */
/* Procedure reports the located k-region between diverse alignment entries  */
/*   indexed as "left" and "right" (exclusive); non-trivial columns center   */
/*   character are reported from "best" center string, enclosed trivial      */
/*   columns are reported by their characters                                */

void kcon_report_region (restricted_ptr alignment, long int track_seq,
                         long int *track, long int *track_scan, int strictness,
                         long int left, long int right)
{
 bool gap; char pivot; int row; long int end_scan, end_track, offset, scan;

 /* Found the tracking position for the start of the region and report it    */

 while (*track_scan <= diverse [left]) {
  if ((alignment -> texts) [track_seq] [*track_scan] != GAP_SYMBOL) (*track)++;
  (*track_scan)++;
 }
 printf ("%ld ", *track);

 /* Find the tracking position for the end of the region and report it       */

 end_scan = *track_scan; end_track = *track;
 while (end_scan < diverse [right] - 1) {
  if ((alignment -> texts) [track_seq] [end_scan] != GAP_SYMBOL) (end_track)++;
  end_scan++;
 }
 if ((end_track > *track) &&
     ((alignment -> texts) [track_seq] [end_scan] == GAP_SYMBOL)) end_track--;
 printf ("%ld ", end_track);

 /* Report the actual character of the determined best center sequence       */

 offset = 0;
 while (left + offset < right) {
  if (offset > 0) {
   if ((strictness == PROMOTED) && (best [offset - 1] != GAP_SYMBOL)) {
    gap = FALSE; row = 0;
    while ((!gap) && (row < alignment -> dimension)) {
     if (((alignment -> starts) [row] <= diverse [left + offset]) &&
         ((alignment -> stops) [row] >= diverse [left + offset])) {
      if ((alignment -> texts) [row] [diverse [left + offset]] == GAP_SYMBOL)
       gap = TRUE;
     }
     row++;
    }
    if (gap) printf ("%c", GAP_SYMBOL); else printf ("%c", best [offset - 1]);
   }
   else if ((strictness == MASKED) && (best [offset - 1] == GAP_SYMBOL))
    printf ("N");
   else printf ("%c", best [offset - 1]);
  }

  /* Now report all trivial columns to the right of the reported one         */

  scan = diverse [left + offset] + 1; while (scan < diverse [left + offset + 1]) {
   pivot = '\0'; row = 0;
   while ((pivot == '\0') && (row < alignment -> dimension)) {
    if (((alignment -> starts) [row] <= scan) &&
         ((alignment -> stops) [row] >= scan)) {     /* If the row is active */
     if ((strictness != WILDCARD) ||
         ((alignment -> texts) [row] [scan] != GAP_SYMBOL))
      pivot = (alignment -> texts) [row] [scan];
    }
    row++;
   }
   printf ("%c", pivot); scan++;
  }
  offset++;
 }
 printf ("\n");
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: kcon_bits                                                      */
/*                                                                           */
/* Procedure assigns bit codes to characters of the alphabet, such that      */
/*   bits reflect the set denotations by the characters (ambiguity codes)    */

void kcon_bits (void)
{
 int index;

 bits = (long int *) NTL0_ckalloc (ALPHABET_SIZE * sizeof (long int));
 for (index = 0; index < ALPHABET_SIZE; index++) bits [index] = 0;

 bits ['A'] = 0x1;
 bits ['C'] = 0x2;
 bits ['G'] = 0x4;
 bits ['T'] = 0x8;
 bits [GAP_SYMBOL] = 0x10;

 bits ['R'] = (bits ['A']) | (bits ['G']);
 bits ['Y'] = (bits ['C']) | (bits ['T']);
 bits ['S'] = (bits ['C']) | (bits ['G']);
 bits ['W'] = (bits ['A']) | (bits ['T']);
 bits ['M'] = (bits ['A']) | (bits ['C']);
 bits ['K'] = (bits ['G']) | (bits ['T']);

 bits ['B'] = (bits ['C']) | (bits ['G']) | (bits ['T']);
 bits ['D'] = (bits ['A']) | (bits ['G']) | (bits ['T']);
 bits ['H'] = (bits ['A']) | (bits ['C']) | (bits ['T']);
 bits ['V'] = (bits ['A']) | (bits ['C']) | (bits ['G']);

 bits ['X'] = (bits ['A']) | (bits ['C']) | (bits ['G']) | (bits ['T']);
 bits ['N'] = (bits ['A']) | (bits ['C']) | (bits ['G']) | (bits ['T']);
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: kcon_subsumes                                                  */
/*                                                                           */
/* Given two character codes (possibly ambiguity codes) procedure determines */
/*   whether the second one represents a set of characters fully contained   */
/*   in that represented by first; returns TRUE if it is, FALSE otherwise    */

bool kcon_subsumes (char set, char subset)
{
 if (set == subset) return TRUE;
 else if ((set == GAP_SYMBOL) ||
          (set == 'A') || (set == 'C') || (set == 'G') || (set == 'T'))
  return FALSE;
 else if (subset == GAP_SYMBOL) return FALSE;
 else if ((set == 'N') || (set == 'X')) return TRUE;
 else if ((subset == 'N') || (subset == 'X')) return FALSE;
 else if ((subset == 'A') && ((set == 'W') ||
                              (set == 'R') ||
                              (set == 'M') ||
                              (set == 'D') ||
                              (set == 'H') ||
                              (set == 'V'))) return TRUE;
 else if ((subset == 'C') && ((set == 'Y') ||
                              (set == 'S') ||
                              (set == 'M') ||
                              (set == 'B') ||
                              (set == 'H') ||
                              (set == 'V'))) return TRUE;
 else if ((subset == 'G') && ((set == 'S') ||
                              (set == 'R') ||
                              (set == 'K') ||
                              (set == 'B') ||
                              (set == 'D') ||
                              (set == 'V'))) return TRUE;
 else if ((subset == 'T') && ((set == 'W') ||
                              (set == 'Y') ||
                              (set == 'K') ||
                              (set == 'B') ||
                              (set == 'D') ||
                              (set == 'H'))) return TRUE;
 else if ((set == 'B') && ((subset == 'K') ||
                           (subset == 'S') ||
                           (subset == 'Y'))) return TRUE;
 else if ((set == 'D') && ((subset == 'K') ||
                           (subset == 'R') ||
                           (subset == 'W'))) return TRUE;
 else if ((set == 'H') && ((subset == 'M') ||
                           (subset == 'W') ||
                           (subset == 'Y'))) return TRUE;
 else if ((set == 'V') && ((subset == 'M') ||
                           (subset == 'R') ||
                           (subset == 'S'))) return TRUE;
 else if ((set != 'W') && (set != 'Y') && (set != 'S') && (set != 'R') &&
          (set != 'M') && (set != 'K') && (set != 'B') && (set != 'D') &&
          (set != 'H') && (set != 'V')) {
  fprintf (stderr,
           "WARNING: Unrecognized character ('%c') in center sequence.\n",
           set);
  return FALSE;
 }
 else if ((subset != 'A') && (subset != 'C') && (subset != 'G') &&
          (subset != 'T') && (subset != 'W') && (subset != 'Y') &&
          (subset != 'S') && (subset != 'R') && (subset != 'M') &&
          (subset != 'K') && (subset != 'B') && (subset != 'D') &&
          (subset != 'H') && (subset != 'V')) {
  fprintf (stderr, "WARNING: Unrecognized character ('%c') in alignment.\n",
                   subset);
  return FALSE;
 }
 else return FALSE;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: kcon_is_long                                                   */
/*                                                                           */
/* Supplementary procedure checks whether the supplied string represents a   */
/*   legal long integer as a command-line argument, and determines its       */
/*   value; returns TRUE if it is legal, FALSE otherwise                     */

bool kcon_is_long (char *string, long int *value)
{
 char *scan, *num_start, saver; int count; long int long_val; bool negative;

 scan = string; *value = 0;
 while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++;
 if (*scan == '\0') return FALSE;
 else {
  if (*scan == '-') {
   negative = TRUE; scan++;
   while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++;
  }
  else negative = FALSE;

  if (*scan == '\0') return FALSE;
  else if (*scan == '0') {
   if (negative) return FALSE;
   else {
    scan++;
    while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++;
    *value = 0;
    if (*scan == '\0') return TRUE; else return FALSE;
   }
  }
  else {
   count = 0; num_start = scan; while ((*scan >= '0') && (*scan <= '9')) {
    count++; scan++;
   }
   if ((count < 1) || (count > MAX_LONG_LEN)) return FALSE;
   else {
    if ((*scan != '\0') && (*scan != ' ') &&
        (*scan != '\t') && (*scan != '\n')) return FALSE;
    else {
     saver = *scan; *scan = '\0'; long_val = atol (num_start);
     *scan = saver;

     while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++;
     if (*scan != '\0') return FALSE;
     else {
      if (negative) *value = -long_val; else *value = long_val; return TRUE;
     }
    }
   }
  }
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: kcon_is_int                                                    */
/*                                                                           */
/* Supplementary procedure checks whether the supplied string represents a   */
/*   legal integer as a command-line argument, and determines its value;     */
/*   returns TRUE if it is legal, FALSE otherwise                            */

bool kcon_is_int (char *string, int *value)
{
 long int long_val;

 if (!kcon_is_long (string, &long_val)) { *value = 0; return FALSE; }
 else if ((long_val < MININT_VALUE) || (long_val > MAXINT_VALUE)) {
  *value = 0; return FALSE;
 }
 else { *value = (int) long_val; return TRUE; }
}


