
static char const rcsid [] = "$Id: phylogen.c,v 1.1 1998/04/21 10:34:49 stojanov Exp $";


/*****************************************************************************/
/*                                                                           */
/* Program: phylogen (locating regions of minimal evolutionary change)       */
/*                                                                           */
/* Author: Nikola Stojanovic                                                 */
/*                                                                           */
/* Revision:    03 FEB 98   Version 1.0                                      */
/*                                                                           */
/*   Given an alignment (file and the sequences) and a phulogenetic          */
/* (evolutionary) tree this program determines all full runs of columns      */
/* with little evolutionary change needed to create their layout             */
/*                                                                           */
/*****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
#include <math.h>
#include "ntl.h"

/*****************************************************************************/
/*                                                                           */
/* Definitions section                                                       */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/* Definitions of the constants of the program unit                          */
/*****************************************************************************/


#define UNDEFINED                        31781
#define INFINITY                         31873
#define FP_SHIELD                        283
#define SEPARATOR                        '#'
#define INTERNAL_CODE                    -1

#define DEFAULT_PATH                     "."

#define DEFAULT_RANGE_SEQUENCE           "human"
#define DEFAULT_SEQUENCE                 NULL
#define DEFAULT_RANGE_START              0
#define DEFAULT_RANGE_STOP               0

#define MAX_LONG_LEN                     9
#define MININT_VALUE                     -32000
#define MAXINT_VALUE                     32000

#define MAX_WHOLE                        3
#define MAX_FRAC                         6

#define DEFAULT_MINIMAL_LENGTH           6
#define DEFAULT_MINIMAL_ACTIVE           3

#define GENERAL                          0
#define STRICT                           1
#define WILDCARD                         2
#define ENFORCED                         3
#define PROMOTED                         4
#define PROHIBITED                       5
#define MASKED                           6
#define DEFAULT_GAP_TOLERANCE            STRICT

#define NO_ANCHOR                        0
#define COLUMN_SIZE_ANCHOR               1
#define TRIM_ANCHOR                      2
#define DEFAULT_ANCHOR_TYPE              NO_ANCHOR

#define DEFAULT_ADJUSTMENT               0.5

#define INIT_BUFFER_SIZE                 1024

#define ROOT_CODE                        0
#define LEFT_CODE                        1
#define RIGHT_CODE                       2

#define A_BIT                            0x0001
#define C_BIT                            0x0002
#define G_BIT                            0x0004
#define T_BIT                            0x0008
#define GAP_BIT                          0x0010


/*****************************************************************************/
/* Types used in the program unit                                            */
/*****************************************************************************/


typedef struct master_fields {          /* Representation of the entire tree */

 int number;          /* Internal, or number of sequence represented at leaf */
 bool active;       /* Is the sequence active at current position; for leafs */
 struct master_fields *left;       /* Pointer to node's left child (subtree) */
 struct master_fields *right;     /* Pointer to node's right child (subtree) */
} Master_Struct;

typedef Master_Struct *master_ptr;                 /* Pointer to a tree node */


typedef struct control_fields { /* Representation of tree for sequence check */

 char *name;         /* Name of the sequence at tree leaf, NULL for internal */
 long int begin;    /* Absolute position (in seq file) where sequence begins */
 long int end;        /* Absolute position (in seq file) where sequence ends */
 bool visited;     /* To avoid seeing any leaf more than once while checking */
 master_ptr derived;   /* Pointer to the corresponding node in formed master */
 struct control_fields *left;      /* Pointer to node's left child (subtree) */
 struct control_fields *right;    /* Pointer to node's right child (subtree) */
} Control_Struct;

typedef Control_Struct *control_ptr;               /* Pointer to a tree node */


/*****************************************************************************/
/* Variables used globally in the program unit                               */
/*****************************************************************************/


static master_ptr *queue_pointers;   /* Array of tree node pointers in queue */
static int *queue_levels;                   /* Array of node levels in queue */
static int *queue_parents;               /* Array of parent indices in queue */
static int *queue_sides;  /* Array of indicators (left/right child) in queue */
static int queue_head;                    /* First filled entry in the queue */
static int queue_tail;         /* First entry available to fill in the queue */
static int queue_size;   /* Number of items which can be stored in the queue */


/*****************************************************************************/
/* Prototypes of locally used functions of the program unit                  */
/*****************************************************************************/


int phylogen_load_data (strlist_ptr registered_paths, char *alignment_file,
                        char *pivot_sequence, long int start, long int stop,
                        char **tree_string, char *tree_file,
                        restricted_ptr *alignment, master_ptr *master_tree,
                        master_ptr **master_index, long int **change_points,
                        int *counter, char **counter_seq, long int *track);
int phylogen_entire_alignment (strlist_ptr paths, char *file_name,
                               char *pivot, long int start, long int stop,
                               header_ptr *file_info, align_ptr *packed,
                               unpacked_ptr *expanded, int *counter,
                               char **counter_seq, long int *track);
int phylogen_tree_string (strlist_ptr paths, char *file_name, char **tree);
control_ptr phylogen_build_tree (char *tree_string, int *offset);
control_ptr phylogen_search_control (control_ptr tree, char *name,
                                     long int start, long int stop);
master_ptr phylogen_build_guide (control_ptr original);
control_ptr phylogen_destroy_control (control_ptr tree);
master_ptr phylogen_restructure (master_ptr guide);
master_ptr phylogen_clean_copy (master_ptr tree);
void phylogen_output_header (char *alignment_file, char *pivot_sequence,
                             long int start, long int stop, int counter,
                             char *tree_string, int anchor_type,
                             float adjustment, int length, int active,
                             int gaps);
int phylogen_process_data (char *counter_seq, long int track,
                           restricted_ptr alignment, master_ptr master_tree,
                           master_ptr *master_index, long int *change_points,
                           int anchor_type, float adjustment, int length,
                           int active, int gaps);
int phylogen_score_columns (restricted_ptr alignment, master_ptr master_tree,
                            master_ptr *master_index, long int *change_points,
                            int anchor_type, float adjustment, int active,
                            int gaps, float **scores, char **ancestral);
int phylogen_new_structures (restricted_ptr alignment, long int position,
                             master_ptr master_tree, master_ptr *master_index,
                             int *nodes, int *levels, int *left, int *right,
                             int active, int *actual_actives, int *num_levels);
int phylogen_score_column (restricted_ptr alignment, long int column,
                           int num_levels, int *nodes, int *levels, int *left,
                           int *right, int actual_actives, int anchor_type,
                           float adjustment, int gaps, int *mutation,
                           long int *best, float *score, char *root);
void phylogen_wildcard (int num_levels, int *nodes, int *levels, int *left,
                        int *right, int *mutation, long int *best,
                        int *substitution, char *root);
void phylogen_enforced (int num_levels, int *nodes, int *levels, int *left,
                        int *right, int *mutation, long int *best,
                        int *substitution, char *root);
void phylogen_prohibited (int num_levels, int *nodes, int *levels, int *left,
                          int *right, int *mutation, long int *best,
                          int *substitution, char *root);
void phylogen_general (int num_levels, int *nodes, int *levels, int *left,
                       int *right, int *mutation, long int *best,
                       int *substitution, char *root);
int phylogen_locate_regions (float *scores, char *ancestral, long int size,
                             int length, char *counter_seq, long int track);
void phylogen_full_runs (float *scores, long int range_len,
                         long int *intervals, long int **low_ends,
                         long int **high_ends);
bool phylogen_is_long (char *string, long int *value);
bool phylogen_is_int (char *string, int *value);
bool phylogen_is_float (char *string, float *value);
void phylogen_init_queue (int size);
void phylogen_enqueue (master_ptr node, int level, int parent, int side);
bool phylogen_dequeue (master_ptr *node, int *level, int *parent, int *side);
void phylogen_destroy_queue (void);
void phylogen_print_tree (master_ptr tree);
void phylogen_show_tree (int level_total, int *nodes, int *levels, int *left,
                         int *right);
void phylogen_print_scores (float *scores, char *ancestral, long int size);

/*****************************************************************************/
/*                                                                           */
/* Code section                                                              */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/*                                                                           */
/* Procedure: main                                                           */
/*                                                                           */
/* "main" procedure of the program. Receives and analyses the command line   */
/*   parameters, sets the control variables of the program, checks their     */
/*   consistency and passes control to internal procedures which actually    */
/*   process the input data; returns 0 if everything is OK, non-zero status  */
/*   in case of any errors                                                   */

int main (int argc, char **argv)
{
 char *alignment_file, *tree_file, *tree_string, *pivot_sequence, *counter_seq;
 bool range_set, length_set, active_set, gap_set, anchor_set, adjustment_set;
 int length, active, arg_count, gaps, anchor_type, counter; float adjustment;
 long int start, stop, track, *change_points;
 strlist_ptr registered_paths, new_path, path_scan; restricted_ptr alignment;
 master_ptr master_tree, *master_index;

 /* Initialize the settings and prepare for the processing of the arguments  */

 alignment_file = NULL; tree_file = NULL; tree_string = NULL;
 range_set = FALSE; start = stop = UNDEFINED; pivot_sequence = NULL;
 if (DEFAULT_PATH == NULL) registered_paths = NULL;
 else {
  registered_paths = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct));
  registered_paths -> string = DEFAULT_PATH;
  registered_paths -> next = NULL;
 }
 length_set = FALSE; length = DEFAULT_MINIMAL_LENGTH;
 active_set = FALSE; active = DEFAULT_MINIMAL_ACTIVE;
 gap_set = FALSE; gaps = DEFAULT_GAP_TOLERANCE;
 anchor_set = FALSE; anchor_type = DEFAULT_ANCHOR_TYPE;
 adjustment_set = FALSE; adjustment = DEFAULT_ADJUSTMENT;
 
 if (argc < 2) {
  fprintf (stderr, "usage: %s <alignment_file>\n", argv [0]);
  fprintf (stderr, "                -t <tree_string> | -f <tree_file>\n");
  fprintf (stderr, "                [-I <directory_path>]*\n");
  fprintf (stderr, "                [-r <from> <to> [-p <pivot_sequence>]]\n");
  fprintf (stderr, "                [-l <min_length_of_region]\n");
  fprintf (stderr, "                [-a <min_active_sequences>]\n");
  fprintf (stderr, "                [-c | -b | -o]\n");
  fprintf (stderr, "                [-v <anchor_value>]\n");
  fprintf (stderr, "                [-g | -s | -w | -e | -d | -x | -m]\n");
  exit (1);
 }
 else {                           /* Some parameters provided - process them */

  /* Proceed to extract the command line parameters and create the settings  */

  arg_count = 1; while (arg_count < argc) {

   if (argv [arg_count] [0] != '-') {       /* Not an "-" option - file name */
    if (alignment_file != NULL) {
     fprintf (stderr, "Repeated alignment file name ('%s').\n",
                      argv [arg_count]);
     exit (1);
    }
    else { alignment_file = NTL0_strsave (argv [arg_count]); arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-t")) {      /* Line tree definition */
    arg_count++;
    if (arg_count == argc) {
     fprintf (stderr, "Missing the string defining a tree.\n"); exit (1);
    }
    else if ((tree_string != NULL) || (tree_file != NULL)) {
     fprintf (stderr, "Phylogenetic tree already set.\n"); exit (1);
    }
    else { tree_string = NTL0_strsave (argv [arg_count]); arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-f")) {      /* File tree definition */
    arg_count++;
    if (arg_count == argc) {
     fprintf (stderr, "Missing a tree file name.\n"); exit (1);
    }
    else if ((tree_string != NULL) || (tree_file != NULL)) {
     fprintf (stderr, "Phylogenetic tree already set.\n"); exit (1);
    }
    else { tree_file = NTL0_strsave (argv [arg_count]); arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-I")) {        /* New "include" path */
    arg_count++;
    if (arg_count == argc) {
     fprintf (stderr, "Missing directory path for inclusion.\n"); exit (1);
    }
    else {
     new_path = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct));
     new_path -> string = NTL0_strsave (argv [arg_count]);
     new_path -> next = NULL;

     if (registered_paths == NULL) registered_paths = new_path;
     else {
      path_scan = registered_paths;
      while (path_scan -> next != NULL) path_scan = path_scan -> next;
      path_scan -> next = new_path;
     }
     arg_count++;
    }
   }
   else if (!strcmp (argv [arg_count], "-r")) {             /* Range request */
    if (range_set) {
     fprintf (stderr,
              "Range for alignment loading already set to [%ld-%ld].\n",
              start, stop);
     exit (1);
    }
    else if (arg_count > argc - 3) {
     fprintf (stderr, "Incomplete range specification.\n"); exit (1);
    }
    else if ((!phylogen_is_long (argv [arg_count + 1], &start)) ||
             (!phylogen_is_long (argv [arg_count + 2], &stop))) {
     fprintf (stderr, "Illegal specification for the range.\n"); exit (1);
    }
    else if ((start < 0) || (stop < 0)) {
     fprintf (stderr, "Negative values not permitted for the range.\n");
     exit (1);
    }
    else if (start > stop) {
     fprintf (stderr, "Range start (%ld) greater than range end (%ld).\n",
                      start, stop);
     exit (1);
    }
    else { range_set = TRUE; arg_count += 3; }
   }
   else if (!strcmp (argv [arg_count], "-p")) {  /* Pivot sequence for range */
    arg_count++;
    if (pivot_sequence != NULL) {
     fprintf (stderr, "Sequence for alignment range already set.\n"); exit (1);
    }
    else { pivot_sequence = NTL0_strsave (argv [arg_count]); arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-l")) {     /* Region length request */
    if (length_set) {
     fprintf (stderr, "Minimal length of full intervals already set to %d.\n",
              length);
     exit (1);
    }
    else if (arg_count > argc - 2) {
     fprintf (stderr, "Incomplete length specification.\n"); exit (1);
    }
    else if (!phylogen_is_int (argv [arg_count + 1], &length)) {
     fprintf (stderr, "Illegal minimal region length (%s).\n",
                      argv [arg_count + 1]);
     exit (1);
    }
    else if (length <= 0) {
     fprintf (stderr, "Positive length required for full intervals.");
     exit (1);
    }
    else { length_set = TRUE; arg_count += 2; }
   }
   else if (!strcmp (argv [arg_count], "-a")) {  /* Active sequences request */
    if (active_set) {
     fprintf (stderr,
              "Minimal number of active sequences already set to %d.\n",
              active);
     exit (1);
    }
    else if (arg_count > argc - 2) {
     fprintf (stderr, "Incomplete active sequences specification.\n");
     exit (1);
    }
    else if (!phylogen_is_int (argv [arg_count + 1], &active)) {
     fprintf (stderr, "Illegal minimal number of active sequences (%s).\n",
                      argv [arg_count + 1]);
     exit (1);
    }
    else if (active <= 0) {
     fprintf (stderr, "Positive number of active sequences required.");
     exit (1);
    }
    else { active_set = TRUE; arg_count += 2; }
   }
   else if (!strcmp (argv [arg_count], "-c")) {   /* Column size anchor type */
    if (anchor_set) {
     fprintf (stderr, "Duplicate anchor type request.\n"); exit (1);
    }
    else { anchor_type = COLUMN_SIZE_ANCHOR; anchor_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-b")) {     /* Column size, no gaps */
    if (anchor_set) {
     fprintf (stderr, "Duplicate anchor type request.\n"); exit (1);
    }
    else { anchor_type = TRIM_ANCHOR; anchor_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-o")) {      /* No calculated anchor */
    if (anchor_set) {
     fprintf (stderr, "Duplicate anchor type request.\n"); exit (1);
    }
    else { anchor_type = NO_ANCHOR; anchor_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-v")) {         /* Fixed anchor req. */
    if (adjustment_set) {
     fprintf (stderr, "Duplicate anchor value request.\n"); exit (1);
    }
    else if (arg_count > argc - 2) {
     fprintf (stderr, "Incomplete anchor value specification.\n"); exit (1);
    }
    else if (!phylogen_is_float (argv [arg_count + 1], &adjustment)) {
     fprintf (stderr, "Illegal anchor value (%s).\n", argv [arg_count + 1]);
     exit (1);
    }
    else { adjustment_set = TRUE; arg_count += 2; }
   }
   else if (!strcmp (argv [arg_count], "-g")) {           /* General scoring */
    if (gap_set) {
     fprintf (stderr, "Kind of requested scoring already set.\n"); exit (1);
    }
    else { gaps = GENERAL; gap_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-s")) {            /* Strict scoring */
    if (gap_set) {
     fprintf (stderr, "Kind of requested scoring already set.\n"); exit (1);
    }
    else { gaps = STRICT; gap_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-w")) {          /* Wildcard scoring */
    if (gap_set) {
     fprintf (stderr, "Kind of requested scoring already set.\n"); exit (1);
    }
    else { gaps = WILDCARD; gap_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-e")) {          /* Enforced scoring */
    if (gap_set) {
     fprintf (stderr, "Kind of requested scoring already set.\n"); exit (1);
    }
    else { gaps = ENFORCED; gap_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-d")) {          /* Promoted scoring */
    if (gap_set) {
     fprintf (stderr, "Kind of requested scoring already set.\n"); exit (1);
    }
    else { gaps = PROMOTED; gap_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-x")) {        /* Prohibited scoring */
    if (gap_set) {
     fprintf (stderr, "Kind of requested scoring already set.\n"); exit (1);
    }
    else { gaps = PROHIBITED; gap_set = TRUE; arg_count++; }
   }
   else if (!strcmp (argv [arg_count], "-m")) {            /* Masked scoring */
    if (gap_set) {
     fprintf (stderr, "Kind of requested scoring already set.\n"); exit (1);
    }
    else { gaps = MASKED; gap_set = TRUE; arg_count++; }
   }
   else {                                              /* Unknown "-" option */
    fprintf (stderr, "Illegal command line option (%s).\n", argv [arg_count]);
    fprintf (stderr, "usage: %s <alignment_file>\n", argv [0]);
    fprintf (stderr, "                -t <tree_string> | -f <tree_file>\n");
    fprintf (stderr, "                [-I <directory_path>]*\n");
    fprintf (stderr,
                 "                [-r <from> <to> [-p <pivot_sequence>]]\n");
    fprintf (stderr, "                [-l <min_length_of_region]\n");
    fprintf (stderr, "                [-a <min_active_sequences>]\n");
    fprintf (stderr, "                [-c | -b | -o]\n");
    fprintf (stderr, "                [-v <anchor_value>]\n");
    fprintf (stderr, "                [-g | -s | -w | -e | -d | -x | -m]\n");
    exit (1);
   }
  }
  /* Check whether all necessary data have been collected and consistent     */

  if (alignment_file == NULL) {
   fprintf (stderr, "Must have an alignment to process.\n"); exit (1);
  }
  else if ((range_set) && (pivot_sequence == NULL)) {    /* Default sequence */
   pivot_sequence = NTL0_strsave (DEFAULT_RANGE_SEQUENCE);
  }
  else if (!range_set) {  /* Range for loading not provided - assume default */
   if (pivot_sequence != NULL) {
    fprintf (stderr, "Sequence '%s' provided without range.\n",
                     pivot_sequence);
    exit (1);
   }
   else {                              /* Use the default sequence and range */
    pivot_sequence = NTL0_strsave (DEFAULT_SEQUENCE);
    start = DEFAULT_RANGE_START; stop = DEFAULT_RANGE_STOP;
   }
  }
  if ((tree_string == NULL) && (tree_file == NULL)) {
   fprintf (stderr, "Must have a tree to guide the scoring.\n"); exit (1);
  }
  /* Check whether the provided anchor makes sense for the settings          */

  if ((adjustment < 0.0) && (anchor_type == NO_ANCHOR)) {
   fprintf (stderr, "WARNING: Negative anchor - no ranges can be reported.\n");
  }
  /* Proceed to assemble the data to be processed by the algorithm           */

  if (phylogen_load_data (registered_paths, alignment_file, pivot_sequence,
                          start, stop, &tree_string, tree_file, &alignment,
                          &master_tree, &master_index, &change_points,
                          &counter, &counter_seq, &track) < 0)
   exit (1);

  phylogen_output_header (alignment_file, pivot_sequence, start, stop,
                          counter, tree_string, anchor_type, adjustment,
                          length, active, gaps);

  if (phylogen_process_data (counter_seq, track, alignment, master_tree,
                             master_index, change_points, anchor_type,
                             adjustment, length, active, gaps) < 0)
   exit (1);

  exit (0);
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_load_data                                             */
/*                                                                           */
/* Procedure collects alignment data with respect to provided settings and   */
/*   returns them within its reference parameters; structures the provided   */
/*   tree into internal master form and sets other control data based on the */
/*   alignment and the tree; returns 0 if everything is OK, negative status  */
/*   otherwise                                                               */
/*                                                                           */
/* Structures set: alignment, points of layout change, master tree & index   */
/*                                                                           */
/* Strategy:                                                                 */
/*                                                                           */
/*   Load the entire alignment, expand but not cut                           */
/*   Process the tree string into internal form (with data for checking)     */
/*   Form the tree based on the specification, from file or string           */
/*   Form the index for the leafs of the tree                                */
/*   Check the tree against the alignment, assign pointers to tree leafs     */
/*   Copy the tree for all sequences into master tree form                   */
/*   Reassign the array pointers to the new tree                             */
/*   Destroy the old tree structure                                          */
/*   If specific range in the alignment was requested:                       */
/*     Cut the alignment to range                                            */
/*     Make the new alignment structure, without inactive sequences; also    */
/*       deactivate array entries and activate tree nodes                    */
/*     Create a new tree based on the active leafs (sequences) only          */
/*     Destroy the internal nodes of the old tree structure                  */
/*   Create an array of locations of the points of tree change               */
/*   Sort the array of locations of the points of tree change                */

int phylogen_load_data (strlist_ptr registered_paths, char *alignment_file,
                        char *pivot_sequence, long int start, long int stop,
                        char **tree_string, char *tree_file,
                        restricted_ptr *alignment, master_ptr *master_tree,
                        master_ptr **master_index, long int **change_points,
                        int *counter, char **counter_seq, long int *track)
{
 header_ptr file_info; align_ptr packed; unpacked_ptr expanded;
 char *copy_tree, *scan; bool white_space;
 int items, left_count, right_count, left_square, right_square, row, offset;
 control_ptr *catalog, control_tree; master_ptr guide_tree, *guide_index;
 int count, pos, iscan; line_ptr line_al, last_line; long int *points, current;
 long int last;
 
 /* Load the alignment range, necessary for checking the tree consistency    */

 if (phylogen_entire_alignment (registered_paths, alignment_file,
                                pivot_sequence, start, stop, &file_info,
                                &packed, &expanded, counter, counter_seq,
                                track) < 0)
  return -11;

 /* If the phylogenetic tree is to be read from a file, read the data        */

 if (*tree_string == NULL) {
  if (phylogen_tree_string (registered_paths, tree_file, tree_string) < 0)
   return -12;
 }
 /* Process the tree string - compress any possible extra whitespace chars   */

 copy_tree = (char *)
             NTL0_ckalloc ((strlen (*tree_string) + 1) * sizeof (char));
 white_space = TRUE; scan = *tree_string;
 items = 0; left_count = right_count = 0; left_square = right_square = 0;
 while (*scan != '\0') {
  if ((!white_space) || ((*scan != ' ') && (*scan != '\t') && (*scan != '\n')))
  {
   if ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) {
    copy_tree [items++] = SEPARATOR; white_space = TRUE;
   }
   else if (*scan == '(') {
    left_count++;
    if ((white_space) && (items > 0)) {
     items--; copy_tree [items++] = '('; white_space = FALSE;
    }
    else { copy_tree [items++] = '('; white_space = FALSE; }
   }
   else if (*scan == ')') {
    right_count++;
    if ((white_space) && (items > 0)) {
     items--; copy_tree [items++] = ')'; white_space = FALSE;
    }
    else { copy_tree [items++] = ')'; white_space = FALSE; }
   }
   else if (*scan == '[') {
    left_square++; white_space = FALSE; copy_tree [items++] = '[';
   }
   else if (*scan == ']') {
    right_square++; white_space = FALSE; copy_tree [items++] = ']';
   }
   else { white_space = FALSE; copy_tree [items++] = *scan; }
  }
  scan++;
 }
 if (white_space) items--; copy_tree [items] = '\0';

 /* Remove the unnecessary separators and form the final tree string */

 scan = copy_tree; items = 0; while (*scan != '\0') {
  if ((*scan == '(') || (*scan == ')')) {
   (*tree_string) [items++] = *scan; scan++; if (*scan == SEPARATOR) scan++;
  }
  else if ((*scan == '[') || (*scan == ',')) {
   (*tree_string) [items++] = *scan; scan++; if (*scan == SEPARATOR) scan++;
  }
  else if ((*scan == ']') || (*scan == ',')) {
   if (scan == copy_tree) {
    fprintf (stderr,
             "Tree string starting with illegal symbol (%c).\n", *scan);
    return -13;
   }
   else {
    scan--;
    if (*scan == SEPARATOR) { scan++; items--; (*tree_string) [items++] = *scan; }
    else { scan++; (*tree_string) [items++] = *scan; }
    scan++;
   }
  }
  else { (*tree_string) [items++] = *scan; scan++; }
 }
 (*tree_string) [items] = '\0';
 free (copy_tree);

 /* Check whether the format is OK and the number of leafs correct for algn. */
 
 if (left_count != right_count) {
  fprintf (stderr, "Invalid string specification of a tree.\n"); return -14;
 }
 else if (left_square != right_square) {
  fprintf (stderr, "Invalid range definition of a tree leaf.\n"); return -15;
 }
 /* Only one pair of square brackets allowed per leaf, so check the number   */

 if (left_square == 0) {
  fprintf (stderr, "Illegal phylogenetic tree specification.\n"); return -16;
 } 
 else if (left_square != expanded -> dimension) {
  fprintf (stderr,
      "Number of tree leafs (%d) does not match alignment dimension (%d).\n",
           left_square, expanded -> dimension);
  return -16;
 }
 /* Form the catalogue of alignment sequences to serve as pointers to leafs  */

 catalog = (control_ptr *)
           NTL0_ckalloc ((expanded -> dimension) * sizeof (control_ptr));
 for (row = 0; row < expanded -> dimension; row++) catalog [row] = NULL;

 /* Expand the tree structure from the string containing the description     */

 offset = 0;      /* Current offset in tree string - to be used in recursion */
 
 if ((control_tree = phylogen_build_tree (*tree_string, &offset)) == NULL) {
  fprintf (stderr, "Invalid phylogenetic tree description.\n"); return -17;
 }
 else if ((*tree_string) [offset] != '\0') {    /* Wrong string or recursion */
  fprintf (stderr, "PROBLEM: Unterminated string after tree assembly.\n");
  return -18;
 }
 /* Check the alignment against the tree - same sequences and all sequences  */

 for (row = 0; row < expanded -> dimension; row++) {
  catalog [row] = phylogen_search_control (control_tree,
                              ((file_info -> sequences) [row]).seq_name,
                              ((file_info -> sequences) [row]).begin,
                              ((file_info -> sequences) [row]).end);
  if (catalog [row] == NULL) {           /* Corresponding leaf must be found */
   fprintf (stderr,
            "Incorrect or duplicate tree leaf for alignment (%d).\n", row);
   return -19;
  }
 }
 /* Create a (preliminary) master tree, based on the control tree            */

 guide_tree = phylogen_build_guide (control_tree); /* ... for full alignment */

 /* Replace the catalog with a preliminary master index, for full alignment  */

 guide_index = (master_ptr *)
               NTL0_ckalloc ((expanded -> dimension) * sizeof (master_ptr));
 for (row = 0; row < expanded -> dimension; row++) {
  guide_index [row] = (catalog [row]) -> derived;
  (guide_index [row]) -> number = row;
 }
 /* Destroy the control tree - not needed any more; nor the catalogue        */

 control_tree = phylogen_destroy_control (control_tree); free (catalog);
 
 /* Copy the alignment into compact structure and activate/deactivate guides */

 *alignment = (restricted_ptr) NTL0_ckalloc (sizeof (Restricted_Struct));
 
 count = 0;
 for (row = 0; row < expanded -> dimension; row++) {
  if ((expanded -> segment_code) [row] == VALID_SEGMENT) count++;
 }
 (*alignment) -> dimension = count;
 (*alignment) -> size = expanded -> size;

 (*alignment) -> texts = (char **) NTL0_ckalloc (count * sizeof (char *));
 (*alignment) -> begin = (long int *)
                         NTL0_ckalloc (count * sizeof (long int *));
 (*alignment) -> end = (long int *)
                       NTL0_ckalloc (count * sizeof (long int *));
 (*alignment) -> starts = (long int *)
                          NTL0_ckalloc (count * sizeof (long int *));
 (*alignment) -> stops = (long int *)
                         NTL0_ckalloc (count * sizeof (long int *));

 *master_index = (master_ptr *) NTL0_ckalloc (count * sizeof (master_ptr *));

 /* Examine the individual alignment sequences for activity and form index   */
 
 count = 0;
 for (row = 0; row < expanded -> dimension; row++) {
  if ((expanded -> segment_code) [row] == VALID_SEGMENT) {
   ((*alignment) -> texts) [count] = (expanded -> texts) [row];
   ((*alignment) -> begin) [count] = (expanded -> begin) [row];
   ((*alignment) -> end) [count] = (expanded -> end) [row];
   ((*alignment) -> starts) [count] = (expanded -> starts) [row];
   ((*alignment) -> stops) [count] = (expanded -> stops) [row];

   (*master_index) [count] = guide_index [row];
   ((*master_index) [count]) -> number = count;
   ((*master_index) [count]) -> active = TRUE;
   
   count++;
  }
  else {             /* Sequence not active in the range under consideration */
   (guide_index [row]) -> active = FALSE;
  }
 }
 /* Now restructure the guide tree into master tree, containing active leafs */
 
 *master_tree = phylogen_restructure (guide_tree);

 /* Destroy the internal nodes of the guide - not needed any more            */

 guide_tree = phylogen_clean_copy (guide_tree);

 /* Destroy the inactive leaf nodes of the guide tree - not to be accessible */

 for (row = 0; row < expanded -> dimension; row++) {
  if ((guide_index [row]) -> active == FALSE) free (guide_index [row]);
 }
 free (guide_index);
 
 /* Destroy the "file_info" structure, since its contents are not needed     */

 /* ### To be defined ###################################################### */

 /* Destroy "partially unpacked" structure used in the expansion - done      */

 if (packed -> begin != NULL) free (packed -> begin);
 if (packed -> end != NULL) free (packed -> end);

 if (packed -> lines != NULL) {
  for (row = 0; row < expanded -> dimension; row++) {
   line_al = (packed -> lines) [row]; while (line_al != NULL) {
    last_line = line_al; line_al = line_al -> next; free (last_line);
   }
  }
 }
 free (packed);

 /* Destroy the expanded alignment info now when the restricted copy made    */

 /* ### To be defined ###################################################### */

 /* Set up the array of change points for the alignment and sort them        */

 points = (long int *)
          NTL0_ckalloc (2 * ((*alignment) -> dimension) * sizeof (long int));
 count = 0; for (row = 0; row < (*alignment) -> dimension; row++) {
  points [count] = ((*alignment) -> starts) [row];
  points [count + 1] = ((*alignment) -> stops) [row] + 1;
  count += 2;
 }
 for (pos = 2; pos < count; pos++) { /* Insertion sort - not too many points */
 
  current = points [pos]; iscan = pos - 1;
  while ((iscan >= 0) && (points [iscan] > current)) {
   points [iscan + 1] = points [iscan]; iscan--;
  }
  points [iscan + 1] = current;
 }
 /* Eliminate the duplicate points of change and set up the array dimension  */

 pos = 0; last = -1; for (iscan = 0; iscan < count; iscan++) {
  if (points [iscan] != last) {
   points [pos++] = points [iscan]; last = points [iscan];
  }
 }
 while ((pos > 0) && (points [pos - 1] >= (*alignment) -> size)) pos--;

 /* Reallocate space for the change array, copy points and terminate it (-1) */
 
 (*change_points) = (long int *) NTL0_ckalloc ((pos + 1) * sizeof (long int));
 for (iscan = 0; iscan < pos; iscan++)
  (*change_points) [iscan] = points [iscan];
 (*change_points) [pos] = -1;

 free (points);

 /****************************************************************************/
 /*                                                                          */
 /* Test code checks how the master tree, index and changes array are set    */
 /*                                                                          */
 /* fprintf (stderr, "Tree:\n"); phylogen_print_tree (*master_tree);         */
 /* fprintf (stderr, "\nIndex:\n");                                          */
 /* for (pos = 0; pos < (*alignment) -> dimension; pos++) {                  */
 /*  fprintf (stderr, "%d  ", ((*master_index) [pos]) -> number);            */
 /* }                                                                        */
 /* fprintf (stderr, "\nChange array:\n");                                   */
 /* pos = 0; while ((*change_points) [pos] >= 0) {                           */
 /*  fprintf (stderr, "%ld - ", (*change_points) [pos]); pos++;              */
 /* }                                                                        */
 /* fprintf (stderr, "\n");                                                  */
 /*                                                                          */
 /****************************************************************************/

 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_entire_alignment                                      */
/*                                                                           */
/* Procedure reads the entire alignment from the specified file, expands it  */
/*   and cuts it to a range if a resticted one is requested; locates the     */
/*   track sequence (to be used when reporting regions) and the initial      */
/*   track number; returns 0 if everything is OK, negative status otherwise  */

int phylogen_entire_alignment (strlist_ptr paths, char *file_name,
                               char *pivot, long int start, long int stop,
                               header_ptr *file_info, align_ptr *packed,
                               unpacked_ptr *expanded, int *counter,
                               char **counter_seq, long int *track)
{
 strlist_ptr current_path; char *new_path, *special; int index; bool found;
 errind err_stat;
 
 /* Load the alignment into an internal structure first                      */

 if ((paths == NULL) || (file_name [0] == '/') || (file_name [0] == '~')) {
  if ((err_stat = NTL2_Load_AlignFile (file_name, file_info)) != NULL) {
   fprintf (stderr, "Can't open alignment file '%s' (%s).\n",
                    file_name, err_stat -> message);
   return -1;
  }
  else if (*file_info == NULL) {        /* File found, but nothing retrieved */
   fprintf (stderr, "No alignment to process.\n"); return -2;
  }
 }
 else {                     /* Directory path(s) provided, try them in order */
  current_path = paths; *file_info = NULL;
  while ((*file_info == NULL) && (current_path != NULL)) {
   new_path = NTL0_ckalloc ((strlen (current_path -> string) +
                             strlen (file_name) + 2) * sizeof (char));
   strcpy (new_path, current_path -> string);
   if (new_path [strlen (current_path -> string) - 1] != '/')
    strcat (new_path, "/");
   strcat (new_path, file_name);

   if ((err_stat = NTL2_Load_AlignFile (new_path, file_info)) != NULL) {
    *file_info = NULL; current_path = current_path -> next;
   }
   free (new_path);
  }
  if (err_stat != NULL) {                      /* File not found on any path */
   fprintf (stderr, "Can't open alignment file '%s' (%s).\n",
                    file_name, err_stat -> message);
   return -3;
  }
  else if (*file_info == NULL) {        /* File found, but nothing retrieved */
   fprintf (stderr, "No alignment to process.\n"); return -4;
  }
 }
 /* Now proceed to partially unpack the information from the loaded file     */

 if ((err_stat = NTL1_Load_Alignment (1, *file_info, packed)) != NULL) {
  fprintf (stderr, "Can't extract the alignment from the file (%s).\n",
                   err_stat -> message);
  return -5;
 }
 else {
  if ((err_stat = NTL2_Unpack_Alignment (1, *file_info, *packed, paths,
                                         expanded)) != NULL) {
   fprintf (stderr, "Can't expand the alignment (%s).\n", err_stat -> message);
   return -6;
  }
  else {                          /* Expansion of the full alignment done OK */

   /* Check if a range within the alignment is requested, and cut it to size */

   if (pivot != NULL) {
    if ((err_stat = NTL1_Cut_Alignment (*file_info, *expanded, pivot,
                                        start, stop)) != NULL) {
     if (err_stat -> kind == WARNING) {
      fprintf (stderr, "WARNING:  %s.\n", err_stat -> message);
     }
     else {
      fprintf (stderr, "Can't get the range from the alignment (%s).\n",
                       err_stat -> message);
      return -7;
     }
    }
   }
  }
 }
 /* Locate the sequence in the alignment that sets the counter for output    */

 if (pivot == NULL) special = DEFAULT_RANGE_SEQUENCE; else special = pivot;
 
 index = 0; found = FALSE;
 while ((!found) && (index < (*expanded) -> dimension)) {
  if (((*expanded) -> segment_code) [index] == VALID_SEGMENT) {
   if (!strcmp ((((*file_info) -> sequences) [index]).seq_name, special))
    found = TRUE;
   else index++;
  }
  else index++;
 }
 if (!found) {
  fprintf (stderr, "Counter sequence '%s' not found in the selected range.\n",
                    special);
  return -8;
 }
 /* Search for a duplicate sequence in the range - counter must be unique    */
 
 index++; *counter = index; found = FALSE;
 while ((!found) && (index < (*expanded) -> dimension)) {
  if (((*expanded) -> segment_code) [index] == VALID_SEGMENT) {
   if (!strcmp ((((*file_info) -> sequences) [index]).seq_name, special))
    found = TRUE;
   else index++;
  }
  else index++;
 }
 if (found) {
  fprintf (stderr, "Ambiguos counter sequence '%s' in the selected range.\n",
                   special);
  return -9;
 }
 else {                         /* Unique sequence found with the given name */
  *counter_seq = ((*expanded) -> texts) [(*counter) - 1];
  *track = ((*expanded) -> begin) [(*counter) - 1];
 }
 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_tree_string                                           */
/*                                                                           */
/* Procedure reads data from the specified tree file and stores the tree     */
/*   string into a character array structure; returns 0 if everything is OK, */
/*   negative status otherwise                                               */

int phylogen_tree_string (strlist_ptr paths, char *file_name, char **tree)
{
 FILE *tree_file; strlist_ptr current_path; char *new_path, *tree_buffer, ch;
 bool in_comment, white_space; int buffer_size, items, left_count, right_count;
 
 /* Locate the file containing the tree information first                    */
 
 if ((paths == NULL) || (file_name [0] == '/') || (file_name [0] == '~')) {
  if ((tree_file = fopen (file_name, "r")) == NULL) {
   fprintf (stderr, "Can't open tree definition file '%s'.\n", file_name);
   return -21;
  }
 }
 else {                     /* Directory path(s) provided, try them in order */
  current_path = paths; tree_file = NULL;
  while ((tree_file == NULL) && (current_path != NULL)) {
   new_path = NTL0_ckalloc ((strlen (current_path -> string) +
                             strlen (file_name) + 2) * sizeof (char));
   strcpy (new_path, current_path -> string);
   if (new_path [strlen (current_path -> string) - 1] != '/')
    strcat (new_path, "/");
   strcat (new_path, file_name);

   if ((tree_file = fopen (new_path, "r")) == NULL)
    current_path = current_path -> next;
   free (new_path);
  }
  if (tree_file == NULL) {                     /* File not found on any path */
   fprintf (stderr, "Can't open tree definition file '%s'.\n", file_name);
   return -22;
  }
 }
 /* Proceed to load the data from the file into the tree-defining string     */

 tree_buffer = (char *) NTL0_ckalloc (INIT_BUFFER_SIZE * sizeof (char));
 buffer_size = INIT_BUFFER_SIZE; items = 0;
 in_comment = FALSE; white_space = TRUE; left_count = right_count = 0;
 do {

  /* Get the next character and do limited processing for string correctness */
  
  ch = (char) fgetc (tree_file);
  if (in_comment) { if (ch == '\n') in_comment = FALSE; }
  
  else if (ch != EOF) {                /* Character is not part of a comment */
   if (ch == '#') in_comment = TRUE;
   else if ((!white_space) || ((ch != ' ') && (ch != '\t') && (ch != '\n'))) {

    /* If there are more characters than current buffer can hold, reallocate */
    
    if (items == buffer_size) {
     tree_buffer = (char *) NTL0_ckrealloc (tree_buffer,
                                            2 * buffer_size * sizeof (char));
     buffer_size = 2 * buffer_size;
    }
    if (ch == '(') {               /* Count to check matching of parentheses */
     left_count++; tree_buffer [items++] = ch; white_space = FALSE;
    }
    else if (ch == ')') {          /* Count to check matching of parentheses */
     right_count++; tree_buffer [items++] = ch; white_space = FALSE;
    }
    else if ((!white_space) && ((ch == ' ') || (ch == '\t') || (ch == '\n'))) {
     white_space = TRUE; tree_buffer [items++] = ' ';
    }
    else { tree_buffer [items++] = ch; white_space = FALSE; }
   }
  }
 } while (ch != EOF);

 /* Allocate the space for the tree string and copy the loaded contents      */
 
 *tree = (char *) NTL0_ckrealloc (tree_buffer, (items + 1) * sizeof (char));
 (*tree) [items] = '\0';

 if (left_count != right_count) {          /* Superficial structure checking */
  fprintf (stderr, "Improperly balanced parenthesis in file '%s'.\n",
                   file_name);
  return -23;
 }
 else return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_build_tree                                            */
/*                                                                           */
/* Procedure recursively scans the received character array describing the   */
/*   tree, doing one node per recursive call (internal or leaf - internal    */
/*   nodes are specified by a matching set of parenthesis); returns pointer  */
/*   to the formed node if everything is OK, NULL pointer otherwise          */
/*                                                                           */
/* Algorithm:                                                                */
/*                                                                           */
/*   If the next position is '(' (start grouping corresponding internal n.): */
/*     Recurse on the next character for left subtree                        */
/*     Recurse on the next character for right subtree                       */
/*     If the next character in the string is not ')', error - return NULL   */
/*     Create the internal node and set its left and right children ptrs     */
/*     Return the pointer to the formed internal node of the tree            */
/*   Else (string at current position staring data to store at a leaf):      */
/*     Extract the sequence name, start and end position from the string     */
/*     Form a leaf node, record name and range, set child pointers to NULL   */
/*     Return the pointer to the formed leaf node of the tree                */

control_ptr phylogen_build_tree (char *tree_string, int *offset)
{
 char *start, *name; long int begin, end;
 control_ptr left_child, right_child, internal, leaf;

 /* Decide whether to form an internal node or leaf, based on first char.    */
 
 if (tree_string [*offset] == '(') {     /* More grouping - an internal node */
  (*offset)++;
  if ((left_child = phylogen_build_tree (tree_string, offset)) == NULL)
   return NULL;
  if (tree_string [*offset] == SEPARATOR) (*offset)++;
  if ((right_child = phylogen_build_tree (tree_string, offset)) == NULL)
   return NULL;
  if (tree_string [*offset] != ')') return NULL;   /* End of level grouping? */
  else (*offset)++;

  /* Form and return an internal node corresponding to the processed group   */
  
  internal = (control_ptr) NTL0_ckalloc (sizeof (Control_Struct));
  internal -> name = NULL; internal -> begin = 0; internal -> end = 0;
  internal -> visited = FALSE; internal -> derived = NULL;
  internal -> left = left_child; internal -> right = right_child;

  return internal;
 }
 else if (tree_string [*offset] == ')') {     /* Cannot start group or leaf! */
  fprintf (stderr, "Illegal layout of the tree string.\n"); return NULL;
 }
 else if (tree_string [*offset] == SEPARATOR) {    /* Bad string preparation */
  fprintf (stderr, "PROBLEM: Separator starting a recursion.\n"); return NULL;
 }
 else {        /* Encountered character should start the name at a leaf node */

  /* Extract the name of the sequence associated with the leaf first         */
  
  start = &(tree_string [*offset]);
  while ((tree_string [*offset] != '[') && (tree_string [*offset] != '\0'))
   (*offset)++;
  if (tree_string [*offset] != '[') {
   fprintf (stderr, "Illegal leaf specification in the tree string ([).\n");
   return NULL;
  }
  tree_string [*offset] = '\0'; name = NTL0_strsave (start);
  tree_string [*offset] = '[';

  /* Extract the beginning position (sequence recording) to assign the leaf  */
  
  (*offset)++; start = &(tree_string [*offset]);
  while ((tree_string [*offset] != ',') && (tree_string [*offset] != '\0'))
   (*offset)++;
  if (tree_string [*offset] != ',') {
   fprintf (stderr, "Illegal leaf specification in the tree string (,).\n");
   return NULL;
  }
  tree_string [*offset] = '\0';
  if (!phylogen_is_long (start, &begin)) {
   fprintf (stderr,
            "Illegal start position for a sequence in the tree (%s).\n",
            start);
   return NULL;
  }
  tree_string [*offset] = ',';

  /* Extract the ending position (sequence recording) to assign to the leaf  */
  
  (*offset)++; start = &(tree_string [*offset]);
  while ((tree_string [*offset] != ']') && (tree_string [*offset] != '\0'))
   (*offset)++;
  if (tree_string [*offset] != ']') {
   fprintf (stderr, "Illegal leaf specification in the tree string (]).\n");
   return NULL;
  }
  tree_string [*offset] = '\0';
  if (!phylogen_is_long (start, &end)) {
   fprintf (stderr,
            "Illegal ending position for a sequence in the tree (%s).\n",
            start);
   return NULL;
  }
  tree_string [*offset] = ']';

  (*offset)++;           /* Position to the right of the last processed leaf */

  /* Form an return the leaf node corresponding to the processed description */
  
  leaf = (control_ptr) NTL0_ckalloc (sizeof (Control_Struct));
  leaf -> name = name; leaf -> begin = begin; leaf -> end = end;
  leaf -> visited = FALSE;
  leaf -> derived = NULL; leaf -> left = NULL; leaf -> right = NULL;

  return leaf;
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_search_control                                        */
/*                                                                           */
/* Procedure recursively searches the provided tree (with nodes in expanded  */
/*   form, containing full sequence information for leafs) for a leaf which  */
/*   uniquely correspond to a given sequence in the alignment; returns       */
/*   pointer to the discovered leaf, if found, or NULL pointer otherwise     */

control_ptr phylogen_search_control (control_ptr tree, char *name,
                                     long int start, long int stop)
{
 control_ptr node;

 if (tree -> name != NULL) {    /* If this is a leaf node with sequence data */
 
  if ((!(tree -> visited)) && (!strcmp (tree -> name, name)) &&
      (tree -> begin == start) && (tree -> end == stop)) {
   tree -> visited = TRUE; return tree;          /* Corresponding leaf found */
  }
  else return NULL;                      /* This is not a corresponding leaf */
 }
 /* Otherwise, if the node is internal, search its children until found      */
 
 else if ((node = phylogen_search_control (tree -> left, name,
                                           start, stop)) != NULL) return node;
 else return phylogen_search_control (tree -> right, name, start, stop);
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_build_guide                                           */
/*                                                                           */
/* Procedure recursively copies the entire tree used for checking the        */
/*   structure against the alignment sequences into a more compact form,     */
/*   same structure but only necessary information stored at nodes; returns  */
/*   pointer to the formed copy of a node                                    */
/*                                                                           */
/* A link is still maintained between every original node and its created    */
/*   copy in order to permit easy redirection of the index array to leafs    */

master_ptr phylogen_build_guide (control_ptr original)
{
 master_ptr left_child, right_child;
 
 if (original == NULL) return NULL;            /* Ran beyond a leaf - return */
 else {
  left_child = phylogen_build_guide (original -> left);
  right_child = phylogen_build_guide (original -> right);

  /* Now form the record representing the node, set its contents and return  */
  
  original -> derived = (master_ptr) NTL0_ckalloc (sizeof (Master_Struct));
  
  (original -> derived) -> number = INTERNAL_CODE;  /* Leafs to be set later */
  (original -> derived) -> active = FALSE;       /* Activity to be set later */
  (original -> derived) -> left = left_child;
  (original -> derived) -> right = right_child;

  return (original -> derived); /* Independent, but pointed to from original */
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_destroy_control                                       */
/*                                                                           */
/* Procedure destroys the received tree in the expanded form (full sequence  */
/*   information stored at leafs), recursively; returns NULL pointer         */

control_ptr phylogen_destroy_control (control_ptr tree)
{
 if (tree == NULL) return NULL;   /* Ran beyond leaf - terminating condition */
 else {
  tree -> left = phylogen_destroy_control (tree -> left);
  tree -> right = phylogen_destroy_control (tree -> right);

  tree -> derived = NULL; /* Pointer to active memory that shall not be used */
  if (tree -> name != NULL) free (tree -> name); free (tree); /* Release rec.*/
  return NULL;
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_restructure                                           */
/*                                                                           */
/* Procedure recursively builds an induced tree structure such that its      */
/*   leafs are only these which are active in the original tree; it makes    */
/*   copies of internal nodes of the induced tree, but does not copy the     */
/*   active leafs - their parents point to the original leafs instead;       */
/*   returns pointer to the induced tree                                     */
/*                                                                           */
/* Algorithm:                                                                */
/*                                                                           */
/*   If the encountered node is a leaf:                                      */
/*     If inactive return NULL else return pointer to it                     */
/*   Otherwise (if the encountered node is internal):                        */
/*     Recurse on the left child                                             */
/*     Recurse on the right child                                            */
/*     If both children are NULL, return NULL                                */
/*     If only one child is non-NULL, return that pointer                    */
/*     If both children are non-NULL:                                        */
/*       Form a new node, set left and right child, return pointer to it     */
/*     If the encountered node has only one child,                           */
/*         report system error and return NULL                               */

master_ptr phylogen_restructure (master_ptr guide)
{
 master_ptr left_child, right_child, internal;
 
 if ((guide -> left == NULL) && (guide -> right == NULL)) {        /* A leaf */
  if (guide -> active) return guide;  /* Return pointer to original active l.*/
  else return NULL;                  /* If the leaf is not active, ignore it */
 }
 else if ((guide -> left != NULL) && (guide -> right != NULL)) { /* Internal */

  /* Examine both its children and create induced subtrees first             */
  
  left_child = phylogen_restructure (guide -> left);
  right_child = phylogen_restructure (guide -> right);

  /* If nothing in its subtree is active, ignore it, otherwise examine more  */
  
  if ((left_child == NULL) && (right_child == NULL)) return NULL;
  
  else if ((left_child != NULL) && (right_child != NULL)) {  /* New internal */
   internal = (master_ptr) NTL0_ckalloc (sizeof (Master_Struct));
   internal -> number = INTERNAL_CODE;
   internal -> active = FALSE;
   internal -> left = left_child;
   internal -> right = right_child;

   return internal;
  }
  else if (left_child != NULL) return left_child;   /* Single leaf resulting */
  else return right_child;                                          /* Ditto */
 }
 else {     /* Encountered an internal node parenting only one child - error */
 
  fprintf (stderr, "PROBLEM: Invalid tree encountered while restructuring.\n");
  return NULL;
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_clean_copy                                            */
/*                                                                           */
/* Procedure destroys an induced tree based only on the active children of   */
/*   the original - as the induced tree had only internal nodes created anew */
/*   and used leafs of the original, only internal nodes should be removed;  */
/*   returns the NULL pointer, as the received tree is destroyed             */

master_ptr phylogen_clean_copy (master_ptr tree)
{
 if ((tree -> left == NULL) && (tree -> right == NULL)) return NULL; /* Leaf */
 
 else if ((tree -> left != NULL) && (tree -> right != NULL)) {   /* Internal */
 
  tree -> left = phylogen_clean_copy (tree -> left);  /* Recursively destroy */
  tree -> right = phylogen_clean_copy (tree -> right);              /* Ditto */
  free (tree); return NULL;   /* After subtrees are handled, remove the node */
 }
 else {       /* Every internal node must have two children - internal error */
  fprintf (stderr, "PROBLEM: Invalid tree encountered while cleaning.\n");
  return NULL;
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_output_header                                         */
/*                                                                           */
/* Procedure writes a number of comment lines in the output - output type    */
/*   identification and data about the settings under which it was           */
/*   generated, alignment file name and requested range, counter data,       */
/*   phylogenetic tree used for column scoring, anchor type and/or value     */
/*   used, minimal length of regions, minimal number of active sequences     */
/*   required and the kind of requested regions                              */

void phylogen_output_header (char *alignment_file, char *pivot_sequence,
                             long int start, long int stop, int counter,
                             char *tree_string, int anchor_type,
                             float adjustment, int length, int active,
                             int gaps)
{
 char *scan; int open_par, count;

 printf ("#:plain:\n\n");
 printf ("# Output of the minimal evolutionary change region location.\n");
 printf ("# Alignment file:  '%s'\n", alignment_file);
 if (pivot_sequence != NULL) {
  printf ("#   restricted to the range [%ld,%ld] in \"%s\".\n",
          start, stop, pivot_sequence);
  printf ("# Counting done in '%s' (sequence %d in the alignment).\n",
          pivot_sequence, counter);
 }
 else {
  printf ("# Counting done in sequence %d of the alignment.\n", counter);
 }
 
 printf ("#\n# Phylogenetic tree used:\n#\n# ");
 open_par = 0; scan = tree_string; while (*scan != '\0') {
  if (*scan == '(') { open_par++; printf ("("); }
  else if (*scan == ')') {
   open_par--;
   printf (")\n# "); for (count = 0; count < open_par; count++) printf (" ");
  }
  else if (*scan == SEPARATOR) printf (" ");
  else printf ("%c", *scan);
  scan++;
 }
 printf ("\n");

 switch (anchor_type) {
  case NO_ANCHOR: {
   printf ("# Anchor value:  constant %7.3f\n", adjustment); break;
  }
  case COLUMN_SIZE_ANCHOR: {
   printf ("# Anchor value:  column size (number of active rows)");
   if (adjustment < 0.0) printf (" minus constant %7.3f\n", - adjustment);
   else if (adjustment > 0.0) printf (" plus constant %7.3f\n", adjustment);
   else printf ("\n");
   break;
  }
  case TRIM_ANCHOR: {
   printf ("# Anchor value:  number of active rows with no internal gap\n");
   printf ("#                in the column");
   if (adjustment < 0.0) printf (", minus constant %7.3f\n", - adjustment);
   else if (adjustment > 0.0) printf (", plus constant %7.3f\n", adjustment);
   else printf ("\n");
   break;
  }
 }
 printf ("# Minimal length of the region required:  %d\n", length);
 printf ("# Minimal number of active sequences required in regions:  %d\n",
         active);
 printf ("# Kind of regions requested:  ");
 switch (gaps) {
  case GENERAL: {
   printf ("general (no special treatments of gaps).\n\n"); break;
  }
  case STRICT: {
   printf ("strict (no gaps permitted within regions).\n\n"); break;
  }
  case WILDCARD: {
   printf ("wildcard (gaps matching other characters,\n");
   printf ("#      but cannot precede a non-gap).\n\n");
   break;
  }
  case ENFORCED: { printf ("gaps enforced to precede a gap.\n\n"); break; }
  case PROMOTED: {
   printf (
        "gaps masking the ancestral character of gap-containing columns.\n\n");
   break;
  }
  case PROHIBITED: {
   printf ("gaps prohibited from preceding other characters.\n\n"); break;
  }
  case MASKED: {
   printf ("gaps occuring in the ancestral sequence\n");
   printf ("#       masked by another character.\n\n");
   break;
  }
  default: {
   fprintf (stderr, "PROBLEM: Unknown strictness request (%d).\n", gaps);
   exit (2);
  }
 }
 printf ("# Output format:   start  stop  ancestral_sequence\n\n\n");
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_process_data                                          */
/*                                                                           */
/* General dispatch procedure to score alignment columns and determine high  */
/*   scoring regions based on the received alignment and tree data; returns  */
/*   0 if everything is OK, negative status otherwise                        */

int phylogen_process_data (char *counter_seq, long int track,
                           restricted_ptr alignment, master_ptr master_tree,
                           master_ptr *master_index, long int *change_points,
                           int anchor_type, float adjustment, int length,
                           int active, int gaps)
{
 float *scores; char *ancestral;
 
 if (phylogen_score_columns (alignment, master_tree, master_index,
                             change_points, anchor_type, adjustment, active,
                             gaps, &scores, &ancestral) < 0)
  return -101;
 else return phylogen_locate_regions (scores, ancestral, alignment -> size,
                                      length, counter_seq, track);
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_score_columns                                         */
/*                                                                           */
/* Top level procedure for scoring all columns in the alignment and creating */
/*   an array of obtained scores (along with the ancestor sequence of root   */
/*   letter assignments for all columns); returns 0 if everything is OK,     */
/*   negative status otherwise                                               */
/*                                                                           */
/* Strategy:                                                                 */
/*                                                                           */
/*   Allocate memory space for all structures to be used in scoring process  */
/*   For every column position:                                              */
/*     If the position is a point of change:                                 */
/*       Scan through the current column, activate/deactivate tree leafs     */
/*       Create current tree description, in the packed form, consisting of: */
/*         Array NODES, negative INTERNAL_CODE at internals, number at leafs */
/*         Array LEVELS, indices in NODES of last entry of each tree level   */
/*         Array LEFT, indices in NODES of left child for each internal node */
/*         Array RIGHT, indices in NODES of right child for each internal n. */
/*         Other supplementary arrays to be used when scoring the columns    */
/*       If the current part of alignment is not to be scored, just skip it  */
/*     Score the current column (one which got to be current at this point   */
/*   Release the allocated space after the scoring is complete               */

int phylogen_score_columns (restricted_ptr alignment, master_ptr master_tree,
                            master_ptr *master_index, long int *change_points,
                            int anchor_type, float adjustment, int active,
                            int gaps, float **scores, char **ancestral)
{
 int next_point, position, actual_actives, *nodes, *levels, *left, *right;
 int *mutation, num_levels; long int *best;

 /* Allocate space for all arrays to be used in the process                  */
 
 nodes = (int *) NTL0_ckalloc (2 * (alignment -> dimension) * sizeof (int));
 levels = (int *) NTL0_ckalloc (2 * (alignment -> dimension) * sizeof (int));
 left = (int *) NTL0_ckalloc (2 * (alignment -> dimension) * sizeof (int));
 right = (int *) NTL0_ckalloc (2 * (alignment -> dimension) * sizeof (int));
 mutation = (int *) NTL0_ckalloc (2 * (alignment -> dimension) * sizeof (int));
 best = (long int *)
        NTL0_ckalloc (2 * (alignment -> dimension) * sizeof (long int));

 *scores = (float *) NTL0_ckalloc ((alignment -> size) * sizeof (float));
 *ancestral = (char *)
              NTL0_ckalloc (((alignment -> size) + 1) * sizeof (char));

 phylogen_init_queue (2 * (alignment -> dimension));     /* For tree packing */

 /* Score the alignment columns position by position                         */

 next_point = 0; position = 0; while (position < alignment -> size) {

  if (position == change_points [next_point]) {   /* Change of align. layout */
   next_point++;
   if (phylogen_new_structures (alignment, position, master_tree, master_index,
                                nodes, levels, left, right, active,
                                &actual_actives, &num_levels) < 0)
    return -111;
   else {                          /* Current tree successfully restructured */
   
    if (active > actual_actives) {   /* No point in scoring ignored segments */
     while ((position < alignment -> size) &&
            ((change_points [next_point] < 0) ||
             (position < change_points [next_point]))) {   /* Avance through */
      (*scores) [position] = (float) (-INFINITY);
      (*ancestral) [position] = 'N';
      position++;
     }
    }
    else { /* The alignment segment is OK - proceed to score the next column */
    
     /************************************************************************/
     /*                                                                      */
     /* Test code checks the layout of the restructured tree (packed form)   */
     /*                                                                      */
     /* phylogen_show_tree (num_levels, nodes, levels, left, right);         */
     /*                                                                      */
     /************************************************************************/

     if (phylogen_score_column (alignment, position, num_levels, nodes, levels,
                                left, right, actual_actives, anchor_type,
                                adjustment, gaps, mutation, best,
                                &((*scores) [position]),
                                &((*ancestral) [position])) < 0) return -121;
     else position++;
    }
   }
  }
  else {                /* Continuation of a segment - not a point of change */

   if (phylogen_score_column (alignment, position, num_levels, nodes, levels,
                                left, right, actual_actives, anchor_type,
                                adjustment, gaps, mutation, best,
                                &((*scores) [position]),
                                &((*ancestral) [position])) < 0) return -122;
   else position++;
  }
 }
 free (nodes); free (levels); free (left); free (right);
 free (mutation); free (best);
 phylogen_destroy_queue ();
 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_new_structures                                        */
/*                                                                           */
/* Procedure creates the current tree in the packed (array) form based on    */
/*   the sequences (leafs of the master tree) active in the current column,  */
/*   the first one in the segment with new (changed) layout; doesn't bother  */
/*   to create the tree if it is not going to be used (too few active        */
/*   sequences) returns 0 if restructuring is OK, negative status otherwise  */
/*                                                                           */
/* Strategy:                                                                 */
/*                                                                           */
/*   Scan through the current column, activate/deactivate master tree leafs  */
/*   Create a temporary induced tree structure based on active leafs only    */
/*   Scan the induced tree to pack it, using queue for breadth-first search  */
/*     Packed tree representation: arrays NODES, LEVELS, LEFT, RIGHT         */
/*     Enqueued information:                                                 */
/*       Current node pointer, for sequence number & children ptrs (if any)  */
/*       Tree level of the current enqueued node, to be used for LEVELS      */
/*       Index in NODES, formed so far, of the parent node of current one    */
/*       Indication whether current node is left or right child of parent    */
/*   Destroy the induced tree structure - information packed in arrays now   */
    
int phylogen_new_structures (restricted_ptr alignment, long int position,
                             master_ptr master_tree, master_ptr *master_index,
                             int *nodes, int *levels, int *left, int *right,
                             int active, int *actual_actives, int *num_levels)
{
 int row, index, last_level, level, parent, side;
 master_ptr current_tree, pointer;
 
 /* Traverse the current column to count active sequences and label master   */

 *actual_actives = 0;
 for (row = 0; row < alignment -> dimension; row++) {
  if ((position >= (alignment -> starts) [row]) &&
      (position <= (alignment -> stops) [row])) {    /* Sequence active here */
   (*actual_actives)++;
   (master_index [row]) -> active = TRUE;       /* Corresponding leaf active */
  }
  else (master_index [row]) -> active = FALSE;              /* Leaf inactive */
 }
 if (*actual_actives >= active) {        /* If the calculations will be used */

  /* Create a supplementary tree containing only the active leafs            */
  
  current_tree = phylogen_restructure (master_tree);

  /* Convert the pointer tree structure into a set of integer arrays         */
  
  *num_levels = 0; index = 0; last_level = 0;
  phylogen_enqueue (current_tree, (int) 0, (int) -1, (int) ROOT_CODE);
  while (phylogen_dequeue (&pointer, &level, &parent, &side)) {

   /* Process the current node into appropriate array entries                */
   
   nodes [index] = pointer -> number;
   if (side == LEFT_CODE) left [parent] = index;
   else if (side == RIGHT_CODE) right [parent] = index;
   if (level > last_level) {
    levels [*num_levels] = index - 1; (*num_levels)++; last_level = level;
   }
   /* Continue the breadth-first search if there are any children of current */
   
   if (pointer -> left != NULL)
    phylogen_enqueue (pointer -> left, level + 1, index, (int) LEFT_CODE);
   if (pointer -> right != NULL)
    phylogen_enqueue (pointer -> right, level + 1, index, (int) RIGHT_CODE);
   index++;
  }
  
  levels [*num_levels] = index - 1; (*num_levels)++;   /* Last of last level */

  /* Destroy the supplementary tree - info already packed in integer arrays  */
  
  current_tree = phylogen_clean_copy (current_tree);
 }
 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_score_column                                          */
/*                                                                           */
/* Procedure scores the current column of the alignment based on the tree of */
/*   active sequences (in packed array form); uses shortcut reasoning when   */
/*   the column can be instantly scored (based on its layout and/or the type */
/*   of requested regions; returns 0 if the scoring is done correctly,       */
/*   negative status otherwise                                               */

int phylogen_score_column (restricted_ptr alignment, long int column,
                           int num_levels, int *nodes, int *levels, int *left,
                           int *right, int actual_actives, int anchor_type,
                           float adjustment, int gaps, int *mutation,
                           long int *best, float *score, char *root)
{
 int num_gaps, row, substitution, num_diff; char pivot;

 /* Scan the column, make the initial assignments to BEST and MUTATION,      */
 /*   check if all letters are same or how many differ from the first        */
 /*   encountered and count the gaps                                         */
 
 num_gaps = 0; pivot = '\0'; num_diff = 0;
 for (row = 0; row <= levels [num_levels - 1]; row++) {    /* All tree nodes */
 
  if (nodes [row] < 0) best [row] = 0x0000;                 /* Internal node */
  else {                                             /* A leaf node recorded */
  
   if ((alignment -> texts) [nodes [row]] [column] == GAP_SYMBOL) {
    num_gaps++; best [row] = GAP_BIT;
   }
   else {             /* Some real nucleotide character occurs for this leaf */
   
    if (pivot == '\0') pivot = (alignment -> texts) [nodes [row]] [column];
    else if (pivot != (alignment -> texts) [nodes [row]] [column]) num_diff++;

    if ((alignment -> texts) [nodes [row]] [column] == 'A')
     best [row] = A_BIT;
    else if ((alignment -> texts) [nodes [row]] [column] == 'C')
     best [row] = C_BIT;
    else if ((alignment -> texts) [nodes [row]] [column] == 'G')
     best [row] = G_BIT;
    else if ((alignment -> texts) [nodes [row]] [column] == 'T')
     best [row] = T_BIT;
    else {                /* Cannot process ambiguous nucleotide definitions */
     fprintf (stderr,
              "WARNING: Non-base nucleotide in alignment column ('%c').\n",
              (alignment -> texts) [nodes [row]] [column]);
     *score = (float) (-INFINITY); *root = 'N';
     return 0;
    }
   }
  }
  mutation [row] = 0;                    /* Initialization of mutation count */
 }
 /* Proceed to actualy assign the score to the column - deduce or calculate  */
 
 if (num_diff + num_gaps == 0) {  /* Trivial column - same letter everywhere */
  substitution = 0; *root = pivot;
 }
 else if ((num_gaps > 0) && (gaps == STRICT)) {            /* Illegal column */
  *score = (float) (-INFINITY); *root = GAP_SYMBOL; return 0;
 }
 else if ((num_gaps == 0) && (num_diff == 1)) { /* Only one letter different */
  substitution = 1; *root = pivot;
 }
 else if (gaps == WILDCARD) {  /* Special reasoning required - wildcard gaps */
 
  if (num_diff == 0) { substitution = 0; *root = pivot; }  /* Trivial column */
  else if (num_diff == 1) { substitution = 1; *root = pivot; }  /* One diff. */
  else
   phylogen_wildcard (num_levels, nodes, levels, left, right, mutation, best,
                      &substitution, root);
 }
 else if (gaps == ENFORCED)
  phylogen_enforced (num_levels, nodes, levels, left, right, mutation, best,
                     &substitution, root);
 else if (gaps == PROHIBITED)
  phylogen_prohibited (num_levels, nodes, levels, left, right, mutation, best,
                        &substitution, root);

 /* In the general case (only remaining) a single difference can be gap, too */
 
 else if (num_diff + num_gaps == 1) { substitution = 1; *root = pivot; }
 else
   phylogen_general (num_levels, nodes, levels, left, right, mutation, best,
                     &substitution, root);

 /* Do the calculation of the actual score based on anchor type and/or value */
 
 if (anchor_type == NO_ANCHOR)
  *score = adjustment - ((float) substitution);
 else if (anchor_type == COLUMN_SIZE_ANCHOR)
  *score = (float) (actual_actives + adjustment - substitution);
 else if (anchor_type == TRIM_ANCHOR)
  *score = (float) (actual_actives - num_gaps + adjustment - substitution);
 else {
  fprintf (stderr, "PROBLEM: Illegal anchor type.\n"); return -131;
 }
 /* Set the ancestral character in speacial cases of promoted or masked gaps */
 
 if ((gaps == PROMOTED) && (num_gaps > 0)) *root = GAP_SYMBOL;
 else if ((gaps == MASKED) && (*root == GAP_SYMBOL)) *root = 'N';

 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_wildcard                                              */
/*                                                                           */
/* Procedure applies the column-scoring algorithm for wildcard gap treatment */

void phylogen_wildcard (int num_levels, int *nodes, int *levels, int *left,
                        int *right, int *mutation, long int *best,
                        int *substitution, char *root)
{
 int level, high_end, low_end, node;
 
 for (level = num_levels; level > 0; level--) {
  high_end = levels [level - 1];
  if (level == 1) low_end = -1; else low_end = levels [level - 2];
  for (node = high_end; node > low_end; node--) {
   if (nodes [node] < 0) {
    if (best [left [node]] & best [right [node]]) {
     best [node] = best [left [node]] & best [right [node]];
     mutation [node] = mutation [left [node]] + mutation [right [node]];
    }
    else if (best [left [node]] & GAP_BIT) {
     best [node] = best [right [node]];
     mutation [node] = mutation [left [node]] + mutation [right [node]];
    }
    else if (best [right [node]] & GAP_BIT) {
     best [node] = best [left [node]];
     mutation [node] = mutation [left [node]] + mutation [right [node]];
    }
    else {
     best [node] = best [left [node]] | best [right [node]];
     mutation [node] = mutation [left [node]] + mutation [right [node]] + 1;
    }
   }
  }
 }
 *substitution = mutation [0];

 if (best [0] & A_BIT) *root = 'A';
 else if (best [0] & C_BIT) *root = 'C';
 else if (best [0] & G_BIT) *root = 'G';
 else if (best [0] & T_BIT) *root = 'T';
 else if (best [0] & GAP_BIT) *root = GAP_SYMBOL;
 else {
  fprintf (stderr, "PROBLEM: Illegal letter assignments to root.\n"); exit (1);
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_enforced                                              */
/*                                                                           */
/* Procedure applies the column-scoring algorithm for enforced gap treatment */

void phylogen_enforced (int num_levels, int *nodes, int *levels, int *left,
                        int *right, int *mutation, long int *best,
                        int *substitution, char *root)
{
 int level, high_end, low_end, node;
 
 for (level = num_levels; level > 0; level--) {
  high_end = levels [level - 1];
  if (level == 1) low_end = -1; else low_end = levels [level - 2];
  for (node = high_end; node > low_end; node--) {
   if (nodes [node] < 0) {
    if (best [left [node]] & best [right [node]]) {
     best [node] = best [left [node]] & best [right [node]];
     mutation [node] = mutation [left [node]] + mutation [right [node]];
    }
    else if ((best [left [node]] & GAP_BIT) ||
             (best [right [node]] & GAP_BIT)) {
     best [node] = GAP_BIT;
     mutation [node] = mutation [left [node]] + mutation [right [node]] + 1;
    }
    else {
     best [node] = best [left [node]] | best [right [node]];
     mutation [node] = mutation [left [node]] + mutation [right [node]] + 1;
    }
   }
  }
 }
 *substitution = mutation [0];

 if (best [0] & A_BIT) *root = 'A';
 else if (best [0] & C_BIT) *root = 'C';
 else if (best [0] & G_BIT) *root = 'G';
 else if (best [0] & T_BIT) *root = 'T';
 else if (best [0] & GAP_BIT) *root = GAP_SYMBOL;
 else {
  fprintf (stderr, "PROBLEM: Illegal letter assignments to root.\n"); exit (1);
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_prohibited                                            */
/*                                                                           */
/* Procedure applies column-scoring algorithm for prohibited gap treatment   */

void phylogen_prohibited (int num_levels, int *nodes, int *levels, int *left,
                          int *right, int *mutation, long int *best,
                          int *substitution, char *root)
{
 int level, high_end, low_end, node;
 
 for (level = num_levels; level > 0; level--) {
  high_end = levels [level - 1];
  if (level == 1) low_end = -1; else low_end = levels [level - 2];
  for (node = high_end; node > low_end; node--) {
   if (nodes [node] < 0) {
    if (best [left [node]] & best [right [node]]) {
     best [node] = best [left [node]] & best [right [node]];
     mutation [node] = mutation [left [node]] + mutation [right [node]];
    }
    else if (best [left [node]] & GAP_BIT) {
     best [node] = best [right [node]];
     mutation [node] = mutation [left [node]] + mutation [right [node]] + 1;
    }
    else if (best [right [node]] & GAP_BIT) {
     best [node] = best [left [node]];
     mutation [node] = mutation [left [node]] + mutation [right [node]] + 1;
    }
    else {
     best [node] = best [left [node]] | best [right [node]];
     mutation [node] = mutation [left [node]] + mutation [right [node]] + 1;
    }
   }
  }
 }
 *substitution = mutation [0];

 if (best [0] & A_BIT) *root = 'A';
 else if (best [0] & C_BIT) *root = 'C';
 else if (best [0] & G_BIT) *root = 'G';
 else if (best [0] & T_BIT) *root = 'T';
 else {
  fprintf (stderr, "PROBLEM: Illegal letter assignments to root.\n"); exit (1);
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_general                                               */
/*                                                                           */
/* Procedure applies the general column-scoring algorithm with leaf letters  */

void phylogen_general (int num_levels, int *nodes, int *levels, int *left,
                       int *right, int *mutation, long int *best,
                       int *substitution, char *root)
{
 int level, high_end, low_end, node;
 
 for (level = num_levels; level > 0; level--) {
  high_end = levels [level - 1];
  if (level == 1) low_end = -1; else low_end = levels [level - 2];
  for (node = high_end; node > low_end; node--) {
   if (nodes [node] < 0) {
    if (best [left [node]] & best [right [node]]) {
     best [node] = best [left [node]] & best [right [node]];
     mutation [node] = mutation [left [node]] + mutation [right [node]];
    }
    else {
     best [node] = best [left [node]] | best [right [node]];
     mutation [node] = mutation [left [node]] + mutation [right [node]] + 1;
    }
   }
  }
 }
 *substitution = mutation [0];

 if (best [0] & A_BIT) *root = 'A';
 else if (best [0] & C_BIT) *root = 'C';
 else if (best [0] & G_BIT) *root = 'G';
 else if (best [0] & T_BIT) *root = 'T';
 else if (best [0] & GAP_BIT) *root = GAP_SYMBOL;
 else {
  fprintf (stderr, "PROBLEM: Illegal letter assignments to root.\n"); exit (1);
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_locate_regions                                        */
/*                                                                           */
/* Procedure determines all full runs within the established array of scores */
/*   and reports them; returns 0 if OK, negative status otherwise            */

int phylogen_locate_regions (float *scores, char *ancestral, long int size,
                             int length, char *counter_seq, long int track)
{
 long int lines_in_output, *low, *high, index, num_intervals, scanner;
 long int low_track, high_track, spell;
 
 /****************************************************************************/
 /*                                                                          */
 /* Test code prints the entire array of calculated scores and letter assgns */
 /*                                                                          */
 /* phylogen_print_scores (scores, ancestral, size);                         */
 /*                                                                          */
 /****************************************************************************/
 
 lines_in_output = 0;      /* Initialize the counter of the reported regions */

 /* Determine all full runs in the scores array first, then report them      */
 
 phylogen_full_runs (scores, size, &num_intervals, &low, &high);
 free (scores);         /* Full runs determined - scores not needed any more */

 if (num_intervals > 0) {      /* There are some full runs in this alignment */

  /* Report all the full runs with their ancestral sequences                 */

  scanner = 0; for (index = 0; index < num_intervals; index++) {

   /* Adjust the track value for reporting to the start position of region   */
   
   while (scanner < low [index]) {
    if (counter_seq [scanner] != GAP_SYMBOL) track++; scanner++;
   }
   /* Proceed to find the track for the end position of the region           */
   
   low_track = track; while (scanner < high [index]) {
    if (counter_seq [scanner] != GAP_SYMBOL) track++; scanner++;
   }
   if (counter_seq [scanner] == GAP_SYMBOL) {
    if (track > low_track) high_track = track - 1; else high_track = track;
   }
   else high_track = track;

   /* If the region is sufficiently long, report it                          */
   
   if (high [index] - low [index] + 1 >= length) {
    printf ("%ld %ld ", low_track, high_track);
    for (spell = low [index]; spell <= high [index]; spell++)
     printf ("%c", ancestral [spell]);
    printf ("\n"); lines_in_output++;
   }
  }
 }
 printf ("\n\n# Total number of regions detected:  %ld\n", lines_in_output);
 return 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_full_runs                                             */
/*                                                                           */
/* Procedure determines all full runs in the given array of scores,          */
/*   piecewise processing it (in contiguos finitelly scoring intervals) by   */
/*   invoking the full intervals algorithm (implemented as library routine)  */

void phylogen_full_runs (float *scores, long int range_len,
                         long int *intervals, long int **low_ends,
                         long int **high_ends)
{
 long int *low, *high, range_start, range_stop, current, scan, items;
 bool all_positive; float *low_score, *high_score, *sigma;

 /* Allocate space required by the implementation of full runs algorithm     */

 low = (long int *) NTL0_ckalloc (range_len * sizeof (long int));
 high = (long int *) NTL0_ckalloc (range_len * sizeof (long int));
 low_score = (float *) NTL0_ckalloc (range_len * sizeof (float));
 high_score = (float *) NTL0_ckalloc (range_len * sizeof (float));

 /* Allocate space needed for the temporary scores and output delivery str.  */

 sigma = (float *) NTL0_ckalloc ((range_len + 1) * sizeof (float));
 *low_ends = (long int *) NTL0_ckalloc (range_len * sizeof (long int));
 *high_ends = (long int *) NTL0_ckalloc (range_len * sizeof (long int));

 /* Initialize the internal control variables for patching the score array   */

 range_start = 0; range_stop = 0; *intervals = 0;
 while (range_stop < range_len) {   /* Continue until last segment processed */

  /* Skip all position scoring negative (either infinite or patch starting)  */
  
  while ((range_start < range_len) && (scores [range_start] < 0))
   range_start++;
  range_stop = range_start;     /* Initialize range stop to start of segment */

  if (range_stop < range_len) {    /* If there is a segment before array end */
   all_positive = TRUE;
   while ((range_stop < range_len) &&
          (scores [range_stop] > -INFINITY + FP_SHIELD)) {
    if (scores [range_stop] < 0.0) all_positive = FALSE;
    range_stop++;
   }
   if (all_positive) {           /* The interval is automatically a full run */
    (*low_ends) [*intervals] = range_start;
    (*high_ends) [*intervals] = range_stop - 1;
    (*intervals)++;
   }
   else {       /* There must be one non-negative score, but not all of them */
    current = range_stop - range_start;
    for (scan = 0; scan < current; scan++)
     sigma [scan] = scores [range_start + scan];

    /* Determine all full runs within the copied vector - library call       */

    NTL0_Full_Runs (sigma, current, &items, low, high, low_score, high_score);

    /* Add all discovered full runs to the list of these already established */
    
    for (scan = 0; scan < items; scan++) {
     (*low_ends) [*intervals] = range_start + low [scan];
     (*high_ends) [*intervals] = range_start + high [scan];
     (*intervals)++;
    }
   }
   range_start = range_stop;       /* Prepare to search for the next segment */
  }
 }
 /* Release structures which are not needed after full runs are determined   */
 
 free (sigma); free (low); free (high); free (low_score); free (high_score);
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_is_long                                               */
/*                                                                           */
/* Procedure determines whether the input string represents a long integer   */
/*   number, and places its value into a reference parameter; returns TRUE   */
/*   if a correct number has been extracted, FALSE otherwise                 */

bool phylogen_is_long (char *string, long int *value)
{
 char *scan, *num_start, saver; int count; long int long_val; bool negative;

 scan = string; *value = 0;
 while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++;
 if (*scan == '\0') return FALSE;
 else {
  if (*scan == '-') {
   negative = TRUE; scan++;
   while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++;
  }
  else negative = FALSE;

  if (*scan == '\0') return FALSE;
  else if (*scan == '0') {
   if (negative) return FALSE;
   else {
    scan++;
    while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++;
    *value = 0;
    if (*scan == '\0') return TRUE; else return FALSE;
   }
  }
  else {
   count = 0; num_start = scan; while ((*scan >= '0') && (*scan <= '9')) {
    count++; scan++;
   }
   if ((count < 1) || (count > MAX_LONG_LEN)) return FALSE;
   else {
    if ((*scan != '\0') && (*scan != ' ') &&
        (*scan != '\t') && (*scan != '\n')) return FALSE;
    else {
     saver = *scan; *scan = '\0'; long_val = atol (num_start);
     *scan = saver;

     while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++;
     if (*scan != '\0') return FALSE;
     else {
      if (negative) *value = -long_val; else *value = long_val; return TRUE;
     }
    }
   }
  }
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_is_int                                                */
/*                                                                           */
/* Procedure determines whether the input string represents an integer       */
/*   number, and places its value into a reference parameter; returns TRUE   */
/*   if a correct number has been extracted, FALSE otherwise                 */

bool phylogen_is_int (char *string, int *value)
{
 long int long_val;

 if (!phylogen_is_long (string, &long_val)) { *value = 0; return FALSE; }
 else if ((long_val < MININT_VALUE) || (long_val > MAXINT_VALUE)) {
  *value = 0; return FALSE;
 }
 else { *value = (int) long_val; return TRUE; }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_is_float                                              */
/*                                                                           */
/* Procedure determines whether the input string represents a floating point */
/*   (real) number, and places its value into a reference parameter; returns */
/*   TRUE if a correct number has been extracted, FALSE otherwise            */

bool phylogen_is_float (char *string, float *value)
{
 char *s; bool negative, fraction; int whole, frac, count; float real_frac;

 s = string; whole = 0; frac = 0; count = 0;                   /* Initialize */

 /* Consult the first symbol in the string and set the appropriate controls  */

 if (*s == '-') { negative = TRUE; s++; if (*s == '\0') return FALSE; }
 else negative = FALSE;
 if (*s == '.') { fraction = TRUE; s++; if (*s == '\0') return FALSE; }
 else fraction = FALSE;

 if ((*s == '0') && (s [1] != '\0') && (s [1] != '.')) return FALSE;

 else {            /* Proceed to collect the number in the part of the value */
  while ((*s >= '0') && (*s <= '9')) {
   if (!fraction) {                /* The collected part is the whole number */
    whole = whole * 10 + (int) (*s - '0'); count++; s++;
    if (count > MAX_WHOLE) return FALSE;
   }
   else {           /* The collected part is the fraction part of the number */
    frac = frac * 10 + (int) (*s - '0'); count++; s++;
    if (count > MAX_FRAC) return FALSE;
   }
  }
  if ((*s != '.') && (*s != '\0')) return FALSE;
  else if ((*s == '.') && (fraction)) return FALSE;
  else if (*s == '.') {
   fraction = TRUE; count = 0; s++; while ((*s >= '0') && (*s <= '9')) {
    frac = frac * 10 + (int) (*s - '0'); count++; s++;
    if (count > MAX_FRAC) return FALSE;
   }
   if (*s != '\0') return FALSE;
  }
  *value = (float) whole; real_frac = (float) frac;
  if (fraction) {
   while (count > 0) { real_frac = real_frac / 10; count--; }
  }
  *value += real_frac; if (negative) *value = -(*value);
  return TRUE;
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_init_queue                                            */
/*                                                                           */
/* Procedure allocates memory for the specialized queue, of provided maximal */
/*   size, for storage of information while packing a tree; also initializes */
/*   the internal queue control (head and tail pointers and queue size)      */

void phylogen_init_queue (int size)
{
 queue_pointers = (master_ptr *) NTL0_ckalloc (size * sizeof (master_ptr));
 queue_levels = (int *) NTL0_ckalloc (size * sizeof (int));
 queue_parents = (int *) NTL0_ckalloc (size * sizeof (int));
 queue_sides = (int *) NTL0_ckalloc (size * sizeof (int));
 queue_head = queue_tail = 0; queue_size = size;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_enqueue                                               */
/*                                                                           */
/* Procedure enqueues the received data in its appropriate arrays; it trusts */
/*   the invoking code, so there are no checks for overflow                  */

void phylogen_enqueue (master_ptr node, int level, int parent, int side)
{
 queue_pointers [queue_tail] = node;
 queue_levels [queue_tail] = level;
 queue_parents [queue_tail] = parent;
 queue_sides [queue_tail] = side;
 queue_tail++; if (queue_tail == queue_size) queue_tail = 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_dequeue                                               */
/*                                                                           */
/* Procedure retrieves the data next for output in the queue; if the queue   */
/*   is empty it returns FALSE, otherwise it packs the data in the output    */
/*   parameters and returns TRUE                                             */

bool phylogen_dequeue (master_ptr *node, int *level, int *parent, int *side)
{
 if (queue_head == queue_tail) return FALSE;               /* Queue is empty */
 
 else {    /* There is at least one entry in the queue - retrieve and update */
  *node = queue_pointers [queue_head];
  *level = queue_levels [queue_head];
  *parent = queue_parents [queue_head];
  *side = queue_sides [queue_head];
  queue_head++; if (queue_head == queue_size) queue_head = 0;
  return TRUE;
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_destroy_queue                                         */
/*                                                                           */
/* Procedure releases memory used by queue structures and resets its control */

void phylogen_destroy_queue (void)
{
 free (queue_pointers); queue_pointers = NULL;
 free (queue_levels); queue_levels = NULL;
 free (queue_parents); queue_parents = NULL;
 free (queue_sides); queue_sides = NULL;
 queue_size = queue_head = queue_tail = 0;
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_print_tree                                            */
/*                                                                           */
/* Test utility only - not part of the program; assembles a tree string      */
/*   based on the master tree structure and prints it                        */

void phylogen_print_tree (master_ptr tree)
{
 if (tree != NULL) {
  if (tree -> number == INTERNAL_CODE) {
   fprintf (stderr, "(");
   phylogen_print_tree (tree -> left); phylogen_print_tree (tree -> right);
   fprintf (stderr, ")");
  }
  else {
   if ((tree -> left != NULL) || (tree -> right != NULL)) {
    fprintf (stderr, "PROBLEM: Tree.\n");
   }
   else fprintf (stderr, "<%d>", tree -> number);
  }
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_show_tree                                             */
/*                                                                           */
/* Test utility only - not part of the program; prints the layout of the     */
/*   tree described by the received arrays and reports levels data           */

void phylogen_show_tree (int level_total, int *nodes, int *levels, int *left,
                         int *right)
{
 int index, full_one [40], node, skip, entries, show, ent, i, in_full;
 
 if (level_total > 5) fprintf (stderr, "Can't show tree.\n");
 else {
  for (index = 0; index < 40; index++) full_one [index] = UNDEFINED;
  full_one [1] = nodes [0]; in_full = 0;
  for (node = 0; node <= levels [level_total - 1]; node++) {
   in_full++; while (full_one [in_full] != nodes [node]) in_full++;
   if (full_one [in_full] < 0) {
    full_one [2 * in_full] = nodes [left [node]];
    full_one [2 * in_full + 1] = nodes [right [node]];
   }
  }
  skip = 32; entries = 1; show = 1;
  for (index = 0; index < 5; index++) {
   for (ent = 0; ent < entries; ent++) {
    for (i = 0; i < skip - 1; i++) printf (" ");
    if (ent > 0) for (i = 0; i < skip - 1; i++) printf (" ");
    if (full_one [show] == UNDEFINED) printf ("   ");
    else if (full_one [show] < 0) printf (" X ");
    else printf ("<%d>", full_one [show]);
    show++;
   }
   printf ("\n"); skip = skip / 2; entries *= 2;
  }
  printf ("\n");

  printf ("Levels:    ");
  for (index = 0; index < level_total; index++)
   printf ("%d     ", levels [index]);
  printf ("\n\n");
 }
}


/*****************************************************************************/
/*                                                                           */
/* Procedure: phylogen_print_scores                                          */
/*                                                                           */
/* Test utility only - not part of the program; prints the scores from the   */
/*   received array along with character assignments to tree roots (if any)  */

void phylogen_print_scores (float *scores, char *ancestral, long int size)
{
 long int current, scale;
 
 current = 0; while (current < size) {
  scale = 0; while ((current + scale < size) && (scale < 10)) {
   printf ("%7.1f", scores [current + scale]); scale++;
  }
  printf ("\n"); scale = 0; while ((current + scale < size) && (scale < 10)) {
   printf ("      %c", ancestral [current + scale]); scale++;
  }
  printf ("\n\n"); current += 10;
 }
}


