/*****************************************************************************/ /* */ /* UNIT: NTL1_Cut_Alignment (Level 1 library routine) */ /* */ /* Author: Nikola Stojanovic */ /* */ /* Revision: 05 SEP 96 Version 1.0 */ /* */ /* Function: */ /* */ /* Procedure uses the unpacked information about an alignment and the */ /* requested range for its representation to "trim" it to the right interval */ /* and update the corresponding information stored in the appropriate */ /* internal structure; return the status of the restricting in a "standard" */ /* error structure, NULL if there were no errors */ /* */ /*****************************************************************************/ #include #include #include #include "ntl1.h" /*****************************************************************************/ /* */ /* Definitions section */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* Definitions of local constants of the unit */ /*****************************************************************************/ /* Limits for predefined buffer sizes */ #define ERR_MSGLIMIT 128 /*****************************************************************************/ /* Prototypes of all locally used functions of this unit */ /*****************************************************************************/ errind NTL1_CA_Range_Sequence (header_ptr file_info, char *sequence, long int *start, long int *stop, int *pivot); errind NTL1_CA_Do_Cut (header_ptr file_info, unpacked_ptr alignment, int pivot, long int start, long int stop); errind NTL1_CA_Assemble_Error (int severity, int code, char *comment, int description); /*****************************************************************************/ /* Definitions of global (static) variables of the unit */ /*****************************************************************************/ static char Error_Message [ERR_MSGLIMIT]; /* Temporary buffer, error passing */ /*****************************************************************************/ /* */ /* Code section */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* */ /* PROCEDURE: NTL1_Cut_Alignment */ /* */ /* Central procedure for the unit - alignment restriction; returns error */ /* status, NULL if restriction was done without problems */ errind NTL1_Cut_Alignment (header_ptr file_info, unpacked_ptr alignment, char *range_sequence, long int from, long int to) { errind retstat, errseq, errcut; int pivot; long int range_start, range_stop; /* Check the legality of the provided "from-to" range, then of the sequence */ /* specified as a "leader" for the cut within the range */ if ((from < 1) || (to < 1)) return NTL1_CA_Assemble_Error (USER_ERROR, ERR_BAD_VALUE, "Illegal 'from' range value for the alignment", 0); else if (from > to) { retstat = NTL1_CA_Assemble_Error (WARNING, ERR_BAD_VALUE, "Reversed range for alignment cut", 0); range_start = to; range_stop = from; } else { retstat = NULL; range_start = from; range_stop = to; } /* Now check the sequence with respect to which to cut the alignment */ if ((errseq = NTL1_CA_Range_Sequence (file_info, range_sequence, &range_start, &range_stop, &pivot)) != NULL) { if (errseq -> kind != WARNING) return errseq; else retstat = errseq; } /* Proceed to do the actual cut of the alignment, use supplementary func. */ if ((errcut = NTL1_CA_Do_Cut (file_info, alignment, pivot, range_start, range_stop)) != NULL) return errcut; else return retstat; } /*****************************************************************************/ /* */ /* Internal procedures for maintenance and execution of external requests */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* */ /* PROCEDURE: NTL1_CA_Range_Sequence */ /* */ /* Procedure finds the unique sequence whose name matches the specified one */ /* and whose range overlaps the provided one - list of aliases is searched */ /* first, under the assumption that all names in the list of aliases must */ /* be different - if the right aliased sequence has not been found, the */ /* actual sequence names are checked; returns the error structure, */ /* containing either error or warning, depending on the situation */ /* encountered, or NULL if everything was OK and sequence properly found */ errind NTL1_CA_Range_Sequence (header_ptr file_info, char *sequence, long int *start, long int *stop, int *pivot) { bool found, stopped; int index; /* Check first if the name of the sequence occurs in the list of aliases */ /* for the alignment, and, if it does, whether the given range overlaps */ /* with that of the aliased sequence */ found = FALSE; stopped = FALSE; index = 0; while ((!found) && (!stopped) && (index < file_info -> dimension)) { if (((file_info -> sequences) [index]).alias != NULL) { if (!strcmp (((file_info -> sequences) [index]).alias, sequence)) { if (NTL0_overlaps (*start, *stop, ((file_info -> sequences) [index]).begin, ((file_info -> sequences) [index]).end)) found = TRUE; else stopped = TRUE; } else index++; } else index++; } *pivot = index; /* Remember the index where sequence is potentially found */ if (!found) { /* Desired sequence could not be located in the alias list */ /* Check the list of sequence names to see if there is an unique occurence */ /* of the name where there is an overlap with the desired region */ index = 0; while ((!found) && (index < file_info -> dimension)) { if (((file_info -> sequences) [index]).seq_name != NULL) { if (!strcmp (((file_info -> sequences) [index]).seq_name, sequence)) { if (NTL0_overlaps (*start, *stop, ((file_info -> sequences) [index]).begin, ((file_info -> sequences) [index]).end)) found = TRUE; else index++; } else index++; } else index++; } *pivot = index; /* Remember the index where sequence is potentially found */ if (found) { /* Sequence with the right name overlapping the region found */ /* Check the list of sequence names for possible overlapping duplicates */ index++; while (index < file_info -> dimension) { if (((file_info -> sequences) [index]).seq_name != NULL) { if (!strcmp (((file_info -> sequences) [index]).seq_name, sequence)) { if (NTL0_overlaps (*start, *stop, ((file_info -> sequences) [index]).begin, ((file_info -> sequences) [index]).end)) return NTL1_CA_Assemble_Error (USER_ERROR, ERR_BAD_VALUE, "Ambiguous range request for alignment", 0); else index++; } else index++; } else index++; } } } if (!found) { /* Sequence in the given range does not exist */ sprintf (Error_Message, "Range %ld to %ld in '%s' not present in the alignment", *start, *stop, sequence); return NTL1_CA_Assemble_Error (USER_ERROR, ERR_BAD_VALUE, Error_Message, 0); } else { /* Unique matching sequence found - check the details */ if (*start < ((file_info -> sequences) [*pivot]).begin) { if (*stop > ((file_info -> sequences) [*pivot]).end) { sprintf (Error_Message, "Range '%s(%ld,%ld)' trimmed to '%s(%ld,%ld)'", sequence, *start, *stop, sequence, ((file_info -> sequences) [*pivot]).begin, ((file_info -> sequences) [*pivot]).end); *start = ((file_info -> sequences) [*pivot]).begin; *stop = ((file_info -> sequences) [*pivot]).end; return NTL1_CA_Assemble_Error (WARNING, ERR_BAD_VALUE, Error_Message, 0); } else { sprintf (Error_Message, "Range start in '%s' modified from %ld to %ld", sequence, *start, ((file_info -> sequences) [*pivot]).begin); *start = ((file_info -> sequences) [*pivot]).begin; return NTL1_CA_Assemble_Error (WARNING, ERR_BAD_VALUE, Error_Message, 0); } } else if (*stop > ((file_info -> sequences) [*pivot]).end) { sprintf (Error_Message, "Range end in '%s' modified from %ld to %ld", sequence, *stop, ((file_info -> sequences) [*pivot]).end); *stop = ((file_info -> sequences) [*pivot]).end; return NTL1_CA_Assemble_Error (WARNING, ERR_BAD_VALUE, Error_Message, 0); } else return NULL; } } /*****************************************************************************/ /* */ /* PROCEDURE: NTL1_CA_Do_Cut */ /* */ /* Procedure "trims" the already expanded alignment record to the requested */ /* range only, given the index of the sequence in respect to which the cut */ /* is to be made; returns the 'standard' error structure, NULL if cut OK */ errind NTL1_CA_Do_Cut (header_ptr file_info, unpacked_ptr alignment, int pivot, long int start, long int stop) { char **temp; int seqs, index; long int cutoff, cutend, scanseq, subsize, chscan, old_start; /* Find the absolute sequence positions (start & end) where to make a cut */ cutoff = 0; scanseq = ((file_info -> sequences) [pivot]).begin; while (scanseq != start) { while ((alignment -> texts) [pivot] [cutoff] == GAP_SYMBOL) cutoff++; cutoff++; scanseq++; /* Assume that it can not be EOS at this point */ } /* Skip all possible gap symbols encountered once the start counter reached */ while ((alignment -> texts) [pivot] [cutoff] == GAP_SYMBOL) cutoff++; cutend = cutoff; /* Now proceed to find the end of selected part */ while (scanseq != stop) { while ((alignment -> texts) [pivot] [cutend] == GAP_SYMBOL) cutend++; cutend++; scanseq++; /* Assume that it can not be EOS at this point */ } /* Skip all possible gap symbols between current and the actual ending pos. */ while ((alignment -> texts) [pivot] [cutend] == GAP_SYMBOL) cutend++; /* Now check if the entire alignment has been enclosed in this range */ if ((cutoff == 0) && (cutend == alignment -> size)) return NULL; else { /* Requested is a real substring */ alignment -> cut = SELECTED_RANGE; subsize = cutend - cutoff + 1; /* Proceed to adjust the "begin" and "end" vectors for the alignment */ for (seqs = 0; seqs < alignment -> dimension; seqs++) { old_start = (alignment -> begin) [seqs]; scanseq = old_start; for (chscan = 0; chscan < cutoff; chscan++) { if ((alignment -> texts) [seqs] [chscan] != GAP_SYMBOL) scanseq++; } if (scanseq > (alignment -> end) [seqs]) { (alignment -> segment_code) [seqs] = ALREADY_ENDED_SEGMENT; (alignment -> starts) [seqs] = 0; (alignment -> stops) [seqs] = 0; } (alignment -> begin) [seqs] = scanseq; for (chscan = cutoff; chscan <= cutend; chscan++) { if ((alignment -> texts) [seqs] [chscan] != GAP_SYMBOL) scanseq++; } if (scanseq == old_start) { (alignment -> segment_code) [seqs] = NOTYET_STARTED_SEGMENT; (alignment -> starts) [seqs] = cutend - cutoff; (alignment -> stops) [seqs] = cutend - cutoff; } (alignment -> end) [seqs] = scanseq - 1; /* Also modify recorded place for the start of the sequence in the buffer */ if ((alignment -> segment_code) [seqs] == VALID_SEGMENT) { (alignment -> starts) [seqs] = (alignment -> starts) [seqs] - cutoff; if ((alignment -> starts) [seqs] > cutend - cutoff) { return NTL1_CA_Assemble_Error (SYSTEM_ERROR, ERR_BAD_VALUE, "Start position incorrectly calculated", 0); } else if ((alignment -> starts) [seqs] < 0) (alignment -> starts) [seqs] = 0; } /* Modify the recorded place of the end of the sequence in the buffer */ if ((alignment -> segment_code) [seqs] == VALID_SEGMENT) { (alignment -> stops) [seqs] = (alignment -> stops) [seqs] - cutoff; if ((alignment -> stops) [seqs] > cutend - cutoff) (alignment -> stops) [seqs] = cutend - cutoff; else if ((alignment -> stops) [seqs] < 0) { return NTL1_CA_Assemble_Error (SYSTEM_ERROR, ERR_BAD_VALUE, "Stop position incorrectly calculated", 0); } } } /* Continue to physically cut the text buffer to the requested range */ temp = (char **) NTL0_ckalloc ((alignment -> dimension) * sizeof (char *)); for (seqs = 0; seqs < alignment -> dimension; seqs++) { temp [seqs] = (char *) NTL0_ckalloc ((subsize + 1) * sizeof (char)); index = 0; for (chscan = cutoff; chscan <= cutend; chscan++) { temp [seqs] [index++] = (alignment -> texts) [seqs] [chscan]; } temp [seqs] [index] = '\0'; free ((alignment -> texts) [seqs]); (alignment -> texts) [seqs] = temp [seqs]; } free (temp); /* All strings attached to current_alignment */ alignment -> size = subsize; return NULL; } } /*****************************************************************************/ /* */ /* Procedure: NTL1_CA_Assemble_Error */ /* */ /* Service procedure for assembling and returnning an error report, based on */ /* the values of the input parameters; returns the record with the report */ errind NTL1_CA_Assemble_Error (int severity, int code, char *comment, int description) { char *report; errind assembled; report = (char *) NTL0_ckalloc ( (strlen (comment) + strlen ("_Cut_Alignment: ") + 1) * sizeof (char)); sprintf (report, "_Cut_Alignment: %s", comment); assembled = NTL1_Error_Record (severity, code, report, description); free (report); return assembled; }