/*****************************************************************************/ /* */ /* UNIT: NTL1_Abs_To_Human (Level 1 library routine) */ /* */ /* Author: Nikola Stojanovic */ /* */ /* Revision: 07 SEP 94 Version 1.0 */ /* 31 OCT 95 Version 2.0 */ /* */ /* Function: */ /* */ /* Procedure determines the corresponding positions in the human sequence */ /* for the given list of "absolute" intervals, with respect to the received */ /* alignment; structures containing data about the alignment are provided as */ /* input parameters; returns the "standard" error structure, NULL if there */ /* were no errors */ /* */ /* NOTE: If there are no corresponding position(s) in "human", but otherwise */ /* everything else is OK, the procedure still returns NULL, however */ /* the list of offsets is empty (NULL, too) */ /* */ /*****************************************************************************/ #include #include #include #include "ntl1.h" /*****************************************************************************/ /* */ /* Definitions section */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* Prototypes of all locally used functions of this unit */ /*****************************************************************************/ fourint_ptr NTL1_AH_Flip_Result (fourint_ptr results); errind NTL1_AH_Assemble_Error (int severity, int code, char *comment, int description); /*****************************************************************************/ /* */ /* Code section */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* */ /* PROCEDURE: NTL1_Abs_To_Human */ /* */ /* Central procedure for the unit; returns NULL if the alternative offset */ /* has been found and returned, diagnostic structure otherwise */ /* */ /* Procedure abs_to_human determines offsets of pairs of positions specified */ /* as absolute within the alignment, with respect to "humhbb_orig"; */ /* positions are calculated as close as possible, and error made (if any) */ /* is returned together with offsets */ /* */ /* ASSUMPTION: Only one sequence in the alignment is "human" */ /* NOTE: If there are no corresponding position(s) in "human", but otherwise */ /* everything else is OK, the procedure still returns NULL, however */ /* the list of offsets is empty (NULL, too) */ /* Central procedure for the unit; returns NULL if the absolute position */ /* has been found and set of corresponding values allocated, error */ /* diagnostics structure otherwise */ errind NTL1_Abs_To_Human (twoint_ptr absolutes, header_ptr file_data, align_ptr alignment, fourint_ptr *offsets) { long int lookup_1, lookup_2, position_1, position_2, correction_1, correction_2; long int counter, corresponding, trail_align, trail_seq, dist_1, dist_2; int index; bool reversed, found_1, found_2; line_ptr hseq, tracker; twoint_ptr current_pair; fourint_ptr new_cell; /* Find the human sequence in the alignment - assume that there is only one */ hseq = NULL; index = 0; while ((index < file_data -> dimension) && (hseq == NULL)) { if (!strcmp (((file_data -> sequences) [index]).seq_name, "human")) { hseq = (alignment -> lines) [index]; } else index++; } if (hseq == NULL) { /* No "human" sequence found */ return NTL1_AH_Assemble_Error (USER_ERROR, ERR_BAD_REFERENCE, "Human sequence not found in the alignment file", 1); } /* Loop for every pair in the input to determine the corresponding ofsets */ *offsets = NULL; /* Start with the empty list of offsets and add new ones */ for (current_pair = absolutes; current_pair != NULL; current_pair = current_pair -> next) { if (current_pair -> value_1 > current_pair -> value_2) { lookup_1 = current_pair -> value_2; lookup_2 = current_pair -> value_1; reversed = TRUE; } else { lookup_1 = current_pair -> value_1; lookup_2 = current_pair -> value_2; reversed = FALSE; } found_1 = found_2 = FALSE; /* Search indicators for interval endpoints */ /* Traverse through the "human" sequence of the alignment and determine */ /* the offset of the leftmost point of the absolute interval */ trail_align = -1; trail_seq = (alignment -> begin) [index] - 1; counter = 0; corresponding = (alignment -> begin) [index]; tracker = hseq; while ((tracker != NULL) && (!found_1)) { if (tracker -> code == GAP_STRETCH) { if (counter + tracker -> length > lookup_1) { if (tracker -> next == NULL) { if (trail_align == -1) { return NTL1_AH_Assemble_Error (FATAL_ERROR, ERR_BAD_VALUE, "No text in human sequence", 2); } position_1 = trail_seq; correction_1 = trail_align - lookup_1; found_1 = TRUE; } else { /* Still not at the end of the "human" sequence */ if (trail_align != -1) { dist_1 = lookup_1 - trail_align; dist_2 = counter + tracker -> length - lookup_1; if (dist_1 > dist_2) { position_1 = trail_seq + 1; correction_1 = dist_2; found_1 = TRUE; } else { position_1 = trail_seq; correction_1 = -dist_1; found_1 = TRUE; } } else { /* Not still at the beginning of the sequence */ position_1 = (alignment -> begin) [index]; correction_1 = counter + tracker -> length - lookup_1; found_1 = TRUE; } } } else counter += tracker -> length; /* Still counting for the position */ } else if (tracker -> code == TEXT_STRETCH) { if (counter + tracker -> length > lookup_1) { found_1 = TRUE; position_1 = corresponding + lookup_1 - counter; correction_1 = 0; } else { /* Still counting for the position */ corresponding += tracker -> length; /* Keep track of position of the end of the last strip of text seen in */ /* this sequence, in the alignment and in sequence absolute position */ trail_align = counter + tracker -> length - 1; trail_seq = trail_seq + tracker -> length; counter += tracker -> length; } } if (!found_1) tracker = tracker -> next; /* Next part of sequence */ } /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% */ /* if (found_1 == 0) { */ /* return NTL1_AH_Assemble_Error (FATAL_ERROR, ERR_BAD_REFERENCE, */ /* "Alignment position out of alignment", 3); */ /* } */ /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% */ if (found_1) { /* Proceed only if the left end has been found */ /* Now repeat the same actions to locate the second point */ while ((tracker != NULL) && (!found_2)) { if (tracker -> code == GAP_STRETCH) { if (counter + tracker -> length > lookup_2) { if (tracker -> next == NULL) { if (trail_align == -1) { return NTL1_AH_Assemble_Error (FATAL_ERROR, ERR_BAD_VALUE, "No text in human sequence", 4); } position_2 = trail_seq; correction_2 = trail_align - lookup_2; found_2 = TRUE; } else { /* Still not at the end of the sequence */ if (trail_align != -1) { dist_1 = lookup_2 - trail_align; dist_2 = counter + tracker -> length - lookup_2; if (dist_1 > dist_2) { position_2 = trail_seq + 1; correction_2 = dist_2; found_2 = TRUE; } else { position_2 = trail_seq; correction_2 = -dist_1; found_2 = TRUE; } } else { /* Not at the beginning of the sequence, as well */ position_2 = (alignment -> begin) [index]; correction_2 = counter + tracker -> length - lookup_2; found_2 = TRUE; } } } else counter += tracker -> length; /* Continue counting through the seq.*/ } else if (tracker -> code == TEXT_STRETCH) { if (counter + tracker -> length > lookup_2) { found_2 = TRUE; position_2 = corresponding + lookup_2 - counter; correction_2 = 0; } else { /* Continue counting through the sequence */ corresponding += tracker -> length; /* Keep track of position of the end of the last strip of text seen in */ /* this sequence, in the alignment and in sequence absolute position */ trail_align = counter + tracker -> length - 1; trail_seq = trail_seq + tracker -> length; counter += tracker -> length; } } if (!found_2) tracker = tracker -> next; /* Examine next part */ } /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% */ /* if (found_2 == 0) { */ /* return NTL1_AH_Assemble_Error (FATAL_ERROR, ERR_BAD_REFERENCE, */ /* "Alignment position out of alignment", 5); */ /* } */ /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% */ if (found_2) { /* Proceed only if both ends are properly found */ /* Allocate a new cell for the obtained offsets and link it to result */ new_cell = (fourint_ptr) NTL0_ckalloc (sizeof (FourInt_Struct)); if (!reversed) { new_cell -> value_1 = position_1; new_cell -> correction_1 = correction_1; new_cell -> value_2 = position_2; new_cell -> correction_2 = correction_2; } else { new_cell -> value_1 = position_2; new_cell -> correction_1 = correction_2; new_cell -> value_2 = position_1; new_cell -> correction_2 = correction_1; } new_cell -> next = *offsets; *offsets = new_cell; if (current_pair -> trunc != NULL) { if (current_pair -> trunc == LEFT_TRUNC_CODE) { new_cell -> correction_1 += TRUNCATED_CORRECTION; } else if (current_pair -> trunc == RIGHT_TRUNC_CODE) { new_cell -> correction_2 += TRUNCATED_CORRECTION; } else { new_cell -> correction_1 += TRUNCATED_CORRECTION; new_cell -> correction_2 += TRUNCATED_CORRECTION; } } } } } *offsets = NTL1_AH_Flip_Result (*offsets); return NULL; } /*****************************************************************************/ /* */ /* Procedure: NTL1_AH_Flip_Result */ /* */ /* Reverse all pointers in the received list of cells; return the pointer */ /* to the last (now first) cell */ fourint_ptr NTL1_AH_Flip_Result (fourint_ptr results) { fourint_ptr ret_val; if (results -> next == NULL) return results; else { ret_val = NTL1_AH_Flip_Result (results -> next); (results -> next) -> next = results; results -> next = NULL; return ret_val; } } /*****************************************************************************/ /* */ /* Procedure: NTL1_AH_Assemble_Error */ /* */ /* Service procedure for assembling and returnning an error report, based on */ /* the values of the input parameters; returns the record with the report */ errind NTL1_AH_Assemble_Error (int severity, int code, char *comment, int description) { char *report; errind assembled; report = (char *) NTL0_ckalloc ( (strlen (comment) + strlen ("_Abs_To_Human: ") + 1) * sizeof (char)); sprintf (report, "_Abs_To_Human: %s", comment); assembled = NTL1_Error_Record (severity, code, report, description); free (report); return assembled; }