/*****************************************************************************/ /* */ /* UNIT: NTL1_Load_Alignment (Level 1 library routine) */ /* */ /* Author: Nikola Stojanovic */ /* */ /* Revision: 15 JUL 94 Version 1.0 */ /* */ /* Function: */ /* */ /* Procedure uses the "packed" information about an alignment to partially */ /* unpack it into internal structures (alignment information must have */ /* already been read from the file, along with other information, and stored */ /* in a block of correct internal format); returns status of expansion in */ /* "standard" error structure, NULL if there were no errors */ /* */ /*****************************************************************************/ #include #include #include #include "ntl1.h" /*****************************************************************************/ /* */ /* Definitions section */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* Definitions of local constants of the unit */ /*****************************************************************************/ /* Limits for predefined buffer sizes */ #define ERR_MSGLIMIT 128 /*****************************************************************************/ /* Prototypes of all locally used functions of this unit */ /*****************************************************************************/ errind NTL1_LA_Expand_Alignment (header_ptr file_data, char *al_data, align_ptr alignment); errind NTL1_LA_Expand_Lines (header_ptr file_data, char *al_data, align_ptr alignment); errind NTL1_LA_lav_Lines (int dimension, char *al_data, align_ptr alignment); errind NTL1_LA_lat_Lines (int dimension, char *al_data, align_ptr alignment); errind NTL1_LA_Assemble_Error (int severity, int code, char *comment, int description); /*****************************************************************************/ /* Definitions of global (static) variables of the unit */ /*****************************************************************************/ static char Error_Message [ERR_MSGLIMIT]; /* Temporary buffer, error passing */ /*****************************************************************************/ /* */ /* Code section */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* */ /* PROCEDURE: NTL1_Load_Alignment */ /* */ /* Central procedure for the unit - partial alignment expansion; returns */ /* error status, NULL if expansion was done without problems */ errind NTL1_Load_Alignment (int al_num, header_ptr file_data, align_ptr *alignment) { block_ptr current; int index; char *al_data; errind status; /* Check whether the input parameters to the routine are legal first */ if (file_data == NULL) return NTL1_LA_Assemble_Error (USER_ERROR, ERR_ILLEGAL_INVOCATION, "Alignment file not loaded before expansion", 0); else if ((al_num <= 0) || (al_num > file_data -> numalign)) { sprintf (Error_Message, "Alignment %d does not exist", al_num); return NTL1_LA_Assemble_Error (USER_ERROR, ERR_ILLEGAL_INVOCATION, Error_Message, 0); } else { /* Get to the buffer containing text describing the alignment first */ current = file_data -> alignments; for (index = 1; index < al_num; index++) current = current -> next; if (current -> contents == NULL) return NTL1_LA_Assemble_Error (USER_ERROR, ERR_BAD_STRUCTURE, "No unpacked alignment data", 0); else { al_data = current -> contents; /* Text of alignment, as read from file */ /* Allocate space for the record that will contain the unpacked alignment */ *alignment = (align_ptr) NTL0_ckalloc (sizeof (Align_Struct)); (*alignment) -> number = al_num; (*alignment) -> dimension = file_data -> dimension; (*alignment) -> score = 0.0; (*alignment) -> size = 0; (*alignment) -> begin = NULL; (*alignment) -> end = NULL; (*alignment) -> lines = NULL; /* Now proceed to fill in the alignment structure and return the status */ status = NTL1_LA_Expand_Alignment (file_data, al_data, *alignment); /* Now when all lines of the alignment have been unpacked, the packed */ /* character buffer contents are not needed any more, so release it */ if (status == NULL) { /* Expansion correctly done */ free (current -> contents); current -> contents = NULL; } else if (status -> kind != WARNING) *alignment = NTL1_Destroy_AlStruct (*alignment); return status; } } } /*****************************************************************************/ /* */ /* Procedure: NTL1_LA_Expand_Alignment */ /* */ /* Procedure fills the allocated alignment structure with contents based on */ /* the string with information from the "alignment" block from the file; */ /* returns the status of expansion (error structure if the string with */ /* infomation is incorrect), NULL if no errors were detected */ errind NTL1_LA_Expand_Alignment (header_ptr file_data, char *al_data, align_ptr alignment) { int index; char *scan, *buff, *head; /* Find start and end vectors for the alignment; fill in their contents */ alignment -> begin = (long int *) NTL0_ckalloc (file_data -> dimension * sizeof (long int)); alignment -> end = (long int *) NTL0_ckalloc (file_data -> dimension * sizeof (long int)); /* Get the alignment score first and store it to its appropriate field */ scan = al_data; while (*scan != 's') scan++; scan++; while (*scan == ' ') scan++; NTL0_tsplit (scan, &head, &buff); alignment -> score = atof (head); free (head); /* Now get the "begin" vector for the alignment */ scan = buff; while (*scan != 'b') scan++; scan++; while (*scan == ' ') scan++; buff = scan; for (index = 0; index < file_data -> dimension; index++) { NTL0_tsplit (buff, &head, &buff); (alignment -> begin) [index] = atol (head) + ((file_data -> sequences) [index]).begin - 1; free (head); } /* Get the "end" vector for the alignment */ scan = buff; while (*scan != 'e') scan++; scan++; while (*scan == ' ') scan++; buff = scan; for (index = 0; index < file_data -> dimension; index++) { NTL0_tsplit (buff, &head, &buff); (alignment -> end) [index] = atol (head) + ((file_data -> sequences) [index]).begin - 1; free (head); } /* Now proceed to record the contents of lines of the aligned sequences */ return NTL1_LA_Expand_Lines (file_data, buff, alignment); } /*****************************************************************************/ /* */ /* Procedure: NTL1_LA_Expand_Lines */ /* */ /* Procedure unpacks the contents of the alignment lines from the received */ /* character buffer; returns the error status, NULL if no errors seen */ errind NTL1_LA_Expand_Lines (header_ptr file_data, char *al_data, align_ptr alignment) { int line_count, index; line_count = file_data -> dimension; /* Fixed # of lines, one per seq. */ alignment -> lines = (line_ptr *) NTL0_ckalloc (line_count * sizeof (line_ptr *)); /* Initialize all list entries for line description lists */ for (index = 0; index < line_count; index++) (alignment -> lines) [index] = NULL; /*--------------------------------------------------------------------------*/ /* At this point it must be distinguished between different file formats */ switch (file_data -> format) { case FORMAT_LAV: { /* File is in old, verbose, "#:lav" format */ return NTL1_LA_lav_Lines (line_count, al_data, alignment); } case FORMAT_LAT: { /* File is in compressed, "#:lat" format */ return NTL1_LA_lat_Lines (line_count, al_data, alignment); } default: { /* Unknown file format */ return NTL1_LA_Assemble_Error (USER_ERROR, ERR_ILLEGAL_CODE, "Unknown alignment file format for lines", 0); } } } /*****************************************************************************/ /* */ /* Procedure: NTL1_LA_lav_Lines */ /* */ /* Procedure unpacks alignment lines data from the buffer conforming to the */ /* packing in the verbose "lav" format; returns error status, NULL if */ /* there were no errors */ errind NTL1_LA_lav_Lines (int dimension, char *al_data, align_ptr alignment) { int index, line_index, old_count; char *scan, *head; long int encountered_length, current_length, *line_begins, *line_ends; line_ptr stretch, checker; /* Count the lines ("l" fields) within the alignment first */ scan = al_data; old_count = 0; while (*scan != '\0') { if (*scan == 'l') old_count++; scan++; } if (old_count < 1) return NTL1_LA_Assemble_Error (FATAL_ERROR, ERR_CODE_PROBLEM, "lav counted illegal number of lines", 0); scan = al_data; line_begins = (long int *) NTL0_ckalloc (dimension * sizeof (long int)); line_ends = (long int *) NTL0_ckalloc (dimension * sizeof (long int)); /* Handle the first "l" compound separatelly, to guarantee non-NULL vector */ /* entry during further processing (gap & size determinations) */ while (*scan != 'l') scan++; scan++; while ((*scan == ' ') || (*scan == '\t')) scan++; for (index = 0; index < dimension; index++) { NTL0_tsplit (scan, &head, &scan); line_begins [index] = atol (head); free (head); } for (index = 0; index < dimension; index++) { NTL0_tsplit (scan, &head, &scan); line_ends [index] = atol (head); free (head); } encountered_length = -1; for (index = 0; index < dimension; index++) { current_length = line_ends [index] - line_begins [index]; if ((encountered_length == -1) && (current_length >= 0)) encountered_length = current_length; else if ((current_length >= 0) && (encountered_length != current_length)) return NTL1_LA_Assemble_Error (FATAL_ERROR, ERR_CODE_PROBLEM, "Lengths in lav line do not match", 0); } if (encountered_length < 0) return NTL1_LA_Assemble_Error (FATAL_ERROR, ERR_CODE_PROBLEM, "Bad fragment length in lav line", 0); else encountered_length++; for (index = 0; index < dimension; index++) { current_length = line_ends [index] - line_begins [index]; stretch = (line_ptr) NTL0_ckalloc (sizeof (Line_Struct)); if (current_length < 0) stretch -> code = GAP_STRETCH; else stretch -> code = TEXT_STRETCH; stretch -> length = encountered_length; stretch -> next = (alignment -> lines) [index]; (alignment -> lines) [index] = stretch; } /* Proceed now with all other "l" compounds in the alignment block */ for (line_index = 1; line_index < old_count; line_index++) { while (*scan != 'l') scan++; scan++; while ((*scan == ' ') || (*scan == '\t')) scan++; for (index = 0; index < dimension; index++) { NTL0_tsplit (scan, &head, &scan); line_begins [index] = atol (head); free (head); } for (index = 0; index < dimension; index++) { NTL0_tsplit (scan, &head, &scan); line_ends [index] = atol (head); free (head); } encountered_length = -1; for (index = 0; index < dimension; index++) { current_length = line_ends [index] - line_begins [index]; if ((encountered_length == -1) && (current_length >= 0)) encountered_length = current_length; else if ((current_length >= 0) && (encountered_length != current_length)) return NTL1_LA_Assemble_Error (FATAL_ERROR, ERR_CODE_PROBLEM, "Lengths in lav line do not match", 0); } if (encountered_length < 0) return NTL1_LA_Assemble_Error (FATAL_ERROR, ERR_CODE_PROBLEM, "Bad fragment length in lav line", 0); else encountered_length++; for (index = 0; index < dimension; index++) { current_length = line_ends [index] - line_begins [index]; if (current_length < 0) { if (((alignment -> lines) [index]) -> code == GAP_STRETCH) ((alignment -> lines) [index]) -> length += encountered_length; else { stretch = (line_ptr) NTL0_ckalloc (sizeof (Line_Struct)); stretch -> code = GAP_STRETCH; stretch -> length = encountered_length; stretch -> next = (alignment -> lines) [index]; (alignment -> lines) [index] = stretch; } } else { if (((alignment -> lines) [index]) -> code == TEXT_STRETCH) ((alignment -> lines) [index]) -> length += encountered_length; else { stretch = (line_ptr) NTL0_ckalloc (sizeof (Line_Struct)); stretch -> code = TEXT_STRETCH; stretch -> length = encountered_length; stretch -> next = (alignment -> lines) [index]; (alignment -> lines) [index] = stretch; } } } } /* For each aligned sequence, reverse assembled list to get the right order */ for (index = 0; index < dimension; index++) { (alignment -> lines) [index] = NTL1_Flip_Lines ((alignment -> lines) [index]); } /* Now check whether all estimated lengths match (all sequence sizes) */ encountered_length = 0; for (checker = (alignment -> lines) [0]; checker != NULL; checker = checker -> next) encountered_length += checker -> length; for (line_index = 1; line_index < dimension; line_index++) { current_length = 0; for (checker = (alignment -> lines) [line_index]; checker != NULL; checker = checker -> next) current_length += checker -> length; if (current_length != encountered_length) return NTL1_LA_Assemble_Error (FATAL_ERROR, ERR_CODE_PROBLEM, "Lengths of stretched alignments in lav do not match", 0); } free (line_begins); free (line_ends); alignment -> size = encountered_length; return NULL; } /*****************************************************************************/ /* */ /* Procedure: NTL1_LA_lat_Lines */ /* */ /* Procedure unpacks alignment lines data from the buffer conforming to the */ /* packing in the compressd "lat" format; returns error status, NULL if */ /* there were no errors */ errind NTL1_LA_lat_Lines (int dimension, char *al_data, align_ptr alignment) { int stretch, index; char *scan, *head, save_scan; line_ptr temp_stretch; long int encountered_length, current_length, next_num; /* Get data for all lines and store them in their respective lists */ scan = al_data; encountered_length = 0; /* No complete expanded length seen yet */ for (index = 0; index < dimension; index++) { current_length = 0; /* Initialize encountered length of current sequence */ while (*scan != 'l') scan++; scan++; while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++; stretch = NULL_STRETCH; while ((*scan != 'l') && (*scan != '\0')) { /* For this line */ temp_stretch = (line_ptr) NTL0_ckalloc (sizeof (Line_Struct)); head = scan; while ((*scan != ' ') && (*scan != '\t') && (*scan != '\n') && (*scan != '\0')) scan++; save_scan = *scan; *scan = '\0'; next_num = atol (head); /* Restore character and skip all white spaces before the next number */ *scan = save_scan; while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++; /* Process the collected number before proceeding to the next one */ if ((next_num < 0) && (stretch != NULL_STRETCH) && (stretch != TEXT_STRETCH)) { return NTL1_LA_Assemble_Error (USER_ERROR, ERR_BAD_STRUCTURE, "Illegal line interleaving in lat", 0); } else if ((next_num > 0) && (stretch != NULL_STRETCH) && (stretch != GAP_STRETCH)) { return NTL1_LA_Assemble_Error (USER_ERROR, ERR_BAD_STRUCTURE, "Illegal line interleaving in lat", 0); } else if (next_num < 0) { temp_stretch -> code = GAP_STRETCH; temp_stretch -> length = -next_num; temp_stretch -> next = (alignment -> lines) [index]; (alignment -> lines) [index] = temp_stretch; stretch = GAP_STRETCH; current_length = current_length - next_num; } else if (next_num > 0) { temp_stretch -> code = TEXT_STRETCH; temp_stretch -> length = next_num; temp_stretch -> next = (alignment -> lines) [index]; (alignment -> lines) [index] = temp_stretch; stretch = TEXT_STRETCH; current_length = current_length + next_num; } else { return NTL1_LA_Assemble_Error (USER_ERROR, ERR_BAD_STRUCTURE, "Illegal line part length in lat", 0); } } if (encountered_length == 0) encountered_length = current_length; else if (encountered_length != current_length) { return NTL1_LA_Assemble_Error (FATAL_ERROR, ERR_CODE_PROBLEM, "Bad length of lat line", 0); } (alignment -> lines) [index] = NTL1_Flip_Lines ((alignment -> lines) [index]); } alignment -> size = encountered_length; return NULL; } /*****************************************************************************/ /* */ /* Procedure: NTL1_LA_Assemble_Error */ /* */ /* Service procedure for assembling and returnning an error report, based on */ /* the values of the input parameters; returns the record with the report */ errind NTL1_LA_Assemble_Error (int severity, int code, char *comment, int description) { char *report; errind assembled; report = (char *) NTL0_ckalloc ( (strlen (comment) + strlen ("_Load_Alignment: ") + 1) * sizeof (char)); sprintf (report, "_Load_Alignment: %s", comment); assembled = NTL1_Error_Record (severity, code, report, description); free (report); return assembled; }