static char const rcsid [] = "$Id: maps_util.c,v 1.1 1998/04/22 10:23:30 stojanov Exp $"; /*****************************************************************************/ /* */ /* Unit: MAPS_UTIL () */ /* */ /* Author: Nikola Stojanovic */ /* */ /* Revision: 15 APR 98 */ /* */ /* Unit contains utility code for the maps program */ /* */ /*****************************************************************************/ #include #include #include #include "maps.h" #include "maps_vars.h" /*****************************************************************************/ /* */ /* Constants whose values should be known locally in this unit */ /* */ /*****************************************************************************/ #define MAX_LONG_LEN 8 #define MAXINT_VALUE 32767 #define MININT_VALUE -32768 /*****************************************************************************/ /* */ /* Types used only locally in this unit */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* */ /* Global Variables Section - module level */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* */ /* Forward declarations of unit internal procedures - not visible from out. */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* */ /* Module routines callable from outside - interface procedures */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* */ /* Procedure: ALGN_Find_Sequence */ /* */ seqover_ptr ALGN_Find_Sequence (char *search_name, int search_list) { seqover_ptr findings, findings_alias, findings_seq, last_finding, new_finding; seqover_ptr scan_seq, trail_seq, trash_seq; int index; findings = NULL; findings_alias = NULL; findings_seq = NULL; if ((search_list == ANY_LIST) || (search_list == ALIAS_LIST)) { last_finding = NULL; /* Loop through all sequences in the alignment and look for aliases */ for (index = 0; index < alignment_records -> dimension; index++) { if (((file_data -> sequences) [index]).alias != NULL) { if (!strcmp (((file_data -> sequences) [index]).alias, search_name)) { new_finding = (seqover_ptr) NTL0_ckalloc (sizeof (SeqOver_Struct)); new_finding -> source_list = ALIAS_LIST; new_finding -> seq_num = index + 1; new_finding -> overlap = UNDEFINED; new_finding -> seq_begin = (alignment_records -> begin) [index]; new_finding -> seq_end = (alignment_records -> end) [index]; new_finding -> next = NULL; if (last_finding == NULL) findings_alias = new_finding; else last_finding -> next = new_finding; last_finding = new_finding; } } } } if ((search_list == ANY_LIST) || (search_list == FILE_LIST)) { last_finding = NULL; /* Loop through all sequences in the alignment and look for sequence names */ for (index = 0; index < alignment_records -> dimension; index++) { if (((file_data -> sequences) [index]).seq_name != NULL) { if (!strcmp (((file_data -> sequences) [index]).seq_name, search_name)) { new_finding = (seqover_ptr) NTL0_ckalloc (sizeof (SeqOver_Struct)); new_finding -> source_list = FILE_LIST; new_finding -> seq_num = index + 1; new_finding -> overlap = UNDEFINED; new_finding -> seq_begin = (alignment_records -> begin) [index]; new_finding -> seq_end = (alignment_records -> end) [index]; new_finding -> next = NULL; if (last_finding == NULL) findings_seq = new_finding; else last_finding -> next = new_finding; last_finding = new_finding; } } } } /* Now traverse the assembled lists to remove duplicate occurences of the */ /* sequences with the same name and alias, favoring the alias findings */ last_finding = NULL; while (findings_alias != NULL) { scan_seq = findings_seq; trail_seq = NULL; while (scan_seq != NULL) { if (findings_alias -> seq_num == scan_seq -> seq_num) { trash_seq = scan_seq; if (trail_seq == NULL) { findings_seq = findings_seq -> next; scan_seq = findings_seq; } else { trail_seq -> next = scan_seq -> next; scan_seq = scan_seq -> next; } free (trash_seq); } else { trail_seq = scan_seq; scan_seq = scan_seq -> next; } } if (last_finding == NULL) { findings = findings_alias; last_finding = findings; } else { last_finding -> next = findings_alias; last_finding = last_finding -> next; } findings_alias = findings_alias -> next; last_finding -> next = NULL; } while (findings_seq != NULL) { if (last_finding == NULL) { findings = findings_seq; last_finding = findings; } else { last_finding -> next = findings_seq; last_finding = last_finding -> next; } findings_seq = findings_seq -> next; last_finding -> next = NULL; } return findings; } /*****************************************************************************/ /* */ /* Procedure: ALGN_Get_Sequence_In_Range */ /* */ seqover_ptr ALGN_Get_Sequence_In_Range (char *seq, long int from, long int to, int search_list) { seqover_ptr findings, range_list, last_range, trash_finding; if (search_list == ANY_LIST) { if ((findings = ALGN_Find_Sequence (seq, ANY_LIST)) == NULL) return NULL; } else if (search_list == ALIAS_LIST) { if ((findings = ALGN_Find_Sequence (seq, ALIAS_LIST)) == NULL) return NULL; } else { /* Must be list of sequence file names */ if ((findings = ALGN_Find_Sequence (seq, FILE_LIST)) == NULL) return NULL; } range_list = NULL; last_range = NULL; while (findings != NULL) { if ((alignment_records -> segment_code) [(findings -> seq_num) - 1] == VALID_SEGMENT) { if (NTL0_overlaps (findings -> seq_begin, findings -> seq_end, from, to)) { if (findings -> seq_begin > from) { if (findings -> seq_end < to) findings -> overlap = TRUNC_BOTH_SIDES; else findings -> overlap = START_TRUNCATED; } else if (findings -> seq_end < to) findings -> overlap = END_TRUNCATED; else findings -> overlap = FULL_INCLUSION; if (last_range == NULL) range_list = findings; else last_range -> next = findings; last_range = findings; findings = findings -> next; last_range -> next = NULL; } else { trash_finding = findings; findings = findings -> next; free (trash_finding); } } else { trash_finding = findings; findings = findings -> next; free (trash_finding); } } return range_list; } /*****************************************************************************/ /* */ /* Procedure: ALGN_List_Names */ /* */ char **ALGN_List_Names (void) { char **names; int index; names = (char **) NTL0_ckalloc (((alignment_records -> dimension) + 1) * sizeof (char *)); for (index = 0; index < alignment_records -> dimension; index++) names [index] = NTL0_strsave (((file_data -> sequences) [index]).seq_name); names [alignment_records -> dimension] = NULL; return names; } /*****************************************************************************/ /* */ /* Procedure: ALGN_Get_Aliases */ /* */ strlist_ptr ALGN_Get_Aliases (char *sequence) { strlist_ptr results, last_result, new_alias; int index; results = NULL; last_result = NULL; for (index = 0; index < alignment_records -> dimension; index++) { if (!strcmp (((file_data -> sequences) [index]).seq_name, sequence)) { if (((file_data -> sequences) [index]).alias != NULL) { new_alias = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct)); new_alias -> string = NTL0_strsave (((file_data -> sequences) [index]).alias); new_alias -> next = NULL; if (last_result == NULL) results = new_alias; else last_result -> next = new_alias; last_result = new_alias; } } } return results; } /*****************************************************************************/ /* */ /* Procedure: ALGN_Get_Sequence */ /* */ char *ALGN_Get_Sequence (char *alias) { char *result; bool found; int index; result = NULL; found = FALSE; index = 0; while ((!found) && (index < alignment_records -> dimension)) { if (((file_data -> sequences) [index]).alias != NULL) { if (!strcmp (((file_data -> sequences) [index]).alias, alias)) { result = NTL0_strsave (((file_data -> sequences) [index]).seq_name); found = TRUE; } else index++; } else index++; } return result; } /*****************************************************************************/ /* */ /* Procedure: ALGN_Principal_Convert */ /* */ long int ALGN_Principal_Convert (long int absolute) { long int abs_scan, seq_scan; abs_scan = 0; seq_scan = (alignment_records -> begin) [principal_seqnum - 1]; while (abs_scan < absolute) { if (((alignment_records -> texts) [principal_seqnum - 1] [abs_scan] != GAP_SYMBOL) && ((alignment_records -> texts) [principal_seqnum - 1] [abs_scan] != ' ')) seq_scan++; abs_scan++; } return seq_scan; } /*****************************************************************************/ /* */ /* Procedure: UTIL_Is_Long */ /* */ bool UTIL_Is_Long (char *string, long int *result) { char *scan, *num_start, saver; int count; long int long_val; bool negative; scan = string; *result = UNDEFINED; while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++; if (*scan == '\0') return FALSE; else { if (*scan == '-') { negative = TRUE; scan++; while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++; } else negative = FALSE; if (*scan == '\0') return FALSE; else if (*scan == '0') return FALSE; else { count = 0; num_start = scan; while ((*scan >= '0') && (*scan <= '9')) { count++; scan++; } if ((count < 1) || (count > MAX_LONG_LEN)) return FALSE; else { if ((*scan != '\0') && (*scan != ' ') && (*scan != '\t') && (*scan != '\n')) return FALSE; else { saver = *scan; *scan = '\0'; long_val = atol (num_start); *scan = saver; while ((*scan == ' ') || (*scan == '\t') || (*scan == '\n')) scan++; if (*scan != '\0') return FALSE; else { if (negative) *result = -long_val; else *result = long_val; return TRUE; } } } } } } /*****************************************************************************/ /* */ /* Procedure: UTIL_Is_Integer */ /* */ bool UTIL_Is_Integer (char *string, int *result) { long int long_val; if (!UTIL_Is_Long (string, &long_val)) { *result = UNDEFINED; return FALSE; } else if ((long_val < MININT_VALUE) || (long_val > MAXINT_VALUE)) { *result = UNDEFINED; return FALSE; } else { *result = (int) long_val; return TRUE; } } /*****************************************************************************/ /* */ /* Procedure: UTIL_Copy_Strlist */ /* */ strlist_ptr UTIL_Copy_Strlist (strlist_ptr list) { strlist_ptr copy_list, last_item, scan_list, new_item; copy_list = NULL; last_item = NULL; for (scan_list = list; scan_list != NULL; scan_list = scan_list -> next) { new_item = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct)); new_item -> string = NTL0_strsave (scan_list -> string); new_item -> next = NULL; if (last_item == NULL) copy_list = new_item; else last_item -> next = new_item; last_item = new_item; } return copy_list; } /*****************************************************************************/ /* */ /* Procedure: UTIL_Release_StrList */ /* */ strlist_ptr UTIL_Release_StrList (strlist_ptr list) { strlist_ptr scan, trash; scan = list; while (scan != NULL) { trash = scan; scan = scan -> next; free (trash -> string); free (trash); } return NULL; }