/*****************************************************************************/ /* */ /* UNIT: NTL3_Matches (Level 3 library routine) */ /* */ /* Author: Nikola Stojanovic */ /* */ /* Revision: 08 SEP 94 Version 1.0 */ /* */ /* Function: */ /* */ /* Procedure checks whether the given pattern (including all predefined */ /* "ambiguity" codes, loaded and stored only once during the execution of */ /* the program, by this level3 routine) matches the given string (within */ /* which ambiguity codes are permitted, as well); match direction (direct, */ /* inverse complement or both) is as requested in the input, as well as the */ /* match precision; calculates the position at which the first match has */ /* been found, if any, 0 if there was no match between the sequence and the */ /* pattern; returns the "standard" error structure in case of error(s), NULL */ /* if everything is OK */ /* */ /*****************************************************************************/ #include #include #include #include "ntl3.h" /*****************************************************************************/ /* */ /* Definitions section */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* Prototypes of all locally used functions of this unit */ /*****************************************************************************/ errind NTL3_MC_Assemble_Error (int severity, int code, char *comment, int description); /*****************************************************************************/ /* Definitions of global (static) variables of the unit */ /*****************************************************************************/ static int codes_loaded = 0; /* Before first invocation, no codes are loaded */ char **code_table; /* Array of character codes (unique and ambiguity) */ char **inverse_table; /* Array of character codes for inverse of given code */ long int Bit_Codes [26]; /* Integer codes for character ambiguity codes */ long int Bit_Inverses [26]; /* Integer codes for inverses of character codes */ /*****************************************************************************/ /* */ /* Code section */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* */ /* PROCEDURE: NTL3_Matches */ /* */ /* Execution of the unit code; returns 1 if there is a match, 0 otherwise */ errind NTL3_Matches (char *sequence, char *pattern, int direction, float precision, int *match) { int pattern_size, string_size, index, matcher; long int *coded, *inverted; float score, inverse_score; /* It is mandatory for this procedure to receive both the sequence and the */ /* pattern as proper strings (no NULLs allowed) */ if ((sequence == NULL) || (pattern == NULL)) { return NTL3_MC_Assemble_Error (USER_ERROR, ERR_BAD_REFERENCE, "NULL string for matching", 1); } /* Check if the pattern is longer than the sequence - if it is, then it is */ /* immediate "no match" */ pattern_size = strlen (pattern); string_size = strlen (sequence); if (pattern_size > string_size) { /* Match not possible */ *match = 0; return NULL; } /* Convert the sequence and the pattern into uppercase - check if only */ /* character codes are involved in any of them */ NTL0_uppercase (sequence); NTL0_uppercase (pattern); index = 0; while (sequence [index] != '\0') { if ((sequence [index] < 'A') || (sequence [index] > 'Z')) return NTL3_MC_Assemble_Error (USER_ERROR, ERR_BAD_STRUCTURE, "Illegal sequence in input", 2); index++; } for (index = 0; index < pattern_size; index++) { if ((sequence [index] < 'A') || (sequence [index] > 'Z')) return NTL3_MC_Assemble_Error (USER_ERROR, ERR_BAD_STRUCTURE, "Illegal pattern to match", 3); } /* Check if this is the first invocation of this routine: load the */ /* predefined character ambiguity codes if it is */ if (!codes_loaded) { NTL1_Predefined_Codes (&code_table, &inverse_table); /* Determine the integer codes for ambiguity characters and their inverses */ NTL1_Integer_Codes (code_table, Bit_Codes); NTL1_Integer_Codes (inverse_table, Bit_Inverses); } /* Convert the pattern into a sequence of integer codes, for faster match */ coded = (long int *) NTL0_ckalloc (pattern_size * sizeof (long int)); inverted = (long int *) NTL0_ckalloc (pattern_size * sizeof (long int)); for (index = 0; index < pattern_size; index++) { coded [index] = Bit_Codes [(int) (pattern [index]) - (int) 'A']; inverted [pattern_size - index - 1] = Bit_Inverses [(int) (pattern [index]) - (int) 'A']; } /* Proceed to search for a position that has a match, until end-of-string */ index = 0; *match = 0; while ((!(*match)) && (index <= string_size - pattern_size)) { score = 0.0; inverse_score = 0.0; for (matcher = 0; matcher < pattern_size; matcher++) { if (coded [matcher] & Bit_Codes [(int) (sequence [index + matcher] - 'A')]) score += 1.0; if (inverted [matcher] & Bit_Codes [(int) (sequence [index + matcher] - 'A')]) inverse_score += 1.0; } score = score / ((float) pattern_size); inverse_score = inverse_score / ((float) pattern_size); if ((direction == LITERAL_MATCH) || (direction == BOTH_MATCH)) { if (score > precision - EPSILON) *match = index + 1; } if ((direction == INVERSE_MATCH) || (direction == BOTH_MATCH)) { if (inverse_score > precision - EPSILON) *match = index + 1; } index++; } return NULL; } /*****************************************************************************/ /* */ /* Procedure: NTL3_MC_Assemble_Error */ /* */ /* Service procedure for assembling and returnning an error report, based on */ /* the values of the input parameters; returns the record with the report */ errind NTL3_MC_Assemble_Error (int severity, int code, char *comment, int description) { char *report; errind assembled; report = (char *) NTL0_ckalloc ( (strlen (comment) + strlen ("_Matches: ") + 1) * sizeof (char)); sprintf (report, "_Matches: %s", comment); assembled = NTL1_Error_Record (severity, code, report, description); free (report); return assembled; }