/*****************************************************************************/ /* */ /* UNIT: NTL2_Load_TFDfile (Level 2 library routine) */ /* */ /* Author: Nikola Stojanovic */ /* */ /* Revision: 21 JUL 94 Version 1.0 */ /* */ /* Function: */ /* */ /* Procedure loads the specified Transcription Factor Database file, in */ /* the "plain text" format, into internal structures more convenient for */ /* processing the information. */ /* Procedure receives the name of the file containing TFD data, fills the */ /* pointer (reference parameter) to the begining of the assembled list of */ /* motifs; returns the error structure, NULL if everything was OK */ /* */ /* */ /* Expected format of the Transcription Factor Database file: */ /* */ /* */ /*****************************************************************************/ #include #include #include #include "ntl2.h" /*****************************************************************************/ /* */ /* Definitions section */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* Definitions of local constants of the unit */ /*****************************************************************************/ /* Limit for the length of an error message that can be assembled */ #define ERR_MSGLIMIT 128 /* Information is read from database files by supplementary buffer of size: */ #define IN_LIMIT 100 /* Delimiting positions for various blocks of information within a line */ #define BIND_FACTOR_START 1 #define BIND_FACTOR_STOP 15 #define SITE_NAME_START 17 #define SITE_NAME_STOP 36 #define SEQUENCE_START 38 #define SEQUENCE_STOP 67 #define SEQ_LEN_START 69 #define SEQ_LEN_STOP 70 #define REFERENCE_START 72 #define REFERENCE_STOP 77 /*****************************************************************************/ /* Prototypes of all locally used functions of this unit */ /*****************************************************************************/ errind NTL2_LT_Read_TFDfile (char *file_name, FILE **tfd_file, tfd_ptr *motifs); errind NTL2_LT_Assemble_Error (int severity, int code, char *comment, int description); /*****************************************************************************/ /* Definitions of global (static) variables of the unit */ /*****************************************************************************/ static char In_Buff [IN_LIMIT]; /* Global input line buffer, for convenience */ static char Error_Message [ERR_MSGLIMIT]; /* Temporary buffer, error passing */ /*****************************************************************************/ /* */ /* Code section */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* */ /* Procedure: NTL2_Load_TFDfile */ /* */ /* Main (interface) procedure of this unit */ errind NTL2_Load_TFDfile (char *file_name, tfd_ptr *motifs) { FILE *tfd_file; errind erret; /* Initialize the list of loaded motifs to return to "empty" before actions */ *motifs = NULL; /* Check whether the file with given file name exists and open it for read */ if ((tfd_file = fopen (file_name, "r")) == NULL) { /* Error condition */ sprintf (Error_Message, "File <%s> does not exist", file_name); return NTL2_LT_Assemble_Error (USER_ERROR, ERR_NO_FILE, Error_Message, 0); } else { /* There is a file with the specified name */ /* Now load the motifs from the file and close the file when done */ erret = NTL2_LT_Read_TFDfile (file_name, &tfd_file, motifs); /* Read motif */ if ((erret != NULL) && (erret -> kind != WARNING)) { /* Error status ret. */ /* Release the assembled part of the motifs list - it won't be needed */ *motifs = NTL1_Destroy_TFDlist (*motifs); } fclose (tfd_file); return erret; } } /*****************************************************************************/ /* */ /* Procedure: NTL2_LT_Read_TFDfile */ /* */ /* Top level procedure for reading the Transcription Factor Database file */ /* contents; loops for each motif (line) defined in the file; receives the */ /* file pointer and (already initialized) list of motifs to be filled, */ /* returns the assembled error record, NULL if there were no errors in */ /* loading */ errind NTL2_LT_Read_TFDfile (char *file_name, FILE **tfd_file, tfd_ptr *motifs) { tfd_ptr new_tfd; int space_count; char save_char; while ((fgets (In_Buff, IN_LIMIT, *tfd_file) != NULL) && (In_Buff [0] != '\0')) { if (strlen (In_Buff) < REFERENCE_STOP) return NTL2_LT_Assemble_Error (USER_ERROR, ERR_FILE_FORMAT, "Illegal line length in TFD file", 0); /* Allocate a record to store a newly collected motif and assemble struct. */ new_tfd = (tfd_ptr) NTL0_ckalloc (sizeof (TFD_Struct)); new_tfd -> file_name = NTL0_strsave (file_name); space_count = BIND_FACTOR_STOP - 1; while (((In_Buff [space_count] == ' ') || (In_Buff [space_count] == '\t')) && (space_count >= BIND_FACTOR_START)) space_count--; if ((In_Buff [space_count] != ' ') && (In_Buff [space_count] != '\t')) space_count++; save_char = In_Buff [space_count]; In_Buff [space_count] = '\0'; new_tfd -> bind_factor = NTL0_strsave (&(In_Buff [BIND_FACTOR_START - 1])); In_Buff [space_count] = save_char; space_count = SITE_NAME_STOP - 1; while (((In_Buff [space_count] == ' ') || (In_Buff [space_count] == '\t')) && (space_count >= SITE_NAME_START)) space_count--; if ((In_Buff [space_count] != ' ') && (In_Buff [space_count] != '\t')) space_count++; save_char = In_Buff [space_count]; In_Buff [space_count] = '\0'; new_tfd -> site_name = NTL0_strsave (&(In_Buff [SITE_NAME_START - 1])); In_Buff [space_count] = save_char; space_count = SEQUENCE_STOP - 1; while (((In_Buff [space_count] == ' ') || (In_Buff [space_count] == '\t')) && (space_count >= SEQUENCE_START)) space_count--; if ((In_Buff [space_count] != ' ') && (In_Buff [space_count] != '\t')) space_count++; save_char = In_Buff [space_count]; In_Buff [space_count] = '\0'; new_tfd -> sequence = NTL0_strsave (&(In_Buff [SEQUENCE_START - 1])); In_Buff [space_count] = save_char; NTL0_uppercase (new_tfd -> sequence); space_count = SEQ_LEN_STOP - 1; while (((In_Buff [space_count] == ' ') || (In_Buff [space_count] == '\t')) && (space_count >= SEQ_LEN_START)) space_count--; if ((In_Buff [space_count] != ' ') && (In_Buff [space_count] != '\t')) space_count++; save_char = In_Buff [space_count]; In_Buff [space_count] = '\0'; new_tfd -> seq_length = atoi (&(In_Buff [SEQ_LEN_START - 1])); In_Buff [space_count] = save_char; space_count = REFERENCE_STOP - 1; while (((In_Buff [space_count] == ' ') || (In_Buff [space_count] == '\t')) && (space_count >= REFERENCE_START)) space_count--; if ((In_Buff [space_count] != ' ') && (In_Buff [space_count] != '\t')) space_count++; save_char = In_Buff [space_count]; In_Buff [space_count] = '\0'; new_tfd -> reference = NTL0_strsave (&(In_Buff [REFERENCE_START - 1])); In_Buff [space_count] = save_char; space_count = REFERENCE_STOP; while ((In_Buff [space_count] == ' ') || (In_Buff [space_count] == '\t')) space_count++; if (In_Buff [space_count] == '\n') { if (In_Buff [space_count + 1] != '\0') return NTL2_LT_Assemble_Error (FATAL_ERROR, ERR_CODE_PROBLEM, "Illegal input string layout", 0); } else if (In_Buff [space_count] != '\0') return NTL2_LT_Assemble_Error (FATAL_ERROR, ERR_CODE_PROBLEM, "Illegal input string layout", 0); /* Now connect the newly defined record into the list of known motifs */ new_tfd -> next = *motifs; *motifs = new_tfd; } return NULL; } /*****************************************************************************/ /* */ /* Procedure: NTL2_LT_Assemble_Error */ /* */ /* Service procedure for assembling and returnning an error report, based on */ /* the values of the input parameters; returns the record with the report */ errind NTL2_LT_Assemble_Error (int severity, int code, char *comment, int description) { char *report; errind assembled; report = (char *) NTL0_ckalloc ( (strlen (comment) + strlen ("_Load_TFDfile: ") + 1) * sizeof (char)); sprintf (report, "_Load_TFDfile: %s", comment); assembled = NTL1_Error_Record (severity, code, report, description); free (report); return assembled; }