static char const rcsid [] = "$Id: ptol.c,v 1.1 1998/04/21 10:46:04 stojanov Exp $"; /*****************************************************************************/ /* */ /* Program: ptol (compression of plain format landmark files) */ /* */ /* Author: Nikola Stojanovic */ /* */ /* Revision: 15 APR 97 Version 1.0 */ /* */ /* */ /* Given a set of files containing motif match sites in the plain form */ /* landmarks format, the program sorts the data and outputs the compressed */ /* information, with overlapping areas merged */ /* */ /*****************************************************************************/ #include #include #include #include "ntl.h" /*****************************************************************************/ /* */ /* Definitions section */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* Definitions of the constants of the program unit */ /*****************************************************************************/ #define DEFAULT_DIRECTORY "." /*****************************************************************************/ /* Prototypes of locally used functions of the program unit */ /*****************************************************************************/ int ptol_collect_data (strlist_ptr directories, strlist_ptr files, plist_ptr *records); int ptol_process_records (plist_ptr records, plist_ptr *assembled); int ptol_output_data (strlist_ptr sources, plist_ptr records); /*****************************************************************************/ /* */ /* Code section */ /* */ /*****************************************************************************/ /*****************************************************************************/ /* */ /* Procedure: main */ /* */ /* "main" procedure of the program. Receives and analyses the command line */ /* parameters, sets the control variables of the program, checks their */ /* consistency and passes control to internal procedures which actually */ /* process the input data and output results; returns 0 if everything is */ /* OK, non-zero status in case of any errors */ int main (int argc, char **argv) { strlist_ptr input_files, data_directory, new_string, string_scan; int arg_count; plist_ptr records, sorted; input_files = NULL; data_directory = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct)); data_directory -> string = NTL0_strsave (DEFAULT_DIRECTORY); data_directory -> next = NULL; if (argc < 2) { /* Display instructions and terminate */ fprintf (stderr, "usage: %s []+\n", argv [0]); fprintf (stderr, " [-I ]*\n"); exit (1); } else { /* Some parameters provided - process them */ /* Proceed to extract the command line parameters and create the settings */ arg_count = 1; while (arg_count < argc) { if (argv [arg_count] [0] != '-') { /* Not an "-" option - file name */ new_string = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct)); new_string -> string = NTL0_strsave (argv [arg_count]); new_string -> next = input_files; input_files = new_string; arg_count++; } else if (!strcmp (argv [arg_count], "-I")) { /* New "include" path */ arg_count++; if (arg_count == argc) { fprintf (stderr, "Missing directory path for inclusion.\n"); exit (1); } else { new_string = (strlist_ptr) NTL0_ckalloc (sizeof (StrList_Struct)); new_string -> string = NTL0_strsave (argv [arg_count]); new_string -> next = NULL; if (data_directory == NULL) data_directory = new_string; else { string_scan = data_directory; while (string_scan -> next != NULL) string_scan = string_scan -> next; string_scan -> next = new_string; } arg_count++; } } else { /* Unknown command-line option */ fprintf (stderr, "Illegal option (%s).\n", argv [arg_count]); fprintf (stderr, "usage: %s []+\n", argv [0]); fprintf (stderr, " [-I ]*\n"); exit (1); } } /* Now check whether all necessary parameters have been provided */ if (input_files == NULL) { fprintf (stderr, "Must have at least one file to process.\n"); exit (1); } /* If everything is OK so far, proceed to collect data from the files */ if (ptol_collect_data (data_directory, input_files, &records) < 0) exit (1); else if (ptol_process_records (records, &sorted) < 0) exit (1); else if (ptol_output_data (input_files, sorted) < 0) exit (1); exit (0); } } /*****************************************************************************/ /* */ /* Procedure: ptol_collect_data */ /* */ /* Procedure loads the sequences from the specified plain format files and */ /* assembles them in a list of records in the output format; returns 0 */ /* if everything is OK, negative number otherwise */ int ptol_collect_data (strlist_ptr directories, strlist_ptr files, plist_ptr *records) { errind report; char *new_path; strlist_ptr current_path, file_scan; plain_ptr sequences; plist_ptr new_record; *records = NULL; for (file_scan = files; file_scan != NULL; file_scan = file_scan -> next) { if ((directories == NULL) || ((file_scan -> string) [0] == '/') || ((file_scan -> string) [0] == '~')) { if ((report = NTL2_Load_Plain (file_scan -> string, &sequences)) != NULL) { fprintf (stderr, "Can't open sequences file '%s' (%s).\n", file_scan -> string, report -> message); return -1; } else if (sequences == NULL) { /* File found, but nothing retrieved */ fprintf (stderr, "WARNING: Empty file '%s'.\n", file_scan -> string); } } else { /* Directory path(s) provided, try them in order */ current_path = directories; sequences = NULL; while ((sequences == NULL) && (current_path != NULL)) { new_path = NTL0_ckalloc ((strlen (current_path -> string) + 2 + strlen (file_scan -> string)) * sizeof (char)); strcpy (new_path, current_path -> string); if ((file_scan -> string) [0] != '/') strcat (new_path, "/"); strcat (new_path, file_scan -> string); if ((report = NTL2_Load_Plain (new_path, &sequences)) != NULL) { sequences = NULL; current_path = current_path -> next; } free (new_path); } if (report != NULL) { /* File not found on any path */ fprintf (stderr, "Can't open sequences file '%s' (%s).\n", file_scan -> string, report -> message); return -2; } else if (sequences == NULL) { /* File found, but nothing retrieved */ fprintf (stderr, "WARNING: Empty file '%s'.\n", file_scan -> string); } } new_record = (plist_ptr) NTL0_ckalloc (sizeof (Plist_Struct)); new_record -> lines = sequences; new_record -> from = 0; new_record -> to = 0; new_record -> start_count = 0; new_record -> stop_count = 0; new_record -> consensus = NULL; new_record -> next = *records; *records = new_record; } return 0; } /*****************************************************************************/ /* */ /* Procedure: ptol_process_records */ /* */ /* Procedure rearranges (sorts, and then merges overlaps) the received */ /* lists of records and returns the rearanged list for output; returns a */ /* negative number if any errors were encountered in the process, 0 */ /* otherwise */ int ptol_process_records (plist_ptr records, plist_ptr *assembled) { errind erret; plain_ptr sorted; /* Sort and flatten all line records contained in the input list first */ if ((erret = NTL1_Sort_Plain (&records, &sorted)) != NULL) { fprintf (stderr, "Can't merge the lists (%s).\n", erret -> message); return -11; } /* Merge the overlapping data into blocks of their own - separate blocks */ if ((erret = NTL1_Compress_Plain (&sorted, assembled)) != NULL) { fprintf (stderr, "Can't compress overlaps (%s).\n", erret -> message); return -12; } return 0; } /*****************************************************************************/ /* */ /* Procedure: ptol_output_data */ /* */ int ptol_output_data (strlist_ptr sources, plist_ptr records) { int line_count; strlist_ptr source_scan; plist_ptr data_scan; line_count = 0; printf ("#:plain:\n\n"); printf ("# Merged and compressed data from files: "); for (source_scan = sources; source_scan != NULL; source_scan = source_scan -> next) { printf ("%s", source_scan -> string); if (source_scan -> next != NULL) printf (", "); else printf ("\n"); } printf ("\n\n"); for (data_scan = records; data_scan != NULL; data_scan = data_scan -> next) { line_count++; if (data_scan -> start_count == data_scan -> stop_count) { if (data_scan -> lines == NULL) { fprintf (stderr, "Missing records for compressed data - internal error.\n"); return -21; } else if ((data_scan -> lines) -> next != NULL) { fprintf (stderr, "Incorrect recording of compressed records - internal error.\n"); return -22; } else { printf ("%ld %ld ", (data_scan -> lines) -> start, (data_scan -> lines) -> stop); if ((data_scan -> lines) -> text == NULL) printf ("null "); else printf ("%s ", (data_scan -> lines) -> text); if ((data_scan -> lines) -> matched == NULL) printf ("null "); else printf ("%s ", (data_scan -> lines) -> matched); if ((data_scan -> lines) -> site == NULL) printf ("null "); else printf ("\"%s\" ", (data_scan -> lines) -> site); if ((data_scan -> lines) -> binding == NULL) printf ("null "); else printf ("\"%s\" ", (data_scan -> lines) -> binding); printf ("%c ", (data_scan -> lines) -> strand); if ((data_scan -> lines) -> database == NULL) printf ("null "); else printf ("%s ", (data_scan -> lines) -> database); if ((data_scan -> lines) -> file == NULL) printf ("null "); else printf ("%s ", (data_scan -> lines) -> file); if ((data_scan -> lines) -> reference == NULL) printf ("null "); else printf ("%s ", (data_scan -> lines) -> reference); if ((data_scan -> lines) -> c_range == NULL) printf ("%d\n", data_scan -> start_count); else printf ("%s\n", (data_scan -> lines) -> c_range); } } else if (data_scan -> lines == NULL) { fprintf (stderr, "Missing records for compressed data - internal error.\n"); return -23; } else if ((data_scan -> lines) -> next == NULL) { fprintf (stderr, "Incorrect count of compressed records - internal error.\n"); return -24; } else { printf ("%ld %ld ", data_scan -> from, data_scan -> to); if (data_scan -> consensus == NULL) printf ("null "); else printf ("%s ", data_scan -> consensus); printf ("null null null %c null null null %d-%d\n", NO_STRAND_CODE, data_scan -> start_count, data_scan -> stop_count); } } printf ("\n# Total number of records: %d\n", line_count); return 0; }