/* static char const rcsid [] = "$Id: ntl1.h,v 1.10 1998/10/07 08:57:48 cathy Exp $"; */
/*****************************************************************************/
/*                                                                           */
/*   UNIT: ntl1 (Level 1 library include file)                               */
/*                                                                           */
/*   Author: Nikola Stojanovic                                               */
/*                                                                           */
/*   Revision: 25 JUL 94 Version 1.0                                         */
/*             14 MAR 97 Version 1.1                                         */
/*                                                                           */
/*****************************************************************************/

/* Include guard.  NTL1 is the marker includers may test before including    */
/* this file (the same convention used for NTL0 just below); testing it here */
/* as well makes a repeated plain #include harmless instead of a cascade of  */
/* struct redefinition errors.                                               */
#ifndef NTL1
#define NTL1 1

#ifndef NTL0
#include "ntl0.h"
#endif

/*****************************************************************************/
/* Definitions of constants used or defined by level 1 utility functions    */
/*****************************************************************************/

/* TRUE/FALSE are commonly defined by other headers too; only supply them    */
/* when nobody else has, to avoid macro redefinition diagnostics.            */
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif

#define EPSILON 0.00001

#define SYSTEM_ERROR 0
#define FATAL_ERROR  1
#define USER_ERROR   2
#define WARNING      3

#define FNAME_SIZE 128

/* Codes for various formats in which alignment file can be provided */
#define NO_FORMAT  0
#define FORMAT_LAV 1
#define FORMAT_LAT 2

/* Values for represented portions of alignment lines in line cells (load) */
#define NULL_STRETCH 0
#define GAP_STRETCH  1
#define TEXT_STRETCH 2

#define GAP_SYMBOL '-'

#define FULL_ALIGNMENT 0
#define SELECTED_RANGE 1

#define VALID_SEGMENT          0
#define ALREADY_ENDED_SEGMENT  1
#define NOTYET_STARTED_SEGMENT 2

/* Values for different label positions in the alignment block */
/* Negative values are parenthesized so they expand as a single operand.     */
#define LEFT_POSITION     (-1)
#define CENTERED_POSITION 0
#define RIGHT_POSITION    1
#define DEFAULT_POSITION  0

#define UP_LABEL      1
#define DOWN_LABEL    2
#define DEFAULT_LABEL 1

#define LEFT_TRUNC_CODE  (-1)
#define RIGHT_TRUNC_CODE 1
#define BOTH_TRUNC_CODE  2

#define TRUNCATED_CORRECTION 1000000

/* Kinds of matches a program can search for when examining motif/alignment */
#define LITERAL_MATCH 0
#define INVERSE_MATCH 1
#define BOTH_MATCH    2

/* Kinds of matches made when searching for motifs in sequences/alignments */
#define MISMATCH_CODE          '#'
#define LITERAL_CODE           '+'
#define INVERSE_CODE           '-'
#define BOTH_STRAND_CODE       '*'
#define TRUNCATED_LITERAL_CODE '>'
#define TRUNCATED_INVERSE_CODE '<'
#define NO_STRAND_CODE         '?'

/* Codes for various kinds of annotations within the human beta_g_region seq.*/
#define HS_CODE   0
#define GENE_CODE 1
#define LALU_CODE 2
#define RALU_CODE 3
#define LL1_CODE  4
#define RL1_CODE  5

#define PROMOTE_GAP  1
#define ORDINARY_GAP 2
#define PROHIBIT_GAP 3

/* Codes for various errors of which type library routines are aware of */
/* NOTE(review): ERR_CODE_PROBLEM and ERR_NO_FILE share the value -1; this   */
/* is kept as found - confirm against the callers whether it is intended.    */
#define ERR_CODE_PROBLEM       (-1)
#define ERR_NO_FILE            (-1)
#define ERR_FILE_FORMAT        (-2)
#define ERR_BUFFER_OVERFLOW    (-3)
#define ERR_ILLEGAL_INVOCATION (-4)
#define ERR_BAD_STRUCTURE      (-5)
#define ERR_ILLEGAL_CODE       (-6)
#define ERR_BAD_REFERENCE      (-7)
#define ERR_NO_VALUE           (-8)
#define ERR_BAD_VALUE          (-9)
#define ERR_UNKNOWN            (-10)

/*****************************************************************************/
/* Type definitions for all types used by level 1 utility functions         */
/*****************************************************************************/

/* NOTE(review): this macro makes "bool" an int everywhere after this point, */
/* which clashes with <stdbool.h> if both are used in one translation unit.  */
/* It is left untouched because Motif_Struct below has a bool member, so     */
/* changing the underlying type would change that structure's layout.        */
#define bool int

/* Error structure contains detailed information about any error returned as */
/* status from any library routine                                           */
typedef struct error_fields {   /* Error structure record */
  int kind;                     /* Severity of the encountered error */
  int code;                     /* Error code, unique per error kind and type */
  char *message;                /* String with the message describing the error */
  int description;              /* Suppl. code for identification of error occurrence */
} Error_Struct;
typedef Error_Struct *errind;   /* ... and pointer to it, for convenience */

/* Singly linked list of strings */
typedef struct strlist_fields {
  char *string;
  struct strlist_fields *next;
} StrList_Struct;
typedef StrList_Struct *strlist_ptr;

/* Origin structure contains data about one origin site - all known sites   */
/* are kept in a linked list of such cells                                   */
typedef struct orgref_fields {  /* Record corresponding to one origin site */
  char *in_species;             /* Name of the sequence to which the origin pertains */
  char *species;
  char *site_name;              /* Name of the origin site */
  long int location;            /* Location of the origin with respect to seq. beginning */
  struct orgref_fields *next;   /* Pointer to the next origin record */
} OrgRef_Struct;
typedef OrgRef_Struct *orgref_ptr;   /* ... and pointer to it, for convenience */

/* Enzyme structure contains data about one enzyme - all are kept in a list */
typedef struct enzyme_fields {  /* Record corresponding to one enzyme */
  char *name;                   /* Name of the enzyme */
  char *sequence;               /* Sequence at which it cuts DNA */
  int cut_site;                 /* Position in the sequence where the cut is made */
  int inverse_cut;
  char *note;
  struct enzyme_fields *next;   /* Pointer to the next enzyme record */
} Enzyme_Struct;
typedef Enzyme_Struct *enzyme_ptr;   /* ... and pointer to it, for convenience */

/* Blocks form a linked list of text buffers, containing information        */
/* extracted from sections of alignment files, as raw text                   */
typedef struct block_fields {   /* Cell for block representation in a list */
  char *contents;               /* Contents of a single block */
  struct block_fields *next;    /* Pointer to the next block of the same type */
} Block_Struct;
typedef Block_Struct *block_ptr;     /* ... and pointer to it, for convenience */

/* Sequence cells contain data about sequences in the alignment - names of  */
/* sequence files, begin and end vectors; sequences are assembled in array   */
typedef struct seq_fields {     /* Sequence data extracted from file block */
  char *seq_name;               /* Name of the sequence in the alignment */
  char *alias;                  /* Alias for the sequence name, if any */
  long int begin;               /* Position from which the observed part begins */
  long int end;                 /* Position where the observed part ends */
} Seq_Struct;
typedef Seq_Struct *seq_ptr;         /* ... and pointer to it, for convenience */

/* All information extracted from alignment files is kept together via      */
/* header record - some data are "unpacked", some "packed" for efficiency    */
typedef struct header_fields {  /* Cell that corresponds to the whole file */
  char tag [FNAME_SIZE];        /* Stored file name */
  int format;                   /* Format of the alignment file loaded */
  int dimension;                /* How many sequences are aligned in this file? */
  char *description;            /* Text of the description block of the file */
  seq_ptr sequences;            /* Vector with information about all contained sequences */
  block_ptr generated;          /* Pointer to list of all generated blocks */
  block_ptr keywords;           /* Pointer to list of all keywords blocks */
  block_ptr alignments;         /* Pointer to list of all alignments lines */
  int numalign;                 /* Number of alignments in the file */
} Header_Struct;
typedef Header_Struct *header_ptr;   /* ... and pointer to it, for convenience */

/* Unpacked data about alignments that were once specified while working    */
/* with this file are stored in appropriate records, one per alignment       */

/* Line cell contains information about a single line within the alignment */
typedef struct line_fields {    /* Record to store lines of one alignment */
  int code;                     /* Code for the entry (gap or sequence characters) */
  long int length;              /* Length of the area described in this cell */
  struct line_fields *next;     /* Pointer to the next cell in the list */
} Line_Struct;
typedef Line_Struct *line_ptr;       /* ... and pointer to it, for convenience */

/* Align cell contains data about one single alignment - partially unpacked */
typedef struct align_fields {   /* Record of one alignment */
  int number;                   /* Number of the alignment within the file */
  int dimension;                /* Dimension of the alignment (number of sequences) */
  double score;                 /* Overall score of the alignment */
  long int *begin;              /* Vector of beginning positions for alignment lines */
  long int *end;                /* Vector of ending positions for alignment lines */
  long int size;                /* Unique length of all lines in alignment, when expanded */
  line_ptr *lines;              /* Lines of the alignment */
} Align_Struct;
typedef Align_Struct *align_ptr;     /* ... and pointer to it, for convenience */

typedef struct unpacked_fields {     /* Record of alignment in "unpacked" form */
  int number;
  int dimension;
  double score;
  long int *begin;
  long int *end;
  int cut;
  char **texts;
  long int size;
  int *segment_code;
  long int *starts;
  long int *stops;
} Unpacked_Alignment;
typedef Unpacked_Alignment *unpacked_ptr;   /* Pointer to it, for convenience */

typedef struct restricted_fields {
  int dimension;
  long int size;
  long int *begin;
  long int *end;
  long int *starts;
  long int *stops;
  char **texts;
} Restricted_Struct;
typedef Restricted_Struct *restricted_ptr;

/* Labels are allowed to be multi-font; each label is thus stored in a list */
/* of description records                                                    */
typedef struct label_fields {   /* Record to store single-font label portion */
  char *text;                   /* Text of this part of the label */
  char *font_name;              /* Name of the font for this text */
  int font_size;                /* Size of the font for this text */
  struct label_fields *next;    /* Rest of the label, next record */
} Label_Struct;
typedef Label_Struct *label_ptr;     /* ... and pointer to it, for convenience */

/* Known landmark sites in the alignment are kept in a list of records */
typedef struct landmark_fields {     /* One landmark description */
  char *reference;
  char *description;
  char *contributor;            /* Name of the source of information for the landmark */
  char *sequence;               /* Name of sequence (species) in which landmark is defined */
  char *origin;                 /* Origin name in respect to which the landmark is defined */
  long int start;               /* Starting offset for the landmark, with respect to origin */
  long int stop;                /* Ending offset for the landmark, with respect to origin */
  label_ptr label;              /* Label for this landmark */
  int label_pos;                /* Position for the label, above or below in alig. block */
  int label_shift;              /* Placement of the label - left, centered or right */
  struct landmark_fields *next; /* Next record in the list */
} Landmark_Struct;
typedef Landmark_Struct *landmark_ptr;   /* ... pointer to it, for convenience */

typedef struct annotation_fields {
  int code;                     /* Kind of annotation: HS or gene */
  long int start;
  long int stop;
  char *base_text;
  char symbol;
  struct annotation_fields *next;
} Annotation_Struct;
typedef Annotation_Struct *annotation_ptr;

typedef struct conserved_fields {
  char *origin;
  long int start;
  long int stop;
  char *description;
  char *reference;
  char *corrections;
  char *contributor;
  char *consensus;
  struct conserved_fields *next;
} Conserved_Struct;
typedef Conserved_Struct *conserved_ptr;

/* Known motifs are stored by the CONTrol module in the linked list of      */
/* description records (it is not expected that there will be too many of   */
/* them); one record has the form of the Motif_Struct structure below        */
typedef struct motif_fields {   /* Record with data about one motif */
  char *name;                   /* String containing the motif name */
  char *originating;            /* Name of the file in which this motif is defined */
  char *bind_factor;            /* Binding factor associated with this motif */
  char *site_name;              /* Site name for this motif (nonunique "name") */
  int length;                   /* Number of characters forming the motif */
  char *text;                   /* String containing characters defining this motif */
  long int *coded;              /* Vector of integer codes for characters of this motif */
  long int *inverted;           /* Vector of integer codes for the inverse complement */
  bool consensus;               /* Indicator whether the motif should be searched for in c. */
  /* Motifs can be matched with respect to consensus sequences and "local"  */
  /* precision, or they can be specified with both "global" and "local"     */
  double global;                /* Global precision with which this motif is to be matched */
  double local;                 /* Local precision with which this motif is to be matched */
  int outline;                  /* Kind of mark to be associated with matched motif */
  char *style;                  /* Outline style to label the matching place */
  int label_pos;                /* Kind of labeling (positioning) for motif matching place */
  int label_place;              /* Label above or below the labeled block */
  label_ptr label;              /* Label to be placed at the place of motif match */
  int ukind;                    /* Kind of underline (single or double) for motif matching site */
  char *at_seq;                 /* Indicator of the sequence to be underlined, if underline */
  char *reference;              /* Reference describing (or defining) this motif */
  struct motif_fields *next;    /* Next motif in the list */
} Motif_Struct;
typedef Motif_Struct *motif_ptr;     /* ... and pointer to it, for convenience */

/* List of integer values for exact corresponding positions */
typedef struct int_fields {     /* One record in the linked list */
  long int value;
  struct int_fields *next;
} Int_Struct;
typedef Int_Struct *int_ptr;

/* Pair of integer values for ends of interval, with truncation indication */
typedef struct two_int_fields { /* One record in the linked list of such */
  long int value_1;
  long int value_2;
  int trunc;
  struct two_int_fields *next;
} TwoInt_Struct;
typedef TwoInt_Struct *twoint_ptr;

/* List of integer corresponding values, with approximation */
typedef struct app_int_fields { /* One record in the linked list */
  long int value;
  long int correction;
  struct app_int_fields *next;
} AppInt_Struct;
typedef AppInt_Struct *appint_ptr;

/* Pair of integer values for corresponding positions, with approximation */
typedef struct four_int_fields {     /* One record in the linked list of such */
  long int value_1;
  long int correction_1;
  long int value_2;
  long int correction_2;
  struct four_int_fields *next;
} FourInt_Struct;
typedef FourInt_Struct *fourint_ptr;

typedef struct tfd_fields {     /* Record from Transcription Factor Database */
  char *file_name;              /* File from which the record has been loaded */
  char *bind_factor;
  char *site_name;
  char *sequence;
  int seq_length;
  char *reference;
  struct tfd_fields *next;
} TFD_Struct;
typedef TFD_Struct *tfd_ptr;         /* ... and pointer to it, for convenience */

typedef struct win_fields {     /* Record from TRANSFAC (Wingender) database */
  char *file_name;              /* File from which the record has been loaded */
  char *bind_factor;
  char *site_name;
  char *sequence;
  int seq_length;
  char *reference;
  struct win_fields *next;
} Win_Struct;
typedef Win_Struct *win_ptr;         /* ... and pointer to it, for convenience */

typedef struct imd_fields {     /* Record from Information Matrix Database */
  char *file_name;
  char *site_name;
  char *bind_factor;
  int seq_length;
  char *sequence;
  double **matrix;
  double cutoff;
  double max_score;
  char *reference;
  struct imd_fields *next;
} IMD_Struct;
typedef IMD_Struct *imd_ptr;         /* ... and pointer to it, for convenience */

typedef struct plain_fields {   /* Record from a plain landmark record file */
  long int start;
  long int stop;
  char *text;
  char *matched;
  char *site;
  char *binding;
  char strand;
  char *database;
  char *file;
  char *reference;
  char *c_range;
  strlist_ptr full_ref;
  struct plain_fields *next;
} Plain_Struct;
typedef Plain_Struct *plain_ptr;     /* ... and pointer to it, for convenience */

typedef struct plist_fields {   /* Header record for a list of "plain" records */
  plain_ptr lines;
  long int from;
  long int to;
  int start_count;
  int stop_count;
  char *consensus;
  struct plist_fields *next;
} Plist_Struct;
typedef Plist_Struct *plist_ptr;     /* ... and pointer to it, for convenience */

/*****************************************************************************/
/* Prototypes of all routines contained in the level1 library               */
/*****************************************************************************/

/* Routines declared with an errind result hand back a pointer to an        */
/* Error_Struct (see above); what a null result means is not visible from   */
/* this header - presumably "no error", confirm against the implementation. */

errind NTL1_Load_Alignment (int al_num, header_ptr file_data,
                            align_ptr *alignment);
errind NTL1_Cut_Alignment (header_ptr file_info, unpacked_ptr alignment,
                           char *range_sequence, long int from, long int to);
errind NTL1_Expand_Motifs (tfd_ptr *motif_list, char **codes,
                           char **complements, motif_ptr *expanded);

errind NTL1_Abs_Corres_1 (char *reference, long int offset,
                          orgref_ptr origins, header_ptr file_data,
                          align_ptr alignment, int_ptr *result);
errind NTL1_Abs_Corres_2 (char *reference, long int offset_1,
                          long int offset_2, orgref_ptr origins,
                          header_ptr file_data, align_ptr alignment,
                          twoint_ptr *result);
errind NTL1_Abs_To_Human (twoint_ptr absolutes, header_ptr file_data,
                          align_ptr alignment, fourint_ptr *offsets);
errind NTL1_Corresponds (char *reference, long int offset, char *new_origin,
                         orgref_ptr origins, header_ptr file_data,
                         align_ptr alignment, appint_ptr *result);
errind NTL1_Abs_Spec (long int position, char *origin, orgref_ptr origins,
                      header_ptr file_data, align_ptr alignment,
                      appint_ptr *result);

void NTL1_Predefined_Codes (char ***codes, char ***inverses);
void NTL1_Integer_Codes (char **char_codes, long int Bit_Codes []);

char NTL1_Consensus_Column (char *column, int length, int gaps_treatment);
char *NTL1_Consensus_Sequence (restricted_ptr alignment, int gaps_treatment);

header_ptr NTL1_Destroy_Header (header_ptr header);
align_ptr NTL1_Destroy_AlStruct (align_ptr alignment);

block_ptr NTL1_Flip_Blocks (block_ptr list);
block_ptr NTL1_Destroy_Block (block_ptr target);
block_ptr NTL1_Destroy_Blocks (block_ptr list);

line_ptr NTL1_Flip_Lines (line_ptr list);

errind NTL1_Assemble_Label (char *label_string, label_ptr *result);
label_ptr NTL1_Flip_Label (label_ptr list);
label_ptr NTL1_Copy_Label (label_ptr original);
int NTL1_Label_Length (label_ptr label);
label_ptr NTL1_Destroy_Label (label_ptr label);

motif_ptr NTL1_Destroy_Motif (motif_ptr motif);
tfd_ptr NTL1_Destroy_TFDlist (tfd_ptr motif_list);
win_ptr NTL1_Destroy_WinList (win_ptr win_list);
imd_ptr NTL1_Destroy_IMDlist (imd_ptr motif_list);
plain_ptr NTL1_Destroy_Plain_List (plain_ptr plain_list);

errind NTL1_Sort_Plain (plist_ptr *lists, plain_ptr *sorted);
errind NTL1_Compress_Plain (plain_ptr *list, plist_ptr *compressed);

errind NTL1_Get_Annotations (annotation_ptr *annotation);

errind NTL1_Error_Record (int severity, int code, char *comment,
                          int description);
errind NTL1_Print_Error (errind error_buffer);

#endif /* NTL1 */