#include "decom_lib.h" static char rcsid[] = "$Id: decom.c,v 1.9 2000/03/23 19:49:33 schwartz Exp $"; /* Alignment blocks. */ typedef struct bl { int b1, b2, e1, e2; int SR, SL, R, L, Lower; } block, *block_ptr; #define DEFAULT_Y 300 static uchar *seq1, *seq2; static int len1, len2, from1, from2, to1, to2; static int gop, gep, X, Y; static ss_t ss; static SEQ *sf1, *sf2; static argv_scores_t scores; void read_seqs(FILE *); void decompose(int *S, int n, int b1, int b2, int K); int read_align(FILE *file_ptr, int *script, int *start1, int *start2); int main(int argc, char **argv) { int i, x, y, K; char buf[500]; FILE *ap; int *bp; if (argc < 2) fatalf("%s alignment [O=?] [E=?] [M=?] [I=?] [V=?] [S=?] [Y=?]", argv[0]); ckargs("OEMIVSXY", argc, argv, 2); DNA_scores(&scores, ss); gop = scores.O; gep = scores.E; get_argval_pos('Y', &Y, DEFAULT_Y); X = gop + gep*Y; ap = same_string(argv[1], "-") ? stdin : ckopen(argv[1], "r"); if (fgets(buf, 500, ap) == NULL || strcmp(buf, "#:lav\n")) fatal("Not an alignment file."); read_seqs(ap); bp = ckalloc(sizeof(int)*(len1+len2+1)); print_align_header(sf1, sf2, &scores); K = DNA_thresh(sf1, sf2, &scores); while ((i = read_align(ap, bp, &x, &y)) != 0) decompose(bp, i, x, y, K); free(bp); if (ap != stdin) fclose(ap); seq_close(sf1); seq_close(sf2); return 0; } void decompose(int *A, int n, int b1, int b2, int K) { int i, s, si, score, S, top, x, y, L; block_ptr b; uchar *s1, *s2; b1 += from1; b2+=from2; s1 = seq1+b1-1; s2 = seq2 + b2-1; b = ckalloc(sizeof(block)*n); S = top = 0; x = y = -1; for (i = 0; i < n; i++) { assert(x < len1); assert(y < len2); s = A[i]; if (s < 0) { x -= s; si = -gop+gep*s; } else if (s > 0) { y += s; si = -gop-gep*s; } else si = ss[s1[++x]][s2[++y]]; S += si; if (si >= 0) { /* form a one-column alignment */ top++; b[top].L = b[top].R = i; b[top].SL = S - si; b[top].SR = S; b[top].b1 = b[top].e1 = b1+x; b[top].e2 = b[top].b2 = b2+y; L = top-1; while (L > 0 && b[L].SL > b[top].SL) L = b[L].Lower; b[top].Lower = L; /* merge alignments */ while (L > 0 && b[L].SR - b[top].SL <= X && b[L].SR <= b[top].SR) { b[L].R = b[top].R; b[L].SR = b[top].SR; b[L].e1 = b[top].e1; b[L].e2 = b[top].e2; top = L; L = b[top].Lower; } } } for (i = 1; i <= top; i++) if ((score = b[i].SR-b[i].SL) >= K) print_align(score, seq1, seq2, b[i].b1-from1, b[i].e1-from1, b[i].b2-from2, b[i].e2-from2, A + b[i].L); free(b); } void read_seqs(FILE *alignment) { char buf[500], buf2[500], *p, *q; while (fgets(buf, 500, alignment)) { if (strncmp(buf, "s {", 3)) continue; if (fgets(buf, 500, alignment) == NULL) fatal("Did not find sequence names in alignment file."); if ((q=strchr(buf,'"')) == NULL || (p=strchr(q+1,'"')) == NULL) fatal("Cannot find first sequence name."); *p = '\0'; if (sscanf(p+1, "%d %d", &from1, &to1) != 2) fatal("Could not find positions in sequence 1"); snprintf(buf2, 500, "%s[%d,%d]", q+1, from1, to1); sf1 = seq_open(buf2, 0, 0); if (seq_read(sf1) < 0) fatalf("error reading %s", buf2); len1 = SEQ_LEN(sf1); seq1 = SEQ_CHARS(sf1); --from1; if (fgets(buf, 500, alignment) == NULL) fatal("Did not find sequence names in alignment file."); if ((q=strchr(buf,'"')) == NULL || (p=strchr(q+1,'"')) == NULL) fatal("Cannot find second sequence name."); *p = '\0'; if (sscanf(p+1, "%d %d", &from2, &to2) != 2) fatal("Could not find positions in sequence 2"); snprintf(buf2, 500,"%s[%d,%d]", q+1, from2, to2); sf2 = seq_open(buf2, 0, 0); if (seq_read(sf2) < 0) fatalf("error reading %s", buf2); len2 = SEQ_LEN(sf2); seq2 = SEQ_CHARS(sf2); --from2; return; } fatal("Failed to extract sequence names from alignment file."); } int read_align(FILE *file_ptr, int *script, int *start1, int *start2) { char line[100]; char ell[32]; int i, j, b1, b2, e1, e2, pe1, pe2; i = 0; while (fgets(line, 100, file_ptr)) if (strncmp(line, "a {", 3) == 0) { fgets(line, 100, file_ptr); fgets(line,100, file_ptr); sscanf(line, " b %d %d", &b1, &b2); fgets(line,100, file_ptr); sscanf(line, " e %d %d", &e1, &e2); pe1 = b1; pe2 = b2; *start1 = b1; *start2 = b2; while (fgets(line, 100, file_ptr)) { if (line[0] == '}') return i; sscanf(line, "%*s %d %d %d %d", &b1, &b2, &e1, &e2); if (b1 != pe1) script[i++] = -(b1-pe1); if (b2 != pe2) script[i++] = b2-pe2; for (j = b1; j <= e1; j++) script[i++] = 0; pe1 = e1+1; pe2 = e2+1; } break; } return 0; }