#include "decom_lib.h"

static char rcsid[] =
"$Id: decom.c,v 1.9 2000/03/23 19:49:33 schwartz Exp $";

/* Alignment blocks. */
typedef struct bl {
    int b1, b2, e1, e2;
    int SR, SL, R, L, Lower;
} block, *block_ptr;

#define DEFAULT_Y 300

static uchar *seq1, *seq2;
static int len1, len2, from1, from2, to1, to2;
static int gop, gep, X, Y;
static ss_t ss;
static SEQ *sf1, *sf2;
static argv_scores_t scores;

void read_seqs(FILE *);
void decompose(int *S, int n, int b1, int b2, int K);
int read_align(FILE *file_ptr, int *script, int *start1, int *start2);

int main(int argc, char **argv)
{
	int i, x, y, K;
	char buf[500];
	FILE *ap;
	int *bp;

	if (argc < 2)
		fatalf("%s alignment [O=?] [E=?] [M=?] [I=?] [V=?] [S=?] [Y=?]",
		  argv[0]);

        ckargs("OEMIVSXY", argc, argv, 2);

	DNA_scores(&scores, ss);
	gop = scores.O;
	gep = scores.E;
	get_argval_pos('Y', &Y, DEFAULT_Y);
	X = gop + gep*Y;

	ap = same_string(argv[1], "-") ? stdin : ckopen(argv[1], "r");
	if (fgets(buf, 500, ap) == NULL || strcmp(buf, "#:lav\n"))
		fatal("Not an alignment file.");
	read_seqs(ap);
	bp = ckalloc(sizeof(int)*(len1+len2+1));
	print_align_header(sf1, sf2, &scores);
	K = DNA_thresh(sf1, sf2, &scores);

	while ((i = read_align(ap, bp, &x, &y)) != 0)
	  decompose(bp, i, x, y, K);

	free(bp);
	if (ap != stdin)
		fclose(ap);
	seq_close(sf1);
	seq_close(sf2);
	return 0;
}

void decompose(int *A, int n, int b1, int b2, int K)
{
	int i, s, si, score, S, top, x, y, L;
	block_ptr b;
	uchar *s1, *s2;

	b1 += from1; b2+=from2;
	s1 = seq1+b1-1; s2 = seq2 + b2-1;
	b = ckalloc(sizeof(block)*n);
	S = top = 0; x = y = -1;
	for (i = 0; i < n; i++) {
		assert(x < len1);
		assert(y < len2);
		s = A[i];
		if (s < 0) {
			x -= s;
			si = -gop+gep*s;
		} else if (s > 0) {
			y += s;
			si = -gop-gep*s;
		} else
			si = ss[s1[++x]][s2[++y]];
		S += si;
		if (si >= 0) {
			/* form a one-column alignment */
			top++;
			b[top].L = b[top].R = i;
			b[top].SL = S - si;
			b[top].SR = S;
			b[top].b1 = b[top].e1 = b1+x;
			b[top].e2 = b[top].b2 = b2+y;
			L = top-1;
			while (L > 0 && b[L].SL > b[top].SL) 
				L = b[L].Lower;
			b[top].Lower = L;
			/* merge alignments */
			while (L > 0 && b[L].SR - b[top].SL <= X
		  	    && b[L].SR <= b[top].SR) {
		    		b[L].R = b[top].R;
				b[L].SR = b[top].SR;
	      			b[L].e1 = b[top].e1;
	      			b[L].e2 = b[top].e2;
	      			top = L;
				L = b[top].Lower;
			}
		}
	}
	for (i = 1; i <= top; i++)
		if ((score = b[i].SR-b[i].SL) >= K)
			print_align(score, seq1, seq2, 
				b[i].b1-from1, b[i].e1-from1, 
				b[i].b2-from2, b[i].e2-from2, 
				A + b[i].L);
	free(b);
}

void read_seqs(FILE *alignment)
{
	char buf[500], buf2[500], *p, *q;

	while (fgets(buf, 500, alignment)) {
		if (strncmp(buf, "s {", 3))
			continue;
		if (fgets(buf, 500, alignment) == NULL)
			fatal("Did not find sequence names in alignment file.");
		if ((q=strchr(buf,'"')) == NULL || (p=strchr(q+1,'"')) == NULL)
			fatal("Cannot find first sequence name.");
		*p = '\0';
		if (sscanf(p+1, "%d %d", &from1, &to1) != 2)
			fatal("Could not find positions in sequence 1");
		snprintf(buf2, 500, "%s[%d,%d]", q+1, from1, to1);

		sf1 = seq_open(buf2, 0, 0);
		if (seq_read(sf1) < 0)
			fatalf("error reading %s", buf2);
		len1 = SEQ_LEN(sf1);
		seq1 = SEQ_CHARS(sf1);

		--from1;
		if (fgets(buf, 500, alignment) == NULL)
			fatal("Did not find sequence names in alignment file.");
		if ((q=strchr(buf,'"')) == NULL || (p=strchr(q+1,'"')) == NULL)
			fatal("Cannot find second sequence name.");
		*p = '\0';
		if (sscanf(p+1, "%d %d", &from2, &to2) != 2)
			fatal("Could not find positions in sequence 2");
		snprintf(buf2, 500,"%s[%d,%d]", q+1, from2, to2);

		sf2 = seq_open(buf2, 0, 0);
		if (seq_read(sf2) < 0)
			fatalf("error reading %s", buf2);
		len2 = SEQ_LEN(sf2);
		seq2 = SEQ_CHARS(sf2);

		--from2;
		return;
	}
	fatal("Failed to extract sequence names from alignment file.");
}

int read_align(FILE *file_ptr, int *script, int *start1, int *start2)
{
    char line[100];
    char ell[32];
    int i, j, b1, b2, e1, e2, pe1, pe2;
    i = 0;
    while (fgets(line, 100, file_ptr)) 
        if (strncmp(line, "a {", 3) == 0) {
            fgets(line, 100, file_ptr);
            fgets(line,100, file_ptr);
            sscanf(line, "  b %d %d", &b1, &b2);
            fgets(line,100, file_ptr);
            sscanf(line, "  e %d %d", &e1, &e2);
            pe1 = b1; pe2 = b2;
	    *start1 = b1; *start2 = b2;
            while (fgets(line, 100, file_ptr)) {
                if (line[0] == '}') return i;
                sscanf(line, "%*s %d %d %d %d", &b1, &b2, &e1, &e2);
                if (b1 != pe1) script[i++] = -(b1-pe1);
                if (b2 != pe2) script[i++] = b2-pe2;
                for (j = b1; j <= e1; j++) script[i++] = 0;
                pe1 = e1+1; pe2 = e2+1;
            }
	    break;
        }
    
    return 0;
}
