/* Last edited on 2023-02-12 07:48:35 by stolfi */ #ifdef BOGUS #include #include #include #include #include #include #include #include #include #include #include #include pz_symbol_chain_t *pz_symbol_chain_cut( pz_symbol_chain_t *c, unsigned start, unsigned length ) { { /* with */ ??? rx = NEW(REF pz_symbol_chain_t, length), x = rx^; /* do */ pz_symbol_chain_do_cut(c, start, length, x); return rx; }; } /* pz_symbol_chain_cut */ void pz_symbol_chain_do_cut( pz_symbol_chain_t *c, unsigned start, unsigned length, pz_symbol_chain_t *x ) { { /* with */ ??? N = (c.ne); ??? ini = start MOD N; ??? fin = ini + length - 1; /* do */ if (( fin < N )){ x = SUBARRAY(c, ini, length) }else{ SUBARRAY(x, 0, N-ini) = SUBARRAY(c, ini, N-ini); SUBARRAY(x, N-ini, (fin+1) MOD N)= SUBARRAY(c, 0, (fin+1) MOD N);; };; }; } /* pz_symbol_chain_do_cut */ void pz_symbol_chain_reverse_and_complement( pz_symbol_chain_t *c ) VAR aux: pz_symbol_t; { { /* with */ ??? N = (c.ne), L = N-1; /* do */ for (i = 0; i <= (N DIV 2)-1 ; i++){ aux = c[i]; c[i] = c[L-i]; c[L-i] = aux; }; for (i = 0; i <= N-1 ; i++){ c[i] = pz_symbol_complement(c[i]) ;}; }; } /* pz_symbol_chain_reverse_and_complement */ pz_symbol_chain_t *pz_symbol_chain_extract_segment( pz_symbol_chain_t *c, pz_segment_t s ) { affirm((c.ne) == s.tot ); { /* with */ ??? rx = NEW(REF pz_symbol_chain_t, s.ns), x = rx^; /* do */ pz_symbol_chain_do_extract_segment(c, s, x); return rx; } } /* pz_symbol_chain_extract_segment */ void pz_symbol_chain_do_extract_segment( pz_symbol_chain_t *c, pz_segment_t s, pz_symbol_chain_t *x ) { affirm((c.ne) == s.tot ); pz_symbol_chain_do_cut(c, s.ini, s.ns, x); if (( s.rev )){ pz_symbol_chain_reverse_and_complement(x);; }; } /* pz_symbol_chain_do_extract_segment */ void pz_symbol_chain_print_segment( FILE *wr, pz_symbol_chain_t *c, pz_segment_t s ) { { /* with */ ??? m = (c.ne); /* do */ affirm((c.ne) == s.tot ); if (( s.rev )){ for (r = s.ns-1; r <= 0 BY -1 ; r++){ { /* with */ ??? ch = c[(s.ini + r) MOD m]; /* do */ fprintf(wr, Fmt.Char(pz_symbol_complement(ch))); };; }; }else{ for (r = 0; r <= s.ns-1 ; r++){ { /* with */ ??? ch = c[(s.ini + r) MOD m]; /* do */ fprintf(wr, Fmt.Char(ch)); };; }; fprintf(wr, "\n");; }; }; } /* pz_symbol_chain_print_segment */ CONST FileVersion == "99-08-22"; void pz_symbol_chain_write( FILE *wr, char *cmt, pz_symbol_chain_t *c, double sigma ) { { /* with */ ??? n = (c.ne - 1); /* do */ filefmt_write_header(wr, "pz_symbol_chain_t", FileVersion); int ind = 0; /* Comment indentation. */ filefmt_write_comment(wr, cmt, ind, '|'); NPut.Int(wr, "samples", (c.ne)); FPut.EOL(wr); NPut.LongReal(wr, "sigma", sigma); FPut.EOL(wr); for (i = 0; i <= n ; i++){ FPut.Char(wr, c[i]); FPut.EOL(wr); }; FPut.EOL(wr); filefmt_write_footer(wr, "pz_symbol_chain_t"); fflush(wr);; }; } /* pz_symbol_chain_write */ ReadData pz_symbol_chain_read( FILE *rd, bool_t header_only ) VAR d: ReadData; { filefmt_read_header(rd,"pz_symbol_chain_t", FileVersion); d.cmt = filefmt_read_comment(rd, '|'); d.samples = nget_int32(rd, "samples"); fget_eol(rd); d.sigma = nget_double(rd, "sigma"); fget_eol(rd); { /* with */ ??? n = d.samples; /* do */ if (( header_only )){ d.c = NULL }else{ d.c = NEW(REF pz_symbol_chain_t, n); for (i = 0; i <= n-1 ; i++){ FGet.Skip(rd, FGet.Blanks); d.c[i] = FGet.Char(rd);; }; filefmt_read_footer(rd, "pz_symbol_chain_t");; }; return d;; }; } /* pz_symbol_chain_read */ CONST OldFileVersion == "97-02-03"; ReadData pz_symbol_chain_read_old( FILE *rd, double sigma ) VAR d: ReadData; length, lambda, epsilon, delta: double; { filefmt_read_header(rd,"pz_symbol_chain_t", OldFileVersion); d.cmt = filefmt_read_comment(rd, '|'); d.samples = nget_int32(rd, "samples"); fget_eol(rd); { /* with */ ??? n = d.samples; /* do */ length = nget_double(rd, "length"); fget_eol(rd); epsilon = nget_double(rd, "epsilon"); fget_eol(rd); delta = nget_double(rd, "delta"); fget_eol(rd); d.c = NEW(REF pz_symbol_chain_t, n); d.sigma = sigma; for (i = 0; i <= n-1 ; i++){ FGet.Skip(rd, FGet.Blanks); { /* with */ ??? c = FGet.Char(rd); ??? x = FLOAT(pz_symbol_to_int(c), double); /* do */ d.c[i] = pz_symbol_encode(pz_proc.OldStretch(x, epsilon, delta), lambda); }; }; filefmt_read_footer(rd, "pz_symbol_chain_t"); return d;; }; } /* pz_symbol_chain_read_old */ ReadAllData pz_symbol_chain_read_all( char *prefix, unsigned band, char *extension, bool_vec_t *sel, bool_t header_only, char *dir /* (:= ".") */ ) VAR sigmaMin: double := (double.ne - 1); sigmaMax: double = (-INFINITY); cmt: char *= ""; CONST NoData == ReadData{ c = NULL, samples = 0, sigma = 0.0e0, cmt = "NOT READ" }; { fprintf(stderr, "reading symbolic chains:\n"); { /* with */ ??? rr = NEW(ARRAY *OF ReadData, (sel.ne)), r = rr^; /* do */ for (k = 0; k < (sel.ne ) ; k++){ if (( sel[k] )){ fprintf(stderr, " %s: ", Fmt.Pad(Fmt.Int(k), 4, '0') ); { /* with */ ??? chainTag = Fmt.Pad(Fmt.Int(k), 4, '0'); ??? bandTag = Fmt.Pad(Fmt.Int(band), 3, '0'); ??? fileName = txtcat(prefix & bandTag , extension); ??? fullName = txtcat(dir & "/" & chainTag &"/", fileName); ??? rd = OpenRead(fullName); /* do */ if (( rd != NULL )){ fprintf(stderr, "read from %s\n", fullName ); { /* with */ ??? data = pz_symbol_chain_read(rd, header_only := header_only); /* do */ r[k] = data; affirm(data.sigma >= 0.0e0 ); if (( sigmaMin > sigmaMax )){ cmt = data.cmt; }; sigmaMin = min(sigmaMin, data.sigma); sigmaMax = max(sigmaMax, data.sigma);; }; Rd.Close(rd) }else{ r[k] = NoData; fprintf(stderr, "file %s not found.\n", fullName );; }; } }else{ r[k] = NoData;; }; }; return ReadAllData { chData = rr, sigmaMin = sigmaMin, sigmaMax = sigmaMax, cmt = "Chains read from " & dir & "/NNNN/" & prefix & "BBB" & extension & "\n" & cmt }; } } /* pz_symbol_chain_read_all */ FILE *OpenRead( char *name ) { TRY return open_read(name, TRUE) EXCEPT OSError.E ==> return NULL; } } /* OpenRead */ void pz_symbol_chain_match_find_best( pz_symbol_chain_t *a, pz_symbol_chain_t *b, ARRAY *pz_symbol_t OF double decode, ARRAY *pz_symbol_t OF double errorVar, double step, double maxDistSqr, double skipDistSqr, bool_t removeUnmatchedEnds, REF *pz_match_t mt /* (OUT) */ double *avgDisc, /* (OUT) */ double *length, /* (OUT) */ double *matchedLength, /* (OUT) */ REF *pz_match_cost_matrix_t rCost, /* (WORK) */ ) TYPE Pair == pz_match_pair; double StepDistSqr( Pair *p, Pair *q ) /* Computes the mean squared chain distance for one pairing step "p -> q": */ { { /* with */ mdSqr = pz_symbol_intg_dist_sqr( a[p[0]], a[q[0]], b[p[1]], b[q[1]]; complement := FALSE; decode := decode; errorVar := errorVar ); /* do */ return min(mdSqr, maxDistSqr);; } } /* StepDistSqr */ double StepIntgDiscSqr( Pair *p, Pair *q ) /* Computes the integrated discrepancy squared for one pairing step "p -> q": */ { { /* with */ ??? d2 = StepDistSqr(p, q); /* do */ if (( p[0] == q[0] ) || ( p[1] == q[1] )){ return step * (d2 + skipDistSqr)/2.0e0 }else{ return step * d2; }; } } /* StepIntgDiscSqr */ bool_t StepIsMatched( Pair *p, Pair *q ) /* A "pz_match_step_predicate_t" that returns TRUE if the step "p->q" has average chain distance less than "maxDist". */ { return StepDistSqr(p, q) < maxDistSqr } /* StepIsMatched */ VAR intgDiscSqr: double; { { /* with */ ??? NA = (a.ne); ??? NB = (b.ne); /* do */ pz_match_find_best( NA, NB, stepCostFn = StepIntgDiscSqr, /*OUT*/ mt = mt, totCost = intgDiscSqr, /*WORK*/ rCost = rCost ); if (( removeUnmatchedEnds )){ mt = pz_match_remove_unmatched_ends(mt^, StepIsMatched); intgDiscSqr = pz_match_sum(mt^, stepFn = StepIntgDiscSqr); }; affirm((mt^.ne) > 0 ); { /* with */ ??? m = (mt^.ne); ??? na = mt[m-1][0] - mt[0][0]; ??? nb = mt[m-1][1] - mt[0][1]; /* do */ length = step * ((double)na + nb)/2.0e0; if (( length <= 0.0e0 )){ avgDisc = sqrt(maxDistSqr) }else{ avgDisc = sqrt(intgDiscSqr/length); }; }; { /* with */ ??? msc = pz_match_matched_step_count(mt^, StepIsMatched); /* do */ matchedLength = step * ((double)msc)/2.0e0; }; }; } /* pz_symbol_chain_match_find_best */ void pz_symbol_chain_match_refine( pz_symbol_chain_t *a, pz_symbol_chain_t *b, ARRAY *pz_symbol_t OF double decode, ARRAY *pz_symbol_t OF double errorVar, double step, pz_match_t *mtOld, unsigned maxAdjust, double critDistSqr, double maxDistSqr, double skipDistSqr, REF *pz_match_t mt /* (OUT) */ double *mismatch, /* (OUT) */ double *length, /* (OUT) */ double *matchedLength, /* (OUT) */ double_vec_t *minAvgDisc, /* (OUT) */ REF *pz_match_cost_matrix_t rCost, /* (WORK) */ ) TYPE Pair == pz_match_pair; double StepDistSqr ( Pair *p, Pair *q ) /* The mean square distance between the chain steps "a[p[0]]->a[q[0]]" and "b[p[1]]->b[q[1]]". */ { { /* with */ mdSqr = pz_symbol_intg_dist_sqr( a[p[0]], a[q[0]], b[p[1]], b[q[1]]; complement := FALSE; decode := decode; errorVar := errorVar ); /* do */ /* fprintf(stderr,"a[" & Fmt.Int(p[0]) & ", "& Fmt.Int(q[0]) & "]:" & Fmt.Char(a[p[0]]) & " " & Fmt.Char(a[q[0]]) & " "); fprintf(stderr,"b[" & Fmt.Int(p[1]) & ", " & Fmt.Int(q[1]) & "]:" & Fmt.Char(b[p[1]]) & " " & Fmt.Char(b[q[1]]) & " "); fprintf(stderr, "mdSqr: %s ", Fmt.LongReal(mdSqr) ); fprintf(stderr,"mdSqr-critDistSqr: " & Fmt.LongReal(mdSqr-critDistSqr) & "\n"); */ return min(mdSqr, maxDistSqr); } } /* StepDistSqr */ double StepMismatch ( Pair *p, Pair *q ) /* A "pz_match_step_function_t" whose value is the mismatch of the step "p -> q", i.e. the integrated square distance between the chain steps "a[p[0]]->a[q[0]]" and "b[p[1]]->b[q[1]]", plus the skip distance squared, minus the critical distance squared, everything times the step length. */ { { /* with */ ??? d2 = StepDistSqr(p, q) - critDistSqr; ??? hs2 = step * (d2 + skipDistSqr)/2.0e0; ??? s2 = step * d2; /* do */ if (( p[0] == q[0] ) || ( p[1] == q[1] )){ return hs2 }else{ return s2; }; } } /* StepMismatch */ bool_t StepIsMatched( Pair *p, Pair *q ) /* A "pz_match_step_predicate_t" that returns TRUE if the step "p->q" has average chain distance less than "maxDist". */ { return StepDistSqr(p, q) < maxDistSqr } /* StepIsMatched */ { { /* with */ ??? NA = (a.ne); ??? NB = (b.ne); /* do */ affirm(NA > 0 ); affirm(NB > 0 ); pz_match_refine( NA = NA, NB = NB, stepCostFn = StepMismatch, mtOld = mtOld, maxAdjust = maxAdjust, /*OUT*/ mt = mt, /*OUT*/ totCost = mismatch, /*OUT*/ minTotCost = minAvgDisc, /*WORK*/ rCost = rCost ); affirm( (mt^.ne) > 0 ); { /* with */ ??? mt = mt^; ??? m = (mt.ne); ??? stepCount = (mt[m-1][0] - mt[0][0]) + (mt[m-1][1] - mt[0][1]); ??? matchedStepCount = pz_match_matched_step_count(mt, StepIsMatched); /* do */ length = ((double)stepCount) * step / 2.0e0; matchedLength = step * ((double)matchedStepCount)/2.0e0; }; /* Compute the minimum average distance for each pairing length "K". */ /* Note that "pz_match_refine" returns total mismatch, not just "intgDiscSqr" */ affirm( minAvgDisc[0] == 0.0e0 ); for ( K = 1 TO (minAvgDisc.ne - 1) ){ { /* with */ ??? Lk = step*((double)K)/2.0e0; /* do */ minAvgDisc[K] = sqrt(max(0.0e0, minAvgDisc[K]/Lk + critDistSqr)); }; }; }; } /* pz_symbol_chain_match_refine */ { /* Copyright © 2001 Universidade Estadual de Campinas (UNICAMP). Authors: Helena C. G. Leitão and Jorge Stolfi. This file can be freely distributed, used, and modified, provided that this copyright and authorship notice is preserved, and that any modified versions are clearly marked as such. This software has NO WARRANTY of correctness or applicability for any purpose. Neither the authors nor their employers chall be held responsible for any losses or damages that may result from its use. */ #endif