/* Last edited on 2023-02-12 07:49:42 by stolfi */ #include #include #include #include #include #include #include #include #include #define DebugOverlap (FALSE) /* CANDIDATE LISTS */ vec_typeimpl(pz_candidate_vec_t,pz_candidate_vec,pz_candidate_t); bool_t pz_candidate_is_empty ( pz_cand_t *c ) { return c.seg[0].ns == 0 ) || ( c.seg[1].ns == 0 } pz_cand_t pz_candidate_expand ( pz_cand_t *c, i2_t iniExtra, i2_t finExtra ) { { /* with */ ??? oldSteps = c.seg[0].ns + c.seg[1].ns - 2; newSeg = pz_segment_pair{ pz_segment_expand(c.seg[0], iniExtra[0], finExtra[0]); pz_segment_expand(c.seg[1], iniExtra[1], finExtra[1]); } ??? newSteps = newSeg[0].ns + newSeg[1].ns - 2; /* do */ return pz_cand_t{ seg = newSeg, mismatch = c.mismatch, length = c.length * ((double)newSteps)/((double)oldSteps), matchedLength = c.matchedLength, pm = pz_match_adjust_packed(c.pm, c.seg, newSeg); } } } pz_candidate_t pz_candidate_cut_and_pack ( pz_cand_t *cand, pz_match_t *mt, double mismatch, double length, double matchedLength ) VAR t: pz_segment_pair; /* Matched segments */ { { /* with */ ??? nm = (mt.ne); ??? rpm = pz_match_pack(mt); /* do */ for (j = 0; j <= 1; j++){ { /* with */ ??? sj = cand.seg[j]; ??? tj = t[j]; /* do */ tj = sj; tj.cvx = sj.cvx; tj.tot = sj.tot; if (( sj.rev )){ tj.ini = (sj.ini + sj.ns - 1 - mt[nm-1][j]) MOD sj.tot }else{ tj.ini = (sj.ini + mt[0][j]) MOD sj.tot; } tj.ns = mt[nm-1][j] - mt[0][j] + 1; tj.rev = sj.rev; } } return pz_cand_t{ seg = t, mismatch = mismatch, length = length, matchedLength = matchedLength, pm = pz_match_adjust_packed(rpm, cand.seg, t); } } } CONST OldFileVersion == "97-02-03"; NewFileVersion == "99-07-25"; void pz_candidate_write ( FILE *wr, char *cmt, pz_candidate_vec_t *c, double lambda ) { { /* with */ ??? n = (c.ne - 1); /* do */ filefmt_write_header(wr, "pz_candidate.pz_candidate_vec_t", NewFileVersion); int ind = 0; /* Comment indentation. */ filefmt_write_comment(wr, cmt, '|'); NPut.Int(wr, "candidates", (c.ne)); FPut.EOL(wr); NPut.LongReal(wr, "lambda", lambda); FPut.EOL(wr); for (i = 0; i <= n ; i++){ { /* with */ ??? ci = c[i]; /* do */ for (j = 0; j <= 1; j++){ pz_segment_writ_eone(wr, ci.seg[j]); FPut.Space(wr); } FPut.LongReal(wr, ci.mismatch, prec = 4); FPut.Space(wr); FPut.LongReal(wr, ci.length, prec = 2); FPut.Space(wr); FPut.LongReal(wr, ci.matchedLength, prec = 2); if (( ci.pm != NULL )){ FPut.Space(wr); FPut.Space(wr); pz_match_write_packed(wr, ci.pm^); } FPut.EOL(wr); } } filefmt_write_footer(wr, "pz_candidate.pz_candidate_vec_t"); fflush(wr); } } pz_candidate_read_data pz_candidate_read ( FILE *rd ) VAR d: pz_candidate_read_data; { filefmt_read_header(rd, "pz_candidate.pz_candidate_vec_t", NewFileVersion); d.cmt = filefmt_read_comment(rd, '|'); { /* with */ ??? n = nget_int32(rd, "candidates"); /* do */ fget_eol(rd); d.c = pz_candidate_vec_new(n); d.lambda = nget_double(rd, "lambda"); fget_eol(rd); for (i = 0; i <= n-1; i++){ fget_skip_spaces(rd); { /* with */ ??? ci = d.c[i]; /* do */ for (j = 0; j <= 1; j++){ ci.seg[j] = pz_segment_read_one(rd); fget_skip_spaces(rd); } affirm(ci.seg[0].cvx < ci.seg[1].cvx ); ci.mismatch = fget_double(rd); fget_skip_spaces(rd); ci.length = fget_double(rd); fget_skip_spaces(rd); ci.matchedLength = fget_double(rd); fget_skip_spaces(rd); { /* with */ ??? c = fgetc(rd); /* do */ Rd.UnGetChar(rd); if (( c == '\n' )){ ci.pm = NULL; }else{ ci.pm = pz_match_read_packed(rd); } } fget_eol(rd); } } filefmt_read_footer(rd, "pz_candidate.pz_candidate_vec_t"); return d; } } pz_candidate_read_data pz_candidate_read_old ( FILE *rd, int_vec_t *m, ARRAY [0..1] OF bool_t rev ) VAR d: pz_candidate_read_data; { filefmt_read_header(rd, "pz_candidate.pz_candidate_vec_t", OldFileVersion); d.cmt = filefmt_read_comment(rd, '|'); { /* with */ ??? n = nget_int32(rd, "candidates"); /* do */ fget_eol(rd); d.c = pz_candidate_vec_new(n); d.lambda = nget_double(rd, "lambda"); fget_eol(rd); for (i = 0; i <= n-1; i++){ fget_skip_spaces(rd); { /* with */ ??? ci = d.c[i]; /* do */ for (j = 0; j <= 1; j++){ { /* with */ ??? cij = ci.seg[j]; /* do */ cij.cvx = fget_int32(rd); fget_skip_spaces(rd); cij.tot = m[cij.cvx]; cij.ini = fget_int32(rd); fget_skip_spaces(rd); { /* with */ ??? fin = fget_int32(rd); /* do */ cij.ns = (fin - cij.ini + 1) MOD cij.tot; } fget_skip_spaces(rd); cij.rev = rev[j]; } } ci.mismatch = fget_double(rd); fget_skip_spaces(rd); ci.length = fget_double(rd); fget_skip_spaces(rd); ci.matchedLength = fget_double(rd); fget_skip_spaces(rd); { /* with */ ??? c = fgetc(rd); /* do */ Rd.UnGetChar(rd); if (( c == '\n' )){ ci.pm = NULL; }else{ ci.pm = pz_match_read_packed(rd); } } fget_eol(rd); } } filefmt_read_footer(rd, "pz_candidate.pz_candidate_vec_t"); return d; } } bool_vec_t *pz_candidate_chains_used ( pz_candidate_vec_t *cand ) VAR n: unsigned := 0; { /* Find number of elements to allocate: */ for (i = 0; i < (cand.ne ); i++){ { /* with */ ??? si = cand[i].seg, k0 = si[0].cvx, k1 = si[1].cvx; /* do */ n = max(n, max(k0, k1) + 1); } } /* Mark used chains: */ { /* with */ ??? r = bool_vec_new(n), used = r^; /* do */ for (k = 0; k < (used.ne ); k++){ used[k] = FALSE; } for (i = 0; i < (cand.ne ); i++){ { /* with */ ??? si = cand[i].seg, k0 = si[0].cvx, k1 = si[1].cvx; /* do */ used[k0] = TRUE; used[k1] = TRUE; } } return r; } } void pz_candidate_print ( FILE *wr, pz_cand_t *cand, bool_t pairing /* (:= TRUE) */ ) void pz_candidate_print_half ( [0..1] j, pz_segment_t seg ) { fprintf(wr, " "); fprintf(wr, "seg[%s]: ", Fmt.Int(j) ); pz_segment_print(wr, seg); fprintf(wr, "\n"); } { pz_candidate_print_half(0, cand.seg[0]); pz_candidate_print_half(1, cand.seg[1]); if (( pairing ) ANDAND ( cand.pm != NULL )){ pz_match_write_packed(wr, cand.pm^); fprintf(wr, "\n"); } fprintf(wr,"mismatch == " & FLR(cand.mismatch, 10, 6)); fprintf(wr, " "); fprintf(wr,"length == " & FLR(cand.length, 9, 4)); fprintf(wr, " "); fprintf(wr,"matchedLength == " & FLR(cand.matchedLength, 9, 4)); fprintf(wr, "\n"); } void pz_candidate_sort ( pz_candidate_vec_t *c, CompareProc before ) VAR q, r, mx: unsigned; cc, smx: pz_cand_t; { { /* with */ ??? n = (c.ne); /* do */ /* 1. Build heap with largest element at c[0]: */ for (p = 0; p <= n-1 ; p++){ cc = c[p]; r = p; while (1){ if (( r == 0 )){ EXIT; } q = (r-1) DIV 2; if (( before(c[q], cc) )){ c[r] = c[q]; }else{ EXIT; } r = q; } c[r] = cc; } /* 2. Remove elements from heap and insert at end: */ for (p = n-1; p <= 1 BY -1; p++){ /* save c[p] */ cc = c[p]; /* Move largest heap element to c[p]: */ c[p] = c[0]; /* Insert cc in remaining heap, from root down: */ q = 0; while (1){ c[q] = cc; r = 2*q+1; /* Find largest among cc, c[LEFT(q)], c[RIGHT(q)]: */ mx = q; smx = cc; if (( r < p ) ANDAND ( before(smx, c[r]) )){ mx = r; smx = c[r]; } r++; if (( r < p ) ANDAND ( before(smx, c[r]) )){ mx = r; smx = c[r]; } /* See who won: */ if (( mx == q )){ /* Stop here: */ EXIT }else{ /* Promote child and advance: */ c[q] = c[mx]; q = mx; } } } /* Paranoid check: */ for (i = 1; i <= n-1; i++){ affirm(NOT before(c[i],c[i-1]) ); } } } void pz_candidate_index_sort ( pz_candidate_vec_t *c, int_vec_t *x, CompareProc before ) VAR q, r, mx: unsigned; cc, smx: unsigned; { affirm((c.ne) == (x.ne) ); { /* with */ ??? n = (c.ne); /* do */ /* 1. Build heap with index of largest element at x[0]: */ for (p = 0; p <= n-1; p++){ x[p] = p; } for (p = 0; p <= n-1; p++){ cc = x[p]; r = p; while (1){ if (( r == 0 )){ EXIT; } q = (r-1) DIV 2; if (( before(c[x[q]], c[cc]) )){ x[r] = x[q]; }else{ EXIT; } r = q; } x[r] = cc; } /* 2. Remove elements from heap and insert at end: */ for (p = n-1; p <= 1 BY -1; p++){ /* save x[p] */ cc = x[p]; /* Move largest heap element to x[p]: */ x[p] = x[0]; /* Insert cc in remaining heap, from root down: */ q = 0; while (1){ x[q] = cc; r = 2*q+1; /* Find largest among cc, c[LEFT(q)], c[RIGHT(q)]: */ mx = q; smx = cc; if (( r < p ) ANDAND ( before(c[smx], c[x[r]]) )){ mx = r; smx = x[r]; } r++; if (( r < p ) ANDAND ( before(c[smx], c[x[r]]) )){ mx = r; smx = x[r]; } /* See who won: */ if (( mx == q )){ /* Stop here: */ EXIT }else{ /* Promote child and advance: */ x[q] = x[mx]; q = mx; } } } /* Paranoid check: */ for (i = 1; i <= n-1; i++){ affirm(NOT before(c[x[i]],c[x[i-1]]) ); } } } bool_t pz_candidate_lexically_better ( pz_cand_t *a, pz_cand_t *b ) { if (( a.seg[0].cvx != b.seg[0].cvx )){ return a.seg[0].cvx < b.seg[0].cvx; } affirm(a.seg[0].tot == b.seg[0].tot ); if (( a.seg[1].cvx != b.seg[1].cvx )){ return a.seg[1].cvx < b.seg[1].cvx; } affirm(a.seg[1].tot == b.seg[1].tot ); if (( a.seg[0].ini != b.seg[0].ini )){ return a.seg[0].ini < b.seg[0].ini; } if (( a.seg[1].ini != b.seg[1].ini )){ return a.seg[1].ini < b.seg[1].ini; } if (( a.seg[0].ns != b.seg[0].ns )){ return a.seg[0].ns < b.seg[0].ns; } if (( a.seg[1].ns != b.seg[1].ns )){ return a.seg[1].ns < b.seg[1].ns; } if (( a.mismatch != b.mismatch )){ return a.mismatch < b.mismatch; } if (( a.length != b.length )){ return a.length > b.length; } if (( a.matchedLength != b.matchedLength )){ return a.matchedLength > b.matchedLength; } return FALSE } bool_t pz_candidate_pair_mismatch_better ( pz_cand_t *a, pz_cand_t *b ) { if (( a.seg[0].cvx != b.seg[0].cvx )){ return a.seg[0].cvx < b.seg[0].cvx; } affirm(a.seg[0].tot == b.seg[0].tot ); if (( a.seg[1].cvx != b.seg[1].cvx )){ return a.seg[1].cvx < b.seg[1].cvx; } affirm(a.seg[1].tot == b.seg[1].tot ); if (( a.mismatch != b.mismatch )){ return a.mismatch < b.mismatch; } if (( a.length != b.length )){ return a.length > b.length; } if (( a.matchedLength != b.matchedLength )){ return a.matchedLength > b.matchedLength; } if (( a.seg[0].ns != b.seg[0].ns )){ return a.seg[0].ns > b.seg[0].ns; } if (( a.seg[1].ns != b.seg[1].ns )){ return a.seg[1].ns > b.seg[1].ns; } if (( a.seg[0].ini != b.seg[0].ini )){ return a.seg[0].ini < b.seg[0].ini; } if (( a.seg[1].ini != b.seg[1].ini )){ return a.seg[1].ini < b.seg[1].ini; } return FALSE } bool_t pz_candidate_mismatch_better ( pz_cand_t *a, pz_cand_t *b ) /* Compares by "mismatch", breaking ties by other criteria */ { if (( a.mismatch != b.mismatch )){ return a.mismatch < b.mismatch; } if (( a.length != b.length )){ return a.length > b.length; } if (( a.matchedLength != b.matchedLength )){ return a.matchedLength > b.matchedLength; } if (( a.seg[0].cvx != b.seg[0].cvx )){ return a.seg[0].cvx < b.seg[0].cvx; } affirm(a.seg[0].tot == b.seg[0].tot ); if (( a.seg[1].cvx != b.seg[1].cvx )){ return a.seg[1].cvx < b.seg[1].cvx; } affirm(a.seg[1].tot == b.seg[1].tot ); if (( a.seg[0].ini != b.seg[0].ini )){ return a.seg[0].ini < b.seg[0].ini; } if (( a.seg[1].ini != b.seg[1].ini )){ return a.seg[1].ini < b.seg[1].ini; } if (( a.seg[0].ns != b.seg[0].ns )){ return a.seg[0].ns < b.seg[0].ns; } if (( a.seg[1].ns != b.seg[1].ns )){ return a.seg[1].ns < b.seg[1].ns; } return FALSE } void pz_candidate_remove_duplicates ( pz_candidate_vec_t *cand, unsigned *nCands ) VAR nKept: unsigned := 0; { for (i = 0; i <= nCands-1; i++){ if (( nKept > 0 )){ { /* with */ ??? ci = cand[i], cp = cand[nKept-1]; /* do */ if (( (ci.seg[0] != cp.seg[0] ) || ( ci.seg[1] != cp.seg[1]) )){ if (( nKept != i )){ cand[nKept] = ci; } nKept++; } } } } nCands = nKept; } void pz_candidate_remove_short ( pz_candidate_vec_t *cand, char **cmt, unsigned minSteps ) VAR nKept: unsigned := 0; { int nOrig = c->nel; int nKept = 0; for (i = 0; i < nOrig; i++) { if (( (cand[i].seg[0].ns > minSteps) ) || ( (cand[i].seg[1].ns > minSteps) )){ if (( nKept != i )){ cand[nKept] = cand[i]; } nKept++; } } nCands = nKept; if (nCands < cData.c->nel) { d = NEW(REF pz_candidate.List, nCands), d = rd^; /* do */ d = SUBARRAY(cData.c^, 0, nCands); cData.c = rd; } ??? must reallocate the array if (cnel < nCands) int diff = nCands - c.ne; fprintf(stderr, txt & "\n"); cData.cmt = txtcat(cData.cmt & "\n" & "pz_draw_cands:\n" , txt); char *txt = jsprintf("removed %d candidates with less than %d steps.\n" & " & Fmt.Int(diff) & " " & Fmt.Int(minSteps) & " } void pz_candidate_prune_cands_per_pair ( pz_candidate_vec_t *cand, unsigned *nCands, unsigned maxCands ) /* Assumes candidates are sorted by "pz_candidate_pair_mismatch_better", so that all candidates for the same chain pair are in adjacent positions, sorted by decreasing quality. */ VAR nKept: unsigned := 0; /* Number of candidates kept */ nPair: unsigned; /* Number of candidates of current chain pair */ k0Old, k1Old: unsigned = (unsigned.ne - 1); { for (i = 0; i <= nCands-1; i++){ { /* with */ ??? k0 = cand[i].seg[0].cvx; ??? k1 = cand[i].seg[1].cvx; /* do */ if (( k0 != k0Old ) || ( k1 != k1Old )){ nPair = 0; k0Old = k0; k1Old = k1; } nPair++; if (( (nPair <= maxCands) )){ if (( i > nKept )){ cand[nKept] = cand[i]; } nKept++; } } } nCands = nKept; } void pz_candidate_prune_cands_per_chain ( pz_candidate_vec_t *cand, unsigned *nCands, unsigned maxCands ) VAR nKept: unsigned := 0; /* Number of candidates kept */ { { /* with */ ??? chainUsed = pz_candidate_chains_used(SUBARRAY(cand, 0, nCands))^; ??? nChains = (chainUsed.ne); ??? ctChain = int_vec_new(nChains)^; /* do */ for (i = 0; i <= nChains-1; i++){ ctChain[i] = 0; } for (i = 0; i <= nCands-1; i++){ { /* with */ ??? k0 = cand[i].seg[0].cvx; ??? ct0 = ctChain[k0]; ??? k1 = cand[i].seg[1].cvx; ??? ct1 = ctChain[k1]; /* do */ ct0++; ct1++; if (( (ct0 <= maxCands ) || ( ct1 <= maxCands) )){ if (( i > nKept )){ cand[nKept] = cand[i]; } nKept++; } } } nCands = nKept; } } void pz_candidate_find_similar_cands ( pz_candidate_vec_t *aCand, pz_candidate_vec_t *bCand, unsigned minSteps, unsigned maxAdjust, bool_vec_t *aOK /* (OUT) */ bool_vec_t *bOK, /* (OUT) */ bool_t printAMatched, /* (:= FALSE) */ bool_t printAUnmatched, /* (:= FALSE) */ bool_t printBMatched, /* (:= FALSE) */ bool_t printBUnmatched, /* (:= FALSE) */ bool_t printMatchedCands, /* (:= FALSE) */ bool_t printMatchedChains, /* (:= FALSE) */ ) [ CompareCandsLexChains ( pz_cand_t *a, pz_cand_t *b )-1..+1] = /* Compares the chain pair of "a" with the chain pair of "b", lexicographically. Returns 0 iff the two candidates belong to the same pair of chains. */ { { /* with */ ??? k0a = a.seg[0].cvx; ??? k1a = a.seg[1].cvx; ??? k0b = b.seg[0].cvx; ??? k1b = b.seg[1].cvx; /* do */ if (( k0a < k0b )){ return -1 }else if (( k0a > k0b )){ return +1 }else if (( k1a < k1b )){ return -1 }else if (( k1a > k1b )){ return +1 }else{ return 0; } } } /* CompareCandsLexChains */ void pz_candidate_print_candidate ( char *msg, char *tag, unsigned index, pz_cand_t *cand ) { fprintf(stderr, Fmt.Pad(msg & ":", 11, ' ', Fmt.Align.Left) & tag & "[" & FI(index,6) & "]\n" ); pz_candidate_print(stderr, cand = cand) } VAR aIni, bIni: unsigned; aFin, bFin: unsigned; cmp: [-1..+1]; VAR ra, rb: REF pz_match_t := NIL; /* Work areas for "pz_candidate.pz_candidate_overlap" */ { { /* with */ ??? aN = (aCand.ne); ??? bN = (bCand.ne); ??? ax = int_vec_new(aN)^; ??? bx = int_vec_new(bN)^; /* do */ pz_candidate_index_sort(aCand, ax, pz_candidate_lexically_better); pz_candidate_index_sort(bCand, bx, pz_candidate_lexically_better); /* Paranoid check of sorting: */ for (ai = 1; ai < (aCand.ne ); ai++){ if (( aIni > 0 )){ affirm(CompareCandsLexChains(aCand[ax[ai-1]], aCand[ax[ai]]) <= 0 ); } } for (bi = 1; bi < (bCand.ne ); bi++){ if (( bIni > 0 )){ affirm(CompareCandsLexChains(bCand[bx[bi-1]], bCand[bx[bi]]) <= 0 ); } } for ( aIndex = 0 TO (aCand.ne - 1) ){ aOK[aIndex] = FALSE; } for ( bIndex = 0 TO (bCand.ne - 1) ){ bOK[bIndex] = FALSE; } bIni = 0; aIni = 0; while ( bIni < bN ) || ( aIni < aN ){ /* Decide which should move: */ if (( aIni < (aCand.ne ) ) ANDAND ( bIni < (bCand.ne ) )){ cmp = CompareCandsLexChains(aCand[ax[aIni]], bCand[bx[bIni]]) }else if (( aIni > (aCand.ne - 1) ) ANDAND ( bIni < (bCand.ne ) )){ cmp = +1 }else if (( aIni < (aCand.ne ) ) ANDAND ( bIni > (bCand.ne - 1) )){ cmp = -1 }else{ affirm(FALSE ); } /* Move along: */ if (( cmp < 0 )){ if (( printAUnmatched )){ fprintf(stderr, "\n"); pz_candidate_print_candidate("unmatched", "a", ax[aIni], aCand[ax[aIni]]); fprintf(stderr, "\n"); } aIni++ }else if (( cmp > 0 )){ if (( printBUnmatched )){ fprintf(stderr, "\n"); pz_candidate_print_candidate("unmatched", "b", bx[bIni], bCand[bx[bIni]]); fprintf(stderr, "\n"); } bIni++ }else{ /* Found two candidates with same chain pairs: */ /* Locate the last "a" candidate "aFin-1" of this chain pair: */ aFin = aIni + 1; while ( aFin < aN ) ANDAND ( CompareCandsLexChains(aCand[ax[aIni]], aCand[ax[aFin]]) == 0 ){ aFin++; } /* Locate the last "b" candidate "bFin-1" of this chain pair: */ bFin = bIni+1; while ( bFin < bN ) ANDAND ( CompareCandsLexChains(bCand[bx[bIni]], bCand[bx[bFin]]) == 0 ){ bFin++; } /* Compare all "a" candidates against all "b" candidates: */ VAR nMatches: unsigned := 0; { for (ai = aIni; ai <= aFin-1; ai++){ { /* with */ ??? aC = aCand[ax[ai]]; /* do */ for (bi = bIni; bi <= bFin-1; bi++){ { /* with */ ??? bC = bCand[bx[bi]]; /* do */ { /* with */ ??? overlap = pz_candidate_overlap(bC, aC; maxAdjust := maxAdjust; ra := ra, rb := rb ); /* do */ if (( DebugOverlap )){ fprintf(stderr, "===== overlap == %s =====\n", Fmt.Int(overlap) ); pz_candidate_print(stderr, aC, pairing = FALSE); pz_candidate_print(stderr, bC, pairing = FALSE); } if (( overlap >= 2*minSteps )){ aOK[ax[ai]] = TRUE; bOK[bx[bi]] = TRUE; nMatches++; if (( printMatchedCands )){ fprintf(stderr, "\n"); fprintf(stderr, "overlap == %s\n", Fmt.Int(overlap) ); pz_candidate_print_candidate("match", "a", ax[ai], aCand[ax[ai]]); pz_candidate_print_candidate("match", "b", bx[bi], bCand[bx[bi]]); fprintf(stderr, "\n"); } } } } } } } if (( printMatchedChains ) ANDAND ( nMatches > 0 )){ { /* with */ ??? s = aCand[ax[aIni]].seg; /* do */ fprintf(stderr, "chains: " & FI(s[0].cvx,4) & " × " & FI(s[1].cvx,4)); } fprintf(stderr, ": " & FI(aFin - aIni,6) & " \"a\" cands, "); fprintf(stderr, FI(aFin - aIni,6) & " \"b\" cands,"); fprintf(stderr, FI(nMatches,6) & " pairing pairs.\n"); } } if (( printAMatched )){ for (ai = aIni; ai <= aFin-1; ai++){ if (( aOK[ax[ai]] )){ fprintf(stderr, "\n"); pz_candidate_print_candidate("matched", "a", ax[ai], aCand[ax[ai]]); fprintf(stderr, "\n"); } } } if (( printBMatched )){ for (bi = bIni; bi <= bFin-1; bi++){ if (( bOK[bx[bi]] )){ fprintf(stderr, "\n"); pz_candidate_print_candidate("matched", "b", bx[bi], bCand[bx[bi]]); fprintf(stderr, "\n"); } } } /* Start next chain pair: */ bIni = bFin; aIni = aFin; } } } } void pz_candidate_print_similarity_statistics ( FILE *wr, bool_vec_t *aOK, char *aName, bool_vec_t *bOK, char *bName ) unsigned pz_candidate_count_ok ( bool_vec_t *OK ) VAR n: unsigned := 0; { for (i = 0; i < (OK.ne ) ){ if (( OK[i] ); i++){ n++ } /*; } */ return n } void pz_candidate_pr_parm ( unsigned wd, char *name, char *x ) { fprintf(wr, Fmt.Pad(name, wd, ' ', Fmt.Align.Left) & " == " & x & "\n"); } { /* print statistics: */ { /* with */ ??? aN = (aOK.ne), aOKN = pz_candidate_count_ok(aOK); ??? bN = (bOK.ne), bOKN = pz_candidate_count_ok(bOK); ??? recall = FLOAT(bOKN,double)/FLOAT(MAX(1,bN),double); ??? precision = FLOAT(aOKN,double)/FLOAT(MAX(1,aN),double); ??? multiplicity = FLOAT(aOKN,double)/FLOAT(MAX(1,bOKN),double); /* do */ fprintf(wr, "\n"); pz_candidate_pr_parm(30, aName & " candidates", Fmt.Int(aN)); pz_candidate_pr_parm(30, bName & " candidates", Fmt.Int(bN)); pz_candidate_pr_parm(30, aName & " in " & bName, Fmt.Int(aOKN)); pz_candidate_pr_parm(30, aName & " not in " & bName, Fmt.Int(aN - aOKN)); pz_candidate_pr_parm(30, bName & " in " & aName, Fmt.Int(bOKN)); pz_candidate_pr_parm(30, bName & " not in " & aName, Fmt.Int(bN - bOKN)); fprintf(wr,"\n"); pz_candidate_pr_parm(12, "recall", FLR(recall, 6,4)); pz_candidate_pr_parm(12, "precision", FLR(precision, 6,4)); pz_candidate_pr_parm(12, "multiplicity", FLR(multiplicity, 6,4)); } fprintf(wr,"\n"); } void pz_candidate_merge_overlaps ( pz_candidate_vec_t *cand, unsigned *nCands, unsigned minSteps, unsigned maxAdjust ) VAR nKept: unsigned; /* Number of candidates kept. */ ini: unsigned; /* First kept candidate for this chain pair. */ ra, rb: REF pz_match_t = NULL; { /* Copy the first candidate to the kept part of "cand": */ ini = 0; nKept = 0; /* Process the remaining candidates: */ for (i = 0; i <= nCands; i++){ if (( nKept > 0 )){ if (( i == nCands ) || ( cand[i].seg[0].cvx != cand[nKept-1].seg[0].cvx ) || ( cand[i].seg[1].cvx != cand[nKept-1].seg[1].cvx )){ /* New pair; process all candidates of previous pair. */ /* Those candidates are "cand[ini..nKept-1]". */ pz_candidate_merge_pair_overlaps(cand, minSteps, maxAdjust, ini, /*IO*/ nKept, /*WORK*/ ra, rb ); /* Start copying next pair: */ ini = nKept; } } /* Save candidate for later merging: */ if (( i < nCands )){ if (( i > nKept )){ cand[nKept] = cand[i]; } nKept++; } } nCands = nKept; } void pz_candidate_merge_pair_overlaps ( pz_candidate_vec_t *cand, unsigned minSteps, unsigned maxAdjust, unsigned ini /* Actual index of first candidate in "cand" */ unsigned *lim, /* Index of first non-candidate in "cand" */ REF *pz_match_t ra, REF *pz_match_t rb, /* (WORK) */ ) /* Given a bunch of candidates for the same chain pair, "cand[ini..lim-1]", removes all duplicates and joins all overlapping candidtes, decrementing "lim" accordingly. */ VAR r, s: unsigned; { /* This code assumes that "dist(A, pz_candidate_join(B,C)) == min(dist(A,B), dist(A,C))". */ /* The assumption may not be precisely correct, but... */ r = ini; while ( r < lim ){ /* Invariant: "cand[0..r-1]" are well separated from all other cands. */ /* Try to condense "cand[r]" with the following ones: */ s = r + 1; while ( s < lim ){ if (( pz_candidate_overlap(cand[r], cand[s], maxAdjust, ra, rb) >= 2* minSteps )){ cand[r] = pz_candidate_join(cand[r], cand[s]); /* Fill the hole left by "cand[s]": */ if (( s < lim-1 )){ cand[s] = cand[lim-1]; } lim--; /* Now start over again, since "cand[r]" changed: */ s = r + 1 }else{ s++; } } /* We got to the end, so presumably "cand[r]" is now well-separated: */ r++; } } unsigned pz_candidate_overlap ( pz_cand_t *a, pz_cand_t *b, unsigned maxAdjust, REF *pz_match_t ra, REF *pz_match_t rb /* (WORK) Work areas */ ) { return pz_candidate_packed_match_overlap(a.pm, b.pm, a.seg, b.seg, maxAdjust, ra,rb) } void pz_candidate_realloc_match ( REF *pz_match_t r, unsigned np ) /* Reallocates "r^" if needed to store at least "np" elements. May discard the old contents. */ { if (( r == NULL )){ r = pz_match_new(np) }else if (( (r^.ne) < np )){ r = NEW(REF pz_match_t, max(2*(r^.ne), np)); } } unsigned pz_candidate_packed_match_overlap ( pz_match_packed_t *a, pz_match_packed_t *b, pz_segment_pair *aSeg, pz_segment_pair *bSeg, unsigned maxAdjust, REF *pz_match_t ra, REF *pz_match_t rb /* (WORK) Work area */ ) VAR na, nb: unsigned; { if (( a == NULL )){ na = max(aSeg[0].ns, aSeg[1].ns) }else{ na = a.np; } pz_candidate_realloc_match(ra, na); if (( b == NULL )){ nb = max(bSeg[0].ns, bSeg[1].ns) }else{ nb = b.np; } pz_candidate_realloc_match(rb, nb); { /* with */ ??? am = SUBARRAY(ra^, 0, na); ??? bm = SUBARRAY(rb^, 0, nb); /* do */ if (( a == NULL )){ pz_match_do_most_perfect(aSeg[0].ns, aSeg[1].ns, am) }else{ pz_match_do_unpack(a^, am); } if (( b == NULL )){ pz_match_do_most_perfect(bSeg[0].ns, bSeg[1].ns, bm) }else{ pz_match_do_unpack(b^, bm); } return pz_match_overlap(am, bm, aSeg, bSeg, maxAdjust); } } pz_cand_t pz_candidate_join ( pz_cand_t *a, pz_cand_t *b ) /* Joins two candidates, assuming that they belong to the same chain pair and they overlap. */ VAR c: pz_cand_t; oldSteps, newSteps: unsigned = 0; { /* Compute the union of the two candidates on each chain: */ for (j = 0; j <= 1; j++){ c.seg[j] = pz_segment_join(a.seg[j], b.seg[j]); oldSteps = oldSteps + (a.seg[j].ns + b.seg[j].ns) - 2; newSteps = newSteps + c.seg[j].ns; } affirm(newSteps <= oldSteps ); { /* with */ ??? r = ((double)newSteps)/((double)oldSteps); /* do */ c.length = r*(a.length + b.length); c.mismatch = 0.0e0; c.matchedLength = 0.0e0; c.pm = NULL; } return c } char *FLR ( double x, unsigned w, unsigned p ) { return Fmt.Pad(Fmt.LongReal(x, Fmt.Style.Fix, prec = p), w) } /* FLR */ char *FI ( unsigned x, unsigned w ) { return Fmt.Pad(Fmt.Int(x), w) } /* FI */ pz_candidate_t pz_candidate_map ( pz_candidate_t *cOld /* Candidate on old chains. */ pz_cand_t *t0Old, pz_cand_t *t1Old, /* Sampling times of old chains. */ pz_cand_t *t0New, pz_cand_t *t1New, /* Sampling times of new chains. */ r2_t stride, /* Period of "{t0Old,t1Old}" and "{t0New,t1New}". */ r2_t stepNew, /* Sampling step of new chains. */ i2_t extra, /* Extra steps to add to each end of each segment */ ) { { /* with */ ??? sOld = cOld.seg; sNew = pz_segment_pair{ pz_segment_expand(pz_double_chain_map_segment(sOld[0], t0Old, t0New, stride[0]), extra[0], extra[0]); pz_segment_expand(pz_double_chain_map_segment(sOld[1], t1Old, t1New, stride[1]), extra[1], extra[1]); } ??? pmNew = pz_double_chain_map_packed_match(cOld.pm, sOld, t0Old, t1Old, sNew, t0New, t1New, stride); totLengthNew = ((double)sNew[0].ns - 1) * stepNew[0] + ((double)sNew[1].ns - 1) * stepNew[1]; /* do */ { /* with */ mapCand = pz_candidate_t{ seg := sNew; mismatch := 0.0e0; length := totLengthNew; matchedLength := 0.0e0; pm := pmNew; } /* do */ return mapCand; } } } /* Copyright © 2001 Universidade Estadual de Campinas (UNICAMP). Authors: Helena C. G. Leitão and Jorge Stolfi. This file can be freely distributed, used, and modified, provided that this copyright and authorship notice is preserved, and that any modified versions are clearly marked as such. This software has NO WARRANTY of correctness or applicability for any purpose. Neither the authors nor their employers chall be held responsible for any losses or damages that may result from its use. */