/* Last edited on 2015-01-20 16:47:29 by stolfilocal */ /* Refines a list of candidate pairings, using DP on coded curvature chains. */ #include #include #include #include #include #include #include #include #include #include #include TYPE MismatchOptions == pz_mismatch.Options; Options = struct ??? { /* The input data: */ char *input; /* Input candidate file name. */ char *chainDir; /* Directry where to find chain files. */ char *chainPrefix; /* Invariant chain file name prefix. */ unsigned band; /* Nominal band width (lambda) for file names. */ double step; /* Sampling step. */ double curvSigma; /* "sigma" parameter for curvature encoding. */ unsigned maxInputCands; /* For debugging: truncate input at this count. */ /* Parameters of the refinement: */ MismatchOptions mp; /* Mismatch formula parameters. */ double minLength; /* Minimum length of pairing segments. */ double blurFactor; /* Corner broadening factor. */ double extraLength; /* Extra length to add at each candidate end. */ double maxRefineShift; /* Max align. shift during refinement. */ /* Parameters that control the pruning of the candidate list: */ unsigned maxChainCands; /* Max candidates per chain. */ unsigned maxPairCands; /* Max candidates per chain pair. */ /* Parameters that control the output: */ char *output; /* Output candidate file (without ".???"). */ } ???; TYPE VTable == double_vec_t; int main(int argc, char **argv ) { { /* with */ ??? o = pz_get_options(int argc, char **argv); ??? candData = pz_candidate_read(open_read(o.input & ".can", TRUE)); ??? oldCand = SUBARRAY(candData.c^, 0, MIN(o.maxInputCands, (candData.c^.ne))); ??? lambda = candData.lambda; ??? chainUsed = pz_candidate_chains_used(oldCand)^; chAllData = pz_symbol_chain_read_all( o.chainPrefix, o.band, ".cvc"; sel := chainUsed, header_only := FALSE; dir := o.chainDir ); ??? minLength = o.minLength - o.blurFactor * 2.0e0 * lambda; ??? newCand = RefineCandidates(oldCand, chAllData.chData^; maxShift := o.maxRefineShift; mp := o.mp; step := o.step; curvSigma := o.curvSigma; minLength := minLength; extraLength := o.extraLength; maxChainCands := o.maxChainCands; maxPairCands := o.maxPairCands )^; ??? wr = open_write(o.output & ".can", TRUE); /* do */ pz_candidate_write(wr, candData.cmt & RefineComments(o), newCand, lambda);; }; } /* Main */ Options pz_get_options(int argc, char **argv ) VAR o: Options; { argparser_t *pp = argparser_new(stderr, argc, argv); argparser_set_help(pp, PROG_NAME " version " PROG_VERS ", usage:\n" PROG_HELP); argparser_set_info(pp, PROG_INFO); argparser_process_help_info_options(pp); { /* with */ /* do */ TRY argparser_get_keyword(pp, "-input"); o.input = argparser_get_next(pp); argparser_get_keyword(pp, "-chainPrefix"); o.chainPrefix = argparser_get_next(pp); if (( argparser_keyword_present(pp, "-chainDir") )){ o.chainDir = argparser_get_next(pp) }else{ o.chainDir = "."; }; argparser_get_keyword(pp, "-band"); o.band = argparser_get_next_int(pp); argparser_get_keyword(pp, "-step"); o.step = argparser_get_next_double(pp, 0.0e0, 1024.0e0); argparser_get_keyword(pp, "-curvSigma"); o.curvSigma = argparser_get_next_double(pp, 0.0e0, 1024.0e0); argparser_get_keyword(pp, "-output"); o.output = argparser_get_next(pp); argparser_get_keyword(pp, "-minLength"); o.minLength = argparser_get_next_double(pp); argparser_get_keyword(pp, "-blurFactor"); o.blurFactor = argparser_get_next_double(pp, 0.0e0, 5.0e0); argparser_get_keyword(pp, "-extraLength"); o.extraLength = argparser_get_next_double(pp, 0.0e0); argparser_get_keyword(pp, "-maxRefineShift"); o.maxRefineShift = argparser_get_next_double(pp, 0.0e0); o.mp = pz_mismatch.ParseOptions(pp); if (( argparser_keyword_present(pp, "-maxInputCands") )){ o.maxInputCands = argparser_get_next_int(pp, 1) }else{ o.maxInputCands = (unsigned.ne - 1); }; if (( argparser_keyword_present(pp, "-maxChainCands") )){ o.maxChainCands = argparser_get_next_int(pp, 1) }else{ o.maxChainCands = (unsigned.ne - 1); }; if (( argparser_keyword_present(pp, "-maxPairCands") )){ o.maxPairCands = argparser_get_next_int(pp, 1, o.maxChainCands) }else{ o.maxPairCands = (unsigned.ne - 1); }; argparser_finish(pp); EXCEPT | ParseParams.Error ==> fprintf(stderr, "Usage: pz_refine_cands \\\n"); fprintf(stderr, " -input NAME \\\n"); fprintf(stderr, " -chainPrefix NAME [ -chainDir DIR ] \\\n"); fprintf(stderr, " -band NUMBER -step PIXELS \\\n"); fprintf(stderr, " -curvSigma NUMBER \\\n"); fprintf(stderr, " -output NAME \\\n"); fprintf(stderr, " -minLength PIXELS -blurFactor NUMBER \\\n"); fprintf(stderr, " -extraLength PIXELS \\\n"); fprintf(stderr, " -maxRefineShift PIXELS \\\n"); fprintf(stderr, " %s \\\n", pz_mismatch.OptionsHelp ); fprintf(stderr, " [ -maxInputCands NUMBER ] \\\n"); fprintf(stderr, " [ -maxChainCands NUMBER ] \\\n"); fprintf(stderr, " [ -maxPairCands NUMBER ]\n"); Process.Exit(1);; };; }; return o } /* GetOptions */ pz_candidate.List *RefineCandidates( pz_candidate.List *oldCands, ARRAY *OF pz_symbol_chain_read_data ch, double maxShift /* Max shift to try alignment using PD ??. */ MismatchOptions mp, /* Mismatch formula parameters. */ double step, /* Sampling step. */ double curvSigma, /* Sigma for curvature encoding. */ double minLength, /* Min required length of matched segments. */ double extraLength, /* Extra length to consider at both ends. */ unsigned maxChainCands, /* Max candidates per chain. */ unsigned maxPairCands, /* Max candidates per chain pair. */ ) VAR newCands : REF pz_candidate.List; rCost: REF pz_match_cost_matrix_t = NULL; minAvgDist: REF double_vec_t = NULL; VAR rca, rcb: REF pz_symbol_chain_t := NIL; VAR rmt: REF pz_match_t := NIL; { { /* with */ ??? nCands = (oldCands.ne); ??? start = CPUTime.Now(); ??? dch = pz_symbol_make_decode_table(curvSigma)^; ??? ech = pz_symbol_make_error_var_table(curvSigma)^; /* do */ /* fprintf(stderr, "minLength == %s\n", Fmt.LongReal(minLength) ); */ fprintf(stderr, "candidates before refine loop == %s\n", Fmt.Int(nCands) ); newCands = NEW(REF pz_candidate.List, nCands); for (i = 0; i <= nCands-1 ; i++){ { /* with */ ??? cOld = oldCands[i]; ??? cNew = RefineOneCandidate(cOld, ch; maxShift := maxShift; mp := mp; dch := dch, ech := ech; step := step; minLength := minLength; extraLength := extraLength; minAvgDist := minAvgDist; /*WORK*/ rCost := rCost; rca := rca, rcb := rcb; rmt := rmt ); /* do */ if (( cNew.mismatch <= 0.0e0 ) ANDAND ( cNew.length > minLength )){ newCands[nNew] = cNew; nNew++;; };; }; if (( (i MOD 80 == 0) )){ fprintf(stderr, "\n") ;}; fprintf(stderr, ".");; }; fprintf(stderr, "\n"); { /* with */ ??? t = CPUTime.Now() - start; /* do */ fprintf(stderr, "time for RefineCandidates == %s sec\n", FLR(t,0,1) ); }; fprintf(stderr, "candidates before pruning == %s\n", Fmt.Int(nNew) ); pz_candidate_sort(SUBARRAY(newCands^, 0, nNew), pz_candidate_pair_mismatch_better); pz_candidate_remove_duplicates(newCands^, nNew); fprintf(stderr, "distinct candidates == %s\n", Fmt.Int(nNew) ); pz_candidate_prune_cands_per_pair(newCands^, nNew, maxPairCands); fprintf(stderr, "candidates after pair pruning == %s\n", Fmt.Int(nNew) ); pz_candidate_sort(SUBARRAY(newCands^, 0, nNew), pz_candidate_mismatch_better); pz_candidate_prune_cands_per_chain(newCands^, nNew, maxChainCands); fprintf(stderr, "candidates after chain pruning == %s\n", Fmt.Int(nNew) ); { /* with */ ??? ct = NEW(REF pz_candidate.List, nNew); /* do */ ct^ = SUBARRAY(newCands^,0,nNew); return ct; };; } } /* RefineCandidates */ pz_candidate_t RefineOneCandidate( pz_candidate_t *cOld, ARRAY *OF pz_symbol_chain_read_data ch, double maxShift, /* Max shift to try alignment using PD ?? */ MismatchOptions mp, /* Mismatch formula parameters. */ VTable *dch, VTable *ech, double step, /* Sampling step */ double minLength, /* Min required length of matched segments */ double extraLength, /* Extra length to consider at both ends */ REF *double_vec_t minAvgDist, /* (WORK) */ REF *pz_match_cost_matrix_t rCost, REF *pz_symbol_chain_t rca, REF *pz_symbol_chain_t rcb, REF *pz_match_t rmt ) VAR newMatch: REF pz_match_t; mismatch: double; matchedLength: double; length: double; chStep: r2_t; maxAdjust: unsigned; sExp: pz_segment_pair; { /* Compute the expansion needed, in samples: */ { /* with */ ??? extra = CEILING(extraLength / step); /* do */ for (j = 0; j <= 1 ; j++){ chStep[j] = step; sExp[j] = pz_segment_expand(cOld.seg[j], extra, extra); }; }; { /* with */ ??? cvxa = cOld.seg[0].cvx; ??? NA = sExp[0].ns; ??? ca = SUBARRAY(ReallocateSymbolChain(rca, NA)^, 0, NA); ??? cvxb = cOld.seg[1].cvx; ??? NB = sExp[1].ns; ??? cb = SUBARRAY(ReallocateSymbolChain(rcb, NB)^, 0, NB); ??? step = 0.5e0 * (chStep[0] + chStep[1]); ??? minSteps = MAX(0, FLOOR((2.0e0 * minLength)/step)); ??? ctr = pz_match_pair{NA DIV 2, NB DIV 2}; ??? NM = PairingLength(cOld); ??? mtOld = SUBARRAY(ReallocatePairing(rmt, NM)^, 0, NM); /* do */ affirm((ca.ne) == NA ); affirm((cb.ne) == NB ); if (( minSteps < (NA)+(NB-1) )){ pz_symbol_chain_do_extract_segment(ch[cvxa].c^, sExp[0], ca); pz_symbol_chain_do_extract_segment(ch[cvxb].c^, sExp[1], cb); ObtainOldPairing(cOld, sExp, mtOld); /* fprintf(stderr, "minSteps == %s\n", Fmt.Int(minSteps) ); */ if (( minAvgDist == NULL ) || ( (minAvgDist^.ne) < NA + NB - 1 )){ minAvgDist = NEW(REF double_vec_t, NA + NB - 1); }; maxAdjust = CEILING(maxShift/step); if (( cOld.pm == NULL )){ { /* with */ ??? n0 = cOld.seg[0].ns, n1 = cOld.seg[1].ns; /* do */ maxAdjust = max(maxAdjust, abs(n0 - n1)); };; }; pz_symbol_chain_match_refine( ca, cb, decode = dch, errorVar = ech, step = step, mtOld = mtOld, maxAdjust = maxAdjust, maxDistSqr = mp.maxDistSqr, critDistSqr = mp.critDistSqr, skipDistSqr = mp.skipDistSqr, /*OUT*/ mt = newMatch, mismatch = mismatch, length = length, matchedLength = matchedLength, minAvgDisc = SUBARRAY(minAvgDist^, 0, NA+NB-1), /*WORK*/ rCost = rCost ); /* fprintf(stderr, "mismatch == %s\n", Fmt.LongReal(mismatch) ); fprintf(stderr, "matchedLength == %s\n", Fmt.LongReal(matchedLength) ); */ }else{ mismatch = 0.0e0; length = 0.0e0; matchedLength = 0.0e0; newMatch = pz_match_new(1); newMatch[0] = ctr; }; { /* with */ cExp = pz_candidate_t{ seg := sExp; mismatch := 0.0e0; length := 0.0e0; matchedLength := 0.0e0; pm := NIL }; /* do */ return pz_candidate_cutAndPack(cExp, newMatch^, mismatch = mismatch, length = length, matchedLength = matchedLength ); }; }; } /* RefineOneCandidate */ unsigned PairingLength( pz_candidate_t *c ) /* Length of the pairing "c.pm", if any; else length of `most perfect' pairing. */ { if (( c.pm != NULL )){ return c.pm.np }else{ return max(c.seg[0].ns, c.seg[1].ns); } } /* PairingLength */ void ObtainOldPairing( pz_candidate_t *cOld /* Original candidate. */ pz_segment_pair *sExp, /* Expanded segments. */ pz_match_t *mt, /* (OUT) Pairing rel. to "cExp. */ ) { if (( cOld.pm != NULL )){ pz_match_do_unpack(cOld.pm^, mt); }else{ /* Create a "most perfect" pairing for the two old chains: */ { /* with */ ??? n0 = cOld.seg[0].ns, n1 = cOld.seg[1].ns; /* do */ pz_match_do_most_perfect(n0, n1, mt);; };; }; /* Adjust pairing to account for candidate expansion: */ for (j = 0; j <= 1 ; j++){ { /* with */ ??? so = cOld.seg[j]; ??? sn = sExp[j]; ??? d = (so.ini - sn.ini) MOD sn.tot; /* do */ affirm(so.tot == sn.tot ); affirm(so.ns + d <= sn.ns ); for (k = 0; k < (mt.ne ) ; k++){ mt[k][j] = mt[k][j] + d ;}; }; } } /* ObtainOldPairing */ pz_symbol_chain_t *ReallocateSymbolChain( REF *pz_symbol_chain_t rc, unsigned N ) /* If "rc^" does not exist or has less than "N" elements, reallocates it (discarding its old contents). In any case returns the final "rc". */ { if (( rc == NULL )){ rc = pz_symbol_chain_new(N) }else if (( (rc^.ne) < N )){ rc = NEW(REF pz_symbol_chain_t, max(N, 2*(rc^.ne))); }; return rc } /* ReallocateSymbolChain */ pz_match_t *ReallocatePairing( REF *pz_match_t rmt, unsigned N ) /* If "rmt^" does not exist or has less than "N" elements, reallocates it (discarding its old contents). In any case returns the final "rmt". */ { if (( rmt == NULL )){ rmt = pz_match_new(N) }else if (( (rmt^.ne) < N )){ rmt = NEW(REF pz_match_t, max(N, 2*(rmt^.ne))); }; return rmt } /* ReallocatePairing */ char *RefineComments( Options *o ) { { /* with */ ??? wr = NEW(TextFILE *).init(); /* do */ fprintf(wr, "pz_refine_cands: \n"); fprintf(wr, " input: %s\n", o.input ); fprintf(wr, " chainDir: %s\n", o.chainDir ); fprintf(wr, " chainPrefix: %s\n", o.chainPrefix ); fprintf(wr, " band: %s\n", Fmt.Int(o.band) ); fprintf(wr, " curvSigma: %s\n", FLR(o.curvSigma, 8,4) ); fprintf(wr, " critDist: %s\n", FLR(sqrt(o.mp.critDistSqr), 8,4) ); fprintf(wr, " skipDist: %s\n", FLR(sqrt(o.mp.skipDistSqr), 8,4) ); fprintf(wr, " maxDist: %s\n", FLR(sqrt(o.mp.maxDistSqr), 8,4) ); fprintf(wr, " minLength: %s\n", FLR(o.minLength, 8,4) ); fprintf(wr, " blurFactor: %s\n", FLR(o.blurFactor, 8,4) ); fprintf(wr, " extraLength: %s\n", FLR(o.extraLength, 8,4) ); fprintf(wr, " maxRefineShift: %s\n", FLR(o.maxRefineShift, 8,4) ); fprintf(wr, " maxChainCands: %s\n", Fmt.Int(o.maxChainCands) ); fprintf(wr, " maxPairCands: %s\n", Fmt.Int(o.maxPairCands) ); fprintf(wr, " output: %s\n", o.output ); return(TextFILE *oText(wr)); } /* ){ */; } /* RefineComments */ char *FLR( double x, unsigned w, unsigned d ) { return Fmt.Pad(Fmt.LongReal(x, prec = d, style = Fmt.Style.Fix), w) } /* FLR */ { /* Copyright © 2001 Universidade Estadual de Campinas (UNICAMP). Authors: Helena C. G. Leitão and Jorge Stolfi. This file can be freely distributed, used, and modified, provided that this copyright and authorship notice is preserved, and that any modified versions are clearly marked as such. This software has NO WARRANTY of correctness or applicability for any purpose. Neither the authors nor their employers chall be held responsible for any losses or damages that may result from its use. */