MODULE PZRefineCands EXPORTS Main; (* Last edited on 1999-11-15 01:29:57 by hcgl *) (* Refines a list of candidate matchings, using DP on coded curvature chains. *) IMPORT ParseParams, FileRd, FileWr, TextWr, Wr, Fmt, CPUTime; IMPORT Process, OSError, Thread, Stdio; IMPORT PZSymbol, PZSymbolChain, PZCandidate; IMPORT PZSegment, PZMismatch, PZMatch; FROM Math IMPORT sqrt; FROM Stdio IMPORT stderr; FROM PZTypes IMPORT LONG, LONGS, NAT; <* FATAL Wr.Failure, Thread.Alerted, OSError.E *> TYPE MismatchOptions = PZMismatch.Options; Options = RECORD (* The input data: *) input: TEXT; (* Input candidate file name. *) chainDir: TEXT; (* Directry where to find chain files. *) chainPrefix: TEXT; (* Invariant chain file name prefix. *) band: NAT; (* Nominal band width (lambda) for file names. *) step: LONG; (* Sampling step. *) curvSigma: LONG; (* "sigma" parameter for curvature encoding. *) maxInputCands: NAT; (* For debugging: truncate input at this count. *) (* Parameters of the refinement: *) mp: MismatchOptions; (* Mismatch formula parameters. *) minLength: LONG; (* Minimum length of matching segments. *) blurFactor: LONG; (* Corner broadening factor. *) extraLength: LONG; (* Extra length to add at each candidate end. *) maxRefineShift: LONG; (* Max shift during refinement of the matching. *) (* Parameters that control the pruning of the candidate list: *) maxCurveCands: NAT; (* Max candidates per curve. *) maxPairCands: NAT; (* Max candidates per curve pair. *) (* Parameters that control the output: *) output: TEXT; (* Output candidate file (without ".???"). *) END; TYPE VTable = PZSymbol.Table; PROCEDURE Main() = BEGIN WITH o = GetOptions(), candData = PZCandidate.Read(FileRd.Open(o.input & ".can")), oldCand = SUBARRAY(candData.c^, 0, MIN(o.maxInputCands, NUMBER(candData.c^))), lambda = candData.lambda, curves = PZCandidate.GetCurves(oldCand)^, chData = PZSymbolChain.ReadAll( o.chainPrefix, o.band, ".cvc", sel := curves, headerOnly := FALSE, dir := o.chainDir ), minLength = o.minLength - o.blurFactor * 2.0d0 * lambda, newCand = RefineCandidates(oldCand, chData.chain^, maxShift := o.maxRefineShift, mp := o.mp, step := o.step, curvSigma := o.curvSigma, minLength := minLength, extraLength := o.extraLength, maxCurveCands := o.maxCurveCands, maxPairCands := o.maxPairCands )^, wr = FileWr.Open(o.output & ".can") DO PZCandidate.Write(wr, candData.cmt & RefineComments(o), newCand, lambda); END; END Main; PROCEDURE GetOptions(): Options = VAR o: Options; BEGIN WITH pp = NEW(ParseParams.T).init(stderr) DO TRY pp.getKeyword("-input"); o.input := pp.getNext(); pp.getKeyword("-chainPrefix"); o.chainPrefix := pp.getNext(); IF pp.keywordPresent("-chainDir") THEN o.chainDir := pp.getNext() ELSE o.chainDir := "." END; pp.getKeyword("-band"); o.band := pp.getNextInt(); pp.getKeyword("-step"); o.step := pp.getNextLongReal(0.0d0, 1024.0d0); pp.getKeyword("-curvSigma"); o.curvSigma := pp.getNextLongReal(0.0d0, 1024.0d0); pp.getKeyword("-output"); o.output := pp.getNext(); pp.getKeyword("-minLength"); o.minLength := pp.getNextLongReal(); pp.getKeyword("-blurFactor"); o.blurFactor := pp.getNextLongReal(0.0d0, 5.0d0); pp.getKeyword("-extraLength"); o.extraLength := pp.getNextLongReal(0.0d0); pp.getKeyword("-maxRefineShift"); o.maxRefineShift := pp.getNextLongReal(0.0d0); o.mp := PZMismatch.ParseOptions(pp); IF pp.keywordPresent("-maxInputCands") THEN o.maxInputCands := pp.getNextInt(1) ELSE o.maxInputCands := LAST(NAT) END; IF pp.keywordPresent("-maxCurveCands") THEN o.maxCurveCands := pp.getNextInt(1) ELSE o.maxCurveCands := LAST(NAT) END; IF pp.keywordPresent("-maxPairCands") THEN o.maxPairCands := pp.getNextInt(1, o.maxCurveCands) ELSE o.maxPairCands := LAST(NAT) END; pp.finish(); EXCEPT | ParseParams.Error => Wr.PutText(stderr, "Usage: PZRefineCands \\\n"); Wr.PutText(stderr, " -input NAME \\\n"); Wr.PutText(stderr, " -chainPrefix NAME [ -chainDir DIR ] \\\n"); Wr.PutText(stderr, " -band NUMBER -step PIXELS \\\n"); Wr.PutText(stderr, " -curvSigma NUMBER \\\n"); Wr.PutText(stderr, " -output NAME \\\n"); Wr.PutText(stderr, " -minLength PIXELS -blurFactor NUMBER \\\n"); Wr.PutText(stderr, " -extraLength PIXELS \\\n"); Wr.PutText(stderr, " -maxRefineShift PIXELS \\\n"); Wr.PutText(stderr, " " & PZMismatch.OptionsHelp & " \\\n"); Wr.PutText(stderr, " [ -maxInputCands NUMBER ] \\\n"); Wr.PutText(stderr, " [ -maxCurveCands NUMBER ] \\\n"); Wr.PutText(stderr, " [ -maxPairCands NUMBER ]\n"); Process.Exit(1); END; END; RETURN o END GetOptions; PROCEDURE RefineCandidates( READONLY oldCands: PZCandidate.List; READONLY ch: ARRAY OF PZSymbolChain.ReadData; maxShift: LONG; (* Max shift to try alignment using PD ??. *) mp: MismatchOptions; (* Mismatch formula parameters. *) step: LONG; (* Sampling step. *) curvSigma: LONG; (* Sigma for curvature encoding. *) minLength: LONG; (* Min required length of matched segments. *) extraLength: LONG; (* Extra length to consider at both ends. *) maxCurveCands: NAT; (* Max candidates per curve. *) maxPairCands: NAT; (* Max candidates per curve pair. *) ) : REF PZCandidate.List = VAR nNew: NAT := 0; (* Refined candidate count *) newCands : REF PZCandidate.List; rCost: REF PZMatch.CostMatrix := NIL; minAvgDist: REF LONGS := NIL; VAR rca, rcb: REF PZSymbolChain.T := NIL; VAR rmt: REF PZMatch.T := NIL; BEGIN WITH nCands = NUMBER(oldCands), start = CPUTime.Now(), dch = PZSymbol.MakeDecodeTable(curvSigma)^, ech = PZSymbol.MakeErrorVarTable(curvSigma)^ DO (* Wr.PutText(stderr, "minLength = " & Fmt.LongReal(minLength) & "\n"); *) Wr.PutText(stderr, "candidates before refine loop = " & Fmt.Int(nCands) & "\n"); newCands := NEW(REF PZCandidate.List, nCands); FOR i := 0 TO nCands-1 DO WITH cOld = oldCands[i], cNew = RefineOneCandidate(cOld, ch, maxShift := maxShift, mp := mp, dch := dch, ech := ech, step := step, minLength := minLength, extraLength := extraLength, minAvgDist := minAvgDist, (*WORK*) rCost := rCost, rca := rca, rcb := rcb, rmt := rmt ) DO IF cNew.mismatch <= 0.0d0 AND cNew.length > minLength THEN newCands[nNew] := cNew; INC(nNew); END; END; IF (i MOD 80 = 0) THEN Wr.PutText(stderr, "\n") END; Wr.PutText(stderr, "."); END; Wr.PutText(stderr, "\n"); WITH t = CPUTime.Now() - start DO Wr.PutText(stderr, "time for RefineCandidates = " & FLR(t,0,1) & " sec\n") END; Wr.PutText(stderr, "candidates before pruning = " & Fmt.Int(nNew) & "\n"); PZCandidate.Sort(SUBARRAY(newCands^, 0, nNew), PZCandidate.PairMismatchBetter); PZCandidate.RemoveDuplicates(newCands^, nNew); Wr.PutText(stderr, "distinct candidates = " & Fmt.Int(nNew) & "\n"); PZCandidate.PruneCandsPerPair(newCands^, nNew, maxPairCands); Wr.PutText(stderr, "candidates after pair pruning = " & Fmt.Int(nNew) & "\n"); PZCandidate.Sort(SUBARRAY(newCands^, 0, nNew), PZCandidate.MismatchBetter); PZCandidate.PruneCandsPerCurve(newCands^, nNew, maxCurveCands); Wr.PutText(stderr, "candidates after curve pruning = " & Fmt.Int(nNew) & "\n"); WITH ct = NEW(REF PZCandidate.List, nNew) DO ct^ := SUBARRAY(newCands^,0,nNew); RETURN ct END; END END RefineCandidates; PROCEDURE RefineOneCandidate( READONLY cOld: PZCandidate.T; READONLY ch: ARRAY OF PZSymbolChain.ReadData; maxShift: LONG; (* Max shift to try alignment using PD ?? *) mp: MismatchOptions; (* Mismatch formula parameters. *) READONLY dch: VTable; READONLY ech: VTable; step: LONG; (* Sampling step *) minLength: LONG; (* Min required length of matched segments *) extraLength: LONG; (* Extra length to consider at both ends *) VAR minAvgDist: REF LONGS; (*WORK*) VAR rCost: REF PZMatch.CostMatrix; VAR rca, rcb: REF PZSymbolChain.T; VAR rmt: REF PZMatch.T; ): PZCandidate.T = VAR newMatch: REF PZMatch.T; mismatch: LONG; matchedLength: LONG; length: LONG; chStep: ARRAY [0..1] OF LONG; maxAdjust: NAT; sExp: PZSegment.Pair; BEGIN (* Compute the expansion needed, in samples: *) WITH extra = CEILING(extraLength / step) DO FOR j := 0 TO 1 DO chStep[j] := step; sExp[j] := PZSegment.Expand(cOld.seg[j], extra, extra) END END; WITH cvxa = cOld.seg[0].cvx, NA = sExp[0].ns, ca = SUBARRAY(ReallocateSymbolChain(rca, NA)^, 0, NA), cvxb = cOld.seg[1].cvx, NB = sExp[1].ns, cb = SUBARRAY(ReallocateSymbolChain(rcb, NB)^, 0, NB), step = 0.5d0 * (chStep[0] + chStep[1]), minSteps = MAX(0, FLOOR((2.0d0 * minLength)/step)), ctr = PZMatch.Pair{NA DIV 2, NB DIV 2}, NM = MatchingLength(cOld), mtOld = SUBARRAY(ReallocateMatching(rmt, NM)^, 0, NM) DO <* ASSERT NUMBER(ca) = NA *> <* ASSERT NUMBER(cb) = NB *> IF minSteps <= (NA-1)+(NB-1) THEN PZSymbolChain.DoExtractSegment(ch[cvxa].c^, sExp[0], ca); PZSymbolChain.DoExtractSegment(ch[cvxb].c^, sExp[1], cb); ObtainOldMatching(cOld, sExp, mtOld); (* Wr.PutText(stderr, "minSteps = " & Fmt.Int(minSteps) & "\n"); *) IF minAvgDist = NIL OR NUMBER(minAvgDist^) < NA + NB - 1 THEN minAvgDist := NEW(REF LONGS, NA + NB - 1) END; maxAdjust := CEILING(maxShift/step); IF cOld.pm = NIL THEN WITH n0 = cOld.seg[0].ns, n1 = cOld.seg[1].ns DO maxAdjust := MAX(maxAdjust, ABS(n0 - n1)) END; END; PZSymbolChain.RefineMatch( ca, cb, decode := dch, errorVar := ech, step := step, mtOld := mtOld, maxAdjust := maxAdjust, maxDistSqr := mp.maxDistSqr, critDistSqr := mp.critDistSqr, skipDistSqr := mp.skipDistSqr, (*OUT*) mt := newMatch, mismatch := mismatch, length := length, matchedLength := matchedLength, minAvgDisc := SUBARRAY(minAvgDist^, 0, NA+NB-1), (*WORK*) rCost := rCost ); (* Wr.PutText(stderr, "mismatch = " & Fmt.LongReal(mismatch) & "\n"); Wr.PutText(stderr, "matchedLength = " & Fmt.LongReal(matchedLength) & "\n"); *) ELSE mismatch := 0.0d0; length := 0.0d0; matchedLength := 0.0d0; newMatch := NEW(REF PZMatch.T, 1); newMatch[0] := ctr END; WITH cExp = PZCandidate.T{ seg := sExp, mismatch := 0.0d0, length := 0.0d0, matchedLength := 0.0d0, pm := NIL } DO RETURN PZCandidate.TrimAndPack(cExp, newMatch^, mismatch := mismatch, length := length, matchedLength := matchedLength ) END END; END RefineOneCandidate; PROCEDURE MatchingLength(READONLY c: PZCandidate.T): NAT = (* Length of the matching "c.pm", if any; else length of `most perfect' matching. *) BEGIN IF c.pm # NIL THEN RETURN c.pm.np ELSE RETURN MAX(c.seg[0].ns, c.seg[1].ns) END END MatchingLength; PROCEDURE ObtainOldMatching( READONLY cOld: PZCandidate.T; (* Original candidate. *) READONLY sExp: PZSegment.Pair; (* Expanded segments. *) VAR (*OUT*) mt: PZMatch.T; (* Matching rel. to "cExp. *) ) = BEGIN IF cOld.pm # NIL THEN PZMatch.DoUnpack(cOld.pm^, mt); ELSE (* Create a "most perfect" matching for the two old chains: *) WITH n0 = cOld.seg[0].ns, n1 = cOld.seg[1].ns DO PZMatch.DoMostPerfect(n0, n1, mt); END; END; (* Adjust matching to account for candidate expansion: *) FOR j := 0 TO 1 DO WITH so = cOld.seg[j], sn = sExp[j], d = (so.ini - sn.ini) MOD sn.tot DO <* ASSERT so.tot = sn.tot *> <* ASSERT so.ns + d <= sn.ns *> FOR k := 0 TO LAST(mt) DO mt[k][j] := mt[k][j] + d END END END END ObtainOldMatching; PROCEDURE ReallocateSymbolChain(VAR rc: REF PZSymbolChain.T; N: NAT): REF PZSymbolChain.T = (* If "rc^" does not exist or has less than "N" elements, reallocates it (discarding its old contents). In any case returns the final "rc". *) BEGIN IF rc = NIL THEN rc := NEW(REF PZSymbolChain.T, N) ELSIF NUMBER(rc^) < N THEN rc := NEW(REF PZSymbolChain.T, MAX(N, 2*NUMBER(rc^))) END; RETURN rc END ReallocateSymbolChain; PROCEDURE ReallocateMatching(VAR rmt: REF PZMatch.T; N: NAT): REF PZMatch.T = (* If "rmt^" does not exist or has less than "N" elements, reallocates it (discarding its old contents). In any case returns the final "rmt". *) BEGIN IF rmt = NIL THEN rmt := NEW(REF PZMatch.T, N) ELSIF NUMBER(rmt^) < N THEN rmt := NEW(REF PZMatch.T, MAX(N, 2*NUMBER(rmt^))) END; RETURN rmt END ReallocateMatching; PROCEDURE RefineComments(READONLY o: Options):TEXT= BEGIN WITH wr = NEW(TextWr.T).init() DO Wr.PutText(wr, "PZRefineCands: \n"); Wr.PutText(wr, " input: " & o.input & "\n"); Wr.PutText(wr, " chainDir: " & o.chainDir & "\n"); Wr.PutText(wr, " chainPrefix: " & o.chainPrefix & "\n"); Wr.PutText(wr, " band: " & Fmt.Int(o.band) & "\n"); Wr.PutText(wr, " curvSigma: " & FLR(o.curvSigma, 8,4) & "\n"); Wr.PutText(wr, " critDist: " & FLR(sqrt(o.mp.critDistSqr), 8,4) & "\n"); Wr.PutText(wr, " skipDist: " & FLR(sqrt(o.mp.skipDistSqr), 8,4) & "\n"); Wr.PutText(wr, " maxDist: " & FLR(sqrt(o.mp.maxDistSqr), 8,4) & "\n"); Wr.PutText(wr, " minLength: " & FLR(o.minLength, 8,4) & "\n"); Wr.PutText(wr, " blurFactor: " & FLR(o.blurFactor, 8,4) & "\n"); Wr.PutText(wr, " extraLength: " & FLR(o.extraLength, 8,4) & "\n"); Wr.PutText(wr, " maxRefineShift: " & FLR(o.maxRefineShift, 8,4) & "\n"); Wr.PutText(wr, " maxCurveCands: " & Fmt.Int(o.maxCurveCands) & "\n"); Wr.PutText(wr, " maxPairCands: " & Fmt.Int(o.maxPairCands) & "\n"); Wr.PutText(wr, " output: " & o.output & "\n"); RETURN(TextWr.ToText(wr)) END (* DO *); END RefineComments; PROCEDURE FLR(x: LONG; w, d: NAT): TEXT = BEGIN RETURN Fmt.Pad(Fmt.LongReal(x, prec := d, style := Fmt.Style.Fix), w) END FLR; BEGIN Main() END PZRefineCands. (* Copyright © 2001 Universidade Estadual de Campinas (UNICAMP). Authors: Helena C. G. Leitão and Jorge Stolfi. This file can be freely distributed, used, and modified, provided that this copyright and authorship notice is preserved, and that any modified versions are clearly marked as such. This software has NO WARRANTY of correctness or applicability for any purpose. Neither the authors nor their employers chall be held responsible for any losses or damages that may result from its use. *)