MODULE PZGetRandomCands EXPORTS Main; (* Generate random candidates from a set of contours. *) (* Last edited on 1999-12-30 11:01:06 by hcgl *) (* Reads a bunch of encoded curvature chains, and generates a set of random candidates from them, restricted to a given set of segments. Each candidate will consist of two segments of approximately the same length, on two distinct contours, the second one being implicitly reversed. The length of the two segments is restricted to a range specified by the user. (Actually, the specified lengths are automatically reduced to account corner blurring.) IMPORTANT: The input segments should probably wnot include pieces of straight segments, or of blurred corners. Check the parameters of "PZMapSegs". *) IMPORT ParseParams, Process, FileRd, FileWr, Wr, Random; IMPORT OSError, Thread, Text, Stdio, Fmt; IMPORT PZSymbolChain, PZSymbol, PZProc, PZCandidate, PZSegment; FROM Stdio IMPORT stderr; FROM PZTypes IMPORT LONG, NAT; <* FATAL Wr.Failure, Thread.Alerted, OSError.E *> TYPE Options = RECORD segments: TEXT; (* Filename of given segments. *) chainDir: TEXT; (* Directory where to find chain data. *) chainPrefix: TEXT; (* Invariant chain file name prefix. *) nCurves: NAT; (* Number of curves. *) output: TEXT; (* Output candidate file name. *) band: NAT; (* Filtering "band" (wavelength parameter). *) step: LONG; (* Sampling step. *) maxCands: NAT; (* Max number of candidates to generate *) minLength: LONG; (* Min length of matched segments. *) maxLength: LONG; (* Max length of matched segments. *) blurFactor: LONG; (* Corner broadening factor. *) printCands: NAT; (* Max candidates to print. *) END; TYPE Segments = PZSegment.List; SegmentPair = ARRAY [0..1] OF PZSegment.T; SegData = PZSegment.ReadData; SegmentPairs = ARRAY OF SegmentPair; Candidates = PZCandidate.List; Chains = ARRAY OF PZSymbolChain.ReadData; VTable = PZSymbol.Table; PROCEDURE Main() = BEGIN WITH o = GetOptions(), band = o.band, lambda = FLOAT(band, LONG), blurAmount = o.blurFactor * lambda, minLength = EnsurePositive(o.minLength - 2.0d0 * blurAmount), maxLength = EnsurePositive(o.maxLength - 2.0d0 * blurAmount), segData = ReadSegments(o.segments), chData = ReadChains(o.chainDir, o.chainPrefix, o.nCurves, band), sigma = GetSigma(chData), cand = GenerateRandomCandidates( seg := segData.s^, ch := chData.chain^, sigma := sigma, step := o.step, minLength := minLength, maxLength := maxLength, maxCands := o.maxCands )^ DO WITH wr = FileWr.Open(o.output & ".can") DO PZCandidate.Write(wr, CandComment(o, segData.cmt, chData.cmt), cand, lambda) END; PrintCandidates(cand, o.printCands); END; END Main; PROCEDURE EnsurePositive(length: LONG): LONG = BEGIN IF length < 0.0d0 THEN Wr.PutText(stderr, "warning: requested candidate length too small for this scale\n"); END; RETURN MAX(1.0d0, length) END EnsurePositive; PROCEDURE GetSigma(READONLY chData: PZSymbolChain.ReadAllData): LONG = BEGIN IF chData.sigmaMax > 0.0d0 THEN <* ASSERT chData.sigmaMax = chData.sigmaMin *> RETURN chData.sigmaMax ELSE (* No chains, return any sensible value: *) RETURN 1.0d0 END END GetSigma; PROCEDURE GetOptions(): Options = VAR o: Options; BEGIN WITH pp = NEW(ParseParams.T).init(stderr) DO TRY pp.getKeyword("-segments"); o.segments := pp.getNext(); pp.getKeyword("-chainPrefix"); o.chainPrefix := pp.getNext(); IF pp.keywordPresent("-chainDir") THEN o.chainDir := pp.getNext() ELSE o.chainDir := "." END; pp.getKeyword("-nCurves"); o.nCurves := pp.getNextInt(); pp.getKeyword("-band"); o.band := pp.getNextInt(); pp.getKeyword("-step"); o.step := pp.getNextLongReal(0.0d0); pp.getKeyword("-output"); o.output := pp.getNext(); pp.getKeyword("-minLength"); o.minLength := pp.getNextLongReal(0.0d0); pp.getKeyword("-maxLength"); o.maxLength := pp.getNextLongReal(o.minLength); pp.getKeyword("-blurFactor"); o.blurFactor := pp.getNextLongReal(0.0d0); pp.getKeyword("-maxCands"); o.maxCands := pp.getNextInt(1); IF pp.keywordPresent("-printCands") THEN o.printCands := pp.getNextInt(1) ELSE o.printCands := 1000 END; pp.finish(); EXCEPT | ParseParams.Error => Wr.PutText(stderr, "Usage: PZGetRandomCands \\\n"); Wr.PutText(stderr, " -segments FILE \\\n"); Wr.PutText(stderr, " [ -chainDir DIR ] -chainPrefix NAME \\\n"); Wr.PutText(stderr, " -nCurves NUMBER \\\n"); Wr.PutText(stderr, " -band NUMBER -step NUMBER \\\n"); Wr.PutText(stderr, " -output NAME \\\n"); Wr.PutText(stderr, " -minLength NUMBER -maxlength NUMBER \\\n"); Wr.PutText(stderr, " -blurFactor NUMBER \\\n"); Wr.PutText(stderr, " -maxCands NUMBER \\\n"); Wr.PutText(stderr, " [ -printCands NUMBER ]\n"); Process.Exit(1); END; END; RETURN o END GetOptions; PROCEDURE GenerateRandomCandidates( READONLY seg: Segments; (* Segments to sample & match. *) READONLY ch: Chains; (* Chain data. *) sigma: LONG; (* Encoding parameter for "ch". *) step: LONG; (* Sampling step. *) minLength: LONG; (* Min length of candidate. *) maxLength: LONG; (* Max length of candidate. *) maxCands: NAT; (* Max number of candidates desired. *) ): REF Candidates = BEGIN WITH rnd = NEW(Random.Default).init(fixed := TRUE), dch = PZSymbol.MakeDecodeTable(sigma)^, ech = PZSymbol.MakeErrorVarTable(sigma)^, minSamples = CEILING(minLength/step) + 1, maxSamples = MAX(minSamples, FLOOR(maxLength/step) + 1), sp = EnumSegmentPairs(rnd, seg, minSamples, maxSamples)^, nCands = MIN(maxCands, NUMBER(sp)), rCand = NEW(REF Candidates, nCands), cand = rCand^ DO FOR i := 0 TO nCands-1 DO cand[i] := PickCandidateInSegmentPair( rnd, sp[i], ch, dch, ech, minSamples, maxSamples, step ) END; PZCandidate.Sort(cand, PZCandidate.LexicallyBetter); RETURN rCand END END GenerateRandomCandidates; PROCEDURE EnumSegmentPairs( rnd: Random.T; (* Fountain of surprises. *) READONLY seg: Segments; (* Segments to pick. *) minSamples: NAT; (* Min number of steps per candidate. *) maxSamples: NAT; (* Max number of steps per candidate. *) ): REF SegmentPairs = (* Returns a list of all candidates (actually, segment pairs) that can be formed from the segments "seg" that have "nSteps" or more steps on both sides. Each qualifying candidate is chopped into half-overlapping pieces with "maxSteps" steps, and the list is then scrambled. *) BEGIN WITH avgSamples = (minSamples + maxSamples) DIV 2 DO Wr.PutText(stderr, "generating list of all basic cands...\n"); WITH sp = EnumPairsFromSegments(seg, minSamples := avgSamples)^, rsc = ChopSegmentPairs(sp, maxSamples), sc = rsc^ DO RandomizeSegmentPairs(rnd, sc); RETURN rsc END END; END EnumSegmentPairs; PROCEDURE EnumPairsFromSegments( READONLY seg: Segments; (* Segments to pick. *) minSamples: NAT; (* Min samples in candidate *) ): REF SegmentPairs = (* Selects all segments from "seg" that have at least "minSamples" samples, and returns a list of all pairs from the selected set that belong to different curves. The two segments are returned with the same direction ("rev = FALSE"). *) VAR r: REF SegmentPairs := NIL; nPairs: NAT := 0; BEGIN FOR i := 0 TO LAST(seg) DO WITH si = seg[i] DO IF si.ns >= minSamples THEN FOR j := 0 TO i-1 DO WITH sj = seg[j] DO IF sj.ns >= minSamples AND si.cvx # sj.cvx THEN IF r = NIL OR nPairs >= NUMBER(r^) THEN WITH s = NEW(REF SegmentPairs, 2*nPairs + 100) DO IF r # NIL THEN SUBARRAY(s^,0,nPairs) := r^ END; r := s END END; WITH p = r^[nPairs] DO p := SegmentPair{si, sj}; p[0].rev := FALSE; p[1].rev := FALSE; END; INC(nPairs) END END END END END END; Wr.PutText(stderr, "found " & FI(nPairs,1) & " segment pairs of sufficient size\n"); WITH s = NEW(REF SegmentPairs, nPairs) DO IF r # NIL THEN s^ := SUBARRAY(r^,0,nPairs) END; RETURN s END END EnumPairsFromSegments; PROCEDURE ChopSegmentPairs( READONLY sp: SegmentPairs; nSamples: NAT; ): REF SegmentPairs = (* Breaks each pair from the list "sp" into half-overlapping pieces of size "nSamples". Ideally, should break each segment of the pair into as many pieces as would fit, and pair the pieces in all possible ways. However this may generate far too many segments, and would allow the same segment to appear several times in the output. Moreover, the probability of an original candidate to be sampled would be proportional to the product of its segment's lengths, which may not be a good idea. Thus instead we break the two segments into the same number of pieces, and pair them sequentially. *) VAR sChop: SegmentPair; skip: ARRAY [0..1] OF NAT; base: ARRAY [0..1] OF NAT; r: REF SegmentPairs := NIL; nPairs: NAT := 0; BEGIN Wr.PutText(stderr, "chopping candidates into pieces with " & FI(nSamples,1) & " samples...\n"); FOR i := 0 TO LAST(sp) DO WITH sBig = sp[i], minSkip = (nSamples + 1) DIV 2, candSamples = MIN(sBig[0].ns, sBig[1].ns) DO IF candSamples >= nSamples THEN WITH nPieces = (candSamples - nSamples) DIV minSkip + 1 DO <* ASSERT nPieces >= 1 *> FOR j := 0 TO 1 DO WITH sCj = sChop[j], sBj = sBig[j] DO sCj := sBj; skip[j] := (sBj.ns - nSamples) DIV MAX(1, nPieces-1); sCj.ns := nSamples; base[j] := (sBj.ns - sCj.ns - (nPieces-1)*skip[j]) DIV 2; END; <* ASSERT nPieces = 1 OR skip[j] >= minSkip *> END; FOR k := 0 TO nPieces-1 DO FOR j := 0 TO 1 DO WITH sCj = sChop[j], sBj = sBig[j] DO sCj.ini := sBj.ini + base[j] + k*skip[j]; <* ASSERT sCj.ini >= sBj.ini *> <* ASSERT sCj.ini + sCj.ns <= sBj.ini + sBj.ns *> END END; IF r = NIL OR nPairs = NUMBER(r^) THEN WITH s = NEW(REF SegmentPairs, 2*nPairs+100) DO IF r # NIL THEN SUBARRAY(s^, 0, NUMBER(r^)) := r^ END; r := s END END; r[nPairs] := sChop; INC(nPairs) END END END END END; Wr.PutText(stderr, "chopped them into " & FI(nPairs,1) & " pieces\n"); WITH s = NEW(REF SegmentPairs, nPairs) DO IF r # NIL THEN s^ := SUBARRAY(r^,0,nPairs) END; RETURN s END END ChopSegmentPairs; PROCEDURE RandomizeSegmentPairs(rnd: Random.T; VAR sp: SegmentPairs) = BEGIN Wr.PutText(stderr, "randomizing list...\n"); FOR i := 0 TO LAST(sp) - 1 DO WITH j = rnd.integer(i, LAST(sp)) DO IF i # j THEN VAR t := sp[i]; BEGIN sp[i] := sp[j]; sp[j] := t END END END END END RandomizeSegmentPairs; PROCEDURE PickCandidateInSegmentPair( rnd: Random.T; READONLY sBig: SegmentPair; (* Parent segments. *) READONLY ch: Chains; (* Chain table. *) READONLY dch: VTable; (* Curvature decoding table. *) READONLY ech: VTable; (* Quantization noise variances. *) minSamples: NAT; (* Minimum samples in candidate. *) maxSamples: NAT; (* Maximum samples in candidate. *) step: LONG; (* Sampling step. *) ): PZCandidate.T = VAR sCut: SegmentPair; d, n: NAT; BEGIN <* ASSERT sBig[0].ns >= minSamples AND sBig[1].ns >= minSamples *> FOR j := 0 TO 1 DO sCut[j] := sBig[j]; WITH sj = sCut[j], u = minSamples, v = MIN(sj.ns, maxSamples), a = sj.ns - v, b = sj.ns - u, c = a + b DO <* ASSERT c >= 0 *> <* ASSERT u <= v *> d := rnd.integer(0, c); n := rnd.integer(u, v); IF d + n > sj.ns THEN d := c - d; n := u + (v - n) END; <* ASSERT d + n <= sj.ns *> <* ASSERT n >= minSamples *> <* ASSERT n <= maxSamples *> sj.ini := sj.ini + d; sj.ns := n; sj.rev := (j = 1) END END; WITH avgDistSqr = AverageDistSqr( ch[sCut[0].cvx].c^, sCut[0], ch[sCut[1].cvx].c^, sCut[1], dch, ech ), length = 0.5d0 * step * FLOAT(sCut[0].ns + sCut[1].ns - 2, LONG) DO RETURN PZCandidate.T { seg := sCut, mismatch := avgDistSqr, length := length, matchedLength := length, pm := NIL } END END PickCandidateInSegmentPair; PROCEDURE AverageDistSqr( READONLY a: PZSymbolChain.T; READONLY sa: PZSegment.T; READONLY b: PZSymbolChain.T; READONLY sb: PZSegment.T; READONLY dch: VTable; (* Curvature decoding table. *) READONLY ech: VTable; (* Quantization noise variances. *) ): LONG = (* Computes the average distance squared between segments "sa" and "sb" of the chains "a" and "b", by the trapezoid rule. Assumes the most perfect match possible (Bresenham's path in the matching grid). *) VAR sum: LONG := 0.0d0; h: LONG; BEGIN WITH nPairs = MAX(sa.ns, sb.ns), fp = FLOAT(nPairs-1, LONG), ca = PZSymbolChain.ExtractSegment(a, sa)^, fa = FLOAT(sa.ns-1, LONG), cb = PZSymbolChain.ExtractSegment(b, sb)^, fb = FLOAT(sb.ns-1, LONG) DO IF nPairs = 1 THEN RETURN PZSymbol.DistSqr( ca[0], cb[0], complement := FALSE, decode := dch, errorVar := ech ) ELSE FOR k := 0 TO nPairs-1 DO IF k = 0 OR k = nPairs-1 THEN h := 0.5d0 ELSE h := 1.0d0 END; WITH fk = FLOAT(k, LONG), ia = ROUND(fk*fa/fp), ib = ROUND(fk*fb/fp) DO sum := sum + h * PZSymbol.DistSqr( ca[ia], cb[ib], complement := FALSE, decode := dch, errorVar := ech ) END END; RETURN sum/fp END END END AverageDistSqr; PROCEDURE PrintCandidates ( READONLY cand: Candidates; maxCands: NAT; ) = BEGIN IF maxCands = 0 THEN RETURN END; Wr.PutText(stderr, "\n"); FOR i := 0 TO MIN(maxCands, NUMBER(cand))-1 DO WITH c = cand[i] DO Wr.PutText(stderr, "cand[" & Fmt.Int(i) & "]\n"); PZCandidate.Print(stderr, c, pairing := FALSE) END END; Wr.Flush(stderr); END PrintCandidates; PROCEDURE ReadChains( chainDir: TEXT; chainPrefix : TEXT; nCurves, band: NAT; ): PZSymbolChain.ReadAllData = BEGIN <* ASSERT NOT Text.Empty(chainPrefix) *> WITH sel = PZProc.SelectAll(nCurves)^ DO RETURN PZSymbolChain.ReadAll( prefix := chainPrefix, band := band, extension := ".cvc", sel := sel, dir := chainDir, headerOnly := FALSE ) END END ReadChains; PROCEDURE ReadSegments(segName: TEXT): SegData = BEGIN <* ASSERT NOT Text.Empty(segName) *> WITH fileName = segName & ".seg" DO Wr.PutText(stderr, fileName & "\n"); WITH rd = FileRd.Open(fileName), sData = PZSegment.Read(rd) DO RETURN sData END END END ReadSegments; PROCEDURE CandComment(READONLY o: Options; segCmt, chCmt: TEXT): TEXT = BEGIN RETURN "PZGetRandomCandidates\n" & " segments " & o.segments & "\n" & " chainDir " & o.chainDir & "\n" & " chainPrefix " & o.chainPrefix & "\n" & " nCurves = " & Fmt.Int(o.nCurves) & "\n" & " band = " & Fmt.Int(o.band) & "\n" & " step = " & Fmt.LongReal(o.step) & "\n" & " maxCands = " & Fmt.Int(o.maxCands) & "\n" & " minLength = " & Fmt.LongReal(o.minLength) & "\n" & " maxLength = " & Fmt.LongReal(o.maxLength) & "\n" & " blurFactor = " & Fmt.LongReal(o.blurFactor) & "\n" & " \n" & " Segments:\n" & segCmt & " Curves:\n" & chCmt & " \n" END CandComment; PROCEDURE FI(x: NAT; w: NAT): TEXT = BEGIN RETURN Fmt.Pad(Fmt.Int(x), w) END FI; BEGIN Main() END PZGetRandomCands. (* Copyright © 2001 Universidade Estadual de Campinas (UNICAMP). Authors: Helena C. G. Leitão and Jorge Stolfi. This file can be freely distributed, used, and modified, provided that this copyright and authorship notice is preserved, and that any modified versions are clearly marked as such. This software has NO WARRANTY of correctness or applicability for any purpose. Neither the authors nor their employers chall be held responsible for any losses or damages that may result from its use. *)