MODULE PZDistSqrHistogram EXPORTS Main; (* Plots histogram of sample discrepancies in a given set of candidates *) (* Last edited on 2001-11-15 18:54:01 by stolfi *) (* This program reads a set of candidates (not necessarily good ones), and outputs the histogram of the sample discrepancies (point distances and curvature differences, both plain and encoded/decoded). The histograms are written to files ending with "-Pt.plt", "-Cv.plt", and "-Sy.plt", respetively. If a candidate "c" has an explicit matching "c.pm", the discrepancies are computed between the pairs of samples indicated by "pm". Otherwise one uses the matching that covers the entire candidate "c" and minimizes the sum of squared distances time step length. *) IMPORT Wr, Fmt, Thread, OSError, Stdio, ParseParams; IMPORT Process, FileRd, FileWr; IMPORT LR3, LR4x4; IMPORT PZSymbol, PZMatch, PZGeo, PZCandidate, PZHistogram; IMPORT PZLRChain, PZLR3Chain, PZSymbolChain; FROM Math IMPORT sqrt; FROM PZHistogram IMPORT Avg; FROM Stdio IMPORT stderr; FROM PZTypes IMPORT INT, LONG, NAT, BOOLS; TYPE Options = RECORD input: TEXT; (* Input candidate file (without extensions). *) maxCands: NAT; (* Max num of candidates to consider. *) chainDir: TEXT; (* Directory where to find chain files. *) chainPrefix: TEXT; (* Input chain file name prefix. *) band: NAT; (* Nominal band width (for chain file names). *) output: TEXT; (* Output plot file name (without extensions). *) (* If any of these is zero, the corresponding plot is skipped: *) maxPtDist: LONG; (* Maximum plotted point distance. *) maxCvDist: LONG; (* Maximum plotted curvature difference. *) maxSyDist: LONG; (* Maximum plotted encoded curvature difference. *) END; <* FATAL Wr.Failure, Thread.Alerted, OSError.E *> PROCEDURE Main() = BEGIN WITH o = GetOptions() DO WITH candData = PZCandidate.Read(FileRd.Open(o.input & ".can")), cand = SUBARRAY(candData.c^, 0, MIN(o.maxCands, NUMBER(candData.c^))), chainUsed = PZCandidate.ChainsUsed(cand)^ DO IF o.maxPtDist # 0.0d0 THEN WITH h = ComputePtHistogram(cand, chainUsed, o.maxPtDist, o) DO OutputHistogram(o.output, "Pt", h) END END; IF o.maxCvDist # 0.0d0 THEN WITH h = ComputeCvHistogram(cand, chainUsed, o.maxCvDist, o) DO OutputHistogram(o.output, "Cv", h) END END; IF o.maxSyDist # 0.0d0 THEN WITH h = ComputeSyHistogram(cand, chainUsed, o.maxSyDist, o) DO OutputHistogram(o.output, "Sy", h) END END END END END Main; PROCEDURE ComputePtHistogram( VAR (*IO*) cand: PZCandidate.List; READONLY chainUsed: BOOLS; maxDist: LONG; READONLY o: Options; ): PZHistogram.T = (* Computes the histogram of squared distances between points. Also computes a packed matching "c.pm" for each candidate, if it is missing one. *) CONST NBins = 51; (* Bins in histograms *) VAR rmt: REF PZMatch.T; VAR rCost: REF PZMatch.CostMatrix := NIL; BEGIN WITH chAllData = PZLR3Chain.ReadAll( prefix := o.chainPrefix, band := o.band, extension := ".flc", sel := chainUsed, dir := o.chainDir, headerOnly := FALSE, centralize := FALSE ), chData = chAllData.chData^, h = PZHistogram.New(0.0d0, maxDist*maxDist, NBins)^ DO FOR i := 0 TO LAST(cand) DO WITH c = cand[i], seg = c.seg, cvxa = seg[0].cvx, cvxb = seg[1].cvx, a = PZLR3Chain.ExtractSegment(chData[cvxa].c^, seg[0])^, b = PZLR3Chain.ExtractSegment(chData[cvxb].c^, seg[1])^ DO MatchGeometricChains(c, a, b, rmt, rCost); WITH mt = rmt^ DO FOR i := 0 TO LAST(mt) DO WITH ia = mt[i][0], ib = mt[i][1] DO (* Wr.PutText(stderr, "ia = " & FI(ia,4) & " ib = " & FI(ib,4) & "\n"); Wr.PutText(stderr, "a[ia] = " & FLR3(a[ia],6,2) & " b[ib] = " & FLR3(b[ib], 6,2) & "\n"); *) PZHistogram.Add(h, LR3.DistSqr(a[ia], b[ib])) END END; IF c.pm = NIL THEN c.pm := PZMatch.Pack(mt) END END END END; Wr.PutText(stderr, "rms geometric distance = " & FLR(sqrt(Avg(h)), 9,6) & "\n"); RETURN h END END ComputePtHistogram; PROCEDURE MatchGeometricChains( VAR (*IO*) c: PZCandidate.T; VAR (*IO*) a, b: PZLR3Chain.T; VAR (*OUT*) rmt: REF PZMatch.T; VAR (*WORK*) rCost: REF PZMatch.CostMatrix; ) = (* Translates and rotates the chains "a" and "b" so as to minimize the average distance between them. Also returns a matching "rmt^" between their samples. If candidate "c" has an explicit matching "c.pm", that matching is used to adjust the two chains, and is returned in "rmt". If the candidate has no explicit matching, the chains are positioned by matching their endpoints only. (This simple method is probably better than any fancier schema, since candidates without explicit matchings are probably created by hand and checked visually with "PZDrawCands" in which case the simple alignment by endpoints is just what the user intended.) *) VAR avgDist: LONG; BEGIN (* Get the initial matching: *) IF c.pm # NIL THEN rmt := PZMatch.Unpack(c.pm^); AlignMatchedPoints(a, b, rmt^) ELSE MakeHorizontal(a); MakeHorizontal(b); PZLR3Chain.FindBestMatch( (*IN:*) a, b, maxDistSqr := LAST(LONG), removeUnmatchedEnds := FALSE, (*OUT:*) mt := rmt, avgDist := (*OUT*) avgDist, length := c.length, matchedLength := c.matchedLength, (*WORK:*) rCost := rCost ); c.mismatch := c.length * avgDist*avgDist; END; END MatchGeometricChains; PROCEDURE MakeHorizontal(VAR a: PZLR3Chain.T) = (* Translates and rotates a chain "a" so that its end-points are on the X-axis, at equal distances from the origin. *) BEGIN WITH p = a[0], q = a[LAST(a)], ctr = PZGeo.SegMid(p, q), u = PZGeo.SegDir(p, ctr), disp = (LR3.T){-ctr[0],-ctr[1],-ctr[2]}, trn = PZGeo.Translation(disp), (* -ctr *) rot = PZGeo.Rotation(u, LR3.T{1.0d0, 0.0d0, 0.0d0}), map = LR4x4.Mul(trn,rot) DO PZLR3Chain.DoMap(a, map) END; END MakeHorizontal; PROCEDURE AlignMatchedPoints( VAR a, b: PZLR3Chain.T; READONLY mt: PZMatch.T; ) = (* Translates and rotates the two chains so as to minimize the sum of the squared distances from "a[mt[i,0]]" to "b[mt[i,1]]", weighted by the length of the steps of "mt" adjacent to "mt[i]". *) PROCEDURE PairWeight(i: NAT): LONG = (* The weight of the pair "mt[i]", defined as half of the weight of the adjacent steps of "mt", where a full step has weight 1 and a half-step has weight 1/2. *) VAR s: LONG := 0.0d0; BEGIN IF i > FIRST(mt) THEN IF mt[i-1,0] = mt[i,0] OR mt[i-1,1] = mt[i,1] THEN s := s + 0.25d0 ELSE s := s + 0.5d0 END END; IF i < LAST(mt) THEN IF mt[i+1,0] = mt[i,0] OR mt[i+1,1] = mt[i,1] THEN s := s + 0.25d0 ELSE s := s + 0.5d0 END END; RETURN s END PairWeight; BEGIN (* Bring the barycenters of "a" and "b" "(0,0)": *) VAR aSum, bSum: LR3.T := LR3.T{0.0d0, ..}; wSum: LONG := 0.0d0; BEGIN (* Compute the weighted barycenters of "a" and "b": *) FOR i := 0 TO LAST(mt) DO WITH ia = mt[i][0], ib = mt[i][1], w = PairWeight(i) DO aSum := LR3.Mix(1.0d0, aSum, w, a[ia]); bSum := LR3.Mix(1.0d0, bSum, w, b[ib]); wSum := wSum + w END END; (* Translate the chains: *) WITH aBar = LR3.Scale(1.0d0/wSum, aSum) DO FOR i := 0 TO LAST(a) DO a[i] := LR3.Sub(a[i], aBar) END END; WITH bBar = LR3.Scale(1.0d0/wSum, bSum) DO FOR i := 0 TO LAST(b) DO b[i] := LR3.Sub(b[i], bBar) END END; END; (* Rotate "b" to minimize the discrepancy with "a": *) VAR s, c: LONG := 0.0d0; BEGIN (* Now the sum of the weighted distances squared is a sinusoid of the form "K - 2*(c*cos(theta) + s*sin(theta))" where "theta" is the rotation of "b". *) (* Compute the coeficients "c" and "s": *) FOR i := 0 TO LAST(mt) DO WITH ia = mt[i][0], xa = a[ia][0], ya = a[ia][1], ib = mt[i][1], xb = b[ib][0], yb = b[ib][1], w = PairWeight(i) DO c := c + w * (xa*xb + ya*yb); s := s - w * (xa*yb - ya*xb); END END; IF c # 0.0d0 OR s # 0.0d0 THEN (* Rotate the chain "b" by the angle of (c,s) *) WITH r = sqrt(c*c + s*s), c = c/r, s = s/r DO FOR i := 0 TO LAST(b) DO WITH x = b[i][0], y = b[i][1], xr = x*c - y*s, yr = x*s + y*c DO x := xr; y := yr END END END END END END AlignMatchedPoints; PROCEDURE ComputeCvHistogram( READONLY cand: PZCandidate.List; READONLY chainUsed: BOOLS; maxDist: LONG; READONLY o: Options; ): PZHistogram.T = CONST NBins = 51; (* Bins in histograms *) BEGIN WITH chAllData = PZLRChain.ReadAll( prefix := o.chainPrefix, band := o.band, extension := ".fcv", sel := chainUsed, dir := o.chainDir, headerOnly := FALSE ), chData = chAllData.chData^, h = PZHistogram.New(0.0d0, maxDist*maxDist, NBins)^ DO FOR i := 0 TO LAST(cand) DO WITH c = cand[i], seg = c.seg, mt = GetCandMatching(c)^, cvxa = seg[0].cvx, cvxb = seg[1].cvx, a = PZLRChain.ExtractSegment(chData[cvxa].c^, seg[0], curvature := TRUE)^, b = PZLRChain.ExtractSegment(chData[cvxb].c^, seg[1], curvature := TRUE)^ DO FOR i := 0 TO LAST(mt) DO WITH ia = mt[i][0], ib = mt[i][1], d = a[ia] - b[ib] DO PZHistogram.Add(h, d*d) END END END END; Wr.PutText(stderr, "rms curvature difference = " & FLR(sqrt(Avg(h)), 9,6) & "\n"); RETURN h END END ComputeCvHistogram; PROCEDURE ComputeSyHistogram( READONLY cand: PZCandidate.List; READONLY chainUsed: BOOLS; maxDist: LONG; READONLY o: Options; ): PZHistogram.T = CONST NBins = 53; (* Bins in histograms *) BEGIN WITH chAllData = PZSymbolChain.ReadAll( prefix := o.chainPrefix, band := o.band, extension := ".cvc", sel := chainUsed, dir := o.chainDir, headerOnly := FALSE ), chData = chAllData.chData^, h = PZHistogram.New(0.0d0, maxDist*maxDist, NBins)^, curvSigma = chAllData.sigmaMax, decode = PZSymbol.MakeDecodeTable(curvSigma)^, errorVar = PZSymbol.MakeErrorVarTable(curvSigma)^ DO <* ASSERT chAllData.sigmaMax = chAllData.sigmaMin *> FOR i := 0 TO LAST(cand) DO WITH c = cand[i], seg = c.seg, mt = GetCandMatching(c)^, cvxa = seg[0].cvx, cvxb = seg[1].cvx, a = PZSymbolChain.ExtractSegment(chData[cvxa].c^, seg[0])^, b = PZSymbolChain.ExtractSegment(chData[cvxb].c^, seg[1])^ DO FOR i := 0 TO LAST(mt) DO WITH ia = mt[i][0], ib = mt[i][1], d2 = PZSymbol.DistSqr(a[ia],b[ib], complement := FALSE, decode := decode, errorVar := errorVar ) DO PZHistogram.Add(h, d2) END END END END; Wr.PutText(stderr, "rms curvature difference = " & FLR(sqrt(Avg(h)), 9,6) & "\n"); RETURN h END END ComputeSyHistogram; PROCEDURE GetCandMatching(READONLY c: PZCandidate.T): REF PZMatch.T = BEGIN IF c.pm # NIL THEN RETURN PZMatch.Unpack(c.pm^) ELSE RETURN PZMatch.MostPerfect(c.seg[0].ns, c.seg[1].ns) END; END GetCandMatching; PROCEDURE GetOptions(): Options = VAR o: Options; BEGIN WITH pp = NEW(ParseParams.T).init(stderr) DO TRY pp.getKeyword("-input"); o.input := pp.getNext(); IF pp.keywordPresent("-maxCands") THEN o.maxCands := pp.getNextInt(1) ELSE o.maxCands := LAST(NAT) END; IF pp.keywordPresent("-chainDir") THEN o.chainDir := pp.getNext() ELSE o.chainDir := "." END; pp.getKeyword("-chainPrefix"); o.chainPrefix := pp.getNext(); pp.getKeyword("-band"); o.band := pp.getNextInt(); IF pp.keywordPresent("-maxPtDist") THEN o.maxPtDist := pp.getNextLongReal(0.000001d0) ELSE o.maxPtDist := 0.0d0 END; IF pp.keywordPresent("-maxCvDist") THEN o.maxCvDist := pp.getNextLongReal(0.000001d0) ELSE o.maxCvDist := 0.0d0 END; IF pp.keywordPresent("-maxSyDist") THEN o.maxSyDist := pp.getNextLongReal(0.000001d0) ELSE o.maxSyDist := 0.0d0 END; pp.getKeyword("-output"); o.output := pp.getNext(); pp.finish(); EXCEPT | ParseParams.Error => Wr.PutText(stderr, "Usage: PZDistSqrHistogram \\\n"); Wr.PutText(stderr, " -input FNAME [ -maxCands NUMBER ] \\\n"); Wr.PutText(stderr, " [ -chainDir NAME ] -chainPrefix NAME \\\n"); Wr.PutText(stderr, " -band NAT \\\n"); Wr.PutText(stderr, " -output FNAME \\\n"); Wr.PutText(stderr, " [ -maxPtDistSqr NUM ] \\\n"); Wr.PutText(stderr, " [ -maxCvDistSqr NUM ] \\\n"); Wr.PutText(stderr, " [ -maxSyDistSqr NUM ] \n"); Process.Exit(1); END; END; RETURN o END GetOptions; PROCEDURE OutputHistogram( output: TEXT; typeTag: TEXT; READONLY h: PZHistogram.T; ) = BEGIN WITH fileName = output & "-" & typeTag & ".plt" DO Wr.PutText(stderr, "writing " & fileName & "\n"); WITH wr = FileWr.Open(fileName) DO PZHistogram.Output(wr, h); Wr.Close(wr) END END END OutputHistogram; <*UNUSED*> PROCEDURE FI(x: INT; w: NAT): TEXT = BEGIN RETURN Fmt.Pad(Fmt.Int(x), w) END FI; PROCEDURE FLR(x: LONG; w,p: NAT): TEXT = BEGIN RETURN Fmt.Pad(Fmt.LongReal(x, style := Fmt.Style.Fix, prec := p), w) END FLR; <*UNUSED*> PROCEDURE FLR3(x: LR3.T; w,p: NAT): TEXT = BEGIN RETURN "( " & FLR(x[0],w,p) & " " & FLR(x[1],w,p) & " " & FLR(x[2],w,p) & " )" END FLR3; BEGIN Main() END PZDistSqrHistogram. (* Copyright © 2001 Universidade Estadual de Campinas (UNICAMP). Authors: Helena C. G. Leitão and Jorge Stolfi. This file can be freely distributed, used, and modified, provided that this copyright and authorship notice is preserved, and that any modified versions are clearly marked as such. This software has NO WARRANTY of correctness or applicability for any purpose. Neither the authors nor their employers chall be held responsible for any losses or damages that may result from its use. *)