MODULE BITExtractSubCands EXPORTS Main; (* Last edited on 1999-12-24 05:54:33 by hcgl *) (* Extracts matching segments of given chordlength from a set of candidates. *) (* Reads a list of candidates. From each candidate, if large enough, extracts a sub-candidate with specified number of steps. Each sub-candidate is shifted so as to minimize the mean square distance between samples, after a crude realignment. *) IMPORT ParseParams, FileRd, FileWr, TextWr, Wr, Fmt; IMPORT Process, OSError, Thread, Stdio; IMPORT PZLR3Chain, PZCandidate, PZSegment, PZMatch; IMPORT LR3; FROM Math IMPORT sqrt; FROM PZTypes IMPORT LONG, LONGS, INT, NAT; FROM Stdio IMPORT stderr; <* FATAL Wr.Failure, Thread.Alerted, OSError.E *> TYPE Options = RECORD (* Data identification parameters: *) input: TEXT; (* Input candidate file name. *) output: TEXT; (* Output candidate file name. *) chainDir: TEXT; (* Directory where to find the chains *) chainPrefix: TEXT; (* Chain file name prefix. *) band: NAT; (* Nominal band width (lambda) for file names. *) extension: TEXT; (* Extension of chain files, usually ".flc" *) (* Matching parameters: *) maxShift: LONG; (* Max shift to try in alignment (pixels). *) (* Segment extraction parameters: *) maxCands: NAT; (* Number of candidates to generate *) nSteps: NAT; (* Desired number of output sampling steps. *) END; TYPE ChainData = PZLR3Chain.ReadData; Pair = PZMatch.Pair; PROCEDURE Main() = BEGIN WITH o = GetOptions(), oldCandData = PZCandidate.Read(FileRd.Open(o.input & ".can")), oldCand = oldCandData.c^, oldCmt = oldCandData.cmt, needed = PZCandidate.GetCurves(oldCand)^, chData = PZLR3Chain.ReadAll( o.chainPrefix, o.band, o.extension, sel := needed, dir := o.chainDir, headerOnly := FALSE ), ch = chData.chain^, lambda = FLOAT(o.band, LONG), newCmt = oldCmt & "\n\n" & NewCandComments(o), newCand = ProcessCandidates(oldCand, ch, o)^ DO PZCandidate.Write(FileWr.Open(o.output & ".can"), newCmt, newCand, lambda) END END Main; PROCEDURE ProcessCandidates( READONLY oldCand: PZCandidate.List; READONLY ch: ARRAY OF ChainData; READONLY o: Options; ): REF PZCandidate.List = VAR nNewCands: NAT := 0; iCand: NAT := 0; BEGIN WITH nOldCands = NUMBER(oldCand), nCurves = NUMBER(ch), length = NEW(REF LONGS, nCurves)^, newCand = NEW(REF PZCandidate.List, MIN(nOldCands, o.maxCands))^ DO FOR k := 0 TO nCurves-1 DO IF ch[k].c # NIL THEN length[k] := ChainLength(ch[k].c^) END END; WHILE iCand < nOldCands AND nNewCands < o.maxCands DO Wr.PutText(stderr, "# candidate " & FZI(iCand, 6) & "\n"); WITH newCd = ProcessOneCandidate(oldCand[iCand], ch, length, o) DO IF NOT PZCandidate.IsEmpty(newCd) THEN newCand[nNewCands] := newCd; INC(nNewCands); END END; INC(iCand) END; WITH r = NEW(REF PZCandidate.List, nNewCands) DO r^ := SUBARRAY(newCand, 0, nNewCands); RETURN r END END END ProcessCandidates; PROCEDURE ChainLength(READONLY c: PZLR3Chain.T): LONG = VAR s: LONG := 0.0d0; VAR j := LAST(c); BEGIN FOR i := 0 TO LAST(c) DO s := s + LR3.Dist(c[i], c[j]); j := i END; RETURN s END ChainLength; PROCEDURE ProcessOneCandidate( READONLY cand: PZCandidate.T; READONLY ch: ARRAY OF ChainData; READONLY chLength: LONGS; READONLY o: Options ): PZCandidate.T = (* Let "a,b" be the two curves that constitute candidate "cand". This procedure rotates and translates curve "a" (through a 4×4 matrix "m") so as to optimize the match with curve "b". Then it extracts two matching segments of prescribed length from the two segments, and returns them as a candidate. If there aren't enough samples in the candidate, returns an trivial candidate with only one sample. *) BEGIN WITH aSeg = cand.seg[0], cha = ch[aSeg.cvx], ma = NUMBER(cha.c^), rna = aSeg.ns, aStep = chLength[aSeg.cvx] / FLOAT(ma, LONG), aExtra = CEILING(o.maxShift/2.0d0 / aStep), na = MIN(rna + 2*aExtra, ma), aStretch = (na - rna) DIV 2, bSeg = cand.seg[1], chb = ch[bSeg.cvx], mb = NUMBER(chb.c^), rnb = bSeg.ns, bStep = chLength[bSeg.cvx] / FLOAT(mb, LONG), bExtra = CEILING(o.maxShift/2.0d0 / bStep), nb = MIN(rnb + 2*bExtra, mb), bStretch = (nb - rnb) DIV 2, extra = ARRAY [0..1] OF INT { aStretch, bStretch}, eCand = PZCandidate.Expand(cand, extra, extra), ca = PZLR3Chain.ExtractSegment(cha.c^, eCand.seg[0])^, cb = PZLR3Chain.ExtractSegment(chb.c^, eCand.seg[1])^, mt = GetMatching(eCand)^, step = (aStep + bStep) / 2.0d0, ctr = SelectSubSegments( a := ca, b := cb, mt := mt, nSteps := o.nSteps, maxAdjust := CEILING(o.maxShift / step) ) DO Wr.PutText(stderr, "\n"); RETURN MakeSubCand(cand, ctr, o.nSteps) END END ProcessOneCandidate; PROCEDURE MakeSubCand( READONLY cand: PZCandidate.T; ctr: Pair; nSteps: NAT ): PZCandidate.T = (* Returns a sub-candidate of "cand" with two segments of length "nSteps" centered on samples "ctr[0]" and "ctr[1]". (relative to the ends of "cand"). If "ctr = (0,0)", returns an "Empty" candidate. *) VAR aSeg: PZSegment.T := cand.seg[0]; bSeg: PZSegment.T := cand.seg[1]; BEGIN <* ASSERT nSteps MOD 2 = 0 *> IF ctr[0] = 0 OR ctr[1] = 0 THEN RETURN PZCandidate.Empty END; WITH hSteps = nSteps DIV 2, aCtr = PZSegment.AbsIndex(ctr[0], aSeg), aIni = (aCtr - hSteps) MOD aSeg.tot, bCtr = PZSegment.AbsIndex(ctr[1], bSeg), bIni = (bCtr - hSteps) MOD bSeg.tot DO aSeg.ini := aIni; aSeg.ns := nSteps + 1; bSeg.ini := bIni; bSeg.ns := nSteps + 1; RETURN PZCandidate.T{ seg := PZSegment.Pair{aSeg, bSeg}, mismatch := 0.0d0, length := 0.0d0, matchedLength := 0.0d0, pm := NIL } END END MakeSubCand; PROCEDURE GetMatching(READONLY cand: PZCandidate.T): REF PZMatch.T = BEGIN IF cand.pm # NIL THEN (* Return the original matching: *) RETURN PZMatch.Unpack(cand.pm^); ELSE (* Create a "most perfect" matching for the two old chains: *) WITH n0 = cand.seg[0].ns, n1 = cand.seg[1].ns DO RETURN PZMatch.MostPerfect(n0, n1); END; END; END GetMatching; PROCEDURE SelectSubSegments( READONLY a, b: PZLR3Chain.T; (* The sure segments, reversed as needed. *) READONLY mt: PZMatch.T; (* The matching between "a" and "b" samples. *) nSteps: NAT; (* Requires number of sampling steps in output. *) maxAdjust: NAT; (* Max number of steps to shift "b" rel to "a". *) ): Pair = (* Extracts from the given pair of segments a pair of sub-segments with the specified number of sampling steps, and returns the center pair. The sub-segments are aligned according to the matching "mt" plus or minus "maxAdjust" steps. *) BEGIN <* ASSERT nSteps MOD 2 = 0 *> WITH hStepsPlus = nSteps DIV 2 + (maxAdjust + 1) DIV 2, i = 0, j = LAST(mt), k = Midpoint(mt, i,j) DO IF mt[k][0] - mt[i][0] < hStepsPlus OR mt[k][1] - mt[i][1] < hStepsPlus OR mt[j][0] - mt[k][0] < hStepsPlus OR mt[j][1] - mt[k][1] < hStepsPlus THEN Wr.PutText(stderr, "# candidate too short\n"); RETURN Pair{0,0} ELSE RETURN FindAlignment(a, b, mt[i], mt[k], mt[j], nSteps, maxAdjust) END END END SelectSubSegments; PROCEDURE FindAlignment( READONLY a, b: PZLR3Chain.T; ini: Pair; ctr: Pair; fin: Pair; nSteps: NAT; maxAdjust: NAT; ): Pair = VAR ctrBest: Pair; d2Best: LONG := LAST(LONG); shiftBest: INT; BEGIN <* ASSERT nSteps MOD 2 = 0 *> WITH hSteps = nSteps DIV 2 DO FOR shift := -maxAdjust TO maxAdjust DO WITH da = shift DIV 2, db = shift - da, aCtr = ctr[0] + da, aIni = aCtr - hSteps, aFin = aCtr + hSteps, bCtr = ctr[1] + db, bIni = bCtr - hSteps, bFin = bCtr + hSteps DO IF aIni >= ini[0] AND aFin <= fin[0] AND bIni >= ini[1] AND bFin <= fin[1] THEN WITH ca = SUBARRAY(a, aIni, nSteps+1), cb = SUBARRAY(b, bIni, nSteps+1), m = PZLR3Chain.AlignmentMatrix(ca, cb), cam = PZLR3Chain.Map(ca, m)^, d2 = MeanDistSqr(cam, cb) DO IF shift = 0 THEN Wr.PutText(stderr, "# shift = " & FI(shift,3)); Wr.PutText(stderr, " dist = " & FLR(sqrt(d2),8,3) & "\n"); END; IF d2 < d2Best THEN d2Best := d2; shiftBest := shift; ctrBest := Pair{aCtr, bCtr} END END END END END; Wr.PutText(stderr, "# shift = " & FI(shiftBest,3)); Wr.PutText(stderr, " dist = " & FLR(sqrt(d2Best),8,3) & " (best)\n"); RETURN ctrBest END END FindAlignment; PROCEDURE MeanDistSqr( READONLY a, b: PZLR3Chain.T; (* The two extracted segments, mapped *) ): LONG = VAR d2: LONG; BEGIN WITH NA = NUMBER(a), NB = NUMBER(b), N = MAX(NA, NB) DO d2 := LR3.DistSqr(a[0], b[0]); FOR i := 1 TO N-1 DO WITH ia = ROUND(FLOAT((NA-1)*i,LONG)/FLOAT(N-1,LONG)), ib = ROUND(FLOAT((NB-1)*i,LONG)/FLOAT(N-1,LONG)) DO d2 := d2 + LR3.DistSqr(a[ia], b[ib]) END END; RETURN 2.0d0 * d2/FLOAT(NA + NB, LONG) END END MeanDistSqr; PROCEDURE Midpoint(READONLY mt: PZMatch.T; i, j: NAT): NAT = (* Finds an index "k" such that "s[k]" is approximately "(s[i]+s[j])/2" where "s[r] = (mt[r][0] + mt[r][1])". *) VAR kMin, kMax: NAT; BEGIN WITH sm = ((mt[i][0] + mt[i][1]) + (mt[j][0] + mt[j][1])) DIV 2 DO kMin := i; kMax := j; WHILE kMin < kMax DO WITH sMin = mt[kMin][0] + mt[kMin][1], sMax = mt[kMax][0] + mt[kMax][1], r = FLOAT(sm - sMin, LONG)/FLOAT(sMax - sMin, LONG), k = MIN(kMin + FLOOR(r*FLOAT(kMax - kMin, LONG)), kMax-1), sk0 = mt[k][0] + mt[k][1], sk1 = mt[k+1][0] + mt[k+1][1] DO <* ASSERT sMin <= sm *> <* ASSERT sMax >= sm *> IF sm < sk0 THEN kMax := k ELSIF sm > sk1 THEN kMin := k+1 ELSIF sm - sk0 < sk1 - sm THEN RETURN k ELSE RETURN k+1 END END END; <* ASSERT kMin = kMax *> RETURN kMin END END Midpoint; PROCEDURE GetOptions(): Options = VAR o: Options; BEGIN WITH pp = NEW(ParseParams.T).init(stderr) DO TRY (* Data identification parameters: *) pp.getKeyword("-input"); o.input := pp.getNext(); pp.getKeyword("-chainPrefix"); o.chainPrefix := pp.getNext(); IF pp.keywordPresent("-chainDir") THEN o.chainDir := pp.getNext() ELSE o.chainDir := "." END; pp.getKeyword("-band"); o.band := pp.getNextInt(); IF pp.keywordPresent("-extension") THEN o.extension := pp.getNext() ELSE o.extension := ".flc" END; pp.getKeyword("-output"); o.output := pp.getNext(); (* Matching parameters: *) pp.getKeyword("-maxShift"); o.maxShift := pp.getNextLongReal(); (* Segment extraction parameters: *) pp.getKeyword("-nSteps"); o.nSteps := pp.getNextInt(2,4096); IF pp.keywordPresent("-maxCands") THEN o.maxCands := pp.getNextInt(1) ELSE o.maxCands := LAST(NAT) END; pp.finish(); EXCEPT | ParseParams.Error => Wr.PutText(stderr, "Usage: BITExtractSubCands \\\n"); Wr.PutText(stderr, " -input NAME \\\n"); Wr.PutText(stderr, " -chainPrefix NAME [ -chainDir DIR ] \\\n"); Wr.PutText(stderr, " -band BAND [ -extension EXT ] \\\n"); Wr.PutText(stderr, " -output NAME \\\n"); Wr.PutText(stderr, " -maxShift NUM \\\n"); Wr.PutText(stderr, " -nSteps NUMBER [ -maxCands NUMBER ]\n"); Process.Exit(1); END; END; RETURN o END GetOptions; PROCEDURE NewCandComments(READONLY o: Options):TEXT= BEGIN WITH wr = NEW(TextWr.T).init() DO Wr.PutText( wr, "BITExtracSubCands \n"); Wr.PutText( wr, " input = \"" & o.input & "\"\n"); Wr.PutText( wr, " output = \"" & o.output & "\"\n"); Wr.PutText( wr, " chainDir = \"" & o.chainDir & "\"\n"); Wr.PutText( wr, " chainPrefix = \"" & o.chainPrefix & "\"\n"); Wr.PutText( wr, " band = " & Fmt.Int(o.band) & "\n"); Wr.PutText( wr, " extension = \"" & o.extension & "\"\n"); Wr.PutText( wr, " maxShift = " & FLR(o.maxShift,8,3) & " pixels\n"); Wr.PutText( wr, " maxCands = " & Fmt.Int(o.maxCands) & "\n"); Wr.PutText( wr, " nSteps = " & Fmt.Int(o.nSteps) & "\n"); RETURN(TextWr.ToText(wr)) END (* DO *); END NewCandComments; PROCEDURE FI(x: INT; w: NAT): TEXT = BEGIN RETURN Fmt.Pad(Fmt.Int(x), w) END FI; PROCEDURE FZI(x: INT; w: NAT): TEXT = BEGIN RETURN Fmt.Pad(Fmt.Int(x), w, '0') END FZI; PROCEDURE FLR(x: LONG; w, d: NAT): TEXT = BEGIN RETURN Fmt.Pad(Fmt.LongReal(x, prec := d, style := Fmt.Style.Fix), w) END FLR; BEGIN Main() END BITExtractSubCands.