MODULE PZRefineCands EXPORTS Main;
(* Last edited on 1999-11-15 01:29:57 by hcgl *)

(* Refines a list of candidate matchings, using DP on coded curvature chains.  *)

IMPORT ParseParams, FileRd, FileWr, TextWr, Wr, Fmt, CPUTime;
IMPORT Process, OSError, Thread, Stdio;
IMPORT PZSymbol, PZSymbolChain, PZCandidate; 
IMPORT PZSegment, PZMismatch, PZMatch;  
 
FROM Math IMPORT sqrt;
FROM Stdio IMPORT stderr;
FROM PZTypes IMPORT LONG, LONGS, NAT;
 
<* FATAL Wr.Failure, Thread.Alerted, OSError.E *>

TYPE
  MismatchOptions = PZMismatch.Options;
  Options = RECORD
      (* The input data: *)
      input: TEXT;            (* Input candidate file name. *)
      chainDir: TEXT;         (* Directry where to find chain files. *)
      chainPrefix: TEXT;      (* Invariant chain file name prefix. *)
      band: NAT;              (* Nominal band width (lambda) for file names. *)
      step: LONG;             (* Sampling step. *)
      curvSigma: LONG;        (* "sigma" parameter for curvature encoding. *)
      maxInputCands: NAT;     (* For debugging: truncate input at this count. *)
      
      (* Parameters of the refinement: *)
      mp: MismatchOptions;    (* Mismatch formula parameters. *)
      minLength: LONG;        (* Minimum length of matching segments. *)
      blurFactor: LONG;       (* Corner broadening factor. *)
      extraLength: LONG;      (* Extra length to add at each candidate end. *)
      maxRefineShift: LONG;   (* Max shift during refinement of the matching. *)
      
      (* Parameters that control the pruning of the candidate list: *)
      maxCurveCands: NAT;     (* Max candidates per curve. *)
      maxPairCands: NAT;      (* Max candidates per curve pair. *)

      (* Parameters that control the output: *)
      output: TEXT;           (* Output candidate file (without ".???"). *)
    END;

TYPE
  VTable = PZSymbol.Table;

PROCEDURE Main() =
  BEGIN
    WITH 
      o = GetOptions(),
      candData = PZCandidate.Read(FileRd.Open(o.input & ".can")),
      oldCand = SUBARRAY(candData.c^, 0, MIN(o.maxInputCands, NUMBER(candData.c^))),
      lambda = candData.lambda,
      curves = PZCandidate.GetCurves(oldCand)^,
      chData = PZSymbolChain.ReadAll(
        o.chainPrefix, o.band, ".cvc",
        sel := curves, headerOnly := FALSE,
        dir := o.chainDir
      ),
      minLength = o.minLength - o.blurFactor * 2.0d0 * lambda,
      newCand = RefineCandidates(oldCand, chData.chain^,
        maxShift := o.maxRefineShift,
        mp := o.mp,
        step := o.step,
        curvSigma := o.curvSigma,
        minLength := minLength, 
        extraLength := o.extraLength, 
        maxCurveCands := o.maxCurveCands, 
        maxPairCands := o.maxPairCands
      )^,
      wr = FileWr.Open(o.output & ".can")
    DO
      PZCandidate.Write(wr, candData.cmt & RefineComments(o), newCand, lambda);
    END;
  END Main;          

      
PROCEDURE GetOptions(): Options =
  VAR o: Options;
  BEGIN
    WITH
      pp = NEW(ParseParams.T).init(stderr)
    DO
      TRY
        pp.getKeyword("-input");
        o.input := pp.getNext();
        
        pp.getKeyword("-chainPrefix");
        o.chainPrefix := pp.getNext();
        
        IF pp.keywordPresent("-chainDir") THEN 
          o.chainDir := pp.getNext()
        ELSE
          o.chainDir := "."
        END;
        
        pp.getKeyword("-band");
        o.band := pp.getNextInt();
        
        pp.getKeyword("-step");
        o.step := pp.getNextLongReal(0.0d0, 1024.0d0);
        
        pp.getKeyword("-curvSigma");
        o.curvSigma := pp.getNextLongReal(0.0d0, 1024.0d0);
        
        pp.getKeyword("-output");
        o.output := pp.getNext();
        
        pp.getKeyword("-minLength");
        o.minLength := pp.getNextLongReal();
        
        pp.getKeyword("-blurFactor");
        o.blurFactor := pp.getNextLongReal(0.0d0, 5.0d0);
        
        pp.getKeyword("-extraLength");
        o.extraLength := pp.getNextLongReal(0.0d0);
        
        pp.getKeyword("-maxRefineShift");
        o.maxRefineShift := pp.getNextLongReal(0.0d0);
        
        o.mp := PZMismatch.ParseOptions(pp);
        
        IF pp.keywordPresent("-maxInputCands") THEN
          o.maxInputCands := pp.getNextInt(1)
        ELSE
          o.maxInputCands := LAST(NAT)
        END;

        IF pp.keywordPresent("-maxCurveCands") THEN
          o.maxCurveCands := pp.getNextInt(1)
        ELSE
          o.maxCurveCands := LAST(NAT)
        END;
                         
        IF pp.keywordPresent("-maxPairCands") THEN
          o.maxPairCands := pp.getNextInt(1, o.maxCurveCands)
        ELSE
          o.maxPairCands := LAST(NAT)
        END;
        
        pp.finish();                                       
      EXCEPT                                                            
      | ParseParams.Error =>                                              
          Wr.PutText(stderr, "Usage: PZRefineCands \\\n");
          Wr.PutText(stderr, "  -input NAME \\\n");
          Wr.PutText(stderr, "  -chainPrefix NAME [ -chainDir DIR ] \\\n");
          Wr.PutText(stderr, "  -band NUMBER -step PIXELS \\\n");
          Wr.PutText(stderr, "  -curvSigma NUMBER \\\n");
          Wr.PutText(stderr, "  -output NAME \\\n");
          Wr.PutText(stderr, "  -minLength PIXELS -blurFactor NUMBER \\\n");
          Wr.PutText(stderr, "  -extraLength PIXELS \\\n");
          Wr.PutText(stderr, "  -maxRefineShift PIXELS \\\n");
          Wr.PutText(stderr, "  " & PZMismatch.OptionsHelp & " \\\n");
          Wr.PutText(stderr, "  [ -maxInputCands NUMBER ] \\\n");
          Wr.PutText(stderr, "  [ -maxCurveCands NUMBER ] \\\n");
          Wr.PutText(stderr, "  [ -maxPairCands NUMBER ]\n");
        Process.Exit(1);
      END;
    END;
    RETURN o
  END GetOptions;


PROCEDURE RefineCandidates(
    READONLY oldCands: PZCandidate.List;
    READONLY ch: ARRAY OF PZSymbolChain.ReadData; 
    maxShift: LONG;        (* Max shift to try alignment using PD ??. *)        
    mp: MismatchOptions;   (* Mismatch formula parameters. *)
    step: LONG;            (* Sampling step. *)
    curvSigma: LONG;       (* Sigma for curvature encoding. *)
    minLength: LONG;       (* Min required length of matched segments. *)
    extraLength: LONG;     (* Extra length to consider at both ends. *)
    maxCurveCands: NAT;    (* Max candidates per curve. *)
    maxPairCands: NAT;     (* Max candidates per curve pair. *)
  ) : REF PZCandidate.List =
  VAR 
    nNew: NAT := 0;        (* Refined candidate count *)
    newCands : REF PZCandidate.List;
    rCost: REF PZMatch.CostMatrix := NIL;
    minAvgDist: REF LONGS := NIL;
    VAR rca, rcb: REF PZSymbolChain.T := NIL;
    VAR rmt: REF PZMatch.T := NIL;
  BEGIN 
    WITH
      nCands = NUMBER(oldCands),
      start = CPUTime.Now(),
      dch = PZSymbol.MakeDecodeTable(curvSigma)^,
      ech = PZSymbol.MakeErrorVarTable(curvSigma)^
    DO
      (* Wr.PutText(stderr, "minLength = " &  Fmt.LongReal(minLength) & "\n"); *) 
      Wr.PutText(stderr, "candidates before refine loop = " & Fmt.Int(nCands) & "\n"); 
      newCands := NEW(REF PZCandidate.List, nCands);
      FOR i := 0 TO nCands-1 DO 
        WITH 
          cOld = oldCands[i],
          cNew = RefineOneCandidate(cOld, ch,
            maxShift := maxShift,
            mp := mp,
            dch := dch, ech := ech,
            step := step,
            minLength := minLength, 
            extraLength := extraLength,
            minAvgDist := minAvgDist,
            (*WORK*)
            rCost := rCost, 
            rca := rca, rcb := rcb,
            rmt := rmt
          )
        DO
          IF cNew.mismatch <= 0.0d0
          AND cNew.length > minLength THEN 
            newCands[nNew] := cNew;
            INC(nNew);
          END;
        END;
        IF (i MOD 80 = 0) THEN Wr.PutText(stderr, "\n") END;
        Wr.PutText(stderr, ".");
      END;
      Wr.PutText(stderr, "\n");
      WITH t = CPUTime.Now() - start DO
        Wr.PutText(stderr, "time for RefineCandidates = " & FLR(t,0,1) & " sec\n")
      END;
      Wr.PutText(stderr, "candidates before pruning      = " & Fmt.Int(nNew) & "\n"); 
      PZCandidate.Sort(SUBARRAY(newCands^, 0, nNew), PZCandidate.PairMismatchBetter);
      PZCandidate.RemoveDuplicates(newCands^, nNew);
      Wr.PutText(stderr, "distinct candidates            = " & Fmt.Int(nNew) & "\n"); 
      PZCandidate.PruneCandsPerPair(newCands^, nNew, maxPairCands);
      Wr.PutText(stderr, "candidates after pair pruning  = " & Fmt.Int(nNew) & "\n"); 
      PZCandidate.Sort(SUBARRAY(newCands^, 0, nNew), PZCandidate.MismatchBetter);
      PZCandidate.PruneCandsPerCurve(newCands^, nNew, maxCurveCands);
      Wr.PutText(stderr, "candidates after curve pruning = " & Fmt.Int(nNew) & "\n"); 
      WITH ct = NEW(REF PZCandidate.List, nNew) DO
        ct^ := SUBARRAY(newCands^,0,nNew);
        RETURN ct
      END;
    END
  END RefineCandidates;
  
PROCEDURE RefineOneCandidate(
    READONLY cOld: PZCandidate.T;
    READONLY ch: ARRAY OF PZSymbolChain.ReadData; 
    maxShift: LONG;         (* Max shift to try alignment using PD ?? *)        
    mp: MismatchOptions;    (* Mismatch formula parameters. *)
    READONLY dch: VTable;   
    READONLY ech: VTable;
    step: LONG;             (* Sampling step *)
    minLength: LONG;        (* Min required length of matched segments *)
    extraLength: LONG;      (* Extra length to consider at both ends *)
    VAR minAvgDist: REF LONGS;
    (*WORK*)
    VAR rCost: REF PZMatch.CostMatrix;
    VAR rca, rcb: REF PZSymbolChain.T;
    VAR rmt: REF PZMatch.T;
  ): PZCandidate.T =
  VAR
    newMatch: REF PZMatch.T;
    mismatch: LONG;
    matchedLength: LONG;
    length: LONG;
    chStep: ARRAY [0..1] OF LONG;
    maxAdjust: NAT;
    sExp: PZSegment.Pair;
    
  BEGIN
    (* Compute the expansion needed, in samples: *)
    WITH extra = CEILING(extraLength / step) DO
      FOR j := 0 TO 1 DO
        chStep[j] := step;
        sExp[j] := PZSegment.Expand(cOld.seg[j], extra, extra)
      END
    END;
    
    WITH 
      cvxa = cOld.seg[0].cvx,
      NA = sExp[0].ns,
      ca = SUBARRAY(ReallocateSymbolChain(rca, NA)^, 0, NA),

      cvxb = cOld.seg[1].cvx,
      NB = sExp[1].ns,
      cb = SUBARRAY(ReallocateSymbolChain(rcb, NB)^, 0, NB),

      step = 0.5d0 * (chStep[0] + chStep[1]),
      minSteps = MAX(0, FLOOR((2.0d0 * minLength)/step)),
      ctr = PZMatch.Pair{NA DIV 2, NB DIV 2},
      
      NM = MatchingLength(cOld),
      mtOld = SUBARRAY(ReallocateMatching(rmt, NM)^, 0, NM)
    DO
      <* ASSERT NUMBER(ca) = NA *>
      <* ASSERT NUMBER(cb) = NB *>

      IF minSteps <= (NA-1)+(NB-1) THEN
        PZSymbolChain.DoExtractSegment(ch[cvxa].c^, sExp[0], ca);
        PZSymbolChain.DoExtractSegment(ch[cvxb].c^, sExp[1], cb);
        ObtainOldMatching(cOld, sExp, mtOld);

        (* Wr.PutText(stderr, "minSteps = " &  Fmt.Int(minSteps) & "\n"); *)

        IF minAvgDist = NIL OR NUMBER(minAvgDist^) < NA + NB - 1 THEN
          minAvgDist := NEW(REF LONGS, NA + NB - 1)
        END;

        maxAdjust := CEILING(maxShift/step);
        IF cOld.pm = NIL THEN
          WITH n0 = cOld.seg[0].ns, n1 = cOld.seg[1].ns DO
            maxAdjust := MAX(maxAdjust, ABS(n0 - n1))
          END;
        END;

        PZSymbolChain.RefineMatch(
          ca, cb, 
          decode := dch, errorVar := ech,
          step := step,
          mtOld := mtOld,
          maxAdjust := maxAdjust,
          maxDistSqr := mp.maxDistSqr,
          critDistSqr := mp.critDistSqr,
          skipDistSqr := mp.skipDistSqr,
          (*OUT*)
          mt := newMatch,
          mismatch := mismatch,
          length := length,
          matchedLength := matchedLength,
          minAvgDisc := SUBARRAY(minAvgDist^, 0, NA+NB-1),
          (*WORK*)
          rCost := rCost
        );
       (* Wr.PutText(stderr, "mismatch = " &  Fmt.LongReal(mismatch) & "\n");
        Wr.PutText(stderr, "matchedLength = " &  Fmt.LongReal(matchedLength) & "\n");
        *)
      ELSE
        mismatch := 0.0d0;
        length := 0.0d0;
        matchedLength := 0.0d0;
        newMatch := NEW(REF PZMatch.T, 1);
        newMatch[0] := ctr
      END;
      WITH
        cExp = PZCandidate.T{
          seg := sExp,
          mismatch := 0.0d0,
          length := 0.0d0,
          matchedLength := 0.0d0,
          pm := NIL
        }
      DO
        RETURN PZCandidate.TrimAndPack(cExp, newMatch^,
          mismatch := mismatch, 
          length := length, 
          matchedLength := matchedLength
        )
      END
    END;
  END RefineOneCandidate;
  
PROCEDURE MatchingLength(READONLY c: PZCandidate.T): NAT =
  (*
    Length of the matching "c.pm", if any; else length of 
    `most perfect' matching. *)
  BEGIN
    IF c.pm # NIL THEN
      RETURN c.pm.np
    ELSE 
      RETURN MAX(c.seg[0].ns, c.seg[1].ns)
    END
  END MatchingLength;

PROCEDURE ObtainOldMatching(
    READONLY cOld: PZCandidate.T;   (* Original candidate. *)
    READONLY sExp: PZSegment.Pair;  (* Expanded segments. *)
    VAR (*OUT*) mt: PZMatch.T;      (* Matching rel. to "cExp. *)
  ) =
  BEGIN
    IF cOld.pm # NIL THEN
      PZMatch.DoUnpack(cOld.pm^, mt);
    ELSE 
      (* Create a "most perfect" matching for the two old chains: *)
      WITH n0 = cOld.seg[0].ns, n1 = cOld.seg[1].ns DO
        PZMatch.DoMostPerfect(n0, n1, mt);
      END;
    END;
    (* Adjust matching to account for candidate expansion: *)
    FOR j := 0 TO 1 DO
      WITH 
        so = cOld.seg[j],
        sn = sExp[j],
        d = (so.ini - sn.ini) MOD sn.tot
      DO
        <* ASSERT so.tot = sn.tot *>
        <* ASSERT so.ns + d <= sn.ns *>
        FOR k := 0 TO LAST(mt) DO mt[k][j] := mt[k][j] + d END
      END
    END
  END ObtainOldMatching;

PROCEDURE ReallocateSymbolChain(VAR rc: REF PZSymbolChain.T; N: NAT): REF PZSymbolChain.T =
  (*
    If "rc^" does not exist or has less than "N" elements, reallocates it
    (discarding its old contents). In any case returns the final "rc". *)
  BEGIN
    IF rc = NIL THEN 
      rc := NEW(REF PZSymbolChain.T, N)
    ELSIF NUMBER(rc^) < N THEN
      rc := NEW(REF PZSymbolChain.T, MAX(N, 2*NUMBER(rc^)))
    END;
    RETURN rc
  END ReallocateSymbolChain;
 
PROCEDURE ReallocateMatching(VAR rmt: REF PZMatch.T; N: NAT): REF PZMatch.T =
  (*
    If "rmt^" does not exist or has less than "N" elements, reallocates it
    (discarding its old contents). In any case returns the final "rmt". *)
  BEGIN
    IF rmt = NIL THEN 
      rmt := NEW(REF PZMatch.T, N)
    ELSIF NUMBER(rmt^) < N THEN
      rmt := NEW(REF PZMatch.T, MAX(N, 2*NUMBER(rmt^)))
    END;
    RETURN rmt
  END ReallocateMatching;
 
PROCEDURE RefineComments(READONLY o: Options):TEXT=
  BEGIN 
    WITH 
      wr = NEW(TextWr.T).init()
    DO 
      Wr.PutText(wr, "PZRefineCands: \n"); 
      Wr.PutText(wr, "  input:          " & o.input & "\n"); 
      Wr.PutText(wr, "  chainDir:       " & o.chainDir & "\n");
      Wr.PutText(wr, "  chainPrefix:    " & o.chainPrefix & "\n");
      Wr.PutText(wr, "  band:           " & Fmt.Int(o.band) & "\n");
      Wr.PutText(wr, "  curvSigma:      " & FLR(o.curvSigma,     8,4) & "\n");
      Wr.PutText(wr, "  critDist:       " & FLR(sqrt(o.mp.critDistSqr),  8,4) & "\n");
      Wr.PutText(wr, "  skipDist:       " & FLR(sqrt(o.mp.skipDistSqr),  8,4) & "\n");
      Wr.PutText(wr, "  maxDist:        " & FLR(sqrt(o.mp.maxDistSqr),  8,4) & "\n");
      Wr.PutText(wr, "  minLength:      " & FLR(o.minLength,    8,4) & "\n");
      Wr.PutText(wr, "  blurFactor:     " & FLR(o.blurFactor, 8,4) & "\n");
      Wr.PutText(wr, "  extraLength:    " & FLR(o.extraLength,  8,4) & "\n");
      Wr.PutText(wr, "  maxRefineShift: " & FLR(o.maxRefineShift,     8,4) & "\n");
      Wr.PutText(wr, "  maxCurveCands:  " & Fmt.Int(o.maxCurveCands) & "\n");
      Wr.PutText(wr, "  maxPairCands:   " & Fmt.Int(o.maxPairCands) & "\n");
      Wr.PutText(wr, "  output:         " & o.output & "\n"); 
      RETURN(TextWr.ToText(wr))
    END (* DO *);
  END RefineComments;

PROCEDURE FLR(x: LONG; w, d: NAT): TEXT =
  BEGIN
    RETURN Fmt.Pad(Fmt.LongReal(x, prec := d, style := Fmt.Style.Fix), w)
  END FLR;

BEGIN
  Main()
END PZRefineCands.
(*
  Copyright © 2001 Universidade Estadual de Campinas (UNICAMP).
  Authors: Helena C. G. Leitão and Jorge Stolfi.
  
  This file can be freely distributed, used, and modified, provided
  that this copyright and authorship notice is preserved, and that any
  modified versions are clearly marked as such.
  
  This software has NO WARRANTY of correctness or applicability for
  any purpose. Neither the authors nor their employers chall be held
  responsible for any losses or damages that may result from its use.
*)