INTERFACE PZCandidate;

(* A `candidate' is a pair of segments from two different contours *)
(* Last edited on 2001-11-16 17:15:41 by stolfi *)

IMPORT Rd, Wr;
IMPORT PZSymbolChain, PZSegment, PZMatch;
FROM PZTypes IMPORT LONG, INT, NAT, NATS, BOOL, BOOLS;

TYPE
  T = RECORD 
      seg: PZSegment.Pair;     (* Segments being matched. *)
      mismatch:  LONG;         (* Mismatch function, or 0 if not available. *)
      length: LONG;            (* Mean length of the two segments. *)
      matchedLength: LONG;     (* Length effectively matched. *)
      pm: REF PZMatch.PackedT; (* Matching for this candidate, or NIL *)
    END;              
      (*         
        A "PZCandaidate.T" consists of two segments of contour,
        with a tentative matching between them. *)
  
TYPE
  List = ARRAY OF T;
  
  ReadData = RECORD
      cmt: TEXT;
      lambda: LONG;
      c: REF List
    END;
  
CONST
  Empty = T{
    seg := PZSegment.Pair{PZSegment.Empty, PZSegment.Empty},
    mismatch := 0.0d0,
    length := 0.0d0,
    matchedLength := 0.0d0,
    pm := NIL
  };
  (*
    An `empty' candidate (NOT the only one!).  May be handy. *)
    
PROCEDURE IsEmpty(READONLY c: T): BOOL;
  (*
    TRUE if the candidate is empty (either segment has zero samples). *)

PROCEDURE Expand(READONLY c: T; iniExtra, finExtra: ARRAY [0..1] OF INT): T;
  (*
    Expands the two segments of candidate "c" by "extra" steps in each
    direction, preserving its matching (if any).  If either argument
    is negative, removes that many steps from the respective end, and
    trims the matching "c.pm" accordingly. *)

PROCEDURE TrimAndPack(
    READONLY cand: T;
    READONLY mt: PZMatch.T;
    mismatch: LONG;
    length: LONG;
    matchedLength: LONG;
  ): T;
  (*
    Assumes that "mt" is a partial matching between "cand.seg[0]"
    and "cand.seg[1]".  Returns a copy "ct" of "cand", trimmed to the
    part actually covered by "mt", and sets "ct.mismatch",
    "ct.length", "ct.matchedLength" as given.  Also packs "mt" and stores
    it in "ct.pm". *)

PROCEDURE Write(
    wr :Wr.T; 
    cmt: TEXT;
    READONLY c: List;
    lambda: LONG;
  );
  (*
    Writes the list of candidates "c" and the parameter "lambda"
    to the writer "wr". *)

PROCEDURE Read(rd :Rd.T): ReadData;
  (*
    Reads from "rd" a list of candidates that was written by "Write". *)
    
PROCEDURE ReadOld(
    rd :Rd.T; 
    READONLY m: NATS; 
    rev: ARRAY [0..1] OF BOOL;
  ): ReadData;
  (*
    Reads from "rd" a list of candidates that was written by the old
    version of "Write".  The "s.tot" field of each segment "s" is taken
    from the array "m", indexed by the corresponding contour number;
    and the "s.rev" flag is taken from the "rev" parameter, indexed
    by the candidate's side. *)

PROCEDURE ChainsUsed(READONLY cand: List): REF BOOLS;
  (*
    Returns a vector "used" such that "used[k]" is TRUE
    iff chain "k" occurs in some candidate of "cand[i]". *)

TYPE 
  ChainPair = ARRAY [0..1] OF PZSymbolChain.ReadData;

PROCEDURE Print(wr: Wr.T; READONLY cand: T; pairing: BOOL := TRUE);
  (*
    Prints the data of candidate "cand" to "wr", in a multiline
    readable format.  If the candidate has a matching, prints it too
    (in compact form), unless "pairing" is FALSE. *)

(* COMPARISON AND SORTING *)

TYPE CompareProc = PROCEDURE(READONLY a, b: T): BOOL;
  (* 
    A procedure that tells whether "a" must come before "b". *)

PROCEDURE Sort(VAR c: List; before: CompareProc);
  (*
    Sorts the list "c" according to the given comparison procedure. *)
    
PROCEDURE IndexSort(READONLY c: List; VAR x: NATS; before: CompareProc);
  (*
    Returns in "x" a permutation of the indices of "c" so that 
    "c[x[k]]" is non-decreasing with "k", according to the given
    comparison procedure. *)
    
PROCEDURE LexicallyBetter(READONLY a, b: T): BOOL;
  (*
    Compares by chain pair, then by then start of segments, 
    by length of segments, and breaks ties by mismatch and
    other criteria. *)
    
PROCEDURE PairMismatchBetter(READONLY a, b: T): BOOL;
  (*
    Compares by chain pair, then by mismatch and total length,
    and breaks ties by other criteria. *)

PROCEDURE MismatchBetter(READONLY a, b: T): BOOL;
  (*
    Compares by "mismatch", breaking ties by other criteria. *)
    
PROCEDURE Overlap(
    READONLY a, b: T;
    maxAdjust: NAT;
    VAR (*WORK*) ra, rb: REF PZMatch.T;   (* Work areas *)
  ): NAT; 
  (*
    Compares the matchings (explicit or implied) of candidates
    "a" and "b", and returns the number of
    half-steps of "a" that lie at most "maxAdjust" from some half-step of
    "b" with same position.  Note that "Overlap(a,b,...) = Overlap(b,a,...)".
    
    In particular, if "a" and "b" are the same candidate, returns the
    total number of steps in the two segments of that candidate (i.e.
    twice the mean length of that candidate). Returns 0 if
    corresponding segments of the two candidates are on different
    chains or have different orientations. *)

PROCEDURE FindSimilarCands(
    READONLY aCand: List;
    READONLY bCand: List;
    minSteps: NAT;
    maxAdjust: NAT;
    VAR (*OUT*) aOK: BOOLS;
    VAR (*OUT*) bOK: BOOLS;
    printAMatched: BOOL := FALSE;
    printAUnmatched: BOOL := FALSE;
    printBMatched: BOOL := FALSE;
    printBUnmatched: BOOL := FALSE;
    printMatchedCands: BOOL := FALSE;
    printMatchedChains: BOOL := FALSE;
  );
  (*
    Compares the candidates "aCand" against the candidates "bCand".
    Any pair of candidates "acand[i]", "bCand[j]" that overlaps 
    by at least "minSteps" steps, with alignments differing
    by at most "maxAdjust" samples, is flagged by setting 
    "aOK[i] = TRUE" and "bOK[j] = TRUE". 
    
    The options "printAMatched", "printBMatched" "printAUnmatched",
    and "printBUnmatched" cause printout of the corresponding matched
    or unmatched elements, once each.
    
    The "printMatchedCands" option prints all matching pairs (inclunding
    multiple matches of the same "aCand" or "bCand" element.
    The "printMatchedChains" option prints all chain pairs with
    at least one matching pair of candidates. *)

PROCEDURE PrintSimilarityStatistics(
    wr: Wr.T;
    READONLY aOK: BOOLS; aName: TEXT;
    READONLY bOK: BOOLS; bName: TEXT;
  );
  (* 
    Prints to "wr" the basic statistics of the results of
    "FindSimilarCands", such as counts of false/true
    positives/negatives. Prints also the retrieval efficiency
    parameters of "a" (seen as a search result) relative to "b" (seen
    as the ideal result): sensitivity (fracion of "b"'s that were
    found in "a"), significance (fraction of "a" that are in "b"), and
    multiplicity (how many "a"s were found for each "b"). *)

(* CANDIDATE LIST PRUNING *)

(*
  The following procedures remove candidates from the list 
  "cand[0..nCands]".  Unless observed otherwise, they 
  preserve the order of the candidates and normally change
  "nCands". *)

PROCEDURE RemoveDuplicates(VAR cand: List; VAR nCands: NAT);
  (*
    Removes candidates that cover the same segments of the same chains
    as previous candidates.  Assumes "cand" is sorted in such a way
    that duplicates will end up in adjacent positions. *)

PROCEDURE RemoveShort(VAR cand: List; VAR nCands: NAT; minSteps: NAT);
  (*
    Removes any candidates that don't have at least "minSteps" steps
    on both segments. *)

PROCEDURE PruneCandsPerPair(VAR cand: List; VAR nCands: NAT; maxCands: NAT);
  (*
    Removes the worst candidates from each pair of chains,
    leaving at most "maxCands" of them.  Assumes that candidates are 
    sorted by chain pair and then by decreasing quality,
    as in "PairMismatchBetter". *)

PROCEDURE PruneCandsPerChain(VAR cand: List; VAR nCands: NAT; maxCands: NAT);
  (*
    Removes the worst candidates for chains that have more than 
    "candsPerChain" candidates.  Assumes that the candidates are sorted
    by decreasing quality, as in "MismatchBetter"  or "LexicallyBetter". *)

PROCEDURE MergeOverlaps(
    VAR cand: List; 
    VAR nCands: NAT;
    minSteps: NAT;
    maxAdjust: NAT;
  );
  (*
    Joins candidates FOR which "Overlap(... minSteps,maxAdjust)" is
    TRUE. Assumes candidates are sorted by chain pair first, as in
    "PairMismatchBetter". *)

END PZCandidate.
(*
  Copyright © 2001 Universidade Estadual de Campinas (UNICAMP).
  Authors: Helena C. G. Leitão and Jorge Stolfi.
  
  This file can be freely distributed, used, and modified, provided
  that this copyright and authorship notice is preserved, and that any
  modified versions are clearly marked as such.
  
  This software has NO WARRANTY of correctness or applicability for
  any purpose. Neither the authors nor their employers chall be held
  responsible for any losses or damages that may result from its use.
*)