/* Last edited on 2013-03-26 17:10:23 by stolfilocal */
/* Representation and basic tools for multichannel 2D images with {float}-valued samples. */

package minetto.utils;

import java.lang.Math;
import java.lang.Float;
import java.util.Arrays;

import java.io.File;
import java.io.IOException;

import minetto.utils.WordCand;
import minetto.utils.WordCandElem;

public final class CharGrouper {

    private static double Inf = Double.POSITIVE_INFINITY;
    private static double SQRT2 = Math.sqrt(2);

    private static boolean debug = true;
    
    /* The character grouping module takes as input the set of all
    bounding boxes of possible glyphs identified by the character
    detection module, after the box and shape filter stages. It breaks
    that set into a number of lists, each comprising two or more input
    /word elements/ that are presumably letters belonging to the same
    word or (depending on the setting of certain parameters) of the same
    line of text. For convenience we will refer to each of these lists
    as a /word candidate/, even when it is a multi-word line.
    
    Each input character box {B} is described by the indices of the
    pixel at the upper left corner {B.ixmin,B.iymin} and of the pixel at
    the lower right corner {B.ixmax,B.iymax}. Therefore its width and
    height (in pixels) are {B.w = B.ixmax-B.ixmin+1} and {B.h =
    B.iymax-B.iymin+1}, all measured in pixels. (Note that the
    coordinate origin is at the upper left corner of the image, and the
    {y} axis points down, as usual in image processing.)
    
    !!! The character detection module should output a probability weight  for each character. !!!

    Criteria
    
    This module does not take into account the pixels or any other
    attributes of the characters, except their bounding boxes. It is
    designed specifically for /Greco-Roman alphabets/, whose character
    shapes and sizes are similar to those of the Roman and Greek print
    alphabets.
    
    Specifically, the module assumes that the top and bottom of every
    character in the same word are approximately aligned with two of
    four main alignment lines: the word's /descender line/ DL, the
    /baseline/ BL, the /topline/ TL, and the /ascender line/ AL. See
    figure~\ref{fig.reflines}. We define the /body height/ of the font
    as the vertical distance between BL and TL, and the /midline/ ML of
    the text as being the vertical bisector of BL and TL.
    
    In Greco-Roman alphabets, most uppercase letters and digits span
    from BL to AL. Lowercase letters generally span from BL to TL (for
    example, 'x' or 'm'), from DL to TL ('p'), or from BL to AL ('b',
    't'). Depending on the font, some characters may span from DL to AL:
    for example, 'R' and 'Q' with elongated ``tails'', old-style digits,
    the italic 'f', the Greek letters {\xi} and {\zeta}, as well as
    brackets and slashes.  Needless to say, these alignments are 
    only approximate, being usually disturbed by fancy font design 
    and by segmentation errors.
    
    The input to this module typically includes many segments found by
    the character detector that pass the character size and shape
    filters, but are actually non-letter objects such as tiles, holes,
    stains, etc.. Therefore, any input box that this module fails to
    group with any other box is assumed to be spurious and discarded.
    Unfortunately, this unavoidable decision means discarding any
    legitimate texts that consist of a single letter.
    
    The grouping module currently tries to ignore diacritics,
    punctuation, hyphens, and the dots on lowercase 'i' and 'j'. If
    separate boxes for those items are present in the input, they will
    probably fail to be grouped and therefore will be discarded. 
    
    The module is also unable to group letters that are split into two
    or more segments, either by accident or by font design. Hopefully
    these characters will be extracted as a single segment when the
    image is processed at a coarser scale.
    
    !!! Add a pre-filter that joins diacritics and {ij}-dots to the base letters. !!!
    
    The reference lines on urban text objects, such as signs and store
    names, may not be straight and horizontal, and may be further
    distorted by perspective projection. Therefore, this module allows
    the reference lines to be slanted, curved, and non-parallel to some
    limited extent. However, this module will not group correctly, and will
    probably discard, text that is rotated by more than about 30
    degrees, circular text, and vertical text with upright characters.
    In any case, the vertical distances DL-BL, BL-TL, and TL-AL are
    expected to be in the fixed ratios {fdes:1:fasc} specified by the
    client.
    
    Algorithm
    
    The algorithm maintains a set of /word candidates/, that represent
    possible true words, or prefixes thereof. 
    
    A word candidate is basically a sequence of zero or more character
    boxes, plus some bits of infomation described below. The character
    boxes that are assigned to the same word candidate must be strictly
    increasing in {ixmin} and also strictly increasing in {ixmax}.
    Moreover, the {y} range {iymin..iymax} of each box must have a
    non-empty intersection with the previous one. Those boxes are
    allowed to overlap by some extent, to account for slanted text and
    fonts (see below).
    
    The output of the module is a set of candidate words that are
    nominally disjoint, in the sense that each input character box is
    assigned to at most one candidate word. (However, two distinct output
    words may use distinct but partially overlapping character boxes.)
    
    Within a word candidate, each letter is represented by a /word
    element/ {L} that contains its bounding box {L.box} and two booleans
    {L.asc,L.des}. The field {L.asc} specifies whether the character
    is being assumed to extend upwards to the AL (true) or onlt to the TL (false).
    Similarly {L.des} specifies whether the character extends downwards
    to the DL (true) or only to the BL (false). The
    field {L.box} is copied from the input character box and has the
    same format, namely a 2x2 array {{L.ixmin,L.ixmax},{L.iymin,L.iymax}};
    while {L.asc,L.des} are tentatively guessed by the grouping module
    itself as explained below.
    
    The set of word candidates is maintained by the /dynamic
    programming/ (DP) method. In general the, DP algorithm looks for
    maximum-score paths in a directed acyclic graph with score attached
    to its vertices (/DP states/) and edges (/DP transitions/); where
    the score of a path is the sum of the score of the
    initial vertex and the scores of the edges along the path. 
    
    In our case, the set of DP states comprises all the single elements
    derived from the input boxes, and all pairs of elements {(L0,L1)}
    such that the {y}-ranges {L0.box[1],L1.box[1]} overlap by at least
    one pixel, {L0.ixmin < L1.ixmin}, {L0.ixmax < L1.ixmax}, and the
    separation {L1.ixmin-L0.ixmax} (which may be negative) does not
    exceed a fixed limit {gBmax}, defined below. Thus, for each input
    box there are four single-element DP states, and for every two input
    boxes that satisfy the constraints above there are up to 16 DP
    states.

    Each word candidate {W} has a /word score/ {W.score}, a real number
    that can be interpreted as the natural logarithm of {W.PrT/W.PrF},
    where {W.PrT} is the probability that the candidate is a true word
    or a proper prefix thereof, and {W.PrF = 1 - W.PrT} is the
    probability that is its not. Thus the score is zero if the two
    possibilities are equally likely, positive if the candidate is more
    likely to be true, and negative if it is more likely to be false.
    The score may be {-oo}, meaning that {W} (in the assumed model) 
    cannot possibly be a true word prefix.
    
    For each DP state {S} the algorithm maintains at most one candidate
    {W(S)}. If present, {W(S)} is the candidate with maximum score among
    all the candidates that end with state {S}. Specifically, if {S} is
    a single element {L}, then {W(S)} is the unique word candidate that
    has {L} is its single element. If {S} is a pair {(L0,L1)} then
    {W(S)} is a word candidate with two or more elements, ending with
    {L0} and {L1}.
    
    The module scans the input character boxes in order of increasing
    {B.ixmin} attribute, and attempts to append each box to one or more
    extant word candidates. To do that, it considers each word
    candidate {W} in turn (including the empty candidate), and tries to
    extend it with each element {L} that has {L.box = B} and each of the
    four possible values of {L.asc,L.des}; that is, with each of the
    four possible hypotheses about whether the character has an ascender
    and/or a descender. 
    
    For each element {L} thus obtained, the algorithm appends {L} to the
    existing word candidate {W} to obtain a new candidate {W' = W+L},
    and computes the probabilities {W'.PrT,W'.PrF}, and the score
    {W'.score}, according to a probabilistic model defined below. As
    dictated by the DP paradigm, at any time the algorithm discards all
    candidates {W'} with the same final state {S}, except the candidate
    with highest score. If that candidate has a finite score it is
    {W(S)} by definition; if the score is {-oo}, the candidate is
    discarded and {W(S)} is {null} (which means "no candidade", not "the
    empty candidate").
    
    ??? Currently the algorithm discards {W(S)} if its score is
    below a certain threshold {semin}.  That is probably pointless. ???
    
    ??? This too may be unnecessary: If, after trying all elements
    derived from a box {B}, at least one element {L} was sucessfully
    added to an open candidate {W}, and {L} has little or no overlap
    with the last element {L1} of {W}, then {W} is removed. That element
    {L} is assumed to interfere with any DP state {S'=(L1,L')} where
    {L'} is any element derived from a character box still to be
    considered, rendering the state {S'} invalid. (To justify this rule,
    consider that the input boxes are usually pairwise disjoint or have
    little overlap, and the conditions for appending are very strict; so
    if a box {B} was appended to a word candidate {W}, there is little
    chance of it being a spurious box appearing between two characters
    of the same word.) On the other hand, if {W} was not successfully
    extended, it is left in the set of word candidates, in case some box
    further to the right than {B} is still close enough to be added to
    it. ???
    
    Note that the same input box {B} may be addded several times with
    different values of {L.asc} and {L.des} to several candidates, or
    even to the same candidate. Note also that, at this stage, distinct
    word candidates may overlap and may share the same element, or two
    elements with the same input character box.
    
    Candidate scoring
    
    It is a fundamental requirement of the DP paradigm that the 
    score of each path be the sum of certain scores associated with the 
    states and transitions along the path.  This is valid in our case
    since, in our probabilistic model, the probability of a word
    candidate {W' = W+L} being a true word prefix depends only on {W.PrT}.
    and the position of {L} relative to the two last elements {L0,L1}
    of {W}; that is, on the final state {(L0,L1)} of {W} and on the 
    transition to the final state {(L1,L)} of {W'}.  Since the 
    fields {score,PrT,PrF} are just remappings of each other,
    the difference {W'.score - W.score} is a function of that
    transition only.
    
    The partial score associated with a single letter state {S=(Li)}
    depends on the size and aspect ratio of the box, relative to
    specified parameters {hmin,hmax,rmin,rmax}. The partial score
    associated with a state {S=(Li,Lj)} depends on the horizontal
    spacing between the boxes {Li.box,Lj.box}, and on the mean slopes of
    the presumed topline and baseline between those two characters. The
    partial score associated with a transition from {S'=(Li,Lj)} to
    {S''=(Lj,Lk)} depends on the curvature of the presumed topline and
    baseline in the span of those three elements. If these
    attributes exceed certain ranges, the partial score is {-oo},
    so that the transition is forbidden.
    
    To evaluate the scores that depend on reference lines, the algorithm
    defines five /reference points/ for each word element {L} located on
    the presumed position of the reference lines DL, BL, ML, TL and AL.
    These points all have the same {x}-cordinate, the mean abscissa
    {L.xctr = (L.ixmin + L.ixmax + 1)/2} of the bounding box. Their
    abscissas {L.yasc}, {L.ytop}, {L.ymid}, {L.ybas} and {L.ydes} are defined so
    that they are consistent with the vertical box bounds {L.iymax+1} and
    {L.iymin} and the hypotheses {L.asc,L.des}.

    Namely, {L.iymin} is equal to {L.yasc} if the character is
    conjectured to have an ascender, and to {L.ytop} if not. Likewise,
    {L.iymax+1} is equal to {L.ydes} if the character is conjectured to
    have a descender, and to {L.ybas} if not. In any case, the the
    differences {L.ydes-L.ybas}, {L.ybas-L.ytop}, and {L.yasc-L.ytop}
    are assumed to be in the fixed ratios {fdes:1:fasc}; and {L.ymid} is
    always {(L.ybas+L.ytop)/2}.
    
    Thus, the mean slope of the baseline between two successive elements
    {Li,Lj} is assumed to be the slope of the straight line connecting
    their baseline points {(Li.xctr,Li.ybas)} and {(Lj.xctr,Lj.ybas)}.
    The curvature of that line between three consecutive elements
    {Li,Lj,Lk} is evaluated by comparing the ordinate {Lj.ybas} of
    {Lj}'s baseline point with that of the straight line from
    {(Li.xctr,Li.ybas)} to {(Lk.xctr,Lk.ybas)}, evaluated at {Lj.xctr}.

    Final processing
    
    Once all input character boxes have been processed, the algorithm
    looks for pairs of surviving word candidates that share the same
    input character box, and discards the candidate with lower score in any
    such pair. Thus the surviving word candidates are disjoint as sets 
    of character boxes. 
    
    Probabilistic model
    
    In order to score the candidates we assume that all input character
    boxes are contained in a rectangle of width {wtot} and height {htot}
    (in pixels). We assume that the image is much bigger than the
    character boxes, so that we can disregard the slight changes in
    probability distributions that would be expected near the edges of
    the image.
    
    Some of the boxes are /true boxes/, the character bounding boxes of
    a a set of legible /true words/. The rest are /false boxes/, namely
    bounding boxes of non-letter image segments that passed the
    geometric and shape character filters.
    
    We will denote by {NB} the number of input character boxes, of which
    {NT} are true and {NF} are false. A false element is either one
    built from a false box, or one built from a true box with the wrong
    ascender/descender hypotheses. The total number of elements is
    therefore {NE=4*NB}, of which {NT} are true and {NE-NT = 4*NF+3*NT} are
    false.
    
    Let {avgLen} be the average number of letters per true word; the
    probability that a randomly chosen true box is the first letter of
    its word is {pbeg = 1/avgLen}, and the expected number of true words
    in the input is {NT*pbeg = NT/avgLen}.
    
    Single-element candidate probabilities
    
    [TBW]

      // Define FLTW = "first letter of a true word".
      // Let PrT = Prob(L.box,L.des,L.asc is FLTW).
      // Let PrT0 = Prob(L is FLTW) a priori.
      // Let PrF0 = Prob(L is FLTW) a priori.
      // Let PrLT = Prob(L.box,L.des,L.asc | L is FLTW).
      // Let PrLF = Prob(L.box,L.des,L.asc | L is not FLTW).

      // Then PrT = PrT0*PrLT/(PrT0*PrLT + PrF0*PrLF).
   
        
    Multiple-element candidate probabilities
    
    We consider the extension of a word candidate {W} by an element {L} that
    satisfies the criteria {box_seems_usable,box_seems_appendable}.
    These scriteria must accept any box that has a positive probability of being the next
    letter of {W}.
    
       // Define TWP = true word prefix.
       // Let PrT = Prob(W is a TWP).
       // Let PrT1 = Prob(W+L is a TWP).
       // Let PrTx = Prob(L is the next letter of W | L.box,L.asc,L.des & W is a TWP).

       // Then PrT1 = PrT*PrTx.

       // Let PrLT = Prob(L.box,L.asc,L.des | W is a TWP & L is the next letter of W).
       // Let PrT0 = Prob(L is the next letter of W | W is a TWP) a priori.

       // Let PrLF = Prob(L.box,L.asc,L.des | W is a TWP & L is *not* the next letter of W).
       // Let PrF0 = Prob(L is *not* the next letter of W | W is a TWP) a priori.

       // Then PrTx = PrLT*PrT0/(PrLT*PrT0 + PrLF*PrF0)

    Data structures
    
    The surviving candidates are kept in an array {wds[0..nwd-1]} stored in the 
    {CharGrouper} instance.  
   
    At some point during the algorithm, the character boxes that remain
    to be processed will be so far to the right from the last element of
    a word candidate {W} that any candidate {W+L} that may be considered
    in the future will have score {-oo}. The algorithm then marks the
    candidate {W} as /closed/ and removes it from the candidate list
    used in the main loop. The candidate is discarded if {W.score} is
    below a client-specified threshold {skmin}, otherwise it is saved
    until the end of the algorithm. The closed candidates are {wds[0..ncl-1]},
    and the open (still not closed) ones are in {wds[ncl..nwd-1]}.

    Because of the geometric constraints, and the fact that input boxes
    are ``mostly disjoint'', the number of states is actually linear in
    the number {NB} of input boxes.
    
    
    */

    /* === PARAMETERS ===================================================== */
    
    /* Parameters of the input distribution: */
    public int wBtot = -1;                /* Total image width (px). */             
    public int hBtot = -1;                /* Total image height (px). */            
     
    public double avgWArea = Double.NaN;  /* Expected number of image pixels per word. */
    public double avgWLen = Double.NaN    /* Expected number of letters per word. */
    
    /* Parameters of the grouping algorithm proper: */
    public double hmin = Double.NaN;      /* Min char body height of a true letter for this scale (px). */
    public double hmax = Double.NaN;      /* Max char body height of a true letter for this scale (px). */
    public double rmax = Double.NaN;      /* Max char body aspect of a true letter {w/h}. */
    public double rmin = Double.NaN;      /* Min char body aspect of a true letter {w/h}. */

    public double dxmax = Double.NaN;     /* Max allowed char-char gap rel body height. */
    public double dxmin = Double.NaN;     /* Min allowed char-char gap (may be negative) rel body height. */

    public double MLsmax = Double.NaN;    /* Max slope of midline between chars. */
    public double TBsmax = Double.NaN;    /* Max slope of topline minus baseline. */

    public double fdes = Double.NaN;      /* Nominal ratio of descender depth to lowercase x-height. */
    public double fasc = Double.NaN;      /* Nominal ratio of ascender height to lowercase x-height. */

    public double semin = Double.NaN;     /* Min word cand score for extending a word cand. */
    public double ssmin = Double.NaN;     /* Min word cand score to consider an element grouped. */
    public double skmin = Double.NaN;     /* Min word cand score to keep a closed word cand. */

    public boolean delExtended = false;   /* If TRUE deletes candidates that have been successfully extended. */

    /* === WORKING STORAGE ================================================= */
    
    int nch = 0;            /* Number of input characters. */
    int nwd = 0;            /* Number of total word cands, open or closed. */ 
    int ncl = 0;            /* Number of closed word cands. */ 
    WordCand[] wds = null;  /* The word cands are {wds[0..nwd-1]}; {wds[0..ncl-1]} are closed. */

    /* === MAIN PROCEDURE ================================================== */

    /* ----------------------------------------------------------------------
    Groups the given candidate character boxes into word or text lines. Returns a
    list of surviving word candidates. The client must set all class
    parameters before calling this method. */
    public WordCand[] group(int[][][] bboxes) {
        
        if (debug) { System.err.printf("--- begin grouping ------------------------------\n"); }

        /* Allocate the word cand set, initially empty. */
        /* Note that each input charater may generate four word cands. */
        this.nch = bboxes.length;
        this.wds = new WordCand[10*nch]; /* Will be extended if necessary. */
        this.nwd = 0;   /* Number of total word cands, active or inactive. */ 
        this.ncl = 0;   /* Number of closed word cands. */ 
        
        /* Sort candidate boxes by their {ixmin} ascending: */
        int[] sort_ich = sort_boxes_by_ixmin(bboxes);
        
        /* Add all boxes to the word cands, creating new ones as needed: */
        for (int kch = 0; kch < nch; kch++) {
            int ich = sort_ich[kch];
            int[][] box = bboxes[ich];
            /* Close any open cands that cannot be extended any further: */
            this.close_hopeless_word_cands(box[0][0]);
            /* Add the box to the open word cands, existing or new: */
            this.try_to_append_char_box(ich, box);
        }
        /* Close any remaining open word cands: */
        this.close_hopeless_word_cands(+Inf);
        assert this.ncl == this.nwd;
        /* Eliminate overlapping word cands and return the rest: */
        this.remove_dup_word_cands();
        WordCand[] res = java.util.Arrays.copyOf(this.wds, this.nwd);
        this.wds = null;
        this.nwd = 0;        
        this.ncl = 0;        
        if (debug) { System.err.printf("--- nigeb grouping ------------------------------\n"); }
        return res;
    }
    
    /* === MAIN AUXILIARY PROCS ============================================= */
    
    /* ----------------------------------------------------------------------
    Given a list {bboxes[0..nbx-1]} of character bounding boxes, returns
    a permutation {ixs[0..nbx-1]} if {0..nbx-1} such that
    {bboxes[ixs[k]][0][0]} is non-decreasing with {k}. */
    public static int[] sort_boxes_by_ixmin(int [][][] bboxes) {
        int nbx = bboxes.length;
        int[] ixs = new int[nbx];
        /* Bubble sort for now. Should use bucket sort.: */
        for (int i = 1; i < nbx; i++) {
            /* Insert box {i} in its proper place: */
            int[][] boxi = bboxes[i];
            int j = i;
            while ((j > 0) && (bboxes[ixs[j-1]][0][0] > boxi[0][0])) { 
                ixs[j] = ixs[j-1]; j--;
            }
            ixs[j] = i;
        }
        return ixs;
    }
    
    /* ----------------------------------------------------------------------
    Scans the open word candidates {this.wds[this.ncl..this.nwd-1]} and tries to
    append to each one the candidate character with input index {ich}
    and bounding box {bbox}, assuming that it may have or may not have
    ascenders or descenders.  Also considers starting new candidates with this
    box.  Keeps the highest-scoring cand with each final state. */
    private void try_to_append_char_box(int ich, int [][] box) {
        if (debug) { dbgb(0,"processing character", ich, box); }
        assert ich < this.nch;
        
        if (! box_seems_usable(box)) { return; }
        
        int maxHyps = 4; /* Maximum hypotheses about {ich}. */
        /* Creates the four word candidate elements for this character: */
        WordCandElem[] L = new WordCandElem[maxHyps];
        int nL = 0; /* The elements to be considered are {L[0..nL-1]}. */
        for (int iasc = 0; iasc <= 1; iasc++) {
            for (int ides = 0; ides <= 1; ides++) {
                /* Make {box} into a word cand element {L} with specific ascender descencer assumptions: */
                L[nL] = new WordCandElem(ich, box, (iasc == 1), (ides == 1));
                nL++;
            }
        }
        /* Scan all open word cands and tries to append {ich} to them: */
        /* Scans in reverse order so that additions and deletions do not interfere with the scan. */
        /* Also keeps note of the best score {sbest} for the cands that incorporated character {ich}.*/
        double sbest = 0;
        for (int iwd = this.ncl; iwd < this.nwd; iwd++) {
            WordCand W = this.wds[iwd];
              if (this.box_seems_appendable(W, box)) {
                  if (debug) { dbgW(2,"trying to append to word cand", iwd, W); }
                  WordCand[] Wx = new WordCand[maxHyps];
                  int nWx = 0; /* Successfully extended cands are {Wx[0..nWx-1]}. */
                  /* Try to append the four variants of {ich} to {W}, note number of successes {nadd}: */
                  for (int k = 0; k < nL; k++) {
                      if (debug) { dbgL(4,"trying element", L[k]); }
                      WordCand W1 = this.try_to_append_elem(W, L[k]);
                      if (W1 != null) {
                          if (W1.score > sbest) { sbest = W1.score; }
                          assert W1.score >= this.semin;
                          if ((W.els.length >= 2) && (nWx > 0)) { 
                              /* Keep only the highest-scoring extended cand: */
                              assert nWx == 1;
                              if (Wx[0].score < W1.score) { Wx[0] = W1; }
                          } else { Wx[nWx] = W1; nWx++; }
                      }
                  }
                  /* Add the candidate(s) sucessfuly extended to the cand set:*/
                  boolean goodext = false; /* Will be true if {W} was extended with little overlap. */
                  for (int k = 0; k < nWx; k++) {
                      if (debug) { dbgW(4,"adding extended word cand", this.nwd, Wx[k]); }
                      goodext = goodext || this.good_cand_extension(Wx[k]);
                      this.add_open_word_cand(Wx[k]);
                  }
                  if (this.delExtended && goodext) {
                      /* The candidate {W} was sucessfully extended, so there is no point keeping it around: */
                      if (debug) { dbgW(4,"deleting word cand", iwd, W); }
                      this.remove_open_word_cand(iwd);
                  }
              }
        }
        if (sbest < this.ssmin) {
            /* Character {ich} was not satisfactorily grouped. Make new word cands for it: */
            for (int k = 0; k < 4; k++) {
                WordCand W1 = this.create_word_cand(L[k]);
                if (debug) { dbgW(4,"starting new word cand", this.nwd, W1); }
                this.add_open_word_cand(W1);
            }
        }
        if (debug) { System.err.printf("\n"); }
    }
    
    /* ----------------------------------------------------------------------
    Closes all open word cands that cannot be extended any more, because
    their right edge is to the left of the current sweepline {xcur} and
    too far from it. Removes any of those candidates that have only one
    letter or have a word cand score less than {skmin}. Increments
    {this.ncl}, decrements {this.nwd}, and and rearranges {this.wds} as
    needed. */
    private void close_hopeless_word_cands(double xcur) {
        assert this.skmin > 0;  /* Word cands with score 0 must be removed. */
        int iwd = this.nwd - 1;
        while (iwd >= this.ncl) {
            WordCand W = this.wds[iwd];
            WordCandElem L = W.els[W.els.length-1]; /* Last element of {W}. */
            /* Compute max allowed gap {exmax}: */
            double exmax = this.dxmax*this.hmax;
            if (L.box[0][1] + exmax < xcur) {
               /* candidate cannot be extended any longer: */
                if ((W.score < skmin) || (W.els.length <= 1)) { 
                    /* Delete the word candidate. This may bring the last cand to {this.wds[iwd]}: */
                    if (debug) { dbgW(0,"removing word cand", iwd, W); }
                    remove_open_word_cand(iwd); 
                    /* Decrement {iwd} since {this.wds[iwd]} has been examined or is not there. */
                    iwd--;
                } else {
                    /* Close the candidate, this will bring the first open candidate to its place: */
                    if (debug) { dbgW(0,"closing word cand", iwd, W); }
                    close_word_cand(iwd); 
                    /* Do not decrement {iwd} since {this.wds[iwd]} may be a candidate not examined yet. */
                }
            } else { 
                /* Candidate may still be extended, leave it open. */
                iwd--;
            }
        }
    }
    
    /* ----------------------------------------------------------------------
    Check the word candidates {this.wds[0..this.nwd-1]} (assumed to be
    all closed) for collisions, that is, sharing of the same input
    character. Any word that collides with a wordof higher score is
    removed. Ties are broken arbitrarily. */
    public void remove_dup_word_cands() {
        /* Bucket sort of candidates by character number: */
        /* Later {wuse[ich]} is the index of some word cand in {this.wds} that uses character {ich}, or -1. */
        int[] wuse = new int[this.nch]; 
        for (int ich = 0; ich < this.nch; ich++) { wuse[ich] = -1; }
        
        /* Cannot remove colliding word cands right away because it changes the indices. */
        /* So sets their scores to 0, and removes them later. */
        for (int iwd = 0; iwd < this.nwd; iwd++) {
            assert this.ncl == this.nwd;
            WordCand Wi = this.wds[iwd];
            if (debug) { dbgW(2,"trying to keep word cand", iwd, Wi); }
            /* Save words that collide with {W} in {wcol[0..W.nel-1]}, note max score {smax}: */
            double smax = -1;
            for (int iel = 0; iel < Wi.els.length; iel++) {
                int ich = Wi.els[iel].ich;
                if (debug) { System.err.printf("    checking elem "); prtL(Wi.els[iel]); }
                int jwd = wuse[ich]; 
                if (jwd >= 0) {
                    WordCand Wj = this.wds[jwd];
                    if (debug) { dbgW(0," collided with word cand", jwd, Wj); }
                    if ( Wj.score > smax) { smax = Wj.score; }
                } else { 
                    System.err.printf("\n");
                }
            }
            /* Check who wins: */
            if (smax >= Wi.score) {
                /* Word {iwd} loses, remove it: */
                if (debug) { dbgS(4,"lost, will be removed"); }
                Wi.score = -Inf;
            } else {
                /* Word {iwd} wins, remove the rest: */
                if (debug) { dbgS(4,"won, removing collisions"); }
                for (int iel = 0; iel < Wi.els.length; iel++) {
                    int ich = Wi.els[iel].ich;
                    int jwd = wuse[ich];
                    if (jwd >= 0) { 
                        if (jwd >= iwd) { System.err.printf(" ** %d:%d %d **\n", iel, jwd, iwd); }
                        assert jwd < iwd;
                        WordCand Wj = this.wds[jwd];
                        if (debug) { dbgW(6,"will remove word cand", jwd, Wj); }
                        /* Clear {Wj} from {wuse}: */
                        for (int jel = 0; jel < Wj.els.length; jel++) {
                            int jch = Wj.els[jel].ich;
                            assert wuse[jch] == jwd;
                            wuse[jch] = -1;
                        }
                        /* Flag word {jwd} for removal: */
                        Wj.score = -Inf;
                    }
                    assert wuse[ich] < 0;
                    wuse[ich] = iwd;
                }
            }
        }

        /* Now scan the words again and remove those with insufficient score. */
        /* Scan backwards so that removals do not mess up the scan. */
        assert this.skmin > 0;  /* Word cands with score 0 must be removed. */
        for (int iwd = this.nwd - 1; iwd >= 0; iwd--) {
            assert this.ncl == this.nwd;
            WordCand Wi = this.wds[iwd];
            if (Wi.score < this.skmin) {
                if (debug) { dbgW(0,"removing word cand", iwd, Wi); }
                this.remove_closed_word_cand(iwd);
            } else {
                if (debug) { dbgW(0,"keeping word cand", iwd, Wi); }
            }
        }
        if (debug) { System.err.printf("left %d word cands\n", this.nwd); }
    }
    
    /* === GEOMETRIC PRE-FILTER =============================================== */
        
    /* ----------------------------------------------------------------------
    Quick check of the box size against limits implied by {hBmin,hBmax,rBmin,rBmax}. */
    private boolean box_seems_usable(int [][] box) {
        
        /* raw box width and height: */
        double wB = L.box[0][1] - L.box[1][1] + 1; */ 
        double hB = box[1][1] - box[1][0] + 1; /* Box height. */
        
        /* Expects the geometric filter to be working: */
        assert hB <= this.hBmax; 
        assert hB >= this.hBmin;
        
        /* Require the box height to be in the range allowed by the specified body height range: */
        double hBmin = this.hmin; /* Min box height, assuming no ascenders and descenders. */
        double hBmax = this.hmax*(1 + this.fdes + this.fasc); /* Max box height, with *scenders. */
        if (hB < hBmin) { if (debug) { dbgD2(2,"!hBmin",hB,hBmin); } return false; }
        if (hB > hBmax) { if (debug) { dbgD2(2,"!hBmax",hB,hBmax); } return false; }
        
        /* !!! Use {hB} modified by {(1 + this.fdes + this.fasc)} instead of {hmin.hmax}. !!! */
        double wBmin = this.hmin*this.rmin;  /* Min width if true letter. */
        double wBmax = this.hmax*this.rmax;  /* Max width if true letter. */ 
        if (wB < wBmin) { if (debug) { dbgD2(2,"!wBmin",wB,wBmin); } return false; }
        if (wB > wBmax) { if (debug) { dbgD2(2,"!wBmax",wB,wBmax); } return false; }

        return true;
    }

    /* === PRIVATE METHODS =============================================== */
        
    /* ----------------------------------------------------------------------
    Quick check of whether a character with bounding rectangle {box} can
    be appended to a word candidate {W}; namely, if the left and right
    edges are strictly increasing. It is assumed that appending any word
    element with this box to {W} would make its score to be zero
    forever. */
    private boolean box_seems_appendable(WordCand W, int[][] box) {
        WordCandElem L1 = W.els[W.els.length-1]; /* Last element. */
        assert box[0][0] >= L1.box[0][0];   /* Input boxes must be sorted by {ixmin}. */

        /* Chech for {y} range overlap. Failures are too common, do not debug: */
        if (! this.ranges_overlap(L1.box[1], box[1])) { return false; }
        
        /* Compute maximum possible gap: */
        /* !!! Compute it !!! */

        /* Check for strictly increasing order of {ixmax} and {ixmin}: */
        double dxleft = box[0][0] - L1.box[0][0];
        double dxrite = box[0][1] - L1.box[0][1];
        if (dxleft < 1) { if (debug) { dbgD(6,"!dxleft",dxleft); } return false; }
        if (dxrite < 1) { if (debug) { dbgD(6,"!dxrite",dxrite); } return false; }

        return true;
    }
    
    /* !!! The procedures {create_word_cand} and {append_element} should return {null} if the score is {-oo}. !!! */
    
    /* ----------------------------------------------------------------------
    Returns {true} iff the integer coordinate ranges {A[0]..A[1]} and {B[0]..B[1]} 
    overlap by at least one pixel. */
    private static boolean ranges_overlap(int[] A, int[] B) {
        double lo = Math.max(A[0], B[0]);
        double hi = Math.min(A[1], B[1]);
        double ov = hi - lo + 1; /* Length of intersection of ranges. */
        return ov >= 1;
    }

    /* ----------------------------------------------------------------------
    Returns true iff the last element added to {W} has little or no {x} overlap
    with the previous element. */
    private static boolean good_cand_extension(WordCand W) {
        if (W.els.length < 2) { return false; }
        WordCandElem L0 = W.els[W.els.length-2]; /* Next to last elem. */
        WordCandElem L1 = W.els[W.els.length-1]; /* Last elem. */
        double w0 = L0.box[0][1] - L0.box[0][0] + 1; /* Width of {L0}. */
        double w1 = L1.box[0][1] - L1.box[0][0] + 1; /* Width of {L1}. */
        double wi = L0.box[0][1] - L1.box[0][0] + 1; /* Width of intersection. */
        return wi <= 0.5*Math.min(w0,w1);
    }

    /* ----------------------------------------------------------------------
    Append the element {L} to a clone of the word cand {W}, provided the
    result has score {semin} or more, and returns that score. Otherwise
    returns {null}.  Assumes that {box_seems_appendable(W, L.box)} is true. */
    public WordCand try_to_append_elem(WordCand W, WordCandElem L) {
        WordCand W1 = this.append_element(W, L);
        if (W1.score >= this.semin) { 
            return W1;
        } else {
            if (debug) { dbgD(6,"!semin", W1.score); }
            return null;
        }
    }
     
    /* ----------------------------------------------------------------------
    Creates a word candidate with a single element {L}. */
    private WordCand create_word_cand(WordCandElem L) {
        double = first_elem_score(L);
        if (score == -Inf) { if (debug) { dbgS(6,"!first_elem_score"); } }
        WordCand W = new WordCand(null, L);
        W.score = score;
        return W;
    }
    
    /* ----------------------------------------------------------------------
    Creates a new word candidate resulting from appending element {L} to
    candidate {W}. The probabilities and scores are computed
    incrementally from those of {W}. Assumes {box_seems_ok(L.box)} and
    {box_seems_appendable(W,L.box)} is true, and {W.score} is
    defined. */
    private WordCand append_element(WordCand W, WordCandElem L) {
        
        score = W.score;
        if (score == -Inf) { 
            if (debug) { dbgS(6,"!Wscore"); }
        } else {
            int nel = W.els.length;
            assert nel >= 1;

            WordCandElem L1 =  W.els[nel-1]; /* Last element. */
            if (nel == 1) {
                score = score + second_elem_score_term(L1, L);
                if (score == -Inf) { if (debug) { dbgS(6,"!second_elem_score_term"); } }
            } else {
                WordCandElem L0 = W.els[nel-2]; /* Next-to-last element. */
                score = score + general_elem_score_term(L0, L1, L);
                if (score == -Inf) { if (debug) { dbgS(6,"!general_elem_score_term"); }; }
            }
        }

        WordCand W1 = new WordCand(W.els, L);
        W1.score = score;
        assert !Double.isNaN(W1.score);
        return W1;
    }
    
    /* ----------------------------------------------------------------------
    Computes the score for a word candidate single
    {L} of a word cand. Assumes that {box_seems_ok(L.box)} is true. */
    private double first_elem_score(WordCandElem L) {

        /* Define FLTW(L) = "the first letter of a true word". */
        /* BOK(L) = "{box_seems_ok(L.box)} is true" */
        
        // Define FLTW = "first letter of a true word".
        // Let PrT = Prob(L.box,L.des,L.asc is FLTW).
        // Let PrT0 = Prob(L is FLTW) a priori.
        // Let PrF0 = Prob(L is FLTW) a priori.
        // Let PrLT = Prob(L.box,L.des,L.asc | L is FLTW).
        // Let PrLF = Prob(L.box,L.des,L.asc | L is not FLTW).

        // Then PrT = PrT0*PrLT/(PrT0*PrLT + PrF0*PrLF).


        double logPrT0 = compute_logPrT0();  /* {log(Prob(FLTW(L)))} a priori. */
        double logPrLT = compute_logPrLT(L); /* {log(Prob(L.* | FLTW(L))} */
        
        double logPrF0 = compute_logPrF0();  /* {log(Prob(~FLTW(L)))} a priori. */
        double logPrLF = compute_logPLF(L);  /* {log(Prob(L.* | ~FLTW(L))} */
        
        double logPrT = log_bayes(logPrT0 + logPrLT, logPrF0 + logPrLF);
        
        return logPrT;
    }
    
    /* ----------------------------------------------------------------------
    Computes the score increment for appending element {L} to the candidate {W},
    whose single element is {L1}.  Assumes {box_seems_ok(L.box)} and
    {box_seems_appendable(W,L.box)} are true. */
    private double second_elem_score_term(L1, L) {
    
        /* All these probabilities restricted to elements {box_seems_ok,box_seems_appendable}. */
    
       // Define TWP(W) = "W is a true word prefix".
       // Define NXL(W,L) = "L is the next letter of the true word W is a prefix of".
       
       
       // Let PrT1 = Prob(TWP(L1+L) | L1.* & L.*).
       // Let PrTx = Prob(NXL(L1,L) |  & TWP(W) & L passed tests).

       // Then PrT1 = PrT*PrTx.

       // Let PrLT = Prob(L.box,L.asc,L.des | TWP(L) & NXL(W,L)).
       // Let PrT0 = Prob(NXL(W,L) | TWP(L)) a priori.

       // Let PrLF = Prob(L.box,L.asc,L.des | TWP(L) & ~NXL(W)).
       // Let PrF0 = Prob(~NXL(W) | TWP(L)) a priori.

       // Then PrTx = PrLT*PrT0/(PrLT*PrT0 + PrLF*PrF0)
       
       
    }
    
    
    /* ----------------------------------------------------------------------
    Computes the score increment for appending element {L} to the candidate {W},
    whose last elem is {L1}, under the random box hypothesis. {L1} may be null.
    Assumes {box_seems_ok(L.box)} is true, and (if {L1} is not null) also
    {box_seems_appendable(W,L.box)} is true. */
    private double elem_handicap(WordCandElem L1, WordCandElem L) {
    
        ??? Must compute uniform distribution of boxes that pass criteria. ???

        double score = 0; /* The result. */

        /* The body height of {L} is required to be in the specified 
        range but is assumed to be equally likely within that range,
        and no score is given for it: */
        double h = this.body_height(L1); /* Quantiz. variance is {2*V} */ 
        double hminT = this.hmin;  /* Min body height if true letter. */
        double hmaxT = this.hmax;  /* Max body height if true letter. */ 
        /* Compute ratio {hBscale} of box height to body height: */
        double hBscale = 1.0 + (L.asc?this.fasc:0) + (L.des?this.fdes:0);
        double hminF = this.hBmin/hBscale; /* Min body height if random box. */
        double hmaxF = this.hBmax; /* Max body height if random box. */ 
        
        if (h < this.hmin) { if (debug) { dbgD(6,"!hmin",h); } return -Inf; }
        if (h > this.hmax) { if (debug) { dbgD(6,"!hmax",h); } return -Inf; }

        /* Add a term based on the aspect ratio of the body: */
        /* Since the geometric filter enforces the  */
        double w = L.box[0][1] - L.box[1][1] + 1; /* Quantiz. variance is {2*V} */ 
        double wminT = h*this.rmin;  /* Min width if true letter. */
        double wmaxT = h*this.rmax;  /* Max width if true letter. */ 
        double wminF = h*this.rmin;  /* Min width if random box. */
        double wmaxF = h*this.rmax;  /* Max width if random box. */ 
        score += log_prob_interval(w/SQRT2, wmin/SQRT2, wmax/SQRT2);
        if (score == -Inf) { if (debug) { dbgD3(6,"!w1",w,wmin,wmax); } return -Inf; }
        
        assert !Double.isNaN(score);
        return score;
    }
    
    /* ----------------------------------------------------------------------
    Computes the logarithm of the probability factor associated to the
    two consecutive elements {L0,L1}. It is the product of three
    bounded-range distributions on the horizontal separation of the two
    boxes, on the slope of the midline, and on the difference between
    the slopes of the topline and baseline; where those three refernce
    lines are defined by the reference points of the two elements. */
    private double elem_pair_score(WordCandElem L0, WordCandElem L1) {


        /* ----------------------------------------------------------------------
        Quick check of whether element {L} can be appended to {W}, based on
        whether the horizontal spacing and the slope of the nominal midline
        are within the allowed ranges. Must be called only if
        {box_seems_appendable(W,L.box)} is true. It is assumed that appending
        any word element with this box to {W} would make its score to be zero
        forever. */
        private boolean element_seems_appendable(WordCand W, WordCandElem L) {
        WordCandElem L1 = W.els[W.els.length-1]; /* Last element. */
        /* Sanity checks: */
        assert L.box[0][0] > L1.box[0][0];  /* The {ixmin}s must be strictly increasing. */
        assert L.box[0][1] > L1.box[0][1];  /* The {ixmax}s must be strictly increasing. */
        /* Convert relative {x} spacing range to absolute, check overlap/gap: */
        double h = WordCandElem.mean_body_height(L1,L);
        double gx = L.box[0][0] - L1.box[0][1] - 1; 
        if (gx <= h*this.dxmin) { if (debug) { dbgD2(6,"!dxmin",gx,h); } return false; }
        if (gx >= h*this.dxmax) { if (debug) { dbgD2(6,"!dxmax",gx,h); } return false; }
        /* Check slope of midline if {L} were to be appended: */
        double[] ML = WordCandElem.ref_line(L1,L,0.5);
        if (Math.abs(ML[1]) >= this.MLsmax) { if (debug) { dbgD(6,"!MLsmax",ML[1]); } return false; }
        return true;
        }


        double logPr2 = 0; /* The result. */
        /* Check again body height of {L1}: */
        double h1 = this.body_height(L1); /* Quantiz. variance is {2*V} */ 
        if (h1 < this.hmin) { if (debug) { dbgD(6,"!hmin",h1); } return -Inf; }
        if (h1 > this.hmax) { if (debug) { dbgD(6,"!hmax",h1); } return -Inf; }
        /* Add the body width term for {L1}: */
        double w1 = L1.box[0][1] - L1.box[1][1] + 1; /* Quantiz. variance is {2*V} */ 
        double w1min = h1*this.rmin;  
        double w1max = h1*this.rmax;  
        logPr2 += log_prob_interval(w1/SQRT2, w1min/SQRT2, w1max/SQRT2);
        if (logPr2 == -Inf) { if (debug) { dbgD3(6,"!w1",w1,w1min,w1max); } return -Inf; }
        /* Add the horizontal separation term: */
        double h01 = this.mean_body_height(L0, L1);
        double gx = L1.box[0][0] - L0.box[0][1] - 1; /* Quantiz. variance is {2*V} */
        double gxmin = this.dxmin*h01;
        double gxmax = this.dxmax*h01;
        if ((gx <= gxmin) || (gx >= gxmax)) { return -Inf; }
        logPr2 += log_prob_interval(gx/SQRT2, gxmin/SQRT2, gxmax/SQRT2);
        if (logPr2 == -Inf) { if (debug) { dbgD2(6,"!gx",gx,h01); } return -Inf; }
        /* Get the {y} difference in midpoints: */
        double dy = (this.yref(L1,0) + this.yref(L1,1))/2 - (this.yref(L0,0) + this.yref(L0,1))/2;/* Quantiz. variance is {V} */ 
        double dx = L1.xctr - L0.xctr; /* Quantiz. variance is {V} */ 
        logPr2 += log_prob_interval(dy, -this.MLsmax*dx, +this.MLsmax*dx);
        if (logPr2 == -Inf) { if (debug) { dbgD2(6,"!dy/dx",dy,dx); } return -Inf; }
        /* Get the difference in body heights: */
        double dh = (this.yref(L1,0) - this.yref(L1,1)) - (this.yref(L0,0) - this.yref(L0,1)); /* Quantiz. variance is {4*V} */ 
        logPr2 += log_prob_interval(dh/2, -this.TBsmax*dx/2, +this.TBsmax*dx/2);
        if (logPr2 == -Inf) { if (debug) { dbgD2(6,"!dh/dx",dh,dx); } return -Inf; }
        assert !Double.isNaN(score);
        return logPr2;
    }
    
    /* ----------------------------------------------------------------------
    Computes the logarithm of the probability factor associated to the
    three consecutive elements {L0,L1,L2}. It is the product of two
    bounded-range distributions on {L1.ybas} and {L1.ytop},
    relative to the topline {TL} and baseline {BL} defined by {L0} and {L2}
    at abscissa {L1.xctr}.  The probability is zero if {TL} is closer to 
    {L1.yasc} or {L1.ybas} than to {L1.ytop}, and also if {BL} is closer to {L1.ydes} or {L1.ytop}
    than to {L1.ybas}. */
    private double elem_triple_score(WordCandElem L0, WordCandElem L1, WordCandElem L2) {
        double logPr3 = 0; /* The result. */
        double h1 = this.body_height(L1); /* Body height of {L1}. */ 
        double ymid1 = this.yref(L1,0.5); /* Body midline height of {L1}. */ 
        /* Valid {y} range for the baseline at {L1.xctr}: */
        double yminBL1 = ymid1;
        double ymaxBL1 = this.yref(L1,0) + this.fdes*h1/2;
        logPr3 += elem_alignment_score(L0, L1, L2, 0, yminBL1, ymaxBL1);
        if (logPr3 == -Inf) { return -Inf; }
        /* Valid {y} range for the topline at {L1.xctr}: */
        double yminTL1 = this.yref(L1,1) - this.fasc*h1/2;
        double ymaxTL1 = ymid1;
        logPr3 += elem_alignment_score(L0, L1, L2, 1, yminTL1, ymaxTL1);
        assert !Double.isNaN(score);
        return logPr3;
    }
    
    /* ----------------------------------------------------------------------
    Computes the logarithm of the probability factor {Pr} associated to selected
    reference points of three consecutive elements {L0,L1,L2}.  The reference
    point is indicated by the {pos} argument (0 for base point, 1 for top point).
    The factor {Pr} is a bounded-range distribution on {L1}'s reference ordinate {L1.yref},
    relative to the corresponding reference line defined by {L0} and {L2}
    and evaluated at abscissa {L1.xctr}.  The probability is zero if {L1.yref}
    is outside the range {(ymin _ ymax)}. */
    private double elem_alignment_score (
        WordCandElem L0, 
        WordCandElem L1, 
        WordCandElem L2, 
        double pos, 
        double ymin, 
        double ymax
    ) {
        /* Get the reference line equation: */
        double[] RL = WordCandElem.ref_line(L0,L2, pos);
        /* Evaluate it at the center abscissa of {L1}: */
        double y1 = RL[0] + (L1.xctr - L0.xctr)*RL[1];
        double logPr = log_prob_interval(y1, ymin, ymax);
        if (logPr == -Inf) { if (debug) { dbgD3(6,"!L0:y1:L2",y1,ymin,ymax); } return -Inf; }
        return logPr;
    }
    
    compute_logPrT0()
    compute_logPrLT(L)
    compute_logPrF0()
    compute_logPLF(L)
    
        /* Compute the range {hBminF..hBmaxF} of false letters that pass the criteria: */
        int 

        // ??? Must account for prob of starting  ??? 

        /* The body height of {L} is required to be in the specified: */
        double h = this.body_height(L1); /* Quantiz. variance is {2*V} */ 
        double hminT = this.hmin;  /* Min body height if true letter. */
        double hmaxT = this.hmax;  /* Max body height if true letter. */ 
        double hBscale = 1.0 + (L.asc?this.fasc:0) + (L.des?this.fdes:0);
        
        ???
        /* Compute ratio {hBscale} of box height to body height: */
        double hBscale = 1.0 + (L.asc?this.fasc:0) + (L.des?this.fdes:0);
        double hminF = this.hBmin/hBscale; /* Min body height if random box. */
        double hmaxF = this.hBmax; /* Max body height if random box. */ 
        
        if (h < this.hmin) { if (debug) { dbgD(6,"!hmin",h); } return -Inf; }
        if (h > this.hmax) { if (debug) { dbgD(6,"!hmax",h); } return -Inf; }

        /* Add a term based on the aspect ratio of the body: */
        /* Since the geometric filter enforces the  */
        double w = L.box[0][1] - L.box[1][1] + 1; /* Quantiz. variance is {2*V} */ 
        double wminT = h*this.rmin;  /* Min width if true letter. */
        double wmaxT = h*this.rmax;  /* Max width if true letter. */ 
        double wminF = h*this.rmin;  /* Min width if random box. */
        double wmaxF = h*this.rmax;  /* Max width if random box. */ 
        score += log_prob_interval(w/SQRT2, wmin/SQRT2, wmax/SQRT2);
        if (score == -Inf) { if (debug) { dbgD3(6,"!w1",w,wmin,wmax); } return -Inf; }
        
        assert !Double.isNaN(score);
        return score;
    }
    

    /* ----------------------------------------------------------------------
    Log of a probability distribution for a variable {z} bounded to the interval {(zmin _ zmax)}.
    Assumes {z} has the same quantization variance as a coordinate of a character bbox. */
    private static double log_prob_interval(double z, double zmin, double zmax) {
        if ((z <= zmin) || (z >= zmax)) { return -Inf; }
        double r = 2*(z - zmin)/(zmax - zmin) - 1;
        /* Assuming the prob distribution {Pr(z) = (1-r^2)/((2/3)*(zmax - zmin))}.  */
        return Math.log(1.5*(1 - r*r)/(zmax - zmin));
    }

    /* ----------------------------------------------------------------------
    Computes the field {W1.logPrF} for the word cand {W1} that would result from 
    appending element {L} to {W}.  Assumes  {box_seems_appendable(W,L.box)} and
    {element_seems_appendable(W,L)} are true, and {W.logPrF} is defined. */
    private double update_logPrF(WordCand W, WordCandElem L) {
        /* Let {W.PrF} denote {exp(W.logPrF)}. In this implementation,
        {W.prF} is defined as the product of two terms: the default
        probability {1 - this.probTrue} of a candidate to be false, and
        the product of a uniform probability distribution over all pairs
        of reals {L.iymin,L.iymax} with {0 \leq L.iymin}, {L.iymax \leq
        htot-1}, and {hmin \leq (L.iymax-L.iymin+1) \leq
        hmax*(1+fdes+fasc)}, for each element {L} in {W}. Therefore {W1.PrF} can
        be computed incrementally from {W.PrF} by appending one more
        factor to this product. */
        
        /* !!! The increment {logPrUnif} is a constant and should be computed just once. !!! */
        if (W.score == -Inf) { return -Inf; }
        assert L.box[1][0] >= 0;
        assert L.box[1][1] + 1 <= this.htot;
        int hB = L.box[1][1] - L.box[1][0] + 1;
        double hBmin = this.hmin;
        double hBmax = this.hmax*(1 + this.fasc + this.fdes);
        assert (hB >= hBmin) && (hB <= hBmax);
        double hBavg = (hBmax + hBmin)/2;
        double logPrUnif = - (Math.log(this.htot - hBavg) + Math.log(hBmax - hBmin)); 
        return W.logPrF + logPrUnif;
    }
   
    /* === WORD ELEMENT TOOLS =============================================== */
    
    /* ----------------------------------------------------------------------
    Estimated body size from baseline to topline (excluding ascenders and descenders): */
    public static double body_height(WordCandElem L) {
        /* Compute total box height {hB}: */
        double hB = L.box[1][1] - L.box[1][0] + 1;
        /* Compute ratio {hBscale} of box height to body height: */
        double hBscale = 1.0 + (L.asc?this.fasc:0) + (L.des?this.fdes:0);
        return hB/hBscale;
    }

    /* ----------------------------------------------------------------------
    Estimated {y} coordinate of reference point of {L} selected by {pos}: {L.ybas} if {pos=0},
    {L.ymid} if {pos=0.5}, {L.ytop} if {pos=1}. Specify {pos=-this.fdes} to get {L.ydes}
    and {pos=1+this.fasc} to get {L.asc}. */
    public static double yref(WordCandElem L) {
        double ysize = this.body_height(L);
        double ybas = L.box[1][1] + 1 - (L.des ? this.fdes*ysize : 0);
        double ytop = L.box[1][0] + (L.asc ? this.fasc*ysize :0);
        return (1-pos)*ybas + pos*ytop;
    }

    /* ----------------------------------------------------------------------
    Computes the coefficients {RL} of a reference line from two word
    candidate elements {L0,L1}. The parameter {pos} selects the
    reference line, by linear interpolation between the base line ({pos
    = 0}) and the topline ({pos=1}). The line equation will be {y =
    RL[0] + (x-x0)*RL[1]} where {x0} is {L0.xctr}. The coefficients are
    determined by fitting a straight line to the appropriate reference
    points of {L0} and {L1}. If {L1} is null, the line is horizontal. */
    public static double[] ref_line(WordCandElem L0, WordCandElem L1, double pos) {
        double x0 = L0.xctr;
        double y0 = this.yref(L0, pos);  /* Reference {y} of {L0}. */
        double slope = 0;
        if (L1 != null) {
            double x1 = L1.xctr;
            double y1 = this.yref(L1, pos);   /* Reference {y} of {L1}. */
            assert x0 < x1;
            slope = (y1 - y0)/(x1 - x0);
        }
        return new double[] { y0, slope };
    }
    
    /* ----------------------------------------------------------------------
    Computes the mean body height of the elements {L0,L1}, the mean {y} distance
    between their base and top reference points: */
    public static double mean_body_height(WordCandElem L0, WordCandElem L1) {
        double h0 = this.body_height(L0);
        double h1 = this.body_height(L1);
        return (h0 + h1)/2;
    }

    /* ----------------------------------------------------------------------
    Computes the horizontal spacing betwen the right edge of {L0} and the left edge of {L1},
    divided by {mean_body_height(L0,L1)}. */
    public static double relative_horizontal_spacing(WordCandElem L0, WordCandElem L1) {
        double h01 = this.mean_body_height(L0,L1);
        double dx = L1.box[0][0] - L0.box[0][1] - 1;
        return dx/h01;
    }

    /* === WORD CANDIDATE TOOLS ============================================= */
     
    /* ----------------------------------------------------------------------
    Computes the mean body height of the elements of a word candidate
    {W}, including {L} if not null: */
    public static double mean_body_height(WordCand W, WordCandElem L) {
        int nel = W.els.length;
        assert nel > 0;
        double hsum = 0;
        for (int iel = 0; iel < W.els.length; iel++) {
            hsum += this.body_height(W.els[iel]);
        }
        if (L != null) {
            hsum += this.body_height(L);
            nel++;
        }
        return hsum/nel;
    }
    
    /* ----------------------------------------------------------------------
    Computes the coefficients {RL} of a word candidate's {W} mean reference
    line. The parameter {pos} selects the reference line, by linear
    interpolation between the base line ({pos = 0}) and the topline
    ({pos=1}). The line equation will be {y = RL[0] + (x-x0)*RL[1]}
    where {x0} is the center {x} of the first element in {W}. If {W} has
    only one element, the line is horizontal. The coefficients are
    determined by fitting a straight line to the appropriate reference
    points of the elements, by some numerical method. */
    public static double[] mean_ref_line(WordCand W, double pos) {
        /* Currently connects the reference points of the first and last elements. */
        /* !!! Should try least squares. !!! */
        assert W.els.length >= 1;
        WordCandElem L0 = W.els[0];
        WordCandElem L1 = (W.els.length == 1 ? null : W.els[W.els.length-1]);
        return this.ref_line(L0, L1, pos);
    }

    /* === CANDIDATE SET MANAGEMENT ======================================== */
    
    /* ----------------------------------------------------------------------
    Adds a new open candidate {W} at the end of the word candidate set. */  
    private void add_open_word_cand(WordCand W) {
        if (this.nwd >= this.wds.length) { 
            this.wds = java.util.Arrays.copyOf(this.wds, 2*this.nwd+1);
        }
        this.wds[this.nwd] = W; this.nwd++;
    }
    
    /* ----------------------------------------------------------------------
    Closes an open candidate {this.wds[iwd]}, demotes the first open candidate
    to its place. */  
    private void close_word_cand(int iwd) {
        WordCand W = this.wds[iwd];
        assert (iwd >= this.ncl) && (iwd < this.nwd);
        this.wds[iwd] = this.wds[this.ncl]; 
        this.wds[this.ncl] = W;
        this.ncl++;
    }
    
    /* ----------------------------------------------------------------------
    Removes the open candidate {this.wds[iwd]} from the word candidate set.
    Promotes the last open candidate to its place. */  
    private void remove_open_word_cand(int iwd) {
        assert (iwd >= this.ncl) && (iwd < this.nwd);
        this.wds[iwd] = this.wds[this.nwd-1]; 
        this.nwd--;
    }
    
    /* ----------------------------------------------------------------------
    Removes the closed candidate {this.wds[iwd]} from the word candidate set.
    Promotes the last closed candidate to its place, and the last open candidate
    (if any) to the front of the open candidate set. */  
    private void remove_closed_word_cand(int iwd) {
        assert (iwd >= 0) && (iwd < this.ncl);
        this.wds[iwd] = this.wds[this.ncl-1];
        if (this.ncl < this.nwd) {
            this.wds[this.ncl-1] = this.wds[this.nwd-1];
        }
        this.nwd--;
        this.ncl--;
    }
    
    /* === DEBUGGING ======================================================== */
    
    /* ----------------------------------------------------------------------
    Prints {msg} then the index {ich} and the rectangle {box}. */
    public static void dbgb(int ind, String msg, int ich, int[][] box) {
        prtSP(ind);
        System.err.printf(
            "%s %d [[%d %d][%d %d]]\n", 
            msg, ich, box[0][0], box[0][1], box[1][0], box[1][1]
        );
    }
    
    /* ----------------------------------------------------------------------
    Prints {msg} then word cand elem {L}. */
    public static void dbgL(int ind, String msg, WordCandElem L) {
        prtSP(ind);
        System.err.printf("%s ", msg);
        prtL(L);
        System.err.printf (" = [[%d %d][%d %d]]", L.box[0][0], L.box[0][1], L.box[1][0], L.box[1][1]);
        System.err.printf(" XC:%.1f  TL:%.1f BL:%.1f", L.xctr, L.ytop(), L.ybas());
        System.err.printf("\n");
    }
    
    /* ----------------------------------------------------------------------
    Prints {msg} then the index {iwd}, the word cand {W}. */
    public static void dbgW(int ind, String msg, int iwd, WordCand W) {
        prtSP(ind);
        System.err.printf("%s %d ", msg, iwd);
        String sep = "(";
        for (int iel = 0; iel < W.els.length; iel++) { 
            System.err.printf("%s", sep);
            prtL(W.els[iel]);
            sep = ",";
        }
        System.err.printf(") SC: %+9.4f\n", W.score);
    }

    /* ----------------------------------------------------------------------
    Prints {msg} and the double {v}. */
    public static void dbgD(int ind, String msg, double v) {
        prtSP(ind);
        System.err.printf("%s %+10.6f\n", msg, v);
    }
 
    /* ----------------------------------------------------------------------
    Prints {msg} and the doubles {u,v}. */
    public static void dbgD2(int ind, String msg, double u, double v) {
        prtSP(ind);
        System.err.printf("%s %+10.6f %+10.6f\n", msg, u, v);
    }
 
    /* ----------------------------------------------------------------------
    Prints {msg} and the doubles {t,u,v}. */
    public static void dbgD3(int ind, String msg, double t, double u, double v) {
        prtSP(ind);
        System.err.printf("%s %+10.6f %+10.6f %+10.6f\n", msg, t, u, v);
    }
 
    /* ----------------------------------------------------------------------
    Prints {msg}. */
    public static void dbgS(int ind, String msg) {
        prtSP(ind);
        System.err.printf("%s\n", msg);
    }
        
    /* ----------------------------------------------------------------------
    Prints the word cand elem {L}, no indent and no newline. */
    public static void prtL(WordCandElem L) {
        System.err.printf("%d:%d%d", L.ich, (L.asc?0:1), (L.des?0:1));  
    }
        
    /* ----------------------------------------------------------------------
    Prints {ind} spaces. */
    public static void prtSP(int ind) {
        if (ind > 0) { System.err.printf("%"+ind+"s",""); }
    }
}