#ifndef pz_match_H
#define pz_match_H

/* Chain distance by dynamic programming. */
/* Last edited on 2015-01-20 16:49:24 by stolfilocal */

#include <pz_types.h>
#include <pz_segment.h>

typedef i2_t pz_match_pair;

typedef i2_vec_t pz_match_t;
  /* A {pz_match_t} is a `monotone pairing' between two arbitrary sequences
    {a[0..NA-1]} and {b[0..NB-1]}, defined as a list of pairs
    {mt[0..m-1] == (r[0],s[0]),.. (r[m-1],s[m-1])} such that

    | r[0] >= 0, s[0] >= 0;
    |
    | r[m-1] <= NA-1, s[m-1] <= NB-1;
    |
    | r[k+1] == r[k]  ) || (  r[k+1] == r[k]+1,
    | s[k+1] == s[k]  ) || (  s[k+1] == s[k]+1,
    |
    | mt[k+1] != mt[k]

    for all {k}.  By definition, element {a[r[k]] == a[mt[k][0]]} is
    paired with sample {b[s[k]] == b[mt[k][1]]}, for all {k}.

    We say that a pair {(r1,s1)} `precedes' another pair {(r2,s2)} if
    {r1 <= r2} and {s1 <= s2}, and at least one of these inequalities
    is strict.  In that case we also say that {(r2,s2)} `follows'
    {(r1,s1)}.  Note that, in any monotone pairing {mt}, {mt[i]}
    precedes {mt[j]} iff {i < j}.

    Two consecutive pairs {mt[k]==(r[k],s[k])} and
    {mt[k+1]==(r[k+1],s[k+1])} are said to be a `step' of the match.
    The step is said to be `full' if both {r} and {s} increase by 1,
    and `half' if only one of them increases.  A `perfect pairing' is
    one that uses only full steps.

    The pairing is said to be `complete' if {mt[0] == (0,0)} and
    {mt[m-1] == (NA-1,NB-1)}; and `incomplete' otherwise.

  */

#define pz_match_new i2_vec_new
#define pz_match_expand i2_vec_expand

/* MINIMUM-COST PAIRINGS */

/*
  The procedures in this section take as argument an arbitrary `step
  cost function' {step_cost_fn} (which may return negative values).

  The `total cost' of a pairing {mt} is the sum of {step_cost_fn(s[k])}
  over all steps {S[k] == mt[k]->mt[k+1]} of {mt}.

  The `step count' of a match is defined as the number of
  half-steps plus twice the number of full steps.  It is also the
  number of samples matched in the two ckains, minus two; thus any
  complete pairing has step count {NA+NB-2}.

  The optimization procedures also use internally a {CostMatrix} {rCost}
  with at least {NA} rowns and {NB} columns, which may be preallocated
  and given by the client.  The procedures will (re)allocate said
  matrix if it is NULL or too small.

*/

typedef double pz_match_step_function_t ( pz_match_pair *p1, pz_match_pair *p2 );

typedef double_vec_t pz_match_cost_matrix_t;  /* Work array used by DP algorithm */

void pz_match_find_best
  ( int NA,
    int NB,
    pz_match_step_function_t step_cost_fn,
    pz_match_t *mt, /* (OUT) */
    double *totCost, /* (OUT) */
    pz_match_cost_matrix_t *rCost  /* (WORK) */
  );
  /*
    Returns in {mt} a complete monotone pairing between
    two strings with {NA} and {NB} elements, with mimimum
    total cost (which is returned in {totCost}. */

void pz_match_refine
  ( int NA,
    int NB,
    pz_match_step_function_t step_cost_fn,
    pz_match_t *mtOld,
    int maxAdjust,
    pz_match_t *mt, /* (OUT) */
    double *totCost, /* (OUT) */
    double_vec_t *minTotCost, /* (OUT) */
    pz_match_cost_matrix_t *rCost  /* (WORK) */
  );
  /*
    Computes a partial monotone pairing, between two arbitrary
    sequences with {NA} and {NB} elements, that is not too far from {mtOld}
    and has minimum total cost.

    The `input pairing' {mtOld} is an arbitrary (but non-empty)
    pairing between the two sequences, which is to be improved by the
    procedure.

    On output, {mt} will point to the pairing of minimum total cost
    found by the procedure, within certain constraints (or to NULL if
    the constraints cannot be satisfied). The total cost of {mt}
    will be returned in {totCost}.

    Let's define the `initial pairs' as consisting of all pairs of the
    input pairing {mtOld}, plus all possible index pairs that precede
    its first pair, plus all possible index pairs that follow its last
    pair.  One constraint on the refined pairing {mt} is that is
    that every pair {mt[k]} must lie within {maxAdjust} distance
    from some initial pair, in the Manhattan metric.  In other words,
    the `valid pairs' for the refined pairing are the union of all
    sets of the form {{ (r+dr,s+ds) : |dr| <= maxAdjust, |ds| <=
    maxAdjust }} where {(r,s)} is an initial pair.

    A second constraint is that the refined pairing {mt} will
    contain exactly one pair {(ro,so)} such that {ro+so == rc+sc},
    where {(rc,sc)} is the middle pair of the input pairing {mtOld}.
    (This constraint is meant to erclude partial pairings that cover
    only short initial or final segments of the two sequences.)

    Thus, the returned match will be one that:

      (1) contains only valid pairs, as defined above;

      (2) begins with some pair {(ri,si)} with {ri+si <= rc+sc};

      (3) ends with some pair {(rf,sf)} with {rf+sf >= rc+sc};

      (4) has minimum total cost, among all pairings that satisfy (1)-(3).

    Note that there is always a pairing that satisfies (1)-(3), namely the
    pairing {mtOld} itself (or any piece of it that includes
    its central pair).

    The procedure also returns in {minTotCost} a table such that
    {minTotCost[k]} is the minimum total cost (NOT the mismatch) of
    any pairing with {k} steps that satisfies the above constraints,
    for each {k} in {[0..NA+NB-2]}.
  */

/* UTILITY FUNCTIONS */

pz_match_t *pz_match_cut(pz_match_t *mt, int start, int count);
  /* Returns the segment {mt[start..start+count-1]} of pairing {mt}. */

pz_match_t *pz_match_adjust
  ( pz_match_t *mt,
    pz_segment_pair *sOld,
    pz_segment_pair *sNew
  );
  /*
    Given a pairing between segments {sOld[0]} and {sOld[1]}, returns
    the equivalent pairing between {sNew[0]} and {sNew[1]}.  This
    entails mapping the relative sample indices in {mt} so that they
    refer to the same samples, and possibly trimming pairs that fall
    outside the new segments.

    The result is NULL if none of the old pairs are contained in the
    new segments, or the intersection of the restriction of {mt}
    to the new segments is not a single, continuous pairing. */

pz_match_t *pz_match_remove_dangling_ends ( pz_match_t *mt );
  /*
    Returns a new pairing obtained by removing
    the ``dangling ends'' of {mt}, i.e. all initial and final
    half-steps of {mt} where one of the ``legs'' remains stuck at
    the initial or final point of the corresponding chain. */

pz_match_t pz_match_most_perfect ( int n0, int n1 );
void pz_match_domost_perfect ( int n0, int n1, pz_match_t *mt );
  /*
    Generates a maximally perfect pairing between two segments
    with {n0} and {n1} samples, respectively. The corrresponding
    path is a Bresenham line between {(0,0)} and {(n0,n1)}.

    {pz_match_domost_perfect} is like {pz_match_most_perfect} but stores the
    pairing in the client-provided array {mt}, which should
    have {max(n0,n1)} elements exactly. */

pz_match_pair pz_match_abs_pair_clip(
    pz_match_pair p,
    pz_segment_pair *s
  );
  /*
    Given a pair {p} of sample indices, relative to segments {s[0]}
    and {s[1]}, returns the corresponding indices in the whole chains. */

pz_match_pair pz_match_rel_pair_clip(
    pz_match_pair p,
    pz_segment_pair *s
  );
  /*
    Given a pair {p} of sample indices in two chains,
    returns the corresponding sample indices relative to segments {s[0]}
    and {s[1]}.  Each returned index {p[j]} will be {(int.ne - 1)}
    if the corresponding sample lies outside the segment {s[j]}. */

typedef bool_t pz_match_step_predicate_t (
    pz_match_pair *p1,
    pz_match_pair *p2
  );

typedef double pz_match_step_measure_t (
    pz_match_pair *p1,
    pz_match_pair *p2
  );

pz_match_t *pz_match_remove_unmatched_ends(
    pz_match_t *mt,
    pz_match_step_predicate_t isMatched
  );
  /*
    Removes all initial and final steps {mt[k]->mt[k+1]} of {mt} for which
    {isMatched(mt[k],mt[k+1])} returns FALSE.  */

double pz_match_length(
    pz_match_t *mt,
    pz_match_step_measure_t lengthFn
  );
  /*
    Returns the sum of of {lengthFn(S)} for all steps of the pairing {mt}. */

double pz_match_sum(
    pz_match_t *mt,
    pz_match_step_function_t stepFn
  );
  /*
    Returns the sum of {stepFn(S)} over the pairing {mt}. */

double pz_match_integral(
    pz_match_t *mt,
    pz_match_step_measure_t lengthFn,
    pz_match_step_function_t stepFn
  );
  /*
    Returns the integral of {stepFn(S)} over the pairing {mt},
    with respect to step length as returned by {lengthFn(S)}. */

int pz_match_matched_step_count(
    pz_match_t *mt,
    pz_match_step_predicate_t isMatched
  );
  /*
    Returns the step count of {mt} (number of half-steps plus twice
    the number of full steps), considering only steps {S}
    for which {isMatched(S)} is TRUE. */

double pz_match_matched_length(
    pz_match_t *mt,
    pz_match_step_measure_t lengthFn,
    pz_match_step_predicate_t isMatched
  );
  /*
    Returns the sum of {lengthFn(S)} for all steps {S} in {mt}
    for which {isMatched(S)} is true. */

int pz_match_overlap(
    pz_match_t *a,
    pz_match_t *b,
    pz_segment_pair *aSeg,
    pz_segment_pair *bSeg,
    int maxAdjust
  );
  /*
    Compares two pairings {a} and {b}, which are relative to segment
    pairs {aSeg} and {bSeg}, respectively.  Returns the number of
    half-steps of {a} that lie at most {maxAdjust} from some half-step of
    {b} with same position.  Note that {pz_match_overlap(a,b,...) == pz_match_overlap(b,a,...)}. */

/* PACKED PAIRINGS */

typedef struct 
  pz_match_packed_t 
    { /* A compact description of a pairing */
      int np;      /* Number of pairs (> 0). */
      pz_match_pair ini;   /* Initial pair */
      pz_match_pair fin;    /* Final pair */
      char *steps; /* Encoded steps */
    } pz_match_packed_t;
    /*
      Code for encoded steps '/' advance [0], '\\' advance [1], '|' both. */

pz_match_packed_t *pz_match_pack ( pz_match_t *mt );
  /*
    Returns a packed encoding of {mt}. */

pz_match_packed_t *pz_match_packPerfect ( int nSamples );
  /*
    Returns a packed perfect pairing between two
    segments, both with {nSamples} segments. */

pz_match_t pz_match_unpack ( pz_match_packed_t *pm );

void pz_match_do_unpack ( pz_match_packed_t *pm, pz_match_t *mt );
  /*
    Expands a packed representation into an explicit pairing.
    {pz_match_do_unpack} is like {pz_match_unpack} but returns the result in
    the client-provided array {mt}. */

void pz_match_write_packed ( FILE *wr, pz_match_packed_t *pm );
  /*
    Writes the pairing {pm} in compact form to writer {wr}. */

pz_match_packed_t *pz_match_read_packed ( FILE *rd );
  /*
    Reads from {rd} a packed pairing, in the format written by {Write}. */

pz_match_packed_t *pz_match_adjustPacked 
 (  pz_match_packed_t *rpm,
    pz_segment_pair *oldSeg,
    pz_segment_pair *newSeg
  );
  /*
    Given a packed pairing {rpm^} between two segments
    {oldSeg[0]} and {oldSeg[1]}, returns a new packed pairing
    that describes the same pairing between the segments
    {newSeg[0]} and {newSeg[1]}.  If the new segments are
    not contained in the original ones, the pairing may be
    trimmed.

    The result is NULL if {rpm} is NULL or if the intersection of the
    new segments and the old pairings is not a single interval. */

/*
  Copyright © 2001 Universidade Estadual de Campinas (UNICAMP).
  Authors: Helena C. G. Leitão and Jorge Stolfi.

  This file can be freely distributed, used, and modified, provided
  that this copyright and authorship notice is preserved, and that any
  modified versions are clearly marked as such.

  This software has NO WARRANTY of correctness or applicability for
  any purpose. Neither the authors nor their employers chall be held
  responsible for any losses or damages that may result from its use.
*/
#endif