#ifndef pz_candidate_H #define pz_candidate_H /* A `candidate' is a pair of segments from two different contours */ /* Last edited on 2015-01-20 16:48:52 by stolfilocal */ #include #include #include #include #include typedef struct pz_candidate_t { pz_segment_pair seg; /* Segments being matched. */ double mismatch; /* Mismatch function, or 0 if not available. */ double length; /* Mean length of the two segments. */ double matchedLength; /* Length effectively matched. */ pz_match_packed_t *pm; /* Matching for this candidate, or NIL */ } pz_candidate_t; /* A "pz_candidate_t" consists of two segments of contour, with a tentative pairing between them. */ /* CANDIDATE LISTS */ vec_typedef(pz_candidate_vec_t,pz_candidate_vec,pz_candidate_t); typedef struct pz_candidate_read_data { char *cmt; double lambda; pz_candidate_vec_t c; } pz_candidate_read_data; #define pz_candidate_empty \ (pz_candidate_t) \ { /* seg */ (pz_segment_pair){ pz_segment_empty, pz_segment_empty }, \ /* mismatch */ 0.0, \ /* length */ 0.0, \ /* matchedLength */ 0.0, \ /* pm */ NULL \ } /* An `empty' candidate (NOT the only one!). May be handy. */ bool_t pz_candidate_is_empty(pz_candidate_t *c); /* TRUE if the candidate is empty (either segment has zero samples). */ pz_candidate_t pz_candidate_expand ( pz_candidate_t *c, int *iniExtra, int *finExtra ); /* Expands segment "i" of candidate "c", for "i = 0,1", by "iniExtra[i]" steps at the beginning and "finExtra[i]" steps at the end, preserving its pairing (if any). If either argument is negative, removes that many steps from the respective end, and trims the pairing "c.pm" accordingly. */ pz_candidate_t pz_candidate_cut_and_pack ( pz_candidate_t *cand, pz_match_t *mt, double mismatch, double length, double matchedLength ); /* Assumes that "mt" is a partial pairing between "cand.seg[0]" and "cand.seg[1]". Returns a copy "ct" of "cand", trimmed to the part actually covered by "mt", and sets "ct.mismatch", "ct.length", "ct.matchedLength" as given. Also packs "mt" and stores it in "ct.pm". */ void pz_candidate_write ( FILE *wr, char *cmt, pz_candidate_vec_t *c, double lambda ); /* Writes the list of candidates "c" and the parameter "lambda" to the writer "wr". */ pz_candidate_read_data pz_candidate_read (FILE *rd); /* Reads from "rd" a list of candidates that was written by "Write". */ pz_candidate_read_data pz_candidate_read_old (FILE *rd, int_vec_t *m, bool_t *rev); /* Reads from "rd" a list of candidates that was written by the old version of "Write". The "s.tot" field of each segment "s" is taken from the array "m", indexed by the corresponding contour number; and the "s.rev" flag is set to "rev[i]", where "i" is the candidate's side. */ bool_vec_t pz_candidate_chains_used (pz_candidate_vec_t *cand); /* Returns a vector "used" such that "used[k]" is TRUE iff chain "k" occurs in some candidate of "cand[i]". */ typedef struct pz_chain_pair { pz_symbol_chain_read_data d[2]; } pz_chain_pair; void pz_candidate_print ( FILE *wr, pz_candidate_t *cand, bool_t pairing /* (:= TRUE) */ ); /* Prints the data of candidate "cand" to "wr", in a multiline readable format. If the candidate has a pairing, prints it too (in compact form), unless "pairing" is FALSE. */ /* COMPARISON AND SORTING */ typedef bool_t pz_candidate_compare_proc (pz_candidate_t *a, pz_candidate_t *b); /* A procedure that tells whether "a" must come before "b". */ void pz_candidate_sort (pz_candidate_vec_t *c, pz_candidate_compare_proc before); /* Sorts the list "c" according to the given comparison procedure. */ void pz_candidate_index_sort ( pz_candidate_vec_t *c, int_vec_t *x, pz_candidate_compare_proc before ); /* Returns in "x" a permutation of the indices of "c" so that "c[x[k]]" is non-decreasing with "k", according to the given comparison procedure. */ bool_t pz_candidate_lexically_better (pz_candidate_t *a, pz_candidate_t *b); /* Compares by chain pair, then by then start of segments, by length of segments, and breaks ties by mismatch and other criteria. */ bool_t pz_candidate_pair_mismatch_better (pz_candidate_t *a, pz_candidate_t *b); /* Compares by chain pair, then by mismatch and total length, and breaks ties by other criteria. */ bool_t pz_candidate_mismatch_better (pz_candidate_t *a, pz_candidate_t *b); /* Compares by "mismatch", breaking ties by other criteria. */ unsigned pz_candidate_overlap ( pz_candidate_t *a, pz_candidate_t *b, unsigned maxAdjust, pz_match_t *ra, /* Work area */ pz_match_t *rb /* Work area */ ); /* Compares the pairings (explicit or implied) of candidates "a" and "b", and returns the number of half-steps of "a" that lie at most "maxAdjust" from some half-step of "b" with same position. Note that "Overlap(a,b,...) == Overlap(b,a,...)". In particular, if "a" and "b" are the same candidate, returns the total number of steps in the two segments of that candidate (i.e. twice the mean length of that candidate). Returns 0 if corresponding segments of the two candidates are on different chains or have different orientations. */ void pz_candidate_find_similar_cands ( pz_candidate_vec_t *aCand, pz_candidate_vec_t *bCand, unsigned minSteps, unsigned maxAdjust, bool_vec_t *aOK, /* (OUT) */ bool_vec_t *bOK, /* (OUT) */ bool_t printAMatched, /* (:= FALSE) */ bool_t printAUnmatched, /* (:= FALSE) */ bool_t printBMatched, /* (:= FALSE) */ bool_t printBUnmatched, /* (:= FALSE) */ bool_t printMatchedCands, /* (:= FALSE) */ bool_t printMatchedChains /* (:= FALSE) */ ); /* Compares the candidates "aCand" against the candidates "bCand". Any pair of candidates "acand[i]", "bCand[j]" that overlaps by at least "minSteps" steps, with alignments differing by at most "maxAdjust" samples, is flagged by setting "aOK[i] == TRUE" and "bOK[j] == TRUE". The options "printAMatched", "printBMatched" "printAUnmatched", and "printBUnmatched" cause printout of the corresponding matched or unmatched elements, once each. The "printMatchedCands" option prints all pairing pairs (inclunding multiple matches of the same "aCand" or "bCand" element. The "printMatchedChains" option prints all chain pairs with at least one pairing pair of candidates. */ void pz_candidate_print_similarity_statistics ( FILE *wr, bool_vec_t *aOK, char *aName, bool_vec_t *bOK, char *bName ); /* Prints to "wr" the basic statistics of the results of "FindSimilarCands", such as counts of false/true positives/negatives. Prints also the retrieval efficiency parameters of "a" (seen as a search result) relative to "b" (seen as the ideal result): sensitivity (fracion of "b"'s that were found in "a"), significance (fraction of "a" that are in "b"), and multiplicity (how many "a"s were found for each "b"). */ /* CANDIDATE LIST PRUNING */ /* The following procedures remove candidates from the list "cand[0..nCands]". Unless observed otherwise, they preserve the order of the candidates and normally change "cand.ne". */ void pz_candidate_remove_duplicates (pz_candidate_vec_t *cand); /* Removes candidates that cover the same segments of the same chains as previous candidates. Assumes "cand" is sorted in such a way that duplicates will end up in adjacent positions. */ void pz_candidate_remove_short ( pz_candidate_vec_t *cand, unsigned minSteps ); /* Removes any candidates that don't have at least "minSteps" steps on both segments. The resulting set is returned in {c} itself. If any candidates were removed, reallocates {c.el} as needed. */ void pz_candidate_prune_cands_per_pair ( pz_candidate_vec_t *cand, unsigned maxCands ); /* Removes the worst candidates from each pair of chains, leaving at most "maxCands" of them. Assumes that candidates are sorted by chain pair and then by decreasing quality, as in "PairMismatchBetter". */ void pz_candidate_prune_cands_per_chain ( pz_candidate_vec_t *cand, unsigned maxCands ); /* Removes the worst candidates for chains that have more than "candsPerChain" candidates. Assumes that the candidates are sorted by decreasing quality, as in "MismatchBetter" or "LexicallyBetter". */ void pz_candidate_merge_overlaps ( pz_candidate_vec_t *cand, unsigned minSteps, unsigned maxAdjust ); /* Joins candidates for ( which "Overlap(... minSteps,maxAdjust)" is TRUE. Assumes candidates are sorted by chain pair first, as in "PairMismatchBetter". */ pz_candidate_t pz_candidate_map( pz_candidate_t *cOld, /* Candidate on old curves. */ pz_double_chain_t *t0Old, pz_double_chain_t *t1Old, /* Sampling times of old curves. */ pz_double_chain_t *t0New, pz_double_chain_t *t1New, /* Sampling times of new curves. */ r2_t stride, /* Period of "{t0Old,t1Old}" and "{t0New,t1New}". */ r2_t stepNew, /* Sampling step of new curves. */ i2_t extra /* Extra steps to add to each end of each segment */ ); /* Maps the candidate "cOld" to a corresponding candidate "cNew" on new versions of the same curves (presumably filtered at different scales and/or resampled with different times). The procedure assumes that corresponding points of two versions of the same curve have the same parameter "t". They assume that sample "i" of each curve has parameter value "tOld[i]" or "tNew[i]", respectively; and that both versions are periodic, with same period "stride". Segment "j" is mapped to the new curves, then extended by "extra[j]" steps in both directions. (If "extra[j]" is negative, the ends are trimmed by that amount). The pairing "cOld.pm" is converted with "pz_double_chain_map_packed_match". */ /* Copyright © 2001 Universidade Estadual de Campinas (UNICAMP). Authors: Helena C. G. Leitão and Jorge Stolfi. This file can be freely distributed, used, and modified, provided that this copyright and authorship notice is preserved, and that any modified versions are clearly marked as such. This software has NO WARRANTY of correctness or applicability for any purpose. Neither the authors nor their employers chall be held responsible for any losses or damages that may result from its use. */ #endif