/* Last edited on 2022-10-19 19:29:09 by stolfi */ /* ---------------------------------------------------------------------- */ /* msm_seq_desc.h */ typedef struct msm_seq_desc_t {...} msm_seq_desc_t; /* In this case, however, the first and last valid indices will correspond to integer original indices. That is to say, if {estep} is negative then {skip} and {size-1} are multiples of {2^{-estep}}. */ /* ---------------------------------------------------------------------- */ /* msm_seq_desc.h : msm_seq_desc_make */ if ((size > 0) && (estep < 0)) { int m = (1 << (-estep)); demand (skip % m == 0, "{skip} must be a multiple of {2^{-estep}}"); demand ((size - 1) % m == 0, "{size-1} must be a multiple of {2^{-estep}}"); } /* ---------------------------------------------------------------------- */ /* msm_seq_desc.h : msm_seq_desc_trim */ /* In particular, if {s.estep} is negative, the procedure impliclty adjusts {itrim} and {frtim} upwards so that both {t.skip} and {t.size-1} remain multiples of {2^{-s.estep}}. */ if ((s->estep < 0) && (tsize > 0)) { int32_t m = (1 << (- s->estep)); msm_seq_desc_adjust_skip_and_size(&tskip, &tsize, m); } /* ---------------------------------------------------------------------- */ /* msm_pairing.h, msm_pairing.c */ bool_t msm_pairing_is_atomic_increasing(msm_pairing_t *p, bool_t strictly ,bool_t die); /* Returns TRUE iff all steps in pairing {p} are monotonic in both sequences, and increase by exactly 1 along one (or both) of them. Otherwise, if {die} is true, fails with an error message, else returns FALSE silently. If {strictly} is false the increment on either side may be zero, otherwise it must be at least 1 on both sides. */ bool_t msm_pairing_is_atomic(msm_pairing_t *p, bool_t die) { int ng = msm_pairing_num_rungs(p); demand(ng >= 0, "invalid rung count"); if ((ng == 0) || msm_pairing_is_perfect_condensed(p)) { /* Condensed perfect pairing: */ return TRUE; } else { /* Check steps in {gv}: */ msm_rung_t g = msm_pairing_get_rung(p, 0); int k; for (k = 1; k < ng ; k++) { /* Get next rung: */ msm_rung_t h = msm_pairing_get_rung(p, k); if (! msm_rung_step_is_atomic(g, h, die)) { return FALSE; } g = h; } } return TRUE; } bool_t msm_pairing_is_atomic_increasing(msm_pairing_t *p, bool_t strictly, bool_t die) { int ng = msm_pairing_num_rungs(p); demand(ng >= 0, "invalid rung count"); if ((ng == 0) || msm_pairing_is_perfect_condensed(p)) { /* Condensed perfect pairing: */ return TRUE; } else { /* Check steps in {gv}: */ int minIncrEach = (strictly ? 1 : 0); /* Minimum increment on each side. */ int minIncrSum = 0; /* Minimum total increment. Not necessary. */ msm_rung_t g = msm_pairing_get_rung(p, 0); int k; for (k = 1; k < ng ; k++) { /* Get next rung: */ msm_rung_t h = msm_pairing_get_rung(p, k); if (! msm_rung_step_is_increasing(g, h, minIncrEach, minIncrSum, die)) { return FALSE; } if (! msm_rung_step_is_atomic(g, h, die)) { return FALSE; } g = h; } } return TRUE; } /* ---------------------------------------------------------------------- */ /* msm_cand.c : msm_cand_is_valid */ int ng = msm_pairing_num_rungs(p); if (ng != 0) { /* Get position counts of the two sequences: */ int nx = cd->seq[0].size; int ny = cd->seq[1].size; /* The sequences must be non-empty: */ if (nx <= 0) { fail_test(die, "invalid {nx} in cand"); } if (ny <= 0) { fail_test(die, "invalid {ny} in cand"); } /* The initial rung must lie in the valid range: */ msm_rung_t gini = msm_pairing_get_rung(p, 0); int ix = gini.c[0], iy = gini.c[1]; if ((ix < 0) || (ix >= nx)) { fail_test(die, "invalid {ix} in cand"); } if ((iy < 0) || (iy >= ny)) { fail_test(die, "invalid {iy} in cand"); } /* The final rung must be greater than or equal to the first one: */ msm_rung_t gfin = msm_pairing_get_rung(p, ng-1); int fx = gfin.c[0], fy = gfin.c[1]; if (fx < ix) { fail_test(die, "invalid cand - {fx < ix}"); } if (fy < iy) { fail_test(die, "invalid cand - {fy < iy}"); } /* The pairing must not fall off the end of the sequence: */ if ((fx >= nx)) { fail_test(die, "invalid open cand - {fx >= nx}"); } if ((fy >= ny)) { fail_test(die, "invalid open cand - {fy >= ny}"); } } /* ---------------------------------------------------------------------- */ /* msm_ps_tools : msm_ps_tools_draw_graph */ /* If {circ} is true the graphs are assumed to be periodic, and an extra segment is drawn at each end of the plot to show that. In that case the vector {x} must have {nd+1} elements, and the period is assumed to be {x[nd] - x[0]}. */ /* ---------------------------------------------------------------------- */ /* msm_cand.h : msm_cand_t */ /* Moreover, if {seq[j].circ} is FALSE, this constraint must be satisfied for every rung. Note that if {seq[j].circ} is TRUE the pairing may have indices {c[j]} that exceed {seq[j].npos}. Those indices should be taken modulo {seq[j].npos} when indexing into sequence {seq[j]}. The pairing {pr} itself may be circular. In that case one must have {seq[0].circ==seq[1].circ==TRUE}. Moreover, the pairing's coordinate period vector {per = msm_pairing_period(pr)} must have {per.c[j] % seq[j].npos == 0} and {per.c[j] > 0} (i.e., it must make a positive number of full turns around sequence {seq[j]}). A circular pairing may cover the same part of {seq[j]} more than once. This situation may be useful during multiscale search (to represent multiple pre-candidates between two circular sequences), and may even make sense for some applications. */ /* ---------------------------------------------------------------------- */ /* msm_cand.h : msm_cand_from_pairing */ /* If the pairing {pr} is circular, each coordinate period must be positive and a multiple of the corresponding sequence length. The two sequences must be circular. If the pairing {pr} is open, each sequence must be either circular or long enough to contain the corresponding endpoint of the last rung of {pr}. */ /* ---------------------------------------------------------------------- */ /* msm_cand.h : msm_cand_count_common_rungs */ /* Takes circularity of the sequences into account. */ /* ---------------------------------------------------------------------- */ /* msm_cand.h : msm_cand_throw*/ /* {msm_pairing_throw(xp->npos,xp->circ, yp->npos,yp->circ, len,circp,atomic,diagonal,skipProb)}. */ /* ---------------------------------------------------------------------- */ /* msm_seq_desc.h */ /* The {level} field indicates the number of coarsening (filtering and downsampling) steps applied to the original biosequence; so that {level} is zero for the original biosequence itself. */ msm_seq_desc_t msm_seq_desc_reverse(msm_seq_desc_t *s, int32_t osize); /* Returns a descriptor for the sequence {r} that would result from reversing the sequence described by {s}. The result has the same {id}, {name}, {size}, and {estep} fields, the opposite {rev}, and an appropriate {skip} that is computed by assuming that the original sequence had {osize} samples. */ /* INDEX MAPPING BETWEEN SUCCESSIVE SCALES OF RESOLUTION The procedures in this section provide index mapping between a sequence {sFine} with {nFine} distinct matching positions and a filtered and downsampled version {sCoarse} of it, with {nCoarse} positions. These procedures assume that the filter performs a discrete convolution of the fine sequence with a filter kernel that spans {nwtb} sequence positions, and then takes one every {step} samples. Positions in the range {[0 .. s]} and {[s + nFine - nwtb -1 .. nFine-1]} of {sFine}, where {s = floor((nwtb-1)/2)} will have no correspondents in {sCoarse}. The procedures assume that each integer position {ix} of {sCoarse} corresponds to position {floor(a*ix) + s} of {sFine}; where {a} is a real constant that depends on the lengths of the two sequences. These procedures do not assume any particular value for the scale factor {a}, or for the position counts {nFine} and {nCoarse}, except for {nFine >= nwtb} and {nCoarse >= 1}. In particular, {a} may be less than one; this is may occur if the {sCoarse} sequence is subsampled at a higher rate than {sFine}. In any case, the mapping between the positions of the two sequences are monotonic non-decreasing: i.e. increasing one index does not decrease the other. Note that for certain values of {nFine} and {nCoarse} there may be `hiccups' in the mapping, due to {floor()} rounding. */ int msm_seq_desc_map_index_to_coarser(int ixFine, int nFine, int nCoarse, int nwtb); /* Maps a position {ixFine} of a sequence with {nFine} positions to the approximately corresponding position {ixCoarse} in a filtered and downsampled version with {nCoarse} positions, assumin a filter kernel that covers {nwtb} positions of the finer sequence. */ int msm_seq_desc_map_index_to_finer(int ixCoarse, int nCoarse, int nFine, int nwtb); /* Maps a position {ixCoarse} in a filtered sequence with {nCoarse} positions to the approximately corresponding position {ixFine} in the original sequence with {nFine} positions, assuming a filter kernel that covers {nwtb} positions of the fine sequence. */ int msm_seq_dusc_map_index_to_coarser(int ixFine, int nFine, int nCoarse, int nwtb) { demand(nwtb % 2 == 0, "kernel must have odd length"); demand(nFine >= nwtb, "nFine is too small for filter kernel"); demand(nCoarse >= 1, "cannot map to empty sequence"); /* Account for the loss of samples due to convolution: */ ixFine = ixFine - (nwtb-1)/2; /* Compute max shifted position in {sFine}, accounting for filter kernel: */ int ixFineMax = nFine - nwtb - 1; /* Clip the index {ixFine} to the valid range: */ if (ixFine < 0) { ixFine = 0; } if (ixFine > ixFineMax) { ixFine = ixFineMax; } /* Map {ixFine} from {0..ixFineMax} to {0..nCoarse-1}: */ int64_t ixTemp = ((int64_t) nCoarse - 1) * ixFine; int ixCoarse = (ixFineMax == 0 ? 0 : ixTemp/ixFineMax); assert((ixCoarse >= 0) && (ixCoarse < nCoarse)); return ixCoarse; } int msm_seq_desc_map_index_to_finer(int ixCoarse, int nCoarse, int nFine, int nwtb) { demand(nwtb % 2 == 0, "kernel must have odd length"); demand(nFine >= nwtb, "nFine is too small for filter kernel"); demand(nCoarse >= 1, "cannot map to empty sequence"); /* Clip the index {ixCoarse} to the valid range: */ if (ixCoarse < 0) { ixCoarse = 0; } if (ixCoarse >= nCoarse) { ixCoarse = nCoarse - 1; } /* Compute max shifted position in {sFine}, accounting for filter kernel: */ int ixFineMax = nFine - nwtb; /* Map {ixCoarse} from {0..nCoarse-1} to {0..ixFineMax}: */ int ixFine = (nCoarse == 1 ? 0 : (ixFineMax * ixCoarse)/(nCoarse - 1)); double ixFinetmp = (nCoarse == 1 ? 0 : ((double)ixFineMax * (double)ixCoarse)/(double)(nCoarse -1)); ixFine = (int) ixFinetmp; /* Undo the convolution shift: */ ixFine += (nwtb-1)/2; assert((ixFine >= 0) && (ixFine < nFine)); return ixFine; } /* ---------------------------------------------------------------------- */ /* msm_cand.c */ msm_cand_t msm_cand_map_to_finer ( msm_cand_t *cd, int nxnew, int nynew, int nwtb ) { bool_t debug = FALSE; if (debug) { fprintf(stderr, " ----------------------------------------------------------\n"); fprintf(stderr, " mapping candidate\n"); msm_cand_debug("old: ", cd); } int nold[2]; /* Counts of positions in old sequences. */ msm_cand_t cdnew; cdnew.score = cd->score; int j; for (j = 0; j <= 1; j++) { nold[j] = cd->seq[j].npos; cdnew.seq[j] = cd->seq[j]; cdnew.seq[j].npos = (j == 0 ? nxnew : nynew); cdnew.seq[j].level --; } cdnew.pr = msm_pairing_map_to_finer ( cd->pr, nold[0], nold[1], nxnew, nynew, nwtb ); if (debug) { fprintf(stderr, "\n"); msm_cand_debug("new: ", &cdnew); fprintf(stderr, " ----------------------------------------------------------\n"); } return cdnew; } /* ---------------------------------------------------------------------- */ /* msm_cand_refine.c : msm_cand_refine_fill_tableau */ auto bool_t in_head(msm_rung_t g); auto bool_t in_tail(msm_rung_t g); /* TRUE if {g} is inside the `head' (resp. `tail') of the valid region, namely the square of side {kappa} whose upper right (resp. lower left) corner is the first (resp. last) rung of {prold}, widened by {delta} on all sides. */ bool_t in_head(msm_rung_t g) { msm_rung_t gini = msm_pairing_get_rung(prold, 0); int dx = g.c[0] - gini.c[0]; int dy = g.c[1] - gini.c[1]; if ((dx > delta) || (dx < -kappa-delta)) { return FALSE; } if ((dy > delta) || (dy < -kappa-delta)) { return FALSE; } return TRUE; } bool_t in_tail(msm_rung_t g) { int nr = msm_pairing_fund_rung_count(prold); msm_rung_t gfin = msm_pairing_get_rung(prold, nr-1); int dx = g.c[0] - gfin.c[0]; int dy = g.c[1] - gfin.c[1]; if ((dx < -delta) || (dx > +kappa+delta)) { return FALSE; } if ((dy < -delta) || (dy > +kappa+delta)) { return FALSE; } return TRUE; } /* ---------------------------------------------------------------------- */ int msm_seq_desc_filtered_size(int nFine, bool_t circ, int nwtb); /* Computes the number of distinct matching positions in the filtered and downsampled version of a sequence with {nFine} positions and circularity {circ}. */ int msm_seq_desc_filtered_size(int nFine, bool_t circ, int nwtb) { return (nFine - (circ ? 0 : nwtb - 1) + 1)/2; } /* ---------------------------------------------------------------------- */ typedef double msm_step_score_proc_t(msm_rung_t g0, msm_rung_t g1); /* A procedure that computes a numerical score for a single step of a pairing, from rung {g0} to rung {g1}. The procedure should allow for either rung to be {msm_rung_none}. */ /* ---------------------------------------------------------------------- */ /* AFFINE INDEX MAPPING */ int msm_seq_map_index_affinely(int ixold, int dold, int nold, int nnew); /* Applies an affine map to the index {ixold} of an element in some sequence {sold} to obtain the index {ixnew} of the approximately corresponding element in another sequence {snew}. The parameters {nold} and {nnew} should be the element counts of the two sequences. The affine map is {(ixold + dold)*nnew/nold}, rounded to an integer. The mapping is non-decreasing: i.e. increasing {ixold} does not decrease the result. */ int msm_seq_unmap_index_affinely(int ixnew, int nnew, int nold, int dold); /* An approximate inverse of {msm_seq_map_index_affinely}. Namely, computes an index {ixold} such that {msm_seq_map_index_affinely(ixold, dold, nold,nnew)} is equal to {ixnew}, apart from rounding. The formula is {ixnew*nold/nnew + dold}, rounded to the nearest integer. */ int msm_seq_map_index_affinely(int ixold, int dold, int nold, int nnew) { /* Apply the increment {dold}: */ ixold += dold; /* Reduce the index {ixold} (which may be negative) modulo {nold}: */ int kxold = (ixold % nold + nold) % nold; /* Map affinely the range {0..nold} to {0..nnew}: */ int kxnew = (int)floor(((double)kxold + 0.5)*((double)nnew)/((double)nold)); affirm((kxnew >= 0) && (kxnew < nnew), "bug"); /* Add the original number {q} of full turns: */ int q = (ixold - kxold)/nold; int ixnew = kxnew + q*nnew; return ixnew; } int msm_seq_unmap_index_affinely(int ixnew, int nnew, int nold, int dold) { /* Reduce the index {ixnew} (which may be negative) modulo {nnew}: */ int kxnew = (ixnew % nnew + nnew) % nnew; /* Map affinely the range {0..nnew} to {0..nold}: */ int kxold = (int)floor(((double)kxnew + 0.5)*((double)nnold)/((double)nnew)); affirm((kxold >= 0) && (kxold < nold), "bug"); /* Add the original number {q} of full turns: */ int q = (ixnew - kxnew)/nnew; int ixold = kxold + q*nold; /* Un-apply the increment {dold}: */ ixold -= dold; return ixold; } epswr_figure_t *msm_ps_tools_get_ps_stream(msm_ps_tools_t *mps) { return mps->eps; } PSStream *msm_ps_tools_get_ps_stream(msm_ps_tools_t *mps); /* Returns the {PSStream} object associated with the {msm_ps_tools_t} object {mps}. The {epswr.h} routines can be used to draw on that stream. For those routines, the client coordinates are measured (in mm) from the lower left corner of the usable plotting area. */