/* Last edited on 2023-02-22 12:29:09 by stolfi */ #include #include #include #include #include #include #include #include #include pz_match_t pz_match_most_perfect ( int n0, int n1 ) { int np = imax(n0, n1); pz_match_t mt = pz_match_new(np); pz_match_domost_perfect(n0, n1, &mt); return mt; } void pz_match_domost_perfect ( int n0, int n1, pz_match_t *mt ) { affirm(n0 > 0, "bad n0"); affirm(n1 > 0, "bad n1"); int np = mt->nel; affirm(np == imax(n0,n1), "bad length"); if (np == 1) { mt->el[0] = (i2_t){{0,0}}; } else { double fn0 = (double)(n0-1); double fn1 = (double)(n1-1); double fnp = (double)(np-1); int i; for (i = 0; i < np; i++) { double fi = (double)i; int p0 = pz_round((fn0 * fi)/fnp); int p1 = pz_round((fn1 * fi)/fnp); mt->el[i] = (i2_t){{p0, p1}}; } } } pz_match_t pz_match_unpack ( pz_match_packed_t *pm ) { int np = pm->np; pz_match_t mt = pz_match_new(np); pz_match_do_unpack(pm, &mt); return mt; } void pz_match_do_unpack ( pz_match_packed_t *pm, pz_match_t *mt ) { int np = pm->np; int i; affirm(mt->nel == np, "size mismatch"); affirm(strlen(pm->steps) == np, "wrong size"); affirm(pm->steps[0] == '|', "bad start"); mt->el[0] = pm->ini; for (i = 1; i < np; i++) { char c = pm->steps[i]; i2_t *ant = &(mt->el[i-1]), *ths = &(mt->el[i]); if (c == '/') { ths->c[0] = ant->c[0] + 1; ths->c[1] = ant->c[1]; } else if (c == '\\') { ths->c[0] = ant->c[0]; ths->c[1] = ant->c[1] + 1; } else if (c == '|') { ths->c[0] = ant->c[0] + 1; ths->c[1] = ant->c[1] + 1; } else { affirm(FALSE, "bad char"); } } affirm(mt->el[np-1].c[0] == pm->fin.c[0], "ends dont match"); affirm(mt->el[np-1].c[1] == pm->fin.c[1], "ends dont match"); } #ifdef BOGUS IMPORT Stdio; /*DEBUG*/ TYPE Direction == [-1..+1]; /* -1 == prefix, +1 == suffix */ void AllocCostMatrix( REF *CostMatrix rCost, unsigned NA, unsigned NB ) /* Ensures that "rCost^" exists and has at least "NA" rows and "NB" columns (but perhaps more). */ { if (( rCost == NULL ) || ( (rCost^.ne) < NA ) || ( (rCost^[0].ne) < NB )){ rCost = NEW(REF CostMatrix, NA, NB); }; } /* AllocCostMatrix */ void pz_match_find_best( unsigned NA, unsigned NB, StepFunction stepCostFn, REF *T match /* (OUT) */ double *totCost, /* (OUT) */ REF *CostMatrix rCost, /* (WORK) */ ) VAR k: unsigned; { assert(NA > 0 ) ANDAND ( NB > 0 ); AllocCostMatrix(rCost, NA, NB); { /* with */ ??? c = rCost^; int np = NA+NB-1; int rp = pz_match_new(np); ??? p = rp^; /* do */ /* Fill the cost matrix: */ /* We set "c[ix,iy]" with the cost of the cheapest partial pairing that begins with "(0,0)" and ends with "(ix,iy)" */ c[0,0] = 0.0e0; for (ix = 1; ix <= NA-1 ; ix++){ c[ix,0] = c[ix-1,0] + stepCostFn((i2_t){{ix-1,0}}, (i2_t){{ix,0}});; }; for (iy = 1; iy <= NB-1 ; iy++){ c[0,iy] = c[0,iy-1] + stepCostFn((i2_t){{0,iy-1}}, (i2_t){{0,iy}});; }; for (ix = 1; ix <= NA-1 ; ix++){ for (iy = 1; iy <= NB-1 ; iy++){ { /* with */ double v1 = c[ix-1,iy] + stepCostFn((i2_t){{ix-1,iy}}, (i2_t){{ix,iy}}); double v2 = c[ix,iy-1] + stepCostFn((i2_t){{ix,iy-1}}, (i2_t){{ix,iy}}); double v3 = c[ix-1,iy-1] + stepCostFn((i2_t){{ix-1,iy-1}}, (i2_t){{ix,iy}}); /* do */ c[ix,iy] = min(min(v1,v2),v3); }; }; }; /* Compute the optimum pairing: */ /* We trace back from "c[NA-1,NB-1]" to "c[0,0]", reconstructing the choice that resulted in the best pairing at each step: */ k = np-1; p[k] = (i2_t){{NA-1,NB-1}}; while ( p[k][0] > 0 ) || ( p[k][1] > 0 ){ { /* with */ int ix = p[k][0]; int iy = p[k][1]; /* do */ if (( ix == 0 )) { p[k-1] = (i2_t){{ix,iy-1}} }else if (( iy == 0 )){ p[k-1] = (i2_t){{ix-1,iy}} }else{ { /* with */ double v0 = c[ix,iy]; double v1 = c[ix-1,iy] + stepCostFn((i2_t){{ix-1,iy}}, (i2_t){{ix,iy}}); double v2 = c[ix,iy-1] + stepCostFn((i2_t){{ix,iy-1}}, (i2_t){{ix,iy}}); double v3 = c[ix-1,iy-1] + stepCostFn((i2_t){{ix-1,iy-1}}, (i2_t){{ix,iy}}); /* do */ if (( v0 == v3 )) { p[k-1] = (i2_t){{ix-1,iy-1}} }else if (( v0 == v1 )){ p[k-1] = (i2_t){{ix-1,iy}} }else if (( v0 == v2 )){ p[k-1] = (i2_t){{ix,iy-1}} }else{ assert(FALSE );; }; }; }; }; k--;; }; /* while */ totCost = c[NA-1,NB-1] - c[0,0]; match = pz_match_old_trim(p, k, np-k); }; } /* pz_match_find_best */ void pz_match_refine( unsigned NA, unsigned NB, StepFunction stepCostFn, T *mtOld, unsigned maxAdjust, REF *T mt /* (OUT) */ double *totCost, /* (OUT) */ double_vec_t *minTotCost, /* (OUT) */ REF *CostMatrix rCost, /* (WORK) */ ) VAR mBest: unsigned; { /* This code uses the following concepts. The `position' of an index pair "(r,s)" is the sum "r+s", and its `alignment' is the difference "r-s". Obviously the the pair positions are strictly increasing as one moves along a match; while their alignments may vary in either direction, although they should remain roughly constant for pairings that are approximately 1-1. Note that the position "p" and alignment "a" of a pair have the same parity, and that "r == (p+a) DIV 2, s == (p-a) DIV 2". Also note that the alignments of a set of valid pairs with a given position "p" comprise a single interval of integers with same parity as "p". This observation allows us to represent the set of valid pairs by two vectors "minAlign[p]" and "maxAlign[p]", where "p" ranges from "minPos == 0+0" to "maxPos == (NA-1)+(NB-1)". */ assert(NA > 0 ) ANDAND ( NB > 0 ); assert((mtOld->nel) > 0 ); AllocCostMatrix(rCost, NA, NB); /* Initialize the "minTotCost" table of min cost paths by step count. */ assert((minTotCost.ne - 1) == NA+NB-2 ); for (k = 0; k < (minTotCost.ne ) ; k++){ minTotCost[k] = (double.ne - 1) ;}; { /* with */ /* The range of the `position' coordinate of valid pairs. */ int minPos = 0; int maxPos = (NA-1)+(NB-1); /* Range of the `alignment' coordinate of valid pairs, for each `position'. */ int minAlign = NEW(REF INTS, maxPos + 1)^; int maxAlign = NEW(REF INTS, maxPos + 1)^; /* The central pair of the input pairing: */ int mOld = (mtOld->nel); int mid = mtOld[mOld DIV 2]; int midPos = mid[0] + mid[1]; /* The current best pairing: */ int maxPairs = NA + NB - 1; int mtBest = NEW(REF T, maxPairs)^; /* Work areas for "pz_match_refineAux": */ int c = rCost^; ??? cMin = NEW(REF double_vec_t, maxPos + 1)^; /* do */ assert(minPos == 0 ); ComputeAlignmentRanges(NA, NB, mtOld, maxAdjust, minAlign, maxAlign); totCost = (double.ne - 1); mBest = 0; for ( dPos = 0 TO 1 ){ /* fprintf(stderr, "dPos: %s\n", Fmt.Int(dPos) ); */ { /* with */ int ctrPos = midPos + dPos; /* do */ /* DebugIntPair("NA,NB:", NA,NB); */ if (( ctrPos <= maxPos )) { for (ctrAlign = minAlign[ctrPos]; ctrAlign <= maxAlign[ctrPos] BY 2 ; ctrAlign++){ assert(ctrAlign MOD 2 == ctrPos MOD 2 ); { /* with */ int rc = (ctrPos+ctrAlign) DIV 2; int sc = (ctrPos-ctrAlign) DIV 2; /* do */ /* DebugIntPair("rc, sc:",rc, sc); */ assert(rc >= 0 ) ANDAND ( rc < NA ) ANDAND ( sc >= 0 ) ANDAND ( sc < NB ); pz_match_refineAux( NA, NB, stepCostFn = stepCostFn, minAlign = minAlign, maxAlign = maxAlign, ctr = (i2_t){{rc, sc}}, mt = mtBest, m = mBest, totCost = totCost, minTotCost = minTotCost, c = c, cMin = cMin ); }; /* DebugIntPair("mtBest[0]:", mtBest[0,0], mtBest[0,1]); DebugIntPair("mtBest[" & Fmt.Int(mBest-1) & "]: ", mtBest[mBest-1,0], mtBest[mBest-1,1]); fprintf(stderr, "minimum total cost : %s\n", Fmt.LongReal(totCost) ); */; }; }; }; }; /* Return the best match: */ assert(mBest > 0 ); mt = pz_match_old_trim(mtBest, 0, mBest); }; } /* pz_match_refine */ void ComputeAlignmentRanges( unsigned NA, unsigned NB, T *mt, unsigned maxAdjust, INTS *minAlign /* (OUT) */ INTS *maxAlign, /* (OUT) */ ) /* This procedure is used by "pz_match_refine". It computes, for each position "p", the minimum and maximum alignments of valid index pairs for pairings between chains of "NA" and "NB" elements, as implied by the initial pairing "mt" and fattening amount "maxAdjust". */ VAR k: unsigned := 0; { /* The "minAlign" and "maxAlign" arrays are basically the alignments of the pairs in "mt", shifted by "maxAdjust". However each full step of "mt" skips over one position, so we must we must interpolate the alignment bounds there. We must also extrapolate beyond the ends of "mt". */ assert((mt->nel) > 0 ); { /* with */ int minPos = 0 + 0; int maxPos = (NA-1) + (NB-1); int m = (mt->nel); int iniPos = mt[0][0] + mt[0][1]; int finPos = mt[m-1][0] + mt[m-1][1]; int iniAlign = mt[0][0] - mt[0][1]; int finAlign = mt[m-1][0] - mt[m-1][1]; /* do */ assert(minPos == 0 ); assert((minAlign.ne - 1) == maxPos ); assert((maxAlign.ne - 1) == maxPos ); for (p = minPos; p <= maxPos ; p++){ /* Compute boundary of extrapolated and expanded pairing "mt": */ if (( p <= iniPos )) { minAlign[p] = iniAlign - (iniPos - p) - 2*maxAdjust; maxAlign[p] = iniAlign + (iniPos - p) + 2*maxAdjust }else if (( p >= finPos )){ minAlign[p] = finAlign - (p - finPos) - 2*maxAdjust; maxAlign[p] = finAlign + (p - finPos) + 2*maxAdjust }else{ /* Get pair of "mt" at this position: */ while ( mt[k][0] + mt[k][1] < p ){ k++ ;}; assert(k < m ); { /* with */ int rk = mt[k][0]; int sk = mt[k][1]; int nxtPos = rk + sk; int nxtAlign = rk - sk; /* do */ assert(nxtPos >= p ) ANDAND ( nxtPos <= p+1 ); minAlign[p] = nxtAlign - 2*maxAdjust; maxAlign[p] = nxtAlign + 2*maxAdjust; if (( nxtPos > p )) { /* Full step, skipped over the position "p": */ assert(k > 0 ); INC(minAlign[p]); DEC(maxAlign[p]); }; }; }; /* Clip valid pairs against rectangle "[0..NA-1]×[0..NB-1]". */ { /* with */ int loAlign = MAX(-p, p - 2*(NB-1)); int hiAlign = MIN(+p, 2*(NA-1) - p); /* do */ minAlign[p] = max(minAlign[p], loAlign); maxAlign[p] = min(maxAlign[p], hiAlign);; }; /* Check parity: */ assert(minAlign[p] MOD 2 == p MOD 2 ); assert(maxAlign[p] MOD 2 == p MOD 2 );; }; } } /* ComputeAlignmentRanges */ void pz_match_refineAux( unsigned NA, unsigned NB, StepFunction stepCostFn, INTS *minAlign, INTS *maxAlign, pz_match_pair *ctr, VAR /*IO*/ mt: T; VAR /*IO*/ m: unsigned; VAR /*IO*/ totCost: double; VAR /*IO*/ minTotCost: double_vec_t; CostMatrix *c /* (WORK) */ double_vec_t *cMin, /* (WORK) */ ) /* Similar to "pz_match_refine", but considers only matches that contain the pair "ctr" (which must be valid). Assumes that the alignments of all valid pairs for a given position "p" are the integers in the interval "[ minAlign[p] .. maxAlign[p] ]" with same parity as "p"; where "p" ranges from "0+0" to "(NA-1)+(NB-1)". The parameters "mtBest", "m", "mismatchBest", and "minTotCost" must be allocated and initialized by the client. Before and after the call, the best valid pairing known is "mt[0..m-1]", and its mismatch is "mismatchBest". The minimum cost of any valid path with step count "k" is "minTotCost[k]". The procedure updates "mtBest" and "mismatchBest" if (and only if) it finds a pairing better that the current one. Similarly, it updates "minTotCost[k]" if (and only if) it finds a path of step count "k" whose total cost is better than the current value. The procedure requires a work vector "cMin" with at least "NA + NB + 1" elements, and a matrix "c" with at least "NA" rows and "NB" columns. */ { { /* with */ /* Range of positions of valid pairs: */ int minPos = 0 + 0; int maxPos = (NA-1) + (NB-1); /* Position and alignment of mandatory pair: */ int ctrPos = ctr[0] + ctr[1]; int ctrAlign = ctr[0] - ctr[1]; /* do */ assert(minPos == 0 ); /* Initialize the "cMin" array. Element "cMin[p]" is the min cost for any pairing from the current center to any pair "(r,s)" with "r+s == p". Note that "cMin[p]" is indexed by the `position' coordinate of the distal pair, whereas "minTotCost" is indexed by the *step count* of the match. */ assert((cMin.ne - 1) >= (NA-1)+(NB-1) ); for (p = minPos; p <= maxPos ; p++){ cMin[p] = (double.ne - 1) ;}; TYPE Step = struct ??? { double cost; pz_match_pair org; } ???; /* The procedures "CheapestStep" and "ExtractPath" below are used by the dynamic programming algorithm to find the optimal path that starts at the "ctr" pair and extends in the direction "dir". Both procedures assume that the valid pairs for the current pass have positions between "ctrPos" and "finPos" (which may be greater or less than "ctrPos"), and that "pos" gets incremented by "dir" or "2*dir" for each step along the path. */ Step CheapestStep( pz_match_pair *dst, Direction dir, unsigned finPos ) /* The inner loop of dynamic programming algorithm. This procedure computes the cheapest step ending at pair "dst" and coming from one of the three neighboring pairs in the direction "-dir". */ VAR bestCost: double; bestOrg: pz_match_pair; { assert(dir != 0 ); { /* with */ double r = dst[0]; double s = dst[1]; double r1 = r - dir; double s1 = s - dir; /* do */ /* Sanity checks about the pair "dst == (r,s)": */ { /* with */ double dstPos = r + s; double dstAlign = r - s; /* do */ assert((dstPos - ctrPos)*dir > 0 ); assert((finPos - dstPos)*dir >= 0 ); assert(minAlign[dstPos] <= dstAlign ) ANDAND ( dstAlign <= maxAlign[dstPos] ); assert(fabs(dstAlign - ctrAlign) <= fabs(dstPos - ctrPos) );; }; bestCost = (double.ne - 1); /* Try full step from "(r1,s1)": */ { /* with */ int orgPos = r1 + s1; int orgAlign = r1 - s1; /* do */ if (( (orgPos - ctrPos)*dir >= 0 ) ANDAND ( minAlign[orgPos] <= orgAlign ) ANDAND ( orgAlign <= maxAlign[orgPos] ) ANDAND ( fabs(orgAlign - ctrAlign) <= fabs(orgPos - ctrPos) )) { { /* with */ int org = (i2_t){{r1,s1}}; ??? cost = c[r1,s1] + stepCostFn(org, dst); /* do */ if (( cost < bestCost )) { bestCost = cost; bestOrg = org ;};; }; }; }; /* Try half-step from "(r1,s)": */ { /* with */ int orgPos = r1 + s; int orgAlign = r1 - s; /* do */ if (( minAlign[orgPos] <= orgAlign ) ANDAND ( orgAlign <= maxAlign[orgPos] ) ANDAND ( fabs(orgAlign - ctrAlign) <= fabs(orgPos - ctrPos) )) { { /* with */ int org = (i2_t){{r1,s}}; int cost = c[r1,s] + stepCostFn(org, dst); /* do */ if (( cost < bestCost )) { bestCost = cost; bestOrg = org ;};; }; }; }; /* Try half-step from "(r,s1)": */ { /* with */ int orgPos = r + s1; int orgAlign = r - s1; /* do */ if (( minAlign[orgPos] <= orgAlign ) ANDAND ( orgAlign <= maxAlign[orgPos] ) ANDAND ( fabs(orgAlign - ctrAlign) <= fabs(orgPos - ctrPos) )) { { /* with */ int org = (i2_t){{r,s1}}; ??? cost = c[r,s1] + stepCostFn(org, dst); /* do */ if (( cost < bestCost )) { bestCost = cost; bestOrg = org ;};; }; };; }; assert(bestCost < (double.ne - 1) ); return Step{cost = bestCost, org = bestOrg}; } } /* CheapestStep */ void ExtractMinCostPath( pz_match_pair dst, Direction dir, unsigned finPos ) /* Extracts result of the dynamic programming pass in direction "dir". Extracts from "c" the cheapest path from the "ctr" pair to the specified "dst" pair. The path is stored into the "mt" array, starting with "mt[m]", and reversed as needed to get the pairs in the correct order. */ VAR mIni: unsigned := m; { /* fprintf(stderr, "ctr == (" & Fmt.Int(ctr[0]) & "," & Fmt.Int(ctr[1]) & ")\n"); fprintf(stderr, "dir == %s\n", Fmt.Int(dir) ); */ assert(dir != 0 ); while ( dst != ctr ){ /* Sanity checks about the pair "dst": */ { /* with */ int r = dst[0]; int s = dst[1]; int dstPos = r + s; int dstAlign = r - s; /* do */ /* fprintf(stderr, "dst == (" & Fmt.Int(dst[0]) & "," & Fmt.Int(dst[1]) & ")\n"); */ assert((dstPos - ctrPos)*dir > 0 ); assert((finPos - dstPos)*dir >= 0 ); assert(minAlign[dstPos] <= dstAlign ) ANDAND ( dstAlign <= maxAlign[dstPos] ); assert(fabs(dstAlign - ctrAlign) <= fabs(dstPos - ctrPos) );; }; /* OK, store pair in "mt" and move to previous one: */ mt[m] = dst; m++; dst = CheapestStep(dst, dir, finPos).org; }; if (( dir == +1 )) { /* Reverse pairs: */ { /* with */ int mx = m - mIni, mtx = SUBARRAY(mt, mIni, mx); /* do */ for (i = 0; i <= (mtx->nel) DIV 2 - 1 ; i++){ { /* with */ int j = (mtx->nel - 1) - i; /* do */ VAR t := mtx[i]; BEGIN mtx[i] := mtx[j]; mtx[j] := t END; }; }; }; } } /* ExtractMinCostPath */ pz_match_pair pz_match_find_bestPairInLayer( unsigned p ) /* Finds the valid pair with position "p" that has minimum cost in the "c" array. Breaks ties towards the center of the valid alignment range (i.e. closer to the original pairing). */ VAR bestCost: double = (double.ne - 1); bestPair: pz_match_pair; bestShift: unsigned = (unsigned.ne - 1); { { /* with */ int aLo = MAX(minAlign[p], ctrAlign - ABS(p - ctrPos)); int aHi = MIN(maxAlign[p], ctrAlign + ABS(p - ctrPos)); int aMd = (minAlign[p] + maxAlign[p]) DIV 2; /* do */ assert(aLo MOD 2 == p MOD 2 ); assert(aHi MOD 2 == p MOD 2 ); for (a = aLo; a <= aHi BY 2 ; a++){ { /* with */ int r = (p+a) DIV 2; int s = (p-a) DIV 2; int cost = c[r,s]; int shift = ABS(a - aMd); /* do */ if (( cost < bestCost ) || ( (cost == bestCost ) ANDAND ( shift < bestShift) )) { bestPair = (i2_t){{r,s}}; bestCost = cost; bestShift = shift;; }; }; }; assert(bestCost < (double.ne - 1) ); return bestPair; } } /* pz_match_find_bestPairInLayer */ { /* Bi-directional dynamic programming: */ c[ctr[0],ctr[1]]= 0.0e0; cMin[ctrPos] = 0.0e0; for (dir = -1; dir <= +1 BY 2 ; dir++){ { /* with */ int finPos = ARRAY Direction OF unsigned{ minPos, 0, maxPos } [dir]; /* do */ for (p = ctrPos+dir; p <= finPos BY dir ; p++){ { /* with */ ??? aLo = MAX(minAlign[p], ctrAlign - ABS(p - ctrPos)); ??? aHi = MIN(maxAlign[p], ctrAlign + ABS(p - ctrPos)); /* do */ assert(aLo MOD 2 == p MOD 2 ); assert(aHi MOD 2 == p MOD 2 ); for (a = aLo; a <= aHi BY 2 ; a++){ { /* with */ ??? r = (p+a) DIV 2; ??? s = (p-a) DIV 2; ??? cost = CheapestStep((i2_t){{r,s}}, dir, finPos).cost; /* do */ c[r,s] = cost; cMin[p] = min(cMin[p], cost); }; }; }; }; }; }; /* Look for minimum mismatch path, updating "mt", "mismatch", etc.: */ VAR bestPosLo, bestPosHi: unsigned; bestTotCost: double = totCost; { for ( pLo= minPos TO ctrPos ){ for ( pHi = ctrPos TO maxPos ){ { /* with */ ??? tc = cMin[pLo] + cMin[pHi]; ??? stepCount = pHi - pLo; /* do */ minTotCost[stepCount] = min(minTotCost[stepCount], tc); if (( tc < bestTotCost )) { bestTotCost = tc; bestPosLo = pLo; bestPosHi = pHi;; }; }; };; }; /* DebugIntPair("bestPosLo,bestPosHi:", bestPosLo,bestPosHi); */ /* If we found a better pairing, let's extract it: */ if (( bestTotCost < totCost )) { { /* with */ ??? org = pz_match_find_bestPairInLayer(bestPosLo); ??? dst = pz_match_find_bestPairInLayer(bestPosHi); /* do */ /* Extract pairing: */ m = 0; ExtractMinCostPath(org, -1, minPos); mt[m] = ctr; m++; ExtractMinCostPath(dst, +1, maxPos); }; totCost = bestTotCost; };; }; }; } } /* pz_match_refineAux */ pz_match_pair pz_match_abs_pair_clip( pz_match_pair p, pz_segment_pair *s ) { return (i2_t){{AbsIndexClip(p[0], s[0]), AbsIndexClip(p[1], s[1])}} } /* pz_match_abs_pair_clip */ pz_match_pair pz_match_rel_pair_clip( pz_match_pair p, pz_segment_pair *s ) { return (i2_t){{RelIndexClip(p[0], s[0]), RelIndexClip(p[1], s[1])}} } /* pz_match_rel_pair_clip */ T *pz_match_old_trim( T *mt, unsigned start, unsigned count ) { { /* with */ ??? r = NEW(REF T, count); /* do */ r^ = SUBARRAY(mt, start, count); return r; } } /* pz_match_old_trim */ T *pz_match_adjust( T *mt, pz_segment_pair *sOld, pz_segment_pair *sNew ) VAR mNewR: REF T; /* The new pairing */ kNew: unsigned; /* Number of new pairs in "mNewR^ */ stop: bool_t; /* TRUE after first interruption. */ void EnsureSpace( REF *T mR, unsigned k ) { if (( mR == NULL )) { mR = NEW(REF T, k) }else if (( k > (mR^->nel - 1) )){ { /* with */ ??? n = (mR^->nel), mT = NEW(REF T, MAX(k+1, 2*n)); /* do */ SUBARRAY(mT^,0,n) = mR^; mR = mT; }; } } /* EnsureSpace */ { kNew = 0; stop = FALSE; mNewR = NEW(REF T, (mt->nel)); for ( kOld = 0 TO (mt->nel - 1) ){ { /* with */ ??? rOld = mt[kOld]; /* pz_match_pair relative to "sOld" */ ??? p = pz_match_abs_pair_clip(rOld, sOld); /* pz_match_pair, absolute */ ??? rNew = pz_match_rel_pair_clip(p, sNew); /* pz_match_pair, relative to "sNew" */ /* do */ if (( rNew[0] == (unsigned.ne - 1) ) || ( rNew[1] == (unsigned.ne - 1) )) { /* pz_match_pair is outside "sNew": */ if (( kNew > 0 )) { stop = TRUE ;} }else{ /* pz_match_pair lies inside "sNew" */ if (( stop )) { return NULL ;}; /* Consistency check: */ if (( kNew > 0 )) { { /* with */ ??? aNew = mNewR[kNew-1]; /* do */ assert(aNew[0] < rNew[0] ); assert(aNew[1] < rNew[1] ); assert(aNew[0] + 1 >= rNew[0] ); assert(aNew[1] + 1 >= rNew[1] );; }; }; EnsureSpace(mNewR, kNew); mNewR^[kNew] = rNew; kNew++; };; }; }; if (( kNew == 0 )){ return NULL ;}; { /* with */ ??? mR = NEW(REF T, kNew); /* do */ mR^ = SUBARRAY(mNewR^, 0, kNew); return mR; } } /* pz_match_adjust */ T *pz_match_remove_dangling_ends( T *mt ) VAR ini, fin: unsigned; { ini = 0; fin = (mt->nel - 1); { /* with */ ??? ix = mt[0][0]; ??? iy = mt[0][1]; /* do */ while ( (ini < fin) ) ANDAND ( ( (mt[ini][0] == ix ) ANDAND ( mt[ini+1][0] == ix) ) || ( (mt[ini][1] == iy ) ANDAND ( mt[ini+1][1] == iy) ) ){ ini++; }; }; { /* with */ ??? ix = mt[(mt->nel - 1)][0]; ??? iy = mt[(mt->nel - 1)][1]; /* do */ while ( (ini < fin) ) ANDAND ( ( (mt[fin][0] == ix ) ANDAND ( mt[fin-1][0] == ix) ) || ( (mt[fin][1] == iy ) ANDAND ( mt[fin-1][1] == iy) ) ){ fin--; }; }; return pz_match_old_trim(mt, ini, fin-ini+1) } /* pz_match_remove_dangling_ends */ T *pz_match_remove_unmatched_ends( T *mt, StepPredicate isMatched ) VAR ini, fin: unsigned; { ini = 0; fin = (mt->nel - 1); while ( ini < fin ) ANDAND ( NOT isMatched(mt[ini], mt[ini+1]) ){ ini++; }; while ( ini < fin ) ANDAND ( NOT isMatched(mt[fin-1], mt[fin]) ){ fin--; }; return pz_match_old_trim(mt, ini, fin-ini+1) } /* pz_match_remove_unmatched_ends */ double pz_match_length( T *mt, StepMeasure lengthFn ) VAR tot : double := 0.0e0; { for (i = 0; i < (mt->nel )-1 ; i++){ { /* with */ ??? p0 = mt[i], p1 = mt[i+1]; /* do */ tot = tot + lengthFn(p0, p1); }; }; return tot } /* pz_match_length */ double pz_match_sum( T *mt, StepFunction stepFn ) VAR tot : double := 0.0e0; { for (i = 0; i < (mt->nel )-1 ; i++){ { /* with */ ??? p0 = mt[i], p1 = mt[i+1]; /* do */ tot = tot + stepFn(p0, p1); }; }; return tot } /* pz_match_sum */ double pz_match_integral( T *mt, StepMeasure lengthFn, StepFunction stepFn ) VAR tot : double := 0.0e0; { for (i = 0; i < (mt->nel )-1 ; i++){ { /* with */ ??? p0 = mt[i], p1 = mt[i+1]; /* do */ tot = tot + stepFn(p0, p1)*lengthFn(p0, p1); }; }; return tot } /* pz_match_integral */ unsigned pz_match_matched_step_count( T *mt, StepPredicate isMatched ) VAR tot: unsigned := 0; { for (i = 0; i < (mt->nel )-1 ; i++){ { /* with */ ??? p0 = mt[i], p1 = mt[i+1]; /* do */ if (( isMatched(p0, p1) )) { tot++ ;}; }; }; return tot } /* pz_match_matched_step_count */ double pz_match_matched_length( T *mt, StepMeasure lengthFn, StepPredicate isMatched ) VAR tot: double := 0.0e0; { for (i = 0; i < (mt->nel )-1 ; i++){ { /* with */ ??? p0 = mt[i], p1 = mt[i+1]; /* do */ if (( isMatched(p0, p1) )) { tot = tot + lengthFn(p0, p1); }; }; }; return tot } /* pz_match_matched_length */ CONST DebugOverlap == FALSE; unsigned pz_match_overlap( T *a, T *b, pz_segment_pair *aSeg, pz_segment_pair *bSeg, unsigned maxAdjust ) VAR N: unsigned := 0; tot: pz_match_pair; /* Total samples in each contour. */ bIni: pz_match_pair; /* Start of "a" in the flipped toroidal grid. */ aIni: pz_match_pair; /* Start of "b" in the flipped toroidal grid. */ /* The two pairings are viewed as paths on the toroidal grid, relative to "aIni" and "bIni", respectively. The points "aIni" and "bIni" have been mirrored, if necessary, so that the paths are increasing along both axes. */ void Tally( unsigned ai, unsigned af, unsigned bi, unsigned bf ) /* Adds to "N" twice the length of steps "aIni+a[ak]->aIni+a[ak+1]" that are within "maxAdjust" some step "bIni+b[bk]->bIni+b[bk+1]", in the X-Y direction, where "ak IN [ai..af-1]" and "bk IN [bi..bf-1]". */ { assert(ai < af ); assert(bi < bf ); if (( DebugOverlap )) { fprintf(stderr, "[ " & FI(ai,1) & "-" & FI(af,1) & "×" & FI(bi,1) & "-" & FI(bf,1) ); }; if (( NOT BoxesOverlap(a[ai], a[af], b[bi], b[bf]) )) { /* Do nothing */ }else if (( af - ai >= bf - bi ) ANDAND ( af - ai >= 2 )){ /* The "a" pairing is longer and has at least two steps; split it: */ { /* with */ ??? am = (ai + af) DIV 2; /* do */ assert(ai < am ) ANDAND ( am < af ); Tally(ai, am, bi, bf); Tally(am, af, bi, bf);; } }else if (( bf - bi >= af - ai ) ANDAND ( bf - bi >= 2 )){ /* The "b" pairing is longer and has at least two steps; split it: */ { /* with */ ??? bm = (bi + bf) DIV 2; /* do */ assert(bi < bm ) ANDAND ( bm < bf ); Tally(ai, af, bi, bm); Tally(ai, af, bm, bf);; } } else { /* Both "a" and "b" are reduced to a single step. Let "B" be the step "b", smeared diagonally by "maxAdjust". Since "B" is convex, it suffices to check whether both endpoints of "a" are in "B". We know that B overlaps non-trivially the bounding box of "a". Moreover the only integer points in "B" are also in the bloating of the two endpoints. */ { /* with */ ??? ov = StepOverlap(a[ai], a[af], b[bi], b[bf]); /* do */ if (( ov > 0 )) { if (( DebugOverlap )) { fprintf(stderr, " " & "+" & FI(ov,1) & " "); }; INC(N, ov); }; }; }; if (( DebugOverlap )) { fprintf(stderr, "]") ;}; } /* Tally */ bool_t BoxesOverlap( pz_match_pair *aiP, pz_match_pair *afP, pz_match_pair *biP, pz_match_pair *bfP ) /* Returns true if the bounding box of the path "aIni+aiP->aIni+afP" intersects the bounding box of "bIni+biP->bIni+bfP", smeared diagonally by "maxAdjust". The paths are assumed to be on the toroidal grid, but it is required that "afP >= aiP" and "bfP >= biP". The boxes are assumed not to include the corners corresponding to the end-pairs of the pairing. */ VAR n, d: pz_match_pair; { /* Let "na[j]" and "nb[j] be the extents of the two paths along axis "j", and "Q == maxAdjust". We need to know whether there are real vectors "va" in "[0 _ na[0]]×[0 _ na[1]]" and "vb" in "[0 _ nb[0]]×[0 _ nb[1]]", minus the two extreme corners, and a vector "vc" in "{ (-k,k) : k in [-Q _ +Q] }", such that "(aIni+aiP+va) - (bIni+bfP-vb) == vc" modulo the two chain lengths "tot[0],tot[1]". This condition is equivalent to asking whether there are real vectors "va,vb,vc" in the ranges above and integers "r,s" such that "va+vb+vc+(Q,Q) == (bIni+bfP)-(aIni+aiP)+(Q,Q) + (r*tot[0],s*tot[1])" over the Cartesian integer grid. The set of all vectors "(x,y)" on the left-hand side is the subset of the box "C == [0 _ na[0]+nb[0]+2*Q]×[0 _ na[1]+nb[1]+2*Q]" that satisfies the `diagonal' inequalities "x+y > 2*Q" "x+y < na[0]+na[1]+nb[0]+nb[1]+2*Q" So we need to check only whether this region contains any vectors congruent to "d == (bIni+bfP)-(aIni+aiP)+(Q,Q)" */ /* Compute the box parameters: */ for (j = 0; j <= 1 ; j++){ { /* with */ ??? m = tot[j]; /* do */ assert(aiP[j] <= afP[j] ) ANDAND ( afP[j] < aiP[j]+m ); assert(biP[j] <= bfP[j] ) ANDAND ( bfP[j] < biP[j]+m ); n[j] = (afP[j] - aiP[j]) + (bfP[j] - biP[j]) + 2*maxAdjust; d[j] = ((bIni[j] + bfP[j]) - (aIni[j] + aiP[j]) + maxAdjust) MOD m; /* Coarse check for separation: */ if (( d[j] > n[j] )) { /* The point "d" lies outside the box "C"; it follows that */ /* all points congruent to "d" also are outside "C". */ return FALSE; }; }; }; /* Now we know that the point "d" lies inside the box "C". We still have to check whether for some "r" and "s" the point "d+(r*tot[0],s*tot[1])" lies in "C" and satisfies the diagonal inequalities. Since we expect "Q" to be small, and "n[j]" to be hardly greater than "2*tot[j]", we can do it by exhaustive enumeration over "r" and "s": */ { /* with */ ??? loD = maxAdjust+maxAdjust; ??? hiD = n[0] + n[1] - loD; /* do */ for (r = 0; r <= (n[0] - d[0]) DIV tot[0] ; r++){ for (s = 0; s <= (n[1] - d[1]) DIV tot[1] ; s++){ { /* with */ ??? x = d[0] + r*tot[0]; ??? y = d[1] + r*tot[1]; /* do */ assert(x <= n[0] ); assert(y <= n[1] ); if (( x+y > loD ) ANDAND ( x+y < hiD )) { return TRUE ;}; }; }; }; }; return FALSE } /* BoxesOverlap */ unsigned StepOverlap( pz_match_pair *aiP, pz_match_pair *afP, pz_match_pair *biP, pz_match_pair *bfP ) /* Returns the fraction (in multiples of a half-step) of the step "a == aIni+aiP->aIni+afP" that lies at most "maxAdjust" away diagonally from the step "b == bIni+biP->bIni+bfP". It is given that the bounding box of "a" intersects the diagonally smeared box of "b". */ VAR na, nb, n, d: pz_match_pair; NS: unsigned; { /* To simplify the logic, we reduce the problem to the steps "A == (Q,Q)->(Q,Q)+na" and "B == d->d+nb", where "d" is in the fundamental rectangle of the toroidal grid. Note that the smeared box of "A" consist of the points "(x,y)" of the real rectangle "C == [0 _ na[0]+2*Q]×[0 _ na[1]+2*Q]" on the Cartesian plane that satisfies the `diagonal' inequalities "x+y > 2*Q" "x+y < na[0]+na[1]+2*Q" */ for (j = 0; j <= 1 ; j++){ { /* with */ ??? m = tot[j]; /* do */ na[j] = afP[j] - aiP[j]; assert(0 <= na[j] ) ANDAND ( na[j] <= 1 ); n[j] = na[j] + maxAdjust+maxAdjust; nb[j] = bfP[j] - biP[j]; assert(0 <= nb[j] ) ANDAND ( nb[j] <= 1 ); d[j] = ((bIni[j] + biP[j]) - (aIni[j] + aiP[j]) + maxAdjust) MOD m; /* Coarse check for separation: */ if (( d[j] + nb[j] > n[j] )) { /* The "b" step ends outside the "C" rectangle. */ /* It follows that any of its translates intersects "C" */ /* at most in two points. */ return 0; }; }; }; /* Now we know that "B" lies inside the closed rectangle "C". We must now tally the intersection of the smeared box of "A" with all congruent copies of step "B", namely all steps "u+d->u+d+nb" where "u==(r*tot[0],s*tot[1])". We only need to consider those translates that intersect non-trivially the closed rectangle "C". Since "d[j]" lies in "[0..tot[j]-1]", we need not consider "r<0", or "r" such that "r*tot[0]+d[0]+nb[0] > na[0]+2*Q"; and ditto for "s". */ { /* with */ ??? aID = maxAdjust+maxAdjust; ??? aFD = aID + na[0] + nb[1]; /* do */ NS = 0; for (r = 0; r <= (na[0]+maxAdjust+maxAdjust-d[0]-nb[0]) DIV tot[0] ; r++){ for (s = 0; s <= (na[1]+maxAdjust+maxAdjust-d[1]-nb[1]) DIV tot[1] ; s++){ { /* with */ ??? bI0 = d[0] + r*tot[0]; ??? bF0 = bI0 + nb[0]; ??? bI1 = d[1] + r*tot[1]; ??? bF1 = bI1 + nb[1]; ??? bID = bI0 + bI1; ??? bFD = bF0 + bF1; /* do */ assert(bF0 <= n[0] ); assert(bF1 <= n[1] ); NS = NS + max(0, min(aFD,bFD) - max(aID,bID)); }; }; }; return NS; } } /* StepOverlap */ { if (( aSeg[0].cvx != bSeg[0].cvx ) || ( aSeg[1].cvx != bSeg[1].cvx )){ return 0 ;}; if (( aSeg[0].rev != bSeg[0].rev ) || ( aSeg[1].rev != bSeg[1].rev )){ return 0 ;}; assert(aSeg[0].tot == bSeg[0].tot ) ANDAND ( aSeg[1].tot == bSeg[1].tot ); /* Flip the problem to account for the "rev" flags: */ for (j = 0; j <= 1 ; j++){ tot[j] = aSeg[j].tot; { /* with */ ??? m = tot[j]; /* do */ if (( NOT aSeg[j].rev )) { aIni[j] = aSeg[j].ini MOD m; bIni[j] = bSeg[j].ini MOD m; }else{ aIni[j] = (m - (aSeg[j].ini + aSeg[j].ns - 1)) MOD m; bIni[j] = (m - (bSeg[j].ini + bSeg[j].ns - 1)) MOD m;; };; };; }; N = 0; { /* with */ ??? aFin = (i2_t){{aIni[0] + aSeg[0].ns-1, aIni[1] + aSeg[1].ns-1}}; ??? bFin = (i2_t){{bIni[0] + bSeg[0].ns-1, bIni[1] + bSeg[1].ns-1}}; /* do */ if (( DebugOverlap )) { fprintf(stderr, " " & "(" & FI(aIni[0],1) & "," & FI(aIni[1],1) & ")->" & "(" & FI(aFin[0],1) & "," & FI(aFin[1],1) & ") × " & "(" & FI(bIni[0],1) & "," & FI(bIni[1],1) & ")->" & "(" & FI(bFin[0],1) & "," & FI(bFin[1],1) & ")" ); };; }; if (( (a->nel - 1) > 0 ) ANDAND ( (b->nel - 1) > 0 )){ Tally(0, (a->nel - 1), 0, (b->nel - 1)); }; if (( DebugOverlap )){ fprintf(stderr, "==%s\n", Fmt.Int(N) ) ;}; return N } /* pz_match_overlap */ pz_match_packedT *pz_match_pack( T *mt ) { { /* with */ ??? np = (mt->nel); ??? rp = NEW(REF pz_match_packedT), pm = rp^; ??? wr = NEW(TextFILE *).init(); /* do */ Wr.PutChar(wr, '|'); for (i = 1; i <= np-1 ; i++){ { /* with */ ??? ri = mt[i][0], ra = mt[i-1][0]; ??? si = mt[i][1], sa = mt[i-1][1]; /* do */ if (( ri == ra+1 ) ANDAND ( si == sa )) { Wr.PutChar(wr, '/') }else if (( ri == ra ) ANDAND ( si == sa+1 )){ Wr.PutChar(wr, '\\') }else if (( ri == ra+1 ) ANDAND ( si == sa+1 )){ Wr.PutChar(wr, '|') }else{ fprintf(stderr, "bad pairing: "); fprintf(stderr, Fmt.Int(ra) & " " & Fmt.Int(ri) & " "); fprintf(stderr, Fmt.Int(sa) & " " & Fmt.Int(si) & "\n"); assert(FALSE );; }; }; }; pm = pz_match_packedT{ np = np, ini = mt[0], fin = mt[np-1], steps = TextFILE *oText(wr) }; return rp; } } /* pz_match_pack */ pz_match_packedT *pz_match_packPerfect( unsigned nSamples ) { { /* with */ ??? rpm = NEW(REF pz_match_packedT), pm = rpm^; ??? wr = NEW(TextFILE *).init(); /* do */ pm->np = nSamples; pm->ini = (i2_t){{0,0}}; pm->fin = (i2_t){{nSamples-1,nSamples-1}}; for (k = 0; k <= nSamples-1 ; k++){ Wr.PutChar(wr, '|') ;}; pm->steps = TextFILE *oText(wr); return rpm; } } /* pz_match_packPerfect */ void pz_match_write_packed( FILE *wr, pz_match_packedT *pm ) { { /* with */ ??? np = pm->np; /* do */ assert(strlen(pm->steps) == np ); FPut.Int(wr, np); FPut.Space(wr); FPut.Char(wr, '('); for (j = 0; j <= 1 ; j++){ FPut.Int(wr, pm->ini[j]); FPut.Space(wr); FPut.Int(wr, pm->fin[j]); FPut.Space(wr); FPut.Space(wr);; }; fprintf(wr, pm->steps); FPut.Char(wr, ')');; } } /* pz_match_write_packed */ pz_match_packedT *pz_match_read_packed( FILE *rd ) VAR np: unsigned; CONST ValidChars == SET OF CHAR{ '/', '|', '\\'}; { np = fget_int32(rd); { /* with */ ??? cm = NEW(ARRAY *OF CHAR, np)^; ??? rp = NEW(REF pz_match_packedT), pm = rp^; /* do */ pm->np = np; fget_skip_spaces(rd); FGet.Match(rd, "("); for (j = 0; j <= 1 ; j++){ pm->ini[j] = fget_int32(rd); pm->fin[j] = fget_int32(rd);; }; fget_skip_spaces(rd); FGet.Match(rd, "|"); cm[0] = '|'; for (i = 1; i <= np-1 ; i++){ cm[i] = FGet.Char(rd); assert(cm[i] IN ValidChars );; }; fget_skip_spaces(rd); FGet.Match(rd, ")"); pm->steps = Text.FromChars(cm); return rp; } } /* pz_match_read_packed */ pz_match_packedT *pz_match_adjustPacked( pz_match_packedT *rpm, pz_segment_pair *oldSeg, pz_segment_pair *newSeg ) pz_match_packedT *DoAdjust( pz_match_packedT pm ) /* pz_match_adjusts "pm" the hard way - unpack,trim,repack. */ { { /* with */ ??? r = pz_match_adjust(pz_match_unpack(pm)^, oldSeg, newSeg); /* do */ if (( r == NULL )) { return NULL }else{ return pz_match_pack(r^); }; } } /* DoAdjust */ VAR adjust: i2_t; { if (( rpm == NULL )){ return NULL ;}; { /* with */ ??? pm = rpm^; /* do */ /* Try to determine if the old pairings are contained in the new segments. If that is the case, we don't need to unpack/repack the pairing. */ for (j = 0; j <= 1 ; j++){ { /* with */ ??? os = oldSeg[j]; ??? ns = newSeg[j]; ??? is = pz_segment_meet(oldSeg[j], newSeg[j]); ??? tot = os.tot; ??? rev = os.rev; ??? iniAdv = (is.ini - os.ini) MOD tot; ??? iniRet = (is.ini - ns.ini) MOD tot; ??? finAdv = (ns.ini + ns.ns - is.ini - is.ns) MOD tot; ??? finRet = (os.ini + os.ns - is.ini - is.ns) MOD tot; /* do */ assert(os.rev == ns.rev ); assert(os.cvx == ns.cvx ); assert(os.tot == ns.tot ); if (( is.ns == 0 )) { return NULL ;}; if (( rev )) { if (( finAdv == 0 )) { if (( finRet > pm->ini[j] )) { return DoAdjust(pm) ;}; adjust[j] = - finRet }else{ adjust[j] = + finAdv; }; if (( iniAdv > os.ns - 1 - pm->fin[j] )) { return DoAdjust(pm) ;}; }else{ if (( iniRet == 0 )) { if (( iniAdv > pm->ini[j] )) { return DoAdjust(pm) ;}; adjust[j] = - iniAdv }else{ adjust[j] = + iniRet; }; if (( finRet > os.ns - 1 - pm->fin[j] )) { return DoAdjust(pm) ;};; }; }; }; { /* with */ ??? rpmNew = NEW(REF pz_match_packedT), pmNew = rpmNew^; /* do */ pmNew->nel = pm->np; for (j = 0; j <= 1 ; j++){ pmNew.ini[j] = pm->ini[j] + adjust[j]; pmNew.fin[j] = pm->fin[j] + adjust[j];; }; pmNew.steps = pm->steps; return rpmNew; }; } } /* pz_match_adjustPacked */ <*UNUSED); void DebugIntPair( char *title, int r, int s ) { fprintf(stderr, title); fprintf(stderr, " "); fprintf(stderr, Fmt.Int(r)); fprintf(stderr, " "); fprintf(stderr, Fmt.Int(s)); fprintf(stderr, "\n"); } /* DebugIntPair */ char *FI( unsigned x, unsigned w ) { return Fmt.Pad(Fmt.Int(x), w) } /* FI */ #endif /* Copyright © 2001 Universidade Estadual de Campinas (UNICAMP). Authors: Helena C. G. Leitão and Jorge Stolfi. This file can be freely distributed, used, and modified, provided that this copyright and authorship notice is preserved, and that any modified versions are clearly marked as such. This software has NO WARRANTY of correctness or applicability for any purpose. Neither the authors nor their employers chall be held responsible for any losses or damages that may result from its use. */