#! /usr/bin/python3
# Last edited on 2026-07-03 00:43:57 by stolfi

import os, sys, re
from sys import stderr as err, stdout as out
import write_parsing_funcs as wpf
import bimatching_eval_funcs as bef
from math import sqrt, hypot, log, exp, floor, ceil, inf, nan, isfinite
import standard_bipatterns as stdbip # For tests.
import numpy as np
  
def match_bitemplate(text0, text1, bitemp, eval_one_gap, max_score, debug_level):
  # Matches two given strings {text0,text1} to a bitemplate {bitemp}. 
  # If there is more than one bimatching possible,
  # chooses the one with the lowest badness score. 
  #
  # A /bitemplate/ is a list {bitemp[0..nr]} of /bipatterns/.  
  # Each bipattern is a list of /variants/.  Each variant is a 
  # triple {(pena, pat0, pat1)} where {pena} is a non-negative float,
  # typically in {[0 _ 1]}; and {pat1,pat2} are two RE patterns,
  # neither of which matches the empty string.
  # 
  # Let {nt0} and {nt1} be the lengths of {text0} and {text1}. A
  # /bimatching/ of {text0,text1} to {bitemp} is a list {bimatch} of
  # {nr} /rungs/. A rung {bimatch[kr]} is a quintuple
  # {(i0,j0,i1,j1,bpvar)} where {bpvar} is a variant of the bipattern
  # {bitemp[kr]}, as above; {i0,j0} are indices in {0..nt0} with {i0 <=
  # j0}, and the string {text0[i0:j0]} matches the RE pattern {pat0} in
  # {bpvar}; and ditto for {i1,j1,text1} with {pat1} of {bpvar}. Among
  # successive
  # 
  # The procedure effectively enumerates all the possible bimatchings
  # {bimatch[0..nr-1]} of {text0,text1} against the bitemplate
  # {bitemp[0..nr-1]}. For each bimatching it computes a badness
  # {score}, and selects the bimatching that yielded the lowest {score}.
  #
  # Any score greater than {max_score} will be mapped to {+inf}
  # A low {max-score} can significantly speed up the search.
  #
  # Actually, this version uses a dynamic programming algorithm
  # to find the best match in time {O(nr*nt0^2*nt1^2)} instead of
  # exponential time.
  #
  # The badness {score} of a bimatching is computed by converting it
  # implicitly to a pair of macro-parsings {segs0,segs1} of {text0} and
  # {text1}, and calling {eval_one_gap(len(gap0),len(gap1),ig,ng)} on
  # each pair of corresponding gaps. The {score} is the sum of all the
  # gap scores, plus the penalties of the variants used to match the
  # keywords.
  #
  # Returns the bimatching {opt_bimatch} that yielded the smallest
  # {score}, as well as its score.
  #
  # If there are no possible bimatchings, returns {None,+inf}.

  # debug_all = (text1[:15] == "pokararkeeyokee")
  # debug_all = (text1[:13] == "poarkeeodaiin")

  debug_top = debug_level >= 2
  debug_steps = debug_level >= 4
  debug_plane = debug_level >= 6
  debug_tableau = debug_level >= 8
  debug_outer_loop = debug_level >= 10
  debug_inner_loop = debug_level >= 20
  
  verbose = True

  nt0 = len(text0); nt1 = len(text1)  # Lengths of the texts.
  nr = len(bitemp)                    # Number of bipatterns.
  nh = nr; ng = nh + 1; ns = ng + nh  # Number of hits, gaps, segs.
  assert ns >= 1

  if debug_steps: 
    err.write(f"!~ enter match_bitemplate\n")
    err.write(f"!~ {nr = } {nh = } {ng = } {ns = }\n")
    err.write(f"!~ {nt0 = :3d} {text0 = !r}\n")
    err.write(f"!~ {nt1 = :3d} {text1 = !r}\n")
  
  tot = dict()
  tot['evgap'] = 0  # Number of gap scores computed.
  tot['evhit'] = 0  # Number of attempts to match a hit pair.
  tot['nohit'] = 0  # Number of hit pairs that failed to match.
  tot['finel'] = 0  # Number of finite elements in the tableaus.
  tot['prune'] = 0  # Number of times that a score exceeded {max_score}.

  # Dynamic programming tableaus:
  tb_segs_shape = (ns-1,nt0+1,nt1+1)   # One plane per segment except the tail.
  tb_sc = np.full(tb_segs_shape, +inf)  # Best score at each brakpoint.
  tb_j0 = np.full(tb_segs_shape, -1)    # Position of prev breakpoint in {text0}.
  tb_j1 = np.full(tb_segs_shape, -1)    # Position of prev breakpoint in {text1}.
  tb_nm = np.full(tb_segs_shape, -1)    # Number of distinct matches.
  tb_rungs_shape = (nr,nt0+1,nt1+1)    # One plane per rung.
  tb_iv = np.full(tb_rungs_shape, -1)   # Position of prev breakpoint in {text1}.
  
  # A /breakpoint/ on text0 is an index in {0..nt0} (note: not {0..nt0-1}).
  # 
  # A /biparsing/ of size {ks} is a list of pairs {bm[ts] =
  # (b0[ts],b1[ts])} for {ts} in {0..ks}, such that {b0[0..ks-1]} and
  # {b1[0..ks-1]} are non-decreasing breakpoints on each text.
  # 
  # A biparsing of size {ms} defines a partition of {text0} and {text1}
  # into {ms} segments. For {ks} in {0..ms-1}, segment {segs0[ks]} is
  # the substring of {text0} between breakpoints {b0[ks-1]} and
  # {b0[ks]}; where {b0[-1]} is defined as 0. Ditto for
  # {segs1,b1,text1}. The /partial badness score/ of that biparsing is
  # the sum of the badness scores of those segments. The pair {bm[ms-1]}
  # is said to be the /end/ of the biparsing.
  #
  # If {ms == ns - 1} we extend the above definition to {segs0[ns-1]} by
  # defining {b0[ns-1]} as {nt0}. That is, {segs0[ns-1]}, the /closing
  # segment/, is the tail of {text0} from {b0[ns-2]} to the end of
  # {text0}. The /final score/ is defined as the partial score of the
  # biparsing plus the badness score of the closing segments.
  #
  # If {ks} is even, segments {segs0[ks]} and {segs1[ks]} are called
  # /gaps/. The badness of those two gaps is a function of the
  # discrepancy between the actual length of {segs1[ks]} and the value
  # predicted from the length of {segs0[ks]}.
  # 
  # If {ks} is odd -- that is {ks = 2*kr+1} for some {kr} in {0..nr-1}
  # -- the segments {segs0[ks]} and {segs1[ks]} are called /hits/. Those
  # two strings must match some variant of the bipattern {bitemp[kr]},
  # namely there should some index {iv} such that {bpvar =
  # bitemp[kr][iv] = (pena, pat0, pat1)} such {segs0[ks]} matches {pat0}
  # and {segs1[ks]} matches {pat1}. The badness score of the segments
  # {segs0[ks]} and {segs1[ks]} is then the variant penalty {pena}. (The
  # penalties must be strictly increasing with {iv}, so that the first
  # {iv} that matches on both sides will also be the one with least
  # penalty.)
  #
  # The goal is to fill the arrays {tb_sc,tb_j0,tb_j1} so that, for {ks}
  # in {0..ns-2}, {tb_sc[ks,i0,i1]} is the badness score the best
  # partial biparsing with {ks+1} segments ending at breakpoint {i0} on
  # {text0} and {i1} on {text1}. Then {k0 = tb_j0[ks,i0,i1]} and {k1 =
  # tb_j1[ks,i0,i1]} will the previous breakpoints of that partial
  # biparsing; or {-1} if {ks} is zero.
  #
  # If the segments {segs0[ks],segs1[ks]} are hits -- that is, if {ks}
  # is odd {2*kr+1} -- then {tb_iv[kr,i0,i1]} will store the index {iv}
  # of the variant {bpvar} of {bitemp[kr]} that matched those segments;
  # or {-1} if no variant matched both segments.
  #
  # The value of {tb_nm[ks,i0,i1]} will be the number of distinct
  # biparsings of size {ks+1} that end at {(i0,i1)}; or -1 if that
  # number was not computed.
  
  def gap_score(ks, k0,i0, k1,i1):
    # Badness score of the gap segments {seg00[ks]} and {segs1[ks]},
    # between breakpoints {(k0,k1)} and {(i0,i1)}.
    # Requires {ks} even, {ks == 2*ig}.
    nonlocal tot
    assert 0 <= k0 and k0 <= i0 and i0 <= nt0
    assert 0 <= k1 and k1 <= i1 and i1 <= nt1
    assert ks % 2 == 0
    gsz0 = i0 - k0; gsz1 = i1 - k1;
    ig = ks//2;
    tot['evgap'] += 1
    score_gap = eval_one_gap(gsz0, gsz1, ig, ng)
    if score_gap > max_score: score_gap = +inf
    return score_gap
    # ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    
  def hit_score(ks, k0,i0, k1,i1):
    # Badness score of the hit segments {segs0[ks]} and {segs1[ks]},
    # between breakpoints {(k0,k1)} and {(i0,i1)}.
    # Requires {ks} odd, {ks == 2*ih + 1}.
    # If the segments match the bipattern {bitemp[ih]}, returns 
    # the penalty {pena} of the variant that matched, and its index {iv}.
    # Otherwise returns {+inf,-1}.
    nonlocal tot
    assert 0 <= k0 and k0 <= i0 and i0 <= nt0
    assert 0 <= k0 and k0 <= i0 and i0 <= nt0
    assert 0 <= k1 and k1 <= i1 and i1 <= nt1
    assert ks % 2 == 1
    kr =  ks//2; 
    bipat = bitemp[kr]
    nv = len(bipat)
    pena_prev = -1
    tot['evhit'] += 1
    seg0 = text0[k0:i0]
    seg1 = text1[k1:i1]
    for iv in range(nv):
      pena, pat0, pat1 = bipat[iv]
      assert pena > pena_prev, f"key penalties not increasing {pena_prev} {pena}"
      if re.fullmatch(pat0, seg0):
        if re.fullmatch(pat1, seg1):
          return pena, iv
      pena_prev = pena
    tot['nohit'] += 1
    return +inf, -1
    # :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::  
  
  def print_tableau(ks):
    # Prints plane {ks} of the tableau.
    # Only the score for now.
    nt_max = 30
    if nt0 <= nt_max and nt1 <= nt_max:
      rt0 = nt0; st0 = nt0 + 1
      rt1 = nt1; st1 = nt1 + 1
    else:
      ht = (nt_max - 2)//2;
      rt0 = ht; st0 = nt0 - ht; 
      rt1 = ht; st1 = nt1 - ht
      
    err.write(f"!o tb[{ks},:,:] = \n")
    for i0 in list(range(0,rt0+1)) + list(range(st0,nt0)):
      err.write("!o   ")
      for i1 in list(range(0,rt1+1)) + list(range(st1,nt1)):
        if i0 < nt0 and i1 < nt1:
          if i0 == rt0 or i1 == rt1:
            valx = "  ... "
          else:
            val = tb_sc[ks,i0,i1]
            valx = f"{val:6.3f}" if isfinite(val) else "  *   "
          if i1 > 0: err.write(" ")
          err.write(valx)
      err.write("\n")
    return
    # ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
  
  def extract_bimatch(j0_fin, j1_fin):
    # Recover the optimum bimatch from {tb_sc,tb_j0,tb_j1,tb_iv}
    # Assumes that the last breakpoints before the end {(nt0,nt1)}
    # were {j0_fin,j1_fin}:
    
    bimatch = [ None ] * nr  # Rungs of partial bimatching.
    kr = nr - 1; ks = ns - 1
    i0 = nt0; i1 = nt1
    while kr >= 0:
      # Get rung {kr}.
      # Now {(i0,i1)} is LOW breakpoint of rung {kr+1}.
      
      # Skip the gaps ending at {(i0,i1)}:
      assert ks >= 1
      assert ks % 2 == 0
      ig = ks//2
      assert i0 >= 0 and i1 >= 0
      if kr == nr - 1:
        assert ks == ns - 1
        j0 = j0_fin; j1 = j1_fin
      else:
        assert ks <= ns - 2
        j0 = tb_j0[ks,i0,i1]; j1 = tb_j1[ks,i0,i1]
        assert isfinite(tb_sc[ks,j0,j1])
      assert 0 <= j0 and j0 <= i0 and i0 <= nt0
      assert 0 <= j1 and j1 <= i1 and i1 <= nt1
      if debug_plane: err.write(f"!B skipped gaps {ig} segs {ks} {j0}..{i0} {j1}..{i1}\n")
      
      ks = ks - 1
      
      # Get the hits that end at {(j0,j1)}:
      assert ks >= 0
      assert ks % 2 == 1
      assert isfinite(tb_sc[ks,j0,j1])
      i0 = tb_j0[ks,j0,j1]; i1 = tb_j1[ks,j0,j1]
      assert 0 <= i0 and i0 <= j0 and j0 <= nt0
      assert 0 <= i1 and i1 <= j1 and j1 <= nt1
      # Asemble the rung:
      assert kr >= 0
      iv = tb_iv[kr,j0,j1]
      assert iv >= 0 and iv < len(bitemp[kr])
      bpvar = bitemp[kr][iv]
      bimatch[kr] = (i0,j0, i1,j1, bpvar)
      pena, pat0, pat1 = bpvar
      ih = kr
      if debug_plane: err.write(f"!B took hits {ih} segs {ks} {i0}..{j0} {i1}..{j1} {iv = } {pena = :.8f}\n")
      
      # Move back one more:
      ks = ks - 1
      kr = kr - 1
    assert ks == 0
    assert kr == -1
    return bimatch
    # ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

  def check_bimatch(bimatch):
    
    assert len(bimatch) == nr
    k0 = 0; k1 = 0
    for kr in range(nr): 
      rngk = bimatch[kr]
      if debug_plane: err.write(f"!@ checking rung {kr} = {rngk = !r}\n")
      assert rngk != None
      i0, j0, i1, j1, bpvar = rngk
      assert k0 <= i0 and i0 <= j0 and j0 <= nt0
      assert k1 <= i1 and i1 <= j1 and j1 <= nt1
      pena, pat0, pat1 = bpvar
      assert isinstance(pena, float) and pena >= 0
      assert isinstance(pat0, str)
      assert isinstance(pat1, str)
      assert re.fullmatch(pat0, text0[i0:j0]) != None
      assert re.fullmatch(pat1, text1[i1:j1]) != None
      k0 = j0; k1 = j1
    return
    # ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::~

  # For each bipat {bitemp[kr]}, find all substrings in {text0} and {text1}
  # that match the corresponding side. Namely 
  # {bghits0[kr][j0]} is a list of all indices {i0} such that {text0[i0:j0]} 
  # matches {pat0} for some variant {(pena, pat0, pat1) = bitemp[kr][iv]}
  # for some {iv}.  Similarly for {bghits1[kr][j1]} wrt {text1}.

  def bipat_matches_seg(bipat, side, seg):
    # Returns true iff some varaint of bipat
    # on side {side} (0 or 1) matches {seg}.
    for (pena, pat0, pat1) in bipat:
      pat = (pat0, pat1)[side]
      if re.fullmatch(pat, seg): return True
    return False
    # ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

  bghits0 = [ [ [ ] for j0 in range(nt0+1) ] for kr in range(nr) ] # Starts of possible hits on text0.
  bghits1 = [ [ [ ] for j1 in range(nt1+1) ] for kr in range(nr) ] # Starts of possible hits on text1.
  bghits = ( bghits0, bghits1, )
  for side in 0, 1:
    text = (text0, text1)[side]
    nt = len(text)
    for jt in range(nt+1):
      for it in range(jt+1):
        seg = text[it:jt]
        for kr in range(nr):
          bipat = bitemp[kr]
          if bipat_matches_seg(bipat, side, seg):
            bghits[side][kr][jt].append(it)
    if debug_steps:
      for kr in range(nr):
        nhits = 0; 
        for jt in range(nt+1): nhits += len(bghits[side][kr][jt])
        err.write(f"!~ found {nhits} potential hits of bipattern {kr} on text{side}\n")
  
  def find_best_extension(ks,i0,i1, k0_k1_list):
    # Finds the best biparsing with {ks} segments from {(0,0)} to {(i0,i0)}.
    # Assumes that plane {ks-1} of tables {tb_sc,tb_j0,tb_j1} has been filled.
    #
    # Also, if {ks>0}, assumes that {k0_k1_list} is a list of pairs of
    # indices {(k0,k1)} such that {k0 <= i0 {k1 <= i1} and the segments
    # {k0..i0} and {k1..i1} are worth considering. The list must include
    # at least every pair {(k0,k1)} such that {tb-sc[ks-1,k0,k1]} is
    # finite and the score of those two segments is finite and the sum
    # of those two score less than {max_score}.
    #
    # Returns the score {score_best} of the extended biparsing
    # and the breakpoints {k0_best,k1_best} of that biparsing prior to 
    # {i0,i1}.
    #
    # If the last segments are hits (that is, {ks} is odd),
    # also returns the index {iv_best} of the variant of the bipat that
    # matched those segments, or {-1} if no variant matched.]
    # If the last segments are gaps, returns {None} as {iv_best}.
    #
    # Also returns the number of matchings with finite badness
    # that end at {(i0,i1)}. If {score_best} is {+inf},
    # this number will be zero.
    #
    # If there is no viable extension, returns {+inf} as the 
    # score, {-1} as {k0_best,k1_best}, and {-1} as {iv_best}.
    
    nonlocal tot, bghits0, bghits1

    if debug_outer_loop: err.write(f"!X segment{ks}, trying {len(k0_k1_list)} edges to ({i0},{i1})\n")
    assert ks >= 0 and ks <= ns - 1
    score_best = +inf      
    k0_best = -1; k1_best = -1; 
    iv_best = None if ks % 2 == 0 else -1
    nb = 0
    if ks == 0:
      score_best = gap_score(ks, 0,i0, 0,i1)
      if isfinite(score_best):
        k0_best = 0; k1_best = 0; nb = 1
      else:
        k0_best = -1; k1_best = -1; nb = 0
      iv_best = None
    else:
      nb = 0
      for k0, k1 in k0_k1_list:
        assert k0 <= i0 and k1 <= i1
        score_old = tb_sc[ks-1,k0,k1]
        if debug_inner_loop: err.write(f"!X   {k0 = } {k1 = } {score_old = :.8f}\n")
        if isfinite(score_old):
          nb_old = tb_nm[ks-1,k0,k1]
          assert nb_old >= 1
          # Get the badness {score_seg} of the segs {(k0,k1)} to {(i0,i1)}:
          if ks % 2 == 0:
            score_seg = gap_score(ks, k0,i0, k1,i1)
            iv_seg = None
          else:
            score_seg, iv_seg = hit_score(ks, k0,i0, k1,i1)
            assert isinstance(iv_seg, int)
          if debug_inner_loop: err.write(f"!X   {score_seg = :.8f} {iv_seg = }\n")
          if isfinite(score_seg):
            score_new = score_old + score_seg
            if debug_inner_loop: err.write(f"!X   {score_new = :.8f}\n")
            if score_new > max_score:
              score_new = +inf; 
              tot['prune'] += 1
            else:
              nb += nb_old
              if score_new < score_best:
                score_best = score_new
                k0_best = k0; k1_best = k1; iv_best = iv_seg
    return score_best, k0_best, k1_best, iv_best, nb
    # ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

  # Fill the tableau with partial biparsings:
  finels_old = [ (0,0) ]  # Finite elements on previous plane of the tableau.
  k0_min = 0; k1_min = 0; # Min indices of finite elems in prev plane.
  hard_test = False
  for ks in range(ns-1):
    # Fill {tb_sc[ks,*,*]}:
    if debug_plane: err.write(f"filling plane {ks} ...\n")
    finels_new = [] # Finite elements on plane {ks} of tableau.
    i0_min = nt0+1; i1_min = nt1+1 # First breakpoints for {ks} with finite badness.
    for i0 in range(k0_min,nt0+1):
      for i1 in range(k1_min,nt1+1):
        if debug_outer_loop: err.write(f"!~  filling tb[{ks},{i0},{i1}] ...\n")
        # Decide which elements of the previous plane are worth looking at:
        k0_k1_list = None;
        if hard_test:
          k0_k1_list = [ (k0,k1) for k0 in range(k0_min,i0)  for k1 in range(k1_min, k1) ]
        elif ks % 2 == 0:
          # Gap segment: loop on finite elements of prev tableau plane:
          k0_k1_list = finels_old
        else:
          # Hit segment: loop on segments that matched the pattern
          kr = ks//2;
          k0_k1_list = [ (k0,k1) for k0 in bghits0[kr][i0] for k1 in bghits1[kr][i1] ]
        k0_k1_list = [ (k0,k1) for (k0,k1) in k0_k1_list if k0 <= i0 and k1 <= i1 ]
        score_best, k0_best, k1_best, iv_best, nb = \
          find_best_extension(ks,i0,i1, k0_k1_list)
        if debug_outer_loop:
          err.write(f"!~  {score_best = :.8f} {k0_best = } {k1_best = }")
          err.write(f"  {iv_best = } {nb = }\n")
        if isfinite(score_best):
          assert score_best >= 0
          assert 0 <= k0_best and k0_best <= i0 and i0 <= nt0
          assert 0 <= k1_best and k1_best <= i1 and i1 <= nt1
          assert nb >= 1
          finels_new.append((i0,i1,))
        else:
          assert k0_best == -1 and k1_best == -1
          assert nb == 0
        tb_sc[ks,i0,i1] = score_best
        tb_j0[ks,i0,i1] = k0_best 
        tb_j1[ks,i0,i1] = k1_best
        tb_nm[ks,i0,i1] = nb
        assert isinstance(tb_sc[ks,i0,i1], float)
        if ks % 2 == 1:
          # Hit - save the variant:
          assert isinstance(iv_best, int)
          assert (iv_best != -1) == isfinite(score_best)
          kr = ks//2
          tb_iv[kr,i0,i1] = iv_best
          if debug_outer_loop: err.write(f"!~ saved tb_iv[{kr},{i0},{i1}] = {iv_best}\n")
        else:
          # Gap - variant not applicable:
          assert iv_best == None
        if isfinite(score_best):
          tot['finel'] += 1
          i0_min = min(i0_min, i0)
          i1_min = min(i1_min, i1)
          finels_new.append((i0,i1,))
    # Remember where it is worth starting:
    finels_old = finels_new; k0_min = i0_min; k1_min = i1_min
    if debug_plane: 
      err.write(f"tableau plane {ks} has {len(finels_new)} finite elements.\n")
    if debug_plane: 
      print_tableau(ks)

  # Now complete the biparsing with the closing segments:
  score_fin, k0_fin, k1_fin, iv_fin, nb_fin = find_best_extension(ns-1,nt0,nt1, finels_old)
  assert iv_fin == None # Last segs are gaps.

  if debug_steps:
    err.write(f"!~ FINAL {score_fin = :.8f}\n")

  if isfinite(score_fin):
    bimatch_fin = extract_bimatch(k0_fin, k1_fin)
    check_bimatch(bimatch_fin)
  else:
    assert score_fin == +inf
    bimatch_fin = None

  if verbose:
    err.write(f"compared lengths of {tot['evgap']} gap pairs.\n")
    err.write(f"considered {tot['evhit']} possible hit pairs ({tot['nohit']} failed).\n")
    err.write(f"total {tot['finel']} finite entries in tableau.\n")
    err.write(f"pruned {tot['prune']} tableau entries because of max score.\n")
    err.write(f"potentially {nb_fin} distinct bimatchings.\n")
    err.write(f"final score {score_fin:.8f}.\n")

  if debug_steps: err.write("!~ exit match_bitemplate\n")

  return bimatch_fin, score_fin
  # ----------------------------------------------------------------------

def extract_segments(text0, text1, bimatch):
  # Given two texts {text0,text1} and a bimatching {bimatch[0..nr-1]} 
  # for them, returns the two macro-parsings {segs0,segs1} of those
  # texts implied by the bimatching.
  #
  # The list {segs0} has {ns=2*nr+1} strings. comprising the {nr}
  # substrings of {text0} that are defined by the indices {i0} and {j0}
  # of the rungs (the /hits/) and the substrings before, between,and
  # after those hits (the /gaps/). The list {segs1} has the {ns} gaps
  # and hits of {text1} defined by the indices {i1} and {j1} of the
  # rung.
  #
  # The procedre also returns a {key_penalty} which is the sum of the
  # {pena} components in their {bpvar} fields.
  
  debug = False
  
  nr = len(bimatch)
  nh = nr; ng = nh + 1; ns = ng + nh
  
  textp = ( text0, text1 )
  segs = [ [ ], [ ] ]
  jtp_prev = [ 0, 0 ]  # End or previous hit
  key_penalty = 0
  for ir in range(nr):
    i0, j0, i1, j1, bpvar = bimatch[ir]
    pena, pat0, pat1 = bpvar
    if debug: err.write(f"!E   {pena = :.8f}\n")
    assert isinstance(pena, float) and pena >= 0
    itp = ( i0, i1 ) 
    jtp = ( j0, j1 )
    for side in range(2):
      jt_prev = jtp_prev[side]
      it = itp[side]
      jt = jtp[side]
      tt = textp[side]
      nt = len(tt)
      assert jt_prev <= it <= nt and it <= jt <= nt
      segs[side].append(tt[jt_prev:it]) # Gap
      segs[side].append(tt[it:jt])      # Hit
    key_penalty += pena
    jtp_prev = jtp
  # Last gaps:
  segs[0].append(text0[jtp_prev[0]:])
  segs[1].append(text1[jtp_prev[1]:])
  return segs[0], segs[1], key_penalty
  # ----------------------------------------------------------------------

def get_gap_sizes_from_bimatching(nt0, nt1, bimatch):
  # Similar to {extract_segments(text0, text1, bimatch)} except that it
  # takes the sizes {nt0,nt1} of the two texts, instead of the texts
  # themselves, and returns the sizes
  # {gsizes0[0..ng-1],gsizes1[0..ng-1]} of the gaps, rather than the
  # gap and hit strings themselves.
  #
  # The procedre also returns a {key_penalty} which is the sum of the
  # {pena} components in their {bpvar} fields.
  
  debug = False
  
  nr = len(bimatch)
  nh = nr; ng = nh + 1; ns = ng + nh
  
  jtp_prev = [ 0, 0 ]  # End or previous hit
  key_penalty = 0
  gsizes = [ [ ], [ ] ]
  ntp = ( nt0, nt1 )
  for ir in range(nr):
    i0, j0, i1, j1, bpvar = bimatch[ir]
    pena, pat0, pat1 = bpvar
    if debug: err.write(f"!Z     {i0 = } {j0 = } {i1 = } {j1 = } {pena = :.8f}\n")
    assert isinstance(pena, float) and pena >= 0
    itp = ( i0, i1 ) 
    jtp = ( j0, j1 )
    for side in range(2):
      jt_prev = jtp_prev[side]
      it = itp[side]
      jt = jtp[side]
      nt = ntp[side]
      assert jt_prev <= it <= nt and it <= jt <= nt
      gszi = it - jt_prev
      if debug: err.write(f"!Z       {side = } {gszi = }\n")
      gsizes[side].append(gszi)
    assert isinstance(pena, float) and pena >= 0
    key_penalty += pena
    jtp_prev = jtp
  # Last gaps:
  assert jtp_prev[0] <= nt0; gsizes[0].append(nt0 - jtp_prev[0])
  assert jtp_prev[1] <= nt1; gsizes[1].append(nt1 - jtp_prev[1])
  return gsizes[0], gsizes[1], key_penalty 
  # ----------------------------------------------------------------------

def write_bimatching(wr, lab, bimatch, text0, pad0, text1, pad1):
  # Writes out a bimatch {bimatch}.  
  #
  # The {bimatch} must be a list of rungs {bimatch[0..nr-1]}
  # as defined above.  Uses {text0} and {text1} to extract the 
  # matched strings on the two sides. They are padded with {pad0} and {pad1}
  # so that the columns align properly.
  #
  # Each line of the output is prefixed with the given {lab} string. 

  nr = len(bimatch) # Num of rungs.
  nt0 = len(text0)
  nt1 = len(text1)
  
  # Find the max width of each keyword:
  wd0_max = 0
  wd1_max = 0
  for ir in range(nr):
    i0, j0, i1, j1, bpvar = bimatch[ir]
    pena, pat0, pat1 = bpvar
    assert 0 <= i0 and i0 <= j0 and j0 <= nt0
    assert 0 <= i1 and i1 <= j1 and j1 <= nt1
    assert isinstance(pena, float) and pena >= 0
    assert re.fullmatch(pat0, text0[i0:j0]) != None
    assert re.fullmatch(pat1, text1[i1:j1]) != None
    wd0_max = max(wd0_max, j0 - i0)
    wd1_max = max(wd1_max, j1 - i1)

  # Prints all rungs
  for ir in range(nr):
    wr.write(f"{lab}")
    i0, j0, i1, j1, bpvar = bimatch[ir]
    pena, pat0, pat1 = bpvar
    key0 = text0[i0:j0].ljust(wd0_max, pad0)
    wr.write(f" | {i0:3d} {j0:3d} {key0}")
    key1 = text1[i1:j1].ljust(wd1_max, pad1)
    wr.write(f" | {i1:3d} {j1:3d} {key1}")
    wr.write(f" | ({pat0},{pat1})")
    wr.write("\n")
  return
  # ----------------------------------------------------------------------

def test_match(text0, utype0, text1, utype1, bitemp, pad0):

  err.write("=== testing {match_bitemplate} ===\n")
  
  debug = True
  
  nr = len(bitemp)
  nh = nr; ng = nh + 1
  max_score = 20.0
   
  def eval_one_gap(gsize0, gsize1, ig, ng):
    debug = False
    if debug: err.write("!e {gsize0 = !r} {gsize1 = !r}\n")
    gap_score = bef.compute_single_gap_score(gsize0, utype0, gsize1, utype1, ig, ng)
    if debug: err.write(f"!e {gap_score = :.3f}\n")
    return gap_score
    # ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
     
  # Find {bimatch[0..nr]}, the best matching of {bitemp}:

  err.write("matching ...\n")
  debug_level = 4
  bimatch, score = match_bitemplate \
    ( text0, text1, bitemp, eval_one_gap, max_score, debug_level )
  if bimatch == None:
    assert score == +inf
    assert False, "matching failed!"
  else:
    err.write(f"{score = :.8f}\n")
    assert len(bimatch) == nr
    
    err.write("bimatching:\n")
    pad0 = '　' if utype0 == "ch" else " "
    pad1 = '　' if utype1 == "ch" else " "
    write_bimatching(err, '', bimatch, text0, pad0, text1, pad1)

    err.write("extracting the segments ...\n")
    segs0, segs1, key_penalty = extract_segments(text0, text1, bimatch)
    err.write(f"{key_penalty = :.3f} {score = :.3f}\n")
    mpars = ((segs0, '?', pad0,), (segs1, '?', ' ',),)
    err.write("matching result:\n")
    wpf.write_parsings(err, "", True, mpars)

    # Checking the score computation:
    score_check = bef.compute_full_score_from_macro_parsings \
      ( segs0, utype0, segs1, utype1, key_penalty )
    if abs(score_check - score) > 1.0e-7:
      err.write(f"{score = :.9f} {score_check = :.9f}\n")
      assert False, "score does not check"

  err.write("=================================\n")
  return bimatch, score
  # ----------------------------------------------------------------------

def test_match_0():
  err.write(f"=== test_match_0() ===========\n")
  text0 = "...XX........XaX..."
  text1 = "...ZZ..ZaZ...ZaZ..."

  bpvar_A1 = ( 0.000, 'XX',   'YY',   )
  bpvar_A2 = ( 0.020, 'XX',   'ZZ',   )
  bpvar_A3 = ( 0.040, 'Xa?X', 'Za?Z', )

  bipatA = ( bpvar_A1, bpvar_A2, bpvar_A3 )

  bitemp = ( bipatA, bipatA )
  
  bimatch, score = test_match(text0, "ec", text1, "ec", bitemp, ' ')
  return
  # ----------------------------------------------------------------------

def test_match_1(with_dain):
  err.write("\n\n\n")
  err.write(f"=== test_match_1({with_dain=!r}) ===========\n")

  text0 = \
    "主杀鬼肪主耳聋肠主遗溺肶胵裹黄皮"
  text1 = \
    "daiinchsdqokeeeydaiinokaiinotaiindaiincheolkallkldaindoeeokcheeoltaiinotcheedychoraiino"
  if not with_dain: text1 = re.sub(r"dain", "xxxx", text1)  
  
  bipat_USES = stdbip.get_bencao_starps_bipattern('USES')
  bitemp = ( bipat_USES, bipat_USES, bipat_USES, )
  
  bimatch, score = test_match(text0, "ch", text1, "ec", bitemp, '　')
  err.write(f"{bimatch = !r}\n")
  err.write(f"{score = :.8f}\n")
  
  assert len(bimatch) == 3

  assert bimatch[0][0:4] == (0,1,0,5) # 'daiin'
  if with_dain:
    assert bimatch[1][0:4] == (4,5,22,27) # 'kaiin'
    assert bimatch[2][0:4] == (8,9,49,53) # 'dain'
  else:
    assert bimatch[1][0:4] == (4,5,16,21) # 'daiin'
    assert bimatch[2][0:4] == (8,9,33,38) # 'daiin'
  
  return
  # ----------------------------------------------------------------------
  
def test_match_2():
  err.write("\n\n\n")
  err.write(f"=== test_match_2() ===========\n")
  text0 = \
    "蜂子主治风头除蛊毒补虚羸伤中久服令人光泽好颜色不老大黄蜂子主心腹胀满痛轻" + \
    "身益气土蜂子主治痈肿"
  text1 = \
    "deechdyopchedaiinypchedyodalychedyqopcheokaiinshedypodairochedalloiinchedy" + \
    "qokaiinchdydaiindchdoseedolchdolkchedychokaiinchdyqoeedyokchedydaiiinchedy" + \
    "daiinykeedyokeeedychedolchdaiinykardarycheold" + \
    "chedydkchsaiinchdedyqodaiinokchedaiinchain"
  
  bipat_USES = stdbip.get_bencao_starps_bipattern('USES')
  bipat_QI =   stdbip.get_bencao_starps_bipattern('QI')
  bipat_LONG = stdbip.get_bencao_starps_bipattern('LONG')
  
  bitemp = ( bipat_USES, bipat_LONG, bipat_USES, bipat_QI, bipat_USES, )
  
  bimatch, score = test_match(text0, "ch", text1, "ec", bitemp, '　')
  err.write(f"{bimatch = !r}\n")
  err.write(f"{score = :.8f}\n")
  
  assert len(bimatch) == 5
  
  return
  # ----------------------------------------------------------------------

def test_match_3(with_dain, with_AMENO):
  err.write("\n\n\n")
  err.write(f"=== test_match_3({with_dain=!r},{with_AMENO=!r}) ===========\n")

  # <b1.4.096> (w1a1d0g0) 74 hanzi
  text0 = \
    "丹雄鸡主女人崩中漏下赤白沃补虚温中止血通神杀毒辟不祥头主杀鬼肪主耳聋肠主遗溺肶胵" + \
    "裹黄皮主泄利屎白主消渴伤寒寒热翮羽主下血闭鸡子主除热火疮痫痉可作虎魄"
  assert len(text0) == 74

  # <f105v.32> 371 eva
  text1 = \
    "poarkeeodaiinqoaiinaracpheeyqoeedeodyqokaiinqotedaiinaporaiinapylsheod" + \
    "ytaiinoteeyoteeoolotaiinokeeyqokaiinoraiiinaldalsheeodaiinchsdqokeeeyd" + \
    "aiinokaiinotaiinchedaiinolkallkldaindoeeokcheeoltaiinotcheedychoraiino" + \
    "daiinchedyotaiinalkaishdlaiinsheodokeeodyqoaiinytaiinotaiinchdaldydaii" + \
    "inchdaiinockheyysheyckhysheoqoeeolkaiinchsokoltchdysheeeyokaiinaraildy" + \
    "cheodyoaiiinainokshey"
  assert len(text1) == 371
  
  if not with_dain: text1 = re.sub(r"dain", "xxxx", text1)  

  bipat_USES =  stdbip.get_bencao_starps_bipattern('USES')
  bipat_AMENO = stdbip.get_bencao_starps_bipattern('AMENO')
  
  if with_AMENO:
    bitemp = ( bipat_USES, ) * 7 + ( bipat_AMENO, bipat_USES, )
  else:
    bitemp = ( bipat_USES, ) * 8
  
  bimatch, score = test_match(text0, "ch", text1, "ec", bitemp, '　')
  
  err.write(f"{bimatch = !r}\n")
  err.write(f"{score = :.8f}\n")
  
  assert bimatch[0][0:4] == (  3,   4,   8,  13) # 'daiin'
  assert bimatch[1][0:4] == ( 27,  28, 123, 128) # 'daiin'
  assert bimatch[2][0:4] == ( 31,  32, 139, 144) # 'daiin'
  assert bimatch[3][0:4] == ( 35,  36, 159, 164) # 'daiin'
  assert bimatch[4][0:4] == ( 43,  44, 210, 215) # 'daiin'
  assert bimatch[5][0:4] == ( 48,  49, 234, 239) # 'laiin'
  assert bimatch[6][0:4] == ( 57,  58, 284, 289) # 'daiin'
  if with_AMENO:
    assert bimatch[7][0:4] == ( 59,  61, 295, 304) # 'ysheyckhy'
    assert bimatch[8][0:4] == ( 63,  64, 314, 319) # 'laiin'
  else:
    assert bimatch[7][0:4] == ( 63,  64, 314, 319) # 'laiin'
  
  return
  # ----------------------------------------------------------------------

if len(sys.argv) > 1 and sys.argv[1] == "BTF.TEST":
  test_match_0()
  test_match_3(with_dain = False, with_AMENO = False)
  test_match_3(with_dain = True,  with_AMENO = False)
  test_match_3(with_dain = False, with_AMENO = True )
  test_match_1(with_dain = False)
  test_match_1(with_dain = True)
  test_match_2()