#! /usr/bin/python3 # Last edited on 2026-04-02 19:14:42 by stolfilocal import os, sys, re from sys import stderr as err, stdout as out from math import sqrt, hypot, log, exp, floor, ceil, inf, nan, isfinite def match_multi_pattern(texts, pats, eval): # Takes a pair of texts {texts[0..1]} and a list # {pats[0..nh-1]} of pairs of patterns. # # Enumerates all the ways to parse each text {texts[it]} into {nh} # substrings {hits[0..nh-1][it]} preceded, separated, and folowed by # {ng=nh+1} substrings {gaps[0..ng-1]}, such that each hit # {hits[ih][it]} is an instance of {pats[ih][it]}, and the # concatenation of all those gaps and hits, alternated, is # {texts[it]}. # # For each such parsing, calls the function {eval(gaps,hits)} # to produce a numeric "badness" score. Returns the parsing # {opt_gaps,opt_hits} with minimum score, and its score {opt_score}. # # If there are no parsings with these properties, returns {None,None,+inf}. nt = ( len(texts[0]), len(texts[1]) ) # Lengths of the texts. nh = len(pats) # Number of patterns, and hits. ng = nh + 1 # Number of gaps before, between, and after them gaps = [ None ] * ng # Elems {gaps[ig][it]} is gap {ig} from {text[it]}. hits = [ None ] * nh # Elems {hits[ih][it]} is hit {ih} from {text[it]}. opt_gaps = None opt_hits = None opt_score = +inf def match_aux(kt0, kt1, ih): # Assumes that {hits[0..ih-1]} and {gaps[0..ih-1]} are defined # for some {ih <= nh} and cover the first {kt0} characters of # {texts[0]} and the first {kt1} characters of {texts[1]}. # Enumerates all complete parsings that # extend that parsing, updaing the optimum. nonlocal opt_gaps, opt_hits, opt_score err.write("! " + ("." * ih)) if ih == nh: err.write(" =\n") gaps[ih] = ( texts[0][kt0:], texts[1][kt1:], ) score = eval(gaps, hits) if score < opt_score: opt_gaps = gaps.copy() opt_hits = hits_copy() opt_score = score # No way to extend: return else: # Still need more hits assert ih < nh err.write(f" < {kt0 = } {kt1 = }\n") def try0(rt0): pat0 = pats[ih][0] tail0 = texts[0][rt0:] # err.write(f" {rt0 = } {tail0 = }\n") m0 = re.match(pat0, tail0) if m0 != None: st0 = m0.end(0) # err.write(f" {st0 = }\n") rt1 = kt1 while rt1 < nt[1]: try1(rt0, st0, rt1); rt1 += 1 return # ........................................ def try1(rt0, st0, rt1): pat1 = pats[ih][1] tail1 = texts[1][rt1:] # err.write(f" {rt1 = } {tail1 = } {pat1 = }\n") m1 = re.match(pat1, texts[1][rt1:]) if m1 != None: st1 = m1.end(0) # err.write(f" {st1 = }\n") gap0 = texts[0][kt0:rt0]; hit0 = texts[0][rt0:st0] gap1 = texts[1][kt1:rt1]; hit1 = texts[1][rt1:st1] gaps[ih] = (gap0,gap1,); hits[ih] = (hit0,hit1,) match_aux(st0,st1,ih+1) gaps[ih] = None; hits[ih] = None return # ........................................ rt0 = kt0 while rt0 < nt[0]: try0(rt0) rt0 += 1 return # :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: err.write(">>\n") match_aux(0,0,0) err.write("<<\n") return opt_gaps, opt_hits, opt_score # ---------------------------------------------------------------------- def test_match(): text0 = "acaBUMxesooninobaPAPAbabGRAmosGRAozoxSEXYzuzuzu" text1 = "<>=PIMBP~:;=PRP=--=PUP::==:=::PHP:==PAPP+++++" texts = (text0, text1 ) pats0 = ( 'BUM|PAPA', 'BUM|PAPA|GRA', 'GRA', 'GRA|SEXY', ) pats1 = ( 'P[A-OQ-Z]*P', 'P[A-OQ-Z]*P', 'P[A-OQ-Z]*P', 'P[A-OQ-Z]*P', ) pats = tuple(zip(pats0, pats1,)) def eval(gaps, hits): ng = len(gaps) nh = len(hits) assert ng == nh + 1 for it in range(2): for ig in range(ng): if ig > 0: err.write(f" [{hits[ig-1][it]}]") err.write(f" {gaps[ig][it]}") err.write("\n") score = 0 for ig in range(ng): sz0 = gaps[ig][0] sz1 = gaps[ig][1] score += (sz0-sz1)**2 err.write(f"{score = }\n") err.write("\n") return score ogaps, ohits, oscore = match_multi_pattern(texts, pats, eval) print((ogaps, ohits, oscore)) return # ---------------------------------------------------------------------- test_match()