#! /bin/python3
# Last edited on 2026-04-27 18:27:50 by stolfi

# Functions for aligning micro-parsed SBJ entries with SPS word text.

import sys, os, re
from sys import stderr as err
from error_funcs import prog_error
from math import sqrt, hypot, exp, log, pi, inf, nan, floor, ceil


def write_parsings(wr, lab, macro, mpars):
    # Writes a list of macro- or micro-parsings {mpars[0..nm-1]} as the
    # {nm} columns of a table, to the writable stream {wr}.
    #
    # Each {mpars[im]} must be a triple {(bites,unk,pad)} where {bites} is
    # a list of strings which is the micro-parsing proper, {unk} is a
    # string, and {pad} is a single-char string. Each micro-parsing {bites}
    # is extended with its {unk} string as needed so that all of them have
    # the same number of bites, and the bites of each micro-parsing are
    # padded on the right with the respective {pad} character so that they
    # all have the same width.
    #
    # Each line of the output is prefixed with the given {lab} string.
    # If {macro} is true, even lines are additionally labeled "G" for
    # "gap", and odd lines are labeled "H" (presumably for "hit").
    #
    # If the {bites} of an {mpars[im]} is {None}, that column is
    # omitted entirely. No entry of {mpars} itself may be {None}.

    # Collect the columns that are actually printed, with the width
    # of each one, and find the max number of bites {nb}:
    cols = []
    nb = 0
    for mpar in mpars:
        assert mpar is not None
        bites, unk, pad = mpar
        if bites is None:
            continue
        nb = max(nb, len(bites))
        # The column must be wide enough for the filler {unk} too:
        cols.append((bites, unk, pad, max(max_width(bites), len(unk))))

    # Print the table, one row per bite index:
    for ib in range(nb):
        wr.write(f"{lab}")
        if macro:
            wr.write(" G" if ib % 2 == 0 else " H")
        for bites, unk, pad, wd in cols:
            # Columns that ran out of bites are filled with {unk}:
            bite = bites[ib] if ib < len(bites) else unk
            wr.write(f" | {bite.ljust(wd, pad)}")
        wr.write("\n")
    return
    # ----------------------------------------------------------------------


def write_bilevel_parsings(wr, lab, msegs):
    # Like {write_parsings}, but each {msegs[im]} is a triple
    # {(segs_bites, unk, pad)} where {segs_bites} is a list of lists of
    # strings. All the {segs_bites} lists must have the same odd length
    # {ns} (only the equality of lengths is checked here). Each list
    # {segs_bites[ks]} is interpreted as a micro-parsing and printed as
    # per {write_parsings}, except that the label {lab} is augmented
    # with " G" or " H" depending on the parity of {ks}.
    #
    # Within each segment {ks}, the shorter micro-parsings are extended
    # with their {unk} strings so that all columns have the same number
    # of rows. The width of each column is the same for all segments.
    # A line of dashes is written before each segment and after the
    # last one.
    #
    # If the {segs_bites} of an {msegs[im]} is {None}, that column is
    # omitted entirely. If all columns are omitted, nothing is written.
    # No entry of {msegs} itself may be {None}.

    # Collect the columns that are actually printed, with the width
    # of each one, and check that they agree on the segment count {ns}:
    ns = None
    cols = []
    for mseg in msegs:
        assert mseg is not None
        segs_bites, unk, pad = mseg
        if segs_bites is None:
            continue
        if ns is None:
            ns = len(segs_bites)
        else:
            assert ns == len(segs_bites), "inconsistent number of segments"
        assert all(bites is not None for bites in segs_bites)
        # The column width must fit every bite of every segment,
        # and also the filler {unk}:
        wd = max(max((max_width(bites) for bites in segs_bites), default=0), len(unk))
        cols.append((segs_bites, unk, pad, wd))

    if ns is None:
        # No column has any data, so there is nothing to print:
        return

    # Max bite count {nb[ks]} among all columns, for each segment {ks}:
    nb = [max(len(segs_bites[ks]) for segs_bites, _, _, _ in cols) for ks in range(ns)]

    # Total width of the label, the segment tag, and all the columns:
    twd = len(lab) + 1 + 1 + sum(3 + wd for _, _, _, wd in cols)

    # Print all segments:
    sep = " " + "-" * twd + "\n"
    for ks in range(ns):
        stag = "G" if ks % 2 == 0 else "H"
        wr.write(sep)
        for ib in range(nb[ks]):
            wr.write(f"{lab} {stag}")
            for segs_bites, unk, pad, wd in cols:
                bites = segs_bites[ks]
                # Columns that ran out of bites in this segment get {unk}:
                bite = bites[ib] if ib < len(bites) else unk
                wr.write(f" | {bite.ljust(wd, pad)}")
            wr.write("\n")
    # Closing separator after the last segment:
    wr.write(sep)
    return
    # ----------------------------------------------------------------------


def max_width(strs):
    # Returns the max length of all the strings in the list {strs}.
    # If {strs} is empty or {None}, returns 0.
    if strs is None:
        return 0
    return max((len(it) for it in strs), default=0)
    # ----------------------------------------------------------------------