#! /usr/bin/python3
# Last edited on 2026-02-27 05:44:53 by stolfi

import os
import re
import sys
from math import fabs
from sys import stderr as err

from error_funcs import arg_error, prog_error
from process_funcs import bash
from note_077_funcs import enc_from_unit
import make_rules_077_funcs as mrf

# Every "add_..." function below receives the same three dictionaries,
# all keyed by target file name, and fills them in place:
#
#   {pre[target]}  list of prerequisite files of the target;
#   {mak[target]}  tuple of shell command lines that build the target;
#   {tit[target]}  one-line human-readable description of the rule.
#
# Unless said otherwise, each function returns the list of the targets
# whose rules it added, in the order they were added.


def add_all_ivt_rules(pre, mak, tit):
    # Rules to create the specialized ".ivt" files, namely
    # "res/starps-fu-eva.ivt" and "res/starps-gd-eva.ivt", from the
    # starred parags text of Note/074.

    # Both files are derived from the same Note/074 source:
    source = "../074/st_files/str-parags.ivt"

    targets = []

    # Complete ".ivt" file with all parags:
    target_full = "starps-fu-eva.ivt"
    tit[target_full] = f"copying full SPS IVTFF file {target_full} from {source}"
    pre[target_full] = [source]
    mak[target_full] = (
        f"cat {source} \\",
        " | egrep -v -e '^]' \\",
        f" > res/{target_full}",
    )
    targets.append(target_full)

    # Subset ".ivt" file with good lines only:
    target_good = "starps-gd-eva.ivt"
    # The filtering script and its imported modules:
    filter_script = "remove_bad_lines_from_starps_ivt.gawk"
    erfn_gawk_lib = "work/error_funcs.gawk"
    tit[target_good] = f"extracting the good SPS source file {target_good} from {source}"
    pre[target_good] = [source, filter_script, erfn_gawk_lib]
    # NOTE(review): unlike the Python scripts in the other rules, the filter
    # is invoked without a "./" prefix -- confirm that this is intended.
    mak[target_good] = (
        f"cat {source} \\",
        " | egrep -v -e '^]' \\",
        f" | {filter_script} \\",
        f" -i {erfn_gawk_lib} \\",
        f" > res/{target_good}",
    )
    targets.append(target_good)

    return targets
    # ----------------------------------------------------------------------


def add_all_ivp_rules(pre, mak, tit):
    # Rules to create the ".ivp" files, like ".ivt" but with each parag
    # joined into a single line, without comments, weirdos, ligatures, etc.

    # The join script and its imported modules:
    ivt_to_ivp_script = "convert_starps_ivt_to_ivp.py"
    prfn_py_lib = "work/process_funcs.py"
    ivtff_py_lib = "work/ivtff_format.py"
    n077_py_lib = "note_077_funcs.py"

    targets = []
    for bsub in ("fu", "gd"):
        target = f"starps-{bsub}-eva.ivp"
        source = f"res/starps-{bsub}-eva.ivt"
        tit[target] = f"creating '{bsub}' SPS line-per-parag file {target} from {source}"
        pre[target] = [source, ivt_to_ivp_script, prfn_py_lib, ivtff_py_lib, n077_py_lib]
        mak[target] = (f"./{ivt_to_ivp_script} {source} res/{target}",)
        targets.append(target)
    return targets
    # ----------------------------------------------------------------------


def add_all_ivf_rules(pre, mak, tit):
    # Rules to create the ".ivf" files, like ".ivp" but with
    # all the parags on the same page joined into a single
    # big parag.

    # The join script and its imported modules:
    ivt_to_ivf_script = "convert_starps_ivp_to_ivf.py"
    prfn_py_lib = "work/process_funcs.py"
    ivtff_py_lib = "work/ivtff_format.py"
    n077_py_lib = "note_077_funcs.py"

    targets = []
    # No sense making the "gd" version.
    for bsub in ("fu",):
        target = f"starps-{bsub}-eva.ivf"
        source = f"res/starps-{bsub}-eva.ivp"
        tit[target] = f"creating '{bsub}' SPS line-per-page file {target} from {source}"
        pre[target] = [source, ivt_to_ivf_script, prfn_py_lib, ivtff_py_lib, n077_py_lib]
        mak[target] = (f"./{ivt_to_ivf_script} {source} res/{target}",)
        targets.append(target)
    return targets
    # ----------------------------------------------------------------------


def add_all_upp_rules(pre, mak, tit):
    # Rules to create the parag size files for two subsets (all parags and
    # only the good ones) and three metrics (words with and without comma
    # spaces, and EVA letters without spaces).
    #
    # An earlier, unused local recorded the percent probability of ','
    # being '.' for each unit: 100 for "wc", 0 for "wp", and not applicable
    # for "ec".  Presumably that choice is now made inside the counting
    # script from the {unit} argument -- TODO confirm.

    # Script and its imports:
    script = "count_units_per_parag.py"
    prfn_py_lib = "work/process_funcs.py"
    ivtff_py_lib = "work/ivtff_format.py"
    n077_py_lib = "note_077_funcs.py"

    book = "starps"

    targets = []
    for bsub in ("fu", "gd"):
        for unit in ("wc", "wp", "ec"):
            target = f"starps-{bsub}-{unit}.upp"
            source = f"res/starps-{bsub}-eva.ivp"
            script_options = f"{book} {bsub} {unit}"
            tit[target] = f"making SPS parag size file {target} from {source}"
            pre[target] = [source, script, prfn_py_lib, n077_py_lib, ivtff_py_lib]
            # NOTE(review): the command names neither {source} nor {target},
            # and has no "./" prefix; presumably the script derives the file
            # names from its three arguments -- confirm.
            mak[target] = (f"{script} {script_options}",)
            targets.append(target)
    return targets
    # ----------------------------------------------------------------------


def add_all_single_hist_rules(pre, mak, tit):
    # Rules for the single-dataset parag size histogram plots, one for
    # each subset ("fu", "gd") and each unit ("ec", "wc", "wp").  The
    # rules themselves are created by {mrf.add_single_size_hist_plot_rules}.
    targets = []
    for bsub in ("fu", "gd"):
        for unit in ("ec", "wc", "wp"):
            name = f"starps-{bsub}-{unit}"
            # Wider bins for the "ec" unit than for the word units:
            bin_size = 5 if unit == "ec" else 1
            color = mrf.starps_hist_color(bsub, unit)
            target = mrf.add_single_size_hist_plot_rules(pre, mak, tit, name, color, bin_size)
            targets.append(target)
    return targets
    # ----------------------------------------------------------------------


def add_all_double_hist_rules(pre, mak, tit):
    # Rules for the plots that compare two parag size histograms.  The
    # rules themselves are created by {mrf.add_double_size_hist_plot_rules}.
    comparisons = (
        ("ec", "fu", "ec", "gd"),  # To see full vs good difference in "ec" units.
        ("wc", "fu", "wc", "gd"),  # To see full vs good difference in "wc" units.
        ("wc", "gd", "wp", "gd"),  # To see "wc" vs "wp" difference in good subset.
        ("wc", "gd", "ec", "gd"),  # To see "wc" vs "ec" difference in good subset.
    )
    targets = []
    for unit0, sub0, unit1, sub1 in comparisons:
        name0 = f"starps-{sub0}-{unit0}"
        color0 = mrf.starps_hist_color(sub0, unit0)
        name1 = f"starps-{sub1}-{unit1}"
        color1 = mrf.starps_hist_color(sub1, unit1)
        # The bin size is chosen from the first dataset's unit only:
        bin_size = 5 if unit0 == "ec" else 1
        target = mrf.add_double_size_hist_plot_rules(
            pre, mak, tit, name0, color0, name1, color1, bin_size
        )
        targets.append(target)
    return targets
    # ----------------------------------------------------------------------


def add_all_word_pos_pos_plot_rules(pre, mak, tit):
    # Placeholder for the word position-vs-position plot rules.  No rule
    # is added yet, so the returned list is empty.  The Makefile rules
    # still to be converted are kept below for reference.

    # Must be defined even though no rule is added; otherwise the
    # {return} below raises {NameError}.
    targets = []

    # ----------------------------------------------------------------------
    # pos-pos-plots: res/bencao-fu-zhu3-starps-${SPS_TAG}-wpos.png
    # pos-pos-plots: res/bencao-fu-zhu3-starps-${SPS_TAG}-nwo-hist.png

    # res/starps-${SPS_TAG}.woc: \
    #           \
    #           res/starps-fu.ivp \
    #           ${MAKEFILE}
    #       ./list_wpositions_in_parags.py voyn-eva '${SPS_WORD}' res/starps-fu.ivp \
    #         > res/starps-${SPS_TAG}.woc
    #
    # res/bencao-fu-zhu3.woc: \
    #           list_wpositions_in_parags.py \
    #           in/bencao-fu.chu \
    #           ${MAKEFILE}
    #       ./list_wpositions_in_parags.py chin-chu '主' in/bencao-fu.chu \
    #         > res/bencao-fu-zhu3.woc
    #
    # res/bencao-fu-zhu3-starps-${SPS_TAG}-nwo-hist.png: \
    #           ${MAKEFILE} \
    #           \
    #           res/starps-${SPS_TAG}.woc \
    #           res/bencao-fu-zhu3.woc
    #       ./plot_two_word_pos_histograms.sh \
    #           starps-${SPS_TAG} '${SPS_WORD}' \
    #           bencao-fu-zhu3 zhu3
    #
    # res/bencao-fu-zhu3-starps-${SPS_TAG}-wpos.png: \
    #           ${MAKEFILE} \
    #           plot_two_word_pos_files.sh \
    #           res/starps-${SPS_TAG}.woc \
    #           res/bencao-fu-zhu3.woc
    #       ./plot_two_word_pos_files.sh \
    #           starps-${SPS_TAG} '${SPS_WORD}' \
    #           bencao-fu-zhu3 zhu3
    #
    return targets
    # ----------------------------------------------------------------------


def add_all_word_pos_plot_rules(pre, mak, tit):
    # Word position plots.
    #
    # Adds, through {mrf.add_many_word_pos_plot_rules}, the plot rules for
    # the word patterns in {pattags} and the parags listed in {cands} and
    # {cands_yes}.  Returns whatever that function returns.

    book = "starps"
    bsub = "fu"
    iext = "ivf"
    unit = "ec"

    # Pairs of (regular expression, tag):
    pattags = (
        (r"daiin", "daiin"),
        (r"daiin|laiin", "DAIIN"),
        (r"[dlrspf]aiii?n", "XAIIN"),
    )

    # Not used at present:
    pattags_not = ()

    cands = (
        "f105v.1",   # 4 hits.
        "f105v.14",  # 4 hits.
        "f106r.42",  # 4 hits.
        "f104v.22",  # 6 hits.
        "f114r.4",   # 6 hits.
        "f114r.8",   # 6 hits.
        "f114r.19",  # 6 hits.
        "f114r.24",  # 6 hits.
    )

    cands_yes = (
        "f112v.11",  # 3 hits.
        "f105v.20",  # 4 hits.
        "f105v.32",  # 5 hits.
    )

    # The next three lists are not used at present; they are kept
    # as a record of the parags that were examined.

    cands_later = (
        "f104v.1",   # 7 hits.
    )

    cands_no_match = (
        "f105r.42",  # 0 hits.
        "f105v.8",   # 2 hits.
        "f106v.42",  # 2 hits.
        "f115r.13",  # 2 hits.
        "f114r.14",  # 4 hits.
        "f104r.1",   # 3 hits.
        "f104r.12",  # 3 hits.
        "f105r.1",   # 3 hits.
        "f105v.5",   # 3 hits.
        "f106v.1",   # 3 hits.
        "f107r.21",  # 3 hits.
        "f113r.10",  # 3 hits.
    )

    # NOTE(review): "f108v.23" appears twice below, with different hit counts.
    cands_messy = (
        "f105r.17",  # 5 hits.
        "f108v.20",  # 5 hits.
        "f108v.23",  # 4 hits.
        "f108v.23",  # 5 hits.
        "f111r.10",  # 3 hits.
        "f111r.25",  # 4 hits.
        "f114r.32",  # 4 hits.
        "f114v.36",  # 4 hits.
    )

    slocs = cands + cands_yes
    targets = mrf.add_many_word_pos_plot_rules(
        pre, mak, tit, book, bsub, iext, unit, pattags, slocs
    )
    return targets
    # ----------------------------------------------------------------------


def add_wpos_delta_file_rules(pre, mak, tit, bsub, word, tag):
    # Adds rules to create the file "res/starps-{bsub}-ec-{tag}.wpd" with
    # the deltas of positions of word {word} of type "ec" in selected parags
    # of the "res/starps-{bsub}-eva.ivp" file.
    #
    # Returns a pair: the result of {mrf.add_wpos_delta_file_rules}, and
    # the color given by {mrf.starps_hist_color} for {bsub} and unit "ec".

    unit = "ec"
    color = mrf.starps_hist_color(bsub, unit)

    # The parags actually selected.
    # NOTE(review): this list is empty, while the populated lists below
    # are not used -- confirm that an empty selection is what is intended.
    parag_cands = ()

    # The remaining lists are not used at present; they are kept
    # as a record of the parags that were examined.

    parag_cands_later = ()

    parag_cands_match = (
        "f105v.32",  # 5 hits.
    )

    parag_cands_maybe = (
        "f114r.24",  # 6 hits. Maybe b1.4.100.
        "f114r.19",  # 6 hits. Maybe b3.3.088.
        "f114r.8",   # 6 hits. Maybe b2.1.013, b1.4.100, b2.4.094, b2.5.118, b2.5.518, b3.3.088.
        "f114r.4",   # 6 hits. Maybe b2.5.118*, b2.5.518*, b3.3.080*, b1.4.100, b2.4.094, b3.3.088.
        "f104v.22",  # 6 hits. Maybe b3.3.080*, b1.4.100, b2.1.013, b2.4.094, b2.5.118, b2.5.518.
        "f106r.42",  # 4 hits. Maybe b2.1.013.
        "f105v.14",  # 4 hits. Maybe b2.4.094.
        "f105v.1",   # 4 hits. Maybe b2.4.094, b2.5.118, b2.5.518, b3.3.088.
        "f112v.11",  # 3 hits. Maybe b2.4.094, b3.3.080.
        "f105v.20",  # 4 hits. Maybe b2.1.013, b3.3.088.
    )

    parag_cands_not = ()

    target = mrf.add_wpos_delta_file_rules(
        pre, mak, tit, "starps", bsub, unit, word, tag, parag_cands
    )
    return target, color
    # ----------------------------------------------------------------------


def add_all_rules(pre, mak, tit):
    # Adds rules to build all derived files that
    # depend only on the SPS sources.
    #
    # Returns the list of ultimate targets.

    targets = (
        add_all_ivt_rules(pre, mak, tit)
        + add_all_ivp_rules(pre, mak, tit)
        + add_all_ivf_rules(pre, mak, tit)
        + add_all_upp_rules(pre, mak, tit)
        + add_all_single_hist_rules(pre, mak, tit)
        + add_all_word_pos_plot_rules(pre, mak, tit)
    )
    # Currently disabled:
    #  + add_all_double_hist_rules(pre, mak, tit)
    return targets
    # ----------------------------------------------------------------------