#! /usr/bin/python3
# Last edited on 2026-02-27 05:44:08 by stolfi

import sys
import os
import re
from sys import stderr as err
from math import fabs

from error_funcs import arg_error, prog_error
from process_funcs import bash
from note_077_funcs import enc_from_unit
import make_rules_077_funcs as mrf

# In all functions below, the rules are stored in three dicts indexed by
# target file name: {pre} gets the list of prerequisite files, {mak} gets
# a tuple of shell commands that build the target, and {tit} gets a
# one-line message describing the rule.


def add_all_ivt_ivp_rules(pre, mak, tit):
    # Adds rules to create, for each encoding {enc}, the files
    # "bencao-fu-{enc}.ivt" and "bencao-fu-{enc}.ivp".
    #
    # The ".ivt" file is just a symbolic link to the corresponding source
    # file in the "in" folder.  The ".ivp" file is produced from the
    # ".ivt" file by the script "convert_bencao_ivt_to_ivp.py".
    #
    # The {enc} specifies the encoding: "chu" for hanzi chars, "pys" for
    # pinyin words with one syllable per word. The two should match,
    # one character for each syllable.
    #
    # Beware that both sources have extemporaneous inline comments in
    # ideographic parentheses.
    #
    # Returns the list of names of all targets added.

    ivt_to_ivp_script = "convert_bencao_ivt_to_ivp.py"

    # Each encoding paired with the extension of its "in/bencao.*" source:
    enc_and_source_ext = (
        ("chu", "uts"),
        ("pys", "pys"),
    )

    targets = []
    for enc, source_ext in enc_and_source_ext:

        # The ".ivt" file is the reference file, with annotations and other dirt:
        ivt_target = f"bencao-fu-{enc}.ivt"
        source_ivt = f"in/bencao.{source_ext}"
        tit[ivt_target] = f"making link {ivt_target} to {source_ivt}"
        pre[ivt_target] = [source_ivt]
        mak[ivt_target] = (
            f"( cd res && rm -f {ivt_target} ; ln -s ../{source_ivt} {ivt_target} )",
        )
        targets.append(ivt_target)

        # The ".ivp" file is cleaned from annotations and other dirt.
        # It is generated by a script, not linked, and the title says so:
        ivp_target = f"bencao-fu-{enc}.ivp"
        ivp_source = f"res/{ivt_target}"
        tit[ivp_target] = f"making {ivp_target} from {ivp_source}"
        pre[ivp_target] = [ivp_source, ivt_to_ivp_script]
        mak[ivp_target] = (
            f"./{ivt_to_ivp_script} {enc} {ivp_source} res/{ivp_target}",
        )
        targets.append(ivp_target)

    return targets
    # ----------------------------------------------------------------------


def add_all_upp_rules(pre, mak, tit):
    # Rules to create the "bencao-fu-{unit}.upp" files, with the size of
    # each recipe. The {unit} specifies the unit of size: "ch" for hanzi
    # chars, "ps" for pinyin words with one syllable per word. The latter
    # should match the "ch" version if the pinyin source is correct.
    #
    # Returns the list of names of all targets added.

    # The size counting script and its imported modules:
    script = "count_units_per_parag.py"
    prfn_py_lib = "work/process_funcs.py"
    chfn_py_lib = "work/chinese_funcs.py"
    n077_py_lib = "note_077_funcs.py"

    book = "bencao"
    bsub = "fu"

    targets = []
    for unit in ("ch", "ps"):
        target = f"{book}-{bsub}-{unit}.upp"
        source_enc = enc_from_unit(unit)
        source = f"res/{book}-{bsub}-{source_enc}.ivp"
        script_options = f"{book} {bsub} {unit}"

        tit[target] = f"making SBJ parag size file {target} from {source}"
        pre[target] = [
            source,
            script,
            prfn_py_lib,
            n077_py_lib,
            chfn_py_lib,
        ]
        # NOTE(review): unlike the ".ivp" rule, the script is invoked here
        # without a "./" prefix -- confirm that this is intended.
        mak[target] = (
            f"{script} {script_options}",
        )
        targets.append(target)

    return targets
    # ----------------------------------------------------------------------


def add_all_single_hist_rules(pre, mak, tit):
    # Adds rules for one single-dataset size histogram plot per unit
    # ("ch" and "ps") of the "fu" subsection, through
    # {mrf.add_single_size_hist_plot_rules}.
    #
    # Returns the list of targets returned by those calls.

    bsub = "fu"
    bin_size = 1

    targets = []
    for unit in ("ch", "ps"):
        name = f"bencao-{bsub}-{unit}"
        color = mrf.bencao_hist_color(bsub, unit)
        target = mrf.add_single_size_hist_plot_rules(pre, mak, tit, name, color, bin_size)
        targets.append(target)

    return targets
    # ----------------------------------------------------------------------


def add_all_double_hist_rules(pre, mak, tit):
    # Adds rules for histogram plots that compare two datasets of the "fu"
    # subsection, one plot per pair of units listed below, through
    # {mrf.add_double_size_hist_plot_rules}.
    #
    # Returns the list of targets returned by those calls.

    unit_pairs = (
        ("ch", "ps"),
    )
    bin_size = 1

    targets = []
    for unit0, unit1 in unit_pairs:
        bsub0 = "fu"
        name0 = f"bencao-{bsub0}-{unit0}"
        color0 = mrf.bencao_hist_color(bsub0, unit0)

        bsub1 = "fu"
        name1 = f"bencao-{bsub1}-{unit1}"
        color1 = mrf.bencao_hist_color(bsub1, unit1)

        target = mrf.add_double_size_hist_plot_rules(
            pre, mak, tit, name0, color0, name1, color1, bin_size
        )
        targets.append(target)

    return targets
    # ----------------------------------------------------------------------


def add_all_word_pos_plot_rules(pre, mak, tit):
    # Word position plots for various words and parags.
    #
    # Only {pattags} and the parags in {cands} and {cands_yes} are passed
    # to {mrf.add_many_word_pos_plot_rules}.  The tuples {pattags_not} and
    # {cands_not} are not used by the code; they are kept only as a record.
    #
    # Returns the result of {mrf.add_many_word_pos_plot_rules}.

    book = "bencao"
    bsub = "fu"
    iext = "ivp"
    unit = "ch"

    # Pairs of (word, tag) to plot:
    pattags = (
        ("主", "zhu3"),
    )

    # Not used:
    pattags_not = (
        ("主治", "zhu3zhi4"),
        ("主下", "zhu3xia4"),
    )

    cands = (
        "b1.4.100",  # 3 hits.
        "b2.5.118",  # 3 hits?.
        "b2.5.518",  # 3 hits?.
        "b3.3.080",  # 3 hits.
        "b2.1.013",  # 3 hits.
    )

    cands_yes = (
        "b1.4.096",  # 7 hits.
        "b2.4.094",  # 3 hits.
        "b3.3.088",  # 3 hits.
    )

    # Not used:
    cands_not = (
        "b1.1.007",  # 1 hit.
        "b1.1.014",  # 1 hit.
        "b1.2.061",  # 1 hit.
        "b1.4.090",  # 2 hits.
        "b1.5.106",  # 1 hit.
        "b1.5.109",  # 2 hits.
        "b2.1.001",  # 1 hit.
        "b2.2.066",  # 1 hit.
        "b2.3.090",  # 2 hits.
        "b3.3.077",  # 1 hit.
        "b3.5.119",  # 2 hits.
    )

    slocs = cands + cands_yes
    targets = mrf.add_many_word_pos_plot_rules(
        pre, mak, tit, book, bsub, iext, unit, pattags, slocs
    )
    return targets
    # ----------------------------------------------------------------------


def add_wpos_delta_file_rules(pre, mak, tit, word, tag):
    # Adds rules to create the file "res/bencao-fu-{unit}-{tag}.wpd" with
    # the deltas of positions of word {word} in selected parags
    # of the "res/bencao-fu-chu.ivp" file.
    #
    # Only the parags in {parag_cands} are passed to
    # {mrf.add_wpos_delta_file_rules}.  The tuples {parag_cands_later} and
    # {parag_cands_done} are not used by the code; they are kept only as
    # a record.
    #
    # Returns the pair {(target, color)}, where {target} is the result of
    # {mrf.add_wpos_delta_file_rules} and {color} is the result of
    # {mrf.bencao_hist_color} for the "fu" subsection and the "ch" unit.

    bsub = "fu"
    unit = "ch"
    color = mrf.bencao_hist_color(bsub, unit)

    parag_cands = (
        "b1.4.100",  # 3 hits.
        "b2.1.013",  # 3 hits.
        "b2.4.094",  # 3 hits.
        "b2.5.118",  # 3 hits?.
        "b2.5.518",  # 3 hits?.
        "b3.3.080",  # 3 hits.
        "b3.3.088",  # 3 hits.
    )

    # Not used:
    parag_cands_later = ()

    # Not used:
    parag_cands_done = (
        "b1.4.096",  # 7 hits.
    )

    target = mrf.add_wpos_delta_file_rules(
        pre, mak, tit, "bencao", bsub, unit, word, tag, parag_cands
    )
    return target, color
    # ----------------------------------------------------------------------


def add_all_rules(pre, mak, tit):
    # Adds rules to build the ".ivt" and ".ivp" files, the ".upp" files
    # with the size of each parag, the single histograms of parag sizes
    # for the various size metrics, and the word position plots.
    #
    # The rules for the double histograms that compare size metrics
    # ({add_all_double_hist_rules}) are currently disabled.
    #
    # Returns the list of all targets added by those functions,
    # intermediate files as well as plot files.

    targets = (
        add_all_ivt_ivp_rules(pre, mak, tit)
        + add_all_upp_rules(pre, mak, tit)
        + add_all_single_hist_rules(pre, mak, tit)
        + add_all_word_pos_plot_rules(pre, mak, tit)
    )
    # + add_all_double_hist_rules(pre, mak, tit)

    return targets
    # ----------------------------------------------------------------------