#! /usr/bin/python3
# Last edited on 2026-02-27 05:44:53 by stolfi

import sys, os, re;
from sys import stderr as err
from error_funcs import arg_error, prog_error
from process_funcs import bash
from math import fabs

from note_077_funcs import enc_from_unit
import make_rules_077_funcs as mrf

def add_all_ivt_rules(pre, mak, tit):
  # Registers in {pre}, {mak}, and {tit} the rules for the two specialized
  # SPS ".ivt" files, both derived from "../074/st_files/str-parags.ivt":
  #
  #   "starps-fu-eva.ivt"  the source minus the lines that match
  #                        the pattern '^<f105r.10[;>]';
  #
  #   "starps-gd-eva.ivt"  the same, further piped through the filter
  #                        "remove_bad_lines_from_starps_ivt.gawk".
  #
  # Each command sequence writes its output to "res/{target}". Returns
  # the list with the two target names, in the order above.

  parags_ivt = "../074/st_files/str-parags.ivt"

  # Pipeline stages that are common to both rules:
  read_stage = f"cat {parags_ivt} \\"
  drop_stage = "  | egrep -v -e '^<f105r.10[;>]' \\"

  # Complete ".ivt" file, with all parags:
  full_ivt = "starps-fu-eva.ivt"
  tit[full_ivt] = f"copying full SPS IVTFF file {full_ivt} from {parags_ivt}"
  pre[full_ivt] = [ parags_ivt ]
  mak[full_ivt] = ( read_stage, drop_stage, f"  > res/{full_ivt}", )

  # Subset ".ivt" file, with the good lines only. The filtering
  # script and the gawk library it loads are prerequisites too:
  good_ivt = "starps-gd-eva.ivt"
  bad_line_filter = "remove_bad_lines_from_starps_ivt.gawk"
  gawk_error_lib = "work/error_funcs.gawk"
  tit[good_ivt] = f"extracting the good SPS source file {good_ivt} from {parags_ivt}"
  pre[good_ivt] = [ parags_ivt, bad_line_filter, gawk_error_lib ]
  mak[good_ivt] = (
      read_stage,
      drop_stage,
      f"  | {bad_line_filter} \\",
      f"      -i {gawk_error_lib} \\",
      f"  > res/{good_ivt}",
    )

  return [ full_ivt, good_ivt ]
  # ----------------------------------------------------------------------
  
def add_all_ivp_rules(pre, mak, tit):
  # Registers in {pre}, {mak}, and {tit} the rules for the ".ivp" files
  # "starps-{bsub}-eva.ivp", for {bsub} in "fu" and "gd". These are like
  # the ".ivt" files but with each parag joined into a single line,
  # without comments, weirdos, ligatures, etc.
  #
  # Each file is made from "res/starps-{bsub}-eva.ivt" by the script
  # "convert_starps_ivt_to_ivp.py", which writes "res/{target}".
  # Returns the list of target names, "fu" first.

  # The conversion script and the modules listed as its dependencies:
  converter = "convert_starps_ivt_to_ivp.py"
  imported = ( "work/process_funcs.py", "work/ivtff_format.py", "note_077_funcs.py", )

  made = []
  for bsub in "fu", "gd":
    ivp_name = f"starps-{bsub}-eva.ivp"
    ivt_path = f"res/starps-{bsub}-eva.ivt"

    tit[ivp_name] = f"creating '{bsub}' SPS line-per-parag file {ivp_name} from {ivt_path}"
    pre[ivp_name] = [ ivt_path, converter, *imported ]
    mak[ivp_name] = ( f"./{converter} {ivt_path} res/{ivp_name}", )
    made.append(ivp_name)

  return made
  # ----------------------------------------------------------------------
  
def add_all_ivf_rules(pre, mak, tit):
  # Registers in {pre}, {mak}, and {tit} the rules for the ".ivf" files,
  # which are like the ".ivp" files but with all the parags on the same
  # page joined into a single big parag.
  #
  # Only the "fu" subset is handled (no sense making the "gd" version):
  # "starps-fu-eva.ivf" is made from "res/starps-fu-eva.ivp" by the
  # script "convert_starps_ivp_to_ivf.py", which writes "res/{target}".
  # Returns the list of target names.

  # The conversion script and the modules listed as its dependencies:
  ivp_to_ivf = "convert_starps_ivp_to_ivf.py"
  imported = ( "work/process_funcs.py", "work/ivtff_format.py", "note_077_funcs.py", )

  made = []
  for bsub in ( "fu", ):
    ivf_name = f"starps-{bsub}-eva.ivf"
    ivp_path = f"res/starps-{bsub}-eva.ivp"

    tit[ivf_name] = f"creating '{bsub}' SPS line-per-page file {ivf_name} from {ivp_path}"
    pre[ivf_name] = [ ivp_path, ivp_to_ivf, *imported ]
    mak[ivf_name] = ( f"./{ivp_to_ivf} {ivp_path} res/{ivf_name}", )
    made.append(ivf_name)

  return made
  # ----------------------------------------------------------------------

def add_all_upp_rules(pre, mak, tit):
  # Registers in {pre}, {mak}, and {tit} the rules to create the parag
  # size files "starps-{bsub}-{unit}.upp" for two subsets (all parags,
  # "fu", and only the good ones, "gd") and three metrics (words with and
  # without comma spaces, and EVA letters without spaces).
  #
  # Notes kept from earlier drafts of this function: the subsets "fu"
  # and "gd" stand for "full" and "good", and the percent probability
  # of a ',' being a '.' was given as 100 for "wc", 0 for "wp", and
  # not applicable for "ec".
  #
  # Every rule has "res/starps-{bsub}-eva.ivp" as its source and runs
  # the script "count_units_per_parag.py" with arguments
  # "{book} {bsub} {unit}". Returns the list of target names, with
  # {bsub} varying slowest.

  targets = []

  # The script and its imports, which are the same for all rules:
  script = "count_units_per_parag.py"
  prfn_py_lib = "work/process_funcs.py"
  ivtff_py_lib = "work/ivtff_format.py"
  n077_py_lib = "note_077_funcs.py"

  book = "starps"
  for bsub in ("fu", "gd",):
    # The source depends only on the subset:
    source = f"res/starps-{bsub}-eva.ivp"
    for unit in ("wc", "wp", "ec", ):
      target = f"starps-{bsub}-{unit}.upp"
      script_options = f"{book} {bsub} {unit}"

      tit[target] = f"making SPS parag size file {target} from {source}"
      pre[target] = [ source, script, prfn_py_lib, n077_py_lib, ivtff_py_lib, ]
      # NOTE(review): unlike the ".ivp" and ".ivf" rules, this command has
      # no "./" prefix and names neither {source} nor the output file.
      # Presumably the script derives both from its arguments -- confirm.
      mak[target] = (
          f"{script} {script_options}",
        )
      targets.append(target)
  return targets
  # ----------------------------------------------------------------------

def add_all_single_hist_rules(pre, mak, tit):
  # Calls {mrf.add_single_size_hist_plot_rules} once for each dataset
  # "starps-{bsub}-{unit}", with {bsub} in "fu", "gd" (slowest) and
  # {unit} in "ec", "wc", "wp". The color comes from
  # {mrf.starps_hist_color(bsub, unit)}; the bin size is 5 for "ec"
  # and 1 for the other units.
  #
  # Returns the list of the values returned by those calls.
  plots = []
  for bsub in "fu", "gd":
    for unit in "ec", "wc", "wp":
      bin_width = 1 if unit != "ec" else 5
      plot = mrf.add_single_size_hist_plot_rules(
        pre, mak, tit,
        f"starps-{bsub}-{unit}",
        mrf.starps_hist_color(bsub, unit),
        bin_width,
      )
      plots.append(plot)
  return plots
  # ----------------------------------------------------------------------

def add_all_double_hist_rules(pre, mak, tit):
  # Calls {mrf.add_double_size_hist_plot_rules} once for each pair of
  # datasets listed below. Each dataset is "starps-{sub}-{unit}", and its
  # color comes from {mrf.starps_hist_color(sub, unit)}. The bin size
  # is set by the unit of the FIRST dataset of the pair: 5 for "ec",
  # 1 otherwise.
  #
  # Returns the list of the values returned by those calls.
  pairings = (
      ( ("fu", "ec"), ("gd", "ec") ), # To see full vs good difference in "ec" units.
      ( ("fu", "wc"), ("gd", "wc") ), # To see full vs good difference in "wc" units.
      ( ("gd", "wc"), ("gd", "wp") ), # To see "wc" vs "wp" difference in good subset.
      ( ("gd", "wc"), ("gd", "ec") ), # To see "wc" vs "ec" difference in good subset.
    )
  plots = []
  for (sub_a, unit_a), (sub_b, unit_b) in pairings:
    color_a = mrf.starps_hist_color(sub_a, unit_a)
    color_b = mrf.starps_hist_color(sub_b, unit_b)
    if unit_a == "ec":
      bin_width = 5
    else:
      bin_width = 1
    plot = mrf.add_double_size_hist_plot_rules(
      pre, mak, tit,
      f"starps-{sub_a}-{unit_a}", color_a,
      f"starps-{sub_b}-{unit_b}", color_b,
      bin_width,
    )
    plots.append(plot)
  return plots
  # ----------------------------------------------------------------------

def add_all_word_pos_pos_plot_rules(pre, mak, tit):
  # Placeholder for the word position-vs-position plot rules.
  #
  # No rule is implemented yet: {pre}, {mak}, and {tit} are left
  # untouched and the result is an empty list of targets. (The
  # {targets} list used to be returned without ever being defined,
  # so any call failed with {NameError}.)
  #
  # The Makefile fragment that these rules are meant to replace
  # is kept below as a reference:
  # ----------------------------------------------------------------------
  # pos-pos-plots: res/bencao-fu-zhu3-starps-${SPS_TAG}-wpos.png
  # pos-pos-plots: res/bencao-fu-zhu3-starps-${SPS_TAG}-nwo-hist.png
  #   res/starps-${SPS_TAG}.woc: \
  #              \
  #             res/starps-fu.ivp \
  #             ${MAKEFILE}
  #         ./list_wpositions_in_parags.py voyn-eva '${SPS_WORD}' res/starps-fu.ivp \
  #           > res/starps-${SPS_TAG}.woc
  # 
  #   res/bencao-fu-zhu3.woc: \
  #               list_wpositions_in_parags.py \
  #               in/bencao-fu.chu \
  #               ${MAKEFILE}
  #           ./list_wpositions_in_parags.py chin-chu '主' in/bencao-fu.chu \
  #             > res/bencao-fu-zhu3.woc
  # 
  #   res/bencao-fu-zhu3-starps-${SPS_TAG}-nwo-hist.png: \
  #               ${MAKEFILE} \
  #                \
  #               res/starps-${SPS_TAG}.woc \
  #               res/bencao-fu-zhu3.woc
  #           ./plot_two_word_pos_histograms.sh \
  #             starps-${SPS_TAG} '${SPS_WORD}' \
  #             bencao-fu-zhu3  zhu3
  # 
  #   res/bencao-fu-zhu3-starps-${SPS_TAG}-wpos.png: \
  #               ${MAKEFILE} \
  #               plot_two_word_pos_files.sh \
  #               res/starps-${SPS_TAG}.woc \
  #               res/bencao-fu-zhu3.woc
  #           ./plot_two_word_pos_files.sh \
  #             starps-${SPS_TAG} '${SPS_WORD}' \
  #             bencao-fu-zhu3  zhu3
  # 
  targets = []
  return targets
  # ----------------------------------------------------------------------

def add_all_word_pos_plot_rules(pre, mak, tit):
  # Word position plots.
  #
  # Hands to {mrf.add_many_word_pos_plot_rules} the book "starps", subset
  # "fu", input extension "ivf", unit "ec", the (pattern, tag) pairs
  # below, and the loci in {cands} followed by those in {cands_yes}.
  # Returns whatever that function returns.
  #
  # The other locus lists are not used by the code; they only record
  # how the remaining candidates were classified.

  # Pairs of word pattern and the tag that goes with it:
  patterns = (
      ( r"daiin", "daiin", ),
      ( r"daiin|laiin", "DAIIN", ),
      ( r"[dlrspf]aiii?n", "XAIIN", ),
    )
  patterns_not = ()  # Not used.

  # Loci that are plotted:
  cands = (
      "f105v.1",   # 4 hits.
      "f105v.14",  # 4 hits.
      "f106r.42",  # 4 hits.
      "f104v.22",  # 6 hits.
      "f114r.4",   # 6 hits.
      "f114r.8",   # 6 hits.
      "f114r.19",  # 6 hits.
      "f114r.24",  # 6 hits.
    )
  cands_yes = (
      "f112v.11",  # 3 hits.
      "f105v.20",  # 4 hits.
      "f105v.32",  # 5 hits.
    )

  # Loci that are NOT plotted (bookkeeping only):
  cands_later = (
      "f104v.1",   # 7 hits.
    )
  cands_no_match = (
      "f105r.42",  # 0 hits.
      "f105v.8",   # 2 hits.
      "f106v.42",  # 2 hits.
      "f115r.13",  # 2 hits.
      "f114r.14",  # 4 hits.
      "f104r.1",   # 3 hits.
      "f104r.12",  # 3 hits.
      "f105r.1",   # 3 hits.
      "f105v.5",   # 3 hits.
      "f106v.1",   # 3 hits.
      "f107r.21",  # 3 hits.
      "f113r.10",  # 3 hits.
    )
  # NOTE(review): "f108v.23" is listed twice, with different hit counts;
  # one of the two entries may be a typo -- confirm.
  cands_messy = (
      "f105r.17",  # 5 hits.
      "f108v.20",  # 5 hits.
      "f108v.23",  # 4 hits.
      "f108v.23",  # 5 hits.
      "f111r.10",  # 3 hits.
      "f111r.25",  # 4 hits.
      "f114r.32",  # 4 hits.
      "f114v.36",  # 4 hits.
    )

  plotted_locs = cands + cands_yes
  return mrf.add_many_word_pos_plot_rules(
    pre, mak, tit, "starps", "fu", "ivf", "ec", patterns, plotted_locs
  )
  # ----------------------------------------------------------------------

def add_wpos_delta_file_rules(pre, mak, tit, bsub, word, tag):
  # Adds rules to create the file "res/starps-{bsub}-ec-{tag}.wpd" with
  # the deltas of positions of word {word}, in "ec" units, in selected
  # parags of the "res/starps-{bsub}-eva.ivp" file.
  #
  # The rules themselves are added by {mrf.add_wpos_delta_file_rules},
  # called with book "starps", unit "ec", and the loci in {parag_cands}.
  # Returns the pair {(target, color)}, where {target} is the result of
  # that call and {color} is {mrf.starps_hist_color(bsub, "ec")}.
  #
  # NOTE(review): {parag_cands} is empty, so no loci are handed over,
  # while the populated lists below are never used -- confirm that
  # this is intended.
  enc = "eva"  # Not used below.
  unit = "ec"

  # Loci that are passed on:
  parag_cands = ()

  # Loci that are only recorded here:
  parag_cands_later = ()
  parag_cands_match = (
      "f105v.32",  # 5 hits.
    )
  parag_cands_maybe = (
      "f114r.24",  # 6 hits. Maybe b1.4.100.
      "f114r.19",  # 6 hits. Maybe b3.3.088.
      "f114r.8",   # 6 hits. Maybe b2.1.013, b1.4.100, b2.4.094, b2.5.118, b2.5.518, b3.3.088.
      "f114r.4",   # 6 hits. Maybe b2.5.118*, b2.5.518*, b3.3.080*, b1.4.100, b2.4.094, b3.3.088.
      "f104v.22",  # 6 hits. Maybe b3.3.080*, b1.4.100, b2.1.013, b2.4.094, b2.5.118, b2.5.518.
      "f106r.42",  # 4 hits. Maybe b2.1.013.
      "f105v.14",  # 4 hits. Maybe b2.4.094.
      "f105v.1",   # 4 hits. Maybe b2.4.094, b2.5.118, b2.5.518, b3.3.088.
      "f112v.11",  # 3 hits. Maybe b2.4.094, b3.3.080.
      "f105v.20",  # 4 hits. Maybe b2.1.013, b3.3.088.
    )
  parag_cands_not = ()

  # The color is obtained first, as before:
  hist_color = mrf.starps_hist_color(bsub, unit)
  wpd_target = mrf.add_wpos_delta_file_rules(
    pre, mak, tit, "starps", bsub, unit, word, tag, parag_cands
  )
  return wpd_target, hist_color
  # ----------------------------------------------------------------------
  
def add_all_rules(pre, mak, tit):
  # Adds to {pre}, {mak}, and {tit} the rules to build all derived
  # files that depend only on the SPS sources.
  #
  # Returns the list of ultimate targets, namely the concatenation
  # of the lists returned by the rule adders below, in this order.

  rule_adders = (
      add_all_ivt_rules,
      add_all_ivp_rules,
      add_all_ivf_rules,
      add_all_upp_rules,
      add_all_single_hist_rules,
      add_all_word_pos_plot_rules,
      # add_all_double_hist_rules,  # Currently left out.
    )

  targets = []
  for add_rules in rule_adders:
    # Plain list concatenation, so every adder must return a list:
    targets = targets + add_rules(pre, mak, tit)
  return targets
  # ----------------------------------------------------------------------