#! /bin/bash -e
# Last edited on 2026-03-04 15:17:18 by stolfi
    
# Take the two positional arguments in order, consuming each one with
# "shift" as soon as it has been saved.
ivp_file="${1}"
shift
format="${1}"
shift

???

# Reads an ".ivt" file with one line for each VMS paragraph, and collects all {k}-word tuples.

???name="${ivp_file/.*/}"
???ext="${ivp_file/*./}"

-f common_functions_076.gawk
    
  Extracting {tsize}-word tuples, for tsize from 1 to 9, from the two files:
  
    # For every tuple size from 1 to 9 and for each of the two input files,
    # pipe the file through extract_keyed_tuples.py (passing the tuple size
    # as "-v tsize=...") and save the output in a ".tup" file whose name
    # carries the tuple size.
    # NOTE(review): the "???" tokens are unresolved placeholders.  As written,
    # "???name=..." is not a valid shell assignment and "${???name}" is a bad
    # substitution, so this loop cannot run until they are filled in.
    for tsize in 1 2 3 4 5 6 7 8 9; do
      for ifile in bencao-fu-pys.ivt starps.eva ; do
        # "${ifile/.*/}" deletes the first "." and everything after it,
        # leaving the file name without its extension.
        ???name="${ifile/.*/}"
        # "${ifile/*./}" deletes everything up to and including the last ".",
        # leaving only the extension.  It is not used again inside this loop.
        ???ext="${ifile/*./}"
        tfile="${???name}-${tsize}.tup"
        echo "=== ${ifile} -> ${tfile} ==="
        # NOTE(review): the bare "???" below presumably stands for arguments
        # that are still to be decided -- confirm before running.
        cat ${ifile} \
          | extract_keyed_tuples.py ??? -v tsize=${tsize} \
          > ${tfile}
      done
    done
    
  Checking repeated tuples:
  
    # For every tuple size from 3 to 9 and for each base name, sort the
    # corresponding ".tup" file, pass it through list_repeated_tuples.gawk,
    # and write the result to a ".dup" file with the same base name and size.
    # NOTE(review): "???name" is an unresolved placeholder; "for ???name in"
    # is not a valid loop variable, so this cannot be parsed by the shell yet.
    # NOTE(review): the base names here are "bencao" and "starps"; presumably
    # they must match the ".tup" files produced by the extraction step, whose
    # first input is "bencao-fu-pys.ivt" -- verify the names agree.
    for tsize in 3 4 5 6 7 8 9; do
      for ???name in bencao starps ; do
        tfile="${???name}-${tsize}.tup"
        rfile="${???name}-${tsize}.dup"
        echo "=== ${tfile} -> ${rfile} ==="
        # NOTE(review): "-k2" with no end field means "field 2 to end of
        # line", so the later "-k4 -k5n" keys can hardly ever break a tie.
        # Possibly "-k2,2 -k4,4 -k5,5n" was intended -- confirm.
        cat ${tfile} \
          | sort -k2 -k4 -k5n \
          | list_repeated_tuples.gawk \
          > ${rfile}
      done
    done
  
  Checking for repeated patterns:

    # For each of the two input files: extract word tuples with
    # extract_word_tuples.gawk, sort them, save them in a "-tup" file, then
    # pass that file through list_repeated_patterns.gawk and save the result
    # in a "-pat" file.  Both outputs keep the extension of the input file.
    # NOTE(review): the "???" tokens are unresolved placeholders.  As written,
    # "???name=..." is not a valid shell assignment and "${???name}" is a bad
    # substitution, so this loop cannot run until they are filled in.
    # NOTE(review): the loop variable "tsize" is never used in the body: the
    # tuple size is hard-coded as "tsize=4" and the output file names do not
    # include it, so both iterations write the same files.  Either the
    # outer loop or the hard-coded 4 looks like a leftover -- confirm.
    for tsize in 3 4; do
      for ifile in bencao-fu-pys.ivt starps-fu-eva.ivt ; do
        # "${ifile/.*/}" deletes the first "." and everything after it;
        # "${ifile/*./}" deletes everything up to and including the last ".".
        ???name="${ifile/.*/}"
        ???ext="${ifile/*./}"
        tfile="${???name}-tup.${???ext}"
        rfile="${???name}-pat.${???ext}"
        # NOTE(review): the banner shows tfile -> rfile, although the first
        # pipeline below actually reads ${ifile} and writes ${tfile}.
        echo "=== ${tfile} -> ${rfile} ==="
        # NOTE(review): "-k1" with no end field spans the whole line, so
        # the later "-k3 -k2" keys can hardly ever break a tie.  Possibly
        # "-k1,1 -k3,3 -k2,2" was intended -- confirm.
        cat ${ifile} \
          | extract_word_tuples.gawk -v tsize=4 \
          | sort -k1 -k3 -k2 \
          > ${tfile}

        # Second stage: scan the sorted tuples for repeated patterns.
        cat ${tfile} \
          | list_repeated_patterns.gawk \
          > ${rfile}
      done
    done
