#! /bin/bash -eu
# Last edited on 2026-01-17 23:30:08 by stolfi
#
# For every file "st_files/{st}.ivt" (where {st} has the form "{sec}-{type}"):
#   * writes "st_words/{st}.wff", the output of ivtff_frac_word_counts.py
#     applied to the file recoded from latin-1 to utf-8;
#   * writes "st_words/{st}.rff", same, but with field 2 of every line first
#     replaced by root_from_word(field 2);
#   * appends one row to the summary table "st_words/counts.txt".
# Messages that ivtff_frac_word_counts.py prints to stderr are appended to
# ".errors/wff-{st}" and ".errors/rff-{st}".
#
# Requires: recode, gawk, and ivtff_frac_word_counts.py on PATH;
# root_from_word_funcs.gawk must be findable by "gawk -i".

# Repeat the shebang options here because they are lost when the script is
# started as "bash script.sh"; pipefail makes a failing recode/gawk stage
# abort the run instead of silently leaving a truncated output file.
set -eu -o pipefail

mkdir -p .errors st_words

tfile="st_words/counts.txt"

# count_lines FILE
# Prints the number of newline-terminated lines in FILE as a bare integer
# (the arithmetic expansion strips any padding that some wc versions add).
count_lines() {
  local n
  n=$(wc -l < "$1") || return
  printf '%s\n' "$(( n ))"
}

# count_matching_lines PATTERN FILE
# Prints the number of lines of FILE that match the grep PATTERN.
# grep exits with 1 when nothing matches, which is a valid count of 0 here;
# only statuses above 1 (real errors) are propagated.
count_matching_lines() {
  local count status=0
  count=$(grep -c -e "$1" -- "$2") || status=$?
  if (( status > 1 )); then
    return "${status}"
  fi
  printf '%s\n' "${count}"
}

# Discard the results of any previous run:
rm -f st_words/*.wff st_words/*.rff

# Table header (two lines: column names and a dashed rule):
fmt="%-2s %-3s %-8s  %6s %6s %6s  %6s %6s\n"
# shellcheck disable=SC2059  # ${fmt} is a fixed format defined just above
printf "${fmt}" "#0" "sec" "type" "lines" "JS" "RZ" "words" "roots" > "${tfile}"
# shellcheck disable=SC2059
printf "${fmt}" "#1" "---" "------" "------" "------" "------" "------" "------" >> "${tfile}"

# Files are taken in glob order. An earlier variant ordered them with
# "sort -t- -k2,2 -k1,1" (by the part after "-", then the part before it).
for ivt in st_files/*.ivt; do
  # An unmatched glob stays literal; skip it so an empty directory is a no-op.
  [[ -e "${ivt}" ]] || continue

  # {st} is the file name without directory and ".ivt" extension:
  st="${ivt##*/}"
  st="${st%.ivt}"

  # Extract word frequencies "{st}.wff" of section+type {st}.
  # NOTE(review): the .errors logs are appended to and never cleared, so they
  # accumulate across runs -- confirm that this is intended.
  recode latin-1..utf-8 < "${ivt}" \
    | ivtff_frac_word_counts.py 2>> ".errors/wff-${st}" \
    > "st_words/${st}.wff"

  # Extract root frequencies "{st}.rff" of section+type {st}.
  # The empty pattern "//" matches every line; assigning to $2 makes gawk
  # rebuild the record, so fields come out separated by single spaces.
  recode latin-1..utf-8 < "${ivt}" \
    | gawk \
        -i root_from_word_funcs.gawk \
        '//{ $2 = root_from_word($2); print; }' \
    | ivtff_frac_word_counts.py 2>> ".errors/rff-${st}" \
    > "st_words/${st}.rff"

  # Add line to counts table ${tfile}:
  nlin=$(count_lines "${ivt}")
  nlin_js=$(count_matching_lines '[;]U[>]' "${ivt}")   # Column "JS".
  nlin_rz=$(count_matching_lines '[;]Z[>]' "${ivt}")   # Column "RZ".
  nwds=$(count_lines "st_words/${st}.wff")
  nrts=$(count_lines "st_words/${st}.rff")

  # "sec" is the text before the first "-", "type" the text after the last.
  # Quoted so that an empty piece cannot shift the remaining columns.
  # shellcheck disable=SC2059
  printf "${fmt}" "|" "${st%%-*}" "${st##*-}" \
    "${nlin}" "${nlin_js}" "${nlin_rz}" "${nwds}" "${nrts}" >> "${tfile}"
done
