#! /bin/bash -eu
# Last edited on 2026-01-17 23:31:24 by stolfi

# Get the N most common roots, ranked by their combined value over both
# sections ("hea" and "heb"), and save them one per line in `.top.rts`.
ntop=20

# list_top_roots COUNT
#
# Reads `st_words/hea-parags.rff` and `st_words/heb-parags.rff` (relative to
# the current directory).  Each line is used as "<number> <root>": column 1
# is added up across the two files and column 2 is the join key.
#
# Side effects: writes the filtered, root-sorted copies of the inputs to
# `.dat-hea.rff` and `.dat-heb.rff` in the current directory.
#
# Output: at most COUNT roots on stdout, one per line, in decreasing order
# of the summed column-1 value.  A root present in only one file counts as
# 0 in the other.
list_top_roots() {
  local count=$1
  local sec
  for sec in hea heb; do
    # Drop every entry containing a "?" and sort by root, since `join`
    # needs both inputs ordered on the join field.
    # `grep -E` replaces the obsolescent `egrep` (GNU grep >= 3.8 warns).
    grep -E -v -e '[?]' "st_words/${sec}-parags.rff" \
      | sort -b -k2,2 \
      > ".dat-${sec}.rff"
  done
  # Full outer join on the root: -a 1 -a 2 keeps unpaired lines from both
  # sides and -e 0 fills in the missing number, so the sum is always defined.
  join -1 2 -2 2 -a 1 -a 2 -e 0 -o 1.1,2.1,0 .dat-hea.rff .dat-heb.rff \
    | gawk '{ printf "%8.6f %s\n", $1 + $2, $3 }' \
    | sort -b -k1,1gr \
    | head -n "${count}" \
    | gawk '{ print $2 }'
}

list_top_roots "${ntop}" > .top.rts

# Get the words from each section that map to the same root.
#
# For every root listed in `.top.rts` this appends to ${mfile}:
#   - a "root = ..." header,
#   - the per-section word tables for "hea" and "heb", merged side by side
#     with `pr -m`,
#   - a separator line.
# The finished file is then printed to stdout.
mfile=st_words/map-by-root.txt
rm -f -- "${mfile}"
: > "${mfile}"

# Read the roots line by line instead of word-splitting `$( cat ... )`, so a
# root containing glob characters such as `*` is taken literally rather than
# expanded to file names.  File descriptor 3 keeps the list separate from
# the stdin of the commands run inside the loop.
while IFS= read -r root <&3; do
  [[ -n "${root}" ]] || continue
  for sec in hea heb; do
    # The input is read from st_words/, like every other file this script
    # touches.  NOTE(review): the previous path was `st_wordsa/`, which
    # looks like a typo -- confirm that no such directory is intended.
    #
    # Keep the lines whose word (field 2) maps to ${root} according to
    # root_from_word(), take the first 30 of them, and reformat fields 2
    # and 3 of the compute_freqs.gawk output (presumably frequency and
    # word -- verify against that script).
    gawk \
        -i root_from_word_funcs.gawk \
        -v root="${root}" \
        '{ wd = $2; rt = root_from_word(wd); if (rt == root) { print } }' \
        "st_words/${sec}-parags.wff" \
      | head -n 30 \
      | compute_freqs.gawk \
      | gawk '{ printf "%6.4f %s\n", $2, $3 }' \
      > ".eqv-${sec}.wff"
  done
  pr -m -t .eqv-hea.wff .eqv-heb.wff > .eqv.wffwff
  {
    echo "root = ${root}"
    echo " "
    cat .eqv.wffwff
    echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    echo " "
  } >> "${mfile}"
done 3< .top.rts
cat "${mfile}"

