#! /bin/bash -ue
# Last edited on 2025-09-24 15:56:39 by stolfi

# For every (sample, lexeme-count) combination, runs ./get_top_words.sh on the
# sample's listed sections, reformats its output as TeX, writes the result to
# dat/{sample}/top-{N}-words-per-section.tex, echoes that file to stdout, and
# hands it to update_paper_include.sh together with the matching tex/ path.
#
# Must be run from the directory that contains get_top_words.sh,
# tex_format_word_freqs.gawk and the dat/ tree (all paths are relative).

# Strict mode is set here as well as on the shebang line so that it also
# applies when the script is started as "bash script.sh". The pipefail option
# makes a failure in any stage of the pipeline below abort the run, instead of
# letting a truncated .tex file be installed.
set -euo pipefail

# Each entry is a sample path (everything before the last "/") followed by a
# comma-separated list of section tags (everything after the last "/").
sampsecs=(
  voyn/prs/pha,hea,heb,cos,str,zod,bio
  # NOTE(review): the next entry repeats the previous one verbatim, so that
  # sample is processed twice per lexeme count -- confirm whether a different
  # sample was intended.
  voyn/prs/pha,hea,heb,cos,str,zod,bio
  engl/cul/pre,her,rec
  latn/ptt/gen,exo,lev,num,deu
  latn/nwt/mat,mrk,luk,joh
  grek/nwt/mat,mrk,luk,joh
  span/qvi/one,two
  russ/ptt/gen,exo,lev,num,deu
  viet/ptt/gen,exo,lev,num,deu
  # NOTE(review): this entry uses "jhn" where latn/nwt and grek/nwt use "joh";
  # presumably it matches the tag used in that sample's data -- confirm.
  viet/nwt/mat,mrk,luk,jhn
  chin/ptt/gen,exo,lev,num,deu
  chin/ptn/gen,exo,lev,num,deu
)

for nlexemes in 16 24 40 ; do
  for ss in "${sampsecs[@]}" ; do
    echo "${ss}"  1>&2

    # Split the entry at its last slash: sample path / section list.
    sample="${ss%/*}"
    secscm="${ss##*/}"

    # Turn the comma-separated section list into an array without relying on
    # unquoted word splitting (which would also perform pathname expansion).
    IFS=',' read -r -a mainsecs <<< "${secscm}"

    tfile="${sample}/top-${nlexemes}-words-per-section.tex"
    echo "sample = ${sample}  mainsecs = (${mainsecs[*]})  nlexemes = ${nlexemes}  tex file = ${tfile}" 1>&2

    # Pipeline stages:
    #   1. get_top_words.sh produces the raw table for this sample and sections;
    #   2. gawk keeps fields 1, 2, 3 and 6 of every non-empty line;
    #   3. the sample's own reencode script is told to act on field 4;
    #   4. tex_format_word_freqs.gawk runs with showCounts=0 and showFreqs=1.
    ./get_top_words.sh "${nlexemes}" "${sample}" "${mainsecs[@]}" \
      | gawk '/./{ print $1, $2, $3, $6; }' \
      | "dat/${sample}/reencode_words_for_tex.gawk" -v field=4 \
      | ./tex_format_word_freqs.gawk \
          -v showCounts=0 -v showFreqs=1 \
      > "dat/${tfile}"

    # Show the generated table on stdout.
    cat "dat/${tfile}"

    # update_paper_include.sh is resolved through PATH (no "./" prefix, unlike
    # the other helper scripts) and receives the dat/ and tex/ file paths.
    update_paper_include.sh "dat/${tfile}"  "tex/${tfile}"
  done
done