#! /bin/bash -ue
# Last edited on 2025-05-04 22:46:21 by stolfi

# Writes to stdout a few TeX "\def" macros holding the token and lexeme
# counts of one ".wfr" file, plus each count as a percentage of the total
# token count of the "raw.wfr" file in the same directory.
#
# Arguments (all three are required):
#   $1  sample section path "{LANG}/{BOOK}/{SEC}"; files are read from "dat/$1/".
#   $2  size option (e.g. "whole" or "trunc"); used only in the macro names.
#   $3  kind of list (e.g. "raw", "gud", "bad"); selects "dat/$1/$3.wfr".
#
# Input files: the first field of every non-empty line is added up to give
# the token count; the number of lines gives the lexeme count.
#
# Output: three "%" comment lines, then for each of "Tks" and "Wds" the lines
#   \def\{LANG}{BOOK}{SIZEOPT}{TXSEC}{KIND}{what}{COUNT}
#   \def\{LANG}{BOOK}{SIZEOPT}{TXSEC}{KIND}{what}Pct{PERCENT}
#
# Exits with status 1, before writing anything to stdout, if the argument
# count is wrong, an input file is unreadable, or the raw token total is zero.

# Repeat the shebang options so that they also apply under "bash SCRIPT".
set -ue

cmd="${0##*/}"
usage="${cmd} {LANG}/{BOOK}/{SEC} {whole|trunc} {raw|gud|bad}"

if [[ $# -ne 3 ]]; then
  echo "usage: ${usage}" 1>&2; exit 1
fi

smpsec="$1"
sizeopt="$2"
kind="$3"

# Split "{LANG}/{BOOK}/{SEC}" into its three components.
smp="${smpsec%/*}"
lang="${smp%/*}"
book="${smp##*/}"
sec="${smpsec##*/}"

dir="dat/${smpsec}"
rawfile="${dir}/raw.wfr"
kindfile="${dir}/${kind}.wfr"

# Map "." to "P" and the digits 0-9 to A-J in the section name; presumably
# because TeX control-word names may contain only letters.  Note that the
# other name components are used as given.
txsec="$(printf '%s\n' "${sec}" | tr '.0123456789' 'PABCDEFGHIJ')"

# Prints the sum of the first field over all non-empty lines of file $1
# (prints 0 when there are no such lines).
sum_counts() {
  gawk '/./{ t += $1 } END{ print t+0 }' < "$1"
}

# Check the inputs up front, so that a missing file gives a clear message
# instead of silently producing empty counts.
for f in "${rawfile}" "${kindfile}"; do
  if [[ ! -r "${f}" ]]; then
    echo "${cmd}: cannot read \"${f}\"" 1>&2; exit 1
  fi
done

# echo "(${cmd}) dir = ${dir}" 1>&2
ctTot="$(sum_counts "${rawfile}")"
if [[ "${ctTot}" == "0" ]]; then
  # The percentages below would otherwise divide by zero.
  echo "${cmd}: total token count in \"${rawfile}\" is zero" 1>&2; exit 1
fi

declare -A ct
ct[Tks]="$(sum_counts "${kindfile}")"
nlines="$(wc -l < "${kindfile}")"
ct[Wds]=$(( nlines ))  # Arithmetic expansion drops any padding from "wc".

printf '%s\n' "% Created $(date '+%Y-%m-%d %H:%M:%S') by ${cmd}"
printf '%s\n' "% Token and lexeme counts for ${smpsec}/${kind}.wfr"
printf '%s\n' "% "

# NOTE(review): both percentages are taken relative to the raw *token* total,
# including the one for the lexeme count -- confirm that this is intended.
macro="${lang}${book}${sizeopt}${txsec}${kind}"
for what in Tks Wds ; do
  # The macro name is passed as data, never as part of the printf format.
  printf '\\def\\%s{%d}\n' "${macro}${what}" "${ct[${what}]}"
  pct="$(gawk -v num="${ct[${what}]}" -v tot="${ctTot}" 'BEGIN{ printf "%.1f", 100*num/tot }')"
  printf '\\def\\%sPct{%s}\n' "${macro}${what}" "${pct}"
done
