#! /bin/bash -ue
# Last edited on 2025-05-04 23:02:40 by stolfi

# Makes tables that cover a whole ${lang} and ${book}, such as token and
# lexeme counts per section. To be called after the per-section files
# for ${lang} and ${book} have been created.

# Command-line arguments: the language tag and the book tag, in that order.
# "${1-}" and "${2-}" expand to the empty string when the argument is absent,
# so that under "set -u" (the "-ue" on the shebang line) a missing argument
# reaches the explicit checks below instead of dying with "unbound variable".
lang="${1-}"
book="${2-}"

if [[ -z "${lang}" ]]; then echo "** lang not specified" 1>&2; exit 1; fi
if [[ -z "${book}" ]]; then echo "** book not specified" 1>&2; exit 1; fi

# Consume the two arguments only now that both are known to be present
# (a failing "shift" would abort the script under "set -e").
shift 2

# Top folders for output data and output TeX tables of this note.
# Both are created if they do not exist yet.  The names are quoted so that
# a ${lang} or ${book} containing blanks or glob characters is neither
# split into several words nor expanded as a file name pattern.
gen_book_dir="gen/${lang}/${book}"; mkdir -p "${gen_book_dir}"
tex_book_dir="tex/${lang}/${book}"; mkdir -p "${tex_book_dir}"

# Section tags are read as blank-separated words from two list files in
# ${gen_book_dir}.  "read -r -d '' -a" splits the whole file on blanks, tabs
# and newlines, but (unlike an unquoted command substitution) does not
# expand glob characters in the tags.  It returns non-zero on reaching
# end-of-file, which is the normal case here, hence the "|| true".
# A missing or unreadable list file is reported explicitly and is fatal.

occ_sec_list="${gen_book_dir}/sections-occ.tags"
echo "getting the list of sections occurring in ${lang}/${book} from ${occ_sec_list} ..." 1>&2
if [[ ! -r "${occ_sec_list}" ]]; then echo "** cannot read ${occ_sec_list}" 1>&2; exit 1; fi
read -r -d '' -a occ_secs < "${occ_sec_list}" || true
echo "    sections that occurred = ${occ_secs[*]}" 1>&2

use_sec_list="${gen_book_dir}/sections-use.tags"
echo "getting the list of sections to tabulate in ${lang}/${book} from ${use_sec_list} ..." 1>&2
if [[ ! -r "${use_sec_list}" ]]; then echo "** cannot read ${use_sec_list}" 1>&2; exit 1; fi
read -r -d '' -a use_secs < "${use_sec_list}" || true
echo "    sections to use = ${use_secs[*]}" 1>&2

# Text table of token and lexeme counts per section, written to
# ${book_rgb_cts_txt}.  The helper receives the data folder, the sections
# to tabulate, and then the literal words "/" and "tot.1" (presumably a
# separator followed by the tag of the totals pseudo-section -- confirm
# against count_raw_gud_bad_toks_wrds.sh).  All expansions are quoted so
# that each path and each section tag is passed as exactly one argument.
book_rgb_cts_txt="${gen_book_dir}/raw-gud-bad-tw-counts.txt"
echo "making table ${book_rgb_cts_txt} of token and lexeme counts per section ..." 1>&2
./count_raw_gud_bad_toks_wrds.sh "${gen_book_dir}" "${use_secs[@]}" / tot.1 \
  > "${book_rgb_cts_txt}"
# Show the first 10 and last 10 lines of the table as a quick visual check:
./show_first_last_lines.sh 10 10 "${book_rgb_cts_txt}"
 
echo "tabulating per-section file sizes for ${lang}/${book} ..." 1>&2
# Names of the per-section files whose sizes are to be tabulated:
files_to_tabulate=(
  raw.evt
  raw.tlw gud.tlw bad.tlw
  raw.wfr gud.wfr bad.wfr
  raw.wdf gud.wdf bad.wdf
)
for ff in "${files_to_tabulate[@]}"; do
  echo "tabulating sizes of ${ff} per section for ${lang}/${book} ..." 1>&2

  # Names of the per-section copies of ${ff}, relative to ${gen_book_dir}.
  # Only the entries of ${use_secs[@]} that contain a lowercase letter are
  # kept; "/${ff}" is appended to each.  "grep -E" replaces the obsolescent
  # "egrep".  "read -d ''" returns non-zero at end-of-file (the normal case),
  # hence the "|| true"; it splits on blanks without glob expansion.
  read -r -d '' -a sec_files < <(
    printf '%s\n' "${use_secs[@]}" \
      | grep -E -e '[a-z]' \
      | sed -e "s:\$:/${ff}:"
  ) || true
  echo "    sec-files = ${sec_files[*]}" 1>&2
  tot_file="tot.1/${ff}"
  # The helper is addressed as "../../../" because ${gen_book_dir} is three
  # levels below the folder that holds this script's helpers.
  ( cd "${gen_book_dir}" && ../../../vms_wc.sh "${sec_files[@]}" "${tot_file}" )

  # Check whether counts add up: the line count of all section files
  # together is printed next to the line count of the "tot.1" file.
  printf "\n"
  # The extra /dev/null keeps "cat" from reading stdin if the list is empty.
  ( cd "${gen_book_dir}" && cat "${sec_files[@]}" /dev/null ) \
    | wc -l \
    | gawk '/./{ t += $1; } END { printf "%-24s %6d lines\n", "total of sections:", t }'
  ( cd "${gen_book_dir}" && cat "${tot_file}" ) \
    | wc -l \
    | gawk '/./{ t += $1; } END{ printf "%-24s %6d lines\n", "section tot.1:",  t }'
done

# TEX TABLES OF RAW/GOOD/BAD TOKENS/LEXEMES PER SECTION

# Both gawk scripts below read the text table ${book_rgb_cts_txt} on stdin
# and write a TeX file under ${tex_book_dir}.  The progress messages go to
# stderr, like every other progress message of this script, so that they do
# not get mixed with the tabulated data written to stdout.

book_rgb_cts_tex="${tex_book_dir}/raw-gud-bad-tw-counts.tex"
echo "formatting table as TeX file ${book_rgb_cts_tex} ..." 1>&2
./tex_format_lang_book_table_raw_gud_bad_tk_wd.gawk \
  < "${book_rgb_cts_txt}" \
  > "${book_rgb_cts_tex}"

book_rgb_summ_tex="${tex_book_dir}/raw-gud-bad-summary.tex"
echo "creating summary macros as TeX file ${book_rgb_summ_tex} ..." 1>&2
# The language and book tags are handed to the gawk script as variables:
./tex_format_book_lang_summary_raw_gud_bad_tk_wd.gawk \
    -v lang="${lang}" -v book="${book}" \
  < "${book_rgb_cts_txt}" \
  > "${book_rgb_summ_tex}"