#! /bin/bash -ue
# Last edited on 2025-05-04 22:50:45 by stolfi

# Re-assert the shebang options here so that they also apply when the
# script is started as "bash <script>", which ignores the "#!" line:
set -ue

# The first command-line argument is the language tag; any further
# arguments are ignored. The argument count is tested BEFORE "$1" is
# expanded: under "-u" a bare "$1" with no arguments would abort with
# "unbound variable" and the message below would never be printed.
if [[ $# -lt 1 || -z "${1:-}" ]]; then echo "** lang not specified" 1>&2; exit 1; fi
lang="$1"; shift

# Only the "voyn" language is supported by this script:
if [[ "${lang}" != "voyn" ]]; then echo "** cannot handle language '${lang}'" 1>&2 ; exit 1; fi

# Per-language top folders of this note: ${gen_lang_dir} receives the
# output data files and ${tex_lang_dir} the output TeX tables.
gen_lang_dir="gen/${lang}"
tex_lang_dir="tex/${lang}"

# Make sure both folders exist (no-op for the ones already there):
for out_dir in "${gen_lang_dir}" "${tex_lang_dir}"; do
  mkdir -p "${out_dir}"
done

# Creates all the data files and tables for all the books and sections
# of language ${lang}.

# Create a table ${unit_to_type_tbl} mapping unit ID to unit type, from
# the table ${full_unit_table} that describes the various units of the VMS.
#
# The input is read as ":"-separated records. For every non-empty line,
# fields 2 and 6 are printed separated by a blank; only the lines whose
# first printed field has the form "f<digits><letter>[<digits>].<letter>..."
# (e.g. "f1r.P1") are kept.
unit_to_type_tbl="${gen_lang_dir}/unit-to-type.tbl"
echo "creating unit type table ${unit_to_type_tbl} ..." 1>&2
full_unit_table="inp/${lang}/unit-table.txt"

# Fail early, with a clear message, if the input table is missing:
if [[ ! -r "${full_unit_table}" ]]; then
  echo "** cannot read unit table file ${full_unit_table}" 1>&2; exit 1
fi

# "grep" exits with status 1 when no line matches. Since it is the last
# stage of the pipeline, under "set -e" that would kill the script before
# the diagnostic below could be printed; so the status is captured instead.
grep_status=0
gawk -v FS=":" '/./{print $2, $6}' "${full_unit_table}" \
  | grep -E -e '^f[0-9]+[a-z][0-9]*[.][A-Za-z][0-9a-z]* ' \
  > "${unit_to_type_tbl}" \
  || grep_status=$?

# Status 1 (no match) leaves an empty file; status 2 or more is a grep error.
# NOTE(review): the message names "make_unit_to_type_table.sh", presumably
# the script this pipeline was inlined from -- confirm and update if stale.
if (( grep_status > 1 )) || [[ ! -s "${unit_to_type_tbl}" ]]; then
  echo "** make_unit_to_type_table.sh failed -- ${unit_to_type_tbl} not created" 1>&2; exit 1
fi

# Report the size of the new table and show its first 5 / last 3 lines:
vms_wc.sh "${unit_to_type_tbl}"
./show_first_last_lines.sh 5 3 "${unit_to_type_tbl}"

# Tags of the books to process, in processing order:
declare -a books=( tak maj prs lab ini mid fin )
echo "processing books for language ${lang} (${books[@]}) ..." 1>&2

# Run the per-book driver once for each book tag:
for book in "${books[@]}"
do
  ./make_lang_book_all_data_and_tables.sh "${lang}" "${book}"
done

# Run the bad token/lexeme counting script over all the books at once,
# saving whatever it writes to standard output in ${bad_counts}:
bad_counts="${gen_lang_dir}/bad_counts.txt"
echo "counting and listing bad tokens and lexemes in ${lang} per book ..." 1>&2
./count_bad_token_lexemes_per_book_and_type.sh "${lang}" "${books[@]}" \
  > "${bad_counts}"
