#! /bin/bash -eu
# Repeat the options here: those on the "#!" line are ignored when the
# script is started as "bash SCRIPT" instead of being executed directly.
set -eu
# Last edited on 2026-03-06 17:19:34 by stolfi

# Arguments are {ivt_name} {usize} {kword} {ktag} {sloc} {color}.
#
# Reads a file with relative positions of a word in a parag or page.
# Plots those positions as dots on a horizontal line.
# 
# The input file name will be "res/{ivt_name}-{ktag}.wpo". The
# {ltype} of {ivt_name} may be "par" or "pag". Each line must have the
# form "{LOC} {NUNITS} {WPOS}" where {LOC} is a locus ID and {NUNITS},
# {WPOS} are fractional counts of text units. {NUNITS} is the size of
# the locus and must be the same on every line with the same {LOC}.
#
# Extracts the lines referring to the given {sloc} and makes a ".png"
# plot of the {WPOS} along a horz line. The {WPOS} are scaled by the
# {usize} and shifted so that the average is at abscissa 0.
#
# Writes the plot to {stdout}

# Command-line arguments, taken in order; each one is shifted out
# as soon as it has been saved.

# File name of trn file, sans "res/" and ".ivt":
ivt_name="${1}"
shift

# Nominal text unit size in hanzi:
usize="${1}"
shift

# Keyword/pattern searched (for title):
kword="${1}"
shift

# Filename-safe tag for {kword}:
ktag="${1}"
shift

# Locus ID of parag to plot:
sloc="${1}"
shift

# Color for plot:
color="${1}"
shift

# Input file with the word positions:
wpo_file="res/${ivt_name}-${ktag}.wpo"

# Progress messages go to {stderr}, since {stdout} carries the image.
echo "plotting ${ktag} word positions in parag ${sloc} ..." 1>&2

if [[ ! -r "${wpo_file}" ]]; then
  echo "** cannot read ${wpo_file}" 1>&2; exit 1
fi

# Private scratch directory, removed when the script exits for any reason.
# {temp} remains a path prefix for the names of all temporary files.
temp_dir="$( mktemp -d /tmp/plot-wpos-XXXXXX )" \
  || { echo "** cannot create temporary directory" 1>&2; exit 1; }
trap 'rm -rf -- "${temp_dir}"' EXIT
temp="${temp_dir}/$$"

temp_wpo_file="${temp}-${sloc}.wpo"

# Keeps only the lines whose first field is {sloc}, multiplying
# fields 2 and 3 by {usize}.
echo "  extracting scaled positions in parag ${sloc} ..." 1>&2
gawk \
    -v sloc="${sloc}" \
    -v usize="${usize}" \
    ' /^ *[a-z]/ { if ($1 == sloc) { print sloc, $2*usize, $3*usize; }} ' \
    "${wpo_file}" \
  > "${temp_wpo_file}"
# Debugging:
sed -e 's:^:!>  :' "${temp_wpo_file}" 1>&2

if [[ ! -s "${temp_wpo_file}" ]]; then
  echo "** no entries for ${sloc} in ${wpo_file}" 1>&2; exit 1
fi

echo "  computing scaled parag size ..." 1>&2
# Field 2 of every extracted record is the scaled parag size; take its
# extremes so that a mismatch between records can be detected.
min_psize="$( gawk 'BEGIN{m=99999} {nu=$2+0; if(nu<m){m=nu}} END{printf "%6.2f\n",m}' "${temp_wpo_file}" )"
max_psize="$( gawk 'BEGIN{m=0} {nu=$2+0; if(nu>m){m=nu}} END{printf "%6.2f\n",m}' "${temp_wpo_file}" )"
echo "  parag size (scaled) range = [ ${min_psize} _ ${max_psize} ]" 1>&2

if [[ "${min_psize}" != "${max_psize}" ]]; then
  echo "** inconsistent parag size **" 1>&2; exit 1
fi

# This message must go to {stderr} too: {stdout} is reserved for the image.
echo "  creating plot file for the parag size line ..." 1>&2
temp_psz_file="${temp}-${sloc}.psz"
# Two records in the same "{LOC} {NUNITS} {WPOS}" layout as the extracted
# file, because the plot command reads the abscissa from field 3.
# NOTE(review): the positions 0 and {max_psize} are presumably the two
# ends of the parag size line -- confirm. The values are copied as
# strings so that the shell's locale cannot alter the decimal point.
{
  printf '%s %s %s\n' "${sloc}" "${max_psize}" "  0.00"
  printf '%s %s %s\n' "${sloc}" "${max_psize}" "${max_psize}"
} > "${temp_psz_file}"

echo "  computing average and range of scaled positions ..." 1>&2
# Field 3 of every extracted record is a scaled word position.
min_wpos="$( gawk 'BEGIN{m=99999} {wp=$3+0; if(wp<m){m=wp}} END{printf "%6.2f\n",m}' "${temp_wpo_file}" )"
max_wpos="$( gawk 'BEGIN{m=0} {wp=$3+0; if(wp>m){m=wp}} END{printf "%6.2f\n",m}' "${temp_wpo_file}" )"
# The average is subtracted from every position when plotting. With no
# records it is undefined, so fail explicitly instead of dividing by zero.
avg_wpos="$( gawk '{n+=1; s+=$3} END { if (n == 0) { exit 1 }; printf "%6.2f\n",s/n }' "${temp_wpo_file}" )" \
  || { echo "** cannot compute average word position from ${temp_wpo_file}" 1>&2; exit 1; }
echo "  wpos avg = ${avg_wpos} range = [ ${min_wpos} _ ${max_wpos} ]" 1>&2

# Pixel size of the image as rendered by gnuplot.
temp_hsize=2200
temp_vsize=200

# NOTE(review): GDFONTPATH is a font search path for libgd-based gnuplot
# terminals; "pngcairo" probably ignores it -- confirm before removing.
export GDFONTPATH=ttf

# Renders both data files into {temp_plot_file}. The here-document is
# unquoted on purpose, so that the shell variables are expanded in it.
temp_plot_file="${temp}-big.png"
echo "=== creating plot of word positions" 1>&2
gnuplot <<EOF
set term pngcairo size ${temp_hsize},${temp_vsize} font "arial,20" noenhanced
set output "${temp_plot_file}"

# Average scaled word position; it is mapped to abscissa 0.
avg_wpos = ${avg_wpos}

set xrange [-51.0:+51.0]
set yrange [-0.05:+0.05]
set xtics 5
set mxtics 5
set grid xtics
unset xlabel

unset ytics
unset mytics
unset yzeroaxis
unset ylabel

unset key
unset title

# Every point gets ordinate 0; abscissas are taken relative to the average.
zer(k) = 0
dif(k) = column(k) - avg_wpos

plot \
  "${temp_psz_file}" using (dif(3)):(zer(0)) notitle with linespoints pt 2 ps 2.3 lw 2 lc rgb '${color}', \
  "${temp_wpo_file}" using (dif(3)):(zer(0)) notitle with linespoints pt 7 ps 2.3 lw 2 lc rgb '${color}'

quit
EOF

# Window title used when the image is shown on screen.
title="${kword} in ${sloc}"

# Give up unless gnuplot left a non-empty image file.
if [[ ! -s "${temp_plot_file}" ]]; then
  echo "** ${temp_plot_file} not generated" 1>&2
  exit 1
fi

# Shrink the rendering to half size, copy the result to {stdout},
# show it on screen, and finally delete the temporary files.
good_plot_file="${temp}-sma.png"
convert "${temp_plot_file}" -resize '50%' "${good_plot_file}"
cat "${good_plot_file}"
display -title "${title}" "${good_plot_file}"
rm ${temp}-*
