#! /bin/bash -eu
# Last edited on 2026-03-06 16:56:39 by stolfi

# Arguments are {ivt_name} {usize} {color} {bin_size}.
# 
# Reads file "res/{ivt_name}.upp", whose {ltype} must be "par". 
# 
# Each line must be "{LOC} {NUNITS}" where {LOC} is the locus ID of a
# parag (like "b.1.2.033" or "f103v.12") and {NUNITS} is an integer
# count of text units (chars, words, etc) in that parag.
# 
# Plots a histogram of the {NUNITS}, scaled by {usize}, with given {color}
# and {bin_size}.

# Re-assert the shebang options so they also hold when the script is run
# as "bash {script}" (which ignores the shebang line), and make a pipeline
# fail if any of its stages fails.
set -euo pipefail

# Fail with a usage message instead of a bare "unbound variable" error.
# Extra arguments are ignored.
if (( $# < 4 )); then
  echo "usage: $0 {ivt_name} {usize} {color} {bin_size}" 1>&2
  exit 1
fi

ivt_name="$1"; shift  # Ivt_Name of input data file (sans "res/" or ".upp").
usize="$1"; shift     # Nominal text unit size.
color="$1"; shift     # Bar color, e.g. "#ff0077"
bin_size="$1"; shift  # Width of histogram bins.

echo "=== $0 ===" 1>&2
echo "  ivt_name = '${ivt_name}'  usize = ${usize}  color = '${color}'" 1>&2
echo "  bin_size = ${bin_size}" 1>&2

# All intermediate files live in a private, unpredictably named directory
# that is removed on every exit path (success, failure, or "set -e" abort).
# File names inside it are fixed, so an {ivt_name} containing "/" cannot
# produce a path in a nonexistent directory.
tmp_dir="$(mktemp -d "${TMPDIR:-/tmp}/upp-hist-XXXXXX")"
trap 'rm -rf -- "${tmp_dir}"' EXIT

upp_file="res/${ivt_name}.upp"   # Input: one "{LOC} {NUNITS}" line per parag.
nwh_file="${tmp_dir}/hist.nwh"   # Histogram produced by the helper script.
dat_file="${tmp_dir}/hist.dat"   # Polygonal outline of the histogram, for gnuplot.
temp_plot="${tmp_dir}/big.png"   # Full-size plot, before downscaling.
good_plot="res/${ivt_name}-upp-hist.png"  # Final half-size plot.

# Report the number of lines in the input (also aborts if it is missing):
wc -l "${upp_file}" 1>&2

# Find the largest {NUNITS} over all data lines.  A data line starts with
# a locus ID, i.e. a lowercase letter followed by a digit ("f103v.12") or
# by a period ("b.1.2.033").  The count is stored as a number ("$2+0") so
# that a zero-padded field like "033" is not pasted into gnuplot, where a
# leading "0" would be read as octal.
echo "computing max unit count in ${upp_file} ..." 1>&2 
U_max=$(
  gawk -v max=0 \
    '/^[a-z][.0-9]/ { if ($2+0 > max) { max = $2+0 } } END { print max }' \
    "${upp_file}"
)
echo "U_max = ${U_max}" 1>&2

echo "computing histogram ${upp_file} -> ${nwh_file} ..." 1>&2
make_hist_of_units_per_parag.sh "${usize}" "${bin_size}" \
  < "${upp_file}" \
  > "${nwh_file}"

echo "computing plot coords file ${nwh_file} -> ${dat_file} ..." 1>&2 
turn_histogram_into_polygonal_line.gawk \
    -i error_funcs.gawk \
    -v num=1 -v which=0 \
  < "${nwh_file}" \
  > "${dat_file}"

# Directory where gnuplot's font lookup searches for the "arial" font:
export GDFONTPATH=ttf

# The plot is rendered at twice the final size and shrunk afterwards.
# The choice of horizontal range is made on {W_max}, the largest scaled
# count, since that is the quantity used for the range in the "else"
# branch; testing the unscaled {U_max} could clip data when {usize} > 1.
# NOTE(review): this assumes the helper's output is in scaled units (W),
# which only the "else" branch implies -- confirm against the helper.
echo "=== creating plot of ${dat_file}" 1>&2 
gnuplot <<EOF
big_hsize = 2400
big_vsize = 800
set term pngcairo size (big_hsize),(big_vsize) font "arial,20" noenhanced
set output "${temp_plot}"

ivt_name = "${ivt_name}"
usize = ${usize}
bin_size = ${bin_size}
U_max = ${U_max}

W_max = U_max * usize

if (W_max < 99) {
  set xrange [-1.0:+99.0]
  set xtics 5
  set mxtics 5
} else {
  set xrange [-5.0:(1.1*W_max)]
  set xtics 10
  set mxtics 2
}

set yrange [-1.0:]
set ytics 5
set mytics 5
set xzeroaxis
set grid ytics
set grid xtics
set xlabel "Number of units in paragraph (W)"
set ylabel "Number of paragraphs (P)"

tS = (usize == 1 ? "" : " scaling: W = ${usize}")
tB = (bin_size == 1 ? "" : " bin size = ${bin_size}")

set title (ivt_name . tS  . tB)

set nokey

plot "${dat_file}" using 1:2 notitle with filledcurves lw 2 lc rgb '${color}'

quit
EOF

# gnuplot may exit with status 0 without writing a usable image, so check
# explicitly that the plot file exists and is non-empty.
if [[ ! -s "${temp_plot}" ]]; then
  echo "** ${temp_plot} not generated" 1>&2; exit 1
fi

# Shrink to the final size and show the result.  Temporary files are
# removed by the EXIT trap set above.
convert "${temp_plot}" -resize '50%' "${good_plot}"
display "${good_plot}"
