#! /bin/bash -eu
# Last edited on 2026-02-18 03:10:20 by stolfi

# Strict mode is repeated here because options given on the "#!" line
# are ignored when the script is started as "bash SCRIPT ...".
set -eu

# Command line: {NAME0} {WORD0} {NAME1} {WORD1}.
# Fail with a readable message instead of an "unbound variable" error.
if (( $# < 4 )); then
  echo "usage: ${0##*/} NAME0 WORD0 NAME1 WORD1" 1>&2
  exit 1
fi

name0="$1"; shift     # Name of histogram 0; selects "res/{name0}.woc" and labels its curve.
word0="$1"; shift     # Word searched in file 0; only shown in the plot title.
name1="$1"; shift     # Name of histogram 1; selects "res/{name1}.woc" and labels its curve.
word1="$1"; shift     # Word searched in file 1; only shown in the plot title.

# Reads files "res/{name0}.woc" and "res/{name1}.woc" for two datasets,
# 0 and 1. Each file must contain entries of the form "{LOC} {PLEN} {WPOS}"
# where {LOC} is the locus ID of a parag, {PLEN} is the length of the parag,
# and {WPOS} is the position of a word within the parag; {PLEN} and {WPOS}
# are both expressed as fractions of the average parag length.
#
# Plots two histograms, 0 and 1, of the {WPOS} field on the same plot,
# and writes the result to "res/{name0}-{name1}-nwo-hist.png".

# Scratch files live in a private directory created by mktemp, so their
# names cannot be guessed (or pre-created) by other users of /tmp.
tmp_dir="$(mktemp -d "${TMPDIR:-/tmp}/nwo-hist.XXXXXX")" \
  || { echo "** cannot create temporary directory" 1>&2; exit 1; }

# Exit handler for the scratch directory.  On success the directory is
# removed.  On failure it is removed only if empty; otherwise it is kept,
# and its location reported, so the intermediate files can be inspected.
cleanup_tmp_dir() {
  local status=$?
  if (( status == 0 )); then
    rm -rf -- "${tmp_dir}"
  elif ! rmdir -- "${tmp_dir}" 2> /dev/null; then
    echo "** temporary files kept in ${tmp_dir}" 1>&2
  fi
}
trap cleanup_tmp_dir EXIT

# Common prefix of every scratch file: "{tmp}-{something}".
tmp="${tmp_dir}/$$"

# Input files: word occurrence lists of the two datasets.
woc_file0="res/${name0}.woc"
woc_file1="res/${name1}.woc"

# Scratch files that receive the word position histograms.
nwo_file0="${tmp}-${name0}.nwo"
nwo_file1="${tmp}-${name1}.nwo"

# Runs make_hist_of_word_positions.sh with the ".woc" file $1 on its
# standard input and stores its output in the ".nwo" file $2, after
# announcing the step on stderr.
make_nwo_file() {
  local woc="$1"
  local nwo="$2"
  echo "=== computing word pos histogram ${woc} -> ${nwo}" >&2
  make_hist_of_word_positions.sh < "${woc}" > "${nwo}"
}

# One histogram per dataset, dataset 0 first.
make_nwo_file "${woc_file0}" "${nwo_file0}"

make_nwo_file "${woc_file1}" "${nwo_file1}"

# Convert each ".nwo" histogram into a ".dat" file of plot coordinates.
# Everything is quoted so that dataset names containing blanks or glob
# characters are not split or expanded by the shell.
names=( "${name0}" "${name1}" )
nwo_files=( "${nwo_file0}" "${nwo_file1}" )

dat_files=()
for which in 0 1 ; do
  name="${names[which]}"
  nwo_file="${nwo_files[which]}"
  # The dataset index is part of the file name so that the two outputs
  # stay distinct even when both datasets have the same name.
  dat_file="${tmp}-${which}-${name}.dat"
  echo "=== computing plot coords file ${nwo_file} -> ${dat_file}" 1>&2
  # The input is redirected rather than piped from "cat", so a missing
  # or unreadable histogram file aborts the script instead of silently
  # producing an empty plot file.
  # NOTE(review): "num" and "which" look like the number of histograms
  # sharing the plot and the index of this one -- confirm in the gawk script.
  turn_histogram_into_polygonal_line.gawk \
      -v num=2 -v which="${which}" \
      -i ./error_funcs.gawk \
    < "${nwo_file}" \
    > "${dat_file}"
  dat_files+=( "${dat_file}" )
done

# Draw both outlines into one oversized PNG, "{tmp}-big.png".

# Directory searched for the TrueType font named in "set term" below.
export GDFONTPATH=ttf

temp_plot="${tmp}-big.png"

# Canvas size in pixels (width,height) and fill colors of curves 0 and 1.
canvas_size="2200,800"
fill0='#ff2200'
fill1='#000088'

echo "=== creating plot of ${dat_files[0]} ${dat_files[1]}" >&2
gnuplot <<EOF
set term png size ${canvas_size} font "arial,20" noenhanced
set output "${temp_plot}"

set xrange [-0.05:+2.05]
set yrange [-1.0:]
set xtics 0.1
set mxtics 10
set ytics 20
set mytics 4
set xzeroaxis
set grid ytics
set grid xtics
set xlabel "Position of word in parag (WPOS)"
set ylabel "Number of occurrences"

set title "Word position histograms '${word0}' '${word1}' "

title0 = "${name0}"
title1 = "${name1}"

plot \
  "${dat_files[0]}" using 1:2 title title0 with filledcurves lw 2 lc rgb '${fill0}', \
  "${dat_files[1]}" using 1:2 title title1 with filledcurves lw 2 lc rgb '${fill1}'

quit
EOF

# Publish the plot: gnuplot must have left a non-empty image behind.
if [[ ! -s "${temp_plot}" ]]; then
  echo "** ${temp_plot} not generated" 1>&2; exit 1
fi

# Scale the oversized rendering down to half size for the final image.
# Paths are quoted because they embed the user-supplied dataset names.
good_plot="res/${name0}-${name1}-nwo-hist.png"
convert "${temp_plot}" -resize '50%' "${good_plot}"

# The scratch files are no longer needed once the final image exists;
# remove them before starting the viewer so that a failing "display"
# (e.g. no X server) does not leave them behind.
rm -f -- "${tmp}"-*

display "${good_plot}"
