#! /bin/bash -eu
# Last edited on 2026-03-07 17:34:15 by stolfi

# Reads files "{wpo_file0}" and "{wpo_file1}". Each file must contain
# entries of the form "{LOC} {PSIZE} {WPOS}" where {LOC} is the locus ID
# of a parag, {PSIZE} is a line size (ignored), and {WPOS} is the
# position of some word in line {LOC} of some transcription file, both
# expressed as equivalent hanzi characters.
# 
# Extracts from those files the lines whose {LOC} matches the RE
# patterns {pat_loc0} and {pat_loc1}, respectively.
#
# If a matching {LOC} has fewer than three lines, skips it.
# Otherwise uses {list_word_delta_pairs.py} to enumerate all triples
# {p1<p2<p3} of positions for the same {LOC}, up to a maximum span
# {p3-p1}, and produce a point {(d12,d23) = (p2-p1,p3-p2)} for each
# triple, scaled by {uscale0} and {uscale1}, depending on the source
# file.
# 
# Then plots those points on a single plot, with colors {color0} and
# {color1}. Writes that plot to {stdout}.

# Restate the shebang options: they are ignored when the script is
# started as "bash script.sh", and without them a missing argument
# would silently become an empty string.
set -eu

echo "$0 ..." 1>&2

# All ten positional arguments are mandatory. Fail early with a usage
# summary instead of the bare "unbound variable" error from {set -u}.
if (( $# < 10 )); then
  echo "** expected 10 arguments, got $#" 1>&2
  echo "usage: $0 WPO_FILE0 USCALE0 PAT_LOC0 TITLE0 COLOR0 WPO_FILE1 USCALE1 PAT_LOC1 TITLE1 COLOR1" 1>&2
  exit 1
fi

# Parameters of data set 0.
wpo_file0="$1"; shift # Name of word pos file 0.
uscale0="$1"; shift   # Size of file 0 word pos units in hanzi.
pat_loc0="$1"; shift  # RE pattern selecting loci in file 0.
title0="$1"; shift    # Title for file 0.
color0="$1"; shift    # Color of points in set 0.

# Parameters of data set 1.
wpo_file1="$1"; shift # Name of word pos file 1.
uscale1="$1"; shift   # Size of file 1 word pos units in hanzi.
pat_loc1="$1"; shift  # RE pattern selecting loci in file 1.
title1="$1"; shift    # Title for file 1.
color1="$1"; shift    # Color of points in set 1.

# All intermediate files live in a private scratch directory created by
# {mktemp}, so their names cannot collide with (or be pre-created by)
# another process. {temp} remains the common path prefix of every
# intermediate file, as "{temp}-{suffix}".
tmp_dir="$(mktemp -d "${TMPDIR:-/tmp}/wdp-XXXXXX")" || {
  echo "** cannot create temporary directory" 1>&2; exit 1
}
# Remove the scratch directory on every exit path, including failures.
trap 'rm -rf -- "${tmp_dir}"' EXIT
temp="${tmp_dir}/$$"

# Filter that turns word positions into delta pairs; invoked by bare
# name, so it must be reachable through {PATH}.
gdp_script="list_word_delta_pairs.py"

# Per-data-set parameters, indexed by data set number (0 or 1).
wpo_files=( "${wpo_file0}" "${wpo_file1}" )
uscales=( "${uscale0}" "${uscale1}" )
pat_locs=( "${pat_loc0}" "${pat_loc1}" )
titles=( "${title0}" "${title1}" )
colors=( "${color0}" "${color1}" )

# Names of the delta-pair files produced below, indexed by data set.
wpd_files=()
for k in 0 1; do
  wpo_file="${wpo_files[$k]}"
  uscale="${uscales[$k]}"
  title="${titles[$k]}"
  color="${colors[$k]}"
  pat_loc="${pat_locs[$k]}"

  echo "  plotting delta pairs from ${wpo_file}" 1>&2
  echo "  for lines lines matching '${pat_loc}'" 1>&2
  echo "  uscale = ${uscale} title = '${title}' color = '${color}'" 1>&2
  echo "" 1>&2

  # Keep only the entries whose locus (after optional leading blanks)
  # matches {pat_loc}. The pattern is handed to gawk through the
  # environment rather than spliced into the program text, so a "/" or
  # other awk syntax in it cannot break or alter the program. {ENVIRON}
  # values are not escape-processed, so the regex is used verbatim.
  wpo_sel_file="${temp}-sel-${k}.wpo"
  echo "  extracting word positions for selected loci ..." 1>&2
  WPO_LOC_RE="^ *(${pat_loc})" \
    gawk '$0 ~ ENVIRON["WPO_LOC_RE"] { print }' "${wpo_file}" \
    > "${wpo_sel_file}"
  # Diagnostic census: number of selected entries per locus.
  gawk '/^ *[a-z]/{ print $1 }' "${wpo_sel_file}" \
    | sort | uniq -c \
    1>&2

  # Convert the selected word positions into delta pairs.
  wpd_file="${temp}-${k}.wpd"
  echo "  computing word position delta pairs ..." 1>&2
  "${gdp_script}" < "${wpo_sel_file}" > "${wpd_file}"
  # Diagnostic census: number of delta pairs per locus.
  gawk '/^ *[a-z]/{ print $1 }' "${wpd_file}" \
    | sort | uniq -c \
    1>&2

  wpd_files+=( "${wpd_file}" )
done

# Pixel dimensions of the image that gnuplot renders (passed to
# "set term ... size" below). The commented-out pair is an alternative
# smaller size.
# big_hsize=1680
# big_vsize=1680
big_hsize=2800
big_vsize=2800

# NOTE(review): GDFONTPATH is presumably meant as a font search
# directory for gnuplot; whether the pngcairo terminal used below
# honors it should be confirmed.
export GDFONTPATH=ttf

# Render the scatter plot into {temp_plot}.
#
# The here-document delimiter is unquoted, so the shell substitutes all
# "${...}" references (sizes, scales, titles, colors, file names)
# before gnuplot reads the script.
#
# In the gnuplot script, {rsz0(k)} and {rsz1(k)} multiply column {k} of
# the current data file by {uscale0} and {uscale1}, respectively. The
# plot command draws, in this order: the points of set 1 (columns 2 and
# 3, filled dots), the points of set 0 (columns 2 and 3, large open
# circles), then the column 1 text of each set as labels rotated by +30
# and -30 degrees, each in the color of its set.
temp_plot="${temp}-big.png"
echo "  creating plot ..." 1>&2 
gnuplot <<EOF
set term pngcairo size ${big_hsize},${big_vsize} font "arial,20" noenhanced
set output "${temp_plot}"

uscale0 = ${uscale0}
uscale1 = ${uscale1}

rsz0(k) = column(k)*uscale0
rsz1(k) = column(k)*uscale1

set xrange [-0.05:]
set xtics 10.0
set mxtics 10
set xzeroaxis
set grid xtics
set xlabel "Delta of positions p2-p1"

set yrange [-0.05:]
set ytics 10.0 rotate by 90
set mytics 10
set yzeroaxis
set grid ytics
set ylabel "Delta of positions p3-p2"

title0 = "${title0}"
title1 = "${title1}"

set title (title0 . " -- " . title1)

set errorbars 0.0

# set nokey

plot \
  "${wpd_files[1]}" using (rsz1(2)):(rsz1(3)) notitle with points pt 7 ps  2.0 lw 2 lc rgb '${color1}', \
  "${wpd_files[0]}" using (rsz0(2)):(rsz0(3)) notitle with points pt 6 ps 24.0 lw 2 lc rgb '${color0}', \
  "${wpd_files[1]}" using (rsz1(2)):(rsz1(3)):1 notitle with labels rotate by +30 offset char 1,+1 left tc rgb '${color1}', \
  "${wpd_files[0]}" using (rsz0(2)):(rsz0(3)):1 notitle with labels rotate by -30 offset char 1,-1 left tc rgb '${color0}'

quit
EOF

#  "${wpd_files[0]}" using (rsz0(2)):(rsz0(3)):(3.0):(3.0) notitle with xyerrorbars lw 2 lc rgb '${color0}', \

# Deliver the result. If gnuplot left no (or an empty) image, report
# the failure; otherwise shrink the image to half size, write it to
# {stdout}, and show it on screen. The intermediate files are removed
# on every path, including when {convert} or {display} fails.
if [[ ! -s "${temp_plot}" ]]; then
  echo "** ${temp_plot} not generated" 1>&2
  rm -f -- "${temp}"-*
  exit 1
fi

good_plot="${temp}-sma.png"

# Run the delivery steps as one guarded chain so that a failure cannot
# abort the script (under {set -e}) before the cleanup below; the
# failing step's status is kept and reported after the cleanup.
status=0
{
  convert "${temp_plot}" -resize '50%' "${good_plot}" \
    && cat "${good_plot}" \
    && display -title "${title0} ${title1}" "${good_plot}"
} || status=$?

rm -f -- "${temp}"-*

if (( status != 0 )); then
  exit "${status}"
fi
