#! /bin/bash
# Last edited on 2023-04-22 18:45:33 by stolfi

# Plots the results of a regression such as done by {linear_fit}.
#
# Usage:
#
#   plot_regression_result.sh {SHOW} {PREFIX} "{TITLE}" "{TITLE_DATA}" "{TITLE_FITTED}"
#
# where
#
#   {SHOW}          "SHOW" to display, "NOSHOW" not to.
#   {PREFIX}        is the in/out file name prefix.
#   {TITLE}         is a title string for the whole plot.
#   {TITLE_DATA}    is the name of the dependent variable as given in the data.
#   {TITLE_FITTED}  is the name for the formula fitted to that data.
#
# Reads {PREFIX}.txt, which is supposed to contain lines of the form
# 
#   {ID[i]} {Z[i]} {Y[i]}
#
# where {ID[i]} is an arbitrary string, {Z[i]} is a value of the dependent variable
# from an input data line, and {Y[i]} is the value of the fitted linear regression formula.

# Positional arguments; see the usage notes at the top of the file.
# At least {SHOW} and {PREFIX} are needed; missing titles default to empty.
if [[ $# -lt 2 ]]; then
  echo "** usage: $0 {SHOW} {PREFIX} \"{TITLE}\" \"{TITLE_DATA}\" \"{TITLE_FITTED}\"" 1>&2 ; exit 1
fi
show="$1"; shift
prefix="$1"; shift
title="$1"; shift
titleData="$1"; shift
titleFitted="$1"; shift

# Input file with the "{ID} {Z} {Y}" lines.
inFile="${prefix}.txt"

if [[ ! -e "${inFile}" ]]; then
  echo "** no file \"${inFile}\"" 1>&2 ; exit 1
fi

# Private scratch directory for the temporary files.  A directory made by
# {mktemp} avoids the predictable "/tmp/<pid>" names, and the EXIT trap
# removes it (with everything inside) on every exit path of the script.
tmpDir="$(mktemp -d "/tmp/plot_regression_result.XXXXXX")" || {
  echo "** cannot create temporary directory" 1>&2 ; exit 1
}
trap 'rm -rf -- "${tmpDir}"' EXIT

# Common prefix of all temporary file names.
tmp="${tmpDir}/$$"

# Filtered, sorted and possibly subsampled copy of the input data.
tmpDataFile="${tmp}-regr.txt"

# Filters the input data into the temporary data file used by the plots.
#
# Arguments:
#   $1  {step}: keep one out of every {step} data lines (default 1 = all).
#
# Globals:
#   inFile       (read)  name of the input file.
#   tmpDataFile  (read)  name of the output file; it is overwritten.
#   nr           (set)   number of lines written to {tmpDataFile}.
#
# Removes "#" comments and blank lines, keeps data lines 1, 1+step,
# 1+2*step, ..., and sorts the kept lines by the numeric value of
# field 2 (the {Z} value).  Progress messages go to stderr.
function prep_data() {
  # Local, so that the caller's own {step} variable is not clobbered.
  local step="${1:-1}"
  echo "preparing the data, step = ${step}..." 1>&2
  # File names are quoted so that a {PREFIX} with blanks still works.
  sed -e 's:[#].*$::g' -e '/^ *$/d' < "${inFile}" \
    | gawk \
        -v step="${step}" \
        ' (((FNR - 1) % step) == 0) { print; } ' \
    | sort -b -k2g \
    > "${tmpDataFile}"
  # Deliberately global: the caller uses {nr} to decide about subsampling.
  nr=$(wc -l < "${tmpDataFile}")
  echo "plot_regression_result.sh: found ${nr} data lines" 1>&2
}

# First pass: keep every data line.  This also sets {nr}, the line count.
prep_data 1

# With more than 4000 data lines, redo the preparation keeping only one
# line out of every {step}, where {step} is {nr/1500} rounded up, so that
# at most 1500 lines remain to be plotted.
if (( nr > 4000 )); then
  step=$(( (nr + 1499) / 1500 ))
  echo "subsampling the data, step = ${step}..." 1>&2
  prep_data "${step}"
  nr=$(wc -l < "${tmpDataFile}")
fi

# Full-size image written by gnuplot (2800x1500 pixels, see "set term" below).
tmpPngFile="${tmp}.png"

# GDFONTPATH is the font search path used by gnuplot's libgd-based terminals
# such as "png"; here it is set to the current directory.
export GDFONTPATH=.

# Draws four plots on one page from the prepared data file, whose lines are
# sorted by {Z} (column 2):
#
#   top left:      Z[i] and Y[i] against the line index i;
#   top right:     scatter plot of Y[i] against Z[i];
#   bottom left:   error Y[i]-Z[i] against the line index i;
#   bottom right:  error Y[i]-Z[i] against Z[i].
#
# The here-document delimiter is unquoted, so the shell substitutes the file
# names and title variables before gnuplot reads the text.
# NOTE(review): a title containing a double quote would end the gnuplot
# string literal early; titles are not escaped here.
gnuplot <<EOF
set term png size 2800,1500 noenhanced font "arial,20"
set output "${tmpPngFile}"

set key top left

xden = 2000.0/2000.0
yden = 2000.0/1900.0

ptsz = 1.5

set multiplot layout 1,1 title "${title}"
# ----------------------------------------------------------------------
# Pairs
set origin 0.0,(0.500/yden)
set size (0.700/xden),(0.475/yden)
set xlabel "sample index i (sorted)"
set grid xtics lt 1 lw 3 lc rgb '#ffddaa', lt 1 lw 1.5 lc rgb '#ffddaa'
set ylabel "Z[i],Y[i]"
plot "${tmpDataFile}" using 0:2 title "${titleData} Z[i]" with linespoints lt 1 lw 1.5 pt 7 ps (ptsz) lc rgb '#0077ff', \
     ""               using 0:3 title "${titleFitted} Y[i]" with linespoints lt 1 lw 1.0 pt 7 ps (ptsz) lc rgb '#ee2200'
# ----------------------------------------------------------------------
set origin (0.725/xden),(0.500/yden)
set size (0.250/xden),(0.475/yden)
set xlabel "${titleData} Z[i]"
set ylabel "${titleFitted} Y[i]"
plot "${tmpDataFile}" using 2:3 notitle with points  pt 7 ps 2.0 lc rgb '#881188'
# ----------------------------------------------------------------------
# Difference
set origin 0.0,(0.000/yden)
set size (0.700/xden),(0.475/yden)
set xlabel "sample index i (sorted)"
set ylabel "error Y[i]-Z[i]"
plot "${tmpDataFile}" using 0:(column(3)-column(2)) title "error Y[i]-Z[i]" with linespoints lt 1 pt 7 ps (ptsz) lc rgb '#009900'
# ----------------------------------------------------------------------
set origin (0.725/xden),(0.000/yden)
set size (0.250/xden),(0.475/yden)
set xlabel "${titleData} Z[i]"
set ylabel "error Y[i]-Z[i]"
plot "${tmpDataFile}" using 2:(column(3)-column(2)) notitle with points  pt 7 ps (ptsz) lc rgb '#009900'
# ----------------------------------------------------------------------
unset multiplot
quit
EOF

# Convert the gnuplot output (if any) to the final image {PREFIX}.png,
# scaled to half the rendered size, and optionally display it.
if [[ -s "${tmpPngFile}" ]]; then
  pngFile="${prefix}.png"
  # File names are quoted so that a {PREFIX} with blanks still works.
  if convert "${tmpPngFile}" -resize '50%' "${pngFile}"; then
    if [[ "/${show}" == "/SHOW" ]]; then
      display "${pngFile}"
    fi
  else
    # Do not display a missing or stale image after a failed conversion.
    echo "** could not create \"${pngFile}\"" 1>&2
  fi
else
  echo "** gnuplot produced no image, nothing to convert" 1>&2
fi

# Remove both temporary files by name.  A glob on "${tmp}.*" would miss the
# data file, whose name is "${tmp}-regr.txt" rather than "${tmp}.<ext>".
rm -fv -- "${tmpDataFile}" "${tmpPngFile}"