#! /bin/bash
# Last edited on 2023-04-22 18:45:33 by stolfi

# Plots the results of a regression such as done by {linear_fit}.
#
# Usage:
#
#   plot_regression_result.sh {SHOW} {PREFIX} "{TITLE}" "{TITLE_DATA}" "{TITLE_FITTED}"
#
# where
#
#   {SHOW} "SHOW" to display, "NOSHOW" not to.
#   {PREFIX} is the in/out file name prefix.
#   {TITLE} is a title string for the whole plot.
#   {TITLE_DATA} is the name of the independent variable as given in the data.
#   {TITLE_FITTED} is the name for the formula fitted to that data.
#
# NOTE(review): {TITLE_DATA} is described above as naming the "independent"
# variable, while the {Z[i]} column below is described as the "dependent"
# variable -- TODO confirm which wording is intended.
#
# Reads {PREFIX}.txt, which is supposed to contain lines
#
#   {ID[i]} {Z[i]} {Y[i]}
#
# where {ID[i]} is an arbitrary string, {Z[i]} is a value of the dependent variable
# from an input data line, and {Y[i]} is the value of the fitted linear regression formula.

# Positional arguments, consumed in order.
show=$1; shift
prefix="$1"; shift
title="$1"; shift
titleData="$1"; shift
titleFitted="$1"; shift

# The input file must exist before anything else is attempted.
inFile="${prefix}.txt"
if [[ ! -e "${inFile}" ]]; then
  echo "** no file \"${inFile}\"" 1>&2
  exit 1
fi

# Prefix for temporary files, made unique per run by the shell's process id.
tmp="/tmp/$$"

tmpDataFile="${tmp}-regr.txt"

# prep_data {STEP}
#
# Writes a cleaned, sub-sampled, sorted copy of ${inFile} to ${tmpDataFile}:
#   * strips everything from a '#' to the end of the line;
#   * drops lines that are empty or contain only spaces;
#   * keeps one record out of every {STEP} (records 1, 1+STEP, 1+2*STEP, ...);
#   * sorts the surviving records on field 2 onwards, general-numeric order.
#
# Globals read:    inFile, tmpDataFile.
# Globals written: step (the {STEP} argument), nr (line count of ${tmpDataFile}).
# Both are assigned without "local"; the caller below reads {nr}.
# Progress messages go to stderr.
function prep_data() {
  step=$1; shift; # Keep one record out of every {step}.
  echo "preparing the data, step = ${step}..." 1>&2
  sed -e 's:[#].*$::g' -e '/^ *$/d' "${inFile}" \
    | gawk \
        -v step="${step}" \
        ' (((FNR - 1) % step) == 0) { print; } ' \
    | sort -b -k2g \
    > "${tmpDataFile}"
  nr=$(wc -l < "${tmpDataFile}")
  echo "plot_regression_result.sh: found ${nr} data lines" 1>&2
}

# First pass keeps every record, so that {nr} is the full data line count.
prep_data 1

# Decide if needs to sub-sample the data:
if [[ ${nr} -gt 4000 ]]; then
  # Smallest step that leaves at most 1500 records.
  step=$(( ( nr + 1499 ) / 1500 ))
  echo "subsampling the data, step = ${step}..." 1>&2
  # prep_data rewrites ${tmpDataFile} and refreshes {nr} itself.
  prep_data "${step}"
fi

tmpPngFile="${tmp}.png"

export GDFONTPATH=.

# NOTE(review): the reviewed source is cut off at this point, immediately
# after the start of the plotting command, which reads:
#
#   gnuplot <
#
# The remainder of that command must be restored from the complete file.