#! /bin/csh -f 
# Last edited on 2008-02-04 20:44:39 by stolfi

# Usage text, printed when the script is not given exactly two arguments.
set usage = "$0 XFILE TFILE"

# Both XFILE and TFILE must have records of the form 
#
#   COUNT SEC PNUM FNUM 
#
# where COUNT is some integer count for a page, SEC is a section
# tag, PNUM is a page's p-number (without the "p"), and FNUM is that
# page's f-number (with the "f"). Assumes the TFILE is at least as
# complete as the XFILE with regard to the set of PNUMs. 
#
# Plots the smoothed ratio (XFILE.COUNT+1)/(TFILE.COUNT+1) as a function
# of the page number PNUM, with error bars of size 1/(TFILE.COUNT+1).
#
# Ignores entries where TFILE.COUNT is less than 40.

# Exactly two arguments are required: XFILE and TFILE.
if ( $#argv != 2 ) then
  echo "usage: ${usage}"
  exit 1
endif

# Pick up both file names, then consume them from the argument list.
set xfile = "$argv[1]"
set tfile = "$argv[2]"
shift
shift

# Joined, filtered and sorted data file, one record per page.
set jfile = "/tmp/$$.ffr"

# Pair up the XFILE and TFILE records on PNUM (field 3 of both files);
# join expects both inputs to be sorted on that field.  Pages present in
# only one file are kept, with the missing fields replaced by 0.
# Records whose TFILE.COUNT is below 40 are dropped, and each remaining
# record is written to ${jfile} in the form
#
#   XCOUNT TCOUNT (XCOUNT+1)/(TCOUNT+1) 1/(TCOUNT+1) SEC PNUM FNUM
#
# sorted by SEC and then by PNUM.  The POSIX option spellings
# ("-1 3 -2 3" and "-k 5,5 -k 6,6") are used because the obsolete forms
# "-j1 3 -j2 3" and "+4 -5 +5 -6" are not accepted by all join and sort
# implementations.
join \
    -1 3 -2 3 -a 1 -a 2 -e 0 \
    -o 1.1,2.1,2.2,0,2.4 \
    "${xfile}" "${tfile}" \
  | gawk \
      ' \
        ($2 >= 40){ \
          printf "%7d %7d %7.5f %7.5f %s %s %s\n", \
            $1,$2,($1+1)/($2+1),1/($2+1),$3,$4,$5; \
        } \
      ' \
  | sort -b -k 5,5 -k 6,6 \
  > ${jfile}

# NOTE(review): dicio-wc is not defined in this script; presumably it
# prints size statistics for the listed files.
dicio-wc ${xfile} ${tfile} ${jfile}

# Distinct section tags (field 5 of the joined file), in sorted order.
set sections = ( `gawk '/./{ print $5 }' ${jfile} | sort | uniq` )

# Gnuplot command file; it is assembled piece by piece by the rest of
# this script.
set script = "/tmp/$$.gnuplot"

# Extract section-by-section data and create gnuplot script 

# Script preamble: terminal and style settings followed by the start of
# a single "plot" command.  The backslash after "plot" is a gnuplot line
# continuation, so the plot clauses appended to the script afterwards
# all belong to this one command.
cat <<EOF > ${script}
set term x11
set boxwidth -2
set bar small
set yrange [0:0.5]
plot \
EOF

# For each section tag, copy the records of that section from ${jfile}
# into a separate file and append a matching clause to the gnuplot
# "plot" command (column 6 on x, column 3 on y, lines and points).
foreach sec ( ${sections} )
  set sfile = "/tmp/$$-${sec}.ffr"
  # Appending "" on both sides forces a string comparison, so that
  # numeric-looking tags such as "1" and "01" are not treated as equal.
  # Quoting ${sec} keeps the shell from expanding it any further.
  gawk -v sec="${sec}" '($5 "" == sec ""){print;}' ${jfile} > ${sfile}
  # The trailing backslash continues the gnuplot "plot" command.
  echo '  "'"${sfile}"'" using 6:3 title "'"${sec}"'" w lp lw 2, \' >> ${script}
end

# Final plot clause: all records together, ordered by field 6, drawn as
# error bars (column 6 on x, column 3 on y, column 4 as the y delta).
# The sort key is written as "-k 6,6" because the obsolete "+5 -6" form
# is not accepted by all sort implementations.
set efile = "/tmp/$$-ERR.ffr"
sort -b -k 6,6 ${jfile} > ${efile}
echo '  "'"${efile}"'" using 6:3:4 title "err" w errorbars lt 8 lw 3' >> ${script}

# Finish the gnuplot script: wait 300 seconds after plotting, then quit.
cat <<EOF >> ${script}
pause 300
quit
EOF

# Run gnuplot in the background and remove the temporary files of this
# run once it exits.  The files are matched by their full name patterns
# rather than by the bare prefix "/tmp/$$*", which would also match the
# files of any other process whose id starts with the same digits
# (for example "/tmp/123*" matches "/tmp/1234.ffr").
( gnuplot < ${script} ; /bin/rm -f /tmp/$$.ffr /tmp/$$.gnuplot /tmp/$$-*.ffr ) &