#! /bin/csh -f
# Last edited on 2008-02-04 20:49:39 by stolfi

set usage = "$0 DATANAME PLOTNAME"

# Plots pairs of gallows-letter frequencies against each other.
#
# The input file is DATANAME.mct and should contain 6 fields
#
#    CT CP CK CF CZ WORD
#
# where WORD is a voynichese word containing exactly one "-",
# and CT, CP, CK, CF, CZ are the counts of WORD with the
# "-" replaced by t, p, k, f, and nothing, respectively. This script
# plots various pairs of frequencies against each other.
# Each plot is written to PLOTNAME-X:Y.gif, where X and Y name the
# two quantities being compared (for example PLOTNAME-t:k.gif).

# Exactly two positional arguments are required.  The usage message is
# sent to stderr, using the same gawk idiom as the other diagnostics
# of this script.
if ( $#argv != 2 ) then
  echo "usage: ${usage}" | gawk '//{print > "/dev/stderr";}'
  exit 1
endif

set dtname = "$1"; shift;
set gfname = "$1"; shift;

# Common prefix (based on the shell process id) for the temporary files.
set tmp = "/tmp/$$"

set infile = "${dtname}.mct"
set ctfile = "${tmp}.cct"

# Stop right away if the input cannot be read, instead of failing
# later with an empty intermediate file.
if ( ! -r "${infile}" ) then
  echo "error: cannot read input file ${infile}" | gawk '//{print > "/dev/stderr";}'
  exit 1
endif

# Copy input file, inserting the following new fields for each word:
# [1] the total count CT+CP+CK+CF with all gallows, and [2,3] the counts
# CT+CK and CP+CF with t/k and p/f gallows.  Also discard any entries 
# which do not correspond to gallows letters.

# Note that the counts and frequencies of z-words will be distorted.
# C'est la vie...

# Build the intermediate file.  Lines that are blank or start with #
# are skipped, and so are entries whose four gallows counts add to zero.
# Output columns:
#   1 = CT+CP+CK+CF   2 = CT+CK   3 = CP+CF
#   4 = CT  5 = CP  6 = CK  7 = CF  8 = CZ  9 = WORD
# The input is given by redirection (not as a gawk file argument) so
# that a name containing = is not taken as a gawk variable assignment,
# and both names are quoted so that blanks in them are harmless.
gawk \
    ' /^[^#]/ { \
        s = $1+$2+$3+$4; if (s == 0) { next;} \
        print s, $1+$3, $2+$4, \
          $1, $2, $3, $4, $5, $6; \
      } \
    ' \
  < "${infile}" \
  > "${ctfile}"

# Compute the total counts for each gallows type.

# Column totals of the intermediate file, echoed to stderr.  The plotting
# loop indexes this list by column number.
# NOTE(review): totalize-fields is an external tool; presumably it prints
# one total per input column - confirm.
set tot = ( `cat ${ctfile} | totalize-fields` )
echo "tot = ( ${tot} )" | gawk '//{print > "/dev/stderr";}'

# Number of entries in the intermediate file.  The n+0 forces a numeric
# result, so that an empty file yields 0 rather than an empty string.
set nwords = `cat ${ctfile} | gawk '//{n++} END{print n+0}'`
echo "nwords = ${nwords}" | gawk '//{print > "/dev/stderr";}'

# With no entries there is nothing to plot: clean up and stop here.
if ( ${nwords} == 0 ) then
  echo "error: no entries with nonzero gallows counts" | gawk '//{print > "/dev/stderr";}'
  /bin/rm -f ${ctfile}
  exit 1
endif

# List of the GIF files successfully produced by the loop below.
set gffiles = ( )

# Each loop item has the form XNAME.YNAME/XCOL.YCOL, where XNAME and YNAME
# label the two quantities being compared and XCOL, YCOL are the numbers of
# the corresponding columns in the intermediate file.
foreach pi ( t.k/4.6  t+k.p+f/2.3  p.f/5.7 t+k.z/2.8 p+f.z/3.8 )
  # Split the item at the slash (:h and :t), then split each half at its
  # dot (:r and :e).  The label z is shown as the word nothing.
  set tits = ${pi:h}
  set cols = ${pi:t}
  set xtit = ${tits:r}
  if ( "/${xtit}" == "/z" ) set xtit = "nothing"
  set ytit = ${tits:e}
  if ( "/${ytit}" == "/z" ) set ytit = "nothing"
  set xcol = ${cols:r}
  set ycol = ${cols:e}

  # Temporary bitmap written by gnuplot for this pair.
  set bmfile = "${tmp}-${xtit}:${ytit}.ppm"

  # The gnuplot script below draws a log-log scatter plot.  Each count is
  # increased by bias and divided by (column total + number of entries);
  # the error bars have half-width 0.4 divided by that same denominator.
  # The first plot item is the diagonal y = x, limited to [pmin:pmax].
  gnuplot -pointsize 2 <<EOF 
  set terminal pbm color medium
  set output "${bmfile}"
  set xlabel "frequency with ${xtit}"
  set ylabel "frequency with ${ytit}"
  set title "usage of ${ytit} versus ${xtit}"
  set logscale xy
  bias = 1
  nw = ${nwords}
  xtot = ${tot[${xcol}]}
  ytot = ${tot[${ycol}]}
  pmin = 0.00002
  pmax = 0.200
  # set key left reverse
  set nokey
  set xrange [pmin:pmax]
  set yrange [pmin:pmax]
  set size 1.0,1.2
  # set size 0.5,0.6
  plot \
    ((x<pmin?0/0:(x>pmax?0/0:x))) title "" with lines lt 2, \
    "${ctfile}" using \
      ((\$${xcol}+bias)/(xtot+nw)):((\$${ycol}+bias)/(ytot+nw)) : \
      (0.4/(xtot+nw)):(0.4/(ytot+nw)) \
      with xyerrorbars lt 3 pt 1
  quit
EOF

  # Convert to GIF only when gnuplot left a readable, non-empty bitmap;
  # the bitmap itself is removed in either case.
  if ( ( -r ${bmfile} ) && ( ! ( -z ${bmfile} ) ) ) then
    set gffile = "${gfname}-${xtit}:${ytit}.gif"
    ppmtogif < ${bmfile} > ${gffile}
    set gffiles = ( ${gffiles} ${gffile} )
  endif
  /bin/rm -f ${bmfile}

end

# Display the plots with xv in the background; the intermediate file is
# removed once xv exits.  When no plot was produced there is nothing to
# display, so the intermediate file is removed immediately instead of
# being left behind in /tmp.
if ( $#gffiles > 0 ) then
  ( xv ${gffiles} ; /bin/rm -f ${ctfile} ) &
else
  /bin/rm -f ${ctfile}
endif