#! /bin/csh -f
# Last edited on 2000-10-11 14:32:15 by stolfi

# Usage line; echoed when an option is not recognized or when fewer
# than three positional arguments are given.
set usage = "$0 [ -truncate NUM ] [ -maxtrees NUM ]  CLASS SECTION GRAMMAR"

# Compares the probabilities generated by a given prob. GRAMMAR
# against observed word frequencies, and plots the probability
# comparison graph for the two. Then uses the observed frequencies to
# adjust the rule probabilities, producing a new grammar GRAMMAR-a, and
# compares it too against the observed data.
# 
# The parameter "-truncate" is a probability value that can be used to
# prune the enumeration of the grammar (mandatory when the grammar is
# recursive). The parameter "-maxtrees" is passed to the grammar
# adjustment routine.
#
# Input files:
#
#   gram/CLASS/SECTION/GRAMMAR.grx
#     The probabilistic grammar
#
#   prob/obs/SECTION/GRAMMAR.frq
#     The observed word frequencies
#
# Output files:
#
#   gram/CLASS/SECTION/GRAMMAR-a.grx
#     Adjusted grammar.
#
#   prob/gen/CLASS/SECTION/GRAMMAR.prb
#   prob/gen/CLASS/SECTION/GRAMMAR-a.prb
#     Predicted probabilities for the 
#     original and adjusted grammar.
#
#   prob/cmp/CLASS/SECTION/GRAMMAR.pr2
#   prob/cmp/CLASS/SECTION/GRAMMAR.gif
#   prob/cmp/CLASS/SECTION/GRAMMAR-a.pr2
#   prob/cmp/CLASS/SECTION/GRAMMAR-a.gif
#     Probability comparison listings and plots.

# Command-line parsing.
#
# Defaults: "maxtrees" is 1; "truncopt" is an empty word list, so that
# it adds nothing to the check-grammar command lines unless "-truncate"
# was given.
set maxtrees = 1
set truncopt = ( )

# Consume the leading switches.  Each operand is prefixed with "/" in
# the comparisons, presumably so that csh never sees a bare word
# beginning with "-" inside an expression.
while ( ( $#argv > 0 ) && ( "/$1" =~ /-* ) )
  if ( ( "/$1" == "/-maxtrees" ) || ( "/$1" == "/-truncate" ) ) then
    # Both switches take exactly one value.  Say so explicitly when it
    # is absent, instead of reporting a valid switch as "unknown".
    if ( $#argv < 2 ) then
      echo "missing value for option "'"'"$1"'"'
      echo "usage: ${usage}"; exit 1
    endif
    if ( "/$1" == "/-maxtrees" ) then
      set maxtrees = "$2"
    else
      set truncopt = ( "-truncate" "$2" )
    endif
    shift; shift
  else
    echo "unknown option "'"'"$1"'"'
    echo "usage: ${usage}"; exit 1
  endif
end

# CLASS, SECTION and GRAMMAR are mandatory.
if ( $#argv < 3 ) then
  echo "usage: ${usage}"; exit 1
endif

set class = "$1"; shift
set sec = "$1"; shift
set old = "$1"; shift

# Whatever words remain after the three mandatory arguments.
# NOTE(review): "parts" is not referenced in the visible part of the
# script; kept in case something further down relies on it.
set parts = ( $* )

# File names derived from CLASS, SECTION and GRAMMAR: the observed
# word frequencies and the original grammar.
set oldobs = "prob/obs/${sec}/${old}.frq"
set oldgrx = "gram/${class}/${sec}/${old}.grx"

# Run check-grammar on the original grammar, forwarding the optional
# "-truncate NUM" pair; a non-zero status ends the script.
check-grammar $truncopt $class $sec $old
if ( $status ) then
  echo "aborted"
  exit 1
endif

# The adjusted grammar takes the original name with "-a" appended.
set new = "${old}-a"

set newgrx = "gram/${class}/${sec}/${new}.grx"
set newobs = "prob/obs/${sec}/${new}.frq"

# Unless a readable ${new}.frq is already present, create it as a
# symbolic link to ${old}.frq inside the observations directory
# (the link target is relative, hence the "cd" in a subshell).
if ( ! -r ${newobs} ) then
  ( cd ${newobs:h} && ln -s ${old}.frq ${new}.frq )
  # Like every other step of this script, stop on failure instead of
  # going on to overwrite the adjusted grammar without this file.
  if ( $status != 0 ) then
    echo "aborted"; exit 1
  endif
endif
 
# Feed the original grammar to parse-and-tally, together with the
# observed word counts and the "maxtrees" setting, and store what it
# writes on standard output as the adjusted grammar.
cat $oldgrx | parse-and-tally \
    -v wordcounts=$oldobs -v ignorecounts=1 \
    -v maxtrees=$maxtrees -v countprec=2 \
  > $newgrx
# A non-zero pipeline status ends the script.
if ( $status ) then
  echo "aborted"
  exit 1
endif

# Run check-grammar once more, this time on the adjusted grammar,
# with the same optional "-truncate NUM" pair.
check-grammar $truncopt $class $sec $new
if ( $status ) then
  echo "aborted"
  exit 1
endif