#! /bin/csh -f 
# Last edited on 2000-10-11 18:57:33 by stolfi

# One-line synopsis printed on any command line error.
set usage = "${0} [ -truncate NUM ]  GRCLASS SECTION GRAMMAR"

# Compares the probabilities generated by a given probabilistic
# GRAMMAR against observed word frequencies, and plots the probability
# comparison graph for the two.
# 
# The parameter "-truncate" is a probability value that can be used to
# prune the enumeration of the grammar (mandatory when the grammar is
# recursive).
#
# Input files:
#
#   gram/GRCLASS/SECTION/GRAMMAR.grx
#     The probabilistic grammar
#
#   prob/obs/SECTION/GRAMMAR.frq
#     The observed word frequencies
#
# Output files:
#
#   prob/gen/GRCLASS/SECTION/GRAMMAR.prb
#     Word probabilities as predicted by the grammar.
#
#   prob/cmp/GRCLASS/SECTION/GRAMMAR.pr2
#   prob/cmp/GRCLASS/SECTION/GRAMMAR.gif
#     Probability comparison listings and plots.

# Parse the leading options, i.e. every leading argument that starts
# with a dash.  The only option recognized is -truncate NUM, which is
# stored in truncopt as the two words ( -v truncate=NUM ) so that it
# can be spliced into the enum-language command line; truncopt stays
# empty when the option is absent.  Anything else prints the usage
# line and exits with status 1.
#
# The slash prefixed to each operand keeps a leading dash from being
# taken as an operator inside the csh expression.

set truncopt = ( )

while ( ( $#argv > 0 ) && ( "/$1" =~ /-* ) )
  if ( "/$1" == "/-truncate" ) then
    # Report a missing NUM explicitly instead of calling a valid
    # option unknown.
    if ( $#argv < 2 ) then
      echo "option "'"'"$1"'"'" requires an argument"
      echo "usage: ${usage}"; exit 1
    endif
    set truncopt = ( -v "truncate=$2" )
    shift; shift
  else
    echo "unknown option "'"'"$1"'"'
    echo "usage: ${usage}"; exit 1
  endif
end

# Exactly three positional arguments must remain after the options:
# GRCLASS, SECTION and GRAMMAR, in that order.
if ( $#argv != 3 ) then
  echo "usage: ${usage}"
  exit 1
endif

set grclass = "$1"
set sec     = "$2"
set gram    = "$3"

# Consume the three arguments, leaving argv empty.
shift; shift; shift

# Derive the input and output file names from the three arguments.
# The GRCLASS/SECTION pair is common to all but the observed file.
set subdir = "${grclass}/${sec}"

set grxfile = "gram/${subdir}/${gram}.grx"
# NOTE(review): obsfile is not referenced by any command visible in
# this script; presumably compare-probs locates it by itself.
set obsfile = "prob/obs/${sec}/${gram}.frq"
set genfile = "prob/gen/${subdir}/${gram}.prb"
set cmpfile = "prob/cmp/${subdir}/${gram}.pr2"

# Step 1: feed the grammar file to enum-language on standard input and
# save its output as the generated file.  The optional truncation
# setting is passed through truncopt, which expands to zero or two
# words and therefore must stay unquoted.  The file names are quoted
# so that blanks or glob characters in the arguments cannot split or
# expand the redirection targets.

echo "enumerating language of ${grxfile}"

enum-language ${truncopt} < "${grxfile}" > "${genfile}"

# Stop here if the enumeration failed.
if ( $status != 0 ) then
  echo "aborted"; exit 1
endif

# Step 2: run the comparison for this grammar class, section and
# grammar.
# NOTE(review): GRAMMAR is passed twice; presumably once as the grammar
# name and once as the name of the observed frequency file -- confirm
# against the usage of compare-probs.

compare-probs "${grclass}" "${sec}" "${gram}" "${gram}"

# Do not go on to the summary when the comparison itself failed.
if ( $status != 0 ) then
  echo "aborted"; exit 1
endif

# Step 3: run dicio-wc on the generated and comparison files.

dicio-wc "${genfile}" "${cmpfile}"

if ( $status != 0 ) then
  echo "aborted"; exit 1
endif