#! /bin/csh -f
# Last edited on 2000-10-11 14:32:15 by stolfi
# Command-line synopsis; echoed after "usage: " whenever the arguments
# are rejected.
set usage = "$0 [ -truncate NUM ] [ -maxtrees NUM ] CLASS SECTION GRAMMAR"
# Compares the probabilities generated by a given prob. GRAMMAR
# against observed word frequencies, and plots the probability
# comparison graph for the two. Then uses the observed frequencies to
# adjust the rule probabilities, producing a new grammar GRAMMAR-a, and
# compares it too against the observed data.
#
# The parameter "-truncate" is a probability value that can be used to
# prune the enumeration of the grammar (mandatory when the grammar is
# recursive). The parameter "-maxtrees" is passed to the grammar
# adjustment routine.
#
# Input files:
#
# gram/CLASS/SECTION/GRAMMAR.grx
# The probabilistic grammar
#
# prob/obs/SECTION/GRAMMAR.frq
# The observed word frequencies
#
# Output files:
#
# gram/CLASS/SECTION/GRAMMAR-a.grx
# Adjusted grammar.
#
# prob/gen/CLASS/SECTION/GRAMMAR.prb
# prob/gen/CLASS/SECTION/GRAMMAR-a.prb
# Predicted probabilities for the
# original and adjusted grammar.
#
# prob/cmp/CLASS/SECTION/GRAMMAR.pr2
# prob/cmp/CLASS/SECTION/GRAMMAR.gif
# prob/cmp/CLASS/SECTION/GRAMMAR-a.pr2
# prob/cmp/CLASS/SECTION/GRAMMAR-a.gif
# Probability comparison listings and plots.
# Defaults for the optional parameters: "maxtrees" is later handed to
# parse-and-tally; "truncopt" is the word list passed through to
# check-grammar (empty unless "-truncate" is given).
set maxtrees = 1
set truncopt = ( )

# Consume the leading arguments that start with "-". Both sides of
# each comparison carry a "/" prefix -- presumably so that csh never
# sees a bare "-..." operand inside the expression (TODO confirm).
while ( ( $#argv > 0 ) && ( "/$1" =~ /-* ) )
  if ( ( "/$1" != "/-maxtrees" ) && ( "/$1" != "/-truncate" ) ) then
    echo "unknown option "'"'"$1"'"'
    echo "usage: ${usage}"; exit 1
  endif
  # Both recognized options take exactly one value; report a missing
  # value as such instead of calling the option unknown.
  if ( $#argv < 2 ) then
    echo "option "'"'"$1"'"'" requires a value"
    echo "usage: ${usage}"; exit 1
  endif
  if ( "/$1" == "/-maxtrees" ) then
    set maxtrees = "$2"
  else
    set truncopt = ( "-truncate" "$2" )
  endif
  shift; shift
end

# After the options, CLASS, SECTION and GRAMMAR are all mandatory.
if ( $#argv < 3 ) then
  echo "usage: ${usage}"; exit 1
endif
# Positional arguments, in the order given by the usage line.
# Any further arguments are ignored.
set class = "$1"; shift;
set sec = "$1"; shift;
set old = "$1"; shift;

# Locations of the original grammar and of its observed word frequencies.
set oldgrx = "gram/${class}/${sec}/${old}.grx"
set oldobs = "prob/obs/${sec}/${old}.frq"

# Run check-grammar on the grammar as given, and give up if it fails.
# ${truncopt} is left unquoted on purpose so that it expands to either
# no words or the two words "-truncate NUM"; the other values are
# quoted so that each stays a single, unglobbed word.
check-grammar ${truncopt} "${class}" "${sec}" "${old}"
if ( $status != 0 ) then
  echo "aborted"; exit 1
endif
# The adjusted grammar carries the original's name with "-a" appended.
set new = "${old}-a"
set newobs = "prob/obs/${sec}/${new}.frq"
set newgrx = "gram/${class}/${sec}/${new}.grx"

# Unless a readable frequency file already exists under the new name,
# symlink it (within its own directory) to the original's file.
if ( ! -r ${newobs} ) then
  ( cd ${newobs:h} && ln -s ${old}.frq ${new}.frq )
endif

# Feed the original grammar to parse-and-tally together with the
# observed word counts; its standard output becomes the adjusted grammar.
cat ${oldgrx} | parse-and-tally \
    -v wordcounts=${oldobs} -v ignorecounts=1 \
    -v maxtrees=${maxtrees} -v countprec=2 \
  > ${newgrx}
if ( $status != 0 ) then
  echo "aborted"
  exit 1
endif

# Run check-grammar on the adjusted grammar, as was done for the original.
check-grammar ${truncopt} ${class} ${sec} ${new}
if ( $status != 0 ) then
  echo "aborted"
  exit 1
endif