#! /bin/csh -f
set usage = "$0 PATTERN FILE..."
# Each FILE must contain words, one per line.
# For each FILE, enumerates all pairs of consecutive words that match
# the extended regular expression PATTERN.
# Lists the 10 most frequent pairs of each file.
# Prints the tables for each file, side by side.

# Validate the argument count BEFORE consuming PATTERN: "shift" on an
# empty argument list is an error in csh and would abort the script
# before the usage message could be printed.
if ( $#argv < 2 ) then
  echo "usage: $usage"; exit 1
endif
set pat = "$1"; shift

set files = ( $* )   # remaining arguments: the input word lists
set outs = ( )       # scratch frequency tables, one per FILE, in order
set head = ""        # header line: one column title per FILE
set dash = ""        # underline printed beneath the header
set wd = 0           # total output width, grown per FILE
# Build one frequency table per FILE and accumulate the header pieces.
foreach f ( ${files} )
  echo $f
  # Scratch table lives in the current directory and is named after the
  # basename of FILE only, so a FILE given with a directory part (a/x.txt)
  # does not yield a bogus path such as ".a/x.frq".  The running column
  # index keeps same-named files from different directories apart.
  set base = ${f:t}
  set g = .${base:r}.${#outs}.frq
  # Pipeline stages, in order:
  #   1. drop lines made only of '-' and '/' characters (or empty lines);
  #   2. sed: every "t" -> "k"; a final "a" or "o" -> "y"; a "y" at the
  #      start of the line (after any run of "q") -> "o";
  #   3. enum-word-pairs (external tool) emits the consecutive word pairs;
  #   4. keep the pairs matching PATTERN; "-e" protects a PATTERN that
  #      begins with "-" from being parsed as an option;
  #   5. count identical pairs, order by descending count, keep the top 10.
  # "grep -E" replaces the obsolescent "egrep" spelling.
  cat $f \
  | grep -E -v '^[-/]*$' \
  | sed \
  -e 's/t/k/g' \
  -e 's/[ao]$/y/' \
  -e 's/^\([q]*\)y/\1o/' \
  | enum-word-pairs \
  | grep -E -e "${pat}" \
  | sort | uniq -c | expand \
  | sort -k1nr \
  | head -10 \
  > ${g}
  set outs = ( $outs ${g} )
  # Each FILE contributes one 30-character-wide column to the page.
  @ wd = ${wd} + 30
  set head = "${head}`printf ' count %-22s' ${f}`"
  set dash = "${dash} ----- ----------------------"
end
# Print the accumulated column titles and their underline.
echo "${head}"
echo "${dash}"
# Lay the per-file tables out side by side:
#   -m       merge the files, one file per column;
#   -t       omit pr's page headers and trailers;
#   -w wd    total line width (30 columns were added to wd per FILE);
#   -i' '1   NOTE(review): presumably sets the output "tab" character to a
#            space so pr never emits real tabs - confirm against pr(1).
pr -m -t -i' '1 -w ${wd} $outs
# Remove the scratch frequency tables created in the loop above.
/bin/rm -f $outs