#! /n/gnu/bin/gawk -f
# Last edited on 2000-02-04 18:31:13 by stolfi

# Reads from stdin a bunch of lines of the form
#
#   NTSYMB:
#     COUNT OTHER... DEFN
#
# where NTSYMB is a non-terminal symbol, OTHER is zero or more optional
# numeric fields (validated, then ignored), and DEFN is one alternative
# for NTSYMB.  Writes to stdout a grammar in ".grm" format, one line per
# alternative:
#
#   NTSYMB COUNT DEFN'
#
# The conversion keeps only the COUNT (first field) and the DEFN (last
# field), deletes every "-" and "=" from DEFN, replaces every "." in DEFN
# by a space, and puts the current NTSYMB in front of each line.  These
# conventions preclude using ":", "-", "=" and "." as symbols of the
# language itself.
#
# Blank lines and lines whose first non-blank character is "#" are copied
# to the output unchanged.  Any other line is an error: a message goes to
# stderr and the script exits with status 1.

BEGIN {
  abort = -1;   # Becomes >= 0 once error() has been called.
  usage = ( ARGV[0] " < INFILE.fcm > OUTFILE.grm" );

  # The script is a pure filter: no command-line file arguments allowed.
  if (ARGC != 1) { error(("usage: " usage)); }

  nsy = "";     # Current non-terminal; empty until the first "NTSYMB:" line.
}

# Defensive: if error() has already set the abort code, stop before
# processing any (further) input record.
(abort >= 0) { exit abort; }

# Blank lines and comments pass through untouched.  Tabs are accepted as
# leading blanks too, consistently with the default field splitting that
# is applied to the data lines.
/^[ \t]*([#]|$)/ { print; next; }

# Header line "NTSYMB:" -- remember the non-terminal for the lines below.
# Blanks around the final colon may be spaces or tabs.
/^[-_:A-Za-z0-9]+[ \t]*[:][ \t]*$/ {
  nsy = $1;
  gsub(/[ :]/, "", nsy);   # Drop the colon(s); only the bare symbol is kept.
  next;
}

# Data line "COUNT OTHER... DEFN".
(NF >= 2) {
  frq = $1;
  def = $(NF);

  # Every field except the last (the DEFN) must look like a number.
  for (i = 1; i < NF; i++) {
    if (! is_number($(i))) {
      error(("line " FNR ": bad count/prob format \"" $0 "\""));
    }
  }
  if (frq < 0) {
    error(("line " FNR ": negative count/prob \"" $0 "\""));
  }

  gsub(/[-=]/, "", def);   # "-" and "=" are deleted from the definition,
  gsub(/[.]/, " ", def);   # and "." becomes a space.

  # A data line is meaningless before the first "NTSYMB:" header.
  if (nsy == "") {
    error(("line " FNR ": missing non-terminal"));
  }

  printf "%-7s %7s %s\n", nsy, frq, def;
  next;
}

# Anything that reached this point matched none of the formats above.
// { error(("line " FNR ": bad format \"" $0 "\"")); }

# Returns true iff "s" is an optionally signed decimal number with at
# least one digit and at most one "." (no exponent part).
function is_number(s)
{
  return match(s, /^[-+]?[0-9]*([0-9][.]?|[.]?[0-9])[0-9]*$/);
}

# Prints "msg" on stderr, records the failure in "abort", and terminates
# the script with exit status 1.
function error(msg)
{
  printf "%s\n", msg > "/dev/stderr";
  abort = 1;
  exit 1;
}