#! /n/gnu/bin/gawk -f
# Last edited on 2000-02-04 18:31:13 by stolfi

BEGIN {
  # Filter: reads an ".fcm" listing from stdin, writes a ".grm" grammar
  # to stdout.  No command-line arguments are accepted.
  #
  # Input consists of groups of lines shaped like
  #
  #   NTSYMB:
  #      COUNT OTHER... DEFN
  #
  # NTSYMB names a non-terminal symbol.  Each following line gives one
  # alternative (DEFN) for that symbol, preceded by its COUNT and by
  # zero or more further numeric fields (OTHER), which are ignored.
  #
  # For every alternative the output keeps only COUNT and DEFN and
  # puts NTSYMB in front of them.  In DEFN, every "-" and "=" is
  # deleted and then every "." is turned into a blank.  As a result
  # the characters ":", "-" and "=" cannot be used in the language
  # itself.

  usage = ( ARGV[0] " < INFILE.fcm > OUTFILE.grm" );

  # State shared with the rules below:
  #   abort  exit status requested by error(); negative means "none".
  #   nsy    non-terminal of the most recent "NTSYMB:" header line.
  abort = -1;
  nsy = "";

  if (ARGC != 1) {
    error(("usage: " usage));
  }
}

# Once error() has stored a non-negative status in "abort", stop
# processing and exit with that status instead of handling the record.
(abort >= 0) {
  exit abort;
}

# Blank lines and "#" comment lines are copied to the output unchanged.
# The leading run may contain tabs as well as spaces: fields are split
# with the default FS, which treats both as whitespace, so a tab-only
# line or a tab-indented comment must be recognised here rather than
# fall through to the data rules and be reported as an error.
/^[ \t]*([#]|$)/ { print; next; }

# Header line "NTSYMB:" (optional blanks around the final colon).
# Remember the symbol, with blanks and colons stripped, as the
# non-terminal for the alternatives that follow.  Nothing is printed.
/^[-_:A-Za-z0-9]+ *[:] *$/ {
  nsy = $1;
  gsub(/[ :]/, "", nsy);
  next;
}

# Alternative line "COUNT OTHER... DEFN": check it, then print
# "NTSYMB COUNT DEFN" for the current non-terminal.
(NF >= 2) {
  # Every field except the last (DEFN) must be a plain decimal number:
  # optional sign, digits, at most one "." and at least one digit.
  i = 1;
  while (i < NF) {
    if ($(i) !~ /^[-+]?[0-9]*([0-9][.]?|[.]?[0-9])[0-9]*$/) {
      error(("line " FNR ": bad count/prob format \"" $0 "\""));
    }
    i++;
  }

  # The count/probability is the first field and may not be negative.
  frq = $1;
  if (frq < 0) {
    error(("line " FNR ": negative count/prob \"" $0 "\""));
  }

  # An alternative is meaningless before the first "NTSYMB:" header.
  if (nsy == "") {
    error(("line " FNR ": missing non-terminal"));
  }

  # Clean up the definition: drop "-" and "=", then turn "." into " ".
  def = $(NF);
  gsub(/[-=]/, "", def);
  gsub(/[.]/, " ", def);

  printf "%-7s %7s %s\n", nsy, frq, def;
  next;
}

# Any record that reaches this point matched none of the rules above.
{
  error(("line " FNR ": bad format \"" $0 "\""));
}

# error(msg): report a fatal problem.
#   msg  text to write, followed by a newline, to "/dev/stderr".
# Sets the global "abort" to 1 and terminates the program with exit
# status 1; it does not return to the caller.
function error(msg)
{
  abort = 1;
  printf("%s\n", msg) > "/dev/stderr";
  exit abort;
}