#! /usr/bin/gawk -f
# Last edited on 2000-12-27 13:19:08 by stolfi

BEGIN {
  # Reads word records from standard input and writes them to standard
  # output with one extra field appended.
  #
  # Each input record must have exactly three fields:
  #
  #   COUNT WORD FWORD
  #
  # where FWORD is WORD factored into elements, each element enclosed
  # in braces (e.g. "{ab}{c}{de}").  Blank lines and lines whose first
  # non-blank character is "#" are ignored.
  #
  # Each output record repeats the input fields and appends LEN, the
  # number of elements found in FWORD:
  #
  #   COUNT WORD FWORD LEN
  #
  usage = "compute-elem-counts < WORDFILE > FREQFILE ";

  # Exit status requested by an error handler; negative means "no error".
  abort = -1;
}

# Defensive guard: if an error handler has already set a non-negative
# exit status, stop before touching the current record.
abort >= 0 { exit abort }

# Skip blank lines and "#" comments.  Leading tabs are accepted as well
# as spaces: default field splitting treats both as blanks, so a
# tab-only line would otherwise reach the data rule with NF == 0 and
# be reported as having the wrong number of fields.
/^[ \t]*([#]|$)/ { next; }

# Data record: check that it has exactly three fields (COUNT WORD FWORD),
# then print it with the number of brace-delimited elements of FWORD
# appended as a fourth field.
/./ {
  if (NF != 3) { data_error(("wrong number of fields")); }
  ct = $1; w = $2; fw = $3;
  # Insert a blank between adjacent elements ("}{" -> "} {") so that the
  # default whitespace split below yields one piece per element.  The
  # braces are written as bracket expressions because a bare "{" or "}"
  # in an ERE is interval syntax and is not guaranteed to be literal.
  y = fw; gsub(/[}][{]/, "} {", y);
  # Only the piece count is used; the pieces in wels are discarded.
  nels = split(y, wels);
  print ct, w, fw, nels;
  next;
}

# arg_error(msg): report a command-line error.
# Writes msg and the usage string to standard error, records exit
# status 1 in the global `abort`, and terminates the program with it.
# (Not called anywhere in the visible part of this file.)
function arg_error(msg)
{
  abort = 1;
  printf("%s\n", msg) > "/dev/stderr";
  printf("usage: %s\n", usage) > "/dev/stderr";
  exit abort;
}

# data_error(msg): report a malformed input record.
# Writes msg to standard error, prefixed with the current input file
# name (FILENAME) and per-file line number (FNR), records exit status 1
# in the global `abort`, and terminates the program with it.
function data_error(msg)
{
  abort = 1;
  printf("file %s, line %d: %s\n", FILENAME, FNR, msg) > "/dev/stderr";
  exit abort;
}