#! /usr/bin/gawk -f
# Last edited on 2004-02-17 15:18:46 by stolfi

BEGIN {
  # "abort" is the pending exit status; negative means "no error so far".
  abort = -1;
  usage = ( "tex-format-raw-gud-bad-summary \\\n" \
    "  -v smp=STRING [ -v totSection=TOTSEC ] \\\n" \
    "  < INFILE.wct > OUTFILE.tex" \
  );

  # Outputs a summary of the raw/good/bad token and word counts for 
  # text subset SMP as a TeX macro definition file. Assumes that
  # the input records have 11 fields 
  # 
  #   SEC  RAWTK GUDTK GUDTKPPM BADTK BADTKPPM  RAWWD GUDWD GUDWDPPM BADWD BADWDPPM
  #
  # where SEC is a section tag, RAWTK = GUDTK + BADTK, 
  # GUDTKPPM = 1000*GUDTK/RAWTK,  BADTKPPM = 1000*BADTK/RAWTK,
  # and ditto for words. Uses only the entry with SEC = TOTSEC.
  #
  # Lines that are blank or start with "#" or "/" (after optional
  # leading spaces) are ignored.
  #
  # For that entry, the output defines the TeX macros
  #
  #   \<smp>RawTks \<smp>GudTks \<smp>GudTksPct \<smp>BadTks \<smp>BadTksPct
  #   \<smp>RawWds \<smp>GudWds \<smp>GudWdsPct \<smp>BadWds \<smp>BadWdsPct
  #
  # where each "Pct" value is the corresponding "PPM" field divided
  # by 10, printed with one decimal digit.

  if (smp == "") { arg_error("must define \"smp\""); }
  # Default section tag of the totals entry.
  if (totSection == "") { totSection = "tot.1" }
  
  # Macro name suffixes for the two field groups: tokens, then words.
  split("", tw); tw[0] = "Tks"; tw[1] = "Wds";
  
  # A literal "%" in a printf format must be written "%%"; a bare
  # "% C" is an undefined conversion specification.
  printf "%% Created by tex-format-raw-gud-bad-summary\n";
  found_tot = 0;
}

# Once an error status has been recorded, stop processing input.
abort >= 0 {
  exit abort;
}

# Ignore blank lines and lines whose first non-space character is "#" or "/".
$0 ~ /^ *([#/]|$)/ {
  next;
}

# Data records: validate the field count, then emit the macro
# definitions for the record whose section tag equals totSection.
/./ {
  if (NF != 11) { data_error("bad input format"); }
  if ($1 != totSection) { next; }
  found_tot = 1;
  # k = 0 selects the token fields ($2..$6), k = 1 the word fields ($7..$11).
  for (k = 0; k < 2; k++)
    { # Emit a TeX comment line ("%") as a visual separator. The "%"
      # must be doubled inside a printf format string.
      printf "%%\n";
      pdef_i(("Raw" tw[k]), $(5*k + 2));
      pdef_i(("Gud" tw[k]), $(5*k + 3));
      # The "PPM" fields are divided by 10 to give the "Pct" values.
      pdef_r(("Gud" tw[k] "Pct"), $(5*k + 4)/10);
      pdef_i(("Bad" tw[k]), $(5*k + 5));
      pdef_r(("Bad" tw[k] "Pct"), $(5*k + 6)/10);
    }
}

END {
  # An "exit" executed in BEGIN or in a main rule still runs the END
  # rule. If an error was already reported, just propagate its status
  # instead of printing a spurious "could not find section" message.
  if (abort >= 0) { exit abort; }
  if (! found_tot) 
    { data_error(("could not find section \"" totSection "\"")); }
}

# Defines the TeX macro for VAR with the value VAL passed through
# unchanged (used for the count fields).
function pdef_i(var, val) {
  pdef(var, val);
}

# Defines the TeX macro for VAR with the numeric value VAL formatted
# with exactly one digit after the decimal point.
function pdef_r(var, val) {
  pdef(var, sprintf("%.1f", val));
}

# Writes one line "\def\<smp><var>{<val>}" to standard output, where
# "smp" is the global sample prefix given on the command line.
function pdef(var, val) {
  printf("\\def\\%s%s{%s}\n", smp, var, val);
}

# Reports a command-line argument error: writes MSG and the usage
# text to the error output, records the failure status in the global
# "abort", and terminates with exit status 1.
function arg_error(msg) {
  printf "%s\nusage: %s\n", msg, usage >> "/dev/stderr";
  abort = 1;
  exit abort;
}

# Reports an input data error: writes MSG prefixed with the current
# record number (FNR) to the error output, records the failure status
# in the global "abort", and terminates with exit status 1.
function data_error(msg) {
  printf("line %d: %s\n", FNR, msg) > "/dev/stderr";
  abort = 1;
  exit abort;
}