#! /usr/bin/gawk -f 
# Last edited on 2004-02-17 15:19:37 by stolfi

BEGIN {
  # Purpose: tabulate, section by section, the counts of raw, good and bad
  # tokens and words, and emit them as a LaTeX "tabular" environment.
  #
  # Each input record is expected to carry 11 fields:
  #
  #   SEC  RAWTK GUDTK GUDTKPPM BADTK BADTKPPM  RAWWD GUDWD GUDWDPPM BADWD BADWDPPM
  #
  # with RAWTK = GUDTK + BADTK, GUDTKPPM = 1000*GUDTK/RAWTK and
  # BADTKPPM = 1000*BADTK/RAWTK; the word fields follow the same pattern.
  # Only SEC and the six count fields are read by this script; the
  # percentages shown in the table are recomputed from the counts.
  # A record whose SEC is "/" carries no data: the remainder of the line
  # is ignored and a break (horizontal rule) is placed in the table.

  usage = ( "tex-format-raw-gud-bad-counts \\\n" \
    "  < INFILE.wct > OUTFILE.tex" \
  );

  # Exit status requested by an error routine; a negative value means
  # that no error has been reported.
  abort = -1;

  # Per-row tables, all indexed by row number s in [1..ns]:
  delete stag;   # section tag, or "/" for a break row
  delete traw;   # tokens: total
  delete tgud;   # tokens: accepted
  delete tbad;   # tokens: discarded
  delete wraw;   # words: total
  delete wgud;   # words: accepted
  delete wbad;   # words: discarded

  # Number of rows stored so far (break rows "/" are counted too).
  ns = 0;
}

# After an error routine has set abort, stop reading input and leave
# with that status.
abort >= 0 { exit abort }

# Lines that are empty, contain only spaces, or whose first non-space
# character is "#" carry no data and are skipped.
$0 ~ /^ *([#]|$)/ { next }

/./ {
  # The record is written tentatively into the slot just past the last
  # accepted row. It becomes part of the table only if ns is advanced;
  # otherwise the next record simply overwrites it.
  i = ns+1;
  stag[i] = $1;

  if (stag[i] == "/")
    { # Break row: accept it unless the previous accepted row is already
      # a break, which can happen once zero-count sections are dropped.
      if ((ns == 0) || (stag[ns] != "/")) { ns = i; }
      next;
    }

  # Data row: must have the full set of fields.
  if (NF != 11) { data_error(("bad line format = |" $0 "|")); }

  # Pick up the six counts, forcing numeric interpretation.
  traw[i] = $2+0;
  tgud[i] = $3+0;
  tbad[i] = $5+0;
  wraw[i] = $7+0;
  wgud[i] = $8+0;
  wbad[i] = $10+0;

  # The totals must be the sum of the accepted and discarded counts.
  if (tgud[i] + tbad[i] != traw[i]) { data_error(("toks error = |" $0 "|")); }
  if (wgud[i] + wbad[i] != wraw[i]) { data_error(("wrds error = |" $0 "|")); }

  # Sections with no tokens at all are left out of the table.
  if (traw[i] > 0) { ns = i; }
  next;
}

END {
  # An error routine already picked the exit status: produce no table.
  if (abort >= 0) { exit abort; }

  # A break as the very last row is dropped.
  if (ns > 0 && stag[ns] == "/") { ns -= 1; }

  print_table();
}

# Writes the whole LaTeX table to standard output: a leading comment line,
# the tabular preamble, two header rows, one line per stored row
# (stag[1..ns] and the matching count arrays), and the closing lines.
# The names after the gap in the parameter list are local variables.
function print_table(   row,grp,top,eol)
{
  # NOTE(review): per the gawk manual ARGV[0] holds the interpreter name
  # (e.g. "gawk"), not this script's file name -- confirm that is intended.
  printf "%% Created by %s\n", ARGV[0];

  # Terminator shared by both header rows: row end plus horizontal rule.
  eol = " \\\\ \\hline\n";

  # Column layout: section tag, then 5 token columns, then 5 word columns.
  printf "\\begin{tabular}{|l|r|rr|rr||r|rr|rr|} \\hline\n";

  # First header row: group titles spanning five columns each.
  top = "        " " &";
  top = top " \\multicolumn{5}{|c||}{{\\rm Tokens}}";
  top = top " & ";
  top = top " \\multicolumn{5}{c|}{{\\rm Words}}";
  printf "%s%s", top, eol;

  # Second header row: the same three column titles, once for tokens and
  # once for words.
  grp = " &";
  grp = grp " \\multicolumn{1}{|r|}{{\\rm Total}}";
  grp = grp " & ";
  grp = grp " \\multicolumn{2}{r|}{{\\rm Accepted}}";
  grp = grp " & ";
  grp = grp " \\multicolumn{2}{r|}{{\\rm Discarded}}";
  printf "%s%s%s%s", " {\\rm Sec}", grp, grp, eol;

  # Body: a break row becomes a horizontal rule, any other row becomes a
  # table line with the tag followed by the token and word counts.
  for (row = 1; row <= ns; row++)
    { if (stag[row] != "/")
        { printf "  {\\tt %s}", stag[row];
          print_counts(traw[row], tgud[row], tbad[row]);
          print_counts(wraw[row], wgud[row], wbad[row]);
          printf " \\\\\n";
        }
      else
        { printf "  \\hline\n"; }
    }

  # Closing rule and end of the environment.
  printf "  \\hline\n";
  printf "\\end{tabular}%%\n";
}
  
# Writes five table cells to standard output, each preceded by " & " and
# right-aligned in a 10-character field:
#   total count, accepted count, accepted percentage,
#   discarded count, discarded percentage.
# Counts are wrapped in \ct{...} and percentages in \pc{...}; percentages
# are relative to ctraw and printed with one decimal.
#   ctraw  total count
#   ctgud  accepted count
#   ctbad  discarded count
# The names after the gap in the parameter list are local variables.
function print_counts(ctraw,ctgud,ctbad,   den,tot,gud,gudpc,bad,badpc)
{
  # Divide by 1 instead of 0 when the total is zero.
  if (ctraw == 0) { den = 1; } else { den = ctraw; }

  tot   = sprintf("\\ct{%d}", ctraw);
  gud   = sprintf("\\ct{%d}", ctgud);
  gudpc = sprintf("\\pc{%.1f}", 100*ctgud/den);
  bad   = sprintf("\\ct{%d}", ctbad);
  badpc = sprintf("\\pc{%.1f}", 100*ctbad/den);

  printf " & %10s & %10s & %10s & %10s & %10s", tot, gud, gudpc, bad, badpc;
}

# Reports a command-line error: writes msg and then the usage summary
# (global `usage`) to standard error, records exit status 1 in the global
# `abort`, and exits with status 1.
#   msg  text describing the problem
function arg_error(msg)
{ 
  # The format has a single %s, so msg must be its only argument; the
  # previous version passed NR first, which printed the record number
  # and dropped the message.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  # abort is set before exiting so that the END rule, which awk still
  # runs after exit, sees the error and does not print a table.
  abort = 1; exit 1;
}

# Reports a problem with the input data: writes the current record number
# within the file (FNR) and msg to standard error, records exit status 1
# in the global `abort`, and exits with that status.
#   msg  text describing the problem
function data_error(msg)
{
  abort = 1;
  printf("line %d: %s\n", FNR, msg) > "/dev/stderr";
  exit abort;
}