#! /usr/bin/gawk -f
# Last edited on 2002-01-16 13:35:35 by stolfi

BEGIN {
  # `abort' is the pending exit status: negative means "no error so far";
  # the error functions set it to 1 and the other rules exit with it.
  abort = -1;
  usage = ( ARGV[0] "\\\n" \
    "  [ -v ncols=NUM ] \\\n" \
    "  [ -v showCounts=BOOL ] \\\n" \
    "  [ -v showFreqs=BOOL ] \\\n" \
    "  < INFILE.wct > OUTFILE.tex" \
  );

  # Tabulates the counts and/or frequencies of words, as a LaTeX table.
  # Assumes the input records have fields 
  # 
  #   COUNT FREQ WORD
  # 
  # where WORD is in EVA. The output is formatted as
  # `ncols' columns, filled column-wise.

  # Defaults for options that were not given with `-v':
  if (ncols == "") { ncols = 4; }
  if (showCounts == "") { showCounts = 1; }
  if (showFreqs == "") { showFreqs = 1; }

  # `ncols' is used as a divisor and as a loop bound when the table is
  # printed, so it must be a positive whole number.  Reject anything else
  # here, before any input is read, instead of failing later with a
  # division by zero or looping on a string comparison.
  if (((ncols + 0) < 1) || ((ncols + 0) != int(ncols + 0)))
    { arg_error(("bad ncols = \"" ncols "\" - must be a positive integer")); }
  ncols = int(ncols + 0);

  # Per-word tables, indexed 1..nw in input order:
  split("", wct);  # counts
  split("", wfr);  # frequencies
  split("", wrd);  # words
  
  nw = 0;
}

# Once an error status has been recorded, stop processing records.
abort >= 0 {
  exit abort
}

# Skip blank lines and `#' comments.  Leading tabs count as blanks too, so
# that a tab-only line or a tab-indented comment is not mistaken for data.
/^[ \t]*([#]|$)/ { next; }

# Data record: COUNT FREQ WORD.  Validates it and appends it to the
# `wct', `wfr', `wrd' tables under the next index `nw'.
/./ { 
  if (NF != 3) { data_error("bad line format"); }
  ct = $1; fr = $2; w = $3;
  # Reject words containing one of the characters `$&% _^#' unless it is
  # preceded by a backslash.  The `(^|...)' alternative also catches such
  # a character at the very start of the word, where there is no
  # preceding character for `[^\\]' to match.
  if (match(w, /(^|[^\\])[$&% _\^#]/))
    { data_error(("bad word \"" w "\" - has \"" substr(w,RSTART,RLENGTH) "\"")); }
  nw++;
  wct[nw] = ct;
  wfr[nw] = fr;
  wrd[nw] = w;
  next;
}

END {
  # Print the table only when no error was recorded; otherwise
  # leave with the recorded status.
  if (abort < 0)
    { print_word_table(); }
  else
    { exit abort; }
}

function print_word_table(   i,nrows,col,row,xw,xct,xfr)
{
  # Writes to standard output a LaTeX `tabular' environment holding the
  # `nw' entries of the global tables `wrd', `wct', `wfr'.  The entries
  # are laid out in `ncols' column groups, filled column-wise.  Each
  # group has an optional count cell (if `showCounts'), an optional
  # frequency cell (if `showFreqs'), and the word cell.  Takes no
  # arguments; the names in the parameter list are local variables.

  # Rows needed to hold `nw' entries in `ncols' groups (rounded up):
  nrows = int((nw + ncols - 1)/ncols);
  printf "%% Created by %s\n", ARGV[0];
  
  # Column specification of the tabular:
  printf "\\begin{tabular}{|";
  for (col = 0; col < ncols; col++)
    { if (showCounts) { printf "r"; }
      if (showFreqs)  { printf "r"; }
      printf "l|";
    }
  printf "} \\hline\n";
  
  # Table entries:
  for (row = 0; row < nrows; row++)
    { for (col = 0; col < ncols; col++)
        { # Column-wise filling: group `col' holds entries
          # col*nrows+1 .. (col+1)*nrows.
          i = col*nrows + row + 1;
          if (i <= nw)
            { xct = ("\\ct{" sprintf("%d", wct[i]) "}"); 
              # Show the frequency without its leading zero (".1234").
              # Only a leading "0" is removed, so that a frequency of
              # 1.0 still prints as "1.0000" rather than ".0000".
              xfr = sprintf("%6.4f", wfr[i]);
              sub(/^0/, "", xfr);
              xfr = ("\\fr{" xfr "}");
              xw =  ("\\ev{" wrd[i] "}");
            }
          else
            { # Padding cell past the last entry; the first such cell
              # gets a `\dots' marker, the others stay empty.
              xct = "";
              xfr = "";
              xw =  (i == nw+1 ? "\\dots" : "");
            }
          printf "  ";
          if (showCounts) { printf "%10s & ", xct; }
          if (showFreqs) { printf "%10s & ", xfr; }
          printf "%-10s ", xw;
          # The last group closes the table row; the others are
          # followed by a cell separator.
          if (col == ncols-1) 
            { printf "\\str\\\\\n"; }
          else
            { printf "&\n"; }
        }
    }
  printf "  \\hline\n"; 
  printf "\\end{tabular}%%\n"; 
}

function arg_error(msg)
{ 
  # Reports a command-line/option error: prints `msg' and the usage
  # summary to standard error, records exit status 1 in `abort', and
  # exits.  The format takes exactly one argument, `msg'; passing `NR'
  # ahead of it would print the record number and drop the message.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1; exit 1;
}

function data_error(msg)
{
  # Reports a malformed input record: prints the current line number of
  # the input file (FNR) and `msg' to standard error, records the
  # failure status in `abort', and exits with that status.
  printf("line %d: %s\n", FNR, msg) > "/dev/stderr";
  abort = 1;
  exit abort;
}