#! /usr/bin/gawk -f
# Last edited on 2002-01-03 22:24:39 by stolfi

BEGIN {
  # "abort" stays negative until arg_error() or data_error() records a
  # failure; the input rules and END test it so that no table is printed
  # once an error has been reported.
  abort = -1;
  usage = ( ARGV[0] "\\\n" \
    "  { -v elemList='a,o,...' | -v elemTable=FILE } \\\n" \
    "  [ -v ncols=NUM ] \\\n" \
    "  [ -v minFreq=FRQ ] \\\n" \
    "  [ -v freqDigits=NUM ] \\\n" \
    "  [ -v showHeader=BOOL ] \\\n" \
    "  [ -v showClasses=BOOL ] \\\n" \
    "  [ -v showCounts=BOOL ] \\\n" \
    "  [ -v showFreqs=BOOL ] \\\n" \
    "  < INFILE.frq > OUTFILE.tex" \
  );

  # Tabulates given counts and/or frequencies of symbols
  # and formats the output as a LaTeX table.
  # Assumes the input records have fields
  #
  #   COUNT FREQ GLYPH
  #
  # where GLYPH is an EVA string, already capitalized.
  # The output is formatted as `ncols' columns, filled row-wise.
  #
  # The output entries correspond to the glyphs listed in the
  # "elemList" string or in the "elemTable" file. In these lists,
  #   if GLYPH = "~", the entry is left blank.
  #   if GLYPH = "/", the current row is padded with blanks.
  #   if GLYPH = "-", does the same, then inserts a horizontal line.
  # These special glyphs may occur multiple times.
  #
  # If showClasses is TRUE, also prints the element's class
  # at the leftmost column.  This option is effective only
  # when the elements are read from a file.
  #
  # Frequencies below minFreq are printed as a "zero" marker, and
  # freqDigits is the number of decimals requested from sprintf
  # (see format_freq).

  # Defaults for options that were not given with "-v":
  if (ncols == "") { ncols = 2; }
  if (showHeader == "") { showHeader = 0; }
  if (showCounts == "") { showCounts = 1; }
  if (showFreqs == "") { showFreqs = 1; }
  if (showClasses == "") { showClasses = ( elemTable != ""); }
  if (minFreq == "") { minFreq = 0.00005; }
  if (freqDigits == "") { freqDigits = 4; }

  # The table layout needs at least one entry per row, and the column
  # counters are compared against ncols as whole numbers.  A non-numeric
  # value compares as a string against int(ncols) and is rejected too.
  if ((ncols != int(ncols)) || (ncols + 0 < 1))
    { arg_error("\"ncols\" must be a positive integer"); }

  # The element list must come from exactly one source.
  if ((elemList == "") == (elemTable == ""))
    { arg_error("must define exactly one of \"elemList\" and \"elemTable\""); }

  # Start from empty arrays; both loaders receive the same three arrays
  # and their return value is stored as the element count.
  split("", elem);
  split("", eindex);
  split("", eclass);
  if (elemList != "") 
    { nelems = parse_explicit_elems(elemList,elem,eindex,eclass); }
  else
    { nelems = load_elems_from_file(elemTable,elem,eindex,eclass); }

  # NOTE(review): "hasclass" is not assigned anywhere in the visible code;
  # presumably the element loader sets it when classes are present -- confirm.
  if (showClasses && (! hasclass))
    { arg_error("there are no classes to show"); }

  # Count and frequency tables, indexed with the capitalized element itself:
  split("", ect);
  split("", efr);
}

# Once an error has been recorded, stop consuming input and leave with
# the recorded status.
abort >= 0 {
  exit abort;
}

# Blank lines and "#" comment lines (optionally preceded by spaces)
# carry no data.
$0 ~ /^ *([#]|$)/ {
  next;
}

# Data record: "COUNT FREQ GLYPH".  Stores the count and frequency of the
# glyph in ect[] and efr[], rejecting malformed or repeated records.
/./ { 
  if (NF != 3) { data_error("bad line format"); }
  ct = $1; fr = $2; e = $3;
  nread++;
  # A glyph is a run of letters and "?".  The lone "+" is accepted as well:
  # format_elem() renders it as the total marker, so a record for it must
  # be loadable or that entry could never show any data.
  if ((e != "+") && (e !~ /^[A-Za-z?]+$/)) { data_error(("bad elem \"" e "\"")); }
  # Each glyph may be given only once.
  if (e in ect) { data_error(("repeated elem \"" e "\"")); }
  ect[e] = ct;
  efr[e] = fr;
  next;
}

END {
  # END also runs after an "exit" issued in BEGIN or in an input rule, so
  # the table is produced only when no error was recorded; otherwise the
  # recorded status is returned.
  if (abort < 0)
    { print_elem_freqs_table(); }
  else
    { exit abort; }
}

# Writes the whole LaTeX table to standard output: a "Created by" comment,
# the tabular preamble, the optional header, one entry per element of
# elem[1..nelems] laid out `ncols' entries per row, and the closing
# "\end{tabular}".  A trace of the elements processed goes to /dev/stderr.
#
# Takes no arguments; every name in the parameter list is a local
# variable.  ("oldrow" is declared but never used in the body.)
# Reads the globals nelems, elem[], eclass[], ect[], efr[], ncols,
# showHeader and showClasses.
function print_elem_freqs_table(  \
    i,col,row,oldrow,hline,cline,e,ct,fr,cl,oldcl,xe,xct,xfr,xcl \
)
{
  printf "%% Created by %s\n", ARGV[0];
  
  # Table preamble
  output_table_preamble();
  
  # Table header:
  if (showHeader) { output_table_header(); }
  
  # Table entries:
  # State carried across iterations:
  #   row    number of rows started so far;
  #   col    column where the next entry goes; starting at ncols+1
  #          makes the first real entry open a new row;
  #   hline  when set, the next row break draws a full \hline;
  #   cline  when positive (and hline is clear), the next row break
  #          draws a \cline of that many entries;
  #   oldcl  class of the previously printed entry.
  oldcl = "";
  row = 0;
  col = ncols+1;
  hline = 1;
  cline = 0;
  # Progress trace on stderr: the element count, then every element visited.
  printf "nelems = %d:", nelems > "/dev/stderr";
  # Keep going past the last element while the current row is incomplete,
  # feeding blank ("~") entries so that the last row is fully padded.
  for (i = 1; ((i <= nelems) || (col <= ncols)); i++)
    { # Assert: col > 1.
      e = (i <= nelems ? elem[i] : "~");
      printf " %s", e > "/dev/stderr";
      # Obtain element data:
      if (e ~ /^[-\/]$/)
        { # Row break ("/" or "-"): pad the rest of the current row with
          # blank entries; "-" additionally requests a rule under the row.
          while (col <= ncols) { output_entry(col, "", "", ""); col++; }
          if (e == "-") { cline = ncols; }
        }
      else 
        { if (e == "~")
            { # Blank entry: keeps the current class and prints empty cells.
              cl = oldcl; ct = 0; fr = 0;
              xe = ""; xcl = ""; xct = ""; xfr = "";
            }
          else
            { # Ordinary element: its class is indexed by position, its
              # count and frequency by the glyph itself.
              cl = eclass[i]; ct = ect[e]; fr = efr[e]; 
              # Format values
              if (showClasses && (cl != oldcl))
                { # A new class starts on a fresh row, separated from the
                  # previous class by a full horizontal line.
                  xcl = ("\\cl{" cl "}");
                  while (col <= ncols) { output_entry(col, "", "", ""); col++; }
                  hline = 1; cline = 0;
                }
              else
                { xcl = ""; }
              xe = format_elem(e);
              xct = format_count(ct); 
              xfr = format_freq(fr);
            }
          
          # Print element entry
          if (col > ncols)
            { # The previous row is full: close it (with whatever rule is
              # pending) and start the next one at column 1.
              end_row(row, hline, cline);
              row++; col = 1; hline = 0; cline = 0;
            }
          # Assert: col <= ncols
          output_entry(col, xe, xct, xfr, xcl); col++;
          oldcl = cl;
        }
    }
  printf "\n" > "/dev/stderr";
  # Close the last row with a full horizontal line, then the environment.
  end_row(row, 1, 0);
  printf "\\end{tabular}%%\n"; 
}

# Prints the "\begin{tabular}{...}" line.  The column specification is an
# optional centred class column followed by `ncols' entry groups; each
# group is a centred glyph column, a right-aligned column for each of the
# enabled count and frequency cells, and a closing vertical rule.
function output_table_preamble(   k,group)
{
  # Specification of a single entry group, built once and reused.
  group = "c";
  if (showCounts) { group = group "r"; }
  if (showFreqs)  { group = group "r"; }
  group = group "|";

  printf "\\begin{tabular}{|";
  if (showClasses) { printf "c|"; }
  for (k = 0; k < ncols; k++) { printf "%s", group; }
  printf "}\n";
}

# Prints the header row: a horizontal line, the "class" heading when
# showClasses is set, and a "glyphs" heading spanning all entry columns.
function output_table_header(   span)
{
  # Each entry occupies one tabular column for the glyph plus one for each
  # of the enabled count and frequency cells (the same layout that
  # output_table_preamble declares), so the heading must span that many
  # columns per entry to reach the right edge of the table.
  span = ncols * (1 + (showCounts ? 1 : 0) + (showFreqs ? 1 : 0));
  printf "  \\hline\n"; 
  if (showClasses) { printf "  \\hd{class} &"; }
  printf "  \\multicolumn{%d}{l|}{\\hd{glyphs}} \\\\\n", span;
}

# Returns the LaTeX text for glyph `e': the "\tot" macro for the
# pseudo-glyph "+", otherwise the glyph wrapped in "\ev{...}".
function format_elem(e)
{
  return (e == "+" ? "\\tot" : ("\\ev{" e "}"));
}

# Returns the LaTeX text for count `ct': "\zeroct" when the count is
# numerically zero (an empty value included), otherwise the count
# formatted with "%d" inside "\ct{...}".
function format_count(ct)
{
  if (ct + 0 != 0)
    { return ("\\ct{" sprintf("%d", ct) "}"); }
  return "\\zeroct";
}

# Returns the LaTeX text for frequency `fr'.  Values below the global
# minFreq become "\zerofr".  Other values are formatted with freqDigits
# decimals and cut to freqDigits+1 characters inside "\fr{...}": the
# leading character is dropped when the formatted value is below one,
# the last character otherwise.
function format_freq(fr,   first)
{ 
  if (fr + 0 < minFreq) 
    { return "\\zerofr"; }

  fr = sprintf("%*.*f", freqDigits+2, freqDigits, fr);
  # NOTE(review): fr now holds sprintf output, so this test is made on the
  # formatted text, not on the original number -- keep it as written.
  first = (fr >= 1.0 ? 1 : 2);
  return ("\\fr{" substr(fr, first, freqDigits+1) "}");
}

# Terminates the current table row and draws the rule that follows it.
#   row    number of rows started so far; when zero there is no row to
#          terminate and only the rule is printed.
#   hline  when true, draws a full-width "\hline".
#   cline  otherwise, when positive, the number of table entries that a
#          "\cline" should underline, starting after the class column.
function end_row(row,hline,cline,   fcol,width)
{
  if (row > 0) { printf "\\str\\\\\n"; }
  if (hline) 
    { printf "  \\hline\n"; }
  else if (cline > 0)
    { # "\cline" is addressed in tabular columns, while `cline' counts
      # entries; each entry is one glyph column plus one column for each
      # of the enabled count and frequency cells.
      width = 1 + (showCounts ? 1 : 0) + (showFreqs ? 1 : 0);
      # The entries start after the class column, when there is one.
      fcol = 1 + (showClasses ? 1 : 0 );
      printf "  \\cline{%d-%d}\n", fcol, fcol + cline*width - 1;
    }
}

# Prints one table entry at column `col' of the current row.
#   xe, xct, xfr  already formatted glyph, count and frequency cells;
#                 the last two are printed only when showCounts and
#                 showFreqs are set, respectively.
#   xcl           formatted class cell; printed only in front of the
#                 first entry of a row, and only when showClasses is set.
function output_entry(col,xe,xct,xfr,xcl)
{
  if (col == 1)
    { # First entry of the row: indentation, then the optional class cell.
      printf "  ";
      if (showClasses) { printf "%10s &\n  ", xcl; }
    }
  else
    { # Later entries are separated from the previous one by "&".
      printf "&\n";
      printf "  ";
    }
  printf "%-10s ", xe;
  if (showCounts) { printf "& %10s ", xct; }
  if (showFreqs) { printf "& %10s ", xfr; }
}

# Reports a command-line error: writes `msg' and the usage text to
# /dev/stderr, records the failure in the global "abort" and exits
# with status 1.
function arg_error(msg)
{ 
  abort = 1;
  printf "%s\nusage: %s\n", msg, usage > "/dev/stderr";
  exit 1;
}

# Reports an input data error: writes `msg' prefixed with the current
# record number (FNR) to /dev/stderr, records the failure in the global
# "abort" and exits with status 1.
function data_error(msg)
{ 
  abort = 1;
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  exit 1;
}