#! /n/gnu/bin/gawk -f
# Last edited on 1998-07-27 02:07:42 by stolfi

BEGIN {
  # Command-line synopsis.  The inner double quotes are escaped and each
  # source line ends in a backslash so that the parenthesized
  # concatenation is a single valid awk expression.
  usage = ( \
      "tabulate-triple-counts \\\n" \
      "  -v which=[P|M|S] \\\n" \
      "  -v percent=[0|1] \\\n" \
      "  -v prefs=\"P1 P2...\" \\\n" \
      "  -v midfs=\"M1 M2...\" \\\n" \
      "  -v suffs=\"S1 S2...\" \\\n" \
      "  < TRCOUNTS > TABLES" \
    );

  # The records of TRCOUNTS must have the form COUNT P,M,S
  # (two blank-separated fields, the second being a comma-separated
  # triple), where COUNT is an integer and P,M,S are strings.
  #
  # One of the three strings, selected by the "which" variable
  # ("P", "M" or "S"), is called the "slice"; the other two are the
  # "left" and "right" strings.  The file must be sorted by slice.
  #
  # This script prints one table for each slice K,
  # where element [I,J] is the count of triples
  # with slice = K, left = I and right = J.
  #
  # If "percent" is true, prints the percentage of the
  # count relative to the table total, instead of the count
  # itself.

  abort = 0;

  # K is the field index of the slice; I and J are the field indices
  # of the left and right strings, respectively.
  if (which == "P")      { K = 1; I = 2; J = 3; }
  else if (which == "M") { K = 2; I = 1; J = 3; }
  else if (which == "S") { K = 3; I = 1; J = 2; }
  else { error("bad \"which\""); }

  # Below "r" is a field index in a triple (1, 2, or 3).
  # The string "lab[r,s]" is the "s"th alternative for field "r".
  # The count "tot[w]" is the count of triples in the current slice.
  # The count "nlab[r]" is the number of entries in "lab[r,*]".
  # The split("", X) calls just make X an empty array.
  split("", lab);
  split("", tot);
  split("", nlab);
  getlabs(prefs, 1, lab, nlab);
  getlabs(midfs, 2, lab, nlab);
  getlabs(suffs, 3, lab, nlab);

}

function getlabs(str, r, lab, nlab,     words,count,k)
{
  # Breaks "str" into words at the default field separator and
  # records them as the known alternatives for triple field "r":
  #   lab[r,1..count] receive the words, in order;
  #   nlab[r] receives the number of words.
  # "words", "count" and "k" are local variables.

  count = split(str, words);
  for (k = count; k >= 1; k--)
    { lab[r,k] = words[k]; }
  nlab[r] = count;
}

/./{
  # Runs for every non-empty input line: validates the record and
  # makes sure each component of its triple is a known label.

  # Stop only if an error has been flagged; "abort" starts out as 0,
  # so the test must be strict or every run ends at the first record.
  if (abort > 0) { exit abort; }

  # A record must have exactly two fields; the second one must split
  # into exactly three comma-separated strings.
  if (NF != 2) { error(("line " NR ": bad record format")); }
  nf = split($2, fld, ",");
  if (nf != 3) { error(("line " NR ": bad triple format")); }

  # Register the three strings under field indices 1, 2 and 3.
  checklab(fld[1], 1, lab, nlab);
  checklab(fld[2], 2, lab, nlab);
  checklab(fld[3], 3, lab, nlab);

  # TODO: accumulating and printing the per-slice tables is not
  # implemented in this rule.
}

function checklab(str, r, lab, nlab,     s,n)
{
  # Appends "str" to "lab[r,*]" (incrementing "nlab[r]") if not yet there.
  #   str   the label to look up;
  #   r     the field index whose label list is searched;
  #   lab   label table, indexed [r,s];
  #   nlab  number of labels stored for each "r".
  # "s" and "n" are local variables.

  n = nlab[r] + 0;  # An unset count is treated as an empty list.
  for (s = 1; s <= n; s++)
    {
      # Concatenating "" forces a string comparison, so that labels
      # that look like numbers (e.g. "1" and "1.0") stay distinct.
      if ((lab[r,s] "") == (str "")) { return; }
    }
  n++;
  lab[r,n] = str;
  nlab[r] = n;
}



function error(msg)
{
  # Reports a fatal error: sets the global "abort" flag to 1, writes
  # "msg" followed by a newline to standard error, and terminates
  # the script with exit status 1.
  abort = 1;
  printf("%s\n", msg) > "/dev/stderr";
  exit 1;
}