#! /usr/bin/gawk -f
# Last edited on 1998-12-20 11:46:44 by stolfi

BEGIN {
  # Usage: compute-tuple-stats < TUPLECTS > TUPLESTATS
  #
  # Input: one record per line, with four fields "COUNT TUPLE CON MAJ":
  #
  #   COUNT  an occurrence count for TUPLE.
  #   TUPLE  a string of 26 characters; the k-th character is the
  #          reading of one VMS character position by the k-th of 26
  #          potential transcribers ("A" thru "Z").
  #   CON    an arbitrary "consensus" reading for TUPLE.
  #   MAJ    an arbitrary "majority" reading for TUPLE.
  #
  # Output: a table with lines of the form "COUNT TRCODE CHR MAJ",
  # where TRCODE is a transcriber code ("A" through "Z"), CHR and MAJ
  # are two EVA characters, and COUNT is the number of times that
  # transcriber read CHR when the majority reading was MAJ.

  usage = "compute-tuple-stats < TUPLECTS > TUPLESTATS";

  # Stays at -1 until one of the *_error functions sets it to 1.
  abort = -1;

  # Transcriber codes, in the same order as the TUPLE positions.
  alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

  # tct[TRCODE,CHR,MAJ] accumulates the counts; make it an empty array.
  split("", tct);
}

/./ {
  # Every non-empty line must be "COUNT TUPLE CON MAJ"; anything else
  # is reported through format_error, which terminates the run.
  if (NF != 4) format_error("wrong num of fields");
  num = $1; tup = $2; con = $3; maj = $4;
  if (length(tup) != 26) { format_error("bad tuple length"); }

  # Position i of the tuple belongs to the transcriber whose code is
  # the i-th letter of alpha.  Positions holding "%" are skipped
  # (presumably "no reading by that transcriber" -- confirm against
  # the producer of the input file).
  for (i = 1; i <= 26; i++)
    { ch = substr(tup, i, 1);
      if (ch == "%") continue;
      tc = substr(alpha, i, 1);
      # Remember both characters so that END can enumerate the table.
      ch_seen[ch] = 1;
      ch_seen[maj] = 1;
      # COUNT occurrences of "tc read ch while the majority was maj".
      tct[tc,ch,maj] += num;
    }
  next;
}

# Catch-all for records the rule above did not consume: just skip them.
{ next }
      
END {
  # An "exit" executed in a rule (or in a function called from one)
  # still runs the END rule.  If one of the *_error functions stopped
  # the run, abort is 1: quit right away with that status instead of
  # printing a table built from only part of the input.
  if (abort >= 0) { exit abort; }

  # Print statistics: one line "COUNT TRCODE CHR MAJ" for each
  # (transcriber, character, majority) combination that was counted.
  # Transcribers are listed in alphabetical order; within a
  # transcriber the line order follows awk's "for (x in array)"
  # traversal, which is unspecified.

  for (i=1;i<=26;i++)
    { tc = substr(alpha,i,1);
      for (ch in ch_seen)
        { for (maj in ch_seen)
            { if ((tc,ch,maj) in tct)
                { printf "%7d %s %s %s\n", tct[tc,ch,maj], tc, ch, maj; }
            }
        }
    }
}

function arg_error(msg)
{
  # Reports a command-line problem: writes MSG and the usage line to
  # stderr, then terminates the program with status 1.
  abort = 1;
  printf("*** %s\nusage: %s\n", msg, usage) > "/dev/stderr";
  exit abort;
}

function table_error(msg)
{
  # Writes MSG to stderr, flagged with "***" and prefixed by the current
  # input file name and line number, then terminates with status 1.
  abort = 1;
  printf("file %s, line %d: *** %s\n", FILENAME, FNR, msg) > "/dev/stderr";
  exit abort;
}
    
function format_error(msg)
{
  # Writes MSG to stderr, prefixed by the current input file name and
  # line number, then terminates the program with status 1.
  abort = 1;
  printf("file %s, line %d: %s\n", FILENAME, FNR, msg) > "/dev/stderr";
  exit abort;
}

function program_error(msg)
{
  # Writes MSG to stderr, flagged with "***", then terminates the
  # program with status 1.
  abort = 1;
  printf("*** %s\n", msg) > "/dev/stderr";
  exit abort;
}