#! /usr/bin/gawk -f
# Last edited on 1998-12-29 15:35:00 by stolfi

BEGIN {
  # Usage: compute-consensus < TUPLECTS > TUPLEMAP
  #
  # INPUT: each non-empty line is "COUNT TUPLE".  COUNT is an integer;
  # TUPLE is a 26-character string holding the readings of one VMS
  # character position by 26 potential transcribers ("A" thru "Z").
  # Within a tuple, "%" denotes "no information" and "*" denotes
  # "unreadable".
  #
  # OUTPUT: each line is "COUNT TUPLE CONS", where COUNT and TUPLE are
  # copied from the input and CONS is the consensus reading for that
  # tuple.
  #
  # CONSENSUS RULE: a reading is "significant" if it is not "%".
  #   * If all significant readings are the same character C
  #     (including "!"), the consensus is C.
  #   * Otherwise, if all significant readings are "!", ".", or ",",
  #     the consensus is ",".
  #   * Otherwise the consensus is "*".
  #   * A tuple with no significant reading at all keeps the
  #     consensus "%".

  # Usage text printed by arg_error.
  usage = "compute-consensus < TUPLECTS > TUPLEMAP";

  # Exit status holder; the error functions set it to 1 before exiting.
  abort = -1;

  # The 26 transcriber codes, in tuple order.
  alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
}

# Main rule: every non-empty input line must have the form "COUNT TUPLE".
# The line is echoed to standard output with the consensus reading of
# TUPLE appended as a third field.  Malformed lines abort the run through
# format_error.
/./ {
  if (NF != 2) { format_error("wrong num of fields"); }
  ct = $1;
  tp = $2;

  # COUNT is printed with "%d", which would silently turn a non-numeric
  # field into 0; reject such a line instead of writing a bogus count.
  if (ct !~ /^[-+]?[0-9]+$/) { format_error("bad count"); }
  if (length(tp) != 26) { format_error("bad tuple length"); }

  printf "%7d %s %s\n", ct, tp, tuple_consensus(tp);
  next;
}

# Returns the consensus reading of the string `tuple`, one character per
# transcriber:
#   "%"  if every reading is "%" (no significant reading);
#   C    if all significant readings are the same character C;
#   ","  if the significant readings differ but are all among
#        ",", "." and "!" ("uncertain space");
#   "*"  otherwise (no consensus, or some reading is itself "*").
# The parameters after `tuple` are local variables, not arguments.
function tuple_consensus(tuple,    n, i, c, cons)
{
  cons = "%";
  n = length(tuple);
  # Once the consensus is "*" no later reading can change it, so stop.
  for (i = 1; ((i <= n) && (cons != "*")); i++)
    { c = substr(tuple, i, 1);
      if ((c == "%") || (c == cons))
        { # Reading is not significant, or agrees with the consensus.
        }
      else if (cons == "%")
        { # First significant reading.
          cons = c;
        }
      else if (is_space_reading(cons) && is_space_reading(c))
        { # Readings differ but are both space or empty.
          cons = ",";
        }
      else
        { # Irreconcilable readings.
          cons = "*";
        }
    }
  return cons;
}

# True (1) if reading `c` is one of ",", "." or "!", the readings that
# may be merged into the "uncertain space" consensus ",".
function is_space_reading(c)
{
  return ((c == ",") || (c == ".") || (c == "!"));
}

# Catch-all: skip any record the rule above did not consume (empty lines).
{ next; }

# Reports `msg` followed by the usage line on standard error, then
# terminates the program with exit status 1 (also recorded in `abort`).
function arg_error(msg)
{
  printf("*** %s\n", msg) > "/dev/stderr";
  printf("usage: %s\n", usage) > "/dev/stderr";
  abort = 1;
  exit 1;
}

# Reports `msg` on standard error, prefixed by the current input file
# name, its line number, and a "***" marker, then terminates the program
# with exit status 1 (also recorded in `abort`).
function table_error(msg)
{
  printf("file %s, line %d: *** %s\n", FILENAME, FNR, msg) > "/dev/stderr";
  abort = 1;
  exit 1;
}
    
# Reports `msg` on standard error, prefixed by the current input file
# name and line number, then terminates the program with exit status 1
# (also recorded in `abort`).
function format_error(msg)
{
  printf("file %s, line %d: %s\n", FILENAME, FNR, msg) > "/dev/stderr";
  abort = 1;
  exit 1;
}

# Reports `msg` on standard error with a "***" marker, then terminates
# the program with exit status 1 (also recorded in `abort`).
function program_error(msg)
{
  printf("*** %s\n", msg) > "/dev/stderr";
  abort = 1;
  exit 1;
}