#! /usr/bin/gawk -f
# Last edited on 2012-02-14 01:49:46 by stolfilocal

BEGIN {
  # Input: a table with one "{COUNT} {OLD_SPELLING} {SEP} {NEW_SPELLING}" entry per line.
  # Entries whose {SEP} is "<" or ">" are read but not tallied.
  # For each {OLD_SPELLING}, keeps the {NEW_SPELLING} with the largest {COUNT}
  # and computes {FRACTION} = that count divided by the total count of {OLD_SPELLING}.
  # Output: "{OLD_SPELLING} {NEW_SPELLING} # {FRACTION}" on stdout when {FRACTION}
  # is at least 0.800 and the two spellings differ; entries below that threshold
  # are reported on stderr with the prefix "ambiguous:".

  # Tables indexed by {oldsp}; each one starts out as an empty array:
  delete o_npairs; # Number of tallied entries for {oldsp}.
  delete o_newsp;  # New spelling of {oldsp} with the largest count.
  delete o_maxct;  # Count of that {oldsp,newsp} pair.
  delete o_totct;  # Total count over all tallied entries of {oldsp}.

  # Exit status requested by the error routines; negative means "no error so far".
  abort = -1;
}

# Once an error routine has set {abort} to a non-negative status,
# stop processing records and exit with that status.
abort >= 0 {
  exit abort;
}

# Data rule: handles records of the form "{COUNT} {OLD_SPELLING} {SEP} {NEW_SPELLING}",
# where {SEP} is a single character among "=", "|", "<", ">", "(", ")".
# Records whose {SEP} is "<" or ">" are accepted but not tallied. For every
# other {SEP}, the record's {COUNT} is added to the total of {OLD_SPELLING},
# and {NEW_SPELLING} is remembered if its count is strictly larger than the
# largest seen so far for that {OLD_SPELLING} (so the first of several equal
# maxima wins). Malformed records are reported through {data_error}.
/^[ ]*[0-9]+[ ].*[ ][=|<>()][ ].*$/ { 
  if (NF != 4) { data_error(("bad format")); }
  count = $1 + 0; # The pattern guarantees digits; "+ 0" makes the numeric intent explicit.
  oldsp = $2;
  opcmp = $3;
  newsp = $4;
  # The pattern above only proves that a separator occurs somewhere in the
  # record. Make sure it really is the third field, otherwise a record such
  # as "5  = x y" would be tallied with a spelling in the separator position.
  if (opcmp !~ /^[=|<>()]$/) { data_error(("bad format")); }
  if ((opcmp != "<") && (opcmp != ">"))
    { 
      if (! (oldsp in o_npairs)) 
        { # First tallied entry for this old spelling: start its counters at zero.
          o_npairs[oldsp] = 0; 
          o_maxct[oldsp] = 0;
          o_totct[oldsp] = 0;
        }
      if (count > o_maxct[oldsp])
        { o_newsp[oldsp] = newsp;
          o_maxct[oldsp] = count;
        }
      o_totct[oldsp] += count;
      o_npairs[oldsp]++;
    }
  next;
}
  
# Catch-all rule: any record not consumed by an earlier rule is malformed.
{
  data_error("bad format");
}

END {
  # If an error was flagged earlier, just propagate its exit status.
  if (abort >= 0) { exit abort; }

  # Visit the old spellings in the order produced by {asorti} on the table keys.
  nkeys = asorti(o_totct, sorted_key);
  for (i = 1; i <= nkeys; i++) {
    key = sorted_key[i];

    # Every key in the tables must have at least one tallied entry and a positive total.
    if (o_npairs[key] <= 0) { prog_error("duh?"); }
    if (o_totct[key] <= 0) { prog_error("duh?"); }

    best = o_newsp[key];
    share = o_maxct[key]/o_totct[key]; # Fraction of the total held by the best new spelling.

    if (! (share >= 0.800)) {
      # No sufficiently dominant new spelling: report on stderr only.
      printf "ambiguous: %-20s %-20s # %8.6f\n", key, best, share > "/dev/stderr";
      continue;
    }

    # Dominant mapping: print it unless it maps the spelling to itself.
    if (key != best) {
      printf "%-30s %-30s # %8.6f\n", key, best, share;
    }
  }
}

function prog_error(msg)
{
  # Reports an internal inconsistency: records exit status 1 in {abort},
  # writes {msg} followed by a newline to stderr, and calls exit with that status.
  abort = 1;
  printf "%s\n", msg > "/dev/stderr";
  exit abort;
}

function data_error(msg)
{
  # Reports a problem with the current input record: records exit status 1
  # in {abort}, then writes to stderr a blank line, a line
  # "{FILENAME}:{FNR}: {msg}", and the offending record indented by two
  # spaces, and finally calls exit with that status.
  abort = 1;
  printf "\n" > "/dev/stderr";
  printf "%s:%d: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  printf "  %s\n", $0 > "/dev/stderr";
  exit abort;
}