#! /usr/bin/gawk -f 
# Must also specify -f eva2erg.gawk

# Usage:
#
#   cat INFILE \
#     | remove-variant-words -f eva2erg.gawk \
#          [ -v provide_ligatures=1 ] \
#          [ -v erase_ligatures=1 ] \
#          [ -v erase_plumes=1 ] \
#          [ -v ignore_gallows_eyes=1 ] \
#          [ -v join_ei=1 ] \
#          [ -v equate_aoy=1 ] \
#          [ -v collapse_ii=1 ] \
#          [ -v equate_eights=1 ] \
#          [ -v erase_q=1 ] \
#          [ -v erase_word_spaces=1 ] \
#     > OUTFILE
#
# Removes words from stdin that are considered variants of 
# previously seen words.  
#
# The options define the mapping from KEY to RKEY.
# If given they are applied in the order above.
# See eva2erg.gawk for explanations of their effect.

function recode_text(str)
{
  # Maps the text string `str` (expected to contain no embedded "{}"s)
  # to its reduced form by applying, in the fixed order below, every
  # eva2erg.gawk transformation whose command-line flag (-v NAME=1) is set.
  # Returns the result of erg_pack() applied to the transformed string.
  #
  # NOTE(review): the previous comment stated that the string length is
  # preserved; that depends on the erg_* helpers defined in eva2erg.gawk
  # (in particular erg_pack) -- confirm before relying on it.

  if (provide_ligatures)   { str = erg_provide_ligatures(str); }
  if (erase_ligatures)     { str = erg_erase_ligatures(str); }
  if (erase_plumes)        { str = erg_erase_plumes(str); }
  if (ignore_gallows_eyes) { str = erg_ignore_gallows_eyes(str); }
  if (join_ei)             { str = erg_join_ei(str); }
  if (equate_aoy)          { str = erg_equate_aoy(str); }
  if (collapse_ii)         { str = erg_collapse_ii(str); }
  if (equate_eights)       { str = erg_equate_eights(str); }
  if (erase_q)             { str = erg_erase_q(str); }

  # Word spaces are always processed: either erased or unified.
  if (erase_word_spaces)
    { str = erg_erase_word_spaces(str); }
  else
    { str = erg_unify_word_spaces(str); }

  return erg_pack(str);
}

BEGIN {
  # `words` maps each reduced key to the first input line that produced it.
  # Start with an empty array (this also fixes `words` as an array variable).
  delete words;
}

/./ {
  # Runs for every non-empty input line.  The line is stripped of comments
  # (erg_erase_comments) and reduced with recode_text() to obtain its key.
  rkey = recode_text(erg_erase_comments($0));

  if (! (rkey in words))
    {
      # First line with this key: remember it and pass it through to stdout.
      words[rkey] = $0;
      print $0;
    }
  else
    {
      # Key already seen: drop the line, reporting on stderr the line,
      # its key, and the earlier line that first produced that key.
      printf "%s = %s = %s\n", $0, rkey, words[rkey] > "/dev/stderr";
    }
}