#! /usr/bin/gawk -f
# Last edited on 2000-07-10 00:13:29 by stolfi
# Must specify -f eva2erg.gawk

# Adds a match key that can be used to find similar words.
#
#    cat INFILE \
#      | add-match-key -f eva2erg.gawk \
#          [ -v inField=IFLDNUM ] \
#          [ -v outField=OFLDNUM ] \
#          [ -v erase_ligatures=1 ] \
#          [ -v erase_plumes=1 ] \
#          [ -v ignore_gallows_eyes=1 ] \
#          [ -v join_ei=1 ] \
#          [ -v equate_aoy=1 ] \
#          [ -v collapse_ii=1 ] \
#          [ -v equate_eights=1 ] \
#          [ -v equate_pt=1 ] \
#          [ -v erase_q=1 ] \
#          [ -v erase_word_spaces=1 ] \
#          [ -v verbose=0 ] \
#      > OUTFILE
#
# This script reads from stdin one or more records that contain a
# Voynichese word KEY. It outputs the same records, each preceded by a
# "reduced" version RKEY of the same word (and one blank).
#
# The KEY word is assumed to be field IFLDNUM of each record
# (the first field by default).
#
# The options define the mapping from KEY to RKEY.
# If given they are applied in the order above.
# See eva2erg.gawk for explanations of their effect.
#

BEGIN {
  # Error flag; error() sets it to 1 before terminating the run.
  abort = 0

  # Options not supplied with "-v" arrive as empty strings; give them
  # their defaults.
  if (inField  == "") { inField  = 1 }
  if (outField == "") { outField = 1 }
  if (verbose  == "") { verbose  = 0 }

  # In verbose mode, list on stderr every recoding option that is on,
  # in the same order in which recode_text() applies them.
  if (verbose) {
    printf "%s\n", "options:" > "/dev/stderr"
    if (erase_ligatures)     { printf "  %s\n", "erase_ligatures"     > "/dev/stderr" }
    if (erase_plumes)        { printf "  %s\n", "erase_plumes"        > "/dev/stderr" }
    if (ignore_gallows_eyes) { printf "  %s\n", "ignore_gallows_eyes" > "/dev/stderr" }
    if (join_ei)             { printf "  %s\n", "join_ei"             > "/dev/stderr" }
    if (equate_aoy)          { printf "  %s\n", "equate_aoy"          > "/dev/stderr" }
    if (collapse_ii)         { printf "  %s\n", "collapse_ii"         > "/dev/stderr" }
    if (equate_eights)       { printf "  %s\n", "equate_eights"       > "/dev/stderr" }
    if (equate_pt)           { printf "  %s\n", "equate_pt"           > "/dev/stderr" }
    if (erase_q)             { printf "  %s\n", "erase_q"             > "/dev/stderr" }
    if (erase_word_spaces)   { printf "  %s\n", "erase_word_spaces"   > "/dev/stderr" }
  }
}

# Main rule: handles every record that contains at least one character.
$0 ~ /./ {
  # Stop immediately if an error has already been flagged.
  if (abort) { exit }

  # The key field must exist in this record.
  if (inField > NF) { error("only " NF " input fields ") }

  # Strip comments from the key, recode it, and print the record with
  # the recoded key inserted as field number outField.
  printout(recode_text(erg_erase_comments($inField)), outField)
  next
}

function error(msg)
{ 
  # Reports a fatal error and terminates the program.
  #
  # Writes "line NR: msg" (plus a newline) to stderr, sets the global
  # "abort" flag, and exits with a non-zero status so that the calling
  # shell or pipeline can detect the failure.  "msg" should not end
  # with a newline, since one is appended here.
  printf "line %d: %s\n", NR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}

function recode_text(str)
{
  # Recodes the text string "str" (which must contain no embedded
  # "{}"s) according to the option variables, and returns the result.
  #
  # Each enabled option applies the corresponding erg_* function,
  # in the fixed order below; disabled options leave "str" untouched.

  if (erase_ligatures)     { str = erg_erase_ligatures(str); }
  if (erase_plumes)        { str = erg_erase_plumes(str); }
  if (ignore_gallows_eyes) { str = erg_ignore_gallows_eyes(str); }
  if (join_ei)             { str = erg_join_ei(str); }
  if (equate_aoy)          { str = erg_equate_aoy(str); }
  if (collapse_ii)         { str = erg_collapse_ii(str); }
  if (equate_eights)       { str = erg_equate_eights(str); }
  if (equate_pt)           { str = erg_equate_pt(str); }
  if (erase_q)             { str = erg_erase_q(str); }

  # Word spaces are always processed: either erased or unified.
  if (erase_word_spaces)
    { str = erg_erase_word_spaces(str); }
  else
    { str = erg_unify_word_spaces(str); }

  return erg_pack(str);
}

function printout(mw, fn,  i)
{
  # Prints the current record with "mw" inserted as field number "fn".
  #
  #   mw  the string to insert.
  #   fn  the 1-based position of the new field; valid values are
  #       1 through NF+1.
  #   i   local loop index (not a real parameter).
  #
  # When "mw" goes first or last, the original record $0 is printed
  # verbatim next to it.  Otherwise the record is rebuilt field by
  # field, with OFS between fields.  Every case ends the record with ORS.
  #
  # Calls error(), which terminates the program, if "fn" is not a
  # positive integer or the record has fewer than fn-1 fields.

  # Reject non-integer and non-positive positions up front: they would
  # otherwise reach the general branch and print $0 plus all fields,
  # or make the loop bound a string comparison.
  if ((fn != int(fn)) || (fn < 1)) { error(("invalid output field number \"" fn "\"")); }
  if (NF < fn-1) { error("not enough output fields"); }
  if (fn == 1)
    { print mw, $0; }
  else if (fn == NF+1)
    { print $0, mw; }
  else
    { for (i=1;i<fn;i++) { printf "%s%s", $(i), OFS; }
      printf "%s", mw;
      for (i=fn;i<=NF;i++) { printf "%s%s", OFS, $(i); }
      # Use ORS, as "print" does in the branches above.
      printf "%s", ORS;
    }
}