#! /usr/bin/gawk -f

# Maps a file in EVMT interlinear format from EVA (European Voynich
# Alphabet) to ERA (EVA reduced for noise removal).
# Leaves '{}' comments and non-EVA symbols (including "%" and "!") alone.

function eva_to_era(txt)
{
  # Maps one comment-free piece of EVA text to ERA and returns the result.
  # Works purely by successive global substitutions on the argument;
  # characters not mentioned in any rule pass through untouched.
  #
  # Some rules feed others, so their relative order is significant:
  #   "sh" before "s"            (else "sh" would become "rh");
  #   "t" before "ckh"           (so that "cth" also ends up as "eke");
  #   "c?h" rules before "ei"    (they produce new "e"s);
  #   "q" last                   (deleting it earlier could join letters
  #                               into new matches for the other rules).

  # Single-letter merge "t" -> "k":
  gsub(/t/,    "k",   txt);

  # "sh" -> "ch", then every remaining "s" -> "r":
  gsub(/sh/,   "ch",  txt);
  gsub(/s/,    "r",   txt);

  # "c<X>h" groups -> "e<X>e" (these three do not interact with each other):
  gsub(/cfh/,  "efe", txt);
  gsub(/cph/,  "epe", txt);
  gsub(/ckh/,  "eke", txt);

  # Everything that collapses to "o":
  gsub(/y/,    "o",   txt);
  gsub(/a/,    "o",   txt);
  gsub(/ei/,   "o",   txt);

  # Runs of two or more "i" shrink to a single "i":
  gsub(/iii*/, "i",   txt);

  # Finally drop every "q":
  gsub(/q/,    "",    txt);

  return txt
}

function convert(old,   new, i, j)
{
  # Converts a text string possibly with '{}' comments.
  # The text outside the '{}' comments is passed through eva_to_era(),
  # while the text inside the comments (braces included) is copied
  # unchanged.  Returns the converted string.
  #
  # If a '{' has no matching '}', a diagnostic with the current record
  # number is written to stderr and the rest of the string, from that
  # '{' onward, is copied unchanged.
  #
  # 'new', 'i' and 'j' are local work variables, not arguments; callers
  # pass only 'old'.
  new = "";
  while (length(old) != 0)
    { i = index(old, "{");
      if (i == 0)
        { # No comment left: everything remaining is plain text.
          new = (new eva_to_era(old)); old = "";
        }
      else if (i > 1)
        { # Plain text precedes the next comment: convert that prefix and
          # leave 'old' starting at the '{'.
          new = (new eva_to_era(substr(old, 1, i-1)));
          old = substr(old, i);
        }
      else
        { # 'old' starts with '{': the comment runs through the first '}'.
          j = index(old, "}");
          if (j > 0)
            { new = (new substr(old, 1, j));
              old = substr(old, j + 1);
            }
          else
            { printf "line %d, missing '}'\n", NR > "/dev/stderr";
              new = (new old); old = "";
            }
        }
    }
  return new;
}

# Per-record dispatch.  Each input line is handled by exactly one branch:
#   - blank lines, '#' comment lines, and lines holding only a '<...>'
#     tag are echoed unchanged;
#   - other lines starting with '<' keep their first 19 characters as is
#     (presumably the fixed-width locator field -- confirm against the
#     EVMT format) and have the remainder converted;
#   - any other non-empty line is converted whole.
{
  if ($0 ~ /^ *$/)
    { print; next; }
  else if ($0 ~ /^ *#/)
    { print; next; }
  else if ($0 ~ /^<[^>]*> *$/)
    { print; next; }
  else if ($0 ~ /^</)
    { print (substr($0,1,19) convert(substr($0,20))); next; }
  else if ($0 ~ /./)
    { print convert($0); }
}