#! /usr/bin/gawk -f
# Maps a file in EVMT interlinear format from EVA (European Voynich
# Alphabet) to ERB (EVA reduced for error resistance, version B).
# Leaves '{}' comments and non-EVA symbols (including "%" and "!") alone.
function eva_to_ere(txt)
{
  # Maps a chunk of comment-free EVA text onto the reduced alphabet and
  # returns the result.  `txt` is a by-value scalar, so the caller's
  # string is not modified.
  # NOTE(review): the original comment here said "ERA" while the file
  # header says "ERB" -- confirm which name is intended.
  #
  # The substitutions are order-dependent; do not reorder them.

  # Two-letter groups "sh" and "ch" both collapse to "ee".  They must be
  # handled before the single-letter "s" rule below.
  gsub(/[sc]h/, "ee", txt);

  # Single-letter folds: s -> r, t -> k, f -> p.
  gsub(/s/, "r", txt);
  gsub(/t/, "k", txt);
  gsub(/f/, "p", txt);

  # Three-letter groups.  Because every "t" and "f" has already been
  # folded to "k" and "p", "cth" and "cfh" have become "ckh" and "cph",
  # so only these two patterns can still occur.
  gsub(/ckh/, "eke", txt);
  gsub(/cph/, "epe", txt);

  # "ei" collapses to "o"; this also applies to an "e" produced by the
  # rules above that is followed by an "i".
  gsub(/ei/, "o", txt);

  # "a" and "y" both fold to "o".
  gsub(/[ay]/, "o", txt);

  return txt
}
function convert(old, new, i)
{
  # Converts a text string that may contain '{}' comments.
  # Text outside the comments is passed through eva_to_ere(); each
  # '{...}' comment is copied to the output unchanged.
  #
  # Parameters:
  #   old  - the string to convert (the only argument callers pass).
  #   new  - local accumulator for the result; callers omit it.
  #   i    - local scratch index; callers omit it.  Declaring it here
  #          keeps it from being a global shared with the rest of the
  #          program.
  # Returns the converted string.
  #
  # If a '{' has no matching '}', a warning with the current record
  # number (NR) is written to /dev/stderr and the remainder of the
  # string, starting at that '{', is copied unchanged.
  new = "";
  while (length(old) != 0)
    { i = index(old, "{");
      if (i == 0)
        { # No comment left: convert everything that remains.
          new = (new eva_to_ere(old)); old = "";
        }
      else if (i > 1)
        { # Convert the text that precedes the next comment.
          new = (new eva_to_ere(substr(old, 1, i-1)));
          old = substr(old, i);
        }
      else
        { # `old` starts with '{': copy the whole comment verbatim.
          # Bracket expressions make the braces unambiguously literal.
          match(old, /^[{][^}]*[}]/);
          if (RSTART > 0)
            { new = (new substr(old, 1, RLENGTH));
              old = substr(old, RLENGTH + 1);
            }
          else
            { printf "line %d, missing '}'\n", NR > "/dev/stderr";
              new = (new old); old = "";
            }
        }
    }
  return new;
}
# Main rule: decide, per input record, whether to copy or convert it.
{
  # Blank lines, '#' comment lines (optionally preceded by spaces) and
  # lines that consist only of a '<...>' tag are copied unchanged.
  if ($0 ~ /^ *$/ || $0 ~ /^ *#/ || $0 ~ /^<[^>]*> *$/) {
    print;
    next;
  }

  # Other lines starting with '<': the first 19 characters are copied
  # verbatim (presumably the fixed-width locator field -- confirm
  # against the EVMT format) and only the remainder is converted.
  if ($0 ~ /^</) {
    print (substr($0,1,19) convert(substr($0,20)));
    next;
  }

  # Any other non-empty line is converted as a whole.
  if ($0 ~ /./) {
    print convert($0);
  }
}