#! /usr/bin/gawk -f
# Last edited on 1998-12-12 05:11:54 by stolfi
# Recoding Voynich text (or interlinear file)
# from FSG to EVA (European Voynich Alphabet)
function fsg_to_eva(txt,    table, rules, nrules, r, pair)
{
  # Recodes a chunk of comment-free FSG text into EVA and returns
  # the recoded string.  Only `txt' is an argument; the names after
  # it are local variables (standard awk idiom), so callers keep
  # passing a single argument.

  # Strip "%", "!" and blanks up front: the recoding changes the
  # length of the text, so column alignment is lost anyway.
  gsub(/[% !]/, "", txt);

  # Ordered substitution table, written as FSG=EVA pairs.
  # The order matters: longer FSG groups are listed before the
  # shorter groups and single letters they contain ("IIIE" before
  # "IIE" before "IE" before "I" and "E"), so the longest group wins.
  # Every EVA target is lowercase while every FSG source is an
  # uppercase letter or a digit, so text that has already been
  # recoded is never matched again by a later rule.
  table = "IIIE=iiil IIIK=iiim IIIL=iiin IIIR=iiir";
  table = table " IIE=iil IIK=iim IIL=iin IIR=iir";
  table = table " IE=il IK=im IL=in IR=ir";
  table = table " HZ=cth PZ=cph DZ=ckh FZ=cfh";
  table = table " 2=s 4=q 6=g 7=j 8=d";
  table = table " A=a C=e D=k E=l F=f G=y H=t I=i K=m L=n";
  table = table " M=iin N=in O=o P=p R=r S=sh T=ch V=v Y=x";

  # Apply the rules one after another, first to last.
  nrules = split(table, rules, " ");
  for (r = 1; r <= nrules; r++)
    { split(rules[r], pair, "=");
      gsub(pair[1], pair[2], txt);
    }
  return txt
}
function convert(old, i,neu,j)
{
  # Recodes a line of text from FSG to EVA while copying every
  # '{...}' comment through unchanged.
  #
  #   old  the text to recode; consumed from the left as we go.
  #   i, neu, j  are local variables (awk idiom); callers pass
  #        only `old'.
  #
  # Returns the recoded text.  If a '{' has no matching '}', a
  # diagnostic with the current FNR is written to /dev/stderr and
  # the rest of the text, from that '{' on, is copied unchanged.
  neu = "";
  while (length(old) != 0)
    { i = index(old, "{");
      if (i == 0)
        { # No comment left: recode everything that remains.
          neu = (neu fsg_to_eva(old)); old = "";
        }
      else if (i > 1)
        { # Recode the plain text that precedes the next comment.
          neu = (neu fsg_to_eva(substr(old, 1, i-1)));
          old = substr(old, i);
        }
      else
        { # `old' starts with '{': the comment runs up to the first
          # '}'.  A plain index() lookup is used rather than a regexp
          # with bare braces, whose meaning is not portable.
          j = index(old, "}");
          if (j > 0)
            { neu = (neu substr(old, 1, j));
              old = substr(old, j + 1);
            }
          else
            { # Unterminated comment: report it and keep the tail as is.
              # ("}" needs no backslash inside an awk string; "\}" is an
              # undefined escape that makes gawk emit a warning.)
              printf "line %d, missing '}'\n", FNR > "/dev/stderr";
              neu = (neu old); old = "";
            }
        }
    }
  return neu;
}
# Lines that carry no text to recode are passed through untouched:
# blank lines, '#' comment lines, and lines consisting only of a
# '<...>' item followed by optional blanks.
/^ *$/ || /^ *#/ || /^<[^>]*> *$/ {
  print
  next
}

# Any other line starting with '<': the first 19 characters are
# copied verbatim (presumably the fixed-width locator field; confirm
# against the input format) and only column 20 onward is recoded.
/^</ {
  print (substr($0, 1, 19) convert(substr($0, 20)))
  next
}

# Every remaining non-empty line is recoded in full.
/./ {
  print convert($0)
}