#! /usr/bin/gawk -f
# Last edited on 2025-04-29 20:59:10 by stolfi
BEGIN {
  # Remaps the locators of an EVMT file: reads the file from stdin,
  # replaces each locator according to the {table} file, and writes
  # the result to stdout.
  #
  # Every entry of the {table} file consists of two words "{OLD} {NEW}",
  # which says that locator {OLD} is to be rewritten as {NEW}.  The
  # entries carry neither the "<>" delimiters nor the transcriber code.
  # When {inverse} is true the two columns are swapped as the table
  # is loaded.
  #
  # A locator that has no entry in the table is copied unchanged;
  # if {pedantic} is true a message is also written to stderr.

  # Negative means "no error so far"; the error routines below store
  # the exit status here.
  abort = -1;

  usage = ( \
    ARGV[0] "\\\n" \
    " -v table=TBLFILE [ -v inverse=BOOL ] \\\n" \
    " [ -v pedantic=BOOL ] \\\n" \
    " < INFILE > OUTFILE " \
  );

  # Check and default the command line variables:
  if (table == "") arg_error("must specify \"-v table=FILE\"\n");
  if (inverse == "") { inverse = 0; }

  # Start with an empty map {dic} and fill it from the table file:
  split("", dic);
  read_table(table, inverse, dic);
}
# Comment lines are copied to the output unchanged -- except header
# lines of the form "## <LOC>...", which carry a locator and must fall
# through to the locator-mapping rule further down.  (Without this
# exception the "[#][#] *" alternative of that rule could never match,
# and header locators would be left unmapped.)
/^[#]/ && !/^[#][#] *</ {
  print;
  next;
}

# Blank lines are copied to the output unchanged.
/^ *$/ {
  print;
  next;
}
function read_table(fname,inv,tbl,   ntbl,nlin,lin,fld,nfld,tmp)
{
  # Loads the locator map stored in file {fname} into the array {tbl}.
  #
  # Each significant line of the file must have exactly two
  # blank-separated words, optionally followed by a "#" comment.
  # Blank lines and lines whose first non-blank character is "#" are
  # skipped.  The entry is stored as {tbl[FIRST] = SECOND}, or as
  # {tbl[SECOND] = FIRST} when {inv} is true.
  #
  # Malformed entries, repeated keys and read errors are reported
  # through {tbl_error}; a file that yields no lines at all is reported
  # through {arg_error}.
  #
  # NOTE(review): {tbl_error} is not defined in the part of the file
  # reviewed here -- confirm that it exists, otherwise gawk will die
  # with "function not defined" the first time it is needed.
  ntbl = 0;
  nlin = 0;
  # Clear {ERRNO} so that a stale value is not mistaken for a read error:
  ERRNO = "";
  while ((getline lin < fname) > 0) {
    nlin++;
    # Skip blank lines and whole-line comments:
    if (match(lin, /^[ \011]*([#]|$)/)) { continue; }
    nfld = split(lin, fld, " ");
    # Ignore a trailing "#" comment that follows the two words:
    if ((nfld >= 3) && (fld[3] ~ /^[#]/)) { nfld = 2; }
    if (nfld != 2) { tbl_error(fname, nlin, ("bad table entry = \"" lin "\"")); }
    # If {inv} is true, swap the two columns:
    if (inv) { tmp = fld[1]; fld[1] = fld[2]; fld[2] = tmp; }
    # Check the array being filled (not the global map), so that the
    # function works for whatever array the caller passes:
    if (fld[1] in tbl) { tbl_error(fname, nlin, ("repeated key = \"" lin "\"")); }
    tbl[fld[1]] = fld[2];
    ntbl++;
  }
  # The loop also ends when {getline} fails; report that case:
  if (ERRNO != "") { tbl_error(fname, nlin, ERRNO); }
  close (fname);
  if (nlin == 0) { arg_error(("file \"" fname "\" empty or missing")); }
  # printf "loaded %6d map pairs\n", ntbl > "/dev/stderr"
}
# Stop at once if an error has been flagged ({abort} holds the exit status).
(abort >= 0) { exit abort; }

# Data lines ("<LOC>...") and header lines ("## <LOC>..."):
# remap the locator and output the modified line.
$0 ~ /^(|[#][#] *)</ {
  print map_locator($0);
  next;
}

# Any other comment line is copied verbatim.
# NOTE(review): this rule and the blank-line rule after it look
# unreachable, because the comment and blank-line rules placed right
# after the BEGIN block consume those lines first -- confirm which
# pair is meant to stay.
$0 ~ /^[#]/ { print; next; }

# Blank lines are discarded.
$0 ~ /^ *$/ { next; }

# Whatever is left does not look like an EVMT line.
// {
  format_error("bad line format");
  print;
  next;
}
function map_locator(str,   head,loc,tail,dlen,trc)
{
  # Returns a copy of {str} in which the first locator "<LOC>" or
  # "<LOC;T>" has its {LOC} part replaced by {dic[LOC]}.  The
  # transcriber code ";T", if present, is kept as it is.
  #
  # A locator that is not in {dic} is left unchanged; a message is
  # written to stderr if the global {pedantic} is true.  If {str}
  # contains no well-formed locator, {format_error} is called.
  #
  # Tries to preserve the length of the line: when the new locator is
  # longer (shorter) than the old one, blanks right after the closing
  # ">" are removed (inserted) to compensate.

  # Search the argument itself rather than the current record, so that
  # the result does not depend on what {$0} happens to be:
  if (! match(str, /<[a-z][0-9]+[rv]?[0-6]?(|[.][^<>; ]*)(|[;][A-Z])[>]/))
    { format_error("bad location code format");
      # Not reached as long as {format_error} exits; guards against
      # slicing {str} with a meaningless {RSTART}.
      return str;
    }

  # {head} ends with the "<", {tail} begins with the ">":
  head = substr(str, 1, RSTART);
  loc = substr(str, RSTART+1, RLENGTH-2);
  tail = substr(str, RSTART+RLENGTH-1);

  # Detach the transcriber code (";X"), if any, from the locator proper:
  if (match(loc, /[;]/))
    { trc = substr(loc, RSTART);
      loc = substr(loc, 1, RSTART-1);
    }
  else
    { trc = ""; }

  dlen = length(loc);
  if (loc in dic)
    { loc = dic[loc]; }
  else if (pedantic)
    { printf "file %s, line %d: locator not found: %s\n", FILENAME, FNR, loc > "/dev/stderr"; }

  # {dlen} becomes the growth of the locator (negative if it shrank):
  dlen = length(loc) - dlen;
  # Locator grew: swallow one blank after ">" per extra character, if available.
  while (dlen > 0) { sub(/^> /, ">", tail); dlen--; }
  # Locator shrank: insert one blank after ">" per missing character.
  while (dlen < 0) { sub(/^>/, "> ", tail); dlen++; }
  return (head loc trc tail);
}
function format_error(msg)
{
  # Reports the message {msg} on stderr, prefixed with the name of the
  # current input file and the current line number, records the failure
  # in the global {abort}, and terminates the run with exit status 1.
  abort = 1;
  printf("file %s, line %d: %s\n", FILENAME, FNR, msg) > "/dev/stderr";
  exit 1;
}
function arg_error(msg)
{
  # Reports a command line / argument error: writes {msg} to stderr,
  # followed by the usage summary held in the global {usage} (when it
  # is set), records the failure in the global {abort}, and terminates
  # the run with exit status 1.
  printf "%s\n", msg > "/dev/stderr";
  # The usage text is built in the BEGIN block; show it so that the
  # user can see the expected invocation.
  if (usage != "") { printf "usage: %s\n", usage > "/dev/stderr"; }
  abort = 1;
  exit 1;
}