#! /usr/bin/gawk -f 
# Last edited on 2000-05-16 15:31:57 by stolfi
#
# Replaces constructs "C{&NNN}" by the proper extended-EVA notation.
# See basify-weirdos.
#

BEGIN {
  # Exit status requested by an error handler; a negative value means
  # that no error has been flagged so far.
  abort = -1;

  # Recoding table.
  #
  # Every named weirdo "{&X}" known to this script is rewritten as the
  # bare code "X", so the table is generated from the list of codes
  # below rather than being written out one entry at a time.

  # Plain capital codes:
  weirdo_codes = "A E F H I K O P R S T Y";
  # Codes carrying a ' or " mark:
  weirdo_codes = (weirdo_codes " c' e' c'a c'y I' I\" o' O' O\" q\" y'");

  n_weirdo_codes = split(weirdo_codes, weirdo_code, " ");
  for (weirdo_idx = 1; weirdo_idx <= n_weirdo_codes; weirdo_idx++)
    { tbl["{&" weirdo_code[weirdo_idx] "}"] = weirdo_code[weirdo_idx]; }

  # Unidentified weirdos (not in the table):
  #
  # &^  = top half of "y"
  # &an = <a> and <n> run together
  # &ar = <a> and <r> run together
  # &ct = long <c> with a <t> attached to the tip of the ligature
  # &l  = <l> squeezed up, over the adjacent chars
  # &ol = same as <l> (obsolete).
  # &r  = <r> attached to the previous char.
  # &zh = like <sh> but with the plume turning forward?
  # &-  = dash at mid-letter level.
  # &PP = like a <p> with double loop.
  # &=  = elongated <o>, or extra loop on <&171>.

}

# If an error handler has set `abort' to a non-negative status, stop at
# the next input record and exit with that status.
(abort >= 0) { exit abort; }

# Lines starting with "#" are comments: copy them through unchanged.
$0 ~ /^[#]/ {
  print $0;
  next;
}

# Page/unit header lines, i.e. lines that begin with a locator such as
# "<f1r>" or "<f1r.P>": copy them through unchanged.
$0 ~ /^[<]f[0-9]+[rv]?[0-6]?(|[.][A-Za-z][A-Za-z0-9]?)[>]/ {
  print $0;
  next;
}

/^[<]/ {
  # Normal EVMT-format text lines: a "<...>" locator followed by text.
  # The locator is kept as is; only the text after it is unbasified.
  close_pos = index($0, ">");
  if (close_pos == 0) { format_error("unmatched `<'"); }

  locator = substr($0, 1, close_pos);
  body = substr($0, close_pos + 1);
  # Drop the blanks between the locator and the text; the output format
  # below re-creates the separation with a fixed-width locator column.
  sub(/^[ ]+/, "", body);

  printf("%-18s %s\n", locator, unbasify(body));
  next;
}

{
  # Any line not handled by an earlier rule is a non-EVMT text line:
  # unbasify the whole line.
  print unbasify($0);
}
  
function unbasify(txt,  i,c,res,rep,eva,width)
{
  # Restores full-EVA codes for weirdos in a basic-EVA text
  # where weirdos are denoted in the "C{&XXX}" style, i.e. a single
  # character C immediately followed by the weirdo code in braces.
  #
  #   txt      the text to convert.
  #   returns  the converted text, with the same length as `txt'.
  #
  # Each "C{&XXX}" construct (C is absent when "{&" starts the text)
  # is replaced by:
  #   - tbl["{&XXX}"], when the code is a key of the global `tbl';
  #   - "&NNN;", when XXX is a three-digit number NNN;
  #   - the construct itself otherwise, after printing a
  #     "weird weirdo" warning on /dev/stderr.
  # The replacement is padded on the right with "!" up to the width of
  # the construct it replaces, so that columns stay aligned.
  #
  # A "{&" without a closing "}" is reported through format_error,
  # which exits the program.
  res = "";
  while (txt != "")
    { i = index(txt, "{&");
      if (i == 0) 
        { # No weirdo left: copy the remainder verbatim.
          res = (res txt); txt = "";
        }
      else 
        { # we trust here that the "C{&NNN}" notation cannot get longer
          # when unbasified.
          if (i >= 2) 
            { # Copy everything before C, then set C aside; it is
              # replaced together with the "{&...}" that follows it.
              res = (res substr(txt, 1, i-2)); 
              c = substr(txt, i-1,1); 
              txt = substr(txt, i);
            }
          else
            { c = ""; }
          i = index(txt, "}");
          if (i == 0)
            { format_error("missing `}'");
              # format_error exits; should it ever return, hand back
              # the unconverted tail instead of falling through and
              # reusing a `rep' left over from an earlier iteration.
              return (res c txt);
            }
          rep = substr(txt, 1, i); txt = substr(txt, i+1);
          if (rep in tbl)
            { eva = tbl[rep]; }
          else if (match(rep, /^[{][&][0-9][0-9][0-9][}]$/))
            { # Braces are written as bracket expressions so that they
              # can never be read as an interval operator.
              eva = ("&" substr(rep,3,3) ";") ;
            }
          else
            { printf "line %d: weird weirdo %-19s |%s|\n", 
                FNR, substr($0,1,19), (c rep) > "/dev/stderr";
              eva = (c rep);
            }
          # preserve alignment: pad with "!" (or cut) to the width of
          # the construct being replaced.
          width = length(c) + length(rep);
          while (length(eva) < width) { eva = (eva "!"); }
          res = (res substr(eva, 1, width)); 
        }
    }
  return (res);
}

function format_error(msg)
{ 
  # Reports a problem with the current input line on /dev/stderr,
  # prefixed with the input file name and line number, records the
  # failure status in the global `abort', and exits with that status.
  #
  #   msg   description of the problem.
  abort = 1;
  printf("file %s, line %d: %s\n", FILENAME, FNR, msg) > "/dev/stderr";
  exit abort;
}

function arg_error(msg)
{ 
  # Prints `msg' on /dev/stderr, records the failure status in the
  # global `abort', and exits with that status.
  #
  #   msg   the message to print.
  abort = 1;
  printf("%s\n", msg) > "/dev/stderr";
  exit abort;
}