#! /usr/bin/gawk -f 
# Last edited on 1998-12-08 06:10:53 by stolfi
#
# A filter that removes purely aesthetic EVA capitalization (where the
# ligature is implied by a subsequent "h" character) from an EVMT
# format file.
# 

BEGIN {
  # Pending exit status; stays negative until an error routine sets it.
  abort = -1;
}

# Once an error status has been recorded, stop processing records.
(abort >= 0) { exit abort; }

# Comment lines (starting with "#") are copied through unchanged.
/^[#]/ {
  print $0;
  next;
}

# Page/unit header lines are copied through unchanged as well.
/^[<]f[0-9]+[rv]?[0-6]?(|[.][A-Za-z][A-Za-z0-9]?)[>]/ {
  print $0;
  next;
}

# Normal EVMT-format text lines: a "<...>" locator followed by the text.
# The locator is kept as is; only the text after it is decapitalized.
/^[<]/ {
  close_pos = index($0, ">");
  if (close_pos == 0) { format_error("unmatched `<'"); }

  # Split the record right after the first ">".
  locator = substr($0, 1, close_pos);
  body = substr($0, close_pos + 1);

  # Drop the blanks between locator and text; the printf below
  # re-aligns the text after an 18-column locator field.
  sub(/^[ ]*/, "", body);
  printf "%-18s %s\n", locator, cleanup(body);
  next;
}

# Anything else (non-EVMT text lines): decapitalize the whole record.
{
  print cleanup($0);
}
  
function cleanup(txt,    pos,note,piece,out,before)
{
  # Returns a copy of "txt" in which capital letters that are followed
  # by an "h" (possibly with some intervening characters, see the
  # patterns below) are replaced by their lowercase forms.  Text inside
  # "{...}" comments is copied to the result unchanged.  Calls
  # format_error if a "{" has no matching "}".

  out = "";
  while (txt != "")
    {
      # Peel off the text up to the next "{" (the piece to convert) and
      # the "{...}" comment that follows it, if any.
      pos = index(txt, "{");
      if (pos > 0)
        {
          piece = substr(txt, 1, pos - 1);
          txt = substr(txt, pos);
          pos = index(txt, "}");
          if (pos == 0) { format_error("unclosed `{'"); }
          note = substr(txt, 1, pos);
          txt = substr(txt, pos + 1);
        }
      else
        {
          # No more comments: the remainder is a single piece.
          piece = txt;
          note = "";
          txt = "";
        }

      # Repeat the substitutions until a full pass changes nothing,
      # since lowering one letter can expose a match for another rule.
      do
        {
          before = piece;

          # Cheap, frequent cases first: capital directly before "h".
          gsub(/[H]h/, "hh", piece);
          gsub(/[C]h/, "ch", piece);
          gsub(/[S]h/, "sh", piece);

          if (piece == before)
            {
              # Nothing simple matched; try the general patterns, which
              # allow extra characters between the capital and the "h".
              piece = gensub(/[H]([!%]*)h/,  "h\\1h", "g", piece);

              piece = gensub(/[T]([!%]*)h/,  "t\\1h", "g", piece);
              piece = gensub(/[K]([!%]*)h/,  "k\\1h", "g", piece);
              piece = gensub(/[P]([!%]*)h/,  "p\\1h", "g", piece);
              piece = gensub(/[F]([!%]*)h/,  "f\\1h", "g", piece);

              piece = gensub(/[C]([!%ktpf]*)h/,  "c\\1h", "g", piece);
              piece = gensub(/[S]([!%ktpf]*)h/,  "s\\1h", "g", piece);
              piece = gensub(/[I]([!%ktpf]*)h/,  "i\\1h", "g", piece);
            }
        }
      while (piece != before);

      # Append the converted piece and its untouched trailing comment.
      out = (out piece note);
    }
  return (out);
}

function format_error(msg)
{
  # Reports a problem in the input on standard error, tagged with the
  # current file name and line number, then terminates with status 1.
  # "abort" is set as well so that the abort check among the main rules
  # sees the failure.
  abort = 1;
  printf("file %s, line %d: %s\n", FILENAME, FNR, msg) > "/dev/stderr";
  exit abort;
}

function arg_error(msg)
{
  # Prints "msg" on standard error and terminates with status 1.
  # "abort" is set as well so that the abort check among the main rules
  # sees the failure.
  abort = 1;
  printf("%s\n", msg) > "/dev/stderr";
  exit abort;
}