#! /usr/bin/gawk -f
# Last edited on 2002-01-16 13:18:17 by stolfi

BEGIN {
  # Exit status recorded by the error handlers; -1 means "no error so far".
  abort = -1;

  # The "field" variable is mandatory (see the checks below), so the usage
  # text must not show it in optional brackets.
  usage = ( "capitalize-ligatures \\\n" \
    "  -v field=NUM \\\n" \
    "  < INFILE.wct > OUTFILE.tex" \
  );

  # Assumes that field number FIELD of each input data line is a
  # word in EVA, possibly factored into elements by braces "{}".
  # Adds the proper EVA ligatures (i.e. capitalization) to each element
  # ("sh" -> "Sh", "cth" -> "CTh", etc.) and writes the line back out.
  # Blank lines and "#"-comment lines are copied unchanged.

  if (field == "") { arg_error("must specify \"field\""); }

  # Reject values that cannot name a real field: a non-numeric or zero value
  # would make $(field) refer to the whole record, and a negative one makes
  # gawk die with a fatal error instead of printing the usage message.
  if ((field !~ /^[0-9]+$/) || ((field + 0) < 1))
    { arg_error("\"field\" must be a positive integer"); }
}

# "abort" starts at -1 and is set to an exit status by the error handlers;
# once it is non-negative, stop instead of processing further records.
abort >= 0 {
  exit abort;
}

# Lines that are empty, contain only spaces, or whose first non-space
# character is "#" are copied to the output unchanged.
/^ *([#]|$)/ {
  print $0;
  next;
}

# Data line: validate the selected field, capitalize its ligatures, and
# print the record with that field replaced.
/./ {
  # The record must be long enough to contain the selected field.
  if (field > NF) {
    data_error(("bad NF line = \"" $0 "\""));
  }

  # Only letters, "?", and the element delimiters "{" "}" are accepted.
  if ($(field) !~ /^[{}?a-zA-Z]+$/) {
    data_error(("bad word \"" $(field) "\""));
  }

  w = capitalize_ligatures($(field));

  # Assigning to a field makes awk rebuild $0 from its fields joined by OFS.
  $(field) = w;
  print $0;
  next;
}

function capitalize_ligatures(w,    mids, n, k, lo, up)
{
  # Returns a copy of the word {w} with its ligatures marked by capital
  # letters: "ch" -> "Ch", "sh" -> "Sh", "cth" -> "CTh", "ikh" -> "IKh", etc.
  # The substitutions are applied in a fixed order, each one over the whole
  # string; characters that take part in no ligature are left untouched.

  # Two-letter ligatures.
  gsub(/ch/, "Ch", w);
  gsub(/sh/, "Sh", w);

  # Three-letter ligatures "c_h", "i_h", "?_h" where the middle letter is
  # one of k, t, p, f; the first two letters are capitalized.
  mids = "ktpf";
  n = length(mids);
  for (k = 1; k <= n; k++) {
    lo = substr(mids, k, 1);
    up = toupper(lo);
    gsub(("c" lo "h"),   ("C" up "h"), w);
    gsub(("i" lo "h"),   ("I" up "h"), w);
    gsub(("[?]" lo "h"), ("?" up "h"), w);
  }

  # Same shapes with an unknown ("?") middle letter.
  gsub(/c[?]h/, "C?h", w);
  gsub(/i[?]h/, "I?h", w);

  # A remaining "c" directly followed by "?".
  gsub(/c[?]/, "C?", w);

  # Doubled "h": the first one is capitalized.
  gsub(/hh/, "Hh", w);

  return w;
}

function arg_error(msg)
{
  # Reports a command-line problem {msg} on stderr, followed by the usage
  # text held in the global {usage}, then records status 1 in {abort}
  # and terminates the program with that status.
  printf("capitalize-ligatures: %s\n", msg) > "/dev/stderr";
  printf("usage: %s\n", usage) > "/dev/stderr";
  abort = 1;
  exit abort;
}

function data_error(msg)
{
  # Reports a problem {msg} with the current input record on stderr,
  # tagged with the record's number within the current file (FNR), then
  # records status 1 in {abort} and terminates the program with that status.
  printf("capitalize-ligatures: line %d: %s\n", FNR, msg) > "/dev/stderr";
  abort = 1;
  exit abort;
}