#! /bin/gawk -f
# Last edited on 2002-01-16 12:54:50 by stolfi

BEGIN {
  # Exit status requested by an error routine; -1 means "no error so far".
  abort = -1;
  usage = ( "reencode-words-viqr-for-tex \\\n" \
    "  [ -v field=NUM ] \\\n" \
    "  < INFILE.wct > OUTFILE.tex" \
  );

  # Assumes that field number FIELD of the input is a
  # word in VIQR Vietnamese encoding, possibly factored into letters
  # by braces "{}".  Adds `\' in front of some characters to allow
  # typesetting in TeX (with proper fonts).

  # The variable "field" must be given on the command line with
  # "-v field=NUM", and NUM must be a positive integer.  Anything else
  # (0, a negative number, a non-numeric string) would make "$(field)"
  # address the whole record or abort gawk with a run-time error, so it
  # is rejected here with the usage message.
  if (field == "")
    { arg_error("must specify \"field\""); }
  else if ((field !~ /^[0-9]+$/) || ((field + 0) < 1))
    { arg_error(("bad \"field\" = \"" field "\"")); }

  # Normalize to a number so that comparisons against NF are numeric.
  field = field + 0;
}

# Safety net: "abort" is -1 until arg_error or data_error sets it to the
# desired exit status; if it has been set, stop with that status
# instead of processing the record.
abort >= 0 {
  exit abort;
}

# Lines that are empty, contain only spaces, or whose first non-space
# character is "#" are copied to the output unchanged.
/^ *([#]|$)/ {
  print;
  next;
}

# Every other non-empty record: validate the selected field, re-encode
# it for TeX, store it back, and print the (rebuilt) record.
/./ {
  # The record must have at least "field" fields.
  if (field > NF) {
    data_error("bad NF");
  }

  # Only characters from the expected VIQR/brace alphabet are accepted.
  w = $(field);
  if (! (w ~ /^[-*{}a-zA-Z?(+^.`'~]+$/)) {
    data_error(("bad word \"" w "\""));
  }

  # Assigning to the field makes awk rebuild $0 before it is printed.
  w = reencode_viqr_for_tex(w);
  $(field) = w;
  print;
  next;
}

function reencode_viqr_for_tex(w)
{
  # Returns a copy of the word {w} with a backslash inserted in front of
  # every character that must be protected for TeX:
  #
  #   ^ { }      occur in VIQR words and in the "{}" letter factoring;
  #   # & $ %    are not expected in the input, but are protected too,
  #              just in case.
  #
  # All other characters are returned unchanged.
  #
  # In the replacement string, the awk literal "\\\\&" reaches gsub as
  # the three characters `\\&', which gsub interprets as "a literal
  # backslash followed by the matched text".  (The shorter "\\&" would
  # mean "a literal ampersand" and would therefore leave `&' unescaped.)
  gsub(/[{}#$%&^]/, "\\\\&", w);
  return w;
}

function arg_error(msg)
{
  # Reports a command-line error: writes {msg} and then the usage
  # summary (global {usage}) to standard error, records exit status 1
  # in the global {abort}, and terminates the program with that status.
  printf "%s\nusage: %s\n", msg, usage > "/dev/stderr";
  abort = 1;
  exit abort;
}

function data_error(msg)
{
  # Reports an error in the input data: writes {msg} to standard error,
  # prefixed by the current record number within the file (FNR),
  # records exit status 1 in the global {abort}, and terminates the
  # program with that status.
  printf("line %d: %s\n", FNR, msg) > "/dev/stderr";
  abort = 1;
  exit abort;
}