#! /usr/bin/gawk -f
# Last edited on 2004-02-28 02:40:45 by stolfi

# Apply a mapping table to all words of the Vietnamese NT

BEGIN {
  # Maps words through the "fix-case.tbl" that should
  # map to lowercase all Vietnamese words, and capitalize
  # all proper names and transliterated Hebrew words.
  # Ignores #-comments, @-directives, {}-comments and
  # embedded @-constructs.

  # "abort" stays negative while everything is fine; the error
  # routines set it to the exit status to be returned.
  abort = -1;

  # The script is a filter: text comes in on stdin and the
  # corrected text goes to stdout.
  usage = ( ARGV[0] " < INPUT.src > OUTPUT.src" );

  # The table is looked up relative to the current directory.
  load_table("fix-case.tbl");
}

# Stop processing input as soon as an error status has been recorded.
abort >= 0 {
  exit abort;
}

# Lines whose first non-blank character is "#" or "@", and lines that
# are empty or all blanks/tabs, are copied to the output unchanged.
$0 ~ /^[ \011]*([\#@]|$)/ {
  print $0;
  next;
}

# Any other non-empty line is text: normalize its blanks, map its
# words, and print it indented by two spaces.
$0 ~ /./ {
  # Whitespace cleanup: drop trailing blanks/tabs, squeeze every
  # remaining run of blanks/tabs to one space, drop the leading run.
  sub(/[ \011]+$/, "");
  gsub(/[ \011]+/, " ");
  sub(/^[ \011]+/, "");

  # Replace each word by its table entry, if it has one.
  map_words();

  # Force exactly two spaces of indentation on the output line.
  sub(/^[ ]*/, "  ");
  print $0;
  next;
}

END {
  # An "exit" issued by one of the error routines still lands here;
  # re-issue it with the recorded status.  With no error recorded
  # ("abort" still negative) nothing is done.
  if (abort >= 0)
    exit abort;
}

function map_words(   k,word)
{
  # Replaces each field of the current record by its entry in the
  # global table "wmap", when it has one.  Fields that begin with
  # "@" or "{" are left completely untouched.
  # NOTE(review): only the first character of each field is tested,
  # so the later words of a {}-comment that contains blanks would
  # still be looked up in the table -- confirm against the input format.
  for (k = 1; k <= NF; k++)
    { word = $(k);
      if ((word ~ /^[@]/) || (word ~ /^[{]/)) continue;
      # The field is assigned even when it has no table entry.
      $(k) = ((word in wmap) ? wmap[word] : word);
    }
}

function load_table(file,    nMap,lin,fld,nfld,status,nlin)
{
  # Reads a word mapping table from "file", containing pairs
  # of the form ORIGINAL NEW, one pair per line.  Lines that are
  # blank or whose first non-blank character is "#" are skipped.
  # Stores the table in the global "wmap[ORIGINAL] = NEW".
  #
  # Aborts through "tbl_error" on a line that does not have exactly
  # two fields or that repeats a key, and through "arg_error" when
  # the file cannot be read or yields no pairs at all.

  nMap = 0;
  nlin = 0;
  split("", wmap);
  # Keep getline's own status: it is > 0 for a line read, 0 at end
  # of file and < 0 on failure.  Testing the status is more reliable
  # than comparing ERRNO with "0", since the value ERRNO holds when
  # nothing went wrong is not guaranteed to be "0".
  while ((status = (getline lin < file)) > 0) {
    nlin++;
    if (lin !~ /^[ \011]*([\#]|$)/)
      { nfld = split(lin, fld, " ");
        if (nfld != 2) tbl_error(file, ("bad table entry = \"" lin "\""), nlin);
        if (fld[1] in wmap) tbl_error(file, ("repeated key = \"" lin "\""), nlin);
        wmap[fld[1]] = fld[2];
        nMap++;
      }
  }
  if (status < 0) { arg_error((file ": " ERRNO)); }
  close (file);
  if (nMap == 0) { arg_error(("file \"" file "\" empty or missing")); }
  # printf "** loaded %6d map pairs\n", nMap > "/dev/stderr"
}

function arg_error(msg)
{
  # Reports a problem with the program's arguments or files: prints
  # "msg" and the usage line to stderr, records status 1 in the
  # global "abort" and exits.
  printf "%s\n", msg > "/dev/stderr";
  printf "** usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function tbl_error(file, msg, lnum)
{
  # Reports an error found in table file "file": prints the file
  # name, a line number and "msg" to stderr, records status 1 in
  # the global "abort" and exits.
  # "lnum" is the line of "file" being complained about.  It is
  # optional; when omitted the main input's FNR is printed, as
  # before the parameter existed.
  if (lnum == "") { lnum = FNR; }
  printf "file %s, line %s: %s\n", file, lnum, msg > "/dev/stderr";
  abort = 1; exit 1;
}

function data_error(msg)
{
  # Reports a problem on the current input line (numbered by FNR):
  # records status 1 in the global "abort", prints "msg" to stderr
  # and exits.
  abort = 1;
  printf "line %d: ** %s\n", FNR, msg > "/dev/stderr";
  exit 1;
}