#! /usr/bin/gawk -f
# Last edited on 2004-02-28 02:40:45 by stolfi

# Apply a mapping table to all words of the Vietnamese NT

BEGIN {
  # "abort" holds the pending exit status; negative means "no error so far".
  abort = -1;
  usage = ( ARGV[0] " < INPUT.src > OUTPUT.src" );

  # Maps words through the "fix-case.tbl" that should
  # map to lowercase all Vietnamese words, and capitalize
  # all proper names and transliterated hebrew words.
  # Ignores #-comments, @-directives, {}-comments and
  # embedded @-constructs.

  load_table("fix-case.tbl");
}

# If an error was flagged earlier, stop processing input.
(abort >= 0) { exit abort; }

# Blank lines, #-comments and @-directives are copied through unchanged.
/^[ \011]*([#@]|$)/ { print; next; }

# General contents line.
/./ {
  # Whitespace cleanup: drop trailing blanks, squeeze runs of
  # blanks/tabs to one space, then drop leading blanks.
  gsub(/[ \011]+$/, "", $0);
  gsub(/[ \011]+/, " ", $0);
  gsub(/^[ \011]+/, "", $0);

  # Word mapping
  map_words();

  # Insert the leading indentation (replaces any leading spaces).
  gsub(/^[ ]*/, " ", $0);
  print;
  next;
}

END {
  # "exit" inside BEGIN or a rule still runs END; re-assert the status.
  if (abort >= 0) { exit abort; }
}

function map_words(   i,w)
{
  # Applies the case correction table "wmap" to every field of
  # the current record, in place.
  #
  # Fields beginning with "@" or "{" are left untouched.  Note that
  # only the field that starts with "{" is skipped; later words of a
  # multi-word {}-comment are still looked up in the table.
  for (i = 1; i <= NF; i++)
    {
      w = $(i);
      if ((w !~ /^[@]/) && (w !~ /^[{]/))
        {
          if (w in wmap) { w = wmap[w]; }
          $(i) = w;
        }
    }
}

function load_table(file,   nMap,lin,fld,nfld,nlin,status)
{
  # Reads a word mapping table from "file", containing pairs
  # of the form ORIGINAL NEW, one pair per line.  Blank lines and
  # lines whose first non-blank character is "#" are ignored.
  # Stores the table in the global array "wmap[ORIGINAL] = NEW".
  #
  # Reports an error (and sets "abort") if an entry does not have
  # exactly two fields, if a key is repeated, if the file cannot be
  # read, or if no pairs were found.

  nMap = 0;
  nlin = 0;  # Line counter for messages; FNR is not updated by "getline < file".
  split("", wmap);
  while ((status = (getline lin < file)) > 0)
    {
      nlin++;
      if (lin !~ /^[ \011]*(#|$)/)
        {
          nfld = split(lin, fld, " ");
          if (nfld != 2) tbl_error(file, ("bad table entry = \"" lin "\""), nlin);
          if (fld[1] in wmap) tbl_error(file, ("repeated key = \"" lin "\""), nlin);
          wmap[fld[1]] = fld[2];
          nMap++;
        }
    }
  # getline returns -1 on a read/open failure.  Test that instead of
  # comparing ERRNO with "0": ERRNO is a message string and is not
  # guaranteed to be "0" after a successful read.
  if (status < 0) { arg_error((file ": " ERRNO)); }
  close (file);
  if (nMap == 0)
    { arg_error(("file \"" file "\" empty or missing")); }
  # printf "** loaded %6d map pairs\n", nMap > "/dev/stderr"
}

function arg_error(msg)
{
  # Reports a command-line / setup error plus the usage line on
  # stderr, flags the failure in "abort", and exits with status 1.
  printf "%s\n", msg > "/dev/stderr";
  printf "** usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function tbl_error(file, msg, lnum)
{
  # Reports an error found at line "lnum" of table "file" on stderr,
  # flags the failure in "abort", and exits with status 1.
  # "lnum" is optional; when omitted the current FNR is used.
  if (lnum == "") { lnum = FNR; }
  printf "file %s, line %s: %s\n", file, lnum, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_error(msg)
{
  # Reports an error in the main input at the current line (FNR) on
  # stderr, flags the failure in "abort", and exits with status 1.
  printf "line %d: ** %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}