#! /usr/bin/gawk -f
# Last edited on 2001-09-18 01:30:49 by stolfi

# Corrects diacritics in some common Brazilian names.
#
# Input file has multiple names per line, separated by whitespace.
# Output has the same number of fields in each line; some of them
# may have been replaced.  Empty input lines are not copied to the
# output (the main rule only fires on lines with at least one character).
#
# The replacement table is read from
# "$STOLFIHOME/lib/fix-accents-in-names.tbl".  Each non-blank,
# non-comment ("#") line of the table must have exactly two
# whitespace-separated fields: the word to look for and its replacement.

BEGIN {
  abort = -1;
  usage = "fix-accents-in-names < INFILE > OUTFILE";

  # Awk does not expand shell-style "${VAR}" inside string literals,
  # so the directory must be fetched from the environment explicitly.
  if (! ("STOLFIHOME" in ENVIRON)) {
    arg_error("environment variable STOLFIHOME is not set");
  }
  tblfile = ENVIRON["STOLFIHOME"] "/lib/fix-accents-in-names.tbl";

  # Read table into "dic" array:
  read_dic(tblfile);
}

# Main rule: replace every field that is a key of "dic".
/./ {
  for (i = 1; i <= NF; i++) {
    if ($(i) in dic) {
      r = dic[$(i)];
      # A replacement that starts with a lowercase ASCII letter is
      # not applied to the first field of a line.
      if ((i > 1) || (r !~ /^[a-z]/)) { $(i) = r; }
    }
  }
  # Note: assigning to a field makes awk rebuild the record with
  # fields joined by OFS, so the original spacing of a modified
  # line is not preserved.
  print;
}

# Loads the replacement table "tblfile" into the global array "dic"
# (key = word to replace, value = replacement).
#
# Aborts through tbl_error() if the file cannot be read, if an entry
# does not have exactly two fields, if a key is repeated, or if the
# replacement of some key is itself a key ("contradicted map").
# Aborts through arg_error() if the file has no lines at all.
#
# The names after "tblfile" are local variables, not arguments.
function read_dic(tblfile,   nMap,lin,fld,nfld,wd,status,keyLine)
{
  split("", dic);
  nMap = 0;  # Number of table lines read so far (including comments).

  # getline returns 1 on success, 0 at end of file, and a negative
  # value on error.  The status is tested directly instead of
  # comparing ERRNO with "0": ERRNO is the empty string when no
  # error has occurred, so that comparison would always report an error.
  while ((status = (getline lin < tblfile)) > 0) {
    nMap++;
    # Skip blank lines and "#" comments (leading spaces or tabs allowed).
    if (! match(lin, /^[ \t]*([#]|$)/)) {
      nfld = split(lin, fld, " ");
      if (nfld != 2) tbl_error(tblfile, nMap, ("bad table entry = \"" lin "\""));
      if (fld[1] in dic) tbl_error(tblfile, nMap, ("repeated key = \"" lin "\""));
      dic[fld[1]] = fld[2];
      keyLine[fld[1]] = nMap;  # Remembered for later diagnostics.
    }
  }
  if (status < 0) {
    tbl_error(tblfile, nMap, (ERRNO != "" ? ERRNO : "read error"));
  }
  close (tblfile);
  if (nMap == 0) { arg_error(("file \"" tblfile "\" empty or missing")); }
  # printf "loaded %6d map pairs\n", nMap > "/dev/stderr"

  # Check for contradictory mappings, i.e. a replacement word that is
  # itself listed as a word to be replaced.  The error is reported at
  # the line where the offending key was defined.
  for (wd in dic) {
    if (dic[wd] in dic) {
      tbl_error(tblfile, keyLine[wd], ("contradicted map = \"" wd "\" -> \"" dic[wd] "\""));
    }
  }
}

# Prints "msg" and the usage string to stderr, then exits with status 1.
function arg_error(msg)
{
  printf "%s\n", msg >> "/dev/stderr";
  printf "usage: %s\n", usage >> "/dev/stderr";
  abort = 1;
  exit 1
}

# Reports an error at line "nMap" of table file "tblfile" on stderr,
# then exits with status 1.
function tbl_error(tblfile, nMap, msg)
{
  printf "%s, line %d: %s\n", tblfile, nMap, msg >> "/dev/stderr";
  abort = 1;
  exit 1
}

# Reports an error at the current input record (NR) on stderr, then
# exits with status 1.
function data_error(msg)
{
  printf "line %d: %s\n", NR, msg >> "/dev/stderr";
  abort = 1;
  exit 1
}