#! /usr/bin/gawk -f
# Last edited on 2012-02-02 05:53:34 by stolfilocal

# Reads a file in ".wds" format, replaces all words according to a
# given table, outputs another ".wds" file.
#
# The table file name is passed in the awk variable "table" (see the
# usage string below).  Each non-blank, non-comment table line must
# hold exactly two fields: the original word and its replacement.
# Words that are not in the table are copied unchanged, with a warning
# on standard error.

BEGIN {
  # "abort" is negative while all is well; the error functions set it
  # to the exit status so that the main rules and END stop early.
  abort = -1;
  usage = ( ARGV[0] " \\\n" \
    " -v table={TBLFILE} \\\n" \
    " < orig.wds > cata.wds" \
  );
  #
  # See "wds-format.txt" for a description of the input file format.

  # Without a table name the getline below would try to read a file
  # called "", so report the missing argument explicitly.
  if (table == "") { arg_error(("must define \"table\"")); }

  # Word remapping table:
  split("", wdmap);
  read_word_table(table, wdmap);

  nlines = 0; # Number of lines read.
  nwdin = 0;  # Number of words/symbols read.
  nbad = 0;   # Number of words not found in table.
}

# Stop processing input as soon as some error function has fired.
(abort >= 0) { exit abort; }

# Normalization applied to every input line before classification.
// {
  # Get rid of funny spaces
  gsub(/[\011\014\015\240]/, " ");
  # Remove trailing blanks:
  gsub(/[ ]+$/, "");
  nlines++;
}

# A "#" line with nothing else after it.
/^[\#][ ]*$/ {
  # Output the line with a space:
  printf "%s \n", $0;
  next;
}

# Lines of type "@", "p", "$" or "#" followed by a blank.
/^[@p\$\#][ ]/ {
  # Output the line unchanged:
  printf "%s\n", $0;
  next;
}

# Lines of type "a" or "s": these carry the words that get remapped.
/^[as][ ]/ {
  nwdin++;
  # Extract the word and type:
  type = substr($0, 1, 1);
  wold = substr($0, 3);
  # Try to map it:
  if (wold in wdmap) {
    wnew = wdmap[wold];
  } else {
    # Unknown words are kept as they are, but counted and reported.
    data_warning(("word not in table: «" wold "»"));
    wnew = wold;
    nbad++;
  }
  # Output it:
  printf "%s %s\n", type, wnew;
  next;
}

# Anything that reached this point matched none of the known formats.
// {
  data_error(("bad line format"));
  next;
}

END {
  # An "exit" in BEGIN or in a main rule still runs END; propagate the
  # error status instead of printing the statistics.
  if (abort >= 0) { exit abort; }
  printf "%8d lines read\n", nlines > "/dev/stderr";
  printf "%8d words/symbols read\n", nwdin > "/dev/stderr";
  printf "%8d words not in table\n", nbad > "/dev/stderr";
}

function read_word_table(fname,wt,   nwords,nlines,lin,fld,nfld,wa,wb,status)
{
  # Reads the word-pair table from file {fname} into the array {wt},
  # so that {wt[wa] = wb} for each table line "{wa} {wb}".
  #
  # Blank lines and lines whose first non-blank character is "#" are
  # skipped; a trailing "#" comment is stripped from the other lines.
  # A line that does not have exactly two fields, a word that appears
  # twice, or a read failure is reported through {tbl_error}; an empty
  # file is reported through {arg_error}.  Both terminate the program.
  #
  # The parameters after {wt} are local variables.

  nwords = 0;
  nlines = 0;
  printf "reading wordmap from file %s\n", fname > "/dev/stderr";
  # Keep the getline status: positive for a record, 0 at end of file,
  # negative when the file cannot be opened or read.
  while ((status = (getline lin < fname)) > 0) {
    nlines++;
    if (! match(lin, /^[ \011]*([\#]|$)/)) {
      # Strip the trailing comment and leading blanks, squeeze separators.
      gsub(/[ ]*[\#].*$/, "", lin);
      gsub(/^[ ]+/, "", lin);
      # NOTE(review): this class also matches "!" -- confirm that is intended.
      gsub(/[ !]+/, " ", lin);
      nfld = split(lin, fld, " ");
      if (nfld != 2) tbl_error(fname, nlines, ("bad table entry = \"" lin "\""));
      wa = fld[1];
      wb = fld[2];
      if (wa in wt) tbl_error(fname, nlines, ("repeated word in tables = \"" lin "\""));
      wt[wa] = wb;
      nwords++;
    }
  }
  # The getline status is tested instead of comparing ERRNO with "0":
  # ERRNO is a message string that is empty when no error occurred in
  # current gawk versions, so that comparison would signal a failure
  # after every successful read.
  if (status < 0) { tbl_error(fname, nlines, ERRNO); }
  close (fname);
  if (nlines == 0) { arg_error(("file \"" fname "\" empty or missing")); }
  printf "%8d word pairs read from %s\n", nwords, fname > "/dev/stderr"
}

function arg_error(msg)
{
  # Reports a command-line error {msg} together with the usage text,
  # then terminates the program with status 1.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_warning(msg)
{
  # Reports a non-fatal problem {msg} with the current input line.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
}

function data_error(msg)
{
  # Reports a fatal problem {msg} with the current input line, echoes
  # that line, then terminates the program with status 1.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  printf " %s\n", $0 > "/dev/stderr";
  abort = 1;
  exit 1;
}

function tbl_error(f,n,msg)
{
  # Reports a fatal problem {msg} at line {n} of table file {f}, then
  # terminates the program with status 1.
  printf "file %s, line %d: %s\n", f, n, msg > "/dev/stderr";
  abort = 1;
  exit 1
}