#! /usr/bin/gawk -f
# Last edited on 2023-05-10 15:53:49 by stolfi

# Builds a word-substitution table from two wordlists, "{old}.dic"
# and "{new}.dic", by pairing them in the given order. Neither list
# may contain repetitions.
#
# If the new list is too long, the excess words are not used. If it
# is too short, an error is printed.
#
# The table is written to standard output, one "OLD NEW" pair per line.

BEGIN {
  abort = -1;

  # The table goes to standard output, hence the ">" redirection shown here.
  usage = ( ARGV[0] "\\\n" \
    " -v old=OLD.dic \\\n" \
    " -v new=NEW.dic \\\n" \
    " > OUTPUT.tbl " \
  );

  # Both wordlist names must be supplied with "-v" on the command line:
  if (old == "") { arg_error("must define \"old\""); }
  if (new == "") { arg_error("must define \"new\""); }

  # Read the two wordlists; {split("", x)} clears the array {x}:
  split("", oldwd); split("", oldnm);
  nold = read_dict(old, oldwd, oldnm);

  split("", newwd); split("", newnm);
  nnew = read_dict(new, newwd, newnm);

  # Make sure that we have enough new words:
  if (nnew < nold) { arg_error("new list is too short"); }

  # Write the table:
  for (i = 0; i < nold; i++) { print oldwd[i], newwd[i]; }
}

function read_dict(file,dict,wnum,   n,lin,fld,nfld,lnum,status)
{
  # Reads a list of words from "file", one per line. Stores the words
  # in {dict[0..N-1]}, in the order read, and returns {N}. Also defines
  # {wnum[]} so that {dict[wnum[wd]] = wd} for every word {wd}. Fails
  # if there are any duplicate words.
  #
  # Blank lines and lines whose first non-blank character is "#" are
  # skipped; a trailing "#" comment after a word is removed. Each
  # remaining line must contain exactly one word.
  #
  # Also prints the word count of "file" to "/dev/stderr".

  n = 0;
  lnum = 0;   # Number of lines read from {file}, for error messages.
  ERRNO = "";
  while ((status = (getline lin < file)) > 0) {
    lnum++;
    # Remove leading blanks:
    gsub(/^[ ]*/, "", lin);
    if (! match(lin, /^([#]|$)/)) {
      # Remove the trailing comment, if any:
      gsub(/[ ]*[#].*$/, "", lin);
      nfld = split(lin, fld, " ");
      if (nfld != 1) { tbl_error(file, ("bad wordlist entry = \"" lin "\""), lnum); }
      # Duplicates must be looked up in {wnum}, which is indexed by word;
      # {dict} is indexed by the integers {0..n-1}.
      if (fld[1] in wnum) { tbl_error(file, ("repeated key = \"" lin "\""), lnum); }
      dict[n] = fld[1];
      wnum[fld[1]] = n;
      n++;
    }
  }
  # A negative {getline} status means the file could not be opened or read:
  if (status < 0) { arg_error((file ": " ERRNO)); }
  close (file);
  printf "%s: %6d words\n", file, n > "/dev/stderr";
  return n;
}

function arg_error(msg)
{
  # Prints {msg} and the usage text to "/dev/stderr", then exits with
  # status 1. Sets {abort} to 1 before exiting.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_error(msg)
{
  # Prints {msg} to "/dev/stderr", prefixed with the current value of
  # {FNR}, then exits with status 1. Sets {abort} to 1 before exiting.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}

function tbl_error(file, msg, lnum)
{
  # Prints {msg} to "/dev/stderr", prefixed with the name {file} and the
  # line number {lnum}, then exits with status 1. Sets {abort} to 1
  # before exiting.
  #
  # The parameter {lnum} is optional; if omitted, {FNR} is used. Callers
  # that read {file} with {getline lin < file} should pass their own
  # line count, since that form of {getline} does not update {FNR}.
  if (lnum == "") { lnum = FNR; }
  printf "file %s, line %s: %s\n", file, lnum, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}