#! /usr/bin/gawk -f
# Last edited on 2023-05-10 15:53:49 by stolfi


BEGIN {
  abort = -1;
  # The table goes to standard output, so the usage text shows it
  # as a redirection rather than as a file operand.
  usage = ( ARGV[0] " \\\n" \
    "  -v old=OLD.dic \\\n" \
    "  -v new=NEW.dic \\\n" \
    "  > OUTPUT.tbl " \
  );
  
  # Builds a word-substitution table from two wordlists, "{old}" and
  # "{new}" (file names given with "-v"), by pairing them in the given
  # order. Neither list may contain repetitions.
  # 
  # The table is written to standard output, one line per word of the
  # old list: the old word and the new word, separated by {OFS}.
  # 
  # If the new list is too long, the excess words are not used. If it
  # is too short, an error is printed and the script exits with status 1.
  
  if (old == "")      { arg_error("must define \"old\""); }
  if (new == "")      { arg_error("must define \"new\""); }

  # Clear the arrays, then load each wordlist.
  split("", oldwd); split("", oldnm);
  nold = read_dict(old, oldwd, oldnm);
  
  split("", newwd); split("", newnm);
  nnew = read_dict(new, newwd, newnm);

  # Make sure that we have enough new words:
  if (nnew < nold) 
    { arg_error("new list is too short"); }
  
  # Write the table, pairing the words by position:
  for (i = 0; i < nold; i++)
    { print oldwd[i], newwd[i]; }
}

function read_dict(file,dict,wnum,    n,lin,fld,nfld,nlin,res)
{
  # Reads a list of words from "file", one per line. Stores the words
  # in {dict[0..N-1]}, in the order read, and returns {N}. Also defines
  # {wnum[]} so that {dict[wnum[wd]] = wd} for every word {wd}.
  # 
  # Leading spaces, empty lines, and "#" comments (whole-line or
  # trailing) are ignored. Every other line must contain exactly one
  # field. Fails through {tbl_error} on a malformed line or a repeated
  # word, and through {arg_error} if the file cannot be read. Prints
  # the word count to stderr.
  
  n = 0;
  nlin = 0;
  ERRNO = "";
  while ((res = (getline lin < file)) > 0) { 
    # {getline lin < file} does not update {FNR}; keep it current by
    # hand so that {tbl_error} reports the line of {file} being parsed.
    nlin++;
    FNR = nlin;
    gsub(/^[ ]*/, "", lin);
    if (! match(lin, /^([#]|$)/))
      { gsub(/[ ]*[#].*$/, "", lin);
        nfld = split(lin, fld, " ");
        if (nfld != 1) tbl_error(file, ("bad wordlist entry = \"" lin "\""));
        # Words are the keys of {wnum}; {dict} is keyed by position,
        # so the repetition test must look in {wnum}.
        if (fld[1] in wnum) tbl_error(file, ("repeated key = \"" lin "\""));
        dict[n] = fld[1];
        wnum[fld[1]] = n;
        n++;
      }
  }
  # A negative {getline} result means the file could not be opened or read.
  if (res < 0) { arg_error((file ": " ERRNO)); }
  close (file);
  printf "%s: %6d words\n", file, n > "/dev/stderr";
  return n;
}

function arg_error(msg)
{
  # Reports a command-line or setup problem: writes {msg} followed by
  # the global {usage} text to stderr, records the failure in the
  # global {abort}, and terminates with exit status 1.
  printf "%s\nusage: %s\n", msg, usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_error(msg)
{
  # Reports a problem in the input data: writes {msg} to stderr,
  # prefixed with the current value of {FNR}, records the failure in
  # the global {abort}, and terminates with exit status 1.
  printf "line %d: ", FNR > "/dev/stderr";
  printf "%s\n", msg > "/dev/stderr";
  abort = 1;
  exit 1;
}

function tbl_error(file, msg)
{
  # Reports a problem in a table/wordlist file: writes {msg} to stderr,
  # prefixed with the name {file} and the current value of {FNR},
  # records the failure in the global {abort}, and terminates with
  # exit status 1.
  # 
  # NOTE: the line number shown is whatever {FNR} holds at the time of
  # the call; {getline var < file} does not update {FNR} by itself, so
  # the caller is responsible for keeping it meaningful.
  printf "file %s, line %s: ", file, FNR > "/dev/stderr";
  printf "%s\n", msg > "/dev/stderr";
  abort = 1;
  exit 1;
}