#! /n/gnu/bin/gawk -f
# Last edited on 2004-05-28 01:11:47 by stolfi

# Reads each line from stdin, maps a selected field through a table,
# and inserts the result as a specified new field. Usage:
#
#    cat INFILE \
#      | map-field \
#          [ -v inField=FLDNUM ] \
#          [ -v outField=FLDNUM ] \
#          -v table=TBLFILE \
#          [ -v default=DEFSTRING | forgiving=BOOL ] \
#      > OUTFILE
#
# The inField and outField are "1" if not specified,
# i.e. the new field is prepended to the original record.
# 
# Each line of TBLFILE should have two words OLD NEW specifying
# the mapping from OLD field values to NEW field values.
#
# Whenever the value of the input field is not found in the table:
# if "forgiving" is true, the unmapped value itself is used as the
# new field; otherwise the default string DEFSTRING is used if it is
# not empty; otherwise the script aborts with an error message.

function printout(mw, fn,    k, rec)
{
  # Writes the current record to stdout with the string "mw" spliced
  # in as field number "fn"; the original fields "fn..NF" move one
  # position to the right.  Aborts through "error" when the record
  # has fewer than "fn-1" fields.  ("k" and "rec" are locals.)
  if (NF < fn-1) { error("not enough output fields\n"); }

  # Prepending or appending leaves the text of $0 untouched.
  if (fn == 1) { print mw, $0; return; }
  if (fn == NF+1) { print $0, mw; return; }

  # Insertion in the middle: reassemble the record from its fields,
  # joined by OFS, and emit it in one piece.
  rec = "";
  for (k = 1; k < fn; k++) { rec = rec $(k) OFS; }
  rec = rec mw;
  for (k = fn; k <= NF; k++) { rec = rec OFS $(k); }
  printf "%s\n", rec;
}  

# === ACTIONS ===================================================

BEGIN {
  # Validates the "-v" arguments and loads the OLD -> NEW pairs from
  # the file named by "table" into the array "dic" (keyed by OLD).
  #
  # NOTE(review): "default" is a reserved word in gawk builds that
  # support the "switch" statement -- confirm that the gawk this script
  # is run with still accepts it as a variable name.

  abort = -1;  # Negative means "no error so far"; tested by the main rules.
  if (table == "") error("must specify \"-v table=FILE\"\n");
  split("", dic);  # Makes sure that "dic" is an (empty) array.
  if (inField == "") inField = 1;
  if (outField == "") outField = 1;
  if ((forgiving != "") && (default != ""))
    { error("can't specify \"default\" with \"forgiving\""); }
  else if ((forgiving == "") && (default == ""))
    { forgiving = 0; }  # Neither given: a missing key will be fatal.

  nMap=0;
  # The status of the last "getline" is kept so that a read failure
  # (negative) can be told apart from a plain end-of-file (zero).
  # Testing ERRNO for this purpose is unreliable: it is not guaranteed
  # to hold "0" when no error has occurred.
  while((rdStatus = (getline lin < table)) > 0) { 
    # Lines starting with "#" are comments.
    if (! match(lin, /^[\#]/))
      { nfld = split(lin, fld, " ");
        # A third word starting with "#" begins a trailing comment.
        if ((nfld >= 3) && (fld[3] ~ /^[\#]/)) { nfld = 2; }
        if (nfld != 2) error(("bad table entry = \"" lin "\""));
        if (fld[1] in dic) error(("repeated key = \"" lin "\""));
        dic[fld[1]] = fld[2];
        nMap++;
      }
  }
  if (rdStatus < 0) { error((table ": " ERRNO)); }
  close (table);
  if (nMap == 0) { arg_error(("file \"" table "\" empty or missing")); }
  # printf "loaded %6d map pairs\n", nMap > "/dev/stderr"
}

# Once an error has been flagged ("abort" is non-negative), stop
# reading input and exit with that status.
abort >= 0 { exit abort }

# Records that begin with "#" are copied to the output unchanged.
/^#/ { print $0; next }

/./ {
  # Handles every non-empty record: looks up field "inField" in "dic"
  # and prints the record with the result inserted as field "outField".
  if (abort >= 0) { exit abort; }
  if (NF < inField) { error("not enough input fields\n"); }

  key = $(inField);
  if (key in dic)
    { val = dic[key]; }
  else if (forgiving)
    { val = key; }       # Unmapped keys stand for themselves.
  else if (default != "") 
    { val = default; }   # Unmapped keys get the fallback string.
  else
    { error(("key \"" key "\" not in table\n")); }

  printout(val, outField);
  next;
}

function error(msg)
{ 
  # Reports "msg" on stderr, prefixed with the current record number
  # NR, records the failure in the global "abort" (which the main
  # rules test), and terminates the program with status 1.
  abort = 1;
  printf("line %d: %s\n", NR, msg) >> "/dev/stderr";
  exit(abort);
}

function arg_error(msg)
{ 
  # Reports "msg" on stderr without any record-number prefix, records
  # the failure in the global "abort" (which the main rules test), and
  # terminates the program with status 1.
  abort = 1;
  printf("%s\n", msg) >> "/dev/stderr";
  exit(abort);
}