#! /usr/bin/gawk -f

# Reads each line from stdin, maps a selected field through a table,
# and inserts the result as a specified new field. Usage:
#
#    cat INFILE \
#      | map-field \
#          [ -v inField=FLDNUM ] \
#          [ -v outField=FLDNUM ] \
#          -v table=TBLFILE \
#          [ -v default=DEFSTRING ] \
#      > OUTFILE
#
# Both inField and outField default to "1" if not specified,
# i.e. the new field is prepended to the original record.
# 
# Each line of TBLFILE should have two words OLD NEW specifying
# the mapping from OLD field values to NEW field values.
#
# Whenever an input field is not found in the table, the default
# string DEFSTRING is used if not empty, otherwise the script aborts
# with an error message.

function error(msg)
{ 
  # Reports "msg" on stderr, prefixed with the current input record
  # number (NR, which is still 0 while inside BEGIN), sets the global
  # flag "abort", and terminates the script with a non-zero exit status.
  
  # Some callers end their message with "\n"; strip it so that the
  # format below does not emit a spurious blank line.
  sub(/\n+$/, "", msg);
  printf "line %d: %s\n", NR, msg > "/dev/stderr"
  abort = 1
  # A bare "exit" would report success (status 0) to the calling shell.
  exit 1
}

function printout(mw, fn,    i)
{
  # Prints the current record with "mw" inserted as field number "fn":
  # 1 means before the first field, NF+1 means after the last field.
  # "i" is a local loop index. Calls error(), which terminates the
  # script, when "fn" is not an integer in the range 1..NF+1.
  
  # Without this check fn == 0 would print $0 as if it were a field,
  # and a negative fn would be a fatal field reference.
  if ((fn != int(fn)) || (fn < 1)) { error("invalid output field number"); }
  if (NF < fn-1) { error("not enough output fields"); }
  if (fn == 1)
    { print mw, $0; }
  else if (fn == NF+1)
    { print $0, mw; }
  else
    { # Insertion in the middle: rebuild the record field by field.
      for (i=1;i<fn;i++) { printf "%s%s", $(i), OFS; }
      printf "%s", mw;
      for (i=fn;i<=NF;i++) { printf "%s%s", OFS, $(i); }
      # Terminate with ORS, like the "print" statements above do.
      printf "%s", ORS;
    }
}  

# === ACTIONS ===================================================

BEGIN {
  # Checks the command-line variables, applies the defaults for
  # inField and outField, and loads the OLD -> NEW pairs of the table
  # file into the array "dic". Reports the number of pairs on stderr.
  abort = 0;
  if (table == "") error("must specify \"-v table=FILE\"");
  split("", dic);   # start from an empty dictionary
  if (inField == "") inField = 1;
  if (outField == "") outField = 1;
  nMap=0;
  nLin=0;           # line counter within the table file, for messages
  while((status = (getline lin < table)) > 0) { 
    nLin++;
    split(lin, fld);
    # Each table line must consist of exactly two words.
    if ((3 in fld) || ! (2 in fld)) 
      error(table ":" nLin ": bad table entry = \"" lin "\"");
    if (fld[1] in dic) 
      error(table ":" nLin ": repeated key = \"" lin "\"");
    dic[fld[1]] = fld[2];
    nMap++;
  }
  # getline yields -1 when the file cannot be opened or read; without
  # this check an unreadable table would be treated as an empty one.
  if (status < 0) error("cannot read table file \"" table "\"");
  close (table);
  printf "loaded %6d map pairs\n", nMap > "/dev/stderr"
}

# Lines starting with "#" are copied to the output unchanged.
$0 ~ /^#/ {
  if (abort) { exit; }
  print $0;
  next;
}

# Data records (non-empty lines not handled by an earlier rule): looks
# up field number inField in the table "dic" and prints the record
# with the mapped value inserted as field number outField.
#
# NOTE: "default" is a reserved word in gawk 4.0 and later (it belongs
# to the switch statement), so it can neither be referenced in the
# program nor assigned with "-v default=...". The fallback string is
# therefore read from the variable "defaultStr"; pass it as
# "-v defaultStr=DEFSTRING".
/./ {
  if (abort) exit;
  if (NF < inField) { error("not enough input fields"); }
  x = $(inField);
  if (x in dic)
    { y = dic[x]; }
  else if (defaultStr != "")
    { # Key missing from the table, but a fallback string was given.
      y = defaultStr;
    }
  else
    { error("old key not in table"); }
  printout(y, outField);
  next;
}