#! /usr/bin/gawk -f
# Last edited on 1999-02-01 06:44:11 by stolfi

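# Reads each line from stdin and maps its fields through a table,
# replacing each field by its mapped value.  Lines beginning with "#"
# are copied unchanged; empty lines are not copied to the output.  Usage: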
#
#    cat INFILE \
#      | map-field \
#          -v table=TBLFILE \
#          [ -v fields=FIELDLIST ] \
#          [ -v default=DEFSTRING | -v forgiving=BOOL ] \
#      > OUTFILE
#
# Each line of TBLFILE should have two words OLD NEW specifying
# the mapping from OLD field values to NEW field values.
#
# FIELDLIST should be a list of field indices (counting from 1)
# separated by commas.  If it is not specified, all fields are mapped.
#
# Whenever an input field is not found in the table, if 
# "forgiving" is set, leaves it alone; otherwise, the default
# string DEFSTRING is used if not empty; otherwise the script aborts
# with an error message.
# 

# Reports a fatal error on stderr and terminates the script.
#   msg - text of the diagnostic; it is printed prefixed with the
#         current record number (NR) and followed by a newline.
# Sets the global "abort" flag (tested by the pattern rules) and then
# exits with status 1, so that the calling shell can detect the
# failure; a bare "exit" would report success.
function error(msg)
{ 
  printf "line %d: %s\n", NR, msg > "/dev/stderr"
  abort = 1
  exit 1
}

# === ACTIONS ===================================================

# Validates the command-line variables and loads the mapping table.
#
# Inputs (set with "-v"): table, fields, default, forgiving.
# Outputs (globals used by the pattern rules):
#   nfields - number of entries in fnum, or 0 meaning "map every field";
#   fnum    - fnum[1..nfields] are the field indices to be mapped;
#   dic     - dic[OLD] = NEW for each entry of the table file;
#   nMap    - number of entries loaded into dic.
#
# NOTE(review): "default" is a reserved word in gawk versions that
# support the "switch" statement; confirm that the gawk in use still
# accepts it as a variable name.
BEGIN {
  abort = 0;
  # error() appends the newline itself, so messages carry none.
  if (table == "") error("must specify \"-v table=FILE\"");
  if ((forgiving != "") && (default != ""))
    { error("can't specify \"default\" with \"forgiving\""); }
  else if ((forgiving == "") && (default == ""))
    { forgiving = 0; }
  if (fields != "")
    { nfields = split(fields, fnum, /,/); }
  else
    { nfields = 0; }
  split("", dic);   # Makes sure that dic is an (empty) array.
  nMap=0;
  # Keep the getline status: > 0 means a line was read, 0 means end of
  # file, < 0 means the file could not be opened or read.
  while((status = (getline lin < table)) > 0) { 
    if (! match(lin, /^[#]/))
      { split(lin, fld, " ");
        # Each non-comment line must have exactly two words.
        if ((3 in fld) || ! (2 in fld)) error("bad table entry = \"" lin "\"");
        if (fld[1] in dic) error("repeated key = \"" lin "\"");
        dic[fld[1]] = fld[2];
        nMap++;
      }
  }
  # Test the status rather than the contents of ERRNO, whose value
  # when no error has occurred is not "0" in every gawk version.
  if (status < 0) { error((table ": " ERRNO)); }
  close (table);
  if (nMap == 0) { error(("file \"" table "\" empty or missing")); }
  # printf "loaded %6d map pairs\n", nMap > "/dev/stderr"
}

# Lines whose first character is "#" are copied to the output as they
# are; "next" keeps them away from the field-mapping rule.
$0 ~ /^#/ {
  if (abort) { exit; }
  print $0;
  next;
}

# Non-empty lines that were not consumed by the "#" rule: replaces each
# selected field by its image in dic, then prints the record.
#
# The selected fields are fnum[1..nfields], or all NF fields when
# nfields is 0.  A field value that is not a key of dic is kept
# unchanged if "forgiving" is true, else replaced by "default" if that
# is non-empty, else the script stops through error().
/./ {
  if (abort) exit;
  n = (nfields == 0 ? NF : nfields);
  for(i=1;i<=n;i++)
    { # k is the index of the field handled in this iteration.
      k = (nfields == 0 ? i : fnum[i]);
      x = $(k);
      if (x in dic)
        { y = dic[x]; }
      else if (forgiving)
        { y = x; }
      else if (default != "") 
        { y = default; }
      else
        { # error() adds the "line N:" prefix and the final newline.
          error(("key \"" x "\" not in table"));
        }
      $(k) = y;
    }
  print;
  next;
}