#! /usr/bin/gawk -f
# Last edited on 2002-03-05 01:42:49 by stolfi

BEGIN {
  # `abort' stays negative until some error handler requests termination.
  abort = -1; 
  usage = ( \
    "cat INFILE \\\n" \
    "  | assign-headings \\\n" \
    "      -v table=TBLFILE \\\n" \
    "  > OUTFILE " \
  );

  # Reads a file containing records of the form 
  # 
  #     SEC USEQ FNUM UNIT LINE TRAN FPOS RPOS PFRST PLAST WORD
  #     1   2    3    4    5    6    7    8    9     10    11
  # 
  # Also reads a table of the form SWORD HEAD, where SWORD is a 
  # special word and HEAD is its canonical form (report heading).
  # The table file name is given by the command-line variable `table'.
  # 
  # Outputs every input record whose WORD is either equal to some
  # SWORD (strong occurrence), or to some SWORD minus an initial gallows
  # letter (weak occurrence).  Multiple matches are printed multiple times.
  # Appends to each output record the corresponding HEAD ($12) and
  # a digit TAG ($13) which is 2 for a strong occurrence with FPOS = 1,
  # 1 for a strong occurrence elsewhere, and 0 for a weak occurrence.
  
  split("", head);   # `head[w]' is the heading for word `w'.
  split("", weak);   # `weak[w]' is the comma-separated list of headings 
                     #   of which `w' is a weak occurrence.
  split("", isweak); # `isweak[w,h]' is 1 iff `w' is a weak occurrence of `h'.

  # The table is mandatory; `arg_error' prints the usage and exits.
  if (table == "") { arg_error("must define \"table\""); }
  read_table(table, head,weak);
}

# Stop at once, with status `abort', if an error handler has set it.
abort >= 0 { exit abort }

# Data record (11 fields): report every heading that WORD ($11) refers to.
NF == 11 {
  w = $11;
  fpos = $7;

  # Strong occurrence: `w' is itself a table key.
  # The tag is 2 when FPOS is 1, otherwise 1.
  if (w in head) {
    if (fpos == 1) {
      print $0, head[w], 2;
    } else {
      print $0, head[w], 1;
    }
  }

  # Weak occurrence: `w' is some table key minus its leading gallows
  # letter.  `weak[w]' lists the headings, separated by commas; each
  # one yields its own output line with tag 0.
  if (w in weak) {
    nheads = split(weak[w], heads, ",");
    k = 1;
    while (k <= nheads) {
      print $0, heads[k], 0;
      k++;
    }
  }

  next;
}

# Any other non-empty record does not have 11 fields: reject it.
($0 ~ /./) {
  data_error("bad line type");
}

# Loads the SWORD/HEAD table from file `fname'.
#
# Each table line that is not blank or a whole-line `#' comment must have
# exactly two whitespace-separated fields, SWORD and HEAD, optionally
# followed by a `#' comment.  For each such line:
#
#   * sets `head[SWORD] = HEAD' (a repeated SWORD is a fatal error);
#   * if SWORD starts with one of the letters `k', `t', `p', `f', 
#     strips that letter to get W and adds HEAD to the comma-separated 
#     list `weak[W]', unless it is already there.
#
# Uses the global array `isweak' to remember which (W,HEAD) pairs were
# already added to `weak'.  Names after `weak' in the parameter list are
# local variables.  Calls `tbl_error' on a malformed table or read error, 
# and `arg_error' if no line could be read from the file.
function read_table(fname,head,weak,    ntbl,nlin,lin,fld,nfld,w,h,status)
{
  ntbl = 0;
  nlin = 0;
  # Keep the `getline' status: it is 0 at end of file and negative if
  # the file could not be opened or read.
  while ((status = (getline lin < fname)) > 0) { 
    nlin++;
    # Skip blank lines and whole-line comments.
    if (lin ~ /^[ \011]*([#]|$)/) { continue; }
    # Remove a trailing comment, then split into fields.
    gsub(/[#].*$/, "", lin);
    nfld = split(lin, fld, " ");
    if (nfld != 2) tbl_error(fname, nlin, ("bad table entry = \"" lin "\""));
    w = fld[1]; h = fld[2];
    if (w in head) tbl_error(fname, nlin, ("repeated key = \"" lin "\""));
    head[w] = h;
    if (w ~ /^[ktpf]/)
      { # The word minus its initial gallows is a weak occurrence of `h'.
        w = substr(w, 2);
        if (w in weak) 
          { if (! ((w,h) in isweak)) 
              { weak[w] = ( weak[w] "," h); }
          }
        else 
          { weak[w] = h; }
        isweak[w,h] = 1;
      }
    ntbl++;
  }
  # Test the `getline' status rather than the contents of ERRNO, whose
  # no-error value is not "0" in every gawk version; ERRNO is only
  # meaningful after a failure.
  if (status < 0) { tbl_error(fname, nlin, ERRNO); }
  close (fname);
  if (nlin == 0) { arg_error(("file \"" fname "\" empty or missing")); }
  # printf "loaded %6d table pairs\n", ntbl > "/dev/stderr"
}

# Reports problem `msg' found at line `n' of table file `f' on the 
# standard error stream, sets `abort' to 1, and exits with status 1.
function tbl_error(f,n,msg)
{ 
  abort = 1;
  printf "file %s, line %d: %s\n", f, n, msg > "/dev/stderr";
  exit abort;
}

# Reports problem `msg' for the current input record (numbered by FNR) 
# on the standard error stream, sets `abort' to 1, and exits with status 1.
function data_error(msg)
{
  abort = 1;
  printf "*** line %d: %s\n", FNR, msg > "/dev/stderr";
  exit 1;
}

# Reports the command-line problem `msg', followed by the text of the
# global `usage', on the standard error stream; then sets `abort' to 1
# and exits with status 1.
function arg_error(msg)
{ 
  abort = 1;
  printf "%s\nusage: %s\n", msg, usage > "/dev/stderr";
  exit abort;
}