#! /usr/bin/gawk -f
# Last edited on 2002-03-05 01:50:38 by stolfi

# Reads a file containing lines of the form 
# 
#   SEC USEQ FNUM UNIT LINE TRAN FPOS RPOS PFRST PLAST WORD HEAD TAG
#   1   2    3    4    5    6    7    8    9     10    11   12   13
# 
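# Combines consecutive lines that have the same HEAD, TAG, WORD, FNUM, UNIT,
# and LINE by concatenating their TRAN fields (a TRAN code equal to the one
# just before it is replaced by a repeat count, e.g. "A" then "A" gives "A2")
# and ORing their PFRST and PLAST flags. SEC and USEQ are taken from the
# first line of each group. FPOS and RPOS are always set to "*".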

# Start-up: `abort` holds the pending exit status; a negative value
# means that no error has been detected so far.
BEGIN { abort = -1 }

# Once an error status has been recorded, stop reading input and
# leave with that status.
abort >= 0 { exit abort }

# Data rule: a well-formed record has exactly 13 fields
#   SEC USEQ FNUM UNIT LINE TRAN FPOS RPOS PFRST PLAST WORD HEAD TAG
# Consecutive records that agree on HEAD, TAG, WORD, FNUM, UNIT and LINE
# are folded into one pending output record, kept in the "o"-prefixed
# globals.  A record that differs in any of those fields flushes the
# pending record and becomes the new pending one.
(NF == 13){
  sec = $1; useq = $2; 
  fnum = $3; unit = $4; nlin = $5; tran = $6;
  fpos = $7; rpos = $8; 
  pfrst = $9; plast = $10; 
  word = $11;
  head = $12; tag = $13;
  # The key fields are compared as strings (hence the `""` concatenations).
  # A plain `==` on values that look numeric compares them as numbers,
  # which would treat e.g. "1" and "01" (or "100" and "1e2") as the same
  # key, and would match a first record whose key fields are all "0"
  # against the still-unset "o" variables.
  if (((head "") == (ohead "")) && ((tag "") == (otag "")) &&
      ((word "") == (oword "")) && ((fnum "") == (ofnum "")) &&
      ((unit "") == (ounit "")) && ((nlin "") == (onlin "")))
    { # Same group: extend the TRAN string and OR the flags
      # (the merged flags become numeric 0/1).
      otran = concat_tran(otran,tran); 
      opfrst = (opfrst || pfrst);
      oplast = (oplast || plast);
    }
  else
    { # New group: emit the previous one (if any) and remember this record.
      # SEC and USEQ are those of the first record of the group.
      output_line();
      osec = sec; ouseq = useq; 
      ofnum = fnum; ounit = unit; onlin = nlin; otran = tran;
      # Positions are not meaningful for a combined record.
      ofpos = "*"; orpos = "*"; 
      opfrst = pfrst; oplast = plast; 
      oword = word;
      ohead = head; otag = tag;
    }
  # Skip the remaining rules, which only handle malformed lines.
  next;
}

# End of input: flush the last pending record, unless an error was
# recorded, in which case leave with the error status instead.
END {
  if (abort < 0)
    { output_line(); }
  else
    { exit abort; }
}

# Appends the code `t` to the accumulated string `ot` and returns the
# result.  If `ot` (ignoring a trailing run of digits, read as a repeat
# count) ends with the character `t`, the count is incremented instead
# of appending; an absent count stands for 1.  An empty `ot` yields `t`.
# `count` and `stem` are local variables.  Calls match(), so RSTART and
# RLENGTH are modified whenever `ot` is non-empty.
function concat_tran(ot,t,  count,stem)
{
  # Nothing accumulated yet: the result is just the new code.
  if (ot == "") { return t; }

  # Split `ot` into the part before the trailing digits and the count.
  count = 1; stem = ot;
  if (match(ot, /[0-9]+$/))
    { stem = substr(ot, 1, RSTART-1); count = substr(ot, RSTART); }

  # A code different from the last one is simply appended.
  if (substr(stem, length(stem), 1) != t) { return (ot t); }

  # Same code again: bump the repeat count.
  return (stem (count + 1));
}

# Prints the pending combined record (the "o"-prefixed globals) as one
# output line of 13 fields.  Does nothing while ofnum is still empty,
# i.e. before any record has been stored.
function output_line()
{
  if (ofnum == "") { return; }
  print osec, ouseq, ofnum, ounit, onlin, otran, ofpos, orpos, \
        opfrst, oplast, oword, ohead, otag;
}

# Any non-empty line that reaches this rule did not have 13 fields
# (those are consumed by the rule that ends in `next`): report it.
$0 ~ /./ { data_error("bad line type"); }

# Reports `msg` on standard error, prefixed with the current input line
# number (FNR), records exit status 1 in `abort`, and terminates.
function data_error(msg)
{
  printf("*** line %d: %s\n", FNR, msg) > "/dev/stderr";
  abort = 1;
  exit 1;
}