#! /usr/bin/gawk -f
# Last edited on 2000-07-10 00:03:21 by stolfi

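# Splits a multipage EVT-format file into one file per page
# (OUTDIR/PAGE.evt), discarding #-comments and keeping, for each
# text line, only the version whose transcriber code equals "trcode"
# (default "A", taken here to be the majority-vote version).
# Also writes to stdout the list of page names, in the order written.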

BEGIN {
  # Exit-status latch: stays negative until error() records a failure.
  abort = -1;
  usage = "split-pages [-v outdir=DIR] [-v trcode=LETTER] < FILE > PAGELIST";

  # Defaults for the optional "-v" parameters.
  if (trcode == "") trcode = "A";
  if (outdir == "") outdir = ".";

  # The transcriber code must be exactly one uppercase letter.
  if (trcode !~ /^[A-Z]$/) error("bad trcode");

  # State of the pending output line: its page, its "unit.line" key,
  # the transcriber code captured for it, and the current page file.
  ofnum = ounln = otrcd = ofile = "";
}

# Defensive guard: stop reading input once an error has been recorded.
abort >= 0 { exit abort }

# Lines that carry no text to split are dropped:
#   - "#" comment lines,
#   - empty or all-blank lines,
#   - lines starting with a <...> tag that contains no "." and no
#     blank (presumably bare page headers such as <f1r>).
/^#/ ||
/^ *$/ ||
/^[<][^<>. ]*[>]/ { next }

/^[<][f][0-9][0-9]*[vr][0-9]*\.[A-Za-z][A-Za-z0-9]*\..*[>]/ {
  # A text line: "<PAGE.UNIT.LINE;TRCODE>" followed by the text,
  # which starts after a fixed-width prefix of "skip" columns.
  skip = 19;

  # Pull out the location code (between "<" and the first ">") and
  # break it into its fields at "." and ";".
  tmp = substr($0, 2, index($0, ">") - 2);
  gsub(/[.;]/, " ", tmp);
  nlocf = split(tmp, locf);
  if (nlocf < 3 || nlocf > 4) error("bad location format");

  fnum = locf[1];
  unit = locf[2];
  line = locf[3];
  # A location without a transcriber code is filed under "X".
  trcd = (nlocf >= 4) ? locf[4] : "X";

  # Nothing beyond the location prefix: ignore the line altogether.
  if (length($0) <= skip) next;
  txt = substr($0, skip + 1);

  # Regularized location code and the key identifying the text line.
  loc = sprintf("<%s.%s.%s;%s>", fnum, unit, line, trcd);
  unln = unit "." line;

  if (fnum != ofnum || unln != ounln)
    { # A different text line begins: flush the pending one first,
      # while "ofile" still names the page it belongs to.
      oout();
      if (fnum != ofnum)
        { # It also begins a new page: switch output files and
          # announce the page on stdout.
          if (ofile != "") close(ofile);
          printf "%s\n", fnum;
          ofile = outdir "/" fnum ".evt";
          ofnum = fnum;
        }
      ounln = unln;
    }

  # Keep this version only if it comes from the selected transcriber.
  if (trcd == trcode)
    {
      oloc = loc;
      otxt = txt;
      otrcd = trcd;
    }
  next;
}

# Catch-all: any non-empty line that none of the rules above consumed
# does not start with a recognizable location code.
/./ { error("bad location code") }

END {
  if (abort < 0)
    { # Normal termination: flush the last pending line and release
      # the last page file.
      oout();
      if (ofile != "") close(ofile);
    }
  else
    { # An error was recorded earlier; just propagate its status.
      exit abort;
    }
}


function oout()
{ 
  # Writes the pending line "(oloc, otxt)" to file "ofile" and clears
  # it.  Does nothing when there is no pending line ("ounln" empty).
  # Aborts through error() when no version with transcriber code
  # "trcode" was seen for the pending line, or when no output file
  # name has been set.
  if (ounln != "") 
    { # We do have a current line.
      if (otrcd == "")
        { # "oloc" is only set together with "otrcd", so it is empty
          # here; name the line by its page and "unit.line" key, which
          # still describe the pending line when oout() is called.
          # (The input line number printed by error() is that of the
          # line being read now, not of the line that lacks a version.)
          error(("missing majority version of <" ofnum "." ounln ";" trcode ">"));
        }
      if (ofile == "") { error(("file not open???")); }
      # ">>" appends: anything already present in "ofile" is kept.
      # NOTE(review): rerunning into the same outdir therefore extends
      # old page files -- confirm that callers clear them first.
      printf "%-19s%s\n", oloc, otxt >> ofile;
      oloc = ""; otxt = ""; otrcd = "";
    }
}

function error(msg)
{
  # Reports "msg" on stderr, tagged with the current input line
  # number and followed by the usage summary, then records exit
  # status 1 in "abort" and terminates (the END rule still runs and
  # propagates that status).
  printf "line %d: %s\nusage: %s\n", NR, msg, usage > "/dev/stderr";
  exit (abort = 1);
}