#! /usr/bin/gawk -f
# Last edited on 1998-07-23 04:11:37 by stolfi

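# usage: pick-best-labels [-v trcodes=STRING] < INFILE.idx > OUTFILE.idx
#
# Reads a label/title index file (11 fields per line, separated by "|")
# and selects the "best" transliteration for each label.  Consecutive
# records with the same location <fnum.unit.line> are treated as
# alternative transliterations of one label; only the record whose
# transcriber code has the best rank is written.  The rank of a code is
# its position in the 26-letter string "trcodes" (earlier is better).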

BEGIN {
  # Records are "|"-separated, both on input and on output.
  FS = "|";
  OFS = "|";

  # abort < 0 means "no error so far"; error() stores the exit status here.
  abort = -1;

  # Ranking of transcriber codes, best first; may be overridden with -v.
  # NOTE(review): the default string contains "R" twice and no "H", so "R"
  # ends up with rank 19 and "H" gets no rank at all -- confirm the intent.
  if (trcodes == "") { trcodes = "UVZABENOPRSWXYKQLMRJITFGCD"; }

  len = length(trcodes);
  if (len != 26) { error(("bad trcodes")); }

  # trpri[c] = position of code c within trcodes (smaller is better).
  i = 1;
  while (i <= len)
    { c = substr(trcodes, i, 1);
      trpri[c] = i;
      i++;
    }

  # Start with an empty record buffer.
  oclr();
}

function oclr()
{
  # Empties the one-record buffer by setting every slot to "".
  #
  #   oposn  textual order            osect  section name
  #   ofnum  page f-number            ounit  textual unit tag
  #   oline  line number              otrcd  transcriber's code
  #   olabl  the label                ogrov  Grove's alternate encoding
  #   okind  one-letter subject code  owhat  object labeled
  #   onote  comments or "-"
  #   oloc   location key built from ofnum, ounit and oline
  oposn = osect = ofnum = ounit = oline = "";
  otrcd = olabl = ogrov = okind = owhat = onote = "";
  oloc = "";
}

function oget()
{
  # Loads the current input record into the buffer, one field per slot
  # ($1..$11), replacing whatever the buffer held before.
  oposn = $1;   osect = $2;
  ofnum = $3;   ounit = $4;   oline = $5;
  otrcd = $6;
  olabl = $7;   ogrov = $8;
  okind = $9;   owhat = $10;  onote = $11;

  # Location key "<fnum.unit.line>" used to group records of one label.
  oloc = "<" ofnum "." ounit "." oline ">";
}

function oout()
{
  # Flushes the buffer: prints the buffered record (fields joined by OFS)
  # and then empties the buffer.  A buffer whose "oposn" slot is empty is
  # treated as vacant, and nothing is printed.
  if (oposn == "") { return; }

  print oposn, osect, ofnum, ounit, oline, otrcd, olabl, ogrov, okind, owhat, onote;
  oclr();
}

# Comment lines ("#" in column 1) are skipped and never reach the output.
# "abort" is -1 while no error has been reported, so it has to be tested
# with ">= 0": a bare "if (abort)" is true for -1 and would terminate the
# program with status 1 at the first comment line of the input.
/^#/ { if (abort >= 0) { exit abort; } next; }

/./ {
  # Handles one non-empty record: validates it, then either starts a new
  # location group or lets the record compete with the buffered one.
  if (abort >= 0) { exit abort; }

  if (NF != 11)
    { error(("line " NR ": bad field count")); }

  fnum = $3; unit = $4; line = $5; trcd = $6;

  # Page number looks like "f", digits, "v" or "r", optional digits.
  if (fnum !~ /^f[0-9][0-9]*[vr][0-9]*$/)
    { error(("line " NR ": bad page f-number")); }
  # Transcriber code is a single capital letter.
  if (trcd !~ /^[A-Z]$/)
    { error(("line " NR ": bad transcriber code")); }

  loc = "<" fnum "." unit "." line ">";

  if (loc == oloc)
    { # Same label as the buffered record: keep the one with better rank.
      if (trpri[trcd] < trpri[otrcd]) { oget(); }
    }
  else
    { # New label: emit the winner of the previous group and start afresh.
      oout(); oget();
    }
}

END {
  # On a clean run, emit the record still held in the buffer; if an
  # error was reported, propagate its status instead and print nothing.
  if (abort < 0)
    { oout(); }
  else
    { exit abort; }
}

function error(msg)
{
  # Reports "msg" on standard error, records failure status 1 in "abort",
  # and stops processing.  The bare "exit" hands control to the END rule,
  # which turns "abort" into the exit status of the program.
  printf("%s\n", msg) > "/dev/stderr";
  abort = 1;
  exit;
}