#! /usr/bin/gawk -f
# Last edited on 2002-01-02 02:56:28 by stolfi

# Preprocess the Ge`ez "Glory of the Kings"

BEGIN {
  # Exit status requested by an error routine; negative means "no error yet".
  abort = -1;
  usage = ( "geez-gok-process \\\n" \
    "  < INFILE.txt > OUTFILE.evt" \
  );

  # Contents lines are labelled with a locator <Xppp.Unn.nnn> where
  #   X   is a letter identifying the part of the book (a,b,c...)
  #   ppp is the chapter number (number of nonempty chapters completed
  #       before this one, printed with 3 digits)
  #   U   is a letter identifying the unit type:
  #       "B" (after @chapnum), "C" (after @chaptitle), "P" (after @text)
  #   nn  is the unit number within the chapter (2 digits)
  #   nnn is the line number within the unit (3 digits)

  curpart = "c";       # Current part letter.
  curchapter = "";     # Current chapter number ("" when no chapter is open).
  nchapters = 0;       # Number of completed nonempty chapters.
  curunit = "";        # Current unit number within chapter ("" when none).
  curunittype = "";    # Type letter of the current unit ("" when none).
  nchapunits = "";     # Number of completed nonempty units in chapter.
  nunitlines = 0;      # Number of lines already seen in current unit.
  nparlines = 0;       # Number of lines already seen in current parag.
  lastwasblank = 0;    # TRUE if last line written was a blank #-comment.
}

abort >= 0 {
  # An error routine has already recorded an exit status: stop reading input.
  exit abort;
}

/^ *[#]/ {
  # "#"-comment line (possibly indented with spaces): drop the leading
  # spaces and copy it to the output through output_comment_line.
  lin = $0;
  sub(/^ */, "", lin);
  output_comment_line(lin);
  next;
}

/^ *$/ {
  # Empty or all-spaces line: a blank "#" comment is written in its place.
  output_comment_line("#");
  next;
}

/.+[@]/ {
  # An "@" preceded by at least one character is rejected.
  # (Comment lines never reach this rule; they are consumed earlier.)
  data_error("@ not on column 1");
}

/./ {
  # Whitespace normalization of the whole record, applied to both
  # directive and contents lines: trim leading and trailing blanks,
  # then squeeze every remaining run of spaces/tabs to one space.
  sub(/^[ 	]+/, "");
  sub(/[ 	]+$/, "");
  gsub(/[ 	]+/, " ");
}

/^[@]chapter([ ]|$)/ {
  # "@chapter" directive: close whatever chapter is open, echo the
  # directive itself, then open the next chapter.
  end_current_chapter();
  output_control_line($0);
  begin_new_chapter();

  next;
}

/^[@]chapnum([ ]|$)/ {
  # "@chapnum" directive: close the open unit, echo the directive,
  # and open a unit of type "B" that will hold the chapter number.
  end_current_unit();
  output_control_line($0);
  begin_new_unit("B");

  next;
}

/^[@]chaptitle([ ]|$)/ {
  # "@chaptitle" directive: close the open unit, echo the directive,
  # and open a unit of type "C" that will hold the chapter title.
  end_current_unit();
  output_control_line($0);
  begin_new_unit("C");

  next;
}

/^[@][=]([ ]|$)/ {
  # "@=" directive: paragraph break.  The directive itself is not echoed;
  # end_current_parag writes the paragraph terminator when needed.
  end_current_parag();
  begin_new_parag();

  next;
}

/^[@]text([ ]|$)/ {
  # "@text" directive: opens a running-text unit (type "P").
  # When a "P" unit is already open the directive is treated as a
  # continuation and is dropped without producing any output.
  if (curunittype == "P") { next; }

  end_current_unit();
  output_control_line($0);
  begin_new_unit("P");
  next;
}

/^ *$/ {
  # Records that are empty at this point are skipped silently.
  # (The cleanup rule above can leave $0 empty, e.g. for a tabs-only line.)
  next;
}

/^[@]/ {
  # Any "@" line not consumed by one of the directive rules above is an error.
  data_error("unknown @ directive");
}

/./ {
  # Anything left is a contents line.  A blank is appended before
  # output; output_contents_line turns every blank run into "_".
  output_contents_line(($0 " "));

  next;
}

END {
  # On a clean run, close the chapter that is still open (if any);
  # after an error, just propagate the recorded exit status.
  if (abort < 0)
    { end_current_chapter(); }
  else
    { exit abort; }
}

function end_current_chapter()
{
  # Closes the open chapter, if there is one: closes its last unit,
  # counts the chapter in "nchapters" when it had at least one nonempty
  # unit, clears the chapter state and writes "]" to the progress trace.
  # With no chapter open, only checks that no unit state is left over.
  if (curchapter == "")
    { if (curunit != "")
        { data_error("inconsistent curunit (0)"); }
      if (nchapunits != "")
        { data_error("inconsistent nchapunits (0)"); }
      return;
    }

  end_current_unit();
  if (nchapunits > 0) { nchapters++; }
  curchapter = "";
  nchapunits = "";
  printf "]\n" > "/dev/stderr";
}

function end_current_unit()
{ # Finishes off the current unit.  This is a thin entry point:
  # all the work is delegated to do_end_current_unit.
  do_end_current_unit();
}

function do_end_current_unit()
{
  # Closes the open unit, if there is one: closes its last paragraph,
  # counts the unit in "nchapunits" when it produced any lines (also
  # tracing the line count), clears the unit state and writes ")" to
  # the progress trace.  With no unit open, only checks consistency.
  if (curunit == "")
    { if (nunitlines > 0)
        { data_error("inconsistent curunit (1)"); }
      return;
    }

  end_current_parag();
  if (nunitlines > 0)
    { nchapunits++;
      printf "%d", nunitlines > "/dev/stderr";
    }
  curunit = "";
  curunittype = "";
  nunitlines = 0;
  printf ")" > "/dev/stderr";
}

function end_current_parag()
{
  # Closes the current paragraph.  If the paragraph has any lines, a
  # "=" terminator is written as one more contents line; in all cases
  # the paragraph line counter is reset.
  if (nparlines > 0)
    { output_contents_line("="); }

  nparlines = 0;
}

function begin_new_parag()
{ # Opens a fresh paragraph: no lines have been seen in it yet.
  nparlines = 0;
}

function begin_new_unit(newtype)
{ 
  # Initializes a new unit of the given type.
  #   newtype: type letter of the unit, used in the line locators.
  # Assumes that the previous unit has been finished, and that
  # nchapunits is the number of complete nonempty units in chapter.
  # The new unit gets number nchapunits + 1.  A blank "#" comment is
  # written first unless the last line written was already one.

  # Fail fast when a unit directive appears before any "@chapter":
  # otherwise the problem only surfaces later, at another input line,
  # as "unspecified chapter" or "inconsistent curunit (0)".
  if (curchapter == "") { data_error("unspecified chapter (2)"); }
  if (curunit != "") { data_error("inconsistent curunit (2)"); }
  if (curunittype != "") { data_error("inconsistent curunittype (2)"); }
  curunit = nchapunits + 1;
  curunittype = newtype;
  printf "(%s%d:", curunittype, curunit > "/dev/stderr";
  nunitlines = 0;
  if (! lastwasblank) { output_comment_line("#"); }
  begin_new_parag();
}

function begin_new_chapter(   hdr)
{
  # Opens a new chapter.  Requires a part letter and no chapter open.
  # The chapter number is "nchapters", the count of nonempty chapters
  # completed so far, so the first chapter is numbered 0.
  # NOTE(review): units are numbered from 1 (nchapunits + 1); confirm
  # that 0-based chapter numbering is intended.
  # Writes a "## <Xppp>" header comment and a "[ppp=" progress trace.
  if (curpart == "") { data_error("unspecified part (2)"); }
  if (curchapter != "") { data_error("inconsistent curchapter (2)"); }

  curchapter = nchapters;
  nchapunits = 0;

  hdr = sprintf("## <%s%03d>", curpart, curchapter);
  output_comment_line(hdr);
  printf "[%d=", curchapter > "/dev/stderr";
}

function output_contents_line(lin,   loc)
{
  # Writes the contents line `lin' prefixed by its locator.
  #   lin: text of the line; every run of blanks is replaced by "_".
  #   loc: local scratch variable (not a real parameter).
  # Counts the line in both the unit and the paragraph, and records
  # its number within the unit in the global "curline".
  # Requires a part, a chapter and a unit to be current.
  curline = ++nunitlines;
  nparlines++;

  if (curpart == "")    { data_error("unspecified part"); }
  if (curchapter == "") { data_error("unspecified chapter"); }
  if (curunit == "")    { data_error("unspecified unit"); }

  gsub(/[ 	]+/, "_", lin);
  loc = sprintf("<%s%03d.%s%02d.%03d>", \
                curpart, curchapter, curunittype, curunit, curline);
  printf "%-18s %s\n", loc, lin;

  lastwasblank = 0;
}

function output_comment_line(lin)
{
  # Writes the comment line `lin' after trimming trailing blanks.
  # It is an error if `lin' does not start with "#".
  # Sets `lastwasblank' to 1 when the line is a bare "#" (optionally
  # followed by spaces), and to 0 otherwise.
  sub(/[ 	]+$/, "", lin);
  if (lin !~ /^[#]/) { data_error("bad comment"); }

  lastwasblank = (lin ~ /^[#][ ]*$/) ? 1 : 0;
  print lin;
}

function output_control_line(lin)
{ # Outputs the "@" control line `lin' unchanged.
  # Also clears `lastwasblank': once a control line has been written,
  # the last line written is no longer a blank #-comment.  Without this
  # the flag stays stale and begin_new_unit omits its separator after
  # a control line that happens to follow a blank comment.
  print lin;
  lastwasblank = 0;
}

function arg_error(msg)
{ # Reports a command-line error: writes `msg' and the usage text to
  # stderr, records exit status 1 in "abort" and terminates.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";

  abort = 1;
  exit abort;
}

function data_error(msg)
{ # Reports an input-data error: writes `msg' to stderr tagged with the
  # current record number (FNR), records exit status 1 in "abort" and
  # terminates.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";

  abort = 1;
  exit abort;
}