#! /bin/gawk -f
# Last edited on 2004-02-01 02:14:23 by stolfi

# Converts a text from "org" format to extended EVT format

BEGIN {
  # Pending exit status: stays negative until an error routine sets it.
  abort = -1;

  # Command synopsis printed by arg_error(); assembled one option at a time.
  usage = "org-to-evt \\\n";
  usage = usage "  [ -v omitControls={0|1} ] \\\n";
  usage = usage "  [ -v omitBlanks={0|1} ] \\\n";
  usage = usage "  [ -v inSpace=CHAR ] \\\n";
  usage = usage "  [ -v outSpace=CHAR ] \\\n";
  usage = usage "  < main.org > main.evt";

  # INPUT FORMAT
  #
  #   Lines whose first non-blank character is "#" are comments.
  #   Lines with "@" in column 1 are directives:
  #
  #     @verbatim          = rest of input is already EVT; copy it as-is.
  #     @format NC NU NL   = number of digits for chapter, unit, line.
  #     @part X NAME       = start a new part with letter code X.
  #     @chapter NUM NAME  = start a new chapter.
  #     @unit U NUM NAME   = start a new unit of type U.
  #     @=                 = paragraph boundary.
  #     @end               = end of text.
  #     @include FILE      = insert an external comments file.
  #
  #   NAME is documentation only (it is echoed into a comment).
  #   A chapter or unit NUM is either absolute or a "+N" increment
  #     relative to the number of the chapter/unit currently open.
  #   Text lines must not contain "@" or "#".
  #
  # OUTPUT, WHEN NOT VERBATIM
  #
  #   Leading and trailing blanks are dropped and runs of blanks are
  #     collapsed before a line is interpreted.
  #   "outSpace" (default "_") is the character meant to stand for a
  #     word separator in the output.
  #   "inSpace" (default Ascii SP) is documented as the input word
  #     separator.
  #     NOTE(review): in the code visible here "inSpace" only receives a
  #     default; the rules normalize runs of SP/TAB. Confirm whether it
  #     is supposed to be honored.
  #   Each text line is written with a locator <Xccc.Uvv.nnn>, where
  #     X   = letter of the current part,
  #     ccc = chapter number,
  #     U   = letter of the unit type,
  #     vv  = unit number,
  #     nnn = line number within the unit.
  #   A comment "## <Xccc>" precedes each chapter.
  #   With "omitControls" TRUE, "@" lines are not echoed to the output.
  #
  # OUTPUT, ALWAYS
  #
  #   With "omitBlanks" TRUE blank lines are dropped; otherwise each one
  #     becomes an empty "#" comment.

  # Option defaults (an option is "unset" when it compares equal to "").
  if (omitControls == "")
    { omitControls = 0; }
  if (omitBlanks == "")
    { omitBlanks = 0; }
  if (inSpace == "")
    { inSpace = " "; }
  if (outSpace == "")
    { outSpace = "_"; }

  # Default locator field widths; "@format" overrides them.
  chapterDigits = 3;
  unitDigits = 2;
  lineDigits = 3;

  # Becomes 1 once "@verbatim" has been seen.
  verbatim = 0;

  # Page tracking (indicative only).
  curpage = "";

  # Part state: "" means no part is open.
  curpart = "";
  nparts = 0;          # Completed nonempty parts so far.

  # Chapter state: "" means no chapter is open / no part is open.
  curchapter = "";
  nchapters = "";      # Completed nonempty chapters in the current part.

  # Unit state: "" means no unit is open / no chapter is open.
  curunit = "";
  nunits = "";         # Completed nonempty units in the current chapter.

  # Line bookkeeping.
  nlines = 0;          # Text lines already seen in the current unit.
  nparlines = 0;       # Text lines already seen in the current paragraph.
  lastwasblank = 0;    # TRUE if the last line written was an empty "#".
}

# An error routine has already requested termination: stop reading input.
(abort >= 0) { exit abort; }

# Blank lines (in verbatim mode too): drop them when "omitBlanks" is set,
# otherwise emit an empty "#" comment.
# The pattern accepts TABs as well as spaces.  A TAB-only line used to
# slip past this rule, was emptied by the whitespace cleanup further
# down, then matched no rule at all and vanished silently even with
# omitBlanks=0.
/^[ 	]*$/ { 
  if (! omitBlanks) { output_comment_line("#"); }
  next;
}

# After "@verbatim" every remaining non-blank line is copied unchanged.
(verbatim) { print; next; }

# Comment line: "#" possibly preceded by spaces.
# A comment whose second field is "BLANK" is rewritten so that it
# declares the "outSpace" character.
/^ *[\#]/ {
  lin = cleanup_comment($0);
  $0 = lin;   # Re-split the fields on the cleaned-up text.
  if ($2 == "BLANK")
    { lin = ("# BLANK = \"" outSpace "\""); }
  output_comment_line(lin);
  next;
}

# Any remaining line with "@" or "#" after its first character is an error.
/.+[@\#]/ {
  data_error("@ or # not on column 1");
}

# Whitespace normalization for directives and text lines:
# trim both ends, then squeeze every run of SP/TAB to a single SP.
# (No "next": the rules below see the cleaned-up line.)
/./ {
  sub(/^[ 	]+/, "");
  sub(/[ 	]+$/, "");
  gsub(/[ 	]+/, " ");
}

# "@verbatim": close everything that is open; from here on the input is
# copied without interpretation.
/^[@]verbatim[ ]*$/ {
  end_current_part();
  output_control_line($0);
  verbatim = 1;
  next;
}

# "@format NC NU NL": set the digit counts used in locators.
/^[@]format[ ]+[0-9]+[ ]+[0-9]+[ ]+[0-9]+[ ]*$/ {
  output_control_line($0);
  chapterDigits = $2 + 0;
  unitDigits = $3 + 0;
  lineDigits = $4 + 0;
  next;
}

# "@part X NAME": close the current part (if any) and open part X.
/^[@]part[ ]+[A-Za-z][ ]+[^ ]+[ ]*$/ {
  pt_tag = $2;
  pt_nam = $3;
  end_current_part();
  output_control_line($0);
  begin_new_part(pt_tag, pt_nam);
  next;
}

# "@chapter NUM NAME": close the current chapter and open a new one.
# A NUM starting with "+" is relative to the chapter still open at this
# point, so it must be resolved before that chapter is closed; with no
# open chapter a relative NUM resolves to 1.
# NOTE(review): the pattern also admits "_" inside NUM, and
# begin_new_chapter() coerces the value with "+ 0" - confirm that this is
# intended.
/^[@]chapter[ ]+[\+]?[0-9_]+[ ]+[^ ]+[ ]*$/ {
  ch_nam = $3;
  ch_num = $2;
  if (ch_num ~ /^[+]/)
    { if (curchapter == "")
        { ch_num = 1; }
      else
        { ch_num = curchapter + ch_num; }
    }
  end_current_chapter();
  output_control_line($0);
  begin_new_chapter(ch_num, ch_nam);
  next;
}

# "@unit U NUM NAME": close the current unit and open a new one of type U.
# Relative numbers ("+N") are resolved against the unit still open, or
# become 1 when there is none.
/^[@]unit[ ]+[A-Z][ ]+[+]?[0-9]+[ ]+[^ ]+[ ]*$/ {
  un_tag = $2;
  un_name = $4;
  un_num = $3;
  if (un_num ~ /^[+]/)
    { if (curunit == "")
        { un_num = 1; }
      else
        { un_num = curunit + un_num; }
    }
  end_current_unit();
  output_control_line($0);
  begin_new_unit(un_tag, un_num, un_name);
  next;
}

# "@=": paragraph boundary (not echoed as a control line).
/^[@][=]([ ]|$)/ {
  end_current_parag();
  begin_new_parag();
  next;
}

# "@include FILE": insert an external comments file
# (no nesting, no directive processing of its contents).
/^[@]include[ ]+[-/_.~A-Za-z0-9]+[ ]*$/ {
  fname = $2;
  insert_file(fname);
  next;
}

# "@end": end of text; closes whatever is still open.
/^[@]end[ ]*$/ {
  end_current_part();
  next;
}

# Any other line starting with "@" is a malformed or unknown directive.
/^[@]/ {
  data_error(("unknown @ directive \"" $0 "\""));
}

# Whatever survived all the rules above is a line of text.
/./ {
  output_contents_line($0);
  next;
}

# End of input: propagate a pending error status; otherwise close the
# open part, unless "@verbatim" already did so.
END {
  if (abort < 0)
    { if (! verbatim)
        { end_current_part(); }
    }
  else
    { exit abort; }
}

function end_current_part()
{
  # Closes the open part, if any: closes its last chapter, counts the
  # part in "nparts" when it had at least one nonempty chapter, resets
  # the part state and writes the closing "}" of the progress trace
  # to stderr.
  # With no part open, only checks that no chapter state is lingering.
  if (curpart == "")
    { if (curchapter != "")
        { data_error("inconsistent curchapter (0)"); }
      if (nchapters != "")
        { data_error("inconsistent nchapters (0)"); }
      return;
    }

  end_current_chapter();
  if (nchapters > 0)
    { nparts++; }
  nchapters = "";
  curpart = "";
  printf "\n}\n" > "/dev/stderr";
}

function end_current_chapter()
{
  # Closes the open chapter, if any: closes its last unit, counts the
  # chapter in "nchapters" when it had at least one nonempty unit,
  # resets the chapter state and writes "]" to the stderr trace.
  # With no chapter open, only checks that no unit state is lingering.
  if (curchapter == "")
    { if (curunit != "")
        { data_error("inconsistent curunit (0)"); }
      if (nunits != "")
        { data_error("inconsistent nunits (0)"); }
      return;
    }

  end_current_unit();
  if (nunits > 0)
    { nchapters++; }
  nunits = "";
  curchapter = "";
  printf "]\n" > "/dev/stderr";
}

function end_current_unit()
{
  # Closes the open unit, if any.  A unit that received text lines is
  # counted in "nunits" and its line count goes to the stderr trace;
  # in either case the unit state is cleared and ")" is traced.
  # With no unit open there must be no counted lines.
  if (curunit == "")
    { if (nlines > 0)
        { data_error("inconsistent curunit (1)"); }
      return;
    }

  if (nlines > 0)
    { nunits++;
      printf "%d", nlines > "/dev/stderr";
    }
  nlines = 0;
  curunittype = "";
  curunit = "";
  printf ")" > "/dev/stderr";
}

function end_current_parag()
{
  # Closes the current paragraph: if it has any text lines, a "=" line
  # is written as one more contents line.  The paragraph line counter
  # is zeroed afterwards either way.
  if (nparlines > 0)
    {
      output_contents_line("=");
    }
  nparlines = 0;
}

function begin_new_parag()
{
  # Opens a fresh paragraph: no text lines have been seen in it yet.
  nparlines = 0;
}

function begin_new_unit(newtype,unitnumber,unitname)
{
  # Opens unit "unitnumber" of type "newtype".
  # The previous unit must already be closed (both "curunit" and
  # "curunittype" empty); otherwise a data error is raised.
  # Writes a "# unit ..." comment to the output and "(Unn:" to the
  # stderr trace, and restarts the line count.
  if (curunit != "")
    { data_error("inconsistent curunit (2)"); }
  if (curunittype != "")
    { data_error("inconsistent curunittype (2)"); }

  curunittype = newtype;
  curunit = unitnumber;
  nlines = 0;

  output_comment_line(sprintf("# unit %s %s", newtype, unitname));
  printf "(%s%d:", newtype, unitnumber > "/dev/stderr";
}

function begin_new_chapter(chapternum,chaptername)
{
  # Opens chapter "chapternum" inside the current part.
  # Requires an open part and no open chapter.
  # Output: an empty "#" (unless the last line written already was one),
  # the "## <Xccc>" locator comment and a "# chapter ..." comment.
  # The stderr trace receives "  [N=".  Unit count and paragraph state
  # start afresh.
  if (curpart == "")
    { data_error("unspecified part (2)"); }
  if (curchapter != "")
    { data_error("inconsistent curchapter (2)"); }

  # "+ 0" forces the chapter number to a numeric value.
  curchapter = chapternum + 0;

  if (! lastwasblank)
    { output_comment_line("#"); }
  output_comment_line(sprintf("## <%s%0*d>", curpart, chapterDigits, curchapter));
  output_comment_line(sprintf("# chapter %*d %s", chapterDigits, curchapter, chaptername));

  printf "  [%d=", curchapter > "/dev/stderr";

  nunits = 0;
  begin_new_parag();
}

function begin_new_part(newpart,partname)
{
  # Opens part "newpart"; the previous part must already be closed.
  # Writes a "# part ..." comment to the output and "{X" to the stderr
  # trace, and starts counting chapters from zero.
  if (curpart != "")
    { data_error("inconsistent curpart (3)"); }

  curpart = newpart;
  nchapters = 0;

  output_comment_line(sprintf("# part %s %s", newpart, partname));
  printf "\n{%s\n", newpart > "/dev/stderr";
}

function output_contents_line(lin,   xch,xun,xli,loc,words,nwords,i)
{
  # Writes the text line "lin" to the output, preceded by its locator
  # "<Xccc.Uvv.nnn>", and counts it in the current unit and paragraph.
  #
  # Every run of SP/TAB in "lin" is replaced by "outSpace".  The
  # separator used to be hard-wired to "_", so a user-supplied
  # "-v outSpace=CHAR" was ignored here while the "# BLANK" comment
  # still advertised it.
  #
  # Raises a data error when no part, chapter or unit is open, or when
  # a number does not fit the digit count set by "@format".
  # Parameters after "lin" are local variables.
  nlines++;
  nparlines++;
  curline = nlines;
  if (curpart == "")    { data_error(("unspecified part - line = \"" $0 "\"")); }
  if (curchapter == "") { data_error("unspecified chapter"); }
  if (curunit == "")    { data_error("unspecified unit"); }

  # Split and re-join rather than gsub(), so that an "outSpace" of "&"
  # or "\" is inserted literally instead of being interpreted as a
  # substitution escape.
  nwords = split(lin, words, /[ 	]+/);
  lin = words[1];
  for (i = 2; i <= nwords; i++)
    { lin = (lin outSpace words[i]); }

  # Each locator field must come out with exactly the configured width;
  # a longer result means the number overflowed its field.
  xch = sprintf("%s%0*d", curpart, chapterDigits, curchapter);
  if (length(xch) != chapterDigits+1)
    { data_error(("chapter number overflow \"" xch "\"")); }
  xun = sprintf("%s%0*d", curunittype, unitDigits, curunit);
  if (length(xun) != unitDigits+1) 
    { data_error(("unit number overflow \"" xun "\"")); }
  xli = sprintf("%0*d", lineDigits, curline);
  if (length(xli) != lineDigits)
    { data_error(("line number overflow \"" xli "\"")); }
  loc = sprintf("<%s.%s.%s>", xch, xun, xli);
  printf "%-18s %s\n", loc, lin;
  lastwasblank = 0;
}

function cleanup_comment(lin)
{
  # Returns "lin" as a comment line: leading spaces are removed and,
  # if the result does not begin with "#", "# " is put in front of it.
  sub(/^ */, "", lin);
  if (lin ~ /^[\#]/)
    { return lin; }
  return ("# " lin);
}

function output_comment_line(lin)
{
  # Prints the comment line "lin" with trailing SP/TAB removed.
  # A line that does not start with "#" is a data error.
  # "lastwasblank" records whether the printed line was just "#"
  # optionally followed by spaces.
  sub(/[ 	]+$/, "", lin);
  if (! (lin ~ /^[#]/))
    { data_error("bad comment"); }
  print lin;
  if (lin ~ /^[#][ ]*$/)
    { lastwasblank = 1; }
  else
    { lastwasblank = 0; }
}

function output_control_line(lin)
{
  # Echoes the "@" directive line "lin" to the output, unless the
  # "omitControls" option suppresses control lines.
  if (omitControls)
    { return; }
  print lin;
}

function insert_file(file,    nRead,lin,status)
{
  # Copies the file named "file" to the output, turning each of its
  # lines into a comment with cleanup_comment().  Reports the number of
  # lines copied on stderr.
  #
  # Errors: a read/open failure is a data error carrying gawk's ERRNO
  # text; a file that yields no lines is an argument error.
  # Parameters after "file" are local variables.
  #
  # Failure is detected from getline's return value (negative on error,
  # 0 at end of file).  The earlier test 'ERRNO != "0"' is unreliable:
  # gawk documents ERRNO as a descriptive string, so where it is empty
  # on success that test aborted every "@include".

  nRead = 0;
  while ((status = (getline lin < file)) > 0)
    {
      lin = cleanup_comment(lin);
      print lin;
      nRead++;
    }
  if (status < 0) { data_error((file ": " ERRNO)); }
  close (file);
  if (nRead == 0) { arg_error(("file \"" file "\" empty or missing")); }
  printf "included %6d lines from %s\n", nRead, file > "/dev/stderr"
}

function arg_error(msg)
{
  # Reports a usage problem: prints "msg" and the command synopsis on
  # stderr, records exit status 1 in "abort" and terminates.
  printf "%s\nusage: %s\n", msg, usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_error(msg)
{
  # Reports a problem in the input: prints "msg" on stderr prefixed by
  # the current input line number (FNR), records exit status 1 in
  # "abort" and terminates.
  abort = 1;
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  exit 1;
}