#! /bin/gawk -f
# Last edited on 2004-02-01 02:14:23 by stolfi

# Converts a text from "org" format to extended EVT format

BEGIN {
  abort = -1;
  usage = ( "org-to-evt \\\n" \
    " [ -v omitControls={0|1} ] \\\n" \
    " [ -v omitBlanks={0|1} ] \\\n" \
    " [ -v inSpace=CHAR ] \\\n" \
    " [ -v outSpace=CHAR ] \\\n" \
    " < main.org > main.evt" \
  );

  # Input:
  #   Comment lines start with "#".
  #   Directives start with "@" in column 1:
  #
  #     @verbatim            = input is already in EVT format, just copy it.
  #     @format NC NU NL     = digits to use for chapter, unit, line
  #     @part X NAME         = begin of new part, code X
  #     @chapter NUM NAME    = begin new chapter
  #     @unit U NUM NAME     = begin new unit of type U
  #     @=                   = paragraph boundary
  #     @end                 = end of text
  #     @include FILE        = include external comments file
  #
  #   The NAME above is for documentation purposes only.
  #   The chapter NUM can be absolute, or a "+" increment
  #   relative to the number of the previous chapter.
  #   Text lines may not include "@" or "#".
  #
  # If not verbatim:
  #   "inSpace" characters (default Ascii SP) and punctuation separate words.
  #   Leading and trailing blanks and "inSpace"s will be discarded.
  #   Repeated "inSpace"s will be converted to single ones.
  #   Each "inSpace" is replaced by an "outSpace" (default "_").
  #   ("inSpace" is applied to contents lines only, never to "@" lines.)
  #   Output lines will be numbered with <Xccc.Uvv.nnn> where
  #     X is a letter identifying the part of the book
  #     ccc is the chapter number (sequential within the part)
  #     U is a letter identifying the unit's type
  #     vv is the unit number (sequential within the chapter)
  #     nnn is the line number within the unit.
  #   A comment "## <Xccc>" will be inserted before each chapter.
  #   If "omitControls" is TRUE, omits "@" lines from the output.
  #
  # In any case:
  #   If "omitBlanks" is TRUE, omits blank lines, else turns them into
  #   blank comments.

  if (omitControls == "") { omitControls = 0; }
  if (omitBlanks == "") { omitBlanks = 0; }
  if (inSpace == "") { inSpace = " "; }
  if (outSpace == "") { outSpace = "_"; }

  curpage = "";      # Current page number (indicative only).
  curpart = "";      # Current part number.
  nparts = 0;        # Number of completed nonempty parts.
  curchapter = "";   # Current chapter number.
  nchapters = "";    # Number of completed nonempty chapters.
  curunit = "";      # Current unit number within chapter.
  curunittype = "";  # Type letter of the current unit.
  nunits = "";       # Number of completed nonempty units in chapter.
  nlines = 0;        # Number of lines already seen in current unit.
  nparlines = 0;     # Number of lines already seen in current parag.
  lastwasblank = 0;  # TRUE if last line written was a blank #-comment.
  verbatim = 0;      # TRUE after "@verbatim": copy the input unchanged.

  # Default locator format:
  chapterDigits = 3;
  unitDigits = 2;
  lineDigits = 3;
}

# Stop reading input as soon as an error has been flagged.
(abort >= 0) { exit abort; }

# Blank lines become blank comments (or vanish), even in verbatim mode.
/^ *$/ {
  if (! omitBlanks) { output_comment_line("#"); }
  next;
}

# Verbatim mode: everything else is copied as-is.
(verbatim) { print; next; }

# Comment lines; a "# BLANK ..." comment is rewritten to show "outSpace".
/^ *[#]/ {
  $0 = cleanup_comment($0);
  if ($2 == "BLANK") { $0 = ("# BLANK = \"" outSpace "\""); }
  lin = $0;
  output_comment_line(lin);
  next;
}

# Tested before leading blanks are trimmed, so an indented "@" is rejected too.
/.+[@#]/ { data_error("@ or # not on column 1"); }

/./ {
  # General contents and control line cleanup
  gsub(/^[ ]+/, "", $0);
  gsub(/[ ]+$/, "", $0);
  gsub(/[ ]+/, " ", $0);
}

/^[@]verbatim[ ]*$/ {
  # Close any open parts/chapters etc, copy rest of file verbatim
  end_current_part();
  verbatim = 1;
  output_control_line($0);
  next;
}

/^[@]format[ ]+[0-9]+[ ]+[0-9]+[ ]+[0-9]+[ ]*$/ {
  # Locator format parameters
  chapterDigits = $2 + 0;
  unitDigits = $3 + 0;
  lineDigits = $4 + 0;
  output_control_line($0);
  next;
}

/^[@]part[ ]+[A-Za-z][ ]+[^ ]+[ ]*$/ {
  # Start a new part
  pt_tag = $2;
  pt_nam = $3;
  end_current_part();
  output_control_line($0);
  begin_new_part(pt_tag, pt_nam);
  next;
}

/^[@]chapter[ ]+[+]?[0-9_]+[ ]+[^ ]+[ ]*$/ {
  # Start a new chapter
  ch_num = $2;
  ch_nam = $3;
  # A "+N" number is relative to the chapter still open at this point;
  # with no open chapter the numbering starts at 1.
  if (ch_num ~ /^[+]/) { ch_num = (curchapter == "" ? 1 : curchapter + ch_num); }
  end_current_chapter();
  output_control_line($0);
  begin_new_chapter(ch_num, ch_nam);
  next;
}

/^[@]unit[ ]+[A-Z][ ]+[+]?[0-9]+[ ]+[^ ]+[ ]*$/ {
  # Start (or continuation) of current unit
  un_tag = $2;
  un_num = $3;
  un_name = $4;
  # A "+N" number is relative to the unit still open at this point.
  if (un_num ~ /^[+]/) { un_num = (curunit == "" ? 1 : curunit + un_num); }
  end_current_unit();
  output_control_line($0);
  begin_new_unit(un_tag, un_num, un_name);
  next;
}

/^[@][=]([ ]|$)/ {
  # Paragraph delimiter
  end_current_parag();
  begin_new_parag();
  next;
}

/^[@]include[ ]+[-/_.~A-Za-z0-9]+[ ]*$/ {
  # Include external comments file (no nesting, no processing):
  fname = $2;
  insert_file(fname);
  next;
}

/^[@]end[ ]*$/ {
  # End of text
  end_current_part();
  next;
}

/^[@]/ { data_error(("unknown @ directive \"" $0 "\"")); }

/./ {
  # Contents line, phew!
  txt = normalize_in_space($0);
  if (txt == "") {
    # Nothing but "inSpace"s: handle like a blank line.
    if (! omitBlanks) { output_comment_line("#"); }
    next;
  }
  output_contents_line(txt);
  next;
}

END {
  if (abort >= 0) { exit abort; }
  if (! verbatim) { end_current_part(); }
}

function end_current_part() {
  # Finishes off the current part, increments "nparts" if appropriate.
  if (curpart != "") {
    end_current_chapter();
    if (nchapters > 0) { nparts++; }
    curpart = "";
    nchapters = "";
    printf "\n}\n" > "/dev/stderr";
  } else {
    if (curchapter != "") { data_error("inconsistent curchapter (0)"); }
    if (nchapters != "") { data_error("inconsistent nchapters (0)"); }
  }
}

function end_current_chapter() {
  # Finishes off the current chapter, increments "nchapters" if appropriate.
  if (curchapter != "") {
    end_current_unit();
    if (nunits > 0) { nchapters++; }
    curchapter = "";
    nunits = "";
    printf "]\n" > "/dev/stderr";
  } else {
    if (curunit != "") { data_error("inconsistent curunit (0)"); }
    if (nunits != "") { data_error("inconsistent nunits (0)"); }
  }
}

function end_current_unit() {
  # Finishes off the current unit, increments "nunits" if it had any lines.
  if (curunit != "") {
    if (nlines > 0) {
      nunits++;
      printf "%d", nlines > "/dev/stderr";
    }
    curunit = "";
    curunittype = "";
    nlines = 0;
    printf ")" > "/dev/stderr";
  } else {
    if (nlines > 0) { data_error("inconsistent curunit (1)"); }
  }
}

function end_current_parag() {
  # Finishes off the current paragraph, adding "=" if necessary,
  if (nparlines > 0) { output_contents_line("="); }
  nparlines = 0;
}

function begin_new_parag() {
  # Starts a new paragraph.
  nparlines = 0;
}

function begin_new_unit(newtype,unitnumber,unitname) {
  # Initializes a new unit of the given type and number.
  # Assumes that the previous unit has been finished, and that
  # nunits is the number of complete nonempty units in chapter.
  if (curunit != "") { data_error("inconsistent curunit (2)"); }
  if (curunittype != "") { data_error("inconsistent curunittype (2)"); }
  curunit = unitnumber;
  curunittype = newtype;
  output_comment_line(sprintf("# unit %s %s", curunittype, unitname));
  printf "(%s%d:", curunittype, curunit > "/dev/stderr";
  nlines = 0;
}

function begin_new_chapter(chapternum,chaptername) {
  # Initializes a new chapter with the given number, and writes its
  # "## <Xccc>" header comment. Assumes the previous chapter was finished.
  if (curpart == "") { data_error("unspecified part (2)"); }
  if (curchapter != "") { data_error("inconsistent curchapter (2)"); }
  curchapter = chapternum + 0;
  if (! lastwasblank) { output_comment_line("#"); }
  output_comment_line(sprintf("## <%s%0*d>", curpart, chapterDigits, curchapter));
  output_comment_line(sprintf("# chapter %*d %s", chapterDigits, curchapter, chaptername));
  printf " [%d=", curchapter > "/dev/stderr";
  nunits = 0;
  begin_new_parag();
}

function begin_new_part(newpart,partname) {
  # Initializes a new part with the given code letter.
  # Assumes that the previous part has been finished.
  if (curpart != "") { data_error("inconsistent curpart (3)"); }
  curpart = newpart;
  output_comment_line(sprintf("# part %s %s", curpart, partname));
  printf "\n{%s\n", curpart > "/dev/stderr";
  nchapters = 0;
}

function replace_literal(str,from,to,   res,k) {
  # Returns `str' with every occurrence of the literal string `from'
  # replaced by `to'. Uses index/substr, so no character of `from'
  # or `to' is interpreted as a regexp or gsub metacharacter.
  # Requires `from' to be non-empty.
  res = "";
  while ((k = index(str, from)) > 0) {
    res = (res substr(str, 1, k-1) to);
    str = substr(str, k + length(from));
  }
  return (res str);
}

function normalize_in_space(lin) {
  # Turns every "inSpace" of a contents line into a blank, then trims
  # the ends and squeezes blank runs. A no-op for the default
  # inSpace = " ", since the line was cleaned up already.
  if (inSpace == " ") { return lin; }
  lin = replace_literal(lin, inSpace, " ");
  gsub(/^[ ]+/, "", lin);
  gsub(/[ ]+$/, "", lin);
  gsub(/[ ]+/, " ", lin);
  return lin;
}

function output_contents_line(lin,   xch,xun,xli,loc,words,nw,k) {
  # Writes the text line `lin' preceded by its <Xccc.Uvv.nnn> locator,
  # with each run of blanks replaced by "outSpace". Counts the line in
  # "nlines" and "nparlines"; aborts if no part, chapter or unit is
  # open, or if a number does not fit the configured digit count.
  nlines++;
  nparlines++;
  curline = nlines;
  if (curpart == "") { data_error(("unspecified part - line = \"" $0 "\"")); }
  if (curchapter == "") { data_error("unspecified chapter"); }
  if (curunit == "") { data_error("unspecified unit"); }

  # Join the words with "outSpace". split/join is used instead of gsub
  # so that "&" or "\" in "outSpace" are copied literally.
  nw = split(lin, words, /[ ]+/);
  lin = (nw > 0 ? words[1] : "");
  for (k = 2; k <= nw; k++) { lin = (lin outSpace words[k]); }

  xch = sprintf("%s%0*d", curpart, chapterDigits, curchapter);
  if (length(xch) != chapterDigits+1) { data_error(("chapter number overflow \"" xch "\"")); }
  xun = sprintf("%s%0*d", curunittype, unitDigits, curunit);
  if (length(xun) != unitDigits+1) { data_error(("unit number overflow \"" xun "\"")); }
  xli = sprintf("%0*d", lineDigits, curline);
  if (length(xli) != lineDigits) { data_error(("line number overflow \"" xli "\"")); }
  loc = sprintf("<%s.%s.%s>", xch, xun, xli);
  printf "%-18s %s\n", loc, lin;
  lastwasblank = 0;
}

function cleanup_comment(lin) {
  # Strips leading blanks from `lin' and returns it as a comment,
  # prefixing "# " if it does not start with "#" already.
  gsub(/^ */, "", lin);
  if (lin !~ /^[#]/) { lin = ("# " lin); }
  return lin;
}

function output_comment_line(lin) {
  # Outputs the comment line `lin', and sets `lastwasblank'
  gsub(/[ ]+$/, "", lin);
  if (lin !~ /^[#]/) { data_error("bad comment"); }
  print lin;
  lastwasblank = (lin ~ /^[#][ ]*$/);
}

function output_control_line(lin) {
  # Outputs an "@" line, unless "omitControls" is set
  if (! omitControls) { print lin; }
}

function insert_file(file,   nRead,lin,status) {
  # Copies the specified file into the output stream, as comments
  nRead = 0;
  while ((status = (getline lin < file)) > 0) {
    lin = cleanup_comment(lin);
    print lin;
    nRead++;
  }
  # Rely on getline's return status (negative on error) rather than
  # comparing ERRNO against "0", which is not its no-error value in
  # every gawk version.
  if (status < 0) { data_error((file ": " ERRNO)); }
  close (file);
  if (nRead == 0) { arg_error(("file \"" file "\" empty or missing")); }
  printf "included %6d lines from %s\n", nRead, file > "/dev/stderr"
}

function arg_error(msg) {
  # Reports a usage error on stderr and aborts with status 1.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_error(msg) {
  # Reports an input error with the current line number and aborts.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}