#! /usr/bin/gawk -f
# Last edited on 2002-01-02 02:56:28 by stolfi
# Preprocess the Ge`ez "Glory of the Kings"
BEGIN {
  # Error latch: stays negative until arg_error() or data_error() stores
  # the exit status in it.
  abort = -1;
  usage = ( "geez-gok-process \\\n" \
    " < INFILE.txt > OUTFILE.evt" \
  );
  # Contents lines are labelled with <Xppp.Uuu.nnn> where
  #   X   is a letter identifying the part of the book (a,b,c...)
  #   ppp is the chapter number (sequential through book)
  #   U   is a letter identifying the unit type
  #   uu  is the unit number within the chapter
  #   nnn is the line number within the unit
  curpart = "c";     # Current part letter.
  curchapter = "";   # Current chapter number ("" while no chapter is open).
  nchapters = 0;     # Number of completed nonempty chapters.
  curunit = "";      # Current unit number within chapter ("" while no unit is open).
  curunittype = "";  # Type letter of the current unit ("" while no unit is open).
  nchapunits = "";   # Number of completed nonempty units in chapter ("" outside chapters).
  nunitlines = 0;    # Number of lines already seen in current unit.
  nparlines = 0;     # Number of lines already seen in current parag.
  lastwasblank = 0;  # TRUE if last line written was a blank #-comment.
}
# If an error status has been latched, stop reading and exit with it.
(abort >= 0) {
  exit abort;
}

# Comment line (optional leading blanks, then "#"): strip the leading
# blanks and copy the rest to the output as a comment.
/^ *[#]/ {
  comment = $0;
  sub(/^ */, "", comment);
  output_comment_line(comment);
  next;
}

# Empty or all-blank line: written out as a bare "#" comment.
/^ *$/ {
  output_comment_line("#");
  next;
}

# An "@" preceded by at least one character is an error.
/.+[@]/ {
  data_error("@ not on column 1");
}
/./ {
  # Whitespace normalisation applied to every non-empty line that
  # reaches this rule, whether directive or contents.
  sub(/^[ ]+/, "");    # drop leading blanks
  sub(/[ ]+$/, "");    # drop trailing blanks
  gsub(/[ ]+/, " ");   # squeeze each remaining run to one blank
}
# "@chapter" directive, alone on the line or followed by a blank.
/^[@]chapter([ ]|$)/ {
# start a new chapter: close the chapter in progress (if any), copy
# the directive line to the output, then open the next chapter.
end_current_chapter();
output_control_line($0);
begin_new_chapter();
next;
}
/^[@]chapnum([ ]|$)/ {
# start of chapter number: close the unit in progress (if any), copy
# the directive line to the output, and open a unit of type "B".
end_current_unit();
output_control_line($0);
begin_new_unit("B");
next;
}
/^[@]chaptitle([ ]|$)/ {
# start of chapter title: same sequence as for "@chapnum", except
# that the unit opened has type "C".
end_current_unit();
output_control_line($0);
begin_new_unit("C");
next;
}
/^[@][=]([ ]|$)/ {
# paragraph delimiter: closes the paragraph in progress (which writes
# a "=" contents line when that paragraph has any lines) and starts a
# fresh one.  The directive line itself is not copied to the output.
end_current_parag();
begin_new_parag();
next;
}
/^[@]text([ ]|$)/ {
  # "@text" opens a running-text unit (type "P").  If such a unit is
  # already open, the directive marks a continuation and is dropped
  # without being copied to the output.
  if (curunittype == "P") { next; }
  end_current_unit();
  output_control_line($0);
  begin_new_unit("P");
  next;
}
/^ *$/ {
# Line is empty or all blanks at this point: nothing to output.
# NOTE(review): such lines are already consumed by the earlier
# blank-line rule, so this looks like a safety net only.
next;
}
/^[@]/ {
# Any "@" line that none of the directive rules above has claimed.
data_error("unknown @ directive");
}
/./ {
# Contents line, phew!
# A blank is appended before output; output_contents_line() then
# turns every run of blanks into "_", so each text line ends in "_".
output_contents_line(($0 " "));
next;
}
END {
  # With no error latched, close the chapter still in progress (which
  # also closes its last unit and paragraph).  Otherwise just hand the
  # latched status back as the exit code.
  if (abort < 0)
    { end_current_chapter(); }
  else
    { exit abort; }
}
function end_current_chapter()
{
  # Closes the chapter in progress, if there is one.  A chapter that
  # produced at least one nonempty unit is added to "nchapters".
  if (curchapter == "")
    {
      # No chapter is open, so no unit and no unit count may exist.
      if (curunit != "")
        { data_error("inconsistent curunit (0)"); }
      if (nchapunits != "")
        { data_error("inconsistent nchapunits (0)"); }
      return;
    }

  end_current_unit();
  if (nchapunits > 0) { nchapters++; }

  # Back to the "no chapter open" state.
  curchapter = "";
  nchapunits = "";

  # Progress trace: closes the "[n=" written when the chapter began.
  printf "]\n" > "/dev/stderr";
}
function end_current_unit()
{
# Finishes off the current unit, if one is open.  This is the entry
# point used by the directive rules and by end_current_chapter(); all
# the work is delegated to do_end_current_unit().
do_end_current_unit();
}
function do_end_current_unit()
{
  # Closes the unit in progress, if there is one.  A unit that holds
  # at least one line is added to "nchapunits".
  if (curunit == "")
    {
      # No unit is open, so no lines may have been counted.
      if (nunitlines > 0)
        { data_error("inconsistent curunit (1)"); }
      return;
    }

  # Closing the paragraph first may still add a "=" line to the unit.
  end_current_parag();

  if (nunitlines > 0)
    {
      nchapunits++;
      # Progress trace: number of lines in the unit.
      printf "%d", nunitlines > "/dev/stderr";
    }

  # Back to the "no unit open" state.
  curunit = "";
  curunittype = "";
  nunitlines = 0;

  # Progress trace: closes the "(Tn:" written when the unit began.
  printf ")" > "/dev/stderr";
}
function end_current_parag()
{ # Finishes off the current paragraph.  If any contents lines were
# written since the paragraph began, a line whose text is "=" is written
# through output_contents_line(), so it is numbered and counted like any
# other line of the unit.  The paragraph line count is then reset.
if (nparlines > 0)
{ output_contents_line("="); }
nparlines = 0;
}
function begin_new_parag()
{
# Starts a new paragraph: resets "nparlines", the count of contents
# lines written in the current paragraph, to zero.
nparlines = 0;
}
function begin_new_unit(newtype)
{
  # Initializes a new unit of the given type.
  #   newtype  type letter of the unit (the rules pass "B", "C" or "P").
  # Assumes that the previous unit has been finished, and that
  # nchapunits is the number of complete nonempty units in chapter.
  # The new unit is numbered nchapunits + 1.

  # A unit can only live inside a chapter.  Without this check a unit
  # directive placed before the first "@chapter" is accepted here and
  # is reported only later (at the next contents line, the next
  # "@chapter", or end of input) with an unrelated message and line
  # number.  The comparison is against "" on purpose: chapter number 0
  # is a valid open chapter.
  if (curchapter == "") { data_error("unspecified chapter (2)"); }
  if (curunit != "") { data_error("inconsistent curunit (2)"); }
  if (curunittype != "") { data_error("inconsistent curunittype (2)"); }

  curunit = nchapunits + 1;
  curunittype = newtype;

  # Progress trace: "(" type letter, unit number, ":".
  printf "(%s%d:", curunittype, curunit > "/dev/stderr";

  nunitlines = 0;

  # Separate the unit from what precedes it by a blank "#" comment,
  # unless the last line written already was one.
  if (! lastwasblank) { output_comment_line("#"); }
  begin_new_parag();
}
function begin_new_chapter()
{
  # Opens a new chapter.  Requires a part letter to be set and no
  # chapter to be open.  The chapter number is the count of nonempty
  # chapters completed so far, so numbering starts at 0 and the number
  # of a chapter that turns out empty is used again by the next one.
  if (curpart == "")
    { data_error("unspecified part (2)"); }
  if (curchapter != "")
    { data_error("inconsistent curchapter (2)"); }

  curchapter = nchapters;
  nchapunits = 0;

  # Announce the chapter in the output, then on the progress trace.
  output_comment_line(sprintf("## <%s%03d>", curpart, curchapter));
  printf "[%d=", curchapter > "/dev/stderr";
}
function output_contents_line(lin, loc)
{
  # Writes one contents line, prefixed by its location label.
  #   lin  text of the line; every run of blanks in it becomes "_".
  #   loc  local scratch variable (not an argument).
  # Counts the line in both the current unit and the current paragraph,
  # and records its number within the unit in "curline".
  curline = ++nunitlines;
  nparlines++;

  # A contents line needs a part, a chapter and a unit to belong to.
  if (curpart == "")
    { data_error("unspecified part"); }
  if (curchapter == "")
    { data_error("unspecified chapter"); }
  if (curunit == "")
    { data_error("unspecified unit"); }

  gsub(/[ ]+/, "_", lin);

  # Label: part letter, chapter, unit type letter, unit, line.
  loc = sprintf("<%s%03d.%s%02d.%03d>",
    curpart, curchapter, curunittype, curunit, curline);
  printf "%-18s %s\n", loc, lin;

  lastwasblank = 0;
}
function output_comment_line(lin)
{
  # Writes the comment line `lin' after removing trailing blanks, and
  # records in `lastwasblank' whether it was an empty "#" comment.
  # It is an error if `lin' does not start with "#".
  sub(/[ ]+$/, "", lin);
  if (lin !~ /^[#]/)
    { data_error("bad comment"); }

  # A match expression yields 1 or 0, which is exactly the flag value.
  lastwasblank = (lin ~ /^[#][ ]*$/);

  print lin;
}
function output_control_line(lin)
{ # Outputs the "@" directive line `lin' unchanged on standard output.
# Unlike output_comment_line(), it does not touch `lastwasblank'.
print lin;
}
function arg_error(msg)
{
  # Reports a command-line error: latches exit status 1 in "abort",
  # prints `msg' and the usage text on standard error, and exits.
  abort = 1;
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  exit abort;
}
function data_error(msg)
{
  # Reports an error in the input data: latches exit status 1 in
  # "abort", prints `msg' on standard error prefixed by the current
  # record number (FNR), and exits.
  abort = 1;
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  exit abort;
}