#! /usr/bin/gawk -f
# Last edited on 2001-03-10 23:11:09 by stolfi

# Process the "Dream of a Red Mansion" in decimal GB-code
#
# Reads the marked-up text from standard input and writes it to standard
# output with a location code prepended to every contents line.
#
# Input conventions handled by the rules below:
#   - lines whose first non-blank character is "#" are comments; they are
#     copied through with leading blanks removed;
#   - blank lines are turned into a bare "#" comment line;
#   - "@chapter", "@chaptitle" and "@text" are control lines; they are
#     echoed and open a new chapter / title unit ("C") / text unit ("P");
#   - "@=" is a paragraph delimiter;
#   - any other "@" line, or an "@" that is not in column 1, is an error;
#   - everything else is a contents line.
#
# A progress trace of the form "[chapter=(Uunit:lines)...]" is written
# to /dev/stderr.

BEGIN {
  abort = -1;
  usage = ( "chin-red-process \\\n" \
    " < INFILE.txt > OUTFILE.evt" \
  );

  # Contents lines will be numbered with <cPPP.UNN.LLL> where
  #   c    is the part tag (`curpart'),
  #   PPP  is the chapter number (sequential through book),
  #   U    is a letter identifying the unit type,
  #   NN   is the unit number within the chapter,
  #   LLL  is the line number within the unit.

  curpart = "c";      # Current part tag.
  curchapter = "";    # Current chapter number ("" = no chapter open).
  nchapters = 0;      # Number of completed nonempty chapters.
  curunit = "";       # Current unit number within chapter ("" = none).
  curunittype = "";   # Type letter of current unit ("" = none).
  nchapunits = "";    # Number of completed nonempty units in chapter.
  nunitlines = 0;     # Number of lines already seen in current unit.
  nparlines = 0;      # Number of lines already seen in current parag.
  lastwasblank = 0;   # TRUE if last line written was a blank #-comment.
}

# Stop reading input as soon as an error has been flagged.
(abort >= 0) { exit abort; }

# Comment line: strip leading blanks and copy through.
/^ *[#]/ {
  lin = $0;
  gsub(/^ */, "", lin);
  output_comment_line(lin);
  next;
}

# Blank line: emit as an empty comment.
/^ *$/ {
  output_comment_line("#");
  next;
}

# An "@" preceded by anything (even blanks) is rejected; note that this
# test runs BEFORE the leading-blank cleanup below.
/.+[@]/ {
  data_error("@ not on column 1");
}

/./ {
  # General contents and control line cleanup:
  # trim both ends and squeeze runs of blanks to a single blank.
  gsub(/^[ ]+/, "", $0);
  gsub(/[ ]+$/, "", $0);
  gsub(/[ ]+/, " ", $0);
}

/^[@]chapter([ ]|$)/ {
  # Start a new chapter
  end_current_chapter();
  output_control_line($0);
  begin_new_chapter();
  next;
}

/^[@]chaptitle([ ]|$)/ {
  # Start of chapter title
  end_current_unit();
  output_control_line($0);
  begin_new_unit("C");
  next;
}

/^[@][=]([ ]|$)/ {
  # Paragraph delimiter
  end_current_parag();
  begin_new_parag();
  next;
}

/^[@]text([ ]|$)/ {
  # Start (or continuation) of running text block.
  # A repeated "@text" inside a "P" unit is silently absorbed.
  if (curunittype != "P") {
    end_current_unit();
    output_control_line($0);
    begin_new_unit("P");
  }
  next;
}

# Line that became empty after cleanup: ignore.
/^ *$/ { next; }

/^[@]/ {
  data_error("unknown @ directive");
}

/./ {
  # Contents line, phew!
  # The appended blank becomes a trailing "_" in the output.
  output_contents_line(($0 " "));
  next;
}

END {
  # `exit' inside a rule or function still runs END, so re-check the flag.
  if (abort >= 0) { exit abort; }
  end_current_chapter();
}

function end_current_chapter() {
  # Finishes off the current chapter, and increments "nchapters"
  # if the chapter had at least one nonempty unit.
  # With no chapter open, only checks that no unit state is left over.
  if (curchapter != "") {
    end_current_unit();
    if (nchapunits > 0) { nchapters++; }
    curchapter = "";
    nchapunits = "";
    printf "]\n" > "/dev/stderr";
  } else {
    if (curunit != "") { data_error("inconsistent curunit (0)"); }
    if (nchapunits != "") { data_error("inconsistent nchapunits (0)"); }
  }
}

function end_current_unit() {
  # Finishes off the current unit, if any.
  # Thin wrapper: all the work is done by do_end_current_unit().
  do_end_current_unit();
}

function do_end_current_unit() {
  # Finishes off the current unit, and increments "nchapunits"
  # if the unit had at least one line.
  if (curunit != "") {
    end_current_parag();
    if (nunitlines > 0) {
      nchapunits++;
      printf "%d", nunitlines > "/dev/stderr";
    }
    curunit = "";
    curunittype = "";
    nunitlines = 0;
    printf ")" > "/dev/stderr";
  } else {
    if (nunitlines > 0) { data_error("inconsistent curunit (1)"); }
  }
}

function end_current_parag() {
  # Finishes off the current paragraph, adding an "=" contents line
  # if the paragraph has any lines.
  if (nparlines > 0) { output_contents_line("="); }
  nparlines = 0;
}

function begin_new_parag() {
  # Starts a new paragraph.
  nparlines = 0;
}

function begin_new_unit(newtype) {
  # Initializes a new unit of the given type letter `newtype'.
  # Assumes that the previous unit has been finished, and that
  # nchapunits is the number of complete nonempty units in chapter.
  if (curunit != "") { data_error("inconsistent curunit (2)"); }
  if (curunittype != "") { data_error("inconsistent curunittype (2)"); }
  curunit = nchapunits + 1;
  curunittype = newtype;
  printf "(%s%d:", curunittype, curunit > "/dev/stderr";
  nunitlines = 0;
  # Separate units by a blank comment unless one was just written.
  if (! lastwasblank) { output_comment_line("#"); }
  begin_new_parag();
}

function begin_new_chapter() {
  # Opens a new chapter numbered by the count of chapters completed so
  # far, and writes its "## <cPPP>" header comment.
  # Assumes that the previous chapter has been finished.
  if (curpart == "") { data_error("unspecified part (2)"); }
  if (curchapter != "") { data_error("inconsistent curchapter (2)"); }
  curchapter = nchapters;
  output_comment_line(sprintf("## <%s%03d>", curpart, curchapter));
  printf "[%d=", curchapter > "/dev/stderr";
  nchapunits = 0;
}

function output_contents_line(lin,   loc, curline) {
  # Writes the contents line `lin' prefixed by its location code,
  # with every run of blanks replaced by "_".
  # Counts the line in both the current unit and current paragraph.
  # (`loc' and `curline' are local work variables.)
  nunitlines++;
  nparlines++;
  curline = nunitlines;
  if (curpart == "") { data_error("unspecified part"); }
  if (curchapter == "") { data_error("unspecified chapter"); }
  if (curunit == "") { data_error("unspecified unit"); }
  gsub(/[ ]+/, "_", lin);
  loc = sprintf("<%s%03d.%s%02d.%03d>", curpart, curchapter, curunittype, curunit, curline);
  printf "%-18s %s\n", loc, lin;
  lastwasblank = 0;
}

function output_comment_line(lin) {
  # Outputs the comment line `lin' (trailing blanks removed),
  # and sets `lastwasblank' to tell whether it was a bare "#".
  gsub(/[ ]+$/, "", lin);
  if (lin !~ /^[#]/) { data_error("bad comment"); }
  lastwasblank = 0;
  if (lin ~ /^[#][ ]*$/) { lastwasblank = 1; }
  print lin;
}

function output_control_line(lin) {
  # Outputs an "@" line unchanged.
  print lin;
}

function arg_error(msg) {
  # Reports a command-line error plus the usage text on /dev/stderr,
  # flags the abort and exits with status 1.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_error(msg) {
  # Reports an input error with the current line number on /dev/stderr,
  # flags the abort and exits with status 1.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}