#! /usr/bin/gawk -f
# Last edited on 2002-01-01 10:11:34 by stolfi

# Extracts the plain text (with chapter titles and #-comments) from an EVT file.
# In other words, removes the location codes and maps BLANK to blank.
#
# Input lines are handled as follows:
#   - "#"-comments are copied to the output (a bare "#" becomes an empty line).
#     Comments of the form  # BLANK = "chars"  or  # NULL = "chars"  also
#     extend the BLANK / NULL character sets; they must precede all data lines.
#   - Empty lines are copied as empty lines.
#   - Data lines start with a "<locator>"; the locator, {}-comments, NULL
#     characters and the "=" end-of-paragraph mark are stripped, BLANK
#     characters become spaces, and the remaining text is printed.
#   - Anything else is a fatal error.

BEGIN {
  abort = -1;  # Exit status requested by an error routine; -1 means "none".
  usage = ( "evt-to-txt \\\n" " < INFILE > OUTFILE" );
  BLANK = "";  # Characters to be converted to space.
  NULL = "";   # Characters that need to be deleted.
  ndata = 0;   # Number of data lines (excluding comments).
}

# Stop processing records once an error routine has set the exit status.
(abort >= 0) { exit abort; }

# Charset definitions are only honored before the first data line, because
# the patterns are compiled when the first data line is seen.
/^[#] *[A-Z]+ *= *".*" *$/ {
  if (ndata > 0) { data_error("late charset defintion"); }
}

# Accumulate the BLANK and NULL charsets (several definitions concatenate).
# These rules do not use "next": the line is still echoed by the generic
# comment rule below.
/^[#] *BLANK *= *".*" *$/ { BLANK = (BLANK get_val($0)); }

/^[#] *NULL *= *".*" *$/ { NULL = (NULL get_val($0)); }

function get_val(def)
{
  # Extracts a charset from a charset-defining comment {def},
  # and turns it into something that can be placed
  # between "[]" in a pattern.
  # Returns the text between the outermost double quotes, with
  # bracket-expression metacharacters escaped by quote_special().
  gsub(/^[#][ ]*[A-Z]+ *= *"/, "", def);
  gsub(/" *$/, "", def);
  return quote_special(def);
}

function quote_special(chars)
{
  # Returns {chars} with a backslash inserted before every character
  # that is special inside a "[]" bracket expression: "\", "-", "]", "^".
  #
  # The backslash must be handled first, so that the backslashes added
  # for the other characters are not doubled again.
  #
  # The replacement for the backslash is written "\\\\&": gsub() sees
  # the three characters \\& and emits a literal backslash followed by
  # the matched text (itself a backslash).  The simpler "\\\\" yields
  # only ONE backslash under the POSIX replacement rules, which would
  # leave backslashes unescaped.
  gsub(/[\\]/, "\\\\&", chars);
  gsub(/[-]/, "\\-", chars);
  gsub(/[\]]/, "\\]", chars);
  gsub(/[\^]/, "\\^", chars);
  return chars;
}

# A comment marker with nothing after it becomes an empty output line.
/^[#][ ]*$/ { print ""; next; }

# Any other comment is copied verbatim.
/^[#]/ { print; next; }

# Blank input lines become empty output lines.
/^ *$/ { print ""; next; }

# Data line: "<locator>text".
/^[<]/ {
  # Compile the charset patterns lazily, once all definitions are known.
  if (ndata == 0) { make_patterns(); }
  ndata++;

  # Split off the locator; RLENGTH covers "<" ... ">".
  if (! match($0, /^<[a-zA-Z0-9.;]*>/)) { data_error("bad locator"); }
  loc = substr($0, 2, RLENGTH-2);
  lin = substr($0, RLENGTH+1);

  # Remove {}-comments:
  gsub(/[{][^{}]*[}]/, "", lin);

  # Remove NULL characters:
  if (NULL != "") { gsub(null_pat, "", lin); }

  # Note and remove the end-paragraph mark:
  endpar = (lin ~ /[=] *$/);
  gsub(/[=][ ]*$/, "", lin);

  # Remove non-significant blanks
  gsub(/^[ ]+/, "", lin);
  gsub(/[ ]+$/, "", lin);

  # Convert BLANKs to spaces.  If the space itself is not one of the
  # BLANK characters, spaces in the input are filler and are dropped first.
  if (BLANK != "") {
    if (" " !~ blank_pat) { gsub(/[ ]/, "", lin); }
    gsub(blank_pat, " ", lin);
  }

  # Trim spaces at end (but *not* at the beginning):
  gsub(/[ ]+$/, "", lin);

  print lin;

  # A non-empty line that ended a paragraph is followed by an empty line.
  if (endpar && (lin !~ /^ *$/)) { print ""; }
  next;
}

# Every recognized line form ended with "next"; anything reaching here is invalid.
{ data_error("neither text nor comment"); }

function make_patterns()
{
  # Creates the patterns blank_pat and null_pat from the BLANK and NULL
  # charsets, and reports them on stderr.  An empty BLANK defaults to " ".
  # null_pat is only defined when NULL is non-empty.
  if (BLANK == "") { BLANK = " "; }
  blank_pat = ( "[" BLANK "]" );
  printf "blank_pat = /%s/\n", blank_pat > "/dev/stderr";
  if (NULL != "") {
    null_pat = ( "[" NULL "]" );
    printf "null_pat = /%s/\n", null_pat > "/dev/stderr";
  }
}

function arg_error(msg)
{
  # Reports a command-line error {msg} plus the usage text on stderr,
  # and terminates with exit status 1.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_error(msg)
{
  # Reports an input-data error {msg}, tagged with the current line
  # number, on stderr, and terminates with exit status 1.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}