#! /usr/bin/gawk -f
# Last edited on 2012-02-08 05:34:22 by stolfilocal

# Reads a file in ".wds" format from standard input and writes its text
# to standard output as filled lines.
#
# Each input record is a one-character type code, a blank, and a word:
#   "#"            comment (also a bare "#" line); ignored.
#   "$"            section start; treated as an end of paragraph.
#   "@"            internal info; ignored.
#   "a", "s", "p"  alpha word, symbol, or punctuation; appended to the output.
# The punctuation "÷" ends the paragraph, and the punctuation "_" toggles
# italics, written as "\emph{" ... "}".  Any other record is a fatal error.
# Line, word and punctuation counts are reported on "/dev/stderr" at the end.

BEGIN {
  abort = -1;  # Exit status set by the error routines; negative means "no error".
  usage = (ARGV[0] " \\\n" " < some.wds > some.txt");

  # See "wds-format.txt" for a description of the input file format.

  nlines = 0;  # Number of lines read.
  nwdin = 0;   # Number of words/symbols read.
  nptin = 0;   # Number of punctuation symbols read.
  olin = "";   # Output line being assembled.
  ital = 0;    # 1 while inside italic text, 0 otherwise.
}

# Stop reading as soon as an error routine has set {abort}:
(abort >= 0) {
  exit abort;
}

# Every record: normalize odd whitespace characters to plain blanks,
# strip trailing blanks, and count the line.
// {
  gsub(/[\011\014\015\240]/, " ");
  gsub(/[ ]+$/, "");
  nlines++;
}

# A "#" alone on the line (possibly followed by blanks) is a comment:
/^[\#][ ]*$/ {
  next;
}

# Section start: flush the pending line and emit an empty one.
# Note that {ital} is left untouched here.
/^[\$][ ]/ {
  end_paragraph();
  next;
}

# Internal info or comment record: skipped.  ("$" records never get
# here, because the previous rule already consumed them.)
/^[@\$\#][ ]/ {
  next;
}

# Alpha word, symbol, or punctuation: append it to the output.
/^[asp][ ]/ {
  kind = substr($0, 1, 1);  # Type code.
  word = substr($0, 3);     # Everything after the separating blank.

  # Last character already on the output line ("" if the line is empty):
  prev = (olin == "" ? "" : substr(olin, length(olin), 1));

  # Choose the separator {befo} that goes in front of {word}:
  if (kind != "p") {
    # Alpha or symbol:
    nwdin++;
    befo = gap_before_word(prev);
  } else {
    # Punctuation:
    nptin++;
    if (word == "÷") {
      # End of paragraph; italics do not carry over into the next one:
      end_paragraph();
      ital = 0;
      next;
    }
    if (word != "_") {
      befo = gap_before_punct(word, prev);
    } else if (ital) {
      # Closing italics: the brace hugs the preceding text.
      befo = "";
      word = "}";
      ital = 0;
    } else {
      # Opening italics:
      befo = " ";
      word = "\\emph{";
      ital = 1;
    }
  }

  # Never start an output line with a blank:
  if (prev == "") { befo = ""; }

  append_to_olin(befo, recode_symbols(word));
  next;
}

# Anything that reached this point matched none of the known record types:
// {
  data_error(("bad line format"));
  next;
}

END {
  if (abort >= 0) { exit abort; }
  # Flush the last, partially filled line:
  if (olin != "") { output_olin(); }
  printf "%8d lines read\n", nlines > "/dev/stderr";
  printf "%8d words/symbols read\n", nwdin > "/dev/stderr";
  printf "%8d punctuation read\n", nptin > "/dev/stderr";
}

function end_paragraph()
{
  # Writes the pending output line and then an empty line.
  output_olin();
  output_olin();
}

function gap_before_word(prev)
{
  # Separator to put before an alpha word or symbol, given the last
  # character {prev} of the output line: nothing right after "(", "`",
  # "-" or "{", a single blank otherwise.
  if ((prev == "(") || (prev == "`") || (prev == "-") || (prev == "{")) {
    return "";
  }
  return " ";
}

function gap_before_punct(sym, prev)
{
  # Separator to put before the punctuation {sym} (anything except "_"
  # and the paragraph mark), given the last character {prev} of the
  # output line.  Openers "(", "-" and "`" are detached from the
  # preceding text unless they continue a run of openers; every other
  # punctuation is attached directly.
  if (sym == "(") { return (prev == "(" ? "" : " "); }
  if (sym == "-") { return (prev == "-" ? "" : " "); }
  if (sym == "`") { return ((prev == "(") || (prev == "`") ? "" : " "); }
  return "";
}

function recode_symbols(text)
{
  # Returns {text} with "_" turned into "/", "~" into "-", and "^" into ".".
  gsub(/[_]/, "/", text);
  gsub(/[~]/, "-", text);
  gsub(/\^/, ".", text);
  return text;
}

function append_to_olin(gap, text)
{
  # Adds {text}, preceded by {gap}, to the output line.  If {gap} is
  # not empty and the result would reach 72 characters, the current
  # line is written out first and {text} starts a new one without the
  # gap.  Text with an empty {gap} is always attached, whatever the length.
  if ((gap != "") && (length(olin) + length(gap) + length(text) >= 72)) {
    output_olin();
    olin = text;
  } else {
    olin = (olin gap text);
  }
}

function output_olin()
{
  # Prints the current output line (possibly empty) and clears it.
  print olin;
  olin = "";
}

function fail()
{
  # Records exit status 1 in {abort} and terminates the main loop.
  abort = 1;
  exit 1;
}

function arg_error(msg)
{
  # Reports a command-line error {msg} plus the usage text, then aborts.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  fail();
}

function data_warning(msg)
{
  # Reports a non-fatal problem {msg} with the current input line.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
}

function data_error(msg)
{
  # Reports a fatal problem {msg} with the current input line, echoes
  # the line itself, then aborts.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  printf " %s\n", $0 > "/dev/stderr";
  fail();
}

function tbl_error(f,n,msg)
{
  # Reports a fatal problem {msg} at line {n} of the file {f}, then aborts.
  printf "file %s, line %d: %s\n", f, n, msg > "/dev/stderr";
  fail();
}