#! /usr/bin/gawk -f
# Last edited on 2016-05-09 23:28:02 by stolfilocal

# Reads a file in ".wds" format from standard input and writes the text
# to standard output as LaTeX source (macros such as "\titlepage{",
# "\chapt{" and "\parag{" are emitted by the rules below).

# !!! Maybe too specific to {port/cso} and {port/csm} !!!

BEGIN {
  # {abort} is negative while the run is healthy; the error functions
  # set it to the exit status so that the {END} rule can bail out.
  abort = -1;
  usage = ( ARGV[0] " \\\n" \
    " -f recode-tex.gawk < some.wds > some.txt" \
  );
  #
  # See "wds-format.txt" for a description of the input file format.

  nlines = 0;   # Number of lines read.
  nwdin = 0;    # Number of words/symbols read.
  nptin = 0;    # Number of punctuation symbols read.

  olin = "";    # Current output line.

  # Global state:
  #   0 waiting for {b} section (book start)
  #   1 after {b}, waiting for {b}{tpg} (title page).
  #   2 after {b}{tpg}, waiting for {b}{tpg}{tt} (title)
  #   3 after {b}{tpg}{tt}, waiting for word or {b}{tpg}{au} (author)
  #   4 after {b}{tpg}{au}, waiting for word or {b}{XX}.. (start of chapter)
  #   5 after {b}{XX}, waiting for {b}{XX}{cn} (chapter number)
  #   6 after {b}{XX}{cn}, waiting for word or {b}{XX}{tt} (chapter title)
  #   7 after {b}{XX}{tt}, waiting for word or subsections, *{tx}, {b}{YY}, or EOF.
  #   8 after {b}{XX}{tx}, waiting for word or subsections, *{tx}, {b}{YY}, or EOF.
  gstate = 0;

  # Paragraph state:
  intx = 0;    # 1 within horizontal mode text, 0 vertical mode
  ital = 0;    # 1 within italic text.
  math = 0;    # 1 within math formula.
  pword = "";  # Previous unconverted word in the paragraph.
  ptype = "";  # The type ("a", "p", "s") of the previous word.

  # The first output line is a LaTeX comment.  The percent sign must be
  # doubled because it is the conversion introducer in a printf format.
  printf "%% Created by wds-to-tex.gawk\n"
}

# Stop processing input as soon as an error has been flagged.
(abort >= 0) { exit abort; }

# Normalization applied to every input line before the rules below.
// {
  # Get rid of funny spaces (TAB, FF, CR, and octal 240):
  gsub(/[\011\014\015\240]/, " ");
  # Remove trailing blanks:
  gsub(/[ ]+$/, "");
  nlines++;
}

/^[\#][ ]*$/ {
  # Comment line, ignore:
  next;
}

# ----------------------------------------------------------------------
# Book

/^[\$][ ]+{b} *$/ {
  # Start of book; only expected in the initial state, outside any text.
  if ((gstate != 0) || (intx != 0)) { data_warning("misplaced start-of-book"); }
  gstate = 1;
  next;
}

# ----------------------------------------------------------------------
# Title page

/^[\$][ ].*{tpg} *$/ {
  # Start of title page; nothing is output, only the state advances.
  if ((gstate != 1) || (intx != 0)) { data_warning("misplaced title page"); }
  if ((ital != 0) || (math != 0)) { data_warning("funny {math,ital} state"); }
  gstate = 2;
  next;
}

/^[\$][ ].*{tpg}{tt} *$/ {
  # Start of book title: opens the first argument of "\titlepage".
  if ((gstate != 2) || (intx != 0)) { data_warning("misplaced book title"); }
  if ((ital != 0) || (math != 0)) { data_warning("funny {math,ital} state"); }
  gstate = 3;
  start_text("\\titlepage{");
  next;
}

/^[\$][ ].*{tpg}{au} *$/ {
  # Start of book author: closes the title group and opens a new one.
  if ((gstate != 3) || (intx == 0)) { data_warning("misplaced book author"); }
  close_math();
  close_ital();
  close_text();
  gstate = 4;
  start_text("{");
  next;
}

# ----------------------------------------------------------------------
# Chapter headers:

# !!! Must generalize !!!
/^[\$][ ].*{c[0-9]+} *$/ {
  # Start of chapter: closes whatever text is open; nothing is output yet.
  if ((gstate != 4) && (gstate != 7) && (gstate != 8)) { data_warning("misplaced start of chapter"); }
  close_math();
  close_ital();
  close_text();
  gstate = 5;
  next;
}

/^[\$][ ].*{c[0-9]+}{cn} *$/ {
  # Start of chapter number: opens the first argument of "\chapt".
  if ((gstate != 5) || (intx != 0)) { data_warning("misplaced start of chapter number"); }
  if ((ital != 0) || (math != 0)) { data_warning("funny {math,ital} state"); }
  # Flush the pending line, then print an empty one as a separator:
  output_olin();
  output_olin();
  gstate = 6;
  start_text("\\chapt{");
  next;
}

/^[\$][ ].*{c[0-9]+}{tt} *$/ {
  # Start of chapter title: closes the number group and opens a new one.
  if ((gstate != 6) || (intx == 0)) { data_warning("misplaced start of chapter title"); }
  close_math();
  close_ital();
  close_text();
  gstate = 7;
  start_text("{");
  next;
}

/^[\$][ ].*{tx} *$/ {
  # Start or restart of normal prose: begins a fresh "\parag{".
  if ((gstate != 7) && (gstate != 8)) { data_warning("misplaced start of normal prose (gstate = " gstate ")"); }
  close_math();
  close_ital();
  close_text();
  # Flush the pending line, then print an empty one as a separator:
  output_olin();
  output_olin();
  gstate = 8;
  start_text("\\parag{");
  next;
}

/^[\$][ ].*{(lat|frc|ita|eng)[pv]} *$/ {
  # Start of foreign phrase, set {ital} but do not break parag.
  if ((gstate != 3) && (gstate != 7) && (gstate != 8)) { data_warning("misplaced foreign phrase"); }
  if (gstate != 8) { data_warning("foreign phrase in book/chapter title"); }
  close_math();
  close_ital();
  match($0, /{(lat|frc|ita|eng)[pv]}/);
  # The match includes the surrounding braces; strip them so that the
  # emitted macro is e.g. "\latp{" rather than "\{latp}{".
  sectag = substr($0, RSTART + 1, RLENGTH - 2);
  start_ital(("\\" sectag "{"));
  next;
}

/^[\$][ ].*{math} *$/ {
  # Start of math formula, output "\mth{" and set {math} but do not break parag.
  if ((gstate != 3) && (gstate != 7) && (gstate != 8)) { data_warning("misplaced math formula"); }
  close_math();
  # {start_math} sets {math = 1} itself, and aborts if {math} is already
  # nonzero, so the flag must NOT be set here beforehand.
  start_math("\\mth{");
  next;
}

# ----------------------------------------------------------------------
# Other sections:

/^[\$][ ]/ {
  # Start of other sections, handle as end of paragraph and start a new one:
  data_warning(("unhandled section «" $0 "»"));
  if ((gstate != 3) && (gstate != 7) && (gstate != 8)) { data_warning("misplaced section"); }
  close_math();
  close_ital();
  close_text();
  # {start_text} sets {intx} to 1.
  start_text("\\parag{");
  next;
}

# ----------------------------------------------------------------------
# Non-sections:

/^[@\#][ ]/ {
  # Internal info or comment line, ignore:
  next;
}

/^[asp][ ]/ {
  # Alpha, symbol, or punctuation, output it:
  if ((gstate <= 2) || (gstate == 5)) { data_warning("misplaced word (gstate = " gstate ")"); }
  if (intx == 0) { start_text("\\parag{"); }

  # Grab the type (column 1) and word (column 3 onwards):
  type = substr($0, 1, 1);
  word = substr($0, 3);
  if (type == "p") { nptin++; } else { nwdin++; }

  # Default translation:
  befo = "";   # Space to insert before {wtex}, if no line break.
  wtex = word; # The {word} converted to LaTeX.
  neol = 0;    # 0 = no break, 1 = line break, 2 = paragraph break after the word.

  # Handle standard codes:
  if ((type == "p") && (word == "=")) {
    # End of paragraph, force end of italic and math:
    wtex = "";
    neol = 2;
  } else if ((type == "p") && (word == "÷")) {
    # Hard line break:
    wtex = "\\\\";
    neol = 1;
  } else if ((type == "p") && (word == "_") && (! math)) {
    # Toggle italic mode:
    if (ital) {
      befo = "";
      wtex = "}";
      ital = 0;
    } else {
      befo = " ";
      wtex = "\\emph{";
      ital = 1;
    }
  } else {
    # Convert to LaTeX by text-specific function.  {tex_recode} is not
    # defined in this file; per the usage message it presumably comes
    # from a separate "-f recode-tex.gawk" program file.
    split("", act);  # Clear the result array.
    tex_recode(pword, ptype, math, type, word, act);
    befo = act[1]; # Space to insert before {wtex}, if no line break.
    wtex = act[2]; # The {word} converted to LaTeX.
  }

  # Does it fit in the current line?
  if ((befo == "") || (length(olin) + length(befo) + length(wtex) < 72)) {
    # Append {word} with its space before:
    olin = (olin befo wtex);
  } else {
    # Flush the line and append {wtex} without space:
    output_olin();
    olin = wtex;
  }

  if (neol == 2) {
    # Paragraph break: close everything, emit a blank line, reopen.
    close_math();
    close_ital();
    close_text();
    output_olin();
    output_olin();
    start_text("\\parag{");
  } else if (neol == 1) {
    output_olin();
  }

  # Remember this word as context for the next call of {tex_recode}:
  pword = word;
  ptype = type;
  next;
}

# Anything not consumed by a rule above is a format error.
// {
  data_error("bad line format");
  next;
}

END {
  if (abort >= 0) { exit abort; }
  close_math();
  close_ital();
  close_text();
  if (olin != "") { output_olin(); }
  printf "%8d lines read\n", nlines > "/dev/stderr";
  printf "%8d words/symbols read\n", nwdin > "/dev/stderr";
  printf "%8d punctuation read\n", nptin > "/dev/stderr";
}

function start_math(macro) {
  # Starts math formula by appending {macro} to the current output line.
  # Sets {pword="$",ptype="."} and {math=1}.
  # Aborts with an error if a formula is already open.
  if (math != 0) { data_error("close {math} first"); }
  olin = ( olin macro );
  pword = "$";
  ptype = ".";
  math = 1;
}

function close_math() {
  # Closes math formula and sets {pword="$",ptype="."}, if open.
  # Always leaves {math} at 0.
  if (math != 0) {
    olin = ( olin "}" );
    pword = "$";
    ptype = ".";
  }
  math = 0;
}

function start_ital(macro) {
  # Starts italic section by appending {macro} to the current output line.
  # Does not change {pword,ptype}.
  # Aborts with an error if math or italic is already open.
  if ((math != 0) || (ital != 0)) { data_error("close {math,ital} first"); }
  olin = ( olin macro );
  ital = 1;
}

function close_ital() {
  # Closes ital section, if open.  Does not change {pword,ptype}.
  # Aborts with an error if a math formula is still open.
  if (math != 0) { data_error("close {math} first"); }
  if (ital != 0) { olin = ( olin "}" ); }
  ital = 0;
}

function start_text(macro) {
  # Appends {macro} to the current output line, then resets the
  # paragraph state for new parag: {ital}, {math}, {pword}, {ptype}
  # and sets {intx} to 1.
  olin = ( olin macro );
  math = 0;
  ital = 0;
  pword = "";
  ptype = "";
  intx = 1;
}

function close_text() {
  # Closes a text paragraph or a book/chapter title, if open.
  # At this point, {math} and {ital} must be closed.
  if ((math != 0) || (ital != 0)) { data_error("close {math,ital} first"); }
  if (intx != 0) {
    olin = ( olin "}" );
    pword = "=";
    ptype = "p";
  }
  intx = 0;
}

function output_olin() {
  # Prints the current output line (possibly empty) and clears it.
  print olin;
  olin = "";
}

function arg_error(msg) {
  # Reports a command-line error plus the usage text on stderr, then exits
  # with status 1.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_warning(msg) {
  # Reports a non-fatal input problem on stderr, tagged with the line number.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
}

function data_error(msg) {
  # Reports a fatal input problem on stderr, echoing the offending line,
  # then exits with status 1.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  printf " %s\n", $0 > "/dev/stderr";
  abort = 1;
  exit 1;
}

function tbl_error(f,n,msg) {
  # Reports a fatal problem at line {n} of file {f} on stderr, then exits
  # with status 1.
  printf "file %s, line %d: %s\n", f, n, msg > "/dev/stderr";
  abort = 1;
  exit 1
}