#! /usr/bin/gawk -f
# Last edited on 2008-03-20 09:33:06 by stolfi

BEGIN {
  USAGE = "preprocess-tex-shorthand < {INFILE}.tex > {OUTFILE}.tex";

  # Reads a TeX/LaTeX file from standard input and rewrites certain
  # shorthand markup codes, copying everything else unchanged.
  #
  # The recognized codes and what this script emits for them are:
  #
  #   ${MATH}         -->  $MATH$
  #   ={LABEL}{EQN}   -->  \eqn{LABEL}{EQN}
  #   /{TEXT}         -->  \emph{TEXT}
  #   *{TEXT}         -->  \textbf{TEXT}
  #
  # NOTE(review): an earlier version of this header documented
  # "{$MATH$}" and "\begin{equation}\label{LABEL} EQN\end{equation}"
  # for the first two forms, but the code emits the forms listed above.
  # Presumably "\eqn" is a two-argument macro defined in the document
  # preamble -- confirm.
  #
  # The '{' must immediately follow the opening character, in the
  # same line.
  #
  # These shorthands are recognized at all levels of nesting braces,
  # unless the opening character is quoted with '\', is not followed
  # by "{", or lies inside a '%' comment.

  abort = -1;      # Exit status requested by {program_error}, or -1 if none.
  level = 0;       # Number of unclosed open braces.
  split("", op);   # {op[0..level-1]} are the operators of those open braces;
                   # " " marks an ordinary (non-shorthand) brace.
}

# Stop immediately if an error has been flagged:
(abort >= 0) { exit abort; }

# Main rule: applied to every input line.
{
  # Save current line in {lin}:
  lin = $0;

  # Replace tabs, FF and CR by blanks:
  gsub(/[\011\014\015]/, " ", lin);

  # Process each character in the line.  {np} and {ip} are globals
  # shared with {process_next_char}, which advances {ip} by at least
  # one on every call, so the loop terminates.
  np = length(lin);   # Number of characters in input line.
  ip = 1;             # Index of next character to process (from 1).
  while (ip <= np) { process_next_char(); }

  printf "\n";
  next;
}

function process_next_char(   c)
{
  # Processes the next character of {lin}, assumed to have index {ip},
  # and advances {ip} past everything it consumed.
  # Assumes {np} is the length of {lin}, and {level} is the
  # current brace nesting level.
  # Accounts for '\' quoting and '%' comments.

  c = substr(lin, ip, 1); ip++;

  if (c == "\\") {
    # Copy the next character without further processing.
    # We believe that it suffices to copy just the first char after '\'.
    out(c);
    if (ip <= np) { c = substr(lin, ip, 1); ip++; out(c); }
  } else if (c == "{") {
    # Ordinary open brace; remember it so the matching '}' is copied as is.
    op[level] = " "; level++;
    out(c);
  } else if (c == "}") {
    if (level <= 0) {
      # Unmatched close brace: copy it through rather than letting
      # {level} go negative and indexing {op} out of range.
      input_warning("unmatched '}'");
      out(c);
    } else {
      level--;
      close_brace(op[level]);
    }
  } else if (c == "%") {
    # Comment; copy everything until the end of line:
    out(c);
    out(substr(lin, ip));
    ip = np + 1;
  } else if ((index("/$*=", c) > 0) && (ip <= np) && (substr(lin, ip, 1) == "{")) {
    # Shorthand construct: {c} is immediately followed by "{".
    op[level] = c; level++;
    # Skip the brace, too:
    ip++;
    # Output the preamble:
    open_brace(c);
  } else {
    out(c);
  }
}

function open_brace(kind)
{
  # Outputs the preamble for the shorthand whose opening character is {kind}.
  if (kind == "$") {
    out(kind);
  } else if (kind == "/") {
    out("\\emph{");
  } else if (kind == "=") {
    out("\\eqn{");
  } else if (kind == "*") {
    out("\\textbf{");
  } else {
    program_error("duh?");
  }
}

function close_brace(kind)
{
  # Outputs the postamble for the brace whose recorded operator is {kind}:
  # " " for an ordinary brace, otherwise the shorthand's opening character.
  if (kind == "$") {
    out(kind);
  } else if ((kind == " ") || (kind == "/") || (kind == "=") || (kind == "*")) {
    out("}");
  } else {
    program_error("duh?");
  }
}

function out(s)
{
  # Appends {s} to the current output line:
  printf "%s", s;
}

function input_warning(msg)
{
  # Reports a problem in the input file on standard error and carries on.
  printf "%s:%d: warning: %s\n", FILENAME, FNR, msg > "/dev/stderr";
}

function program_error(msg)
{
  # Reports an internal inconsistency on standard error (so that the
  # message does not end up inside the generated TeX) and terminates
  # the script with status 1.
  printf "%s:%d: **program error: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}