#! /usr/bin/gawk -f
# Last edited on 2005-06-25 17:30:00 by lucien

# Reads a LaTeX file containing "\grm{...}" and "\xmp{...}" macros.
# Extracts these macros and converts their arguments to grammar
# format as accepted by "gpp". Also processes "\alt{}{}" groups.
#
# All parser state lives in global variables (set up in BEGIN) because
# a macro call may extend over several input lines.

BEGIN {
  usage = ( "cat FILES.tex | grg > OUTFILE.grm" );

  # {abort} is negative until an error is reported; the error
  # functions set it to the exit status, which END then propagates.
  abort = -1;

  # {state} is 0 outside "\grm" or "\xmp" arguments, 1 inside.
  state = 0;

  # The following variables are meaningful only inside a "\grm"
  # or "\xmp" construct:

  # {depth} is the number of unclosed macros seen so far:
  depth = 0;

  # {incomplete} is 1 between two arguments of a macro, or between
  # a macro and its first argument:
  incomplete = 0;

  # {op[k]} is the macro applied to the {k}th unclosed open brace:
  split("", op);

  # {iarg[k] = j} means that we are inside argument number {j}
  # of the macro {op[k]}:
  split("", iarg);

  # {narg[m]} is the number of arguments of macro {m}:
  split("", narg);
  narg["\\grm"] = 6;
  narg["\\alt"] = 2;
  narg["\\fcrp"] = 4;
  narg["\\flp"] = 3;
  narg["\\xmp"] = 3;
  narg["\\E"] = 1;
  narg["\\?"] = 0;
  narg["\\+"] = 0;
  narg["\\*"] = 0;
  narg["\\nl"] = 0;
  narg["~"] = 0;

  # {arg[k,1..iarg[k]]} are the totally or partially
  # parsed arguments of macro {op[k]}:
  split("", arg);
}

# Every input line: parse it and emit whatever top-level macro calls
# were completed on it. The text is passed as data, never as the
# {printf} format, so it can never be misread as a format string.
{
  printf "%s", process_line($0);
  next;
}

END {
  # An {exit} inside a function still runs this rule; just propagate
  # the status that the error function chose.
  if (abort >= 0) { exit abort; }

  # Input ended in the middle of a "\grm"/"\xmp" call: the pending
  # rule would otherwise be dropped silently.
  if ((state != 0) || (depth > 0)) {
    printf "%s:%s: unexpected end of input inside a macro call\n", \
      FILENAME, FNR > "/dev/stderr";
    abort = 1;
    exit abort;
  }
}

function process_line(txt,    out,chunk,item,mac,res) {
  # Parses one input line {txt}, updating the global parser state
  # ({state}, {depth}, {incomplete}, {op}, {iarg}, {arg}).
  # Returns the concatenated translations of all top-level "\grm" and
  # "\xmp" calls that were completed on this line ("" if none).
  # Calls {syntax_error} or {prog_error} (which exit) on bad input;
  # in that case nothing is returned for this line.

  out = "";

  # Remove funny chars:
  gsub(/[\011]/, " ", txt);  # TAB
  gsub(/[\015]/, "", txt);   # CR (Windows)

  # Remove trailing comments (LaTeX).
  # NOTE: this also cuts at an escaped "\%".
  gsub(/[%].*$/, "", txt);

  # Remove leading spaces:
  gsub(/^[ ]+/, "", txt);

  while (txt != "") {
    if (state == 0) {
      # Look for the leftmost top-level call, whichever macro it is.
      if (match(txt, /[\\](grm|xmp)[{]/)) {
        txt = substr(txt, RSTART);
        state = 1;
      } else {
        txt = "";
      }
    } else {
      # Get next interesting thing {item}:
      if (match(txt, /([{}~]|([\\]([a-zA-Z]+|[?*+])))/)) {
        # Split into plain text {chunk}, the {item}, and the rest:
        chunk = substr(txt, 1, RSTART-1);
        item = substr(txt, RSTART, RLENGTH);
        txt = substr(txt, RSTART + RLENGTH);
      } else {
        chunk = txt;
        item = "";
        txt = "";
      }

      # Process the uninteresting {chunk} before {item}:
      if (incomplete) {
        # Expecting an open brace; only blanks may precede it:
        if (chunk !~ /^[ ]*$/) {
          syntax_error(("expecting \"{\", found \"" chunk "\""));
        }
      } else if (depth > 0) {
        # Inside some argument: append {chunk} to it.
        # (At {depth == 0} the outer macro name has not been
        # stacked yet, so there is nowhere to put the text.)
        arg[depth,iarg[depth]] = ( arg[depth,iarg[depth]] chunk );
      }

      # Process the {item}:
      if (item == "") {
        # No interesting stuff left on this line, nothing to do.
      } else if (item == "{") {
        if (incomplete == 0) {
          # Brace that does not start a macro argument:
          syntax_error(("unexpected \"{\" found"));
        }
        # Start of next argument of macro {op[depth]}:
        iarg[depth]++;
        arg[depth, iarg[depth]] = "";
        incomplete = 0;
      } else if (item == "}") {
        if (incomplete) {
          # Was expecting the next argument:
          syntax_error(("expecting \"{\", found \"}\""));
        }
        if (depth == 0) {
          syntax_error(("unexpected \"}\" found"));
        }
        mac = op[depth];
        if (iarg[depth] == narg[mac]) {
          # Completed a call of macro {mac}, evaluate it:
          res = apply_macro(mac,depth,narg[mac]);
          depth--;
          if (depth > 0) {
            # Append result of macro to current arg:
            arg[depth,iarg[depth]] = ( arg[depth,iarg[depth]] res );
          } else {
            # End of "\grm" or "\xmp" call:
            out = ( out res );
            state = 0;
          }
          incomplete = 0;
        } else {
          # Completed an argument of {mac}, but not the last one:
          incomplete = 1;
        }
      } else if (item ~ /^([~]|([\\]([A-Za-z]+|[?*+])))$/) {
        # We must be in an {incomplete == 0} state:
        if (incomplete) {
          syntax_error(("expected \"{\", found \"" item "\""));
        }
        # Start of new macro call:
        if (! (item in narg)) {
          # Unknown macro with unknown arguments:
          syntax_error(("unknown macro \"" item "\""));
        } else {
          # Known macro, stack it:
          depth++;
          op[depth] = item;
          iarg[depth] = 0;
          if (narg[item] == 0) {
            # Argumentless macro, apply it immediately:
            res = apply_macro(item,depth,0);
            # Append result to current arg:
            depth--;
            if (depth == 0) { prog_error("duh? duh?"); }
            arg[depth,iarg[depth]] = ( arg[depth,iarg[depth]] res );
            incomplete = 0;
          } else {
            # Macro with arguments, expect args:
            incomplete = 1;
          }
        }
      } else {
        prog_error("duh?");
      }
    }
  }
  return out;
}

function apply_macro(mac,k,n) {
  # Applies macro {mac} to arguments {arg[k,1..n]} and returns the
  # resulting text. Aborts through {prog_error} for a macro name
  # that has no translation here.
  if (mac == "") {
    return arg[k,1];
  } else if (mac == "\\grm") {
    return ("\n" apply_macro_grm(mac,k,n));
  } else if (mac == "\\xmp") {
    return apply_macro_xmp(mac,k,n);
  } else if (mac == "\\fcrp") {
    return apply_macro_fcrp(mac,k,n);
  } else if (mac == "\\flp") {
    return apply_macro_flp(mac,k,n);
  } else if (mac == "\\alt") {
    # Of the two alternatives, keep the second:
    return arg[k,2];
  } else if (mac == "\\?") {
    return "?";
  } else if (mac == "\\E") {
    return ("^" arg[k,1]);
  } else if (mac == "\\+") {
    return "+";
  } else if (mac == "\\*") {
    return "*";
  } else if (mac == "\\nl") {
    return "";
  } else if (mac == "~") {
    return "";
  } else {
    prog_error(("invalid macro \"" mac "\""));
  }
}

function apply_macro_grm(mac,k,n,    DP,LABEL,SYMB,RTAG,MARKERS,RHS,res) {
  # LaTeX macro call:
  #   \grm{DP}{LABEL}{SYMB}{RTAG}{MARKERS}
  #   {
  #     ITEM...
  #     \fcrp{V}{SV}{}{FV,MV,P,N}
  #   }
  # where
  #   DP is "","d","p","dp","pd" meaning
  #     dissertation(d) and/or parser (p);
  #   LABEL is a label for LaTeX use;
  #   SYMB is the left-hand non-lexical symbol;
  #   RTAG is the rule's ID within those of SYMB;
  #   MARKERS are the marker parameters of SYMB;
  #   ITEM... are the symbols in the right-hand side,
  #     one per line, usually "\fcrp" calls.
  # Returns the grammar rule text, or "" when DP lacks "p".
  DP = arg[k,1];
  LABEL = arg[k,2];  # Not used in the output.
  SYMB = arg[k,3];
  RTAG = arg[k,4];
  MARKERS = arg[k,5];
  RHS = arg[k,6];
  if (DP !~ /[p]/) { return ""; }
  res = ( " " SYMB "{" RTAG "}" );
  if (MARKERS != "") { res = ( res "(" MARKERS ")" ); }
  res = ( res " ->" RHS "\n .\n" );
  return res;
}

function apply_macro_xmp(mac,k,n,    DP,LABEL,TEXT,res) {
  # LaTeX macro call:
  #   \xmp{DP}{LABEL}{TEXT}
  # where
  #   DP is "","d","p","dp","pd" meaning
  #     dissertation(d) and/or parser (p);
  #   LABEL is a label for LaTeX use;
  #   TEXT is an arbitrary text, to be made into comment.
  # Returns the comment line, or "" when DP lacks "p".
  DP = arg[k,1];
  LABEL = arg[k,2];  # Not used in the output.
  TEXT = arg[k,3];
  if (DP !~ /[p]/) { return ""; }
  res = ( " # " TEXT "\n" );
  return res;
}

function apply_macro_fcrp(mac,k,n,    FUNCT,SYMB,RTAGS,MARKERS,res) {
  # LaTeX macro call:
  #   \fcrp{FUNCT}{SYMB}{RTAGS}{MARKERS}
  # where
  #   FUNCT is the function of SYMB within the rule;
  #   SYMB is a right-hand-side non-lexical symbol;
  #   RTAGS are the rule's ID called by this SYMB;
  #   MARKERS are the marker arguments of SYMB;
  # Returns one right-hand-side item, on a line of its own.
  FUNCT = arg[k,1];
  SYMB = arg[k,2];
  RTAGS = arg[k,3];
  MARKERS = arg[k,4];
  res = ( "\n" " " );
  if (FUNCT != "") { res = ( res FUNCT ":" ); }
  res = ( res SYMB );
  if (RTAGS != "") { res = ( res "{" RTAGS "}" ); }
  if (MARKERS != "") { res = ( res "(" MARKERS ")" ); }
  return res;
}

function apply_macro_flp(mac,k,n,    FUNCT,SYMB,MARKERS,res) {
  # LaTeX macro call:
  #   \flp{FUNCT}{SYMB}{MARKERS}
  # where
  #   FUNCT is the function of SYMB within the rule;
  #   SYMB is a right-hand-side lexical symbol;
  #   MARKERS are the marker arguments of SYMB;
  # Returns one right-hand-side item, on a line of its own.
  FUNCT = arg[k,1];
  SYMB = arg[k,2];
  MARKERS = arg[k,3];
  res = ( "\n" " " );
  if (FUNCT != "") { res = ( res FUNCT ":" ); }
  res = ( res SYMB );
  if (MARKERS != "") { res = ( res "(" MARKERS ")" ); }
  return res;
}

function prog_error(msg) {
  # Reports an internal inconsistency at the current input position
  # and terminates with status 1.
  printf "%s:%s: *** %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  exit abort;
}

function syntax_error(msg) {
  # Reports malformed input at the current input position and
  # terminates with status 1.
  printf "%s:%s: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  exit abort;
}

function print_stack(    i,j) {
  # Debugging aid: dumps the stack of unclosed macros, with the
  # arguments collected so far, to the standard error output.
  printf "--- stack -------------------------------\n" > "/dev/stderr";
  for (i = 1; i <= depth; i++) {
    printf " | %-8s %3d ", op[i], iarg[i] > "/dev/stderr";
    for (j = 1; j <= iarg[i]; j++) {
      printf "{%s}", arg[i,j] > "/dev/stderr";
    }
    printf "\n" > "/dev/stderr";
  }
  printf "-----------------------------------------\n" > "/dev/stderr";
}