#! /usr/bin/gawk -f
# Last edited on 2002-01-03 22:59:49 by stolfi

# Expects the `inField'th field of INFILE to be a string of elements,
# each surrounded in braces, already capitalized as in full EVA.
# Replaces each element by three pseudo-elements: a left-stroke code,
# an "X", and a right-stroke code, each inside braces "{}".
#
# If `erase' is set, the input field is erased, otherwise it is
# preserved.  Then inserts the stroke string as the `outField'th field.
#
# Lines starting with "#" are copied through unchanged; empty lines
# are dropped.

BEGIN {
  # `abort' stays negative until an error routine sets the exit status.
  abort = -1;
  usage = ( \
    "elem-to-stroke-pair \\\n" \
    "  [ -v inField=NUM ] \\\n" \
    "  [ -v erase=BOOL ] \\\n" \
    "  [ -v outField=NUM ] \\\n" \
    "  < INFILE > OUTFILE" \
  );

  # Defaults: read field 1, keep it, and insert the result at the
  # same position as the input field.
  if (inField == "") inField = 1;
  if (erase == "") erase = 0;
  if (outField == "") outField = inField;

  # Field numbers must be positive integers; anything else would make
  # "$(inField)" refer to the whole record or be a fatal gawk error.
  if ((inField !~ /^[0-9]+$/) || (inField + 0 < 1)) {
    arg_error(("bad inField = \"" inField "\"\nusage: " usage));
  }
  if ((outField !~ /^[0-9]+$/) || (outField + 0 < 1)) {
    arg_error(("bad outField = \"" outField "\"\nusage: " usage));
  }
  inField += 0;
  outField += 0;
}

# Stop processing as soon as some error routine has set `abort'.
(abort >= 0) { exit abort; }

# Comment lines are passed through verbatim.
/^#/ { print; next; }

# Data lines: convert field `inField' and print the augmented record.
/./ {
  if (NF < inField) { data_error("not enough input fields"); }
  x = $(inField);
  y = convert_elems_to_stroke_pairs(x);
  printout(y, outField, inField, erase);
  next;
}

function stroke_pair(x, elem, left, right)
{
  # Returns a copy of `x' where every occurrence of "{elem}" is replaced
  # by "<left><X><right>".  The braces are bracket-quoted so that the
  # regex engine can never take them for an interval expression.
  gsub(("[{]" elem "[}]"), ("<" left "><X><" right ">"), x);
  return x;
}

function convert_elems_to_stroke_pairs(x)
{
  # Returns `x' with each braced element replaced by the triple
  # "{left}{X}{right}" of its stroke codes.  Aborts through
  # `data_error' if any unrecognized element (or stray brace) remains.
  #
  # The replacements are first written with "<>" delimiters so that
  # already-converted text cannot be matched again and so that any
  # leftover "{" or "}" unambiguously marks an unknown element.
  x = stroke_pair(x, "e",   "E", "E");
  x = stroke_pair(x, "i",   "I", "I");
  x = stroke_pair(x, "o",   "E", "D");
  x = stroke_pair(x, "a",   "E", "I");
  x = stroke_pair(x, "y",   "E", "Y");
  x = stroke_pair(x, "q",   "Q", "Q");
  x = stroke_pair(x, "l",   "I", "Y");
  x = stroke_pair(x, "d",   "E", "G");
  x = stroke_pair(x, "r",   "I", "R");
  x = stroke_pair(x, "s",   "E", "R");
  x = stroke_pair(x, "n",   "I", "N");
  x = stroke_pair(x, "m",   "I", "M");
  x = stroke_pair(x, "Ch",  "E", "E");
  x = stroke_pair(x, "Sh",  "E", "E");
  x = stroke_pair(x, "k",   "K", "P");
  x = stroke_pair(x, "t",   "T", "P");
  x = stroke_pair(x, "CKh", "E", "E");
  x = stroke_pair(x, "CTh", "E", "E");
  x = stroke_pair(x, "f",   "K", "J");
  x = stroke_pair(x, "p",   "T", "J");
  x = stroke_pair(x, "CFh", "E", "E");
  x = stroke_pair(x, "CPh", "E", "E");

  # Validity check: every brace should have been consumed above.
  if (x ~ /[{}]/) { data_error(("bad element \"" x "\"")); }

  # Convert "<>" into "{}" in strokes:
  gsub(/[<]/, "{", x);
  gsub(/[>]/, "}", x);
  return x;
}

function delete_field(ifn,   i)
{
  # Removes field `ifn' from the current record, shifting the following
  # fields down by one.  Assumes 1 <= ifn <= NF.
  # The loop must run up to i = NF-1 so that the last field is copied
  # into slot NF-1 before NF is decremented.
  for (i = ifn; i < NF; i++) { $(i) = $(i+1); }
  NF--;
}

function printout(mw, ofn, ifn, del,   i)
{
  # Prints $0 with `mw' inserted as field `$(ofn)'.
  # If `del' is true, deletes field `$(ifn)' first, so `ofn' refers
  # to a position in the record after the deletion.
  if (del) {
    if (NF < ifn) {
      data_error("not enough input fields");
    } else {
      delete_field(ifn);
    }
  }
  # `mw' may at most be appended right after the last existing field.
  if (NF < ofn-1) { data_error("not enough output fields"); }
  if (ofn == 1) {
    print mw, $0;
  } else if (ofn == NF+1) {
    print $0, mw;
  } else {
    for (i = 1; i < ofn; i++) { printf "%s%s", $(i), OFS; }
    printf "%s", mw;
    for (i = ofn; i <= NF; i++) { printf "%s%s", OFS, $(i); }
    printf "\n";
  }
}

function data_error(msg)
{
  # Reports an error in the current input line and terminates with status 1.
  printf "line %d: %s\n", NR, msg >> "/dev/stderr";
  abort = 1;
  exit 1;
}

function arg_error(msg)
{
  # Reports a command-line argument error and terminates with status 1.
  printf "%s\n", msg >> "/dev/stderr";
  abort = 1;
  exit 1;
}