#! awk -f
# Last edited on 1998-08-02 19:31:52 by stolfi

# Usage: enum-elem-triples [ -v consecutive=BOOL ] < INFILE > OUTFILE
#
# Reads a file of strings factored into elements.
# Assumes each element is bracketed by "{}".
#
# For each input string, outputs all subsequences of three non-empty
# elements.
#
# If "consecutive" is set, enumerates only triples of consecutive
# elements; otherwise enumerates all three-element subsequences.
#
# Ignores strings that are half-parsed or contain error markers.
#
# Counts of ignored strings, processed strings and written triples are
# reported on /dev/stderr at end of input.

BEGIN {
  # "abort" stays negative until error() is called; every rule checks it
  # first so that no further work is done once an error has been raised.
  abort = -1;
  usage = "enum-elem-triples [ -v consecutive=BOOL ] < INFILE > OUTFILE";

  # Default: enumerate all subsequences, not only consecutive triples.
  if (consecutive == "") { consecutive = 0; }

  nproc = 0;  # Strings processed.
  nwrit = 0;  # Triples written.
  nskip = 0;  # Strings ignored.
}

# Any character other than braces, lowercase letters and "?" causes the
# whole string to be reported on stderr and skipped.
/[^{}a-z?]/ {
  if (abort >= 0) { exit abort; }
  printf "ignored: %s\n", $0 > "/dev/stderr";
  nskip++;
  next;
}

# Every remaining non-empty string: split into elements and enumerate.
/./ {
  if (abort >= 0) { exit abort; }
  nproc++;

  str = $0;
  gsub(/[{][}]/, "", str);   # Drop empty elements "{}".
  gsub(/[}]/, "} ", str);    # Put a blank after each element so split() separates them.
  n = split(str, fld);

  # k indexes the third element of the triple, j the second, i the first.
  for (k = 3; k <= n; k++) {
    fk = fld[k];
    if (! match(fk, /^[{][a-z][a-z?]*[}]$/)) {
      error(("line " NR ": bad field [" k "] = \"" fk "\" in \"" $0 "\""));
    }
    if (consecutive) { jmin = k-1; } else { jmin = 1; }
    for (j = jmin; j < k; j++) {
      # NOTE(review): everything from the "j < k" test down to the "END {"
      # header was missing from the damaged source and has been
      # reconstructed from the header comment and the surviving
      # jmin/nwrit logic. In particular the output format (the three
      # elements concatenated on one line) is an assumption -- confirm
      # against a pristine copy of the script.
      fj = fld[j];
      if (consecutive) { imin = j-1; } else { imin = 1; }
      for (i = imin; i < j; i++) {
        fi = fld[i];
        print (fi fj fk);
        nwrit++;
      }
    }
  }
  next;
}

END {
  # Skip the summary (and keep the error status) if error() was called.
  if (abort >= 0) { exit abort; }
  printf "%7d strings ignored\n", nskip > "/dev/stderr";
  printf "%7d strings processed\n", nproc > "/dev/stderr";
  printf "%7d triples written\n", nwrit > "/dev/stderr";
}

# Prints "msg" on stderr, records the failure in "abort" and exits with
# status 1. The END rule checks "abort" and exits without printing the summary.
function error(msg) {
  printf "%s\n", msg > "/dev/stderr";
  abort = 1;
  exit 1;
}