#! awk -f
# Last edited on 1998-07-27 02:36:13 by stolfi

# Setup: initialise the abort flag, the usage text, the "consecutive"
# option (settable with "-v consecutive=1") and the summary counters.
BEGIN {
  abort = -1;  # negative means "no error yet"; error() sets it to the exit status
  usage = "enum-elem-pairs [ -v consecutive=1 ] < INFILE > OUTFILE";
  #
  # Reads a file of strings factored into elements.
  # Assumes each element is bracketed by "{}".
  #
  # For each input string, outputs all subsequences of two non-empty
  # elements.
  #
  # If "consecutive" is set enumerates only pairs
  # of *consecutive* elements; otherwise enumerates all
  # two-element subsequences.
  #
  # Ignores strings that are half-parsed or contain error markers.

  if (consecutive == "") { consecutive = 0; }
  nproc = 0;  # strings processed
  nwrit = 0;  # pairs written
  nskip = 0;  # strings ignored
}

# Lines containing any character other than braces, lowercase letters
# or "?" are reported on stderr, counted, and skipped.
/[^{}a-z?]/{
  if (abort >= 0) { exit abort; }
  printf "ignored: %s\n", $0 > "/dev/stderr";
  nskip++;
  next;
}

# Every remaining non-empty line: split it into elements and emit pairs.
/./{
  if (abort >= 0) { exit abort; }
  nproc++;
  str = $0;
  # Drop empty elements, then put a blank after each "}" so that
  # split() yields one field per element.
  gsub(/[{][}]/, "", str);
  gsub(/[}]/, "} ", str);
  n = split(str, fld);
  # fld[k] is the second member of each pair, so k starts at 2.
  # Note that only fields 2..n go through the syntax check below.
  for (k = 2; k <= n; k++)
    {
      fk = fld[k];
      if (! match(fk, /^[{][a-z][a-z?]*[}]$/))
        { error(("line " NR ": bad field [" k "] = \"" fk "\" in \"" $0 "\"")); }
      # First member: only the immediate predecessor when "consecutive"
      # is set, otherwise every earlier element.
      if (consecutive) { jmin = k-1; } else { jmin = 1; }
      # NOTE(review): in the copy of this file that was reviewed, the text
      # between "for (j=jmin; j" and "= 0) { exit abort; }" was missing.
      # The loop bound, the loop body and the opening of the END rule are
      # reconstructed from the header comment and the "pairs written"
      # counter. The output format (the two elements concatenated, braces
      # kept) is an assumption -- confirm against a pristine copy.
      for (j = jmin; j < k; j++)
        {
          print (fld[j] fld[k]);
          nwrit++;
        }
    }
}

# Summary on stderr, or propagate the failure status set by error().
END {
  if (abort >= 0) { exit abort; }
  printf "%7d strings ignored\n", nskip > "/dev/stderr";
  printf "%7d strings processed\n", nproc > "/dev/stderr";
  printf "%7d pairs written\n", nwrit > "/dev/stderr";
}

# Prints msg on stderr, records the failure in "abort" so that the END
# rule exits with the same status, and terminates the program.
function error(msg)
{
  printf "%s\n", msg > "/dev/stderr";
  abort = 1;
  exit 1;
}