#! /usr/bin/gawk -f
# Last edited on 2025-05-04 16:53:06 by stolfi

# Filters a word list, keeping only the lexemes that have a given
# number of elements.
#
# Usage:
#
#   extract_words_by_elem_count.gawk -v len=LEN < INFILE > OUTFILE
#
# Input records must be
#
#   COUNT LEXEME
#
# where LEXEME is factored into elements by "{}". Elements are counted
# by splitting LEXEME at every "}{" boundary. Blank lines and lines
# whose first non-blank character is "#" are ignored. Outputs those
# lines where LEXEME has exactly LEN elements, unchanged, in the same
# format
#
#   COUNT LEXEME
#
# LEN must be a non-negative decimal integer. On a missing or malformed
# LEN, an error message and the usage string are written to stderr and
# the script exits with status 1.

BEGIN {
  abort = -1;
  usage = ( "extract_words_by_elem_count.gawk -v len=LEN < INFILE > OUTFILE " );

  if (len == "") { arg_error("must specify \"len\""); }

  # Reject non-numeric values up front: otherwise "m == len" below would
  # silently become a string comparison that never matches.
  if (len !~ /^[0-9]+$/) { arg_error("\"len\" must be a non-negative integer"); }

  # Force a numeric value so the comparison with the element count is numeric.
  len = len + 0;
}

# Defensive guard: stop at the first record if an error was flagged in BEGIN.
(abort >= 0) { exit abort; }

# Skip blank lines and comment lines (leading spaces or tabs allowed).
/^[ \t]*([#]|$)/ { next; }

# Data line: field 1 is COUNT, field 2 is LEXEME.
/./ {
  w = $2;
  # Put a blank between adjacent elements so that the default field
  # splitting counts them. Brackets keep "{" and "}" literal, so the
  # pattern cannot be mistaken for an interval expression.
  gsub(/[}][{]/, "} {", w);
  # A record with no LEXEME field yields zero elements.
  m = split(w, wels);
  if (m == len) { print; }
  next;
}

# Reports a command-line argument error: prints {msg} and the usage
# string to stderr, records exit status 1 in {abort}, and exits.
function arg_error(msg)
{
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit abort;
}