#! /usr/bin/gawk -f
# Last edited on 2025-05-04 16:53:55 by stolfi

# Appends to each input record the number of elements in its factored lexeme.
#
# Input records must be
#
#   COUNT LEXEME FLEXEME
#
# where FLEXEME is LEXEME factored into elements by "{}".  Blank lines
# (empty or containing only spaces/tabs) and comment lines (first
# non-blank character is "#") are ignored.  Every other line is written
# out again with an extra field -- the number of factors:
#
#   COUNT LEXEME FLEXEME LEN
#
# A record that does not have exactly three fields is reported on
# standard error and the script exits with status 1.

BEGIN {
  # Exit status requested by an error function; negative means "no error".
  abort = -1;
  usage = "compute_elem_counts.gawk < INFILE > OUTFILE ";
}

# Once an error has been flagged, stop processing records.
(abort >= 0) { exit abort; }

# Skip blank lines and comment lines.  [[:blank:]] covers tabs as well as
# spaces, matching what awk's default field splitting treats as separators.
/^[[:blank:]]*([#]|$)/ { next; }

# Data record: COUNT LEXEME FLEXEME.
/./ {
  if (NF != 3) { data_error("wrong number of fields"); }
  ct = $1; w = $2; fw = $3;

  # Insert a space at every "}{" boundary so that the default whitespace
  # split yields one piece per element.  The braces are written as bracket
  # expressions so they can never be read as an interval expression.
  y = fw;
  gsub(/[}][{]/, "} {", y);
  nels = split(y, wels);

  print ct, w, fw, nels;
  next;
}

# Reports a command-line error MSG plus the usage text on standard error,
# then exits with status 1.  Not called anywhere in this script.
function arg_error(msg)
{
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit abort;
}

# Reports an input-data error MSG on standard error, prefixed with the
# current file name and line number, then exits with status 1.
function data_error(msg)
{
  printf "file %s, line %d: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  exit abort;
}