#! /usr/bin/gawk -f
# Last edited on 2025-05-04 16:53:06 by stolfi

BEGIN{
  # Exit status requested by an error handler; negative means "no error".
  abort = -1;
  usage = ( "extract_words_by_elem_count.gawk -v len=LEN < INFILE > OUTFILE " );

  # Input records must have the form
  #
  #   COUNT LEXEME
  #
  # where LEXEME is factored into elements by "{}", as in "{a}{b}{c}".
  # Empty lines and comment lines (starting with "#") are ignored.
  # Outputs, unchanged and in the same format
  #
  #   COUNT LEXEME
  #
  # those records whose LEXEME has exactly LEN elements.

  if (len == "") { arg_error("must specify \"len\""); }

  # An element count is always a non-negative whole number, so any other
  # value of "len" could never match and would silently yield empty output.
  # The first test rejects non-numeric text: "len" is then a plain string,
  # and comparing it with the number (len + 0) is done as strings.
  if (((len + 0) != len) || (len < 0) || (int(len) != len)) {
    arg_error("\"len\" must be a non-negative integer");
  }
}

# Safety net: once an error status has been recorded in "abort"
# (any value >= 0), stop processing records and exit with that status.
abort >= 0 {
  exit abort;
}

# Ignore records that are empty, contain only spaces, or whose first
# non-space character is "#".  Note that only spaces (not tabs) are
# accepted as leading blanks here.
$0 ~ /^ *([#]|$)/ {
  next;
}

# Data record "COUNT LEXEME": print the record unchanged when LEXEME
# (the second field) has exactly "len" elements.
/./ {
  w = $2;
  # Adjacent elements meet at "}{", so splitting LEXEME at every such
  # boundary yields one piece per element, and zero pieces when the
  # second field is empty.  The braces are written as bracket expressions
  # so that "{" can never be read as the start of an interval expression,
  # and the explicit separator makes the count independent of FS.
  m = split(w, wels, /[}][{]/);
  if (m == len) { print; }
  next;
}

function arg_error(msg)
  {
    # Reports a command-line argument error and terminates the program.
    #   msg: text describing what is wrong with the arguments.
    # Sets the global "abort" to 1, writes {msg} and the global "usage"
    # string to standard error, then exits with status 1.
    abort = 1;
    printf "%s\n", msg > "/dev/stderr";
    printf "usage: %s\n", usage > "/dev/stderr";
    exit abort;
  }