#! /usr/bin/gawk -f
# Last edited on 2000-12-27 11:25:49 by stolfi

BEGIN{
  # "abort" is negative while no error has been recorded; arg_error()
  # sets it to the exit status to be used.
  abort = -1;
  usage = ( "extract-words-by-elem-count -v len=LEN < WORDFILE > FREQFILE " );

  # Each input record must have the form
  #
  #   COUNT WORD
  #
  # where WORD is factored into elements by braces, with consecutive
  # elements meeting at "}{". Empty lines and comment lines (starting
  # with "#") are ignored. The records whose WORD has exactly LEN
  # elements are written to the output unchanged, in the same format
  #
  #   COUNT WORD
  #
  # LEN is given on the command line as "-v len=LEN" and must be a
  # non-negative integer.

  if (len == "") { arg_error("must specify \"len\""); }
  else if ((len + 0) != len || int(len) != len || len < 0)
    {
      # A value that does not look like a number is compared as a
      # string in the first test and so differs from its own numeric
      # value; fractional and negative values can never equal an
      # element count. Without this check such values would silently
      # produce empty output.
      arg_error("\"len\" must be a non-negative integer");
    }
}

# If an argument error was recorded ("abort" is non-negative), stop
# right away with that status instead of processing records.
(abort >= 0) { exit abort; }

# Skip blank lines and comment lines. Leading blanks may be spaces or
# tabs, since the default field splitting treats both as whitespace.
/^[ \t]*([#]|$)/ { next; }

# Data record "COUNT WORD": print it unchanged when WORD has exactly
# "len" elements.
/./ {
  # Put a blank at every "}{" boundary so that split() yields one piece
  # per element. The braces are written as bracket expressions so that
  # "{" cannot be mistaken for the start of an interval expression.
  y = $2; gsub(/[}][{]/, "} {", y);
  # A record with no WORD field yields zero pieces.
  m = split(y, wels);
  if (m == len) { print; }
  next;
}

# Reports a command-line argument error and terminates the script.
#   msg: text describing the problem.
# Writes msg followed by the usage line (global "usage") to standard
# error, sets the global "abort" to 1 and exits with that status.
function arg_error(msg)
{
  printf("%s\n", msg) > "/dev/stderr";
  printf("usage: %s\n", usage) > "/dev/stderr";
  abort = 1;
  exit abort;
}