#! /usr/bin/gawk -f
# Last edited on 2025-11-06 14:28:45 by stolfi

# Reads from stdin a file in the "main.wds" format of the langbank project.
# Writes to stdout one word per line, omitting all punctuation
# except "=".
#
# Line types handled here, identified by their first field:
#   "$ ..."   section marker; decides whether the lines that follow are kept.
#   "a WORD"  WORD is written to stdout, folded to lower case.
#   "p ="     the "=" punctuation is written to stdout as-is.
# Every other line is dropped silently.  A summary of the counts is
# written to "/dev/stderr" at end of input.

BEGIN {
  # {skip} is 1 while the current section must be ignored.  It starts
  # at 1 so that nothing is printed before the first accepted marker.
  skip = 1;
  nwords = 0;   # Number of "a" lines written.
  npuncts = 0;  # Number of "p =" lines written.
}

# Example of a section marker accepted by the rule below (presumably):
#   $ {hb}{h3}{tx}{sD}{txs}
# NOTE(review): this text appeared bare (without "#") in the program;
# as code it was only a set of no-op rules, so it is kept as a comment.

# A marker of an accepted section turns output on.  The braces in the
# pattern are meant literally: none of them forms a valid interval
# expression (assumes gawk's handling of invalid intervals -- confirm
# before running under another awk).
/^[$] {hb}{h[0-9]*}{tx}{s[DV]}{txs} *$/ { skip = 0; next }

# Any other section marker turns output off.
/^[$] / { skip = 1; next }

# Ignore everything inside a section that is turned off.
(skip == 1) { next }

# Word line: emit the second field in lower case.
/^a / { print tolower($2); nwords++; next }

# Punctuation line: only "=" is passed through.
/^p =$/ { print $2; npuncts++; next }

END {
  # {NR} is the total number of input lines read, whether kept or not.
  printf "read %d lines wrote %d words and %d punctuation\n",
    NR, nwords, npuncts > "/dev/stderr"
}