#! /usr/bin/gawk -f
# Last edited on 2025-09-04 09:27:15 by stolfi

# Lists the pairs and triples of consecutive words found in a ".wds"
# file, so that their frequencies can be tallied by a later stage
# (this script itself only emits them, one per output line).
#
# Input:
#   Lines that begin with "a" are taken as word records; the word is
#   the second field and is folded to lower case.
#   NOTE(review): presumably "a" is the word-record type code of the
#   ".wds" format -- the pattern matches any line starting with "a".
#   Any other line that contains "@section" or "=" breaks the context,
#   so no pair or triple spans it (paragraph and section breaks,
#   presumably -- confirm against the ".wds" format).
#
# Output (stdout):
#   "[2] W1.W2"     for each pair of adjacent words
#   "[3] W1.W2.W3"  for each triple of adjacent words
#
# At most {max} pairs are written.  Reading stops as soon as {max}
# triples have been written.  A summary goes to stderr at the end.

# Forgets the preceding words, so that the next word starts a new context.
function reset_context() {
  prev1 = ""
  prev2 = ""
}

BEGIN {
  reset_context()   # {prev1} = last word seen, {prev2} = the one before it.
  npairs = 0        # Pairs written so far.
  ntriples = 0      # Triples written so far.
  max = 10000       # Cap on the number of pairs and of triples.
}

# Word record: emit the pair and the triple that end at this word.
/^a/ {
  word = tolower($2)

  # Pairs beyond the cap are dropped silently; triples may still follow.
  if (prev1 != "" && npairs < max) {
    printf "[2] %s.%s\n", prev1, word
    npairs++
  }

  if (prev2 != "") {
    printf "[3] %s.%s.%s\n", prev2, prev1, word
    ntriples++
  }

  # Checked on every word record.  The END rule still runs after {exit}.
  if (ntriples >= max) {
    exit 0
  }

  # Slide the two-word window forward.
  prev2 = prev1
  prev1 = word
  next
}

# Context break.  Only reached by lines that are not word records,
# because the rule above ends with {next}.
/@section/ || /[=]/ {
  reset_context()
}

END {
  printf "%6d word pairs generated\n", npairs > "/dev/stderr"
  printf "%6d word triples generated\n", ntriples > "/dev/stderr"
}