#! /usr/bin/gawk -f
# Last edited on 1999-11-25 00:51:40 by stolfi

# Reads records whose first three fields are COUNT, STRING and CONTEXT,
# accumulates the counts, and prints a table with one row per context and
# one column per string.  Each cell holds
#
#     int(10 * fsc / (fs * fc) + 0.5)
#
# where fsc, fs and fc are the pair, string and context counts divided by
# the grand total n.
#
# Optional variables (e.g. set with "-v"):
#   ncmin  contexts whose total count is below this are discarded
#          (default 0)
#   ncmax  contexts whose total count is above this are discarded
#          (default 999999999)

BEGIN {
  # split("", a) makes "a" an empty array.
  split("", nsc);   # nsc[s,c] = total count of string s in context c
  split("", ns);    # ns[s]    = total count of string s
  split("", nc);    # nc[c]    = total count of context c
  n = 0;            # grand total of all counts
  split("", str); nstr = 0;   # distinct strings, in order of first appearance
  split("", ctx); nctx = 0;   # distinct contexts, in order of first appearance
  if (ncmin == "") { ncmin = 0; }
  if (ncmax == "") { ncmax = 999999999; }
}

# Every non-empty input line contributes one (count, string, context) record.
/./ {
  count = $1; s = $2; c = $3;
  nsc[s,c] += count;
  if (! (s in ns)) { str[nstr] = s; nstr++; }
  ns[s] += count;
  if (! (c in nc)) { ctx[nctx] = c; nctx++; }
  nc[c] += count;
  n += count;
}

END {
  # Discard contexts that occur with wrong freq:
  j = 0;
  while (j < nctx) {
    c = ctx[j];
    if ((nc[c] < ncmin) || (nc[c] > ncmax)) {
      # Remove this context's contribution from every string total
      # and from the grand total.
      for (s in ns) { ns[s] -= nsc[s,c]; nsc[s,c] = 0; }
      n -= nc[c];
      nc[c] = 0;
      # Fill the hole with the last context; slot j is re-examined on the
      # next pass, so j is not advanced here.
      if (j < nctx-1) { ctx[j] = ctx[nctx-1]; }
      nctx--;
    } else {
      j++;
    }
  }

  # Print table:
  # NOTE(review): in the copy this was restored from, the text between
  # "for (i=0; i" and a redirection to "/dev/stderr" was missing.  The
  # header columns, the row loop and the row label below are a
  # reconstruction from the surviving fragments -- confirm against an
  # intact copy of the script.
  printf "%-20s %4s ", "CONTEXT", "TOT";
  for (i = 0; i < nstr; i++) { printf " %3s", str[i]; }
  printf "\n";

  for (j = 0; j < nctx; j++) {
    c = ctx[j];
    printf "%-20s %4d ", c, nc[c];
    for (i = 0; i < nstr; i++) {
      s = str[i];
      # NOTE(review): the original had a statement here that wrote to
      # "/dev/stderr"; its text was lost and it is not re-invented.
      fv = "-";
      if (n != 0) {
        fs = ns[s]/n; fc = nc[c]/n; fsc = nsc[s,c]/n;
        # A string whose every occurrence was in a discarded context has
        # ns[s] == 0; dividing by fs*fc would then be a fatal error, so
        # such cells are shown as "-".
        if (fs*fc != 0) {
          fv = sprintf("%d", int(10*fsc/(fs*fc) + 0.5));
        }
      }
      printf " %3s", fv;
    }
    printf "\n";
  }
}