#! /usr/bin/gawk -f
# Tabulates the symbols that follow a given two-character digraph in the input.
#
# Usage:  gawk -f THIS_FILE -vdig=XX < text
#
#   dig   the digraph to search for; must be exactly two characters long.
#
# Every non-empty input line is padded with one blank on each side and then
# scanned for occurrences of `dig`; the character right after each occurrence
# is tallied.  At end of input three tables are printed, each with one column
# per distinct following symbol plus a "TOT" column:
#   1. raw counts,
#   2. probabilities scaled to the range 0..99,
#   3. per-symbol entropy terms -p*log2(p), whose total is the entropy (bits).
#
# Exit status: 0 on success, 1 when `dig` is missing or not two characters.

BEGIN {
  n = 0;             # total number of digraph occurrences seen
  split("", xn);     # xn[s] = number of times symbol s followed the digraph
  bad_args = 0;      # set when the command line is unusable (checked in END)

  # gawk-specific: make every `for (x in arr)` walk indices in ascending
  # string order, so table columns come out in a stable, reproducible order.
  PROCINFO["sorted_in"] = "@ind_str_asc";

  if (length(dig) != 2) {
    print "must specify \"-vdig=XX\"" > "/dev/stderr";
    # `exit` inside BEGIN still runs the END rule, so leave a flag behind;
    # END uses it to skip the report and keep the failure status.
    bad_args = 1;
    exit 1
  }
}

# Records one occurrence of symbol `x` in the count table `xn` and bumps the
# global occurrence counter `n`.
function tally(x, xn) {
  n++;
  if (!(x in xn)) {
    xn[x] = 1
  } else {
    xn[x]++
  }
}

# Prints a one-row table: a header line of symbols, a dashed separator, and a
# data row labelled with the global `dig`.
#   t    value printed in the final "TOT" column
#   xt   array indexed by symbol holding the value for each column
#   fmt  printf conversion suffix appended to "%5" (e.g. "d" or ".3f")
#   x    local loop variable (not an argument; callers pass three values)
# Entries equal to zero are shown as "." instead of a number.
function ptable(t, xt, fmt,    x) {
  # Header and separator are indented by two blanks to line up with the
  # two-character digraph that labels the data row.
  printf "%2s", "";
  for (x in xt) printf " %5s", x;
  printf " %5s", "TOT";
  printf "\n";

  printf "%2s", "";
  for (x in xt) printf " %5.5s", "---------";
  printf " %5.5s", "---------";
  printf "\n";

  printf "%s", dig;
  for (x in xt) {
    if (xt[x] == 0) {
      printf " %5s", ".";
    } else {
      printf(" %5" fmt, xt[x]);
    }
  }
  printf(" %5" fmt, t);
  printf "\n";
}

# Returns the entropy contribution -p*log2(p) of an outcome with probability
# `p`; defined as 0 for p == 0 to avoid evaluating log(0).
function entropy(p) {
  if (p == 0) {
    return 0.0
  } else {
    return (- p * log(p)/log(2.0))
  }
}

# Scales probability `p` to the integer range 0..m, rounding to nearest.
function pscale(p, m) {
  return int((m*p) + 0.5)
}

# Non-empty line: slide a two-character window over the blank-padded line and
# tally the character that follows every window equal to `dig`.
/./ {
  m = length($0);
  w = (" " ($0) " ");
  # NOTE(review): at i == m+1 the window is the last character plus the
  # trailing pad blank, and the "next symbol" lies past the end of `w`, so it
  # is the empty string.  This only matters when `dig` ends in a blank;
  # confirm whether tallying "" in that case is intended.
  for (i = 1; i <= m+1; i++) {
    ct = substr(w, i, 2);
    if (ct == dig) tally(substr(w, i+2, 1), xn)
  }
}

# Empty line: nothing to scan.
/^$/ { next; }

END {
  # Reached via `exit 1` in BEGIN: do not print a report, keep the status.
  if (bad_args) exit 1;

  printf "Symbol counts after digraph \"%s\":\n", dig;
  printf "\n";
  ptable(n, xn, "d");
  printf "\n";

  printf "Next-symbol probabilities (× 99):\n";
  printf "\n";
  # The loop body only runs when at least one symbol was tallied, so n > 0
  # whenever the division is evaluated.
  for (x in xn) xp[x] = pscale(xn[x]/n, 99);
  ptable(pscale(1.000, 99), xp, "d");
  printf "\n";

  printf "Next-symbol entropies:\n";
  printf "\n";
  # TOT for this table is the sum of the per-symbol terms, i.e. the entropy
  # of the next-symbol distribution.
  htot = 0;
  for (x in xn) {
    xh[x] = entropy(xn[x]/n);
    htot += xh[x];
  }
  ptable(htot, xh, ".3f");
  printf "\n";
  exit 0;
}