#! /usr/bin/gawk -f
# Last edited on 2001-01-01 22:57:46 by stolfi

# Reads one count per non-empty line from standard input, normalizes the
# counts so they sum to 1, and prints the Shannon entropy of the resulting
# distribution, in bits, formatted with "%8.4f".
#
# Empty lines are skipped.  Every other line must consist of exactly one
# field holding an unsigned decimal number (optional single leading "+",
# at most one decimal point, at least one digit).  A malformed line is
# reported on stderr with its line number and the program exits with
# status 1.  If there is at least one count but they all add up to zero,
# the distribution is undefined: an error is reported on stderr and the
# program exits with status 1.  With no input lines at all the printed
# entropy is 0.

BEGIN {
  abort = -1;  # Negative: no error so far.  Otherwise: pending exit status.
  usage = ( \
    "compute-entropy < INFILE > OUTFILE" \
  );
  split("", ct);  # Start with an empty array; ct[0..n-1] are the counts.
  n = 0;          # Number of counts read.
  totct = 0;      # Sum of all counts read.
}

# Stop processing further records once an error has been flagged.
(abort >= 0) { exit abort; }

# Any non-empty line: validate it and record its count.
/./ {
  if (NF != 1) { data_error("bad field count"); }
  # Exactly one number: "5", "5.", "5.25", ".25", each optionally with "+".
  # Strings such as "1.2.3" or "++5" are rejected because awk would
  # silently convert them to a different numeric value.
  if ($1 !~ /^[+]?([0-9]+([.][0-9]*)?|[.][0-9]+)$/) { data_error("bad data format"); }
  ct[n] = $1;
  totct += $1;
  n++;
  next;
}

END {
  # An "exit" in a rule or function still runs END; honor the earlier error.
  if (abort >= 0) { exit abort; }
  # All-zero counts cannot be normalized; avoid a fatal division by zero.
  if ((n > 0) && (totct <= 0)) {
    printf "%s\n", "total count is zero, entropy undefined" >> "/dev/stderr";
    exit 1;
  }
  printf "%8.4f\n", entropy_bits(ct, n, totct);
}

# Returns the entropy, in bits, of the distribution cnt[0..num-1]/total.
# Zero counts contribute nothing.  Returns 0 when num is 0.  The caller
# must ensure total > 0 whenever num > 0.  The array cnt is not modified.
# (i, p, h are local variables.)
function entropy_bits(cnt, num, total,    i, p, h)
{
  h = 0;
  for (i = 0; i < num; i++) {
    p = cnt[i] / total;
    if (p > 0) { h += -p*log(p); }
  }
  return h / log(2.0);  # Convert from nats to bits.
}

# Reports a problem with the current input line on stderr, flags the
# error in "abort", and terminates with status 1.
function data_error(msg)
{
  printf "line %d: %s\n", NR, msg >> "/dev/stderr";
  abort = 1;
  exit 1;
}

# Reports a command-line problem on stderr, flags the error in "abort",
# and terminates with status 1.
function arg_error(msg)
{
  printf "%s\n", msg >> "/dev/stderr";
  abort = 1;
  exit 1;
}