#! /usr/bin/gawk -f
# Last edited on 2001-01-05 23:30:49 by stolfi

# compute-freqs: turn a COUNT ITEM table into a COUNT FREQ ITEM table.
#
# Reads a file of COUNT ITEM pairs, as produced by "uniq -c".
# Outputs a similar file with COUNT FREQ ITEM lines, where FREQ is
# the fraction of COUNT relative to the total of all COUNTs.
#
# Blank lines and lines whose first non-blank character is "#" are
# stored verbatim and re-emitted in their original position.
#
# If outputTotal is 1, also outputs an extra line
# with ITEM = "+" and COUNT = total item count.
#
# All records are buffered in ct[]/it[] because FREQ cannot be
# computed until the grand total is known (i.e. in the END rule).

BEGIN {
  # abort < 0 means "no error so far"; fatal_error() sets it to the
  # exit status so that the guard rule and END can bail out.
  abort = -1;
  usage = ( \
    "compute-freqs \\\n" \
    "  [ -v outputTotal=BOOL ] \\\n" \
    "  < INFILE.wct > OUTFILE.wfr" \
  );

  if (outputTotal == "") { outputTotal = 0; }
  total = 0;   # running sum of all COUNT fields
  n = 0;       # number of buffered records (data + comment lines)
}

# Once an error has been flagged, stop processing further input.
(abort >= 0) { exit abort; }

# Comment or blank line: remember it verbatim, marked by ct[] == "#".
/^[ ]*([#]|$)/ {
  ct[n] = "#";
  it[n] = $0;
  n++;
  next;
}

# Data line: must be exactly "COUNT ITEM".
// {
  if (NF != 2) {
    # Report the record number (FNR); the original printed NF here,
    # which is the field count and not a line number.
    fatal_error(("line " FNR ": bad input format = «" $0 "»"));
  }
  total += $1;
  ct[n] = $1;
  it[n] = $2;
  n++;
  next;
}

END {
  # "exit" inside a rule still runs END, so re-check the error flag.
  if (abort >= 0) { exit abort; }

  # Avoid division by zero when the input has no counts at all.
  den = ( total != 0 ? total : 1 );

  # NOTE(review): everything from the loop condition below up to the
  # redirection in fatal_error() was missing from the damaged source and
  # has been reconstructed from the header comment; the exact printf
  # formats are an assumption -- confirm against a pristine copy.
  for (i = 0; i < n; i++) {
    if (ct[i] == "#") {
      print it[i];
    } else {
      printf "%7d %7.5f %s\n", ct[i], ct[i]/den, it[i];
    }
  }
  if (outputTotal) {
    printf "%7d %7.5f %s\n", total, total/den, "+";
  }
}

# Prints msg to standard error, records a non-zero exit status in the
# global "abort", and terminates the script with that status.
function fatal_error(msg) {
  printf "%s\n", msg > "/dev/stderr";
  abort = 1;
  exit abort;
}