#! /bin/bash
# Last edited on 2012-05-05 14:38:21 by stolfilocal

# Reads a file of word counts, as produced by "uniq -c"
# Outputs a similar file, where the counts are followed by 
# the corresponding fractions relative to the total count and the cumulative fractions
# up to and including the current line.
#
# Preserves blank lines and comments.

# Scratch copy of the input.  The data must be scanned twice (once to total
# the counts, once to print the fractions), so it cannot be consumed straight
# from a pipe.  The name is filled in by add_fractions using mktemp.
temp=""

# Removes the scratch file, if one exists.  Installed as the EXIT trap so the
# file is also removed when the script aborts or is interrupted.
remove_temp() {
  if [[ -n "${temp}" ]]; then
    rm -f -- "${temp}"
    temp=""
  fi
}
trap remove_temp EXIT

# awk interpreter: keep using /usr/bin/gawk when it is there, otherwise fall
# back to whatever gawk/awk is found on PATH.
if [[ -x /usr/bin/gawk ]]; then
  awk_cmd=/usr/bin/gawk
elif command -v gawk > /dev/null 2>&1; then
  awk_cmd=gawk
else
  awk_cmd=awk
fi

# Pass 1: prints the sum of the counts (field 1) of all data lines.
# Comment lines and blank lines are skipped.  A data line that does not have
# exactly two fields is reported on stderr and makes awk exit with status 1.
# An "exit" inside a main rule still runs END, hence the "bad" flag that keeps
# a partial sum from being printed.
addpgm='
  /^#/   { next }
  /^ *$/ { next }
  {
    if (NF != 2) {
      print ("bad line «" $0 "»") >> "/dev/stderr"
      bad = 1
      exit 1
    }
    S += $1
  }
  END { if (!bad) print S }
'

# Pass 2: copies comment and blank lines unchanged; for each data line prints
#   COUNT FRACTION CUMCOUNT CUMFRACTION REST
# where the fractions are relative to the variable "total" (set with -v).
# An empty or zero total is replaced by 1 so the divisions are always defined.
# Assigning $1="" rebuilds $0 with a leading blank, which is why the format
# has no space before the final %s.
fracpgm='
  BEGIN  { S = 0; total += 0; if (total == 0) total = 1 }
  /^#/   { print; next }
  /^ *$/ { print; next }
  {
    V = $1; S += V; $1 = ""
    printf "%7d %6.4f %7d %6.4f%s\n", V, (V/total), S, (S/total), $0
  }
'

# Reads a "uniq -c" style listing from stdin and writes it to stdout with the
# fraction and cumulative columns added.
# Returns 0 on success; non-zero (with nothing written to stdout) if the
# scratch file cannot be created or the input contains a malformed line.
add_fractions() {
  local total status

  temp="$(mktemp "${TMPDIR:-/tmp}/add-fractions.XXXXXX")" || {
    printf '%s\n' "cannot create temporary file" >&2
    return 1
  }

  if ! cat > "${temp}"; then
    remove_temp
    return 1
  fi

  # Abort before printing anything if the input is malformed.
  if ! total="$("${awk_cmd}" "${addpgm}" < "${temp}")"; then
    remove_temp
    return 1
  fi

  "${awk_cmd}" -v total="${total}" "${fracpgm}" < "${temp}"
  status=$?

  remove_temp
  return "${status}"
}

add_fractions || exit 1