#! /usr/bin/gawk -f

# Reads from stdin a bunch of pairs of the form COUNT WORD,
# one per line (such as produced by "uniq -c"), where the 
# same WORD may occur several times.  Adds all COUNTs for
# the same WORD, and writes the resulting TOTCOUNT WORD pairs 
# to standard output, in some order.

BEGIN {
  # Synopsis shown when the script is invoked incorrectly.
  usage = "combine-freqs < INFILE > OUTFILE";

  # Global error flag; set to 1 just before any error exit so that
  # later rules can tell that processing was aborted.
  abort = 0;

  # ARGC includes the program name itself, so any value other than 1
  # means operands were given.  Input must come from stdin only.
  if (ARGC != 1) {
    abort = 1;
    printf "usage: %s\n", usage > "/dev/stderr";
    exit 1;
  }
}

/./ { 
  # Runs for every input line holding at least one character; completely
  # empty lines are skipped.  A line of only blanks still matches and,
  # with the default field separator, is rejected below because NF is 0.

  # Defensive: stop reading if an earlier rule already flagged an error.
  if (abort) { exit 1; }

  # Each record must be exactly "COUNT WORD".  The diagnostic is written
  # with printf (not print) so the %d and %s placeholders are expanded.
  if (NF != 2) 
    { printf "line %d, bad format \"%s\"\n", NR, $0 > "/dev/stderr"; abort=1; exit 1; }

  # Accumulate COUNT ($1) into the running total for WORD ($2).
  ct[$2] += $1;
}

END {
  # An "exit" executed in BEGIN or in a main rule still runs END.  If
  # processing was aborted, do not emit partial totals that would look
  # like a valid result; just propagate the failure status.
  if (abort) { exit 1; }

  # Write one "TOTCOUNT WORD" line per distinct word.  The traversal
  # order of "for (w in ct)" is unspecified, so output is unordered.
  for (w in ct) { printf "%7d %s\n", ct[w], w; }
}