#! /usr/bin/gawk -f

# Usage: $0 -v TOTAL=NNN < joinfile > output.cmp

# This script is used internally by compare-freqs.
# The input should be a file in the format "word  ct1 fr1  ct2 fr2 ... ctn frn"
# where the "ct"s are counts and the "fr"s are freqs in [0_1].
# Adds the counts and frequencies of each word in the input.
# The output has the format " ctt frt  ct1 fr1 ct2 fr2 ... ctn frn word"
# where "ctt" is the total count and "frt" the total frequency.
# The command line parameter "TOTAL" is used to compute the total percentages.
# WARNING: the word should not contain any spaces.

BEGIN {
    # TOTAL must be supplied on the command line ("-v TOTAL=NNN");
    # a variable that was never assigned has zero length.
    if (length(TOTAL) == 0) {
        printf "must define TOTAL\n" > "/dev/stderr";
        exit 1;
    }
}

# Header lines (starting with "#"): emit a two-line column header.
# The first line has "TOTAL", then fields 2..NF of the input line, then "WORD";
# the second line has a row of dashes under each of those columns.
/^#/ {
    dashes = "--------------------------";

    # Leading "TOTAL" column of each line.
    titles = sprintf("##%11.11s", "TOTAL");
    rulers = sprintf("# %11.11s", dashes);

    # One 11-character column per input field after the first.
    for (i = 2; i <= NF; i++) {
        titles = titles sprintf("  %-11.11s", $(i));
        rulers = rulers sprintf("  %-11.11s", dashes);
    }

    # Trailing "WORD" column.
    titles = titles sprintf("  %-11.11s", "WORD");
    rulers = rulers sprintf("  %11.11s", dashes);

    printf "%s\n%s\n", titles, rulers;
    next;
}

# Data lines (any non-empty line that is not a "#" header):
# input is "word ct1 fr1 ct2 fr2 ...".  Sums the counts in the even-numbered
# fields, then prints the summed count and its ratio to TOTAL, followed by
# each (count, freq) pair as read, followed by the word (field 1).
/./ { 
    # Guard the division below.  An empty, zero or non-numeric TOTAL
    # evaluates to 0 and would otherwise abort gawk with a bare
    # "division by zero attempted" fatal error.
    if (TOTAL + 0 == 0) {
        printf "TOTAL must be a nonzero number (got \"%s\")\n", TOTAL > "/dev/stderr";
        exit 1;
    }

    # Counts sit in fields 2, 4, 6, ...; the frequencies follow each count.
    TOTCT = 0;
    for (i = 2; i <= NF; i += 2) { TOTCT += $(i); }

    printf "  %5d %5.3f", TOTCT, (TOTCT / TOTAL);
    for (i = 2; i <= NF; i += 2) printf "  %5d %5.3f", $(i), $(i+1);
    printf "  %s\n", $1;
    next
}