#! /usr/bin/gawk -f
# Last edited on 2000-05-26 02:34:49 by stolfi

# Usage: "$0 < joinfile > output.cmp"

# This script is used internally by compare-freqs. The input should be
# a file in the format "CT1 FR1  CT2 FR2  ...  CTN FRN  WORD" where the
# "CT"s are counts and the "FR"s are freqs in [0_1].  The WORD
# should not contain any spaces.
# 
# Computes the maximum frequency of each word. The output has the
# format "  FRMAX  CT1 FR1  CT2 FR2  ...  CTN FRN  WORD" where "FRMAX"
# is the maximum of "FR1" through "FRN".
# 
# A comment in the input file beginning with "##" followed by a list
# of column names is turned into a pair of column header comments. 
# Other "#"-comments are ignored.

# Column-header comment (line starting with "##"): emit two comment
# lines and move on to the next record.
#   1. a title row: "MAXFR", then fields 2..NF of the input line (field 1
#      begins with the "##" marker and is not printed), then "WORD";
#   2. a row of dashes with the same column layout.
# Every name is padded/truncated to 11 characters.
/^[#][#]/ {
    # Title row.
    printf "##%5.5s", "MAXFR";
    col = 2;
    while (col <= NF)
      {
        printf "  %-11.11s", $(col);
        col++;
      }
    printf "  %-11.11s", "WORD";
    printf "\n";

    # Underline row: the dash string is longer than any column, so the
    # precision in each format trims it to the column width.
    dashes = "--------------------------";
    printf "# %5.5s", dashes;
    for (col = NF; col >= 2; col--)
      { printf "  %-11.11s", dashes; }
    printf "  %11.11s", dashes;
    printf "\n";
    next;
  }

# Any line starting with "#" that reaches this rule is discarded
# without producing output.
$0 ~ /^[#]/ { next }

# Data line: print the record prefixed by the largest of its frequency
# fields.  Fields are read as (count, freq) pairs followed by the word
# in the last field, so the freqs are the even-numbered fields
# 2, 4, ..., up to NF-1.
#
# The rule fires only when the line has at least one field; empty and
# whitespace-only lines are skipped instead of yielding a bogus
# "0.000" record.
NF > 0 {
    MAXFR = 0;
    for (i = 2; i <= NF-1; i += 2)
      {
        # "+ 0" forces a numeric comparison even when the field does
        # not look like a number (it would otherwise compare as text).
        fr = $(i) + 0;
        if (fr > MAXFR) { MAXFR = fr; }
      }

    # Output layout: maximum freq, then every (count, freq) pair, then the word.
    printf "  %5.3f", MAXFR;
    for (i = 1; i <= NF-1; i += 2) printf "  %5d %5.3f", $(i), $(i+1);
    printf "  %s\n", $(NF);
    next
  }