#! /usr/bin/gawk -f
# Last edited on 2000-06-13 02:53:32 by stolfi

# Reformats the total lines generated by tabulate-attribues
# and computes the relative frequencies.
# Input line format:
#
#    TAG NBAD N0 N1 ... Nm
# 
# where TAG is an arbitrary tag, NBAD is the number of bad words,
# N0..Nm the counts for different values of the attribute.

# Initial state:
#   first - 1 until the table header has been printed, 0 afterwards.
#   abort - negative while no exit has been requested; otherwise the
#           exit status to terminate with.
BEGIN {
  first = 1;
  abort = -1;
}

# Once an exit status has been recorded, stop instead of processing
# further records.
abort >= 0 {
  exit abort;
}

# Formats one input line "TAG NBAD N0 N1 ... Nm" as a table row:
# the tag, the raw counts, their total, the relative frequency of
# each count, and a final column labelled "SD" in the header.
# Column index -1 holds NBAD (field 2); index k >= 0 holds Nk
# (field k+3).  Lines whose counts sum to zero produce no row.
/./ {
  # A whitespace-only or tag-only line carries no counts.  Skip it
  # before it can shrink max_i or trigger a header with no columns.
  if (NF < 2) { next; }
  max_i = NF-3;
  # The header width is taken from the first line that has counts.
  if (first) { print_head(); first = 0; }
  tot = 0;
  for (i = -1; i <= max_i; i++)
    { ct[i] = $(i+3); tot += ct[i]; }
  # Nothing to normalize by: emit no row for this line.
  if (tot == 0) { next; }
  printf "%s |", $1;
  # Raw counts, with "." standing in for zero.
  for (i = -1; i <= max_i; i++)
    { printf " %5s", (ct[i]==0 ? "." : ct[i]); }
  printf " | %5d |", tot;
  # Relative frequencies count/tot, with ".   " standing in for zero.
  for (i = -1; i <= max_i; i++)
    { printf " %5s", (ct[i]==0 ? ".   " : sprintf("%.3f", ct[i]/tot)); }
  # tot is non-zero here, so no zero guard is needed.  sqrt(1/(4*tot))
  # is the value of sqrt(p*(1-p)/tot) at p = 1/2, its largest value.
  printf " | %5s |", sprintf("%.3f", sqrt(1/(4*tot)));
  printf "\n";
}

# Final rule: propagate a pending abort status, otherwise close the
# table.
END {
  if (abort >= 0) { exit abort; }
  # first is still set when no header was ever printed (e.g. empty
  # input); in that case there is no table to close, and max_i was
  # never assigned, so printing a dashed rule would be meaningless.
  if (!first) { print_tail(); }
}

# Prints the table header row followed by a dashed rule.
# Reads the global max_i to decide how many attribute columns to
# label; column -1 is labelled "?" and the others by their index.
# The column labels are printed twice: once over the raw counts and
# once over the relative frequencies.
# Callers pass no arguments; i is declared as an extra parameter only
# to make the loop index local, so the caller's i is left untouched.
function print_head(   i)
{
  printf "sec   |";
  for (i = -1; i <= max_i; i++) { printf " %5s", (i == -1 ? "?":i); }
  printf " | %5s |", "tot";
  for (i = -1; i <= max_i; i++) { printf " %5s", (i == -1 ? "?":i); }
  printf " | %5s |", "SD";
  printf "\n";
  print_dashes();
}

# Closes the table by printing a dashed rule, the same one that
# print_head emits under the header row.
function print_tail() { print_dashes(); }

# Prints one horizontal rule line: a "-----" cell for the tag column,
# for each of the max_i+2 count columns, for the total, for each of
# the max_i+2 frequency columns, and for the last column.
# Reads the global max_i.
# Callers pass no arguments; i is declared as an extra parameter only
# to make the loop index local, so the caller's i is left untouched.
function print_dashes(   i)
{
  printf "%5s |", "-----";
  for (i = -1; i <= max_i; i++) { printf " %5s", "-----"; }
  printf " | %5s |", "-----";
  for (i = -1; i <= max_i; i++) { printf " %5s", "-----"; }
  printf " | %5s |", "-----";
  printf "\n";
}

# Reports msg on /dev/stderr, prefixed with the current input file
# name and line number, records exit status 1 in the global abort,
# and calls exit with that status.
function input_error(msg)
{
  abort = 1;
  printf "file %s, line %d: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  exit abort;
}