#! /bin/csh -f
# Last edited on 2008-06-15 20:58:52 by stolfi

# One-line synopsis, printed whenever the command line is rejected.
# It lists every option the parser accepts, including "-maxlines".
set usage = "$0 [-sort {NUM}] [-freqs|-cumFreqs|-remFreqs] [-totals] [-maxlines {NLINES}] [-titles '{T1} ... {Tn} {Tw}'] [-widths '{W1} ... {Wn}'] {INFILE} ... > {OUTFILE}"

# Merges one or more count files (at least one {INFILE} is required)
# Assumes each line of each {INFILE} has fields {COUNT} {WORD}
# where {COUNT} is a number (integer or fraction)
#
# If "-sort" is given, sorts by the {COUNT} of the {NUM}th file decreasing,
# else by the {WORD} increasing.
#
# If "-freqs" is given, prints frequencies ×999 instead of counts.
#
# WARNING: assumes the {WORD} does not contain blanks.

# Default settings; the option parser may override any of them.

# Column titles ("-titles"); an empty list means none were given.
set tit = ( )
# Minimum widths of the count columns ("-widths").
set minwds = ( 1 )
# 999 is a sentinel meaning "measure the count width of each file";
# the frequency options replace it with a fixed width.
set maxwd = 999
# Handed to format-multi-counts as maxLines ("-maxlines").
set mxl = 999999999
# Default ordering: sort on the first field of the joined lines.
# The key is spelled "-k1,1" because the obsolete zero-based form "+0 -1"
# was dropped from POSIX and current GNU sort takes "+0" for a file name.
set srt = ( sort -k1,1 )
# Extra format-multi-counts arguments set by "-totals".
set tot = ( )
# Extra format-multi-counts arguments set by "-freqs", "-cumFreqs", "-remFreqs".
set frop = ( )

# Prepend ${STOLFIHOME}/voynich/work to the command search path (presumably
# where the helper commands used by this script live -- TODO confirm).
# NOTE: csh aborts here if STOLFIHOME is not set in the environment.
set path = ( ${STOLFIHOME}/voynich/work $path )

# Parse the leading options.  Every recognized option (and its value, if any)
# is shifted out of argv, so only the {INFILE} names remain afterwards.
# An unknown option, or a valued option with no value after it, prints a
# diagnostic and the usage line and exits with status 1.
#
# The "x" prefix in the comparisons keeps csh from mistaking an argument
# such as "-e" for one of its file-inquiry operators.
while ( ( $#argv > 0 ) && ( "x$1" =~ x-* ) )
  if ( "x$1" == "x-totals" ) then
    set tot = ( -v totals=1 )
    shift
  else if ( "x$1" == "x-freqs" ) then
    # The frequency options use a fixed count-column width of 3.
    set frop = ( -v freqs=1 )
    set maxwd = 3
    shift
  else if ( "x$1" == "x-cumFreqs" ) then
    set frop = ( -v cumFreqs=1 )
    set maxwd = 3
    shift
  else if ( "x$1" == "x-remFreqs" ) then
    set frop = ( -v remFreqs=1 )
    set maxwd = 3
    shift
  else if ( ( $#argv >= 2 ) && ( "x$1" == "x-titles" ) ) then
    # Split the titles at blanks, but with filename expansion turned off:
    # otherwise a title containing "*", "?", "[" or "{" would be replaced by
    # matching file names, or abort the script with "No match".
    set noglob
    set tit = ( $2 )
    unset noglob
    shift
    shift
  else if ( ( $#argv >= 2 ) && ( "x$1" == "x-widths" ) ) then
    set minwds = ( `echo $2` )
    shift
    shift
  else if ( ( $#argv >= 2 ) && ( "x$1" == "x-sort" ) ) then
    # Numeric, decreasing sort on field {NUM} of the joined lines.
    set srt = ( sort -k$2,$2gr )
    shift
    shift
  else if ( ( $#argv >= 2 ) && ( "x$1" == "x-maxlines" ) ) then
    set mxl = "$2"
    shift
    shift
  else
    # Diagnostics go to stderr: stdout is the data stream that the caller
    # redirects to {OUTFILE}.  csh cannot redirect stderr alone, hence sh.
    sh -c 'printf "%s\n" "$1" 1>&2' sh "invalid option $1"
    sh -c 'printf "%s\n" "$1" 1>&2' sh "usage: ${usage}"
    exit 1
  endif
end

# At least one {INFILE} must remain once the options have been consumed.
# Diagnostics go to stderr: stdout is the data stream that the caller
# redirects to {OUTFILE}.  csh cannot redirect stderr alone, hence sh.
if ( $#argv == 0 ) then
  sh -c 'printf "%s\n" "$1" 1>&2' sh "usage: ${usage}"
  exit 1
endif

# The remaining arguments are the count files to merge.
set files = ( $* )

# When titles were given there must be exactly one per file plus one more
# (the usage line calls the extra one {Tw}).
@ foo = 1 + $#files
if (($#tit > 0) && ($#tit != $foo)) then
  sh -c 'printf "%s\n" "$1" 1>&2' sh "wrong num of titles $#tit (must be 1 + num files)"
  exit 1
endif

# NOTE(review): ${tmp} is not referenced anywhere in the visible script;
# if it is ever needed, create the file with mktemp instead of relying on
# this predictable name.
set tmp = "/tmp/$$"

# Compute the width of the count column of each file, collected in ${wds}
# in the same order as ${files}.
#
# While ${maxwd} still holds the 999 sentinel, the width of file {n} is
# measured by running get-max-field-width on its first field; otherwise the
# fixed ${maxwd} is used.  Either way the result is raised to the minimum
# width for that column: ${minwds}[n] if present, else the last element of
# ${minwds}.

# An empty "-widths" list would make the last-element lookup below fail;
# fall back to a minimum width of 1.
if ( $#minwds == 0 ) then
  set minwds = ( 1 )
endif

set wds = ( )
@ n = 0
while ($n < $#files)
  @ n = $n + 1
  if ( $maxwd == 999 ) then
    set wd = "`get-max-field-width -v field=1 < $files[$n]`"
    # If the helper printed nothing (e.g. the file could not be read), the
    # comparison below would be a csh expression syntax error; use 0.
    if ( "x${wd}" == "x" ) set wd = 0
  else
    set wd = "$maxwd"
  endif
  if ( $n <= $#minwds ) then
    set minwd = "$minwds[$n]"
  else
    set minwd = "$minwds[$#minwds]"
  endif
  if ( ${wd} < ${minwd} ) set wd = "${minwd}"
  set wds = ( ${wds} ${wd} )
end

# Main pipeline: join the input files with join-counts, order the joined
# lines with the selected sort command, and feed the result to
# format-multi-counts together with the collected settings.
# $tot and $frop are deliberately unquoted, so that an empty list adds no
# argument at all; the titles and widths lists are each passed as one
# blank-separated string.
join-counts $files | $srt \
  | format-multi-counts $tot $frop \
      -v titles="$tit" \
      -v maxLines="$mxl" \
      -v widths="$wds"