#! /bin/csh -f
# Last edited on 2008-06-15 21:33:41 by stolfi

set usage = "$0 {INFILE1}.cts {INFILE2}.cts ... > {OUTFILE}.mct"

# Merges the given files (normally two or more) produced by "uniq -c"
# into a single table.
#
# Assumes that each line in each input file
# (apart from blanks and '#'-comments) has the format
# "{COUNT} {WORD}" where neither {COUNT} nor {WORD} contain any blanks.
#
# Output (to stdout):
#   * one header line "# {FILENAME}" per input file, in argument order;
#   * then one line per distinct {WORD}, sorted by {WORD}, holding one
#     count column per input file (0 when the word is absent from that
#     file), followed by the {WORD} itself.
#
# Exit status: 0 on success, 1 on usage error, temp-dir failure or interrupt.

if ( $#argv == 0 ) then
  # Diagnostics go to stderr: stdout is normally redirected to {OUTFILE}.mct.
  # Append (>>) so that a caller's stderr log file is not truncated.
  echo "usage: ${usage}" >> /dev/stderr
  exit 1
endif

# Work in a private, unpredictable temporary directory rather than
# guessable "/tmp/$$-*" names.
set tmp = `mktemp -d /tmp/mct-XXXXXX`
if ( ! -d "${tmp}" ) then
  echo "$0: cannot create temporary directory" >> /dev/stderr
  exit 1
endif

# Remove the temporary directory if the script is interrupted.
onintr interrupted

set jfile = "${tmp}/join.epr"
set sfile = "${tmp}/sort.epr"
set tfile = "${tmp}/temp.epr"

# ${jfile} accumulates the merged table: "{WORD} {COUNT1} {COUNT2} ...".
# It starts out empty, so the first join just copies the first input.
/bin/touch "${jfile}"

# ${ofmt} is the "join -o" list that reproduces every field already
# present in ${jfile}: the join key ("0") first, then fields 2.2, 2.3, ...
set ofmt = "0"

# ${n} is the number of fields in ${jfile}; initially just the key
@ n = 1

set noglob
while ( $#argv > 0 )
  echo "# $1"

  # Strip '#'-comments and blank lines, then sort by {WORD} (field 2),
  # as required by "join".  The comment pattern is a plain '#': inside a
  # bracket expression a backslash is literal, so "[\#]" would also
  # truncate any {WORD} that contains a backslash.
  cat "$1" \
    | sed -e 's:#.*$::' -e '/^[[:space:]]*$/d' \
    | sort -b -k2,2 \
    > "${sfile}"

  # Full outer join on {WORD}: keep all previous columns of ${jfile} and
  # append this file's {COUNT} (field 1.1), filling missing entries with 0.
  join \
      -1 2 -2 1 \
      -a 1 -a 2 \
      -e 0 \
      -o "${ofmt},1.1" \
      "${sfile}" "${jfile}" \
    > "${tfile}"
  mv "${tfile}" "${jfile}"

  # ${jfile} gained one column; extend the output format to cover it.
  @ n = ${n} + 1
  set ofmt = "${ofmt},2.${n}"
  shift
end

# Print the merged table with the count columns first and the word last.
# (The backslash-newlines inside the quotes are required by csh.)
sort -b -k1,1 "${jfile}" \
  | gawk ' \
      /./ { \
        for (i = 2; i <= NF; i++) printf " %7d", $(i); \
        printf " %s\n", $1; \
      } \
    '

/bin/rm -rf "${tmp}"
exit 0

interrupted:
/bin/rm -rf "${tmp}"
exit 1