#! /bin/csh -f

set usage = "$0 [ -external | -unicamp | -local ] TAG LOGS..."

# Reads a bunch of access and error logs, writes summary files
#
#   http-stats-by-file-${TAG}.cts - stats by full pathname
#   http-stats-by-host-${TAG}.cts - stats by requesting host
#   http-stats-by-user-${TAG}.cts - stats by page owner
#
# The lines of these files have the format
#
#   COUNT TOTBYTES ITEM
#
# where COUNT is the number of accesses, TOTBYTES is the total
# number of bytes transferred by those accesses, ITEM is the
# thing of interest (filename, hostname, or username)
#
# Requires the environment variable STOLFIHOME, which locates the
# helper tools (select-log-entries-by-origin, add-item-refs.nawk).

# Parse the leading origin options. Each recognized option is appended
# to ORGS, and FILTER is set so that the logs are passed through the
# origin selector before being summarized.
set ORGS = ( )
unset FILTER
while ( ( $#argv > 1 ) && ( "x$1" =~ x-* ) )
  if ( "x$1" == "x-external" || "x$1" == "x-unicamp" || "x$1" == "x-local" ) then
    set FILTER
    set ORGS = ( ${ORGS} $1 )
    shift
  else
    echo "error: $0 $*"
    echo "usage: ${usage}"
    exit 1
  endif
end

# At least a TAG and one log file must remain.
if ( $#argv < 2 ) then
  echo "error: $0 $*"
  echo "usage: $usage"
  exit 1
endif

set TAG = "$1"
set LOGS = ( $argv[2-] )

set LIB = "${STOLFIHOME}/programs/csh/http-log-tools"

# Scratch files: TMP holds one (possibly filtered) log at a time,
# CAT accumulates the normalized entries of all logs.
set TMP = "/tmp/$$.log"
set CAT = "/tmp/$$-cat.log"

# Output files.
set FCT = "http-stats-by-file-${TAG}.cts"
set HCT = "http-stats-by-host-${TAG}.cts"
set DCT = "http-stats-by-user-${TAG}.cts"

/bin/rm -f ${TMP} ${CAT}
/bin/touch ${CAT}

# Normalize every readable log and append the result to CAT.
# The sed expressions, in order:
#   1. turn the first percent-encoded tilde into a plain tilde;
#   2. turn a trailing dash into a zero (presumably a missing byte count);
#   3. drop the request method that follows the opening double quote;
#   4. drop the HTTP 1.0 protocol suffix that precedes the closing double quote;
#   5. rewrite the line as: OK, first field, quoted request, last number;
#   6. delete every line that step 5 did not rewrite;
#   7. strip the OK marker again.
# Unreadable logs are reported and skipped.
foreach f ( ${LOGS} )
  if ( -r $f ) then
    echo '=== '$f' ==='
    if ( $?FILTER ) then
      ${LIB}/select-log-entries-by-origin ${ORGS} < $f > ${TMP}
    else
      cat $f > ${TMP}
    endif
    if ( ! (-z ${TMP} ) ) then
      cat ${TMP} \
        | sed \
            -e 's/%7[eE]/~/' \
            -e 's/-$/ 0/' \
            -e 's/"[GHP][EOSAU]*[TD] */"/' \
            -e 's/ *HTTP\/[V]*1\.0"/"/' \
            -e 's/^\([^ ]*\) .*"\([^"]*\)" *[-0-9][0-9]* *\([0-9][0-9]*\)$/OK \1 "\2" \3/' \
            -e '/^OK/\!d' \
            -e 's/^OK //' \
        >> ${CAT}
    endif
  else
    echo "warning: cannot read log file $f - skipped"
  endif
end

# NOTE(review): the sort invocations below use the obsolescent key
# syntax +1 -2nr +2 -3 (second field numeric descending, then third
# field; equivalent to -k 2,2nr -k 3,3). Confirm that the sort(1) on
# the target system still accepts it.

# Gather by-file statistics:
# The sed expressions strip leading slashes, rewrite a home directory
# prefix to a tilde, remove the first public_html component, and cut
# off whatever follows the first question mark.
cat ${CAT} \
  | cut -d ' ' -f2,3 \
  | sed \
      -e 's@^"/*@"@' \
      -e 's@^"home/[^ "/]*/@"~@' \
      -e 's@public_html/@@' \
      -e 's@^"\([^"? ]*?\)[^" ]*" @"\1" @' \
  | sort \
  | nawk -f ${LIB}/add-item-refs.nawk \
  | sort +1 -2nr +2 -3 \
  > ${FCT}

# Gather by-host statistics:
cat ${CAT} \
  | cut -d ' ' -f1,3 \
  | sort \
  | nawk -f ${LIB}/add-item-refs.nawk \
  | sort +1 -2nr +2 -3 \
  > ${HCT}

# Gather by-owner statistics:
# Same path cleanup as for the by-file statistics, then each path is
# reduced to its first component: a tilde name is kept bare, any other
# top-level directory keeps a trailing slash.
cat ${CAT} \
  | cut -d ' ' -f2,3 \
  | sed \
      -e 's@^"/*@"@' \
      -e 's@^"home/[^ "/]*/@"~@' \
      -e 's@public_html/@@' \
      -e 's@^"\([^"? ]*?\)[^" ]*" @"\1" @' \
      -e 's@^"\(~[^" /]*\)/[^" ]*" @"\1" @' \
      -e 's@^"\([^" /]*\)/[^" ]*" @"\1/" @' \
  | sort \
  | nawk -f ${LIB}/add-item-refs.nawk \
  | sort +1 -2nr +2 -3 \
  > ${DCT}

/bin/rm -f ${TMP} ${CAT}