# Gathering and plotting email statistics
# Last edited on 2008-05-03 14:20:14 by stolfi
#
# NOTE: the commands below use csh/tcsh syntax ("set", "foreach ... end").

# PURPOSE
#
#   The goal of this project is to extract and plot several statistics
#   from Unix-format mail folders.

# GLOBAL SETTINGS AND VARIABLES
#
#   Execute:

# Directory holding the mail folders, and directory for all outputs.
set maildir = ${HOME}/mail
set outdir = ./out
mkdir -p ${outdir}/froms

# Folder types, as a word list and as a comma-separated string
# (the latter is used for brace expansion below).
set types = ( test sent unsorted good trash )
set typescm = ( `echo "${types}" | tr ' ' ','` )
echo "${typescm}"

# COUNTING FROM LINES BY MONTH
#
#   Prepare manually the folder lists:

# The lists are named "folders-<type>.dir", which is the name that the
# loops below read.
emacs folders-{${typescm}}.dir

#   Place in each file the bare names of the folders of the
#   corresponding type.  The lists need not be disjoint.
#
#   Run:

foreach type ( ${types} )
  echo " "
  echo "=== ${type} ======================="
  set folders = ( `cat folders-${type}.dir` )

  # Collect the "From " separator lines of every folder of this type.
  # The folder names are bare, so egrep must run inside ${maildir};
  # "-a" makes egrep treat folders with binary content as text.
  ( cd ${maildir} && egrep -a -e '^From ' ${folders} ) \
    > ${outdir}/froms/${type}.raw

  reformat-from-lines ${outdir}/froms/${type}.raw \
    > ${outdir}/froms/${type}.frd

  # Take field 1 of each line, strip a trailing "-NN" from it, and
  # count how many lines share each resulting key.
  # NOTE(review): presumably field 1 is a "yyyy-mm-dd" date, so the key
  # is the month; confirm against the output of reformat-from-lines.
  cat ${outdir}/froms/${type}.frd \
    | gawk \
        ' //{ \
            dt = $1; \
            sub(/-[0-9][0-9]$/, "", dt); \
            print dt; \
          } \
        ' \
    | sort -k1,1 | uniq -c \
    > ${outdir}/froms/${type}.cts

  cat ${outdir}/froms/${type}.cts
end

# PLOTTING TRASH VS. NON-TRASH
#
#   Create the "*.mspm" files as above.  Then run:
#
#   NOTE(review): the loop above writes "*.cts" files, not "*.mspm";
#   confirm which files plot-mail-per-month actually reads.

plot-mail-per-month \
  ${outdir} \
  "2004-08" "2008-04" \
  trash good sent

#   or

plot-mail-per-month \
  ${outdir} \
  "2004-08" "2008-04" \
  trash good unsorted

# COUNTING "@DCC.UNICAMP.BR" MESSAGES
#
#   For each type, count the lines matching "To: *stolfi@dcc" and
#   "To: *stolfi@ic" (case-insensitive).  The patterns are not anchored
#   at the start of the line, so any header line containing "To:"
#   followed by the address is counted.

foreach type ( test trash good )
  echo " "
  echo "=== ${type} ======================="
  set folders = ( `cat folders-${type}.dir` )

  # As in the first loop: the folder names are bare, so search inside
  # ${maildir}, and use "-a" so that binary content is matched as text
  # instead of producing a single "Binary file ... matches" line.
  printf "dcc: "
  ( cd ${maildir} && egrep -a -i -e 'To: *stolfi@dcc' ${folders} ) | wc -l
  printf "ic: "
  ( cd ${maildir} && egrep -a -i -e 'To: *stolfi@ic' ${folders} ) | wc -l
end