#! /bin/csh -f
# Last edited on 2008-06-12 23:18:37 by stolfi

# Tabulates per-dataset totals for each EID species.
# Usage: "tabulate-datasets {DATASET}..."
#
# Assumes that for each {DATASET} and each file type
# {T} in {"dEID","hEID","pEID"} there is a file called
# "orig/eids/{DATASET}/{T}.items" containing one
# line for each EID item, with fields
#
#   {ITEM_ID} {GENE_ID} {N_LINES} {N_BYTES}
#
# Writes to standard output a header row followed by one row
# per {DATASET}, with these columns:
#
#   Dataset   the {DATASET} name as given on the command line;
#   items     the number of lines in "dEID.items" only
#             (the "hEID" and "pEID" files are not counted);
#   {T}       the sum of field 4 over all lines of "{T}.items".
#
# NOTE(review): this script was originally described as tabulating
# "items and lines", but field 4 is {N_BYTES} in the layout above;
# {N_LINES} would be field 3.  The summed field is left as field 4
# here -- confirm which one is intended.
#
# With no arguments, only the header row is printed.

# ":q" keeps each command-line word intact, so a dataset name is
# never re-split on blanks or expanded as a filename pattern.
set datasets = ( $argv:q )

# File types that get a column of field-4 sums, in column order.
set eid_types = ( dEID hEID pEID )

# Header row:
printf "%-10s" "Dataset"
foreach col ( items $eid_types )
  printf " %10s" "${col}"
end
printf '\n'

# One row per dataset:
foreach f ( $datasets:q )
  set eid_dir = "orig/eids/${f}"
  printf "%-10s" "${f}"

  # The files are fed through "cat" on purpose: if a file is missing,
  # "cat" complains on standard error while gawk still reads an empty
  # pipe and prints 0, so the columns of the table stay aligned.

  # Number of items, taken from the "dEID" file:
  cat "${eid_dir}/dEID.items" \
    | gawk '{ n++; } END { printf " %10d", n; }'

  # Sum of field 4 for each file type:
  foreach t ( $eid_types )
    cat "${eid_dir}/${t}.items" \
      | gawk '{ n += $4; } END { printf " %10d", n; }'
  end
  printf '\n'
end