#! /bin/bash
# Last edited on 2008-06-13 09:50:52 by stolfi

# Usage: get-region-lengths {SETNAME} {SETDIR} {BASDIR} {OUTDIR}
#
# Extracts region length statistics for a set of labeled genes.
#
# Reads the gene names from the file "{SETDIR}/{SETNAME}.gset", and
# the labeling of each gene from the file "{BASDIR}/{GENE}.lab".
# Tabulates the lengths of exon and non-exon regions
# in the given genes of Reese's dataset.  Writes the
# statistical parameters (average length, etc.) and the
# length histogram to the file "{OUTDIR}/{SETNAME}-{TYPE}.lens"
# where {TYPE} is "K" or "N".
#
# Environment: STOLFIHOME must be set; the statistics program is run from
# "${STOLFIHOME}/programs/c/DNA/dnabayes/dbd_gather_region_length_stats".
#
# Exit status: 2 on a usage error, 1 on a missing prerequisite or an
# empty gene set, otherwise the exit status of the statistics program.

# Print an error message to stderr and abort.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# Arguments beyond the fourth are ignored.
if (( $# < 4 )); then
  printf 'Usage: %s {SETNAME} {SETDIR} {BASDIR} {OUTDIR}\n' "${0##*/}" >&2
  exit 2
fi

setName="$1"; shift
setDir="$1"; shift
basDir="$1"; shift
outDir="$1"; shift

setFile="${setDir}/${setName}.gset"
outName="${outDir}/${setName}"

# Fail early with a clear message instead of running "/programs/...".
: "${STOLFIHOME:?must be set to locate dbd_gather_region_length_stats}"
progdir="${STOLFIHOME}/programs/c/DNA/dnabayes"

command -v gawk > /dev/null || die "gawk not found in PATH"
[[ -r "${setFile}" ]] || die "cannot read gene set file ${setFile}"

# Build the list of label files, one per gene.  Only lines starting with
# a letter or digit are used, and the gene name is the first field of the
# line.  The list is kept in an array (rather than a temporary file that
# is later word-split) so that directory names containing blanks or glob
# characters reach the program intact and nothing is left behind in /tmp.
# The set file is fed through stdin so gawk cannot mistake a path that
# contains "=" for a command-line variable assignment.
labFiles=()
while IFS= read -r labFile; do
  labFiles+=("${labFile}")
done < <(
  gawk \
    -v basDir="${basDir}" \
    '/^[a-zA-Z0-9]/{ printf "%s/%s.lab\n", basDir, $1; }' \
    < "${setFile}"
)

(( ${#labFiles[@]} > 0 )) || die "no gene names found in ${setFile}"

# NOTE(review): the "-v outName=..." option is interpreted by the
# statistics program itself; presumably it is the prefix of the ".lens"
# output files described above -- confirm against that program.
# This is the last command, so its exit status is the script's status.
"${progdir}/dbd_gather_region_length_stats" \
  -v outName="${outName}" \
  "${labFiles[@]}"