#! /bin/bash
# Last edited on 2008-06-13 08:49:27 by stolfi

# Usage: tabulate-probs {SETNAME} {WINSIZE} {LABEQV} {SETDIR} {BASDIR} {EVTDIR} {PRBDIR}

# Reads from the file "{SETDIR}/{SETNAME}.gset" a list of full item
# names, of the form "{SPECIES}/{ITEM}" where {SPECIES} is the name of
# a species's dataset in the EID database (e.g. "at2004", "hs35p1")
# and {ITEM} is an EID item identifier (e.g. "10008_NC_003074").

# Then reads the nucleotide files "{BASDIR}/{SPECIES}/{ITEM}.bas" and
# their labels "{BASDIR}/{SPECIES}/{ITEM}.lab" for all those items,
# and maps each label letter in the alphabet 'DEFHIJNXYPQRS' to the
# corresponding letter in the string {LABEQV}. Also maps each
# lowercase nucleotide base to upper case, and replaces the 'U' base by
# 'T'.

# Then for every pair {e,t} where {e} is a {k}-event, {t} is a
# {k}-tuple, and {k = WINSIZE}, it counts the number of occurrences
# {#(e,t)} in all those items together, and estimates the conditional
# probability {Pr(t|e)} from those counts.

# These data are written out to the file
# "{PRBDIR}/{SETNAME}-{WINSIZE}-{LABEQV}.prb", one pair per line, in
# the format "{e} {t} {#(e,t)} {Pr(t|e)}".

# If the file "{EVTDIR}/{WINSIZE}-{LABEQV}.evts" is present, it should
# contain a list of interesting {k}-events. In that case, this script
# only tabulates those pairs with events {e} in that list.

# Print a diagnostic on stderr and abort the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# All seven positional arguments are required; extra ones are ignored.
if (( $# < 7 )); then
  die "usage: ${0##*/} SETNAME WINSIZE LABEQV SETDIR BASDIR EVTDIR PRBDIR"
fi

setName="$1"; shift
windowSize="$1"; shift
labEqv="$1"; shift
setDir="$1"; shift
basDir="$1"; shift
evtDir="$1"; shift
prbDir="$1"; shift

# Directory holding the tabulator executable.
progdir="${STOLFIHOME}/programs/c/DNA/dnabayes"

# Input list of items, optional event list, and output table.
setFile="${setDir}/${setName}.gset"
eventList="${evtDir}/${windowSize}-${labEqv}.evts"
prbFile="${prbDir}/${setName}-${windowSize}-${labEqv}.prb"

# Check the inputs before the output redirection below can truncate
# an existing table.
[[ -r "${setFile}" ]] || die "cannot read set file: ${setFile}"
[[ -x "${progdir}/dbd_tabulate" ]] \
  || die "cannot execute: ${progdir}/dbd_tabulate"

# Restrict the tabulation to the listed events when the list file is
# readable; otherwise tabulate every event.
if [[ -r "${eventList}" ]]; then
  eventOptions=( "-eventList" "${eventList}" )
else
  eventOptions=( "-all" )
fi

# Temporary bank file: unpredictable name, removed on any exit path.
dataBank="$(mktemp "${TMPDIR:-/tmp}/tabulate-probs.XXXXXX")" \
  || die "cannot create temporary bank file"
trap 'rm -f -- "${dataBank}"' EXIT

# Create the bank file: for each line of the set file that contains an
# alphanumeric character, emit the ".bas" and ".lab" paths built from
# the first field.
gawk \
  -v basDir="${basDir}" \
  '/[a-zA-Z0-9]/{ printf "%s/%s.bas %s/%s.lab\n", basDir, $1, basDir, $1; }' \
  < "${setFile}" \
  > "${dataBank}" \
  || die "cannot build bank file from: ${setFile}"

# Run the tabulator. It is the last command, so its exit status
# becomes the exit status of the script.
"${progdir}/dbd_tabulate" \
  -dataBank "${dataBank}" \
  -windowSize "${windowSize}" \
  -labelMap "DEFHIJNXYPQRS" = "${labEqv}" \
  "${eventOptions[@]}" \
  -quiet \
  > "${prbFile}"