#ifndef dm_datum_H #define dm_datum_H /* Numerical encoding of DNA/RNA bases */ /* Last edited on 2008-01-30 10:25:20 by hcgl */ #define dm_datum_H_COPYRIGHT \ "Copyright © 2006 by the State University of Campinas (UNICAMP)" #include #include #include #include #define dm_CHANNELS 3 typedef struct dm_datum_t { dm_sample_t c[dm_CHANNELS]; } dm_datum_t; /* A digitalized signal is a sequence of {dm_datum_t} values, each being a vector with {dm_CHANNELS} samples. */ #define dm_datum_zero ((dm_datum_t){0, 0, 0}) dm_datum_t dm_datum_mix(double sx, dm_datum_t *fx, double sy, dm_datum_t *fy); /* Computes the linear combination {sx*fx + sy*fy}, channel by channel. */ /* COMPARING DATUM VALUES */ double dm_datum_diffsq(dm_datum_t *fx, dm_datum_t *fy); /* Returns an estimate of the average difference squared between the raw datums which were averaged into {*fx} and {*fy}. The result is a quadratic function of the datum values, normalized so that is has value 1 between two distinct raw (unfiltered) nucleotide vectors, e.g. between {fx = (+1,+1,+1)} ('A') and {fy = (-1,-1,+1)} ('T'). */ double dm_datum_step_diffsq ( dm_datum_t *fx0, dm_datum_t *fy0, dm_datum_t *fx1, dm_datum_t *fy1 ); /* Average value of {dm_datum_diffsq(X(t),Y(t))} when {X(t)} and {Y(t)} interpolate linearly between {fx0,fy0} and {fx1,fy1}. Assuming that all arguments lie in the datum simplex, the result will range over {[0 _ 1]}. */ double dm_datum_half_step_diffsq ( dm_datum_t *fx0, dm_datum_t *fy0, dm_datum_t *fx1, dm_datum_t *fy1 ); /* Integral of {(1-t)*dm_datum_diffsq(X(t),Y(t))} when {X(t)} and {Y(t)} interpolate linearly between {fx0,fy0} and {fx1,fy1}, as {t} ranges over {[0_1]}. Assuming that all arguments lie in the datum simplex, the result will range over {[0.0 _ 0.5]}. Informally, the result is the part of {dm_datum_step_diffsq} that comes mostly from the first half of the step. Indeed, the value of {dm_datum_step_diffsq(fx0,fy0,fx1,fy1)} is the sum of the first and second half-step values, namely {dm_datum_half_step_diffsq(fx0,fy0,fx1,fy1)} and {dm_datum_half_step_diffsq(fx1,fy1,fx0,fy0)}. */ /* DATUM VECTORS */ vec_typedef(dm_datum_vec_t,dm_datum_vec,dm_datum_t); /* Vector of {dm_datum_t}. */ /* DATUM I/O */ void dm_datum_encoded_write(FILE *wr, dm_datum_t *d, char *lp, char *sep, char *rp); /* Writes the datum {d} (encoded) to file {wr}. Samples are preceded by {lp}, separated by {sep}, and followed by {rp}. */ void dm_datum_decoded_write(FILE *wr, dm_datum_t *d, char *lp, char *sep, char *rp); /* Writes the datum {d} (decoded) to file {wr}. Samples are preceded by {lp}, separated by {sep}, and followed by {rp}. */ /* DATUMS FROM DNA/RNA NUCLEOTIDES When used to represent elements of nucleic acid sequences, or samples of smoothed versions thereof, the three channels {c[0..2]} are {(A-T)+(C-G)}, {(G-C)+(A-T)}, and {(A+T)-(C+G)}; where {A} is the local density of 'A' nucleotides at a specified point of the sequence, and ditto for {C}, {G} and {T}. Ordinarily, the datum should be in the convex hull of the four /nucleotide datums/ {dm_datum_from_nucleic_char(b)} where {b} is 'A', 'T', 'C', or 'G'. */ void dm_datum_decoded_from_nucleic_char(char b, int *d); /* Stores in {d[0..2]} the numeric representation of the DNA/RNA nucleotide character {b}, namely A (+1,+1,+1) T,U (-1,-1,+1) C (+1,-1,-1) G (-1,+1,-1) The result is {(0,0,0)} if {b} is not in [ATCGUatcgu]. */ dm_datum_t dm_datum_encoded_from_nucleic_char(char b); /* Converts the DNA/RNA nucleotide character {b} to a numeric vector, as decribed under {dm_datum_decoded_from_nucleic_char}, and encodes it as a {dm_datum_t}. */ void dm_datum_to_nucleic_densities(dm_datum_t *d, double *A, double *T, double *C, double *G); /* Returns in {*A,*T,*C,*G} the local densities of the four nuclotides that are implied by the datum {d}. Ordinarily, those four numbers shoudl add to 1. */ dm_datum_vec_t dm_datum_vec_from_nucleic_string(char *s); /* Converts a DNA/RNA nucleotide sequence {c[0..n-1]} to a vector of {dm_datum_t}s, as explained under {dm_datum_from_nucleic_char}. */ /* SCALING FACTORS FOR DATUMS */ typedef struct dm_datum_scale_t { double f[dm_CHANNELS]; } dm_datum_scale_t; /* Per-channel scale factors for decoded samples of a {dm_datum_t}. */ #define dm_NUCLEIC_RAW_SCALE 1.0 /* Standard deviation of decoded sample values in each channel of a {dm_datum_t} that corresponds to a single nucleotide taken from a uniform random distribution. */ #endif