/* See dm_seq.h */ /* Last edited on 2019-04-09 12:59:51 by jstolfi */ #define dm_seq_C_COPYRIGHT \ "Copyright � 2005 by the State University of Campinas (UNICAMP)" \ " and the Federal Fluminense University (UFF)" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* IMPLEMENTATIONS */ int dm_seq_num_datums(dm_seq_t *seqp) { return seqp->dv.ne; } dm_seq_t dm_seq_new(int n) { dm_seq_t seq; seq.sd.id = dm_seq_id_none; seq.sd.name = NULL; seq.sd.level = 0; seq.sd.npos = n; seq.nsub = 1; seq.cmt = NULL; int k; for (k = 0; k < dm_CHANNELS; k++) { seq.sfac.f[k] = 1.0; } seq.dv = dm_datum_vec_new(n); return seq; } dm_seq_t dm_seq_from_datum_vec ( dm_seq_id_t id, char *name, int level, char *cmt, dm_datum_scale_t *sfac, dm_datum_vec_t dv, int nsub ) { dm_seq_t seq; seq.sd.id = id; seq.sd.name = name; seq.sd.level = level; seq.sd.npos = ( (dv.ne - 1)*nsub + 1); seq.cmt = cmt; int k; for (k = 0; k < dm_CHANNELS; k++) { seq.sfac.f[k] = sfac->f[k]; } seq.dv = dv; seq.nsub = nsub; return seq; } dm_seq_t dm_seq_copy_sub(dm_seq_t* sr, int ix_ini, int ix_fin){ demand(sr->nsub == 1, "cannot extract from subsampled sequence."); dm_seq_t sc; sc.sd.id = sr->sd.id; sc.sd.name = sr->sd.name; sc.sd.level = sr->sd.level; sc.sd.npos = ix_fin - ix_ini + 1; sc.cmt = sr->cmt; int k; for (k = 0; k < dm_CHANNELS; k++) { sc.sfac.f[k] = sr->sfac.f[k]; } sc.dv = dm_datum_vec_new(sc.sd.npos); for (k = 0; k < sc.dv.ne; k++) { sc.dv.e[k] = sr->dv.e[k+ix_ini]; } sc.nsub = sr->nsub; return sc; } dm_seq_t dm_seq_copy_datums(dm_seq_t *sr) { dm_seq_t sc; sc.sd.id = sr->sd.id; sc.sd.name = sr->sd.name; sc.sd.level = sr->sd.level; sc.sd.npos = sr->sd.npos; sc.cmt = sr->cmt; int k; for (k = 0; k < dm_CHANNELS; k++) { sc.sfac.f[k] = sr->sfac.f[k]; } sc.dv = dm_datum_vec_new(sr->dv.ne); for (k = 0; k < sr->dv.ne; k++) { sc.dv.e[k] = sr->dv.e[k]; } sc.nsub = sr->nsub; return sc; } void dm_seq_write(FILE *fsq, dm_seq_t *seqp) { /* Write the file header: */ filefmt_write_header(fsq, dm_seq_type_name, dm_seq_version); /* Write the comment, if any: */ int ind = 0; /* Comment indentation. */ if (seqp->cmt != NULL) { filefmt_write_comment(fsq, seqp->cmt, 0, '|'); } /* Write the header fields: */ fprintf(fsq, "level = %d\n", seqp->sd.level); fprintf(fsq, "channels = %d\n", dm_CHANNELS); fprintf(fsq, "scale ="); int k; for (k = 0; k < dm_CHANNELS; k++) { fprintf(fsq, " %24.16e", seqp->sfac.f[k]); } fprintf(fsq, "\n"); fprintf(fsq, "circular = %s\n", "F"); fprintf(fsq, "samples = %d\n", seqp->dv.ne); /* Write the datum vector: */ int ib; for (ib = 0; ib < seqp->dv.ne; ib++) { dm_datum_t *dp = &(seqp->dv.e[ib]); int k; for (k = 0; k < dm_CHANNELS; k++) { fprintf(fsq, " %+4d", dp->c[k]); } fprintf(fsq, "\n"); } /* Write the file footer: */ filefmt_write_footer(fsq, dm_seq_type_name); } void dm_seq_write_named(dm_seq_t *seqp, char *name, char *tag) { FILE *wr = dm_open_write(name, tag, ".eqs", TRUE); dm_seq_write(wr, seqp); fclose(wr); } dm_seq_t dm_seq_read(FILE *fsq, int nsub) { /* Check and skip the file header: */ filefmt_read_header(fsq, dm_seq_type_name, dm_seq_version); /* Skip comment lines: */ char *cmt = filefmt_read_comment(fsq, '|'); /* Read the header fields: */ int level = nget_int(fsq, "level"); fget_eol(fsq); demand(level >= 0, "invalid level"); int chns = nget_int(fsq, "channels"); fget_eol(fsq); demand(chns = dm_CHANNELS, "wrong number of channels"); dm_datum_scale_t sfac; sfac.f[0] = nget_double(fsq, "scale"); int k; for (k = 1; k < dm_CHANNELS; k++) { sfac.f[k] = fget_double(fsq); } fget_eol(fsq); bool_t circular = nget_bool(fsq,"circular"); demand(!circular, "no circular sequences allowed !"); fget_eol(fsq); int nsmp = nget_int(fsq, "samples"); fget_eol(fsq); demand(nsmp >= 0, "invalid sample count"); /* Read the datum vector: */ dm_datum_vec_t dv = dm_datum_vec_new(nsmp); int ib; for (ib = 0; ib < nsmp; ib++) { dm_datum_t d; int k; for (k = 0; k < dm_CHANNELS; k++) { int s = fget_int(fsq); demand((s >= -127) && (s <= +127), "bad sample value"); d.c[k] = s; } fget_eol(fsq); dv.e[ib] = d; } /* Check and skip file footer: */ filefmt_read_footer(fsq, dm_seq_type_name); /* Create the {dm_seq_t} record: */ dm_seq_id_t id = dm_seq_id_none; char *name = NULL; dm_seq_t seq = dm_seq_from_datum_vec(id, name, level, cmt, &sfac, dv, nsub); return seq; } dm_datum_t *dm_seq_get_datum_address(dm_seq_t *seqp, int i) { int n = seqp->dv.ne; assert((i >= 0) && (i < n)); dm_datum_t *dp = &(seqp->dv.e[i]); return dp; } dm_datum_t dm_seq_get_datum(dm_seq_t *seqp, int i) { dm_datum_t *dp = dm_seq_get_datum_address(seqp, i); return *dp; } void dm_seq_set_datum(dm_seq_t *seqp, int i, dm_datum_t d) { dm_datum_t *dp = dm_seq_get_datum_address(seqp, i); (*dp) = d; } dm_sample_t *dm_seq_get_sample_address(dm_seq_t *seqp, int i, int k) { assert((k >= 0) && (k < dm_CHANNELS)); dm_datum_t *dp = dm_seq_get_datum_address(seqp, i); return &(dp->c[k]); } dm_sample_t dm_seq_get_sample(dm_seq_t *seqp, int i, int k) { dm_sample_t *sp = dm_seq_get_sample_address(seqp, i, k); return *sp; } void dm_seq_set_sample(dm_seq_t *seqp, int i, int k, dm_sample_t s) { dm_sample_t *sp = dm_seq_get_sample_address(seqp, i, k); (*sp) = s; } int dm_seq_num_positions(dm_seq_t *seqp) { return seqp->sd.npos; } dm_datum_t dm_seq_eval(dm_seq_t *seqp, int x) { int nsmp = seqp->dv.ne; int npos = seqp->sd.npos; int nsub = seqp->nsub; /* Check range of matching position {x}: */ demand((x >= 0) && (x < npos), "invalid sequence position"); /* Linear interpolation for now. */ /* !!! Change to a smoother interpolation !!! */ int i0 = x/nsub; /* Index of preceding sample. */ int i1 = (x + nsub - 1)/nsub; /* Index of next sample. */ assert((0 <= i0) && (i0 <= i1) && (i1 <= nsmp)); /* Reduce {i1} to a sample index: */ assert(i1 < nsmp); if (i0 == i1) { /* No interpolation needed. */ return seqp->dv.e[i0]; } else { /* Interpolate the two samples. */ double s = ((double)(x % nsub)); double t = 1 - s; return dm_datum_mix(t, &(seqp->dv.e[i0]), s, &(seqp->dv.e[i1])); } } dm_seq_t dm_seq_from_nucleic_string(dm_seq_id_t id, char *name, char *cmt, char *bas) { /* Convert nucleotide codes to numeric datums: */ dm_datum_vec_t dv = dm_datum_vec_from_nucleic_string(bas); /* Compute standard deviation {rms} of signal for uniformly distributed bases: */ double rms = dm_NUCLEIC_RAW_SCALE; int level = 0; int nsub = 1; dm_datum_scale_t sfac; int k; for (k = 0; k < dm_CHANNELS; k++) { sfac.f[k] = rms; } /* Pack it all as a {dm_seq_t}: */ dm_seq_t seq = dm_seq_from_datum_vec(id, name, level, cmt, &sfac, dv, nsub); return seq; } dm_seq_t dm_seq_read_from_nucleic_file(dm_seq_id_t id, char *name, char *fname) { /* Read letter sequence from file: */ char *bas = NULL; char *cmt = NULL; dm_nucleic_string_read_named(&bas, &cmt, fname, ""); /* Pack it all up: */ dm_seq_t seq = dm_seq_from_nucleic_string(id, name, cmt, bas); return seq; } dm_seq_t dm_seq_read_from_nucleic_simple(dm_seq_id_t id, char *name, char *fname) { /* Read letter sequence from file: */ char *bas = NULL; char *cmt = NULL; FILE *rd = open_read(fname, TRUE); dm_nucleic_string_read(rd, &bas, &cmt); fclose(rd); /* Pack it all up: */ dm_seq_t seq = dm_seq_from_nucleic_string(id, name, cmt, bas); return seq; } void dm_seq_postscript_plot_named ( dm_seq_t *seqp, double hSize, double vSize, double fontSize, char *name, char *tag ) { /* Create the {msm_ps_tools_t} object stream from it: */ msm_ps_tools_t *dp = msm_ps_tools_new_graph ( NULL, name, tag, /*hGraphSize:*/ hSize, /*vGraphSize:*/ vSize, /*scaleL:*/ TRUE, /*titleL:*/ FALSE, /*scaleR:*/ FALSE, /*titleR:*/ FALSE, /*scaleB:*/ TRUE, /*titleB:*/ FALSE, /*scaleT:*/ FALSE, /*titleT:*/ FALSE, /*fontSize:*/ fontSize, /*maxLabChars:*/ 7, /*mrg:*/ msm_EPS_MARGIN_MM ); /* Plot the sequence: */ dm_seq_postscript_plot(dp, seqp); /* Close and cleanup: */ msm_ps_tools_close(dp); } void dm_seq_postscript_plot(msm_ps_tools_t *dp, dm_seq_t *seqp) { fprintf(stderr,"AT START SEQP %p\n",seqp); /* Get sequence size and circularity: */ int nd = dm_seq_num_datums(seqp); /* Number of datums. */ int nc = dm_CHANNELS; /* Number of channels per datum. */ if (nd == 0) { return; } /* Get scaling factors: */ double *sfac = seqp->sfac.f; /* Sample scaling factor per channel. */ fprintf(stderr,"SCALE SEQP %p\n",seqp); /* Choose the nominal Y range {[yMin_yMax]}: */ int c; double sfMax = 1.0e-100; /* Max sample scale factor among channels. */ for (c = 0; c < nc; c++) { if (sfac[c] > sfMax) { sfMax = sfac[c]; } } double vMax = dm_sample_decode(dm_sample_VALID_MAX)*sfMax; double ySkosh = 0.0; double yMin = -vMax - ySkosh; double yMax = +vMax + ySkosh; fprintf(stderr,"RANGE SEQP %p\n",seqp); /* Extract the data to plot: */ int ny = nd*nc; // double y[ny]; double* y = (double*) malloc(sizeof(double)*ny); int i; for (i = 0; i < nd; i++) { for (c = 0; c < nc; c++) { double s = dm_seq_get_sample(seqp, i, c); y[c*nd + i] = dm_sample_decode(s)*sfac[c]; } } /* Plot the graphs: */ msm_ps_tools_draw_graphs(dp, nc, nd, NULL, y, yMin, yMax); free(y); } void dm_seq_free_datums(dm_seq_t *sr) { free(sr->dv.e); } void dm_seq_free(dm_seq_t *sr) { dm_seq_free_datums(sr); free(sr); } void dm_seq_multi_free_datums(dm_seq_t sr[], int maxLevel) { int level; for (level = 0; level <= maxLevel; level++) { dm_seq_free_datums(&(sr[level])); } } void dm_seq_multi_free(dm_seq_t *sr[], int maxLevel) { int level; for (level = 0; level <= maxLevel; level++) { dm_seq_free(sr[level]); } }