#define PROG_NAME "dm_match_bank" #define PROG_DESC "multiscale matching of a DNA/RNA seq against a seq bank" #define PROG_VERS "1.0" /* Copyright © 2006 by the State University of Campinas (UNICAMP). ** See the copyright, authorship, and warranty notice at end of file. ** Last edited on 2007-12-27 03:15:52 by stolfi */ #define PROG_HELP \ PROG_NAME " \\\n" \ " SEQ_DIR QUERY_SEQ BANK_NAME \\\n" \ " MAX_LEVEL MAX_MATCHES MIN_BASES MAX_DIFF \\\n" \ " OUT_NAME" #include #include #include #include #include #include #include #include #include typedef struct dm_options_t { char *seqDir; /* Directory where to find the sequences. */ char *dbName; /* Name of sequence bank file (minus ".gdb" ext). */ char *queryName; /* Name of query sequence (minus dir and ".bas" ext). */ int maxLevel; /* Maximum filtering level. */ int maxMatches; /* Max number o matches to report. */ int minBases; /* Max difference metric. */ double maxDiff; /* Max difference metric. */ char *outName; /* Output file name prefix (minus extensions). */ } dm_options_t; int main(int argc, char**argv); dm_options_t *dm_get_options(int argc, char**argv); dm_seq_t dm_read_input_sequence(char *seqName); /* Reads a DNA sequence from file "{seqName}.bas". */ void dm_output_matches(dm_pairing_vec_t cand, char *outName); /* Writes the list of pairings {cand} to the file named {outName}. */ int main(int argc, char**argv) { dm_options_t *o = dm_get_options(argc, argv); dm_seq_bank_t db = dm_seq_bank_read(o->dbName, o->seqDir); dm_seq_t q = dm_seq_read(o->queryName, o->seqDir); dm_pairing_vec_t cand = dm_pairing_vec_new(100); int ncand = 0; int ib; double sigma0 = 2.0; /* TO FIX!!! */ double sigma1 = 1.5; /* TO FIX!!! */ double_vec_t wtb0 = dm_make_weight_table(sigma0); double_vec_t wtb1 = dm_make_weight_table(sigma1); dm_seq_t qr[o->maxLevel + 1]; dm_seq_t br[o->maxLevel + 1]; dm_multi_seq_filter(q, o->maxLevel, &wtb0, &wtb1, qr); for (ib = 0; ib < db.seq.nel; ib++) { dm_multi_seq_filter(db.seq.el[ib], o->maxLevel, &wtb0, &wtb1, br); dm_pairing_vec_t part = dm_multi_find_matches ( qr, br, o->maxMatches, o->maxLevel, o->minBases, o->maxDiff ); int ic; for (ic = 0; ic < part.nel; ic++) { dm_pairing_vec_expand(&cand, ncand); cand.el[ncand] = part.el[ic]; ncand++; } free(part.el); dm_multi_seq_free(br, o->maxLevel); } dm_pairing_vec_trim(&cand, ncand); dm_output_matches(cand, o->outName); dm_multi_seq_free(qr, o->maxLevel); return 0; } dm_seq_t dm_read_input_sequence(char *seqName, char*seqDir) { char *fileName = NULL; char *fileName = jsprintf("%s/%s.bas", seqName, seqDir); FILE *fsq = open_read(fileName, TRUE); dm_seq_t seq = dm_seq_read(fsq); fclose(fsq); free(fileName); return seq; } dm_options_t* dm_get_options(int argc, char**argv) { dm_options_t *o = (dm_options_t *)notnull(malloc(sizeof(dm_options_t)), "no mem"); char *endptr; demand(argc == 8, "wrong num of args"); o->seqDir = argv[1]; o->dbName = argv[2]; o->queryName = argv[3]; o->maxLevel = strtod(argv[4], &endptr); demand((*endptr) == '\000', "bad maxLevel"); o->maxMatches = strtod(argv[5], &endptr); demand((*endptr) == '\000', "bad maxMatches"); o->minBases = strtod(argv[6], &endptr); demand((*endptr) == '\000', "bad minBases"); o->maxDiff = strtod(argv[7], &endptr); demand((*endptr) == '\000', "bad maxDiff"); o->outName = argv[8]; return o; } /* COPYRIGHT, AUTHORSHIP, AND WARRANTY NOTICE: ** ** Copyright © 2006 by the State University of Campinas (UNICAMP). ** ** Created feb/2006 by Jorge Stolfi, IC-UNICAMP. ** ** Permission to use, copy, modify, and redistribute this software and ** its documentation for any purpose and without fee is hereby ** granted, provided that: (1) the copyright notice at the top of this ** file and this copyright, authorship, and warranty notice is retained ** in all derived source files and documentation; (2) no executable ** code derived from this file is published or distributed without the ** corresponding source code; and (3) these same rights are granted to ** any recipient of such code, under the same conditions. ** This software is provided "as is", WITHOUT ANY EXPLICIT OR IMPLICIT ** WARRANTIES, not even the implied warranties of merchantibility and ** fitness for a particular purpose. END OF NOTICE. */