#define PROG_NAME "dnaencode"
#define PROG_DESC "DNA/binary encoding/decoding"
#define PROG_VERS "1.0"

/* Copyright © 2001 by the State University of Campinas (UNICAMP).
** See the copyright, authorship, and warranty notice at end of file.
** Last edited on 2006-03-13 20:58:42 by stolfi
*/

#define PROG_HELP \
  PROG_NAME " [-u] < INFILE > OUTFILE \n" \
  "Converts binary to DNA code; vice-versa with \"-u\"."

/* NOTE(review): this file originally carried five #include directives
   whose header names were lost.  Only the two headers below are needed
   by the code in this file; confirm against the upstream source whether
   the other three should be restored. */
#include <stdio.h>
#include <stdlib.h>

static const char *usage =
  PROG_NAME " version " PROG_VERS ", usage: \n"
  PROG_HELP "\n";

/* Capacity of the bit queue, in bits.  Kept as an `int' so that it
   compares cleanly with the (signed) bit counts used below. */
#define MAXBITS ((int)(8 * sizeof(unsigned long)))

/* Maximum number of DNA letters written per output line. */
#define MAXDNALINE 75

/* The following procedures treat the low-order `*bitsp' bits of `*valp',
   high to low, as a queue of bits. */

typedef void (*encode_function_t) (unsigned long *valp, int *bitsp);
/* Tries to output the bits in the queue.  Consumes as many bits as
   possible, and leaves the rest there.  If `*bitsp' is negative,
   writes the preamble and returns.  If `*bitsp == 0', writes the
   postamble and returns. */

typedef void (*decode_function_t) (unsigned long *valp, int *bitsp);
/* Tries to read more data and append it to the queue.  If `*bitsp' is
   negative, skips the preamble first.  At end-of-file, returns with
   `*bitsp' unchanged. */

static void dna_encode (unsigned long *valp, int *bitsp)
/* Encoder for DNA output: writes one letter of "ACGT" to stdout for
   each pair of queued bits, breaking lines after MAXDNALINE letters.
   An odd leftover bit, if any, stays in the queue. */
{
  static const char base[4] = { 'A', 'C', 'G', 'T' };
  static int count = 0;  /* Count of chars in current output line. */
  unsigned long val = *valp;
  int bits = *bitsp;

  /* Preamble is empty: */
  if (bits < 0) { return; }

  /* Postamble is `\n' if last line is non-empty: */
  if (bits == 0)
    { if (count) { putchar('\n'); }
      return;
    }

  while (bits >= 2)
    { unsigned long c;
      if (count >= MAXDNALINE) { putchar('\n'); count = 0; }
      count++;
      bits -= 2;
      /* Mask before narrowing so the index is always in 0..3: */
      c = (val >> bits) & 3UL;
      putchar(base[c]);
      /* Remove the two bits just written from the queue: */
      val -= c << bits;
    }
  *bitsp = bits;
  *valp = val;
}

static void bin_encode (unsigned long *valp, int *bitsp)
/* Encoder for binary output: writes one byte to stdout for each group
   of 8 queued bits.  Fewer than 8 leftover bits stay in the queue. */
{
  unsigned long val = *valp;
  int bits = *bitsp;

  /* Preamble and postamble are empty: */
  if (bits <= 0) { return; }

  while (bits >= 8)
    { unsigned long c;
      bits -= 8;
      c = (val >> bits) & 255UL;
      putchar((int)c);
      /* Remove the byte just written from the queue: */
      val -= c << bits;
    }
  *bitsp = bits;
  *valp = val;
}

static void dna_decode (unsigned long *valp, int *bitsp)
/* Decoder for DNA input: reads characters from stdin, appending two
   bits to the queue for each of A/C/G/T (either case), until
   end-of-file or until there is no room for two more bits.  Spaces and
   characters with codes up to octal 037 are ignored; any other
   character is reported on stderr and terminates the program with
   status 1. */
{
  unsigned long val = *valp;
  int bits = *bitsp;
  int c = 0;

  /* Preamble is empty: */
  if (bits < 0) { bits = 0; }

  while ((c != EOF) && (bits <= MAXBITS - 2))
    { c = getchar();
      if ((c == EOF) || (c <= '\037') || (c == ' ')) { continue; }
      switch (c)
        { case 'A': case 'a': val = (val << 2) | 0; bits += 2; break;
          case 'C': case 'c': val = (val << 2) | 1; bits += 2; break;
          case 'G': case 'g': val = (val << 2) | 2; bits += 2; break;
          case 'T': case 't': val = (val << 2) | 3; bits += 2; break;
          default:
            fprintf(stderr, "%c: bad char\n", c);
            exit(1);
        }
    }
  /* A per-call debug trace used to be printed here with a `%d'
     conversion applied to a `size_t' argument; it has been removed. */
  *bitsp = bits;
  *valp = val;
}

static void bin_decode (unsigned long *valp, int *bitsp)
/* Decoder for binary input: reads bytes from stdin, appending 8 bits
   to the queue for each, until end-of-file or until there is no room
   for 8 more bits. */
{
  unsigned long val = *valp;
  int bits = *bitsp;

  /* Preamble is empty: */
  if (bits < 0) { bits = 0; }

  while (bits <= MAXBITS - 8)
    { int c = getchar();
      if (c == EOF) { break; }
      val = (val << 8) | (unsigned long)c;
      bits += 8;
    }
  *bitsp = bits;
  *valp = val;
}

/* Prints the usage message to stderr and returns 1 from the enclosing
   function.  Written as a single statement; follow it with `;'. */
#define ABORT do { fputs(usage, stderr); return 1; } while (0)

int main (int argc, char *argv[])
/* Copies stdin to stdout, converting binary to DNA letters by default,
   or DNA letters to binary when "-u" is given.  Returns 0 on success
   and 1 on a malformed command line or an output error. */
{
  int i, bits;
  unsigned long val;
  int failed;
  encode_function_t encode_function = dna_encode;
  decode_function_t decode_function = bin_decode;

  if (argc < 1 || ! argv || ! argv[0]) { ABORT; }

  for (i = 1; i < argc; i++)
    { char *arg = argv[i];
      /* Every argument must be exactly a dash plus one letter: */
      if ((arg[0] != '-') || (arg[1] == '\0') || (arg[2] != '\0')) { ABORT; }
      switch (arg[1])
        { case 'u':
            decode_function = dna_decode;
            encode_function = bin_encode;
            break;
          case 'v': case 'V':
            printf("dnaencode version -1, by Jorge Stolfi\n");
            return 0;
          case 'h': case '?':
          default:
            /* Unrecognized options are treated like a help request. */
            fputs(usage, stdout);
            return 0;
        }
    }

  /* Write output preamble: */
  bits = -1; val = 0;
  (*encode_function) (&val, &bits);

  /* Main loop: stop when the decoder adds nothing to the queue. */
  while (1)
    { int obits = bits;
      decode_function(&val, &bits);
      if (bits == obits) { break; }
      encode_function(&val, &bits);
    }

  /* Flush any remaining bits, appending zero bits until the encoder
     is able to consume the whole queue: */
  if (bits > 0)
    { fprintf(stderr, "%s: incomplete input, padding with 0s\n", argv[0]);
      do
        { val <<= 1; bits++;
          encode_function(&val, &bits);
        }
      while (bits > 0);
    }

  /* Write postamble: */
  (*encode_function) (&val, &bits);

  /* Report output errors instead of silently discarding them: */
  failed = ferror(stdout);
  if (fclose(stdout) != 0) { failed = 1; }
  if (failed)
    { fprintf(stderr, "%s: error writing output\n", argv[0]);
      return 1;
    }
  return 0;
}

/* COPYRIGHT, AUTHORSHIP, AND WARRANTY NOTICE:
**
**   Copyright © 2001 by the State University of Campinas (UNICAMP).
**
** Created by J. Stolfi, Unicamp, 2001-20-07.
** A drastic simplification of Alexandre Oliva's "dnacode.c".
**
** This file is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful, but
** WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
** General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**/