#! /usr/bin/gawk -f
# Last edited on 2008-06-15 08:19:48 by stolfi

# Checks a list of codon translations read from the input.
#
# Each non-blank input line must contain exactly two fields:
# a codon (three letters from [ATCG], in either case) and an
# aminoacid code (one uppercase letter, or "$" for a stop codon).
# The script stops at the first line whose aminoacid code does not
# match the built-in translation table, printing the offending line
# and a diagnostic to standard error and exiting with status 1.

BEGIN {
  # {abort} is negative while no error has occurred; otherwise it
  # holds the exit status to be returned by the script.
  abort = -1;
  USAGE = "check_translation < {INFILE}";

  # !!! Should read the aminoacid table from an external file !!!

  # Translation table (codon -> aminoacid code):
  split("", table);
  setup_aminoacid_table(table);
}

# Stop processing input as soon as an error has been flagged:
(abort >= 0) { exit abort; }

# Ignore blank lines (empty, or containing only spaces and tabs):
/^[ \t]*$/ { next; }

# Validate every remaining line:
{
  codon = toupper($1);
  amino = $2;
  msg = translation_error(NF, codon, amino, table);
  if (msg != "") { data_error(msg); }
}

function translation_error(nf, codon, amino, tb)
{
  # Checks one input record: {nf} is its field count, {codon} the
  # (already uppercased) codon and {amino} the claimed aminoacid code;
  # {tb} is the codon-to-aminoacid table.
  # Returns the empty string if the record is valid, otherwise the
  # error message describing the first problem found.

  if (nf != 2) { return "wrong number of fields"; }
  if (codon !~ /^[ATCG][ATCG][ATCG]$/) { return "bad codon format"; }
  # "$" must be accepted here because {tb} maps the stop codons to it:
  if (amino !~ /^[A-Z$]$/) { return "bad aminoacid format"; }
  if (amino != tb[codon])
    { return ("incorrect translation for \"" codon "\" = \"" amino "\" should be \"" tb[codon] "\""); }
  return "";
}

function setup_aminoacid_table(tb)
{
  # Stores in {tb} the codon-to-aminoacid table.
  # Aminoacids are represented by capital letters.
  # The stop codons are mapped to "$".

  tb["AAA"] = "K"; tb["AAT"] = "N"; tb["AAC"] = "N"; tb["AAG"] = "K";
  tb["ATA"] = "I"; tb["ATT"] = "I"; tb["ATC"] = "I"; tb["ATG"] = "M";
  tb["ACA"] = "T"; tb["ACT"] = "T"; tb["ACC"] = "T"; tb["ACG"] = "T";
  tb["AGA"] = "R"; tb["AGT"] = "S"; tb["AGC"] = "S"; tb["AGG"] = "R";

  tb["TAA"] = "$"; tb["TAT"] = "Y"; tb["TAC"] = "Y"; tb["TAG"] = "$";
  tb["TTA"] = "L"; tb["TTT"] = "F"; tb["TTC"] = "F"; tb["TTG"] = "L";
  tb["TCA"] = "S"; tb["TCT"] = "S"; tb["TCC"] = "S"; tb["TCG"] = "S";
  tb["TGA"] = "$"; tb["TGT"] = "C"; tb["TGC"] = "C"; tb["TGG"] = "W";

  tb["CAA"] = "Q"; tb["CAT"] = "H"; tb["CAC"] = "H"; tb["CAG"] = "Q";
  tb["CTA"] = "L"; tb["CTT"] = "L"; tb["CTC"] = "L"; tb["CTG"] = "L";
  tb["CCA"] = "P"; tb["CCT"] = "P"; tb["CCC"] = "P"; tb["CCG"] = "P";
  tb["CGA"] = "R"; tb["CGT"] = "R"; tb["CGC"] = "R"; tb["CGG"] = "R";

  tb["GAA"] = "E"; tb["GAT"] = "D"; tb["GAC"] = "D"; tb["GAG"] = "E";
  tb["GTA"] = "V"; tb["GTT"] = "V"; tb["GTC"] = "V"; tb["GTG"] = "V";
  tb["GCA"] = "A"; tb["GCT"] = "A"; tb["GCC"] = "A"; tb["GCG"] = "A";
  tb["GGA"] = "G"; tb["GGT"] = "G"; tb["GGC"] = "G"; tb["GGG"] = "G";
}

function data_error(msg)
{
  # Reports an error in the current input line: prints the line and
  # then "** FILENAME:FNR: {msg}" to standard error, sets {abort}
  # to 1 and exits with that status.
  printf " %s\n", $0 > "/dev/stderr";
  printf "** %s:%s: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  exit abort;
}

function arg_error(msg)
{
  # Reports a command-line error: prints {msg} and the usage string
  # {USAGE} to standard error, sets {abort} to 1 and exits with
  # that status.
  printf "** %s\n", msg > "/dev/stderr";
  printf "usage: %s\n", USAGE > "/dev/stderr";
  abort = 1;
  exit abort;
}

END {
  # Re-issue the exit status if an error was flagged earlier:
  if (abort >= 0) { exit abort; }
}