#! /usr/bin/gawk -f
# Last edited on 2013-10-14 20:11:18 by stolfilocal

# Assumes that each line of the input is a nucleotide sequence.
# Applies a small random mutation at the beginning of each line:
# the first {p} bases (1..3, capped at the sequence length) are replaced
# by {q} random bases (1..3).  Lines that are not nucleotide sequences
# (headers, comments, blank lines) are copied to the output unchanged.
#
# Note: rand() is never seeded with srand(), so gawk produces the same
# sequence of mutations on every run with the same input.

# Returns the minimum of three independent uniform draws from {1,2,3},
# which biases the result towards short lengths.
function small_len(    a, b, c, m) {
    a = int(rand()*3) + 1;
    b = int(rand()*3) + 1;
    c = int(rand()*3) + 1;
    m = (a < b ? a : b);
    return (m < c ? m : c);
}

# Returns {seq} (a string of bases with no blanks) with a short random
# prefix replaced by a short random string of bases.  The extra
# parameters after {seq} are local variables.
function mutate_prefix(seq,    n, p, q, i, j, alphabet, rep) {
    n = length(seq);

    # Choose length {p} of subseq to replace (never more than the sequence):
    p = small_len();
    if (p > n) { p = n; }

    # Choose the replacement length {q}:
    q = small_len();

    # Use the RNA alphabet when the sequence is unambiguously RNA, so that
    # no T is inserted into a U-based sequence.  Same base order as the DNA
    # alphabet, so DNA results are unaffected.
    alphabet = ((seq ~ /U/) && (seq !~ /T/) ? "AUCG" : "ATCG");

    # Create the replacement string:
    rep = "";
    for (i = 0; i < q; i++) {
        j = int(rand()*4) + 1;
        rep = (rep substr(alphabet, j, 1));
    }

    # Replace:
    return (rep substr(seq, p + 1));
}

# Nucleotide line: must contain at least one base, so that lines made
# only of blanks fall through to the pass-through rule below.
/^ *[ATCGU][ATCGU ]*$/ {
    lin = $0;
    gsub(/[ ]/, "", lin);
    print mutate_prefix(lin);
    next;
}

// {
    # May be header or comment:
    print;
}