#! /usr/bin/gawk -f
# Scans a file and outputs a "sed" file that
# fixes any "?" symbols
#
# Overview of what is visible in this file:
#   * BEGIN sets up two arrays ("fw", "delicate") and the window geometry
#     ("lenw", "lenl", "lenr").
#   * The /./ rule slides a "lenw"-character window over every non-empty
#     input line and hands each window to tally() together with "fw".
#   * END calls prules(fw), whose surviving tail prints sed commands of the
#     form "s/PATTERN/REPLACEMENT/g" on standard output.
#
# NOTE(review): this copy of the script is damaged.  The text between
# "for(i=0;i" and "=MINOCC) && ..." is missing; it looks as if everything
# from a "<" up to the next ">" was stripped.  The helpers tally(),
# sedpattern(), sedreplacement(), the head of prules(), and any definition
# of MINOCC, padr and fc are therefore not visible here.  Restore them from
# a pristine copy before running this.

BEGIN {
  # split("", arr) leaves "arr" as an empty array.
  split("", fw);
  split("", delicate);

  # characters that need to be protected in sed patterns:
  delchars = "^$/\\&*.+[]|";

  # Register each character of "delchars" as a key of "delicate".
  # NOTE(review): "true" is not an awk keyword; it is an uninitialized
  # variable, so the stored value is the empty string.  That is harmless if
  # the consumer tests membership with "(ch in delicate)", but a test of
  # the form "if (delicate[ch])" would never succeed.  The consumer is in
  # the missing part of the file -- confirm.
  for(i=1;i<=length(delchars);i++)
    { delicate[substr(delchars,i,1)] = true }

  # Window geometry: "lenw" is the window width used by the /./ rule below.
  # With lenw = 5 these evaluate to lenl = 2 and lenr = 2 (presumably the
  # number of context characters left and right of the window's center).
  lenw = 5;
  lenl = int(lenw/2);
  lenr = lenw-1-lenl;

  # "padl" is prepended to every input line by the /./ rule.
  padl = "";

  # NOTE(review): DAMAGED SPAN.  The next line is what remains of (at
  # least) the padding loop that starts with "for(i=0;i" and the condition
  # inside prules() that ends with "=MINOCC) && (2*fw[w] > fc[c])) {".
  # As written it is not the original code; do not try to interpret it.
  #
  # What the surviving tail shows: when the (partly lost) condition holds,
  # including "2*fw[w] > fc[c]", one sed substitution command is printed
  # whose pattern is sedpattern(c) and whose replacement is
  # sedreplacement(w).  Presumably "w" is a window seen in the input and
  # "c" its counterpart containing "?" -- TODO confirm against the
  # original.
  for(i=0;i=MINOCC) && (2*fw[w] > fc[c])) {
          # Disabled debugging output: the two counts being compared.
          # printf "# %6d %6d\n", fw[w], fc[c];
          printf "s/%s/%s/g\n", sedpattern(c), sedreplacement(w)
        }
    }
}

# Every line with at least one character: surround it with "padl" and
# "padr" and pass each "lenw"-character substring starting at positions
# 1..m of the padded line to tally(), where m is the length of the
# unpadded line.  tally() is not visible here; presumably it counts
# occurrences of "w" in "fw".
/./ {
  m = length($0);
  lin = (padl ($0) padr);
  for(i=1;i<=m;i++)
    { w = substr(lin,i,lenw);
      tally(w, fw)
    }
}

# Empty lines are skipped.  No further pattern rules follow this one in the
# visible source, so "next" has nothing left to bypass here.
/^$/ { next; }

# After all input has been read: print the rules derived from "fw" and
# terminate with exit status 0.
END {
  prules(fw)
  exit 0;
}