#! /bin/bash
# Last edited on 2008-07-14 19:53:02 by stolfi

# USAGE: ${0} < {INFILE} > {OUTFILE}
#
# Extracts the number-like sequences from a text file read on standard
# input and writes a frequency count to standard output.
#
# A "number-like sequence" is a run of ASCII digits that may contain one
# '.' (leading, trailing or embedded) and has at least one digit, e.g.
# "12", "12.5", "12." or ".5".  Sequences are taken left to right, longest
# match first, so "1.2.3" contributes the two tokens "1.2" and ".3".
# Numbers embedded in other text ("abc12def") are extracted as well.
#
# Output has one line per distinct token, in the format produced by
# `uniq -c` (count, then the token), ordered by decreasing count.  The
# order among tokens with equal counts is whatever `sort` falls back to
# in the current locale.

#######################################
# Reads text on stdin, writes the token frequency table on stdout.
# Globals:   none
# Arguments: none
# Outputs:   "COUNT TOKEN" lines on stdout, highest count first
# Returns:   exit status of the final `sort`
#######################################
count_numbers() {
  # grep -o prints every non-overlapping match on its own line, which is
  # exactly the token list; no separate padding/splitting/filtering stages
  # are needed.
  # -a keeps grep from replacing its output with a "Binary file ... matches"
  # notice when the input has bytes it considers non-text.
  # -E replaces the obsolescent `egrep` command.
  grep -a -o -E -e '[0-9]*([.][0-9]|[0-9][.]|[0-9])[0-9]*' \
    | sort \
    | uniq -c \
    | sort -k1,1nr
}

count_numbers