#! /bin/bash
# Last edited on 2008-07-14 19:53:02 by stolfi

# USAGE: ${0} < {INFILE} > {OUTFILE}
# Extracts the number-like sequences from a 
# text file; outputs a frequency count.

cat \
  | gawk '//{ for (t=0;t<2;t++) { $0 = gensub(/[0-9]*([.][0-9]|[0-9][.]|[0-9])[0-9]*/, " & ", "g", $0); } print; }' \
  | tr ' ' '\012' \
  | egrep -e '^[0-9]*([.][0-9]|[0-9][.]|[0-9])[0-9]*$' \
  | sort \
  | uniq -c \
  | sort -k1,1nr


