#! /usr/bin/gawk -f
# Last edited on 2004-12-26 15:33:46 by stolfi

# pick-some-records: select {nout} approximately equally spaced records
# out of an input that is supposed to contain {nin} non-blank records.
#
# Usage:
#   pick-some-records -v nin=NUM -v nout=NUM < INFILE > OUTFILE
#
# Only non-blank records (those matching /./) are counted and considered;
# blank records are skipped.  Each selected record is copied unchanged to
# standard output.  When the input really has {nin} non-blank records and
# {nout} >= 2, the first and last of them are always among those written.
#
# Diagnostics go to "/dev/stderr".  Argument errors terminate the program
# with exit status 1.  A mismatch between the declared and the actual
# record counts is only reported; it does not change the exit status.

BEGIN {
  usage = ( \
    "pick-some-records \\\n" \
    " -v nin=NUM -v nout=NUM < INFILE > OUTFILE" \
  );

  # {abort} is negative while all is well; otherwise it is the exit status.
  abort = -1;

  # Validate the arguments and force them to numeric type, so that the
  # comparisons below can never degenerate into string comparisons:
  nin = parse_count("nin", nin);
  nout = parse_count("nout", nout);
  if (nout > nin) { arg_error("bad \"nout\""); }

  nr = 0;  # Number of non-blank records read so far.
  nw = 0;  # Number of records written so far.
}

# Safety net: do not process any input once an error has been flagged.
(abort >= 0) { exit abort; }

# Main rule, applied to every non-blank record.
/./ {
  nr++;

  # Records beyond the declared {nin}, or read after the quota has been
  # filled, are still counted in {nr} (for the END report) but never written:
  if ((nr > nin) || (nw >= nout)) { next; }

  # Decide how many records we should have selected from those already read:
  ne = expected_count(nr);

  # Write this record if the rounded target is ahead of what was written:
  if (int(ne + 0.5) >= nw + 1) { print; nw++; }
  # printf "nr = %d ne = %.4f nw = %d\n", nr, ne, nw > "/dev/stderr";
}

END {
  if (abort >= 0) { exit abort; }
  # Report any discrepancy between the declared and the actual counts:
  if (nr != nin) { printf "actual nin = %d\n", nr > "/dev/stderr"; }
  if (nw != nout) { printf "actual nout = %d\n", nw > "/dev/stderr"; }
}

function expected_count(k)
{
  # Returns the (possibly fractional) number of records that should have
  # been written once {k} non-blank records have been read.  Uses the
  # global parameters {nin} and {nout}.

  # Kept from the original logic; the main rule skips every record when
  # {nout} is zero, so it never asks for this case.
  if (nout == 0) { return 0; }

  # Inputs too short for the interpolation below (it divides by {nin-2}):
  if (nin == 1) { return k; }

  # The last expected record must complete the quota:
  if (k == nin) { return nout; }

  if (nin == 2) { return k; }

  # Excluding the first and last records read and written, try to keep average:
  return 1 + (k - 1.0)*((nout - 2.0)/(nin - 2.0));
}

function parse_count(name, val,    num)
{
  # Checks that {val}, the value given for the command line variable
  # {name}, is a non-negative whole number, and returns it as a number.
  # Calls {arg_error} (which does not return) if it is missing or invalid.

  if (val == "") { arg_error("must specify \"" name "\""); }

  # Must look like a decimal number, optionally surrounded by blanks:
  if (val !~ /^[ \t]*[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?[ \t]*$/)
    { arg_error("bad \"" name "\""); }

  # Must also be a whole number and not negative:
  num = val + 0;
  if ((num < 0) || (num != int(num))) { arg_error("bad \"" name "\""); }
  return num;
}

function arg_error(msg)
{
  # Prints the error message {msg} and the usage text to "/dev/stderr",
  # then terminates the program with exit status 1.  The status is also
  # saved in {abort} so that the END rule, which still runs after {exit},
  # re-issues the same status instead of printing its count report.
  printf "** %s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit abort;
}