#! /usr/bin/gawk -f # Last edited on 2022-10-22 03:36:02 by stolfi # Reads from {stdin} a file with at least two numeric columns with variables {X} and {Y}. # # Writes to standard output a ".fni" float image with the 2D histogram of # those two variables, with {X} on the horizontal axis, {Y} on the vertical # axis. Each pixel will be a bucket of the histogram, and the pixel value # is the number of {(X,Y)} pairs from the input file that fall into that bucket, # or a fraction as specified by the {norm} argument (see below). # # The client must load (with "-f") the library {write_fni_image.gawk}. # It must also define (with "-v") the variables # # {xcol} Index (from 1) of column of input file with the {X} variable. # {xmin} Nominal min value of {X}. # {xmax} Nominal max value of {X}. # {xnum} Number of buckets (pixels) in horizontal axis. # # {ycol} Index (from 1) of column of input file with the {Y} variable. # {ymin} Nominal min value of {Y}. # {ymax} Nominal max value of {Y}. # {ynum} Number of buckets (pixels) in horizontal axis. # # {noex} Boolean: if 1, rejects pairs {(X,Y)} outside of the # rectangle {[xmin _ xmax] x [ymin _ymax]}; if 0, maps them # to the nearest bucket. # # {norm} If "x", divides the count of each bucket by the total count # data pairs in that bucket row. If "y", same but by column. # If "xy", divides by the total data pairs read. In all three # the denominator includes pairs that fell off the stated # {X} and/or {Y} ranges. If "" (or omitted), the pixel values are # the raw counts. # BEGIN \ { xcol = check_int_arg("xcol", xcol, 1, 999999); xmin = check_num_arg("xmin", xmin, -1.0e100, +1.0e100); xmax = check_num_arg("xmax", xmax, xmin + 1.0e-100 + 1.0e-12*fabs(xmin), +1.0e100); xnum = check_int_arg("xnum", xnum, 1, 8192); ycol = check_int_arg("ycol", ycol, 1, 999999); ymin = check_num_arg("ymin", ymin, -1.0e100, +1.0e100); ymax = check_num_arg("ymax", ymax, ymin + 1.0e-100 + 1.0e-12*fabs(ymin), +1.0e100); ynum = check_int_arg("ynum", ynum, 1, 8192); norm = check_norm_arg("norm", norm); noex = check_bool_arg("noex", noex); NC = 1; # Image channels. NX = xnum+0; NY = ynum+0; dx = (xmax - xmin)/NX; # {X} bucket width. dy = (ymax - ymin)/NY; # {Y} bucket width. nread = 0; # Number of entries read. exct = 0; # Count of data points that fell outside the range. split("", binct); # Element {binct[kx,ky]} is the total count for bin {[kx,ky]}. split("", colct); # Element {colct[kx]} is the total count for bin column {kx}. split("", rowct); # Element {rowct[ky]} is the total count for bin row {ky}. for (kx = 0; kx < xnum; kx++) { colct[kx] = 0; } for (ky = 0; ky < ynum; ky++) { colct[ky] = 0; } for (kx = 0; kx < xnum; kx++) { for (ky = 0; ky < ynum; ky++) { binct[kx,ky] = 0; } } } /(^[ ]*([#]|$))|[!]/ { next; } ((NF < xcol) || (NF < ycol)) { data_error(("wrong {NF}")); } // \ { nread++; X = check_num_datum("X", $(xcol)); Y = check_num_datum("Y", $(ycol)); kx = bucket_index(X, xmin, xmax, xnum, noex); ky = bucket_index(Y, ymin, ymax, ynum, noex); # printf "!! X = %12.8f Y = %12.8f kx = %5d ky = %5d\n", X, Y, kx, ky > "/dev/stderr" if ((kx >= 0) && (ky >= 0)) { binct[kx,ky]++; } if (kx >= 0) { colct[kx]++; } if (ky >= 0) { rowct[ky]++; } # Count pairs outside the range: if ((x < xmin) || (x > xmax) || (y < ymin) || (y > ymax)) { exct++; } next; } END \ { printf "read %d valid data points\n", nread > "/dev/stderr"; if (dvnat_out > 0) { printf "!! %d pairs outside the given ranges", exct > "/dev/stderr"; if (noex) { printf " (excluded)\n" > "/dev/stderr"; } else { printf " (mapped to nearest bin)\n" > "/dev/stderr"; } } if ((nread > 0) && (norm != "")) { # Apply normalizations: for (kx = 0; kx < NX; kx++) { for (ky = 0; ky < NY; ky++) { if (norm == "xy") { den = nread; } else if (norm == "x") { den = rowct[ky]; } else if (norm == "y") { den = colct[kx]; } else { bug("duh?"); } if (den > 0) { binct[kx,ky] /= den; } } } } # Build image: split("", img); sum = 0; for (kx = 0; kx < NX; kx++) { for (ky = 0; ky < NY; ky++) { img[0,kx,ky] = binct[kx,ky]; sum += img[0,kx,ky]; } } printf "total bin value in image = %16.8f\n", sum > "/dev/stderr" write_fni_image(NC, NX, NY, img, "/dev/stdout"); } function bucket_index(v,vmin,vmax,vnum,noex, kv) \ { # Computes the bucket index for value {v}, given the histogram range {[vmin _ vmax]} # and the number of bins {vnum}. If {noex} is true, values outside that range, # are mapped to {-1}, otherwise to {0} or {vnum-1}. # # If {v} falls precisely on a bin boundary, returns one of the adjacent bins, # unpredictably. if (v < vmin) { kv = (noex ? -1 : 0); } else if (v > vmax) { kv = (noex ? -1 : vnum-1); } else { kv = floor((v - vmin)*vnum/(vmax - vmin)); # Fix rounding problems: if (kv < 0) { kv = 0; } if (kv >= vnum) {kv = vnum-1; } } return kv } function check_bool_arg(vname,v) \ { # Check whether {v} is a valid boolean argument value and converts it to a number. if (v == "") { arg_error(("must define {" vname "}")); } if (v !~ /^[01]+$/) { arg_error("invalid bool value of {" vname "} = \"" v "\""); } v += 0; if ((v != 0) && (v != 1)) { arg_error("value of {{" vname "}} = \"" v "\" out of range"); } return v; } function check_int_arg(vname,v,vmin,vmax) \ { # Check whether {v} is a valid integer argument value and converts it to a number. if (v == "") { arg_error(("must define {" vname "}")); } if (v !~ /^[-+]?[0-9]+$/) { arg_error("invalid int value of {{" vname "}} = \"" v "\""); } v += 0; if ((v < vmin) || (v > vmax)) { arg_error("value of {" vname "} = \"" v "\" out of range"); } return v; } function check_num_arg(vname,v,vmin,vmax) \ { # Check whether {v} is a valid numeric argument value and converts it to a number. if (v == "") { arg_error(("must define {" vname "}")); } if (v !~ /^[-+]?[0-9]*(([.][0-9]|[0-9][.])[0-9]*|[0-9])(|[Ee][-+][0-9]+)$/) { arg_error("invalid num value of {" vname "} = \"" v "\""); } v += 0; if ((v < vmin) || (v > vmax)) { arg_error("value of {" vname "} = \"" v "\" out of range"); } return v; } function check_norm_arg(vname,v) \ { # Check whether {v} is a valid normalization option and returns it. if ((v != "") && (v != "x") && (v != "y") && (v != "xy")) { arg_error("invalid normalization option {" vname "} = \"" v "\""); } return v; } function check_num_datum(vname,v) \ { # Check whether {v} is a valid numeric data value and converts it to a number. if (v !~ /^[-+]?[0-9]*(([.][0-9]|[0-9][.])[0-9]*|[0-9])(|[Ee][-+][0-9]+)$/) { data_error("invalid value of {" vname "} = \"" v "\""); } v += 0; return v; } function fabs(x) { if (x < 0) { x = -x; } return x; } function floor(x) { x = (x >= 0? int(x) : -int(-x)); return x; } function data_error(msg) { printf "%s:%s: ** %s\n", FILENAME, FNR, msg > "/dev/stderr"; printf " «%s»\n", $0 > "/dev/stderr"; abort = 1; exit(abort); } function arg_error(msg) { printf "** %s\n", msg > "/dev/stderr"; abort = 1; exit(abort); }