#! /usr/bin/gawk -f
# Last edited on 2015-02-24 18:13:01 by stolfilocal

# Reads a data file resulting from the join of two price files
# by the date and time fields.  Checks whether the differences
# between corresponding fields are small enough.
#
# Each data line must have 15 fields separated by "|":
#   field 1      datetime;
#   fields 2..8  opening, high, low, closing price, BTC volume,
#                currency volume, average price;
#   fields 9..15 the values that are compared against fields 2..8,
#                in the same order.
#
# The user must define (with {export}) the environment variable {TZ="UTC"},
# and load (with "-f") the library "useful_functions.gawk".
#
# NOTE(review): {arg_error}, {data_error}, {usf_check_datetime} and
# {usf_check_num} are not defined in this file; presumably they come
# from "useful_functions.gawk" -- confirm.

BEGIN {
    if (ENVIRON["TZ"] != "UTC") { arg_error(("must set TZ to 'UTC'")); }

    FS = "|";
    nints = 0; # Number of data lines.

    # Typical precision of input values:
    ulp_vbt = 0.01; # Unit in last place for {vbt}.
    ulp_vcr = 0.01; # Unit in last place for {vcr}.
    ulp_pav = 0.01; # Unit in last place for {pav}.
    ulp_phl = 0.01; # Unit in last place for {pop,phi,plo,pcl}.

    # Field names:
    split("", fld_name);
    fld_name[1] = "datetime";
    fld_name[2] = "opening price";
    fld_name[3] = "high price";
    fld_name[4] = "low price";
    fld_name[5] = "closing price";
    fld_name[6] = "BTC volume";
    fld_name[7] = "currency volume";
    fld_name[8] = "average price";

    # Fields 9..15 are checked by {check_null_field} and
    # {check_non_null_field}; give them names too, so that the error
    # messages do not start with an empty name.  Field {i+7} is the
    # counterpart of field {i} (same pairing as in the {compare_fields}
    # calls below).
    for (i = 2; i <= 8; i++) {
        fld_name[i + 7] = (fld_name[i] " (field " (i + 7) ")");
    }

    # Maximum error in each field (only entries 2..8 are reported):
    split("", emax);
    for (i = 1; i < 15; i++) { emax[i] = 0; }

    # Datetime range in file:
    dt_ini = "???";
    dt_fin = "???";
    odt = ""; # Previous datetime.
}

# Every line: normalize tabs before any other rule looks at the line.
// {
    # Remove tabs:
    gsub(/[\011]/, " ", $0);
}

# Data lines:
/^20/ {
    if (NF != 15) { data_error(("wrong field count = " NF "")); }

    # Get the input fields:
    dt = usf_check_datetime(FILENAME, FNR, $1); # Datetime.

    # Consistency checks (datetimes must be strictly increasing):
    if (dt <= odt) { data_error(("repeated datetime = \"" odt "\" \"" dt "\"")); }
    odt = dt;

    # Get the original average price {pav} and volumes {vbt,vcr}:
    pav = $8 + 0;
    vbt = $6 + 0;
    vcr = $7 + 0;

    if ((pav == 0) || ((vbt == 0) && (vcr == 0))) {
        # New fields must be zero:
        for (i = 9; i <= 15; i++) { check_null_field(i); }
    } else {
        # New fields must be non-zero:
        for (i = 9; i <= 15; i++) { check_non_null_field(i); }

        # Check prices {pop,phi,plo,pcl}:
        tol_phl = 0.50001 * ulp_phl;
        emax[2] = max_error(emax[2], compare_fields(2,  9, tol_phl)); # Opening price.
        emax[3] = max_error(emax[3], compare_fields(3, 10, tol_phl)); # High price.
        emax[4] = max_error(emax[4], compare_fields(4, 11, tol_phl)); # Low price.
        emax[5] = max_error(emax[5], compare_fields(5, 12, tol_phl)); # Closing price.

        # Check the average price:
        tol_pav = 0.50001 * 2*ulp_pav;
        emax[8] = max_error(emax[8], compare_fields(8, 15, tol_pav)); # Average price.

        # Check the currency volume:
        tol_vcr = 0.50001 * 2*ulp_vcr;
        emax[7] = max_error(emax[7], compare_fields(7, 14, tol_vcr)); # Currency volume.

        # Check the BTC volume, assumed computed by {vcr/pav}.  The tolerance
        # is widened to half the range of {vcr/pav} obtained when both
        # operands are off by their own tolerances:
        vloc = (vcr <= tol_vcr ? 0.0 : (vcr - tol_vcr)/(pav + tol_pav));  # Min volume assuming worst rounding.
        vhic = (pav <= tol_pav ? 1e10 : (vcr + tol_vcr)/(pav - tol_pav)); # Max volume assuming worst rounding.
        tol_vbt = max_error(0.50001 * 2*ulp_vbt, 0.50001*(vhic - vloc));
        emax[6] = max_error(emax[6], compare_fields(6, 13, tol_vbt)); # BTC volume.
    }

    # Update the date/time range:
    if (nints == 0) { dt_ini = dt; }
    dt_fin = dt;
    nints++;
    next;
}

# Any line not consumed by the data rule above is an error:
// {
    data_error(("invalid line format"));
}

END {
    printf "found %d intervals, from %s to %s\n", nints, dt_ini, dt_fin > "/dev/stderr";
    printf "max errors relative to tolerance:\n" > "/dev/stderr";
    for (i = 2; i <= 8; i++) {
        printf " %10.5f %s\n", emax[i], fld_name[i] > "/dev/stderr";
    }
    # No explicit {exit(0)} here: it would overwrite a non-zero status
    # set by an earlier {exit} in a main rule.  Falling off the end of
    # {END} keeps that status, and yields 0 when there was none.
}

function check_null_field(kf,    val,name) {
    # Reports a data error if field number {kf} of the current
    # record is not numerically zero.
    name = fld_name[kf];
    val = usf_check_num(FILENAME, FNR, $(kf));
    if (val != 0.0) { data_error((name " = \"" val "\" should be zero")); }
}

function check_non_null_field(kf,    val,name) {
    # Reports a data error if field number {kf} of the current
    # record is numerically zero.
    name = fld_name[kf];
    val = usf_check_num(FILENAME, FNR, $(kf));
    if (val == 0.0) { data_error((name " = \"" val "\" should be non-zero")); }
}

function compare_fields(kfa,kfb,tol,    va,vb,d,name) {
    # Compares fields with indices {kfa,kfb} of the current record with
    # tolerance {tol} (which must be positive).  If the absolute difference
    # exceeds {tol}, prints a warning to "/dev/stderr" but does not abort.
    # Returns the absolute difference divided by {tol} (so a result
    # above 1 means "out of tolerance"), or 0 if both fields are zero.
    name = fld_name[kfa];
    va = usf_check_num(FILENAME, FNR, $(kfa));
    vb = usf_check_num(FILENAME, FNR, $(kfb));
    if ((va == 0) && (vb == 0)) {
        return 0;
    } else {
        d = va - vb;
        if (d < 0) { d = -d; }
        if (d > tol) {
            printf "%s:%s: !!", FILENAME, FNR > "/dev/stderr";
            printf " %s fields $%d = %.5f and $%d = %.5f", name, kfa, va, kfb, vb > "/dev/stderr";
            printf " differ by %.5f, tol = %.5f\n", d, tol > "/dev/stderr";
        }
        return d/tol;
    }
}

function max_error(x,y) {
    # Returns whichever of {x,y} is numerically larger ({y} on ties).
    return (x+0 > y+0 ? x : y);
}