#! /usr/bin/gawk -f
# Last edited on 2020-11-17 22:59:27 by jstolfi

# Miscellaneous utility functions to be loaded with "-f" into other gawk programs.
#
# Conventions used throughout this file:
#   * Parameters after the extra spaces in a function header are local variables.
#   * {fname,nlin} identify the file name and line number quoted in error messages.
#   * Fatal errors set the global variable {abort} to 1 and call {exit(abort)},
#     so that an END rule of the host program can test {abort}.

function usf_check_num(fname,nlin,x)
  {
    # Checks that {x} is an unsigned decimal number (digits with an optional
    # decimal point) and returns it converted to a numeric value.
    # Accepts "." as a valid number (it converts to zero).
    # Calls {file_error} (fatal) if {x} is malformed.
    if (! match(x, /^[0-9]*([0-9]|[.][0-9]*)$/))
      { file_error(fname, nlin, ("invalid number \"" x "\"")); }
    x = x + 0.0;
    return x;
  }

function usf_check_date(fname,nlin,date)
  {
    # Checks that {date} has the format "20{YY}-{mm}-{dd}" with year in 2000..2029,
    # month in 01..12 and day in 01..31 (the day is not checked against the month).
    # Returns {date} unchanged; calls {file_error} (fatal) if malformed.
    if (! match(date, /^20[012][0-9]-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])$/))
      { file_error(fname, nlin, ("invalid date = \"" date "\"")); }
    return date;
  }

function usf_check_time(fname,nlin,time)
  {
    # Checks that {time} has the format "{HH}:{MM}:{SS}" with hour in 00..23,
    # minute in 00..59 and second in 00..60 (60 is accepted by the pattern).
    # Returns {time} unchanged; calls {file_error} (fatal) if malformed.
    if (! match(time, /^([01][0-9]|2[0-3]):[0-5][0-9]:([0-5][0-9]|60)$/))
      { file_error(fname, nlin, ("invalid time = \"" time "\"")); }
    return time;
  }

function usf_check_datetime(fname,nlin,dt,  date,time)
  {
    # Checks that {dt} is a date and a time separated by blanks.
    # Runs of two or more blanks are first collapsed to a single blank.
    # The two fields are then validated with {usf_check_date} and {usf_check_time}.
    # Returns the normalized {dt}; calls {file_error} (fatal) if malformed.
    gsub(/[ ][ ]+/, " ", dt);
    if (! match(dt, /^[^ ]+[ ][^ ]+$/))
      { file_error(fname, nlin, ("invalid datetime = \"" dt "\"")); }
    # Date is everything before the blank:
    date = dt; gsub(/[ ].*$/, "", date);
    date = usf_check_date(fname,nlin,date);
    # Time is everything after the blank:
    time = dt; gsub(/^.*[ ]/, "", time);
    time = usf_check_time(fname,nlin,time);
    return dt;
  }

function usf_dates_are_consecutive(odate,date,  tst,otst)
  {
    # If {odate} is empty, returns 1.
    # If {odate} is not empty, checks that {odate} and {date} are consecutive days.
    # Returns 1 if they are, 0 if not.
    if (odate == "")
      { return 1; }
    else
      { # Append "00:00:00" times and check difference in seconds:
        return usf_datetimes_are_consecutive((odate " 00:00:00"), (date " 00:00:00"), 24*3600);
      }
  }

function usf_datetimes_are_consecutive(odt,dt,step,  tst,otst,diff)
  {
    # If {odt} is empty, returns 1.
    # If {odt} is not empty, checks that {odt} and {dt} are separated by {step} seconds
    # (or {step+1}, allowing for one leap second between them).
    # Returns 1 if they are, 0 if not.
    if (odt == "")
      { return 1; }
    else
      { # Convert date and time to UTC timestamp:
        otst = usf_datetime_to_timestamp(odt);
        tst = usf_datetime_to_timestamp(dt);
        # Check if input dates are consecutive:
        diff = tst - otst;
        return ((diff == step) || (diff == step+1));
      }
  }

function usf_date_and_time_to_timestamp(date,time,  tst)
  {
    # Converts date "{YYYY}-{mm}-{dd}" and time "{HH}:{MM}:{SS}" (UTC) to timestamp.
    # Be sure to have the environment variable "TZ" set to "UTC".
    return usf_datetime_to_timestamp((date " " time));
  }

function usf_datetime_to_timestamp(dt,  tst)
  {
    # Converts datetime "{YYYY}-{mm}-{dd} {HH}:{MM}:{SS}" (UTC) to timestamp.
    # Be sure to have the environment variable "TZ" set to "UTC";
    # otherwise {arg_error} is called (fatal).
    # NOTE: {mktime} returns -1 for a malformed datespec, and that value is
    # returned as is; validate {dt} with {usf_check_datetime} beforehand.
    if (ENVIRON["TZ"] != "UTC") { arg_error(("must set TZ to 'UTC'")); }
    tst = (dt " 0"); # Final 0 says "no daylight savings time".
    gsub(/[-:]/, " ", tst); # {mktime} wants spaces as separators.
    tst = mktime(tst);
    return tst;
  }

function usf_check_prices(fname,nlin,pop,phi,plo,pcl,vbt,vcr,pav, uphl,uvbt,uvcr,upav,  tvbt,tvcr,tpav,ploc,phic)
  {
    # Checks consistency of data from a summary file line:
    # the opening price {pop}, the high and low prices {phi,plo},
    # the closing price {pcl}, the BTC volume {vbt}, the currency volume {vcr},
    # and the average price {pav}. The parameters {uvbt,uvcr,upav,uphl} define the precision
    # (unit-in-last-place) of those numbers: {uphl} for {phi,plo},
    # {uvbt} for {vbt}, {uvcr} for {vcr}, {upav} for {pav}.
    #
    # Structural inconsistencies are fatal (via {file_error}); an average price
    # that disagrees with the volumes only produces a warning on "/dev/stderr".

    if ((vcr == 0) != (vbt == 0))
      { file_error(fname, nlin, ("inconsistent vbt = \"" vbt "\" vcr = \"" vcr "\"")); }

    if (pav == 0)
      { # No trade: all values must be null:
        usf_check_null_value(fname,nlin, pop, "opening price");
        usf_check_null_value(fname,nlin, phi, "high price");
        usf_check_null_value(fname,nlin, plo, "low price");
        usf_check_null_value(fname,nlin, pcl, "closing price");
        usf_check_null_value(fname,nlin, vbt, "BTC volume");
        usf_check_null_value(fname,nlin, vcr, "currency volume");
      }
    else
      { # All values must be non-null:
        usf_check_non_null_value(fname,nlin, pop, "opening price");
        usf_check_non_null_value(fname,nlin, phi, "high price");
        usf_check_non_null_value(fname,nlin, plo, "low price");
        usf_check_non_null_value(fname,nlin, pcl, "closing price");
        usf_check_non_null_value(fname,nlin, vbt, "BTC volume");
        usf_check_non_null_value(fname,nlin, vcr, "currency volume");

        # All prices must be in the range {[plo_phi]}, up to the rounding of {plo,phi}.
        # The precision {uphl} MUST be passed before the name: without it the name
        # string would be taken as the {ulp} argument, giving a zero tolerance
        # and an empty name in the message.
        usf_check_price_range(fname,nlin, pop, plo,phi, uphl, "opening price");
        usf_check_price_range(fname,nlin, pcl, plo,phi, uphl, "closing price");
        usf_check_price_range(fname,nlin, pav, plo,phi, uphl, "average price");

        # The volumes and the average price must be consistent, except for rounding:
        tvbt = 0.50001*uvbt;
        tvcr = 0.50001*uvcr;
        ploc = (vcr <= tvcr ? upav : (vcr - tvcr)/(vbt + tvbt)); # Min average price assuming worst rounding.
        phic = (vbt <= tvbt ? 1e10 : (vcr + tvcr)/(vbt - tvbt)); # Max average price assuming worst rounding.
        tpav = 2.0*upav;
        if ((pav > phic + tpav) || (pav < ploc - tpav))
          { printf "%s:%s: !! average price inconsistent with volumes", fname, nlin > "/dev/stderr";
            printf " is %.5f should be in [%.5f _ %.5f]\n", pav, ploc - tpav, phic + tpav > "/dev/stderr";
          }
      }
  }

function usf_round_value(val,ulp)
  {
    # Rounds the non-negative value {val} to the nearest multiple of {ulp}
    # (so that the unit in the last place is {ulp}), and makes sure the
    # result is not zero by raising it to {ulp} if it came out smaller.
    val = ulp*int(val/ulp + 0.5);
    if (val < ulp) { val = ulp; }
    return val;
  }

function usf_check_rounding(fname,nlin, vnew,vold,ulp,name)
  {
    # Checks whether {vnew} could have been rounded to {vold}
    # given that the precision of {vold} is {ulp}.
    # Implemented as a range check against the degenerate range {[vold_vold]}.
    usf_check_price_range(fname,nlin, vnew, vold,vold, ulp, name);
  }

function usf_check_price_range(fname,nlin, val,plo,phi,ulp,name,  tol)
  {
    # Checks whether {val} is in the positive range {[plo_phi]} with some tolerance.
    # Assumes that the precision of {phi,plo} is {ulp}.
    # A value outside the range by more than about {ulp/2} gets a warning on
    # "/dev/stderr"; outside by more than about {ulp} is fatal (via {file_error}).
    tol = 0.500001*ulp;
    if ((val < plo-tol) || (val > phi+tol))
      { printf "%s:%s: !! %s out of range", fname, nlin, name > "/dev/stderr";
        printf " is %.5f should be in [%.5f _ %.5f]\n", val, plo-tol, phi+tol > "/dev/stderr";
        if ((val < plo - 2*tol) || (val > phi + 2*tol))
          { file_error(fname,nlin, ("excessive range overflow")); }
      }
  }

function usf_check_null_value(fname,nlin, val,name)
  {
    # Fatal error (via {file_error}) unless {val} is numerically zero.
    if (val != 0.0)
      { file_error(fname,nlin, (name " = \"" val "\" should be zero")); }
  }

function usf_check_non_null_value(fname,nlin, val,name)
  {
    # Fatal error (via {file_error}) if {val} is numerically zero.
    if (val == 0.0)
      { file_error(fname,nlin, (name " = \"" val "\" should not be zero")); }
  }

function usf_check_min_value(fname,nlin, val,vmin,name)
  {
    # Fatal error (via {file_error}) if {val} is less than {vmin}.
    if (val < vmin)
      { file_error(fname,nlin, (name " = \"" val "\" should be at least " vmin "")); }
  }

function data_error(msg)
  {
    # Reports a fatal error in the current input record: prints {msg} tagged with
    # {FILENAME} and {FNR}, then the record itself, sets {abort} and exits.
    printf "%s:%s: ** %s\n", FILENAME, FNR, msg > "/dev/stderr";
    printf " «%s»\n", $0 > "/dev/stderr";
    abort = 1;
    exit(abort);
  }

function data_warning(msg)
  {
    # Like {data_error}, but only warns: prints {msg} and the current record, then returns.
    printf "%s:%s: !! %s\n", FILENAME, FNR, msg > "/dev/stderr";
    printf " «%s»\n", $0 > "/dev/stderr";
  }

function arg_error(msg)
  {
    # Reports a fatal error not tied to any file: prints {msg}, sets {abort} and exits.
    printf "** %s\n", msg > "/dev/stderr";
    abort = 1;
    exit(abort);
  }

function arg_warning(msg)
  {
    # Prints the warning {msg}, not tied to any file, and returns.
    printf "!! %s\n", msg > "/dev/stderr";
  }

function file_error(f,n,msg)
  {
    # Reports a fatal error at line {n} of file {f}, sets {abort} and exits.
    # If {f} is empty, falls back to {arg_error} (no file/line prefix).
    if (f == "")
      { arg_error(msg); }
    else
      { printf "%s:%d: ** %s\n", f, n, msg > "/dev/stderr";
        abort = 1;
        exit(abort);
      }
  }

function file_warning(f,n,msg)
  {
    # Prints a warning about line {n} of file {f} and returns.
    # If {f} is empty, falls back to {arg_warning} (no file/line prefix).
    if (f == "")
      { arg_warning(msg); }
    else
      { printf "%s:%s: !! %s\n", f, n, msg > "/dev/stderr"; }
  }

function prog_error(msg)
  {
    # Reports an internal program error: prints {msg}, sets {abort} and exits.
    printf "** PROG ERROR: %s\n", msg > "/dev/stderr";
    abort = 1;
    exit(abort);
  }