#! /usr/bin/gawk -f
# Last edited on 2014-11-01 05:25:12 by stolfilocal

# Miscellaneous functions for the merging of price files.
# To be loaded with "-f" into other gawk programs.

function mpf_initialize_index_tables()
{
  # Initializes (as empty arrays) the global tables that describe the
  # files to be merged.  The tables are indexed {0..nfiles-1}.  Namely:

  split("", inidate_fi);  # Nominal start date of file.
  split("", findate_fi);  # Nominal end date of file.
  split("", extag_fi);    # Tag of exchange ("MGOX", "OKCO", etc).
  split("", crtag_fi);    # Currency symbol ("USD", "CNY", etc.).
  split("", exname_fi);   # Name of exchange.
  split("", rate_fi);     # Default units of {crtag_fi[kf]} worth 1 USD.
  split("", rlodate_fi);  # Initial date of range to consider.
  split("", rhidate_fi);  # Final date of range to consider.
  split("", color_fi);    # Color to use in plots.
}

function mpf_read_index_file( \
    fname, \
    inidate_fi,findate_fi,extag_fi,crtag_fi,exname_fi,rate_fi,rlodate_fi,rhidate_fi,color_fi, \
    nlin,lin,nfiles,fld,nfld,extag,crtag,exname,rate,rlodate,rhidate,color,kf \
  )
{
  # Reads the index file {fname} that describes the data files
  # to be merged.  Each non-blank line of the index file must contain:
  #
  #   "{INIDATE} {FINDATE} {EXTAG} {CRTAG} {EXNAME} {RATE} {RLODATE} {RHIDATE} {COLOR}"
  #
  # This line describes the file whose nominal date span is {INIDATE .. FINDATE},
  # for the exchange whose tag is {EXTAG} (e.g. "MGOX"), with trade currency
  # {CRTAG} (e.g. "CNY").  The {EXNAME} (a string without blanks) is for
  # documentation purposes only, and the {RATE} may be used in price plots.
  # The file is to be clipped to the range {RLODATE .. RHIDATE}, inclusive both.
  # The {COLOR} may be used in plots.
  #
  # Tabs are treated as blanks, and anything from "#" to the end of the
  # line is ignored.  Lines that are blank after that cleanup are skipped.
  #
  # Those fields are returned in the arguments {inidate_fi,findate_fi,extag_fi},
  # etc., which should be pre-allocated arrays, indexed {0..nfiles-1}.
  # Returns the number of files {nfiles}.
  #
  # The arguments after {color_fi} are local variables; callers
  # should not supply them.
  #
  # Aborts (through {file_error} or {arg_error}) on a malformed entry,
  # on a read error, or when no line at all could be read.

  nlin = 0;   # Number of lines read.
  nfiles = 0; # Number of relevant lines.
  while ((getline lin < fname) > 0)
    {
      nlin++;
      # Remove tabs, inline comments, spurious blanks:
      gsub(/[\011]/, " ", lin);
      gsub(/[#].*$/, "", lin);
      gsub(/^[ ]+/, "", lin);
      gsub(/[ ]+$/, "", lin);
      gsub(/[ ][ ]+/, " ", lin);
      if (lin != "")
        {
          nfld = split(lin, fld, " ");
          if (nfld != 9)
            { file_error(fname, nlin, ("bad file index entry = \"" lin "\"")); }
          kf = nfiles;
          inidate_fi[kf] = mpf_check_date(fname,nlin,fld[1]);
          findate_fi[kf] = mpf_check_date(fname,nlin,fld[2]);
          extag_fi[kf] = fld[3];
          crtag_fi[kf] = fld[4];
          # Must be stored in the {exname_fi} argument, not in a stray global:
          exname_fi[kf] = fld[5];
          rate_fi[kf] = mpf_check_num(fname,nlin,fld[6]);
          rlodate_fi[kf] = mpf_check_date(fname,nlin,fld[7]);
          rhidate_fi[kf] = mpf_check_date(fname,nlin,fld[8]);
          color_fi[kf] = fld[9];
          nfiles++;
        }
    }
  # A failed {getline} leaves a message in {ERRNO}:
  if ((ERRNO != "0") && (ERRNO != "")) { file_error(fname, nlin, ERRNO); }
  close (fname);
  if (nlin == 0) { arg_error(("file \"" fname "\" empty or missing")); }
  printf "%6d index file lines read\n", nlin > "/dev/stderr";
  printf "%6d data files found\n", nfiles > "/dev/stderr";
  return nfiles;
}

function mpf_check_num(fname,nlin,x)
{
  # Checks that {x} is an unsigned decimal number with at least one
  # digit ("12", "12.", "12.5" or ".5").  Returns its numeric value.
  # Otherwise aborts through {file_error}, reporting file {fname},
  # line {nlin}.
  if (! match(x, /^([0-9]+([.][0-9]*)?|[.][0-9]+)$/))
    { file_error(fname, nlin, ("invalid number \"" x "\"")); }
  x = x + 0.0;
  return x;
}

function mpf_check_date(fname,nlin,date)
{
  # Checks that {date} has the form "{YYYY}-{mm}-{dd}" with year in
  # {2000..2019}, month in {01..12}, and day in {01..31}.  Returns
  # {date} unchanged.  Otherwise aborts through {file_error},
  # reporting file {fname}, line {nlin}.  Does not check the day
  # against the length of the month.
  if (! match(date, /^20[01][0-9]-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])$/))
    { file_error(fname, nlin, ("invalid date = \"" date "\"")); }
  return date;
}

function mpf_dates_are_consecutive(odate,date,   tst,otst)
{
  # If {odate} is empty, returns 1.
  # If {odate} is not empty, checks that {odate} and {date} are consecutive
  # days.  Returns 1 if they are, 0 if not.

  # Nothing to compare against (e.g. first record):
  if (odate == "") { return 1; }

  # Convert date and time to UTC timestamp:
  otst = mpf_date_time_to_timestamp(odate,"00:00:00");
  tst = mpf_date_time_to_timestamp(date,"00:00:00");
  # Check if input dates are consecutive:
  return ((tst - otst) == 24*3600);
}

function mpf_date_time_to_timestamp(date,time,   tst)
{
  # Converts date "{YYYY}-{mm}-{dd}" and time "{HH}:{MM}:{SS}" (UTC) to timestamp.
  # Be sure to have the environment variable "TZ" set to "UTC";
  # aborts through {arg_error} otherwise.
  if (ENVIRON["TZ"] != "UTC") { arg_error(("must set TZ to 'UTC'")); }
  tst = (date " " time " 0"); # Final 0 says "no daylight savings time".
  gsub(/[-:]/, " ", tst);     # {mktime} wants spaces as separators.
  tst = mktime(tst);
  return tst;
}

function data_error(msg)
{
  # Reports {msg} on stderr, together with the current input file name,
  # record number and record contents; then sets the global {abort}
  # flag and exits with status 1.
  printf "%s:%s: ** %s\n", FILENAME, FNR, msg > "/dev/stderr";
  printf " «%s»\n", $0 > "/dev/stderr";
  abort = 1;
  exit(abort);
}

function arg_error(msg)
{
  # Reports {msg} on stderr, then sets the global {abort} flag and
  # exits with status 1.
  printf "** %s\n", msg > "/dev/stderr";
  abort = 1;
  exit(abort);
}

function file_error(f,n,msg)
{
  # Reports {msg} on stderr, prefixed with file name {f} and line
  # number {n}; then sets the global {abort} flag and exits with
  # status 1.  If {f} is empty, behaves like {arg_error(msg)}.
  if (f == "")
    { arg_error(msg); }
  else
    {
      printf "%s:%d: ** %s\n", f, n, msg > "/dev/stderr";
      abort = 1;
      exit(abort);
    }
}

function prog_error(msg)
{
  # Reports the internal program error {msg} on stderr, then sets the
  # global {abort} flag and exits with status 1.
  printf "** PROG ERROR: %s\n", msg > "/dev/stderr";
  abort = 1;
  exit(abort);
}