#! /usr/bin/gawk -f
# Last edited on 2014-11-01 05:25:12 by stolfilocal

# Miscellaneous functions for the merging of price files.
# To be loaded with "-f" into other gawk programs.

function mpf_initialize_index_tables()
  {
    # Resets to empty the global per-file tables that describe the
    # files to be merged.  Each table is indexed by the file number,
    # in {0..nfiles-1}.  Splitting the empty string into a table
    # leaves it as an array with no elements.

    # Nominal date span of the file (start, end):
    split("", inidate_fi);
    split("", findate_fi);

    # Tag of exchange ("MGOX", "OKCO", etc), currency symbol
    # ("USD", "CNY", etc.), and name of exchange:
    split("", extag_fi);
    split("", crtag_fi);
    split("", exname_fi);

    # Default units of {crtag_fi[kf]} worth 1 USD:
    split("", rate_fi);

    # Date range to consider (initial, final):
    split("", rlodate_fi);
    split("", rhidate_fi);

    # Color to use in plots:
    split("", color_fi);
  }
  
function mpf_read_index_file \
  ( fname, \
    inidate_fi,findate_fi,extag_fi,crtag_fi,exname_fi,rate_fi,rlodate_fi,rhidate_fi,color_fi, \
    nlin,lin,nfiles,fld,nfld,kf,res \
  )
  {
    # Reads the index file {fname} that describes the data files 
    # to be merged.  Each non-blank line of the index file must contain:
    #
    #   "{INIDATE} {FINDATE} {EXTAG} {CRTAG} {EXNAME} {RATE} {RLODATE} {RHIDATE} {COLOR}"
    #
    # This line describes the file whose nominal date span is {INIDATE .. FINDATE},
    # for the exchange whose tag is {EXTAG} (e.g. "MGOX"), with trade currency {CRTAG} (e.g. "CNY"). 
    # The {EXNAME} (a string without blanks) is for documentation purposes only,
    # and the {RATE} may be used in price plots.  The file is to be clipped to the range 
    # {RLODATE .. RHIDATE}, inclusive both.  The {COLOR} may be used in plots.
    #
    # Tabs are treated as blanks, "#" starts a comment that extends to the
    # end of the line, and lines that are blank after that are ignored.
    #
    # Those fields are returned in the arguments {inidate_fi,findate_fi,extag_fi}, etc.
    # which should be pre-allocated arrays, indexed {0..nfiles-1}.
    # Returns the number of files {nfiles}.
    #
    # Aborts through {file_error} on a malformed line or a read error, and
    # through {arg_error} if no line at all could be read.
    #
    # The parameters {nlin,lin,nfiles,fld,nfld,kf,res} are local variables;
    # callers must not pass them.

    nlin = 0;   # Number of lines read.
    nfiles = 0; # Number of relevant lines.
    # Keep the {getline} result so that a read error (negative result) can be
    # told apart from end-of-file, without relying on a possibly stale {ERRNO}.
    while ((res = (getline lin < fname)) > 0) { 
      nlin++;
      # Remove tabs, inline comments, spurious blanks
      gsub(/[\011]/, " ", lin);
      gsub(/[#].*$/, "", lin);
      gsub(/^[ ]+/, "", lin); 
      gsub(/[ ]+$/, "", lin); 
      gsub(/[ ][ ]+/, " ", lin); 
      if (lin != "")
        { nfld = split(lin, fld, " ");
          if (nfld != 9) 
            { file_error(fname, nlin, ("bad file index entry = \"" lin "\"")); }
          kf = nfiles;
          inidate_fi[kf] = mpf_check_date(fname,nlin,fld[1]);
          findate_fi[kf] = mpf_check_date(fname,nlin,fld[2]);
          extag_fi[kf] = fld[3];
          crtag_fi[kf] = fld[4];
          exname_fi[kf] = fld[5];
          rate_fi[kf] = mpf_check_num(fname,nlin,fld[6]);
          rlodate_fi[kf] = mpf_check_date(fname,nlin,fld[7]);
          rhidate_fi[kf] = mpf_check_date(fname,nlin,fld[8]);
          color_fi[kf] = fld[9];
          nfiles++;
        }
    }
    # A negative result means the file could not be opened or read:
    if (res < 0) { file_error(fname, nlin, ERRNO); }
    close (fname);
    if (nlin == 0) { arg_error(("file \"" fname "\" empty or missing")); }
    printf "%6d index file lines read\n", nlin > "/dev/stderr";
    printf "%6d data files found\n", nfiles > "/dev/stderr";
    return nfiles;
  }

function mpf_check_num(fname,nlin,x)
  { 
    # Checks that {x} is an unsigned decimal number: one or more digits
    # optionally followed by "." and more digits, or "." followed by one
    # or more digits (e.g. "6", "6.", "6.25", ".25").  A lone "." is 
    # rejected, since it contains no digits.  Signs and exponents are
    # not accepted.
    #
    # On failure, aborts through {file_error}, citing line {nlin} of 
    # file {fname}.  Otherwise returns {x} converted to a number.
    #
    # Uses the "!~" operator rather than {match}, so that the global
    # {RSTART} and {RLENGTH} are left untouched.
    if (x !~ /^([0-9]+([.][0-9]*)?|[.][0-9]+)$/)
      { file_error(fname, nlin, ("invalid number \"" x "\"")); }
    return x + 0.0;
  }
  
function mpf_check_date(fname,nlin,date,  ymd,yr,mo,dy,mdays)
  {
    # Checks that {date} is a valid calendar date in the format
    # "{YYYY}-{mm}-{dd}", with year in {2000..2099}.  Besides the
    # syntax, also checks that the day exists in the given month
    # (so "2014-02-31" and "2015-02-29" are rejected).
    #
    # On failure, aborts through {file_error}, citing line {nlin} of
    # file {fname}.  Otherwise returns {date} unchanged.
    #
    # The parameters {ymd,yr,mo,dy,mdays} are local variables;
    # callers must not pass them.
    if (date !~ /^20[0-9][0-9]-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])$/)
      { file_error(fname, nlin, ("invalid date = \"" date "\"")); }

    # The syntax is right; now check the day against the month length:
    split(date, ymd, "-");
    yr = ymd[1] + 0;  mo = ymd[2] + 0;  dy = ymd[3] + 0;
    if (mo == 2)
      { # Gregorian leap year rule:
        mdays = (((yr % 4 == 0) && ((yr % 100 != 0) || (yr % 400 == 0))) ? 29 : 28);
      }
    else if ((mo == 4) || (mo == 6) || (mo == 9) || (mo == 11))
      { mdays = 30; }
    else
      { mdays = 31; }
    if (dy > mdays)
      { file_error(fname, nlin, ("invalid date = \"" date "\"")); }
    return date;
  }

function mpf_dates_are_consecutive(odate,date,  tst,otst)
  { 
    # If {odate} is empty, returns 1.
    # If {odate} is not empty, checks whether {odate} and {date} are
    # consecutive days, that is, whether midnight of {date} is exactly
    # 24 hours after midnight of {odate}.  Returns 1 if they are, 0 if not.
    #
    # Both dates must be in the format "{YYYY}-{mm}-{dd}".
    # The parameters {tst,otst} are local variables.
    
    # No previous date to compare against:
    if (odate == "") { return 1; }

    # Convert both dates, at time 00:00:00, to timestamps:
    otst = mpf_date_time_to_timestamp(odate,"00:00:00");
    tst = mpf_date_time_to_timestamp(date,"00:00:00");

    # Check if input dates are consecutive:
    return ((tst - otst) == 24*3600);
  }

function mpf_date_time_to_timestamp(date,time,  tst)
  {
    # Given a date "{YYYY}-{mm}-{dd}" and a time "{HH}:{MM}:{SS}" (UTC),
    # returns the corresponding timestamp as computed by {mktime}.
    #
    # The environment variable "TZ" must be set to "UTC"; otherwise
    # the function aborts through {arg_error}.
    if (! (ENVIRON["TZ"] == "UTC"))
      { arg_error(("must set TZ to 'UTC'")); }

    # Build the {mktime} argument "{YYYY} {mm} {dd} {HH} {MM} {SS} 0".
    # The trailing 0 says "no daylight savings time", and {mktime}
    # wants blanks instead of "-" and ":" as separators.
    tst = date " " time " 0";
    gsub(/[-:]/, " ", tst);
    return mktime(tst);
  }
  
function data_error(msg)
  {
    # Reports an error in the current input record: prints to stderr
    # the current {FILENAME} and {FNR} followed by {msg}, and then
    # the offending record {$0}.  Sets the global {abort} to 1 and
    # exits the program with that status.
    abort = 1;
    printf("%s:%s: ** %s\n", FILENAME, FNR, msg) > "/dev/stderr";
    printf("  «%s»\n", $0) > "/dev/stderr";
    exit(abort);
  }
          
function arg_error(msg)
  {
    # Reports an error that is not tied to any input line: prints
    # {msg} to stderr, sets the global {abort} to 1, and exits the
    # program with that status.
    abort = 1;
    printf("** %s\n", msg) > "/dev/stderr";
    exit(abort);
  }

function file_error(f,n,msg)
  {
    # Reports an error found at line {n} of file {f}: prints the file
    # name, line number and {msg} to stderr, sets the global {abort}
    # to 1, and exits the program with that status.  If {f} is empty,
    # the error is reported through {arg_error} instead, without any
    # file name or line number.
    if (f != "")
      { abort = 1;
        printf("%s:%d: ** %s\n", f, n, msg) > "/dev/stderr";
        exit(abort);
      }
    else
      { arg_error(msg); }
  }
          
function prog_error(msg)
  {
    # Reports an internal program error: prints {msg} to stderr with
    # a "PROG ERROR" prefix, sets the global {abort} to 1, and exits
    # the program with that status.
    abort = 1;
    printf("** PROG ERROR: %s\n", msg) > "/dev/stderr";
    exit(abort);
  }