#! /usr/bin/gawk -f
# Last edited on 2015-02-24 18:13:01 by stolfilocal

# Reads a data file resulting from the join of two price files
# by the date and time fields.  Checks whether the differences
# are small enough.
# 
# The user must define (with {export}) the environment variable {TZ="UTC"},
# and load (with "-f") the library "useful_functions.gawk".
#

BEGIN \
  { 
    # Initialization: checks the environment, sets the field separator,
    # and resets the global counters and tables used by the rules below.
    
    # {arg_error} is not defined in this file; presumably it comes from
    # the library loaded with "-f" -- confirm.
    if (ENVIRON["TZ"] != "UTC") { arg_error(("must set TZ to 'UTC'")); }
    
    FS="|";
    
    nints = 0;            # Number of data lines.
    
    # Typical precision of input values:
    ulp_vbt = 0.01;   # Unit in last place for {vbt}
    ulp_vcr = 0.01;   # Unit in last place for {vcr}
    ulp_pav = 0.01;   # Unit in last place for {pav}.
    ulp_phl = 0.01;   # Unit in last place for {pop,phi,plo,pcl}.
    
    # Field names (original fields 1..8):
    split("", fld_name);
    fld_name[1] = "datetime";
    fld_name[2] = "opening price";
    fld_name[3] = "high price";   
    fld_name[4] = "low price";    
    fld_name[5] = "closing price";
    fld_name[6] = "BTC volume";
    fld_name[7] = "currency volume";
    fld_name[8] = "average price";
    
    # Fields 9..15 are the "new" counterparts of fields 2..8 (the data
    # rule compares field {k} with field {k+7}).  Give them names too, so
    # that messages from the null/non-null checks identify the field
    # instead of starting with an empty name:
    for (i = 2; i <= 8; i++) { fld_name[i + 7] = ("new " fld_name[i]); }
    
    # Maximum error in each field, relative to its tolerance
    # (only entries 2..8 are updated and reported):
    split("", emax);
    for (i = 1; i < 15; i++) { emax[i] = 0; }
    
    # Datetime range in file:
    dt_ini = "???";
    dt_fin = "???";
    
    odt = ""; # Previous datetime.
  }

{
  # Applies to every input record: turn each tab (octal 011) into a
  # single blank before the other rules look at the record.
  gsub(/[\011]/, " ");
}
  
# Data lines:
/^20/ \
  {
    # Data line (begins with "20"): validates the record and compares the
    # original fields 2..8 with the new fields 9..15, accumulating in
    # {emax} the largest error seen, relative to each field's tolerance.
    if (NF != 15) { data_error("wrong field count = " NF ""); }

    # Parse the datetime; it must be strictly greater than the previous one:
    dt = usf_check_datetime(FILENAME,FNR,$1);
    if (dt <= odt) { data_error("repeated datetime = \"" odt "\" \"" dt "\""); }
    odt = dt;

    # Original volumes {vbt,vcr} and average price {pav}, forced to numeric:
    vbt = $6 + 0;
    vcr = $7 + 0;
    pav = $8 + 0;

    if ((pav != 0) && ((vbt != 0) || (vcr != 0)))
      {
        # There was trading in this interval: every new field must be non-zero.
        i = 9;
        while (i <= 15) { check_non_null_field(i); i++; }

        # Tolerances: half an ulp for the prices {pop,phi,plo,pcl};
        # one ulp (two half-ulps) for the average price and the currency volume.
        tol_phl = 0.50001 * ulp_phl;
        tol_pav = 0.50001 * 2*ulp_pav;
        tol_vcr = 0.50001 * 2*ulp_vcr;

        # Opening, high, low, and closing prices:
        emax[2] = max_error(emax[2], compare_fields( 2,  9, tol_phl));
        emax[3] = max_error(emax[3], compare_fields( 3, 10, tol_phl));
        emax[4] = max_error(emax[4], compare_fields( 4, 11, tol_phl));
        emax[5] = max_error(emax[5], compare_fields( 5, 12, tol_phl));

        # Average price:
        emax[8] = max_error(emax[8], compare_fields( 8, 15, tol_pav));

        # Currency volume:
        emax[7] = max_error(emax[7], compare_fields( 7, 14, tol_vcr));

        # BTC volume, assumed computed as {vcr/pav}.  Its tolerance is
        # widened to half the spread between the smallest and largest
        # quotients obtainable under worst-case rounding of {vcr} and {pav}:
        if (vcr <= tol_vcr)
          { vloc = 0.0; }
        else
          { vloc = (vcr - tol_vcr)/(pav + tol_pav); }
        if (pav <= tol_pav)
          { vhic = 1e10; }
        else
          { vhic = (vcr + tol_vcr)/(pav - tol_pav); }
        tol_vbt = max_error(0.50001 * 2*ulp_vbt, 0.50001*(vhic - vloc));
        emax[6] = max_error(emax[6], compare_fields( 6, 13, tol_vbt));
      }
    else
      {
        # No price or no volume in the original data: every new field must be zero.
        i = 9;
        while (i <= 15) { check_null_field(i); i++; }
      }

    # Count the line and extend the datetime range seen so far:
    nints++;
    if (nints == 1) { dt_ini = dt; }
    dt_fin = dt;

    next;
  }
    
{
  # Catch-all: reached only by records that the data-line rule did not
  # consume (that rule ends with {next}); report them as malformed.
  data_error("invalid line format");
}

END \
  { 
    # Final report, written to standard error: the number of data lines,
    # the datetime range, and for each original field 2..8 the largest
    # error found, expressed as a multiple of that field's tolerance.
    printf "found %d intervals, from %s to %s\n", nints, dt_ini, dt_fin > "/dev/stderr";
    printf "max errors relative to tolerance:\n" > "/dev/stderr";
    for (i = 2; i <= 8; i++)
      { printf "  %10.5f %s\n", emax[i], fld_name[i] > "/dev/stderr"; }
    # No explicit {exit(0)} here: an {exit} with an argument inside END
    # replaces the status given by an earlier {exit}, which would turn a
    # failure signalled from a main rule into success.  Falling off the
    # end keeps that earlier status, and yields 0 when there was none.
    # NOTE(review): assumes {data_error} exits with non-zero status -- confirm.
  } 
   
function check_null_field(kf,   val,name)
  {  
    # Checks that field number {kf} of the current record is numerically
    # zero; if not, reports the problem through {data_error}.
    # The value is obtained through {usf_check_num}, which is not defined
    # in this file (presumably in the library loaded with "-f").
    # {val} and {name} are local variables.
    
    # Use the name from {fld_name} when there is one; otherwise identify
    # the field by its index, so that the message never starts with an
    # empty name.  The {in} test avoids creating empty table entries.
    name = ((kf in fld_name) ? fld_name[kf] : "");
    if (name == "") { name = ("field " kf); }
    val = usf_check_num(FILENAME,FNR,$(kf));
    if (val != 0.0) { data_error((name " = \"" val "\" should be zero")); }
  }
   
function check_non_null_field(kf,   val,name)
  {  
    # Checks that field number {kf} of the current record is numerically
    # non-zero; if it is zero, reports the problem through {data_error}.
    # The value is obtained through {usf_check_num}, which is not defined
    # in this file (presumably in the library loaded with "-f").
    # {val} and {name} are local variables.
    
    # Use the name from {fld_name} when there is one; otherwise identify
    # the field by its index, so that the message never starts with an
    # empty name.  The {in} test avoids creating empty table entries.
    name = ((kf in fld_name) ? fld_name[kf] : "");
    if (name == "") { name = ("field " kf); }
    val = usf_check_num(FILENAME,FNR,$(kf));
    if (val == 0.0) { data_error((name " = \"" val "\" should be non-zero")); }
  }

function compare_fields(kfa,kfb,tol,  va,vb,d,name)
  {
    # Compares the numeric values of fields {kfa} and {kfb} of the current
    # record.  Returns their absolute difference divided by {tol}, that
    # is, the error as a multiple of the tolerance; or 0 if both values
    # are zero.  When the difference exceeds {tol}, also writes a warning
    # line (tagged "!!") to standard error; processing then continues.
    # {va,vb,d,name} are local variables.
    va = usf_check_num(FILENAME,FNR,$(kfa));
    vb = usf_check_num(FILENAME,FNR,$(kfb));

    # Two zeros always agree, whatever the tolerance:
    if ((va == 0) && (vb == 0)) { return 0; }

    d = va - vb;
    d = (d < 0 ? -d : d);
    if (d > tol)
      { # The message is labelled with the name of the first field:
        name = fld_name[kfa];
        printf "%s:%s: !!", FILENAME, FNR > "/dev/stderr";
        printf " %s fields $%d = %.5f and $%d = %.5f", name, kfa, va, kfb, vb > "/dev/stderr";
        printf " differ by %.5f, tol = %.5f\n", d, tol > "/dev/stderr";
      }
    return d/tol;
  }
          
function max_error(x,y)
  {
    # Returns whichever of {x,y} is numerically larger ({y} on a tie).
    # Both are compared as numbers, but the chosen argument is returned
    # as given.
    if (x+0 > y+0) { return x; }
    return y;
  }