#! /usr/bin/gawk -f
# Last edited on 2019-10-27 18:59:52 by jstolfi

# Checks whether the name of a price/volume series file is consistent with 
# the range of dates within.

# The user must define the file name with "-v fname={FNAME}",
# where {FNAME} must be a string like
# "{TDLO}--{TDHI}-{EXCHANGE}-{CURRENCY}-{TIMESTEP}.txt".
# {TDLO} and {TDHI} must be either "{yyyy}-{mm}-{dd}",
# if {TIMESTEP} is "01d" or a multiple thereof, or
# "{yyyy}-{mm}-{dd}-{HHMM}" if {TIMESTEP} is "01h", "01m",
# or a multiple thereof.

BEGIN {
  # {abort} holds the pending exit status; a negative value means "no error".
  # It is set before every {exit} because awk still runs the END rule after
  # an {exit} issued here or in a main rule, and END re-issues that status.
  abort = -1
  
  if (fname == "") { printf "** must define {fname}\n" > "/dev/stderr"; abort = 1; exit(1) }

  # Keep the name as given, so that error messages show what the user
  # passed rather than the partially parsed string:
  fname_given = fname

  # Reduce {fname} to "{TDLO} {TDHI}": delete everything from the first "-"
  # that is followed by an uppercase letter and three uppercase letters or
  # digits (the start of the exchange tag), then turn "--" into a blank.
  gsub(/[-][A-Z][A-Z0-9][A-Z0-9][A-Z0-9].*$/, "", fname)
  gsub(/[-][-]/, " ", fname)

  # {dname[1]} and {dname[2]} are the first and last dates from the name:
  n = split(fname, dname)
  if (n == 1) {
    # A name with a single date (no "--") is no longer accepted.
    printf "** obsolete {fname} format \"%s\" \n", fname_given > "/dev/stderr"; abort = 1; exit(1)
  } else if (n != 2) { 
    printf "** invalid {fname} format \"%s\" \n", fname_given > "/dev/stderr"; abort = 1; exit(1)
  }
  
  dini = ""; dfin = "" # Date range from data lines
}

# If an error status is pending, stop reading input and exit with it.
abort >= 0 { exit abort }

# Ignore lines that are empty, all blanks, or whose first non-blank
# character is "#".
/^[ ]*([\#]|$)/ { next }

# Ignore the column header line, which starts with "Timestamp".
/^Timestamp/ { next }

# Data line: starts with a date "{yyyy}-{mm}-{dd}" (years 2009 through 2029)
# followed by a blank.  Records the first and last timestamps seen.
/^20(09|[12][0-9])[-](0[1-9]|1[0-2])[-]([0-2][0-9]|3[01])[ ]/ {
  # Every data line must have exactly 16 fields:
  if (NF != 16) {
    printf "** invalid field count = %d \"%s\" \n", NF, $0 > "/dev/stderr"
    abort = 1
    exit(1)
  }

  # Join fields 1 and 2 with "-" and drop the colons; field 2 is
  # presumably the time of day "{HH}:{MM}:{SS}" (TODO confirm).
  dt = $1 "-" $2
  gsub(/[:]/, "", dt)

  # The most recent line is always the last one so far; the first
  # data line also defines the start of the range.
  dfin = dt
  if (dini == "") { dini = dt }
  next
}

# Any line not consumed by the rules above is malformed: complain and stop.
{
  printf "** invalid data line format \"%s\" \n", $0 > "/dev/stderr"
  abort = 1
  exit(1)
}

END {
  # This rule also runs after an {exit} in BEGIN or in a main rule; in that
  # case re-issue the recorded status and skip the date checks.
  if (abort >= 0) { exit(abort); }

  # Compare the dates from the file name with the first and last
  # timestamps collected from the data lines:
  check_date("ini", dname[1], dini)
  check_date("fin", dname[2], dfin)

  # {check_date} signals an inconsistency by setting {abort}; turn that into
  # a nonzero exit status, otherwise the script would end with status 0.
  if (abort >= 0) { exit(abort); }
}

function check_date(tag,dn,dd, nlen,dlen,tail,zeros) {
  # Checks the date {dn} taken from the file name against the date {dd}
  # taken from the file contents.  {tag} identifies which end of the range
  # is being checked and is only used in messages.  On any inconsistency,
  # prints a message to stderr and sets the global {abort} to 1.
  nlen = length(dn)
  dlen = length(dd)
  if (dlen < nlen) {
    # The name carries more detail than the data provides.
    printf "** %s date length mismatch \"%s\" \"%s\"\n", tag, dn, dd > "/dev/stderr";
    abort = 1;
  } else if (dlen > nlen) {
    # The data carries more detail than the name; the extra part, minus
    # one leading "-" or blank, must consist of at most six zeros.
    tail = substr(dd, nlen + 1)
    gsub(/^[- ]/, "", tail)
    zeros = substr("000000", 1, length(tail))
    if (tail != zeros) {
      printf "** %s date info truncated \"%s\" \"%s\"\n", tag, dn, dd > "/dev/stderr";
      abort = 1;
    }
    # From here on compare only the part that the name can express.
    dd = substr(dd, 1, nlen)
  }
  if (dn != dd) {
    printf "** %s date mismatch \"%s\" \"%s\"\n", tag, dn, dd > "/dev/stderr";
    abort = 1;
  }
}