#! /usr/bin/gawk -f # Last edited on 2009-12-12 00:02:05 by stolfi # Options: "-v phase={PHASE}" "-v freq={FREQ}" -v cumweight={BOOL}" # where {PHASE} is 1 or 2 (which phase to fit) # and {FREQ} is 0 (no sesonal effect), 1 (annual), or 2 (semestral). # Reads the file with size of wikipedia in the format # "{TIME} {YEAR} {MONTH} {DAY} {SZ} {SU} {DZ} {DU}" # Outputs to {stderr} the coefficients {Ki}, {Ri}, {Ai}, and {Di} # of an exponential modulated by a exp-sinusoidal semestral factor. # If {cumweight} is nonzero, weight is proportional to time left to end of phase. # Must be called with "-f lib_date_time.gawk -f lib_functions.gawk" BEGIN{ abort = -1; debug = 1; if (phase == "") { phase = 1; } if ((phase != 1) && (phase != 2)) { arg_error(("bad phase [" phase "]")); } if (freq == "") { freq = 0; } if ((freq != 0) && (freq != 1) && (freq != 2)) { arg_error(("bad freq [" freq "]")); } if (cumweight == "") { cumweight = 0; } if ((cumweight != 0) && (cumweight != 1)) { arg_error(("bad cumweight [" cumweight "]")); } yz = 2001; # Year zero of Wikipedia Y = 365.25; # One year in days. pi = 3.1415926; # Euler's number. sper = 28; # Sampling period assumed in input file (days). # Phase 1 bph1 = 364; # Initial day for weight function (2002-01-02). eph1 = 1460; # Final day for weight function (2004-12-31). Sph1 = 0.00 # Reference day for exponential (2001-01-01). # Phase 2 bph2 = 2190; # Initial day for weight function (2007-01-01). eph2 = 5000; # Final day for weight function (far in the future). Sph2 = 1826; # Reference day for exponential (2006-01-01). # Selected phase: bph = (phase == 1 ? bph1 : bph2); # Initial day for weight function. eph = (phase == 1 ? eph1 : eph2); # Final day for weight function. Sph = (phase == 1 ? Sph1 : Sph2); # Reference day for exponential. Tph = (freq == 0 ? 0 : Y/freq); # Period of seasonal factor (days). nv = (freq == 0 ? 2 : 4); # Number of basis functions in the regression. wblur = Y/3; # Half-width of transition zone (days). ntt = sper-1; # Next expected sample time {tt} (days). split("", bas); # {bas[i]} will be the value of basis function {i} at current time {tc}. split("", bgz); # {bgz[i]} will hold the scalar product of {log(dz)} and basis function {i}. split("", bb); # {bb[i,j]} will hold the scalar product of basis functions {i} and {j}. date_time_init(); } function abs(x) { return (x + 0 < 0 ? 0.0 - x : 0.0 + x); } (abort >= 0) { exit abort; } //{ # Save line for error reporting: lin = $0; } /^ *([\#]|$)/ { next; } /^ *[0-9]/ { if (NF != 8) { data_error(("bad line format"), lin); } tt = 0+$1; yr = 0+$2; mo = 0+$3; da = 0+$4; sz = 0+$5; su = 0+$6; dz = 0+$7; du = 0+$8; check_entry(yz, tt, yr, mo, da, sz, su, dz, du); # Check for uniform sampling every 28 days: if ((tt % sper) != (sper-1)) { data_error(("sample time is not " sper-1 " mod " sper " [" tt "]"), lin); } if (tt != ntt) { data_error(("unexpected sample time [" ntt "] [" tt "]"), lin); } ntt += sper; # Compute the nominal time {tc} for {dz}: tc = tt - 0.5*sper; # Compute window weight factor {ww} of selected phase: ww = window_factor(tc, bph, eph, wblur); # Compute the reliability weight {rw} based on reliability flag of {dz}: rw = (du == 0 ? 0.2 : 1.0); # Compute the weight {cw} for cumulative or differential eSphasis: if (cumweight != 0) { te = eph + wblur + 1; cw = (tc >= te ? 0.0 : te - tc); } else { cw = 1.0; } # Compute the overall weight {dw}: dw = ww * rw * cw; # Convert {dz} to log scale: gz = log(dz + 1.0); # Evaluate the basis functions for time {tt}: bas[0] = 1; bas[1] = tc - Sph; if (freq != 0) { bas[2] = cos(2*pi*tc/Tph); bas[3] = sin(2*pi*tc/Tph); } if (debug != 0) { # Debug: for (i = 0; i < nv; i++) { printf " %16.6f", bas[i] > "/dev/stderr"; } printf " %16.6f", gz > "/dev/stderr"; printf " %16.6f\n", dw > "/dev/stderr"; } # Accumulate scalar products with basis functions: for (i = 0; i < nv; i++) { bgz[i] += dw * gz * bas[i]; } # Accumulate scalar products of basis against basis: for (i = 0; i < nv; i++) for (j = 0; j < nv; j++) { bb[i,j] += dw * bas[i] * bas[j]; } next; } // { data_error(("bad line format"), lin); } END { # Print system: printf "\n" > "/dev/stderr"; for (i = 0; i < nv; i++) { for (j = 0; j < nv; j++) { printf " %16.6f", bb[i,j] > "/dev/stderr"; } printf " %16.6f\n", bgz[i] > "/dev/stderr"; } # Solve the system {bb coef = bgz}: split("", coef); sys_solve(bb, bgz, coef); Kph = exp(coef[0]); Rph = coef[1]; printf " # Data for phase %d:\n", phase; printf " Sph%d = %.9f;\n", phase, Sph; printf " Kph%d = %.1f;\n", phase, Kph; printf " Rph%d = %.9f;\n", phase, Rph; if (freq != 0) { C = coef[2]; S = coef[3]; Aph = sqrt(C*C + S*S); Dph = atan2(S,C)/(2*pi); while (Dph < 0) { Dph += 1.0; } while (Dph >= 1.0) { Dph -= 1.0; } printf " Aph%d = %.4f;\n", phase, Aph; printf " Tph%d = %.4f;\n", phase, Tph; printf " Dph%d = %.3f;\n", phase, Dph; } fflush("/dev/stdout"); } function check_entry(yz,tt,yr,mo,da,sz,su,dz,du, xtt) { if ((tt < 0) || (tt > 36500)) { data_error(("bad time [" tt "]"), lin); } xtt = time_from_date(yz, yr, mo, da); if (tt != xtt) { data_error(("date error [" tt "] [" xtt "]"), lin); } if ((yr < yz) || (yr > 2099)) { data_error(("bad year [" yr "]"), lin); } if ((mo < 1) || (mo > 12)) { data_error(("bad month [" mo "]"), lin); } if ((da < 1) || (da > 31)) { data_error(("bad day [" da "]"), lin); } if ((sz < 0) || (sz > 999999999)) { data_error(("bad size [" sz "]"), lin); } if ((su != 0) && (su != 1)) { data_error(("bad size status [" su "]"), lin); } if ((dz < -10) || (dz > 999999999)) { data_error(("bad growth [" dz "]"), lin); } if ((du != 0) && (du != 1) && (du != 9)) { data_error(("bad growth status [" du "]"), lin); } } function sys_solve(M,b,x, i,j,k,p,z,C) { # Triangularize the system: for (i = 0; i < nv; i++) { # Select pivot row: p = i; for (k = i+1; k abs(M[p,i])) { p = k; } } # Swap rows: for (j = i; j < nv; j++) { z = M[i,j]; M[i,j] = M[p,j]; M[p,j] = z; } z = b[i]; b[i] = b[p]; b[p] = z; # Eliminate from following rows: for (k = i+1; k < nv; k++) { C = M[k,i]/M[i,i]; M[k,i] = 0; for (j = i+1; j < nv; j++) { M[k,j] = M[k,j] - C*M[i,j]; } b[k] = b[k] - C*b[i]; } } # Solve the triangular system: for (i = nv-1; i >= 0; i--) { C = b[i]; for (j = i+1; j < nv; j++) { C -= M[i,j]*x[j]; } x[i] = C/M[i,i]; } }