#! /usr/bin/gawk -f # Last edited on 2021-08-08 14:31:20 by stolfi # Reads a file with one pair "{X[i]} {Y[i]}" per line. Writes the means {XAVG} and {YAVG}, # the two orthogonal unit vectors {u,v} that are the main axes of that cloud of points, # and the corresponding deviations {ru,rv}. # # Also prints the affine functions {Y = A X + B} # and {X = C Y + D} corresponding to those unit vectors. Note that these are not # quite the linear regression coeffs because the {X[i]} are assumed to be # error-tainted too. BEGIN{ abort = -1; n = 0; split("", x); split("", y); verbose = 0; } (abort >= 0) { exit abort; } # Remove funny blanks: //{ gsub(/[\011\015\014]/, " ", $0); } # Remove comments: //{ gsub(/[#].*$/, "", $0); } # Discard blanks and comments: /^[ ]*$/ { next; } # Accumulate data lines: /^[ ]*[0-9]+[ ]+[0-9]+[ ]*$/ { x[n] = $1; y[n] = $2; if (verbose) { printf "%f %f\n", x[n], y[n] > "/dev/stderr"; } n++; next; } # Invalid lines: // { data_error("bad line format"); } # Perform fitting: END { if (abort >= 0) { exit abort; } # Default values: ax = 0; ay = 0; # Means. ru = 1; ux = 1; uy = 0; # Largest eigenvalue and its eigenvector. rv = 1; vx = 0; vy = 1; # Smallest eigenvalue and its eigenvector. ut = 0; # Angle of largest eigenvector. if (n > 0) { # Compute means {ax,ay}: sx = 0; sy = 0; for (k = 0; k < n; k++) { sx += x[k]; sy += y[k]; } if (verbose) { printf "sx = %f sy = %f\n", sx, sy > "/dev/stderr"; } ax = sx/n; ay = sy/n; # Compute moments {sxx, sxy,syy}: sxx = 0; sxy = 0; syy = 0; for (k = 0; k < n; k++) { dx = x[k]-ax; dy = y[k]-ay; sxx += dx*dx; sxy += dx*dy; syy += dy*dy; } sxx /= n; sxy /= n; syy /= n; if (verbose) { printf "sxx = %f sxy = %f syy = %f\n", sxx, sxy, syy > "/dev/stderr"; } # Compute eigenvalues {ru,rv} (variances along principal directions): tau = sxx + syy; del = sxx*syy - sxy*sxy; if (del > 0) { D = tau*tau - 4*del; if (D < 0) { D = 0; } sig = (tau > 0 ? +1 : -1)*sqrt(D); } else { sig = tau; } if (verbose) { printf "tau = %f del = %f sig = %f\n", tau, del, sig > "/dev/stderr"; } if (sig == 0) { # Equal eigenvalues: ru = tau/2; rv = tau/2; } else { ru = (tau + sig)/2; rv = (tau - sig)/2; # Find eigenvector {ux,uy} of largest eigenvalue: mxx = sxx - rv; myy = syy - rv; if (verbose) { printf "mxx = %f myy = %f\n", mxx, myy > "/dev/stderr"; } if ((mxx + myy)*sxy >= 0) { wx = mxx + sxy; wy = sxy + myy; } else { wx = mxx - sxy; wy = sxy - myy; } wd = sqrt(wx*wx + wy*wy); if (wd > 0) { ux = wx/wd; uy = wy/wd; # Ensure the angle is in {(-pi/2 _ +pi/2]}: if ((ux < 0) || ((ux == 0) && (uy < 0))) { ux = -ux; uy = -uy; } ang = atan2(uy, ux); vx = -uy; vy = ux; } } } printf "avgx = %f\n", ax; printf "avgy = %f\n", ay; printf "u = ( %f %f ) r = %f\n", ux, uy, ru; printf "v = ( %f %f ) r = %f\n", vx, vy, rv; printf "ang = %f radians = %f degrees\n", ang, ang*180/3.14159265358979323844; print_func(ax, ay, ux, uy, "X", "Y"); print_func(ay, ax, uy, ux, "Y", "X"); } function print_func(as,at,us,ut,S,T, alf,bet) { # Prints the equation for variable {S} as function of variable {R}. if (us < 0) { us = -us; ut = -ut; } if (us <= 1.0e-6) { printf "%s largely independent of %s\n", T, S; } else { alf = ut/us; bet = at - alf*as; printf "%s = %f * %s %+f\n", T, alf, S, bet; } } function data_error(msg) { printf "%s:%d: %s\n", FILENAME, FNR, msg > "/dev/stderr"; printf " [%s]\n", $0 > "/dev/stderr"; abort = 1; exit abort; }