/* See {neuromat_poly.h}. */ /* Last edited on 2014-03-26 16:11:37 by stolfilocal */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include void neuromat_poly_compute_lsq_matrix(int n, double x[], double w[], int g, double A[]); /* Assumes that {A} is a {g+1} by {g+1} matrix inearzed by rows. Fills it with the linear system's matrix for the weighted least squares polynomial approximation problem at arguments {x[0..n-1]} with weight {w[0..n-1]}. Namely, sets {A[i*(g+1)+j]} to {SUM{ w[k]*x[k]^i*x[k]^j : k \in 0..n-1 }} for all {i,j} in {0..g}. If {x} is null, assumes {x[k] = 2*(k + 1/2)/n - 1} for {k} in {0..n-1}. If {w} is null, assumes unit weights for all arguments.*/ void neuromat_poly_compute_lsq_vector(int n, double x[], double y[], double w[], int g, double b[]); /* Assumes that {b} is a vector with {g+1} elements. Fills it with the linear system's right-hand-side vector for the weighted least squares polynomial approximation problem at arguments {x[0..n-1]} of values {y[0..n-1]} with weight {w[0..n-1]}. Namely, sets {b[i]} to {SUM{ w[k]*x[k]^i*y[k] : k \in 0..n-1 }} for all {i} in {0..g}. If {x} is null, assumes {x[k] = 2*(k + 1/2)/n - 1} for {k} in {0..n-1}. If {w} is null, assumes unit weights for all arguments.*/ double neuromat_poly_bayes(double v, double pri_gud, double avg_gud, double dev_gud, double avg_bad, double dev_bad); /* Uses Bayes's formula to compute the probability of {v} being a good sample, assuming good and bad samples have normal distributions with averages {avg_gud,avg_bad} and deviations {dev_gud,dev_bad}, respectively, and that, a priori, the sample is good with probability {pri_gud}. */ void neuromat_poly_compute_stats ( int n, double v[], double w[], double p[], bool_t good, double *avgP, double *devP, double *priP ); /* Estimates the average {*avgP} and deviation {*devP} of samples {v[0..n-1]}. If {good} is TRUE, estimates the parameters of the `good' samples (inliers), if {good} is FALSE estimates those of the `bad' ones (outliers). Also returns in {*priP} (if not null) the estimated `a priori' probability of a sample being good or bad, respectively. Assumes that {w[k]} is the relevance weight of sample {v[k]}, and {p[k]} is the current probability of that sample being a good sample. If {w} is null, assumes {w[k]==1.0} for all {k}. If {p} is null, assumes {p[k]==0.5} for all {k}. */ double neuromat_poly_eval(int g, double P[], double x) { demand(g >= 0, "invalid power"); double y = P[g]; int i; for (i = g-1; i >= 0; i--) { y = P[i] + x*y; } return y; } void neuromat_poly_eval_multi(int g, double P[], int n, double x[], double s[]) { demand(g >= 0, "invalid power"); int k; for (k = 0; k < n; k++) { double xk = (x != NULL ? x[k] : 2*(k + 0.5)/n - 1); s[k] = neuromat_poly_eval(g, P, xk); } } void neuromat_poly_compute_lsq_matrix(int n, double x[], double w[], int g, double A[]) { demand(g >= 0, "invalid power"); int g1 = g + 1; rmxn_zero(g1, g1, A); /* Fill the lower triangular half of {A}: */ double *p = notnull(malloc(g1*sizeof(double)), "no mem"); /* Powers of each {x}. */ int i, j, k; for (k = 0; k < n; k++) { double xk = (x != NULL ? x[k] : 2*(k + 0.5)/n - 1); double wk = (w != NULL ? w[k] : 1); for (i = 0; i <= g; i++) { p[i] = (i == 0 ? 1.0 : p[i-1]*xk); for (j = 0; j <= i; j++) { double pipj = p[i]*p[j]; A[i*g1 + j] += wk*pipj; } } } free(p); /* Replicate the lower half of {A} into the upper half: */ for (i = 1; i <= g; i++) { for (j = 0; j < i; j++) { A[j*g1 + i] = A[i*g1 + j]; } } } void neuromat_poly_compute_lsq_vector(int n, double x[], double y[], double w[], int g, double b[]) { demand(g >= 0, "invalid power"); int g1 = g + 1; rn_zero(g1, b); int i, k; for (k = 0; k < n; k++) { double xk = (x != NULL ? x[k] : 2*(k + 0.5)/n - 1); double yk = y[k]; double wk = (w != NULL ? w[k] : 1); double pi = 1.0; /* Power {xk^i}. */ for (i = 0; i <= g; i++) { b[i] += wk*pi*yk; pi = pi*xk; } } } void neuromat_poly_shift(int g, double P[], double a, double Q[]) { demand(g >= 0, "invalid power"); int k; for (k = g; k >= 0; k--) { /* Assume that {Q'=Q[k+1..g]} is the polynomial {P'=P[k+1..g]} shifted by {a}, that is {Q'(x) = P'(x-a)} for all {x}. Now set {Q[k..g]} to the coeffs of {Q''} that is {P''=P[k..g]} shifted by {a}. That is, { Q''(x) = P''(x-a) = P[k]+(x-a)*P'(x-a) = P[k]+(x-a)*Q'(x) } */ Q[k] = P[k]; int i; for (i = k; i < g; i++) { Q[i] = Q[i] - a*Q[i+1]; } } } void neuromat_poly_stretch(int g, double P[], double h, double Q[]) { demand(g >= 0, "invalid power"); double hk = 1.0; int k; for (k = 1; k <= g; k++) { hk *= h; /* Value is {h^k}. */ Q[k] = P[k]/hk; } } void neuromat_poly_bezier(int g, int i, double a, double b, double P[]) { demand((0 <= i) && (i <= g), "invalid Bezier exponent or index"); double h = b - a; double ah = a/h, bh = b/h; P[0] = (double)comb(g,i); /* Hopefully there is no overflow or rounding. */ /* Multiply {P[0..0]} by {((x-a)/h)^i} yielding {P[0..i]}: */ int k, r; for (k = 0; k < i; k++) { /* Multiply {P[0..k]} by {(x-a)/h} yielding {P[0..k+1]}: */ P[k+1] = P[k]/h; for (r = k; r > 0; r--) { P[r] = P[r-1]/h - P[r]*ah; } P[0] = -P[0]*ah; } /* Multiply {P[0..i]} by {((b-x)/h)^(g-i)} yielding {P[0..g]}: */ for (k = i; k < g; k++) { /* Multiply {P[0..k]} by {(b-x)/h} yielding {P[0..k+1]}: */ P[k+1] = -P[k]/h; for (r = k; r > 0; r--) { P[r] = -P[r-1]/h + P[r]*bh; } P[0] = P[0]*bh; } } void neuromat_poly_fit(int n, double x[], double y[], double w[], int g, double P[]) { demand(g >= 0, "invalid power"); demand(n > g, "too few data points"); int g1 = g + 1; /* Number of unnowns (coefficients). */ double *A = notnull(malloc(g1*g1*sizeof(double)), "no mem"); /* Moment matrix. */ double *b = notnull(malloc(g1*sizeof(double)), "no mem"); /* Right-hand side. */ neuromat_poly_compute_lsq_matrix(n, x, w, g, A); rmxn_inv_full(g1, A, A); neuromat_poly_compute_lsq_vector(n, x, y, w, g, b); rmxn_map_col(g1, g1, A, b, P); free(A); free(b); } void neuromat_poly_fit_robust ( int n, double x[], double y[], double w[], int maxiter, int g, double P[], neuromat_poly_report_proc_t *report ) { /* bool_t debug = (report != NULL); */ bool_t debug = TRUE; demand(g >= 0, "invalid power"); demand(n > g, "too few data points"); int g1 = g+1; /* Number of unnowns (coefficients). */ double *A = notnull(malloc(g1*g1*sizeof(double)), "no mem"); /* Moment matrix. */ double *b = notnull(malloc(g1*sizeof(double)), "no mem"); /* Right-hand side. */ /* The matrix does not change: */ neuromat_poly_compute_lsq_matrix(n, x, w, g, A); rmxn_inv_full(g1, A, A); /* Solve first without correction: */ neuromat_poly_compute_lsq_vector(n, x, y, w, g, b); rmxn_map_col(g1, g1, A, b, P); if (report != NULL) { report(0, P, y); } if (maxiter > 0) { /* Outlier suppression method: */ double *ya = notnull(malloc(n*sizeof(double)), "no mem"); /* Current approximation. */ double *yc = notnull(malloc(n*sizeof(double)), "no mem"); /* Deviations or fake values. */ double *p = notnull(malloc(n*sizeof(double)), "no mem"); /* Probability of each datum to be inlier. */ /* Assumed parameters of inlier residuals and outlier values: */ double avg_gud, avg_bad; /* Average. */ double dev_gud, dev_bad; /* Deviation. */ double pri_gud, pri_bad; /* A priori probability of being inlier or outlier. */ int iter; rn_all(n, 0.5, p); /* A priori, inliers and outliers are equally likely: */ for (iter = 1; iter <= maxiter; iter++) { if (debug) { fprintf(stderr, " iteration %d\n", iter); } /* Set {ya[0..n-1]} to current approximation: */ neuromat_poly_eval_multi(g, P, n, x, ya); /* Recompute parameters of inlier deviations: */ rn_sub(n, y, ya, yc); /* Temporary: residuals of current spproximation. */ neuromat_poly_compute_stats(n, yc, w, p, TRUE, &avg_gud, &dev_gud, &pri_gud); if (debug) { fprintf(stderr, " gud: avg = %+9.4f dev = %9.4f pri = %9.4f\n", avg_gud, dev_gud, pri_gud); } /* Recompute parameters of outlier values: */ neuromat_poly_compute_stats(n, y, w, p, FALSE, &avg_bad, &dev_bad, &pri_bad); if (debug) { fprintf(stderr, " bad: avg = %+9.4f dev = %9.4f pri = %9.4f\n", avg_bad, dev_bad, pri_bad); } assert(fabs(pri_bad + pri_gud - 1.0) < 0.0001); /* Widen the inlier deviation distribution to account for the shrink effect: */ double beta = 1.1; /* !!! Figure out the right factor !!! */ dev_gud = beta*dev_gud + 1.0e-10*dev_bad + 1.0e-200; if (debug) { fprintf(stderr, " gud: avg = %+9.4f dev = %9.4f pri = %9.4f\n", avg_gud, dev_gud, pri_gud); } /* Make sure that the outlier distrib is broader than the inlier one: */ double alpha = 2.0; /* Difference factor. */ if (dev_bad < dev_gud*alpha) { dev_bad = dev_gud*alpha; } if (debug) { fprintf(stderr, " bad: avg = %+9.4f dev = %9.4f pri = %9.4f\n", avg_bad, dev_bad, pri_bad); } /* Recompute data point inlier/outlier probabilities and adjusted data {yc[0..n-1]}: */ int k; for (k = 0; k < n; k++) { /* Decide the probability {p[k]} of each sample {y[k]} being an inlier: */ double pk = neuromat_poly_bayes(y[k], pri_gud, ya[k] + avg_gud, dev_gud, avg_bad, dev_bad); p[k] = pk; /* Set {yc[k]} to the expected value of the inlier part of {y[k]}: */ yc[k] = pk*y[k] + (1 - pk)*ya[k]; } /* Recompute coeffs using {yc[0..n-1]} instead of {y[0..n-1]}: */ neuromat_poly_compute_lsq_vector(n, x, yc, w, g, b); rmxn_map_col(g1, g1, A, b, P); if (report != NULL) { report(iter, P, yc); } } free(ya); free(yc); free(p); } } void neuromat_poly_compute_stats ( int n, double v[], double w[], double p[], bool_t good, double *avgP, double *devP, double *priP ) { /* Compute the average and overall probability: */ double sum_wpv = 0; double sum_wp = 0; double sum_w = 0; int k; for (k = 0; k < n; k++) { double vk = v[k]; double wk = (w == NULL ? 1.0 : w[k]); double pk = (p == NULL ? 0.5 : (good ? p[k] : 1.0 - p[k])); sum_wpv += wk*pk*vk; sum_wp += wk*pk; sum_w += wk; } double avg = sum_wpv/(sum_wp + 1.0e-300); double pri = sum_wp/(sum_w + 1.0e-300); assert(! isnan(avg)); /* Compute the standard deviation: */ double sum_wpd2 = 0; for (k = 0; k < n; k++) { double dk = v[k] - avg; double wk = (w == NULL ? 1.0 : w[k]); double pk = (p == NULL ? 0.5 : (good ? p[k] : 1.0 - p[k])); sum_wpd2 += wk*pk*dk*dk; } double var = sum_wpd2/(sum_wp + 1.0e-300); double dev = sqrt(var); /* Should compensate for bias... */ /* Return: */ assert(! isnan(dev)); assert(! isnan(pri)); (*avgP) = avg; (*devP) = dev; if (priP != NULL) { (*priP) = pri; } } double neuromat_poly_bayes(double v, double pri_gud, double avg_gud, double dev_gud, double avg_bad, double dev_bad) { double dk_gud = (v - avg_gud)/dev_gud; double dk_bad = (v - avg_bad)/dev_bad; if (dk_gud > 6.0) { return 0.0; } else if (dk_bad > 6.0) { return 1.0; } else { double pri_bad = 1 - pri_gud; double P_gud = exp(-dk_gud*dk_gud/2)/dev_gud; /* Prob of sample times {sqrt(2*PI)}, assuming good. */ double P_bad = exp(-dk_bad*dk_bad/2)/dev_bad; /* Prob of sample times {sqrt(2*PI)}, assuming bad. */ double PP_gud = pri_gud*P_gud; double PP_bad = pri_bad*P_bad; return PP_gud/(PP_gud + PP_bad); } }