/* Last edited on 2013-10-02 02:46:01 by stolfilocal */ /* Compares the speed of vector ops with unrolled loops vs. with {for} loops. */ #include #include #define N 4 #define UNROLL 1 typedef struct RN { double c[N]; } RN; double decomp_gen (int n, double *a, double *u, double *para, double *perp); double decomp_normal (RN *a, RN *u, RN *para, RN *perp); double decomp_unroll_2 (RN *a, RN *u, RN *para, RN *perp); double decomp_unroll_4 (RN *a, RN *u, RN *para, RN *perp); void init(RN *x, double v); void do_test(int n); int main(int argc, char **argv); double decomp_gen (int n, double *a, double *u, double *para, double *perp) { double sau = 0.0; double suu = 0.0; int i; for (i = 0; i < n; i++) { double ai = a[i]; double ui = u[i]; suu += ui*ui; sau += ai*ui; } if (sau == 0.0) { for (i = 0; i < n; i++) { para[i] = 0.0; perp[i] = a[i]; } return 0.0; } else { double c = sau / suu; for (i = 0; i < n; i++) { double pi = c * u[i]; para[i] = pi; perp[i] = a[i] - pi; } return c; } } double decomp_normal (RN *a, RN *u, RN *para, RN *perp) { return decomp_gen(N, &(a->c[0]), &(u->c[0]), &(para->c[0]), &(perp->c[0])); } double decomp_unroll_2 (RN *a, RN *u, RN *para, RN *perp) { double u0 = u->c[0]; double u1 = u->c[1]; double sau = a->c[0]*u0 + a->c[1]*u1; if (sau == 0.0) { para->c[0] = 0.0; para->c[1] = 0.0; perp->c[0] = a->c[0]; perp->c[1] = a->c[1]; return 0.0; } else { double suu = u0*u0 + u1*u1; double c = sau / suu; double p0 = c * u0; double p1 = c * u1; para->c[0] = p0; para->c[1] = p1; perp->c[0] = a->c[0] - p0; perp->c[1] = a->c[1] - p1; return c; } } double decomp_unroll_4 (RN *a, RN *u, RN *para, RN *perp) { double u0 = u->c[0]; double u1 = u->c[1]; double u2 = u->c[2]; double u3 = u->c[3]; double sau = a->c[0]*u0 + a->c[1]*u1 + a->c[2]*u2 + a->c[3]*u3; if (sau == 0.0) { para->c[0] = 0.0; para->c[1] = 0.0; para->c[2] = 0.0; para->c[3] = 0.0; perp->c[0] = a->c[0]; perp->c[1] = a->c[1]; perp->c[2] = a->c[2]; perp->c[3] = a->c[3]; return 0.0; } else { double suu = u0*u0 + u1*u1; double c = sau / suu; double p0 = c * u0; double p1 = c * u1; double p2 = c * u2; double p3 = c * u3; para->c[0] = p0; para->c[1] = p1; para->c[2] = p2; para->c[3] = p3; perp->c[0] = a->c[0] - p0; perp->c[1] = a->c[1] - p1; perp->c[2] = a->c[2] - p2; perp->c[3] = a->c[3] - p3; return c; } } void init(RN *x, double v) { int i; for(i = 0; i < N; i++) { x->c[i] = sin(M_PI * (v + ((double)i)/(4*(double)N))); } } #if (UNROLL == 0) #define decomp decomp_normal #define method "decomp_normal" #else #if (N == 2) #define decomp decomp_unroll_2 #define method "decomp_unroll_2" #elif (N == 4) #define decomp decomp_unroll_4 #define method "decomp_unroll_4" #elif (N == 6) #define decomp decomp_unroll_6 #define method "decomp_unroll_6" #endif #endif void do_test(int n) { int i; RN x, y, para, perp; printf("method: %s\n", method); fflush(stdout); init(&x, 1.0); init(&y, 2.0); for (i=0; i