/* Test the time for various math operations.
 * This should be compiled as a standalone program, NOT a mex file:
 *   cc -O3 -o test_flops test_flops.c -lm
 * These options don't seem to help:
 *   -ffast-math -funroll-loops -fprefetch-loop-arrays
 *   -march=pentium4 -mfpmath=sse -msse -msse2 -malign-double
 *
 * On Pentium 4, VC gives better results:
 *   cl /O2 /G7 /Oi- test_flops.c
 * /Oi- disables intrinsic functions, making exp faster but sqrt slower.
 * These options don't seem to help: /arch:SSE
 *
 * Results do not seem to be reliable within mex.
 */
/* Source code for the math routines is at:
 *   http://www.opencores.org/cvsweb.shtml/or1k/newlib/newlib/libm/mathfp/s_exp.c
 * According to that source:
 *   exp 20 flops
 *   log 22 flops
 *   pow 43 flops (naive algorithm)
 */
| 22 | #define STANDALONE 1 |
---|
| 23 | |
---|
| 24 | #if STANDALONE |
---|
| 25 | #include <stdio.h> |
---|
| 26 | #else |
---|
| 27 | #include "mex.h" |
---|
| 28 | #endif |
---|
| 29 | #include <math.h> |
---|
| 30 | #include <time.h> |
---|
| 31 | |
---|
| 32 | #define M 10000 |
---|
| 33 | #define N 10000 |
---|
| 34 | |
---|
| 35 | #if STANDALONE |
---|
| 36 | int main() |
---|
| 37 | #else |
---|
| 38 | void mexFunction(int nlhs, mxArray *plhs[], |
---|
| 39 | int nrhs, const mxArray *prhs[]) |
---|
| 40 | #endif |
---|
| 41 | { |
---|
| 42 | int i,j; |
---|
| 43 | clock_t t,t1,t2; |
---|
| 44 | clock_t t_loop,t_mul; |
---|
| 45 | double a[N],b[N],c[N]; |
---|
| 46 | |
---|
| 47 | for(i=0;i<N;i++) { b[i]=i; c[i] = N-i; } |
---|
| 48 | #if 0 |
---|
| 49 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) { a[i] = b[i]*c[i]; } t1=clock()-t; |
---|
| 50 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) { a[i] = b[i]*c[i]; a[i] = b[i]*c[i]; } t2=clock()-t; |
---|
| 51 | t_loop = 2*t1 - t2; |
---|
| 52 | printf("time for loop: \t%d\n", t_loop); |
---|
| 53 | #else |
---|
| 54 | t_loop = 0; |
---|
| 55 | #endif |
---|
| 56 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = b[i]*c[i]; t=clock()-t-t_loop; |
---|
| 57 | printf("time for multiply: \t%d\n", t); |
---|
| 58 | t_mul = t; |
---|
| 59 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = b[i]+c[i]; t=clock()-t-t_loop; |
---|
| 60 | printf("time for add: \t%d\n", t); |
---|
| 61 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = (b[i]<c[i]); t=clock()-t-t_loop; |
---|
| 62 | printf("time for <: \t%d\tflops=%g\n", t, (double)t/t_mul); |
---|
| 63 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = (b[i]==c[i]); t=clock()-t-t_loop; |
---|
| 64 | printf("time for ==: \t%d\tflops=%g\n", t, (double)t/t_mul); |
---|
| 65 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = b[i]/c[i]; t=clock()-t-t_loop; |
---|
| 66 | printf("time for /: \t%d\tflops=%g\n", t, (double)t/t_mul); |
---|
| 67 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = sqrt(b[i]); t=clock()-t-t_loop; |
---|
| 68 | printf("time for sqrt: \t%d\tflops=%g\n", t, (double)t/t_mul); |
---|
| 69 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = exp(b[i]); t=clock()-t-t_loop; |
---|
| 70 | printf("time for exp: \t%d\tflops=%g\n", t, (double)t/t_mul); |
---|
| 71 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = log(b[i]); t=clock()-t-t_loop; |
---|
| 72 | printf("time for log: \t%d\tflops=%g\n", t, (double)t/t_mul); |
---|
| 73 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = pow(b[i],c[i]); t=clock()-t-t_loop; |
---|
| 74 | printf("time for pow: \t%d\tflops=%g\n", t, (double)t/t_mul); |
---|
| 75 | } |
---|