1 | /* Test the time for various math operations. |
---|
2 | * This should be compiled as a standalone program, NOT a mex file: |
---|
3 | * cc -O3 -o test_flops test_flops.c -lm |
---|
4 | * These options don't seem to help: |
---|
5 | * -ffast-math -funroll-loops -fprefetch-loop-arrays |
---|
6 | * -march=pentium4 -mfpmath=sse -msse -msse2 -malign-double |
---|
7 | * |
---|
8 | * On Pentium 4, VC gives better results: |
---|
9 | * cl /O2 /G7 /Oi- test_flops.c |
---|
10 | * /Oi- disables intrinsic functions, making exp faster but sqrt slower. |
---|
11 | * These options don't seem to help: /arch:SSE |
---|
12 | * |
---|
13 | * Results do not seem to be reliable within mex. |
---|
14 | */ |
---|
/* Reference implementation (newlib) used for the flop counts below:
     http://www.opencores.org/cvsweb.shtml/or1k/newlib/newlib/libm/mathfp/s_exp.c
   Approximate cost of each function according to that source:
     exp 20 flops
     log 22 flops
     pow 43 flops (naive algorithm; presumably exp(y*log(x)), i.e. 20 + 22 + 1)
 */
---|
22 | #define STANDALONE 1 |
---|
23 | |
---|
24 | #if STANDALONE |
---|
25 | #include <stdio.h> |
---|
26 | #else |
---|
27 | #include "mex.h" |
---|
28 | #endif |
---|
29 | #include <math.h> |
---|
30 | #include <time.h> |
---|
31 | |
---|
32 | #define M 10000 |
---|
33 | #define N 10000 |
---|
34 | |
---|
35 | #if STANDALONE |
---|
36 | int main() |
---|
37 | #else |
---|
38 | void mexFunction(int nlhs, mxArray *plhs[], |
---|
39 | int nrhs, const mxArray *prhs[]) |
---|
40 | #endif |
---|
41 | { |
---|
42 | int i,j; |
---|
43 | clock_t t,t1,t2; |
---|
44 | clock_t t_loop,t_mul; |
---|
45 | double a[N],b[N],c[N]; |
---|
46 | |
---|
47 | for(i=0;i<N;i++) { b[i]=i; c[i] = N-i; } |
---|
48 | #if 0 |
---|
49 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) { a[i] = b[i]*c[i]; } t1=clock()-t; |
---|
50 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) { a[i] = b[i]*c[i]; a[i] = b[i]*c[i]; } t2=clock()-t; |
---|
51 | t_loop = 2*t1 - t2; |
---|
52 | printf("time for loop: \t%d\n", t_loop); |
---|
53 | #else |
---|
54 | t_loop = 0; |
---|
55 | #endif |
---|
56 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = b[i]*c[i]; t=clock()-t-t_loop; |
---|
57 | printf("time for multiply: \t%d\n", t); |
---|
58 | t_mul = t; |
---|
59 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = b[i]+c[i]; t=clock()-t-t_loop; |
---|
60 | printf("time for add: \t%d\n", t); |
---|
61 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = (b[i]<c[i]); t=clock()-t-t_loop; |
---|
62 | printf("time for <: \t%d\tflops=%g\n", t, (double)t/t_mul); |
---|
63 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = (b[i]==c[i]); t=clock()-t-t_loop; |
---|
64 | printf("time for ==: \t%d\tflops=%g\n", t, (double)t/t_mul); |
---|
65 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = b[i]/c[i]; t=clock()-t-t_loop; |
---|
66 | printf("time for /: \t%d\tflops=%g\n", t, (double)t/t_mul); |
---|
67 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = sqrt(b[i]); t=clock()-t-t_loop; |
---|
68 | printf("time for sqrt: \t%d\tflops=%g\n", t, (double)t/t_mul); |
---|
69 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = exp(b[i]); t=clock()-t-t_loop; |
---|
70 | printf("time for exp: \t%d\tflops=%g\n", t, (double)t/t_mul); |
---|
71 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = log(b[i]); t=clock()-t-t_loop; |
---|
72 | printf("time for log: \t%d\tflops=%g\n", t, (double)t/t_mul); |
---|
73 | t=clock(); for(j=0;j<M;j++) for(i=0;i<N;i++) a[i] = pow(b[i],c[i]); t=clock()-t-t_loop; |
---|
74 | printf("time for pow: \t%d\tflops=%g\n", t, (double)t/t_mul); |
---|
75 | } |
---|