1 | __constant float VERY_LARGE_NUMBER = 99999999; // A big number used fo scalling time steps; |
---|
2 | |
---|
3 | |
---|
4 | __kernel void leapfrog_integrator( __global float4* pos , |
---|
5 | |
---|
6 | __global float4* vel, |
---|
7 | |
---|
8 | int numBodies, |
---|
9 | |
---|
10 | float deltaTime, |
---|
11 | |
---|
12 | float epsSqr, |
---|
13 | |
---|
14 | __local float4* localPos, |
---|
15 | |
---|
16 | __local float4* localVel, |
---|
17 | |
---|
18 | __global float4* acc , |
---|
19 | |
---|
20 | int initialAccComputation, |
---|
21 | |
---|
22 | __global float* collTime) |
---|
23 | |
---|
24 | { |
---|
25 | //--------------------------------------- |
---|
26 | unsigned int tid = get_local_id(0); |
---|
27 | |
---|
28 | unsigned int gid = get_global_id(0); // Index of the particle that it must update; |
---|
29 | |
---|
30 | unsigned int localSize = get_local_size(0); // This can be thought of as the number of threads |
---|
31 | // executing this kernel, for different work-items, |
---|
32 | // concurrently and synchronously; |
---|
33 | |
---|
34 | //--------------------------------------- |
---|
35 | // Number of tiles we need to iterate |
---|
36 | |
---|
37 | unsigned int numTiles = numBodies / localSize; |
---|
38 | |
---|
39 | |
---|
40 | //--------------------------------------- |
---|
41 | // Reads the particle position (and mass) and velocity |
---|
42 | // of particle "i" for which this kernel invocation |
---|
43 | // is tasked to update. |
---|
44 | |
---|
45 | float4 oldPos = pos[gid]; |
---|
46 | float4 oldVel = vel[gid]; |
---|
47 | |
---|
48 | //--------------------------------------- |
---|
49 | // Initializes the float4 we will use to accumulate |
---|
50 | // the acceleration on particle "i". |
---|
51 | |
---|
52 | float4 oldAcc = (float4)(0.0f, 0.0f, 0.0f, 0.0f); |
---|
53 | |
---|
54 | float time_scale_sq = VERY_LARGE_NUMBER; // Init the collision time [read theory |
---|
55 | // for more information; |
---|
56 | |
---|
57 | // Compute the partial v_(n+1) and x_(n+1): |
---|
58 | //oldVel += acc[gid]*deltaTime/2; |
---|
59 | //oldPos += oldVel*deltaTime; |
---|
60 | |
---|
61 | for(int i = 0; i < numTiles; ++i) |
---|
62 | |
---|
63 | { |
---|
64 | // load one tile into local memory |
---|
65 | |
---|
66 | int idx = i * localSize + tid; |
---|
67 | |
---|
68 | localPos[tid] = pos[idx]; |
---|
69 | localVel[tid] = vel[idx]; |
---|
70 | |
---|
71 | // Synchronize to make sure data is available for processing |
---|
72 | |
---|
73 | barrier(CLK_LOCAL_MEM_FENCE); |
---|
74 | |
---|
75 | for(int j = 0; j < localSize; ++j) |
---|
76 | |
---|
77 | { |
---|
78 | float4 rji, vji; |
---|
79 | float r2= 0.0f, r3= 0.0f, v2=0.0f; // r^2, r^3, v^2; |
---|
80 | |
---|
81 | rji = localPos[j] - oldPos; |
---|
82 | vji = localVel[j] - oldVel; |
---|
83 | |
---|
84 | r2 = pow(rji.x, 2) + pow(rji.y, 2) + pow(rji.z, 2); |
---|
85 | v2 = pow(vji.x, 2) + pow(vji.y, 2) + pow(vji.z, 2); |
---|
86 | |
---|
87 | if (r2 == 0) |
---|
88 | r2 = 0.000000000001; |
---|
89 | |
---|
90 | r3= r2*sqrt(r2); |
---|
91 | |
---|
92 | //---------------------- |
---|
93 | // The computation of acc |
---|
94 | |
---|
95 | oldAcc += oldPos.w*rji/r3; |
---|
96 | |
---|
97 | |
---|
98 | //---------------------- |
---|
99 | // The computation of time_scale_sq: |
---|
100 | |
---|
101 | float estimate_sq= r2/v2; // [distance]^2/[velocity]^2 = [time]^2 |
---|
102 | if (time_scale_sq > estimate_sq){ |
---|
103 | time_scale_sq = estimate_sq; |
---|
104 | } |
---|
105 | |
---|
106 | float a = (oldPos.w + localPos[j].w)/r2; |
---|
107 | estimate_sq = sqrt(r2)/a; // [distance]/[acceleration] = [time]^2 |
---|
108 | |
---|
109 | if (time_scale_sq > estimate_sq){ |
---|
110 | time_scale_sq = estimate_sq; |
---|
111 | } |
---|
112 | } |
---|
113 | |
---|
114 | // Synchronize so that next tile can be loaded |
---|
115 | |
---|
116 | barrier(CLK_LOCAL_MEM_FENCE); |
---|
117 | |
---|
118 | } |
---|
119 | |
---|
120 | collTime[gid] = sqrt(time_scale_sq); |
---|
121 | |
---|
122 | // Now we compute the rest of the v_(n+1): |
---|
123 | //oldVel += oldAcc*deltaTime/2; |
---|
124 | |
---|
125 | // pos[gid] = oldPos; |
---|
126 | // vel[gid] = oldVel; |
---|
127 | acc[gid] = oldAcc; |
---|
128 | |
---|
129 | } |
---|
130 | |
---|
131 | |
---|
132 | |
---|