/*
 * Each work-item running this kernel computes the new position and
 * velocity of one particle.
 *
 * Work-items stage particle positions in local memory so that each value
 * fetched from global memory is reused by the whole work-group, reducing
 * global-memory bandwidth.
 */
---|
7 | |
---|
/*
 * Advances one particle per work-item by a single time step.
 *
 * pos        in/out: xyz = position, w = mass (w is used as the mass when
 *            accumulating acceleration and is preserved on write-back)
 * vel        in/out: velocity of each particle
 * numBodies  number of particles in pos / vel
 * deltaTime  integration time step
 * epsSqr     softening term added to the squared distance
 * localPos   local scratch buffer; must hold at least get_local_size(0)
 *            float4 elements
 *
 * numBodies does not have to be a multiple of the work-group size, and the
 * global size may be rounded up past numBodies: surplus work-items still
 * take part in every barrier but neither read nor write pos / vel.
 *
 * NOTE(review): pos is updated in place while other work-groups may still
 * be loading tiles from it, and OpenCL offers no synchronisation between
 * work-groups inside a kernel. Unless the launch uses a single work-group,
 * some particles can observe already-advanced positions. Removing that
 * race needs a separate output buffer, i.e. an interface change.
 */
__kernel void simple_integrator(__global float4* pos,
                                __global float4* vel,
                                int numBodies,
                                float deltaTime,
                                float epsSqr,
                                __local float4* localPos)
{
    const unsigned int tid       = get_local_id(0);
    const unsigned int gid       = get_global_id(0);   // particle this work-item updates
    const unsigned int localSize = get_local_size(0);  // particles per tile

    // Treat a negative count as "no bodies" so the unsigned math below is safe.
    const unsigned int bodyCount = (numBodies > 0) ? (unsigned int)numBodies : 0u;

    // Round up so that a final, partially filled tile is still processed.
    const unsigned int numTiles =
        bodyCount / localSize + ((bodyCount % localSize) != 0u ? 1u : 0u);

    // Work-items beyond the last particle must keep running (they have to
    // reach every barrier) but must not touch global memory.
    const bool active = gid < bodyCount;

    float4 oldPos = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
    float4 oldVel = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
    if (active)
    {
        oldPos = pos[gid];
        oldVel = vel[gid];
    }

    // Accumulated acceleration on this work-item's particle.
    float4 acc = (float4)(0.0f, 0.0f, 0.0f, 0.0f);

    for (unsigned int tile = 0; tile < numTiles; ++tile)
    {
        const unsigned int tileBase = tile * localSize;
        const unsigned int idx      = tileBase + tid;

        // Number of valid particles in this tile (smaller for the last one).
        const unsigned int tileCount = min(localSize, bodyCount - tileBase);

        // Load one tile into local memory, skipping slots past the end.
        if (idx < bodyCount)
        {
            localPos[tid] = pos[idx];
        }

        // Make the tile visible to the whole work-group before using it.
        barrier(CLK_LOCAL_MEM_FENCE);

        // a[i->j] = m[j] * r[i->j] / (r^2 + epsSqr)^(3/2)
        for (unsigned int j = 0; j < tileCount; ++j)
        {
            const float4 other = localPos[j];

            // The subtraction is on all four lanes, so r.w (a mass
            // difference) is carried into acc.w; only the position's w
            // component is restored after integration.
            const float4 r = other - oldPos;

            const float distSqr     = r.x * r.x + r.y * r.y + r.z * r.z;
            const float invDist     = 1.0f / sqrt(distSqr + epsSqr);
            const float invDistCube = invDist * invDist * invDist;
            const float s           = other.w * invDistCube;

            acc += s * r;
        }

        // Everyone must be done with this tile before it is overwritten.
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    if (active)
    {
        // x' = x + v*dt + a*dt^2/2 ; v' = v + a*dt
        float4 newPos = oldPos + oldVel * deltaTime
                      + acc * 0.5f * deltaTime * deltaTime;
        newPos.w = oldPos.w;  // keep the mass unchanged

        const float4 newVel = oldVel + acc * deltaTime;

        pos[gid] = newPos;
        vel[gid] = newVel;
    }
}
---|
122 | |
---|