/*
 * Each work-item invocation of this kernel calculates the new position and
 * velocity of one particle.
 *
 * Work-items stage particle positions in local memory so that each value
 * loaded from global memory is reused, reducing global memory bandwidth.
 */
7 | |
---|
8 | |
---|
9 | |
---|
/*
 * nbody_sim - advance one body by one time step.
 *
 * Each work-item handles the body at index get_global_id(0). Positions are
 * processed in tiles of get_local_size(0) bodies: every work-item copies one
 * position of the current tile into localPos, the group synchronizes, and each
 * work-item then accumulates the acceleration contributed by the tile.
 *
 * Parameters:
 *   pos       - body positions; .w is used as the mass term m[j] in the
 *               acceleration formula. Read for every tile and overwritten at
 *               this work-item's index.
 *   vel       - body velocities; read and overwritten at this work-item's index.
 *   numBodies - number of valid entries in pos and vel. It does not have to be
 *               a multiple of the work-group size, and the global size may be
 *               rounded up past it; surplus work-items write nothing.
 *   deltaTime - integration time step.
 *   epsSqr    - softening term added to the squared distance.
 *   localPos  - local scratch buffer; indexed by local id, so it needs at least
 *               get_local_size(0) elements.
 *
 * NOTE(review): pos is read by every work-group while other work-groups store
 * their updated positions into the same buffer. barrier() only orders
 * work-items inside one work-group, so a group may observe positions that were
 * already advanced by another group. Removing that race needs separate output
 * buffers, i.e. a change to the kernel signature and the host code.
 */
__kernel void nbody_sim(__global float4* pos,
                        __global float4* vel,
                        int numBodies,
                        float deltaTime,
                        float epsSqr,
                        __local float4* localPos)
{
    const unsigned int tid = get_local_id(0);
    const unsigned int gid = get_global_id(0);
    const unsigned int localSize = get_local_size(0);

    // Treat a negative count as "no bodies" instead of letting it wrap around.
    const unsigned int bodyCount = (numBodies > 0) ? (unsigned int)numBodies : 0u;

    // Number of tiles to iterate, rounded up so that a trailing partial tile is
    // still processed when bodyCount is not a multiple of the work-group size.
    const unsigned int numTiles =
        bodyCount / localSize + ((bodyCount % localSize != 0u) ? 1u : 0u);

    // Work-items past the end of the arrays must still reach every barrier and
    // help load tiles, but they must not touch pos[gid] or vel[gid].
    const bool active = gid < bodyCount;

    // Position of this work-item's body.
    float4 myPos = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
    if (active)
    {
        myPos = pos[gid];
    }

    float4 acc = (float4)(0.0f, 0.0f, 0.0f, 0.0f);

    for (unsigned int tile = 0; tile < numTiles; ++tile)
    {
        // Bodies in this tile. Only the last tile can be short. The value
        // depends on nothing work-item specific, so it is identical for the
        // whole work-group.
        const unsigned int tileBase = tile * localSize;
        const unsigned int remaining = bodyCount - tileBase;
        const unsigned int tileCount = (remaining < localSize) ? remaining : localSize;

        // Load one tile into local memory, skipping slots beyond the last body.
        if (tid < tileCount)
        {
            localPos[tid] = pos[tileBase + tid];
        }

        // Synchronize to make sure the tile is available for processing.
        barrier(CLK_LOCAL_MEM_FENCE);

        // Calculate the acceleration effect due to each body in the tile:
        // a[i->j] = m[j] * r[i->j] / (r^2 + epsSqr)^(3/2)
        if (active)
        {
            for (unsigned int j = 0; j < tileCount; ++j)
            {
                // Acceleration caused by particle j on this particle.
                float4 r = localPos[j] - myPos;
                float distSqr = r.x * r.x + r.y * r.y + r.z * r.z;
                float invDist = 1.0f / sqrt(distSqr + epsSqr);
                float invDistCube = invDist * invDist * invDist;
                float s = localPos[j].w * invDistCube;

                // Accumulate the effect of all particles. All four lanes are
                // accumulated, so acc.w is carried into newVel.w below.
                acc += s * r;
            }
        }

        // Synchronize so that the next tile can overwrite localPos safely.
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    // No barriers follow, so surplus work-items can leave here.
    if (!active)
    {
        return;
    }

    float4 oldVel = vel[gid];

    // Updated position and velocity.
    float4 newPos = myPos + oldVel * deltaTime + acc * 0.5f * deltaTime * deltaTime;
    newPos.w = myPos.w; // keep the original .w instead of integrating it

    float4 newVel = oldVel + acc * deltaTime;

    // Write to global memory.
    pos[gid] = newPos;
    vel[gid] = newVel;
}
123 | |
---|