[34] | 1 | __constant float VERY_LARGE_NUMBER = 99999999; // A big number used fo scalling time steps; |
---|
| 2 | |
---|
| 3 | |
---|
| 4 | __kernel void leapfrog_integrator( __global float4* pos , |
---|
| 5 | |
---|
| 6 | __global float4* vel, |
---|
| 7 | |
---|
| 8 | int numBodies, |
---|
| 9 | |
---|
| 10 | float deltaTime, |
---|
| 11 | |
---|
| 12 | float epsSqr, |
---|
| 13 | |
---|
| 14 | __local float4* localPos, |
---|
| 15 | |
---|
| 16 | __local float4* localVel, |
---|
| 17 | |
---|
| 18 | __global float4* acc , |
---|
| 19 | |
---|
| 20 | int initialAccComputation, |
---|
| 21 | |
---|
| 22 | __global float* collTime) |
---|
| 23 | |
---|
| 24 | { |
---|
| 25 | //--------------------------------------- |
---|
| 26 | unsigned int tid = get_local_id(0); |
---|
| 27 | |
---|
| 28 | unsigned int gid = get_global_id(0); // Index of the particle that it must update; |
---|
| 29 | |
---|
| 30 | unsigned int localSize = get_local_size(0); // This can be thought of as the number of threads |
---|
| 31 | // executing this kernel, for different work-items, |
---|
| 32 | // concurrently and synchronously; |
---|
| 33 | |
---|
| 34 | //--------------------------------------- |
---|
| 35 | // Number of tiles we need to iterate |
---|
| 36 | |
---|
| 37 | unsigned int numTiles = numBodies / localSize; |
---|
| 38 | |
---|
| 39 | |
---|
| 40 | //--------------------------------------- |
---|
| 41 | // Reads the particle position (and mass) and velocity |
---|
| 42 | // of particle "i" for which this kernel invocation |
---|
| 43 | // is tasked to update. |
---|
| 44 | |
---|
| 45 | float4 oldPos = pos[gid]; |
---|
| 46 | float4 oldVel = vel[gid]; |
---|
| 47 | |
---|
| 48 | //--------------------------------------- |
---|
| 49 | // Initializes the float4 we will use to accumulate |
---|
| 50 | // the acceleration on particle "i". |
---|
| 51 | |
---|
| 52 | float4 oldAcc = (float4)(0.0f, 0.0f, 0.0f, 0.0f); |
---|
| 53 | |
---|
| 54 | float time_scale_sq = VERY_LARGE_NUMBER; // Init the collision time [read theory |
---|
| 55 | // for more information; |
---|
| 56 | |
---|
| 57 | // Compute the partial v_(n+1) and x_(n+1): |
---|
| 58 | //oldVel += acc[gid]*deltaTime/2; |
---|
| 59 | //oldPos += oldVel*deltaTime; |
---|
| 60 | |
---|
| 61 | for(int i = 0; i < numTiles; ++i) |
---|
| 62 | |
---|
| 63 | { |
---|
| 64 | // load one tile into local memory |
---|
| 65 | |
---|
| 66 | int idx = i * localSize + tid; |
---|
| 67 | |
---|
| 68 | localPos[tid] = pos[idx]; |
---|
| 69 | localVel[tid] = vel[idx]; |
---|
| 70 | |
---|
| 71 | // Synchronize to make sure data is available for processing |
---|
| 72 | |
---|
| 73 | barrier(CLK_LOCAL_MEM_FENCE); |
---|
| 74 | |
---|
| 75 | for(int j = 0; j < localSize; ++j) |
---|
| 76 | |
---|
| 77 | { |
---|
| 78 | float4 rji, vji; |
---|
| 79 | float r2= 0.0f, r3= 0.0f, v2=0.0f; // r^2, r^3, v^2; |
---|
| 80 | |
---|
| 81 | rji = localPos[j] - oldPos; |
---|
| 82 | vji = localVel[j] - oldVel; |
---|
| 83 | |
---|
| 84 | r2 = pow(rji.x, 2) + pow(rji.y, 2) + pow(rji.z, 2); |
---|
| 85 | v2 = pow(vji.x, 2) + pow(vji.y, 2) + pow(vji.z, 2); |
---|
| 86 | |
---|
| 87 | if (r2 == 0) |
---|
| 88 | r2 = 0.000000000001; |
---|
| 89 | |
---|
| 90 | r3= r2*sqrt(r2); |
---|
| 91 | |
---|
| 92 | //---------------------- |
---|
| 93 | // The computation of acc |
---|
| 94 | |
---|
| 95 | oldAcc += oldPos.w*rji/r3; |
---|
| 96 | |
---|
| 97 | |
---|
| 98 | //---------------------- |
---|
| 99 | // The computation of time_scale_sq: |
---|
| 100 | |
---|
| 101 | float estimate_sq= r2/v2; // [distance]^2/[velocity]^2 = [time]^2 |
---|
| 102 | if (time_scale_sq > estimate_sq){ |
---|
| 103 | time_scale_sq = estimate_sq; |
---|
| 104 | } |
---|
| 105 | |
---|
| 106 | float a = (oldPos.w + localPos[j].w)/r2; |
---|
| 107 | estimate_sq = sqrt(r2)/a; // [distance]/[acceleration] = [time]^2 |
---|
| 108 | |
---|
| 109 | if (time_scale_sq > estimate_sq){ |
---|
| 110 | time_scale_sq = estimate_sq; |
---|
| 111 | } |
---|
| 112 | } |
---|
| 113 | |
---|
| 114 | // Synchronize so that next tile can be loaded |
---|
| 115 | |
---|
| 116 | barrier(CLK_LOCAL_MEM_FENCE); |
---|
| 117 | |
---|
| 118 | } |
---|
| 119 | |
---|
| 120 | collTime[gid] = sqrt(time_scale_sq); |
---|
| 121 | |
---|
| 122 | // Now we compute the rest of the v_(n+1): |
---|
| 123 | //oldVel += oldAcc*deltaTime/2; |
---|
| 124 | |
---|
| 125 | // pos[gid] = oldPos; |
---|
| 126 | // vel[gid] = oldVel; |
---|
| 127 | acc[gid] = oldAcc; |
---|
| 128 | |
---|
| 129 | } |
---|
| 130 | |
---|
| 131 | |
---|
| 132 | |
---|