[35] | 1 | #include "NBody.hpp" |
---|
| 2 | |
---|
| 3 | extern int numBodies; |
---|
| 4 | |
---|
| 5 | /////////////////////////////////// NBody::runCLKernels Func \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ |
---|
| 6 | // |
---|
| 7 | // Enqueue calls to the kernels on to the command queue, |
---|
| 8 | // wait till end of kernel execution. |
---|
| 9 | // |
---|
| 10 | /////////////////////////////////// NBody::runCLKernels Func \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ |
---|
| 11 | |
---|
| 12 | int NBody::runCLKernels() |
---|
| 13 | { |
---|
| 14 | cl_int status; |
---|
| 15 | cl_event events[1]; |
---|
| 16 | |
---|
| 17 | /* |
---|
| 18 | * Enqueue a kernel run call. |
---|
| 19 | */ |
---|
| 20 | size_t globalThreads[] = {numBodies}; |
---|
| 21 | size_t localThreads[] = {GROUP_SIZE}; |
---|
| 22 | |
---|
| 23 | if(localThreads[0] > maxWorkItemSizes[0] || localThreads[0] > maxWorkGroupSize) |
---|
| 24 | { |
---|
| 25 | std::cout<<"Unsupported: Device does not support requested number of work items."; |
---|
| 26 | |
---|
| 27 | return 1; |
---|
| 28 | } |
---|
| 29 | |
---|
| 30 | status = clEnqueueNDRangeKernel( |
---|
| 31 | commandQueue, |
---|
| 32 | kernel, |
---|
| 33 | 1, |
---|
| 34 | NULL, |
---|
| 35 | globalThreads, |
---|
| 36 | localThreads, |
---|
| 37 | 0, |
---|
| 38 | NULL, |
---|
| 39 | NULL); |
---|
| 40 | |
---|
| 41 | if(status != CL_SUCCESS) |
---|
| 42 | { |
---|
| 43 | std::cout << "clEnqueueNDRangeKernel failed." << std::endl; |
---|
| 44 | return 1; |
---|
| 45 | } |
---|
| 46 | |
---|
| 47 | status = clFinish(commandQueue); |
---|
| 48 | if(status != CL_SUCCESS) |
---|
| 49 | { |
---|
| 50 | std::cout << "clFinish failed." << std::endl; |
---|
| 51 | return 1; |
---|
| 52 | } |
---|
| 53 | |
---|
| 54 | /* Enqueue readBuffer*/ |
---|
| 55 | status = clEnqueueReadBuffer( |
---|
| 56 | commandQueue, |
---|
| 57 | updatedPos, |
---|
| 58 | CL_TRUE, |
---|
| 59 | 0, |
---|
| 60 | numBodies* sizeof(cl_float4), |
---|
| 61 | pos, |
---|
| 62 | 0, |
---|
| 63 | NULL, |
---|
| 64 | &events[0]); |
---|
| 65 | |
---|
| 66 | if(status != CL_SUCCESS) |
---|
| 67 | { |
---|
| 68 | std::cout << "clEnqueueReadBuffer failed." << std::endl; |
---|
| 69 | return 1; |
---|
| 70 | } |
---|
| 71 | |
---|
| 72 | /* Wait for the read buffer to finish execution */ |
---|
| 73 | status = clWaitForEvents(1, &events[0]); |
---|
| 74 | if(status != CL_SUCCESS) |
---|
| 75 | { |
---|
| 76 | std::cout << "clWaitForEvents failed." << std::endl; |
---|
| 77 | return 1; |
---|
| 78 | } |
---|
| 79 | |
---|
| 80 | clReleaseEvent(events[0]); |
---|
| 81 | |
---|
| 82 | return 0; |
---|
| 83 | } |
---|
| 84 | |
---|
| 85 | |
---|
| 86 | int NBody::runCLKernelLeapfrog() |
---|
| 87 | { |
---|
| 88 | //cl_int status; |
---|
| 89 | //cl_event events[1]; |
---|
| 90 | |
---|
| 91 | if (initialAccComputation){ |
---|
| 92 | enqueueKernel(); |
---|
| 93 | initialAccComputation = 0; |
---|
| 94 | } |
---|
| 95 | |
---|
| 96 | float min = collTime[0]; |
---|
| 97 | |
---|
| 98 | for (int i = 1; i < numParticles; i++){ |
---|
| 99 | if (min > collTime[i]) |
---|
| 100 | min = collTime[i]; |
---|
| 101 | } |
---|
| 102 | |
---|
| 103 | cl_float dt = (float)delT ;//* min; |
---|
| 104 | |
---|
| 105 | // Compute the partial v_(n+1) and x_(n+1): |
---|
| 106 | for (int i = 0; i < numParticles; i++){ |
---|
| 107 | |
---|
| 108 | int idx = i*4; |
---|
| 109 | |
---|
| 110 | for (int k = 0; k < 3; k++) |
---|
| 111 | vel[idx+k] += acc[idx+k]*dt/2; |
---|
| 112 | |
---|
| 113 | for (int k = 0; k < 3; k++) |
---|
| 114 | pos[idx+k] += vel[idx+k] * dt; |
---|
| 115 | } |
---|
| 116 | |
---|
| 117 | |
---|
| 118 | enqueueKernel(); |
---|
| 119 | |
---|
| 120 | min = collTime[0]; |
---|
| 121 | |
---|
| 122 | for (int i = 1; i < numParticles; i++){ |
---|
| 123 | if (min > collTime[i]) |
---|
| 124 | min = collTime[i]; |
---|
| 125 | } |
---|
| 126 | |
---|
| 127 | dt = (float)delT ;//* min; |
---|
| 128 | |
---|
| 129 | // Now we compute the rest of the v_(n+1): |
---|
| 130 | for (int i = 0; i < numParticles; i++){ |
---|
| 131 | |
---|
| 132 | int idx = i*4; |
---|
| 133 | |
---|
| 134 | for (int k = 0; k < 3; k++) |
---|
| 135 | vel[idx+k] += acc[idx+k] * dt/2; |
---|
| 136 | } |
---|
| 137 | |
---|
| 138 | return 0; |
---|
| 139 | } |
---|
| 140 | |
---|
| 141 | int NBody::enqueueKernel() |
---|
| 142 | { |
---|
| 143 | cl_int status; |
---|
| 144 | cl_event events[1]; |
---|
| 145 | |
---|
| 146 | /* |
---|
| 147 | * Enqueue a kernel run call. |
---|
| 148 | */ |
---|
| 149 | size_t globalThreads[] = {numBodies}; |
---|
| 150 | size_t localThreads[] = {GROUP_SIZE}; |
---|
| 151 | |
---|
| 152 | if(localThreads[0] > maxWorkItemSizes[0] || localThreads[0] > maxWorkGroupSize) |
---|
| 153 | { |
---|
| 154 | std::cout << "Unsupported: Device does not support requested number of work items." << std::endl; |
---|
| 155 | |
---|
| 156 | return 1; |
---|
| 157 | } |
---|
| 158 | |
---|
| 159 | |
---|
| 160 | status = clEnqueueNDRangeKernel( |
---|
| 161 | commandQueue, |
---|
| 162 | kernel, |
---|
| 163 | 1, |
---|
| 164 | NULL, |
---|
| 165 | globalThreads, |
---|
| 166 | localThreads, |
---|
| 167 | 0, |
---|
| 168 | NULL, |
---|
| 169 | NULL); |
---|
| 170 | |
---|
| 171 | if(status != CL_SUCCESS) |
---|
| 172 | { |
---|
| 173 | std::cout << "clEnqueueNDRangeKernel failed." << std::endl; |
---|
| 174 | return 1; |
---|
| 175 | } |
---|
| 176 | |
---|
| 177 | status = clFinish(commandQueue); |
---|
| 178 | if(status != CL_SUCCESS) |
---|
| 179 | { |
---|
| 180 | std::cout << "clFinish failed." << std::endl; |
---|
| 181 | return 1; |
---|
| 182 | } |
---|
| 183 | |
---|
| 184 | /* Enqueue readBuffer*/ |
---|
| 185 | status = clEnqueueReadBuffer( |
---|
| 186 | commandQueue, |
---|
| 187 | updatedAcc, |
---|
| 188 | CL_TRUE, |
---|
| 189 | 0, |
---|
| 190 | numBodies* sizeof(cl_float4), |
---|
| 191 | acc, |
---|
| 192 | 0, |
---|
| 193 | NULL, |
---|
| 194 | &events[0]); |
---|
| 195 | |
---|
| 196 | if(status != CL_SUCCESS) |
---|
| 197 | { |
---|
| 198 | std::cout << "clEnqueueReadBuffer failed." << std::endl; |
---|
| 199 | return 1; |
---|
| 200 | } |
---|
| 201 | |
---|
| 202 | /* Wait for the read buffer to finish execution */ |
---|
| 203 | status = clWaitForEvents(1, &events[0]); |
---|
| 204 | if(status != CL_SUCCESS) |
---|
| 205 | { |
---|
| 206 | std::cout << "clWaitForEvents failed." << std::endl; |
---|
| 207 | return 1; |
---|
| 208 | } |
---|
| 209 | |
---|
| 210 | /* Enqueue readBuffer*/ |
---|
| 211 | status = clEnqueueReadBuffer( |
---|
| 212 | commandQueue, |
---|
| 213 | updatedCollTime, |
---|
| 214 | CL_TRUE, |
---|
| 215 | 0, |
---|
| 216 | numBodies* sizeof(cl_float), |
---|
| 217 | collTime, |
---|
| 218 | 0, |
---|
| 219 | NULL, |
---|
| 220 | &events[0]); |
---|
| 221 | |
---|
| 222 | if(status != CL_SUCCESS) |
---|
| 223 | { |
---|
| 224 | std::cout << "clEnqueueReadBuffer failed." << std::endl; |
---|
| 225 | return 1; |
---|
| 226 | } |
---|
| 227 | |
---|
| 228 | /* Wait for the read buffer to finish execution */ |
---|
| 229 | status = clWaitForEvents(1, &events[0]); |
---|
| 230 | if(status != CL_SUCCESS) |
---|
| 231 | { |
---|
| 232 | std::cout << "clWaitForEvents failed." << std::endl; |
---|
| 233 | return 1; |
---|
| 234 | } |
---|
| 235 | |
---|
| 236 | clReleaseEvent(events[0]); |
---|
| 237 | } |
---|