#include "NBody.hpp"

extern int numBodies;

namespace {

/**
 * Enqueue the kernel as a 1-D NDRange and block until the queue has drained.
 *
 * The global work size is the file-level `numBodies`; the local work size is
 * `GROUP_SIZE`.
 *
 * @param queue       Command queue the kernel is enqueued on.
 * @param kernel      Kernel object to launch.
 * @param itemLimit   Device limit for work-items in dimension 0.
 * @param groupLimit  Device limit for the total work-group size.
 * @return 0 on success, 1 if the device limits are exceeded or an OpenCL
 *         call fails (a message is written to std::cout in that case).
 */
int launchKernelAndFinish(cl_command_queue queue,
                          cl_kernel kernel,
                          size_t itemLimit,
                          size_t groupLimit)
{
    // Explicit cast: brace-initialising a size_t from a run-time int is a
    // narrowing conversion and is rejected by C++11 and later.
    size_t globalThreads[] = {static_cast<size_t>(numBodies)};
    size_t localThreads[] = {GROUP_SIZE};

    if (localThreads[0] > itemLimit || localThreads[0] > groupLimit)
    {
        std::cout << "Unsupported: Device does not support requested number of work items."
                  << std::endl;
        return 1;
    }

    cl_int status = clEnqueueNDRangeKernel(
        queue,
        kernel,
        1,
        NULL,
        globalThreads,
        localThreads,
        0,
        NULL,
        NULL);
    if (status != CL_SUCCESS)
    {
        std::cout << "clEnqueueNDRangeKernel failed." << std::endl;
        return 1;
    }

    status = clFinish(queue);
    if (status != CL_SUCCESS)
    {
        std::cout << "clFinish failed." << std::endl;
        return 1;
    }

    return 0;
}

/**
 * Perform a blocking read of `byteCount` bytes from `buffer` into
 * `destination`, wait on the read's event, and release that event.
 *
 * The event is released on every path after a successful enqueue, so
 * repeated calls do not accumulate unreleased cl_event objects.
 *
 * @param queue        Command queue the read is enqueued on.
 * @param buffer       Device buffer to read from (offset 0).
 * @param byteCount    Number of bytes to copy.
 * @param destination  Host memory receiving the data.
 * @return 0 on success, 1 if an OpenCL call fails (a message is written to
 *         std::cout in that case).
 */
int readBufferAndWait(cl_command_queue queue,
                      cl_mem buffer,
                      size_t byteCount,
                      void* destination)
{
    cl_event readDone;

    cl_int status = clEnqueueReadBuffer(
        queue,
        buffer,
        CL_TRUE,
        0,
        byteCount,
        destination,
        0,
        NULL,
        &readDone);
    if (status != CL_SUCCESS)
    {
        std::cout << "clEnqueueReadBuffer failed." << std::endl;
        return 1;
    }

    status = clWaitForEvents(1, &readDone);

    // Release before inspecting the status so a failed wait cannot leak
    // the event.
    clReleaseEvent(readDone);

    if (status != CL_SUCCESS)
    {
        std::cout << "clWaitForEvents failed." << std::endl;
        return 1;
    }

    return 0;
}

} // namespace

/**
 * NBody::runCLKernels
 *
 * Enqueue the kernel on the command queue, wait until it has finished
 * executing, then read `updatedPos` back into the host array `pos`
 * (numBodies * sizeof(cl_float4) bytes).
 *
 * @return 0 on success, 1 on any failure.
 */
int NBody::runCLKernels()
{
    if (launchKernelAndFinish(commandQueue, kernel,
                              maxWorkItemSizes[0], maxWorkGroupSize) != 0)
    {
        return 1;
    }

    return readBufferAndWait(commandQueue,
                             updatedPos,
                             static_cast<size_t>(numBodies) * sizeof(cl_float4),
                             pos);
}

/**
 * NBody::runCLKernelLeapfrog
 *
 * Advance the host-side state by one fixed step of size `delT`:
 *   1. on the first call only, run the kernel once to obtain accelerations;
 *   2. update velocities by half a step using the current accelerations;
 *   3. update positions by a full step using those velocities;
 *   4. run the kernel again to refresh `acc` (and `collTime`);
 *   5. update velocities by the remaining half step.
 *
 * `pos`, `vel` and `acc` are indexed with a stride of four values per
 * particle; only the first three components of each entry are modified.
 *
 * The step is always `delT`. The original code also scanned `collTime` for
 * its minimum, but the factor that used it was commented out, so that dead
 * scan has been removed.
 *
 * NOTE(review): the loops run over `numParticles` while the kernel launch is
 * sized by `numBodies` -- confirm the two are always equal.
 * NOTE(review): no upload of the updated host `pos` to the device is visible
 * in this file before the kernel is re-run -- confirm the device sees the
 * new positions (e.g. through a host-pointer-backed buffer).
 *
 * @return 0 on success, 1 if a kernel launch or read-back fails.
 */
int NBody::runCLKernelLeapfrog()
{
    if (initialAccComputation)
    {
        if (enqueueKernel() != 0)
        {
            // Leave the flag set so the initial computation is retried.
            return 1;
        }
        initialAccComputation = 0;
    }

    const cl_float dt = static_cast<cl_float>(delT);

    // First half of the velocity update, then the full position update.
    for (int i = 0; i < numParticles; i++)
    {
        const int idx = i * 4;
        for (int k = 0; k < 3; k++)
        {
            vel[idx + k] += acc[idx + k] * dt / 2;
        }
        for (int k = 0; k < 3; k++)
        {
            pos[idx + k] += vel[idx + k] * dt;
        }
    }

    // Refresh the accelerations; do not finish the step with stale values
    // if this fails.
    if (enqueueKernel() != 0)
    {
        return 1;
    }

    // Second half of the velocity update with the refreshed accelerations.
    for (int i = 0; i < numParticles; i++)
    {
        const int idx = i * 4;
        for (int k = 0; k < 3; k++)
        {
            vel[idx + k] += acc[idx + k] * dt / 2;
        }
    }

    return 0;
}

/**
 * NBody::enqueueKernel
 *
 * Enqueue the kernel, wait until it has finished executing, then read
 * `updatedAcc` into `acc` (numBodies * sizeof(cl_float4) bytes) and
 * `updatedCollTime` into `collTime` (numBodies * sizeof(cl_float) bytes).
 *
 * @return 0 on success, 1 on any failure.
 */
int NBody::enqueueKernel()
{
    if (launchKernelAndFinish(commandQueue, kernel,
                              maxWorkItemSizes[0], maxWorkGroupSize) != 0)
    {
        return 1;
    }

    const size_t bodyCount = static_cast<size_t>(numBodies);

    if (readBufferAndWait(commandQueue,
                          updatedAcc,
                          bodyCount * sizeof(cl_float4),
                          acc) != 0)
    {
        return 1;
    }

    if (readBufferAndWait(commandQueue,
                          updatedCollTime,
                          bodyCount * sizeof(cl_float),
                          collTime) != 0)
    {
        return 1;
    }

    // The original fell off the end of this non-void function here, which
    // is undefined behaviour; report success explicitly.
    return 0;
}