#include "NBody.hpp"

extern int numBodies; // Number of particles; defined outside this file.

/**
 * NBody::setupCL
 *
 * Initializes the OpenCL state used by the simulation:
 *   - creates a context for the requested device type ("cpu" selects the CPU,
 *     any other value of deviceType selects the GPU), retrying on the CPU when
 *     the GPU context cannot be created;
 *   - queries the context's device list into `devices`;
 *   - creates a command queue on the first device;
 *   - calls getDeviceInfo();
 *   - creates the position and velocity buffers, plus the acceleration and
 *     collision-time buffers when the integrator is LEAPFROG.
 *
 * All buffers are created with CL_MEM_USE_HOST_PTR on top of the host arrays
 * pos, vel, acc and collTime.
 *
 * On failure a message is printed to stdout and the function returns
 * immediately; anything acquired up to that point (context, device list,
 * command queue, buffers) is left in the corresponding members and is NOT
 * released here.
 *
 * @return 0 on success, 1 if any OpenCL call or the device-list allocation fails.
 */
int NBody::setupCL()
{
    cl_int status = CL_SUCCESS;

    // Anything other than "cpu" is treated as a request for a GPU device.
    cl_device_type dType = CL_DEVICE_TYPE_GPU;
    if (deviceType.compare("cpu") == 0)
    {
        dType = CL_DEVICE_TYPE_CPU;
    }

    /* Create context from given device type */
    context = clCreateContextFromType(0, dType, NULL, NULL, &status);

    /*
     * If OpenCL fails to open a context on the GPU,
     * fall back to the CPU.
     */
    if (status != CL_SUCCESS && dType == CL_DEVICE_TYPE_GPU)
    {
        std::cout << "Unsupported GPU device; falling back to CPU ..." << std::endl;
        std::cout << std::endl;
        context = clCreateContextFromType(0, CL_DEVICE_TYPE_CPU, NULL, NULL, &status);
    }

    if (status != CL_SUCCESS)
    {
        std::cout << "clCreateContextFromType failed." << std::endl;
        return 1;
    }

    /* First, get the size (in bytes) of the device list data */
    size_t deviceListSize = 0;
    status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize);
    if (status != CL_SUCCESS)
    {
        std::cout << "clGetContextInfo failed." << std::endl;
        return 1;
    }

    // devices[0] is dereferenced below, so the list must hold at least one
    // entry. Checking here also avoids malloc(0), whose result may be NULL and
    // would otherwise be reported as an allocation failure.
    if (deviceListSize < sizeof(cl_device_id))
    {
        std::cout << "No OpenCL devices found in context." << std::endl;
        return 1;
    }

    /* Now allocate memory for the device list based on the size we got earlier */
    devices = static_cast<cl_device_id *>(malloc(deviceListSize));
    if (devices == NULL)
    {
        std::cout << "Failed to allocate memory (devices)." << std::endl;
        return 1;
    }

    /* Now, get the device list data */
    status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize, devices, NULL);
    if (status != CL_SUCCESS)
    {
        std::cout << "clGetContextInfo failed." << std::endl;
        return 1;
    }

    /* Create command queue on the first device of the context */
    commandQueue = clCreateCommandQueue(context, devices[0], 0, &status);
    if (status != CL_SUCCESS)
    {
        std::cout << "clCreateCommandQueue failed." << std::endl;
        return 1;
    }

    //----------------------------------
    // Device infos:
    getDeviceInfo();

    /*
     * Create and initialize memory objects
     */

    //================================================
    // Buffers used for all kernels:

    /* Memory object for positions: numBodies elements of cl_float4 */
    updatedPos = clCreateBuffer(
        context,
        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        numBodies * sizeof(cl_float4),
        pos,
        &status);
    if (status != CL_SUCCESS)
    {
        std::cout << "clCreateBuffer failed. (updatedPos)" << std::endl;
        return 1;
    }

    /* Memory object for velocities */
    // NOTE(review): this buffer (and the two LEAPFROG buffers below) is created
    // CL_MEM_READ_ONLY although its name suggests kernels update it - confirm
    // against the kernel code that no kernel writes to it.
    updatedVel = clCreateBuffer(
        context,
        CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
        numBodies * sizeof(cl_float4),
        vel,
        &status);
    if (status != CL_SUCCESS)
    {
        std::cout << "clCreateBuffer failed. (updatedVel)" << std::endl;
        return 1;
    }

    //================================================
    // Buffers used only for Leapfrog kernels:
    if (integrator == LEAPFROG)
    {
        /* Memory object for accelerations */
        updatedAcc = clCreateBuffer(
            context,
            CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
            numBodies * sizeof(cl_float4),
            acc,
            &status);
        if (status != CL_SUCCESS)
        {
            std::cout << "clCreateBuffer failed. (updatedAcc)" << std::endl;
            return 1;
        }

        /* Memory object for collision times: one cl_float per body */
        updatedCollTime = clCreateBuffer(
            context,
            CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
            numBodies * sizeof(cl_float),
            collTime,
            &status);
        if (status != CL_SUCCESS)
        {
            std::cout << "clCreateBuffer failed. (updatedCollTime)" << std::endl;
            return 1;
        }

        initialAccComputation = 1;
    }

    return 0;
}