[30] | 1 | #include "NBody.hpp" |
---|
| 2 | |
---|
| 3 | int numBodies; // No. of particles; |
---|
| 4 | cl_float* pos; // Output position; |
---|
| 5 | void* me; // Pointing to NBody class; |
---|
| 6 | cl_bool display; // If it is true then OpenGL display is used; |
---|
| 7 | long curr_step = 0; // Reatins the current step in the simulation; |
---|
| 8 | long n_steps = 110; // Numbuer of steps of the simulation; |
---|
| 9 | cl_bool verify = false; // Compares the final position vectors resulted from running |
---|
| 10 | // the code on CPU and GPU; |
---|
| 11 | |
---|
| 12 | |
---|
| 13 | std::string deviceType("cpu"); // It says on which device we want to do the computations; |
---|
| 14 | |
---|
| 15 | time_t rawtime; // Variables used for measuring the duration of the run; |
---|
| 16 | struct tm * timeinfo; |
---|
| 17 | |
---|
| 18 | |
---|
| 19 | float NBody::random(float randMax, float randMin) |
---|
| 20 | { |
---|
| 21 | float result; |
---|
| 22 | result =(float)rand()/(float)RAND_MAX; |
---|
| 23 | |
---|
| 24 | return ((1.0f - result) * randMin + result *randMax); |
---|
| 25 | } |
---|
| 26 | |
---|
| 27 | int NBody::setupNBody() |
---|
| 28 | { |
---|
| 29 | //-------------------------------------------- |
---|
| 30 | // make sure numParticles is multiple of group size |
---|
| 31 | numParticles = (numParticles < GROUP_SIZE) ? GROUP_SIZE : numParticles; |
---|
| 32 | numParticles = (numParticles / GROUP_SIZE) * GROUP_SIZE; |
---|
| 33 | |
---|
| 34 | numBodies = numParticles; |
---|
| 35 | |
---|
| 36 | //-------------------------------------------- |
---|
| 37 | // First we will use initPos and initVel vectors to generate the |
---|
| 38 | // input data. They will be used only in this function to |
---|
| 39 | // initialize the other vectors: the pos and vel vectors |
---|
| 40 | // used in simulation on GPU only, and refPos and refVel used in |
---|
| 41 | // simulation on CPU only |
---|
| 42 | initPos = (cl_float*)malloc(numBodies * sizeof(cl_float4)); |
---|
| 43 | if(initPos == NULL) |
---|
| 44 | { |
---|
| 45 | std::cout << "Failed to allocate host memory. (initPos)" << std::endl; |
---|
| 46 | return 1; |
---|
| 47 | } |
---|
| 48 | |
---|
| 49 | initVel = (cl_float*)malloc(numBodies * sizeof(cl_float4)); |
---|
| 50 | if(initVel == NULL) |
---|
| 51 | { |
---|
| 52 | std::cout << "Failed to allocate host memory. (initVel)" << std::endl; |
---|
| 53 | return 1; |
---|
| 54 | } |
---|
| 55 | |
---|
| 56 | // initialization of inputs |
---|
| 57 | for(int i = 0; i < numBodies; ++i) |
---|
| 58 | { |
---|
| 59 | int index = 4 * i; |
---|
| 60 | |
---|
| 61 | // First 3 values are position in x,y and z direction |
---|
| 62 | for(int j = 0; j < 3; ++j) |
---|
| 63 | { |
---|
| 64 | initPos[index + j] = random(3, 50); |
---|
| 65 | } |
---|
| 66 | |
---|
| 67 | // Mass value |
---|
| 68 | initPos[index + 3] = random(1, 1000); |
---|
| 69 | |
---|
| 70 | // First 3 values are velocity in x,y and z direction |
---|
| 71 | for(int j = 0; j < 3; ++j) |
---|
| 72 | { |
---|
| 73 | initVel[index + j] = 0.0f; |
---|
| 74 | } |
---|
| 75 | |
---|
| 76 | // unused |
---|
| 77 | initVel[3] = 0.0f; |
---|
| 78 | } |
---|
| 79 | |
---|
| 80 | //-------------------------------------------- |
---|
| 81 | // Variables used on GPU Running only: |
---|
| 82 | if(deviceType.compare("gpu") == 0){ |
---|
| 83 | |
---|
| 84 | #if defined (_WIN32) |
---|
| 85 | pos = (cl_float*)_aligned_malloc(numBodies * sizeof(cl_float4), 16); |
---|
| 86 | #else |
---|
| 87 | pos = (cl_float*)memalign(16, numBodies * sizeof(cl_float4)); |
---|
| 88 | #endif |
---|
| 89 | |
---|
| 90 | if(pos == NULL) |
---|
| 91 | { |
---|
| 92 | std::cout << "Failed to allocate host memory. (pos)" << std::endl; |
---|
| 93 | return 1; |
---|
| 94 | } |
---|
| 95 | |
---|
| 96 | #if defined (_WIN32) |
---|
| 97 | vel = (cl_float*)_aligned_malloc(numBodies * sizeof(cl_float4), 16); |
---|
| 98 | #else |
---|
| 99 | vel = (cl_float*)memalign(16, numBodies * sizeof(cl_float4)); |
---|
| 100 | #endif |
---|
| 101 | |
---|
| 102 | if(vel == NULL) |
---|
| 103 | { |
---|
| 104 | std::cout << "Failed to allocate host memory. (vel)" << std::endl; |
---|
| 105 | return 1; |
---|
| 106 | } |
---|
| 107 | |
---|
| 108 | // Copy the auxiliary vectors into the pos and vel ones: |
---|
| 109 | memcpy(pos, initPos, 4 * numBodies * sizeof(cl_float)); |
---|
| 110 | memcpy(vel, initVel, 4 * numBodies * sizeof(cl_float)); |
---|
| 111 | } |
---|
| 112 | |
---|
| 113 | //-------------------------------------------- |
---|
| 114 | // Variables used on CPU Running only: |
---|
| 115 | if(deviceType.compare("cpu") == 0){ |
---|
| 116 | |
---|
| 117 | refPos = (cl_float*)malloc(numBodies * sizeof(cl_float4)); |
---|
| 118 | if(refPos == NULL) |
---|
| 119 | { |
---|
| 120 | std::cout << "Failed to allocate host memory. (refPos)" << std::endl; |
---|
| 121 | return 1; |
---|
| 122 | } |
---|
| 123 | |
---|
| 124 | refVel = (cl_float*)malloc(numBodies * sizeof(cl_float4)); |
---|
| 125 | if(refVel == NULL) |
---|
| 126 | { |
---|
| 127 | std::cout << "Failed to allocate host memory. (refVel)" << std::endl; |
---|
| 128 | return 1; |
---|
| 129 | } |
---|
| 130 | |
---|
| 131 | // Copy the auxiliary vectors into the refPos and refVel ones: |
---|
| 132 | memcpy(refPos, initPos, 4 * numBodies * sizeof(cl_float)); |
---|
| 133 | memcpy(refVel, initVel, 4 * numBodies * sizeof(cl_float)); |
---|
| 134 | } |
---|
| 135 | |
---|
| 136 | return 0; |
---|
| 137 | } |
---|
| 138 | |
---|
| 139 | int NBody::setupCL() |
---|
| 140 | { |
---|
| 141 | cl_int status = CL_SUCCESS; |
---|
| 142 | |
---|
| 143 | cl_device_type dType; |
---|
| 144 | |
---|
| 145 | if(deviceType.compare("cpu") == 0) |
---|
| 146 | { |
---|
| 147 | dType = CL_DEVICE_TYPE_CPU; |
---|
| 148 | } |
---|
| 149 | else //deviceType = "gpu" |
---|
| 150 | { |
---|
| 151 | dType = CL_DEVICE_TYPE_GPU; |
---|
| 152 | } |
---|
| 153 | |
---|
| 154 | /* Create context from given device type */ |
---|
| 155 | context = clCreateContextFromType( |
---|
| 156 | 0, |
---|
| 157 | dType, |
---|
| 158 | NULL, |
---|
| 159 | NULL, |
---|
| 160 | &status); |
---|
| 161 | /* |
---|
| 162 | * if opencl fails to open a context on default device GPU |
---|
| 163 | * then it falls back to CPU |
---|
| 164 | */ |
---|
| 165 | if(status != CL_SUCCESS && dType == CL_DEVICE_TYPE_GPU) |
---|
| 166 | { |
---|
| 167 | std::cout << "Unsupported GPU device; falling back to CPU ..." << std::endl; |
---|
| 168 | context = clCreateContextFromType( |
---|
| 169 | 0, |
---|
| 170 | CL_DEVICE_TYPE_CPU, |
---|
| 171 | NULL, |
---|
| 172 | NULL, |
---|
| 173 | &status); |
---|
| 174 | } |
---|
| 175 | |
---|
| 176 | if (status != CL_SUCCESS){ |
---|
| 177 | std::cout << "clCreateContextFromType failed." << std::endl; |
---|
| 178 | return 1; |
---|
| 179 | } |
---|
| 180 | |
---|
| 181 | size_t deviceListSize; |
---|
| 182 | |
---|
| 183 | /* First, get the size of device list data */ |
---|
| 184 | status = clGetContextInfo( |
---|
| 185 | context, |
---|
| 186 | CL_CONTEXT_DEVICES, |
---|
| 187 | 0, |
---|
| 188 | NULL, |
---|
| 189 | &deviceListSize); |
---|
| 190 | |
---|
| 191 | if (status != CL_SUCCESS){ |
---|
| 192 | std::cout << "clGetContextInfo failed." << std::endl; |
---|
| 193 | return 1; |
---|
| 194 | } |
---|
| 195 | |
---|
| 196 | /* Now allocate memory for device list based on the size we got earlier */ |
---|
| 197 | devices = (cl_device_id *)malloc(deviceListSize); |
---|
| 198 | |
---|
| 199 | if(devices==NULL) { |
---|
| 200 | std::cout << "Failed to allocate memory (devices)." << std::endl; |
---|
| 201 | return 1; |
---|
| 202 | } |
---|
| 203 | |
---|
| 204 | /* Now, get the device list data */ |
---|
| 205 | status = clGetContextInfo( |
---|
| 206 | context, |
---|
| 207 | CL_CONTEXT_DEVICES, |
---|
| 208 | deviceListSize, |
---|
| 209 | devices, |
---|
| 210 | NULL); |
---|
| 211 | |
---|
| 212 | if (status != CL_SUCCESS){ |
---|
| 213 | std::cout << "clGetContextInfo failed." << std::endl; |
---|
| 214 | return 1; |
---|
| 215 | } |
---|
| 216 | |
---|
| 217 | /* Create command queue */ |
---|
| 218 | |
---|
| 219 | commandQueue = clCreateCommandQueue( |
---|
| 220 | context, |
---|
| 221 | devices[0], |
---|
| 222 | 0, |
---|
| 223 | &status); |
---|
| 224 | |
---|
| 225 | if (status != CL_SUCCESS){ |
---|
| 226 | std::cout << "clCreateCommandQueue failed." << std::endl; |
---|
| 227 | return 1; |
---|
| 228 | } |
---|
| 229 | |
---|
| 230 | /* Get Device specific Information */ |
---|
| 231 | status = clGetDeviceInfo( |
---|
| 232 | devices[0], |
---|
| 233 | CL_DEVICE_MAX_WORK_GROUP_SIZE, |
---|
| 234 | sizeof(size_t), |
---|
| 235 | (void*)&maxWorkGroupSize, |
---|
| 236 | NULL); |
---|
| 237 | |
---|
| 238 | if (status != CL_SUCCESS){ |
---|
| 239 | std::cout << "clGetDeviceInfo CL_DEVICE_MAX_WORK_GROUP_SIZE failed." << std::endl; |
---|
| 240 | return 1; |
---|
| 241 | } |
---|
| 242 | //---------------------------------- |
---|
| 243 | // Device infos: |
---|
| 244 | cl_char vendor_name[1024] = {0}; |
---|
| 245 | cl_char device_name[1024] = {0}; |
---|
| 246 | |
---|
| 247 | status = clGetDeviceInfo( |
---|
| 248 | devices[0], |
---|
| 249 | CL_DEVICE_VENDOR, |
---|
| 250 | sizeof(vendor_name), |
---|
| 251 | vendor_name, |
---|
| 252 | NULL); |
---|
| 253 | |
---|
| 254 | if (status != CL_SUCCESS){ |
---|
| 255 | std::cout << "clGetDeviceInfo CL_DEVICE_VENDOR failed." << std::endl; |
---|
| 256 | return 1; |
---|
| 257 | } |
---|
| 258 | |
---|
| 259 | status|= clGetDeviceInfo( |
---|
| 260 | devices[0], |
---|
| 261 | CL_DEVICE_NAME, |
---|
| 262 | sizeof(device_name), |
---|
| 263 | device_name, |
---|
| 264 | NULL); |
---|
| 265 | |
---|
| 266 | if (status != CL_SUCCESS){ |
---|
| 267 | std::cout << "clGetDeviceInfo CL_DEVICE_NAME failed." << std::endl; |
---|
| 268 | return 1; |
---|
| 269 | } |
---|
| 270 | |
---|
| 271 | std::cout << "Connecting to " << vendor_name << ", " << device_name << " ... " << std::endl; |
---|
| 272 | |
---|
| 273 | status = clGetDeviceInfo( |
---|
| 274 | devices[0], |
---|
| 275 | CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, |
---|
| 276 | sizeof(cl_uint), |
---|
| 277 | (void*)&maxDimensions, |
---|
| 278 | NULL); |
---|
| 279 | |
---|
| 280 | if (status != CL_SUCCESS){ |
---|
| 281 | std::cout << "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed." << std::endl; |
---|
| 282 | return 1; |
---|
| 283 | } |
---|
| 284 | |
---|
| 285 | maxWorkItemSizes = (size_t*)malloc(maxDimensions * sizeof(size_t)); |
---|
| 286 | |
---|
| 287 | status = clGetDeviceInfo( |
---|
| 288 | devices[0], |
---|
| 289 | CL_DEVICE_MAX_WORK_ITEM_SIZES, |
---|
| 290 | sizeof(size_t) * maxDimensions, |
---|
| 291 | (void*)maxWorkItemSizes, |
---|
| 292 | NULL); |
---|
| 293 | |
---|
| 294 | if (status != CL_SUCCESS){ |
---|
| 295 | std::cout << "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed." << std::endl; |
---|
| 296 | return 1; |
---|
| 297 | } |
---|
| 298 | |
---|
| 299 | status = clGetDeviceInfo( |
---|
| 300 | devices[0], |
---|
| 301 | CL_DEVICE_LOCAL_MEM_SIZE, |
---|
| 302 | sizeof(cl_ulong), |
---|
| 303 | (void *)&totalLocalMemory, |
---|
| 304 | NULL); |
---|
| 305 | |
---|
| 306 | if (status != CL_SUCCESS){ |
---|
| 307 | std::cout << "clGetDeviceInfo CL_DEVICE_LOCAL_MEM_SIZE failed." << std::endl; |
---|
| 308 | return 1; |
---|
| 309 | } |
---|
| 310 | |
---|
| 311 | /* |
---|
| 312 | * Create and initialize memory objects |
---|
| 313 | */ |
---|
| 314 | |
---|
| 315 | /* Create memory objects for position */ |
---|
| 316 | if(deviceType.compare("gpu") == 0){ |
---|
| 317 | updatedPos = clCreateBuffer( |
---|
| 318 | context, |
---|
| 319 | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, |
---|
| 320 | numBodies * sizeof(cl_float4), |
---|
| 321 | pos, |
---|
| 322 | &status); |
---|
| 323 | }else |
---|
| 324 | if(deviceType.compare("cpu") == 0){ |
---|
| 325 | updatedPos = clCreateBuffer( |
---|
| 326 | context, |
---|
| 327 | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, |
---|
| 328 | numBodies * sizeof(cl_float4), |
---|
| 329 | refPos, |
---|
| 330 | &status); |
---|
| 331 | } |
---|
| 332 | |
---|
| 333 | if (status != CL_SUCCESS){ |
---|
| 334 | std::cout << "clCreateBuffer failed. (updatePos)" << std::endl; |
---|
| 335 | return 1; |
---|
| 336 | } |
---|
| 337 | |
---|
| 338 | /* Create memory objects for velocity */ |
---|
| 339 | if(deviceType.compare("gpu") == 0){ |
---|
| 340 | updatedVel = clCreateBuffer( |
---|
| 341 | context, |
---|
| 342 | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, |
---|
| 343 | numBodies * sizeof(cl_float4), |
---|
| 344 | vel, |
---|
| 345 | &status); |
---|
| 346 | }else |
---|
| 347 | if(deviceType.compare("cpu") == 0){ |
---|
| 348 | updatedVel = clCreateBuffer( |
---|
| 349 | context, |
---|
| 350 | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, |
---|
| 351 | numBodies * sizeof(cl_float4), |
---|
| 352 | refVel, |
---|
| 353 | &status); |
---|
| 354 | } |
---|
| 355 | |
---|
| 356 | if (status != CL_SUCCESS){ |
---|
| 357 | std::cout << "clCreateBuffer failed. (updatedVel)" << std::endl; |
---|
| 358 | return 1; |
---|
| 359 | } |
---|
| 360 | |
---|
| 361 | /* create a CL program using the kernel source */ |
---|
| 362 | char *program_source = load_program_source(filename); |
---|
| 363 | size_t sourceSize[] = { strlen(program_source) }; |
---|
| 364 | |
---|
| 365 | program = clCreateProgramWithSource( |
---|
| 366 | context, |
---|
| 367 | 1, |
---|
| 368 | (const char**)&program_source, |
---|
| 369 | sourceSize, |
---|
| 370 | &status); |
---|
| 371 | |
---|
| 372 | if (status != CL_SUCCESS){ |
---|
| 373 | std::cout << "clCreateProgramWithSource failed." << std::endl; |
---|
| 374 | return 1; |
---|
| 375 | } |
---|
| 376 | |
---|
| 377 | /* create a cl program executable for all the devices specified */ |
---|
| 378 | status = clBuildProgram( |
---|
| 379 | program, |
---|
| 380 | 1, |
---|
| 381 | &devices[0], |
---|
| 382 | NULL, |
---|
| 383 | NULL, |
---|
| 384 | NULL); |
---|
| 385 | |
---|
| 386 | if(status != CL_SUCCESS) |
---|
| 387 | { |
---|
| 388 | if(status == CL_BUILD_PROGRAM_FAILURE) |
---|
| 389 | { |
---|
| 390 | cl_int logStatus; |
---|
| 391 | char * buildLog = NULL; |
---|
| 392 | size_t buildLogSize = 0; |
---|
| 393 | logStatus = clGetProgramBuildInfo (program, |
---|
| 394 | devices[0], |
---|
| 395 | CL_PROGRAM_BUILD_LOG, |
---|
| 396 | buildLogSize, |
---|
| 397 | buildLog, |
---|
| 398 | &buildLogSize); |
---|
| 399 | if (logStatus != CL_SUCCESS){ |
---|
| 400 | std::cout << "clGetProgramBuildInfo failed." << std::endl; |
---|
| 401 | return 1; |
---|
| 402 | } |
---|
| 403 | |
---|
| 404 | buildLog = (char*)malloc(buildLogSize); |
---|
| 405 | if(buildLog == NULL) |
---|
| 406 | { |
---|
| 407 | std::cout << "Failed to allocate host memory. (buildLog)" << std::endl; |
---|
| 408 | return 1; |
---|
| 409 | } |
---|
| 410 | memset(buildLog, 0, buildLogSize); |
---|
| 411 | |
---|
| 412 | logStatus = clGetProgramBuildInfo (program, |
---|
| 413 | devices[0], |
---|
| 414 | CL_PROGRAM_BUILD_LOG, |
---|
| 415 | buildLogSize, |
---|
| 416 | buildLog, |
---|
| 417 | NULL); |
---|
| 418 | if (logStatus != CL_SUCCESS){ |
---|
| 419 | std::cout << "clGetProgramBuildInfo failed." << std::endl; |
---|
| 420 | free(buildLog); |
---|
| 421 | return 1; |
---|
| 422 | } |
---|
| 423 | |
---|
| 424 | |
---|
| 425 | std::cout << " \n\t\t\tBUILD LOG\n"; |
---|
| 426 | std::cout << " ************************************************\n"; |
---|
| 427 | std::cout << buildLog << std::endl; |
---|
| 428 | std::cout << " ************************************************\n"; |
---|
| 429 | free(buildLog); |
---|
| 430 | } |
---|
| 431 | |
---|
| 432 | if(status != CL_SUCCESS) |
---|
| 433 | { |
---|
| 434 | std::cout << "clBuildProgram failed." << std::endl; |
---|
| 435 | return 1; |
---|
| 436 | } |
---|
| 437 | } |
---|
| 438 | |
---|
| 439 | /* get a kernel object handle for a kernel with the given name */ |
---|
| 440 | kernel = clCreateKernel( |
---|
| 441 | program, |
---|
| 442 | "nbody_sim", |
---|
| 443 | &status); |
---|
| 444 | |
---|
| 445 | if(status != CL_SUCCESS) |
---|
| 446 | { |
---|
| 447 | std::cout << "clCreateKernel failed." << std::endl; |
---|
| 448 | return 1; |
---|
| 449 | } |
---|
| 450 | |
---|
| 451 | return 0; |
---|
| 452 | } |
---|
| 453 | |
---|
| 454 | char* NBody::load_program_source(const char *filename) |
---|
| 455 | { |
---|
| 456 | FILE *fh; |
---|
| 457 | char *source; |
---|
| 458 | int size; |
---|
| 459 | |
---|
| 460 | fh = fopen(filename, "r"); |
---|
| 461 | if (fh == NULL){ |
---|
| 462 | std::cout << "Reading the source cod for kernel failed." << std::endl; |
---|
| 463 | exit(1); |
---|
| 464 | } |
---|
| 465 | |
---|
| 466 | fseek (fh, 0, SEEK_END); |
---|
| 467 | size = ftell (fh); |
---|
| 468 | rewind(fh); |
---|
| 469 | |
---|
| 470 | source = (char *) malloc(size+1); |
---|
| 471 | |
---|
| 472 | int result = fread(source, 1, size, fh); |
---|
| 473 | /*if (result != size){ |
---|
| 474 | std::cout << "Reading the source cod for kernel failed." << std::endl; |
---|
| 475 | exit(1); |
---|
| 476 | }*/ |
---|
| 477 | source[result] = '\0'; |
---|
| 478 | |
---|
| 479 | fclose(fh); |
---|
| 480 | |
---|
| 481 | return source; |
---|
| 482 | } |
---|
| 483 | |
---|
| 484 | int NBody::setupCLKernels() |
---|
| 485 | { |
---|
| 486 | cl_int status; |
---|
| 487 | |
---|
| 488 | /*** Set appropriate arguments to the kernel ***/ |
---|
| 489 | |
---|
| 490 | /* Particle positions */ |
---|
| 491 | status = clSetKernelArg( |
---|
| 492 | kernel, |
---|
| 493 | 0, |
---|
| 494 | sizeof(cl_mem), |
---|
| 495 | (void *)&updatedPos); |
---|
| 496 | |
---|
| 497 | if(status != CL_SUCCESS) |
---|
| 498 | { |
---|
| 499 | std::cout << "clSetKernelArg failed. (updatedPos)" << std::endl; |
---|
| 500 | return 1; |
---|
| 501 | } |
---|
| 502 | |
---|
| 503 | /* Particle velocity */ |
---|
| 504 | status = clSetKernelArg( |
---|
| 505 | kernel, |
---|
| 506 | 1, |
---|
| 507 | sizeof(cl_mem), |
---|
| 508 | (void *)&updatedVel); |
---|
| 509 | |
---|
| 510 | if(status != CL_SUCCESS) |
---|
| 511 | { |
---|
| 512 | std::cout << "clSetKernelArg failed. (updatedVel)" << std::endl; |
---|
| 513 | return 1; |
---|
| 514 | } |
---|
| 515 | |
---|
| 516 | /* numBodies */ |
---|
| 517 | status = clSetKernelArg( |
---|
| 518 | kernel, |
---|
| 519 | 2, |
---|
| 520 | sizeof(cl_int), |
---|
| 521 | (void *)&numBodies); |
---|
| 522 | |
---|
| 523 | if(status != CL_SUCCESS) |
---|
| 524 | { |
---|
| 525 | std::cout << "clSetKernelArg failed. (numBodies)" << std::endl; |
---|
| 526 | return 1; |
---|
| 527 | } |
---|
| 528 | |
---|
| 529 | /* time step */ |
---|
| 530 | status = clSetKernelArg( |
---|
| 531 | kernel, |
---|
| 532 | 3, |
---|
| 533 | sizeof(cl_float), |
---|
| 534 | (void *)&delT); |
---|
| 535 | if(status != CL_SUCCESS) |
---|
| 536 | { |
---|
| 537 | std::cout << "clSetKernelArg failed. (delT)" << std::endl; |
---|
| 538 | return 1; |
---|
| 539 | } |
---|
| 540 | |
---|
| 541 | /* upward Pseudoprobability */ |
---|
| 542 | status = clSetKernelArg( |
---|
| 543 | kernel, |
---|
| 544 | 4, |
---|
| 545 | sizeof(cl_float), |
---|
| 546 | (void *)&espSqr); |
---|
| 547 | if(status != CL_SUCCESS) |
---|
| 548 | { |
---|
| 549 | std::cout << "clSetKernelArg failed. (espSqr)" << std::endl; |
---|
| 550 | return 1; |
---|
| 551 | } |
---|
| 552 | |
---|
| 553 | /* local memory */ |
---|
| 554 | status = clSetKernelArg( |
---|
| 555 | kernel, |
---|
| 556 | 5, |
---|
| 557 | GROUP_SIZE * 4 * sizeof(float), |
---|
| 558 | NULL); |
---|
| 559 | if(status != CL_SUCCESS) |
---|
| 560 | { |
---|
| 561 | std::cout << "clSetKernelArg failed. (localPos)" << std::endl; |
---|
| 562 | return 1; |
---|
| 563 | } |
---|
| 564 | status = clGetKernelWorkGroupInfo(kernel, |
---|
| 565 | devices[0], |
---|
| 566 | CL_KERNEL_LOCAL_MEM_SIZE, |
---|
| 567 | sizeof(cl_ulong), |
---|
| 568 | &usedLocalMemory, |
---|
| 569 | NULL); |
---|
| 570 | if(status != CL_SUCCESS) |
---|
| 571 | { |
---|
| 572 | std::cout << "clGetKernelWorkGroupInfo CL_KERNEL_LOCAL_MEM_SIZE failed." << std::endl; |
---|
| 573 | return 1; |
---|
| 574 | } |
---|
| 575 | |
---|
| 576 | if(usedLocalMemory > totalLocalMemory) |
---|
| 577 | { |
---|
| 578 | std::cout << "Unsupported: Insufficient local memory on device." << std::endl; |
---|
| 579 | return 1; |
---|
| 580 | } |
---|
| 581 | |
---|
| 582 | return 0; |
---|
| 583 | } |
---|
| 584 | |
---|
| 585 | int NBody::runCLKernels() |
---|
| 586 | { |
---|
| 587 | cl_int status; |
---|
| 588 | cl_event events[1]; |
---|
| 589 | |
---|
| 590 | /* |
---|
| 591 | * Enqueue a kernel run call. |
---|
| 592 | */ |
---|
| 593 | size_t globalThreads[] = {numBodies}; |
---|
| 594 | size_t localThreads[] = {GROUP_SIZE}; |
---|
| 595 | |
---|
| 596 | if(localThreads[0] > maxWorkItemSizes[0] || localThreads[0] > maxWorkGroupSize) |
---|
| 597 | { |
---|
| 598 | std::cout<<"Unsupported: Device does not support requested number of work items."; |
---|
| 599 | |
---|
| 600 | return 1; |
---|
| 601 | } |
---|
| 602 | |
---|
| 603 | status = clEnqueueNDRangeKernel( |
---|
| 604 | commandQueue, |
---|
| 605 | kernel, |
---|
| 606 | 1, |
---|
| 607 | NULL, |
---|
| 608 | globalThreads, |
---|
| 609 | localThreads, |
---|
| 610 | 0, |
---|
| 611 | NULL, |
---|
| 612 | NULL); |
---|
| 613 | if(status != CL_SUCCESS) |
---|
| 614 | { |
---|
| 615 | std::cout << "clEnqueueNDRangeKernel failed." << std::endl; |
---|
| 616 | return 1; |
---|
| 617 | } |
---|
| 618 | |
---|
| 619 | status = clFinish(commandQueue); |
---|
| 620 | if(status != CL_SUCCESS) |
---|
| 621 | { |
---|
| 622 | std::cout << "clFinish failed." << std::endl; |
---|
| 623 | return 1; |
---|
| 624 | } |
---|
| 625 | |
---|
| 626 | /* Enqueue readBuffer*/ |
---|
| 627 | status = clEnqueueReadBuffer( |
---|
| 628 | commandQueue, |
---|
| 629 | updatedPos, |
---|
| 630 | CL_TRUE, |
---|
| 631 | 0, |
---|
| 632 | numBodies* sizeof(cl_float4), |
---|
| 633 | pos, |
---|
| 634 | 0, |
---|
| 635 | NULL, |
---|
| 636 | &events[0]); |
---|
| 637 | if(status != CL_SUCCESS) |
---|
| 638 | { |
---|
| 639 | std::cout << "clEnqueueReadBuffer failed." << std::endl; |
---|
| 640 | return 1; |
---|
| 641 | } |
---|
| 642 | |
---|
| 643 | /* Wait for the read buffer to finish execution */ |
---|
| 644 | status = clWaitForEvents(1, &events[0]); |
---|
| 645 | if(status != CL_SUCCESS) |
---|
| 646 | { |
---|
| 647 | std::cout << "clWaitForEvents failed." << std::endl; |
---|
| 648 | return 1; |
---|
| 649 | } |
---|
| 650 | |
---|
| 651 | clReleaseEvent(events[0]); |
---|
| 652 | |
---|
| 653 | return 0; |
---|
| 654 | } |
---|
| 655 | |
---|
| 656 | /* |
---|
| 657 | * n-body simulation on cpu |
---|
| 658 | */ |
---|
| 659 | void NBody::nBodyCPUReference() |
---|
| 660 | { |
---|
| 661 | //Iterate for all samples |
---|
| 662 | for(int i = 0; i < numBodies; ++i) |
---|
| 663 | { |
---|
| 664 | int myIndex = 4 * i; |
---|
| 665 | float acc[3] = {0.0f, 0.0f, 0.0f}; |
---|
| 666 | for(int j = 0; j < numBodies; ++j) |
---|
| 667 | { |
---|
| 668 | float r[3]; |
---|
| 669 | int index = 4 * j; |
---|
| 670 | |
---|
| 671 | float distSqr = 0.0f; |
---|
| 672 | for(int k = 0; k < 3; ++k) |
---|
| 673 | { |
---|
| 674 | r[k] = refPos[index + k] - refPos[myIndex + k]; |
---|
| 675 | |
---|
| 676 | distSqr += r[k] * r[k]; |
---|
| 677 | } |
---|
| 678 | |
---|
| 679 | float invDist = 1.0f / sqrt(distSqr + espSqr); |
---|
| 680 | float invDistCube = invDist * invDist * invDist; |
---|
| 681 | float s = refPos[index + 3] * invDistCube; |
---|
| 682 | |
---|
| 683 | for(int k = 0; k < 3; ++k) |
---|
| 684 | { |
---|
| 685 | acc[k] += s * r[k]; |
---|
| 686 | } |
---|
| 687 | } |
---|
| 688 | |
---|
| 689 | for(int k = 0; k < 3; ++k) |
---|
| 690 | { |
---|
| 691 | refPos[myIndex + k] += refVel[myIndex + k] * delT + 0.5f * acc[k] * delT * delT; |
---|
| 692 | refVel[myIndex + k] += acc[k] * delT; |
---|
| 693 | } |
---|
| 694 | } |
---|
| 695 | } |
---|
| 696 | |
---|
| 697 | int NBody::setup() |
---|
| 698 | { |
---|
| 699 | if(setupNBody()!= 0) // SDK_SUCCESS == 0; |
---|
| 700 | return 1; |
---|
| 701 | /* |
---|
| 702 | int timer = sampleCommon->createTimer(); |
---|
| 703 | sampleCommon->resetTimer(timer); |
---|
| 704 | sampleCommon->startTimer(timer); |
---|
| 705 | */ |
---|
| 706 | if(setupCL()!= 0) |
---|
| 707 | return 1; |
---|
| 708 | |
---|
| 709 | /* sampleCommon->stopTimer(timer); |
---|
| 710 | // Compute setup time |
---|
| 711 | setupTime = (double)(sampleCommon->readTimer(timer)); |
---|
| 712 | */ |
---|
| 713 | display= true; |
---|
| 714 | |
---|
| 715 | return 0; |
---|
| 716 | } |
---|
| 717 | |
---|
| 718 | /** |
---|
| 719 | * @brief Initialize GL |
---|
| 720 | */ |
---|
| 721 | void GLInit() |
---|
| 722 | { |
---|
| 723 | glClearColor(0.0 ,0.0, 0.0, 0.0); |
---|
| 724 | glClear(GL_COLOR_BUFFER_BIT); |
---|
| 725 | glClear(GL_DEPTH_BUFFER_BIT); |
---|
| 726 | glMatrixMode(GL_PROJECTION); |
---|
| 727 | glLoadIdentity(); |
---|
| 728 | } |
---|
| 729 | |
---|
| 730 | /** |
---|
| 731 | * @brief Glut Idle function |
---|
| 732 | */ |
---|
| 733 | void idle() |
---|
| 734 | { |
---|
| 735 | if (curr_step < n_steps){ |
---|
| 736 | curr_step++; |
---|
| 737 | }else{ |
---|
| 738 | time ( &rawtime ); |
---|
| 739 | timeinfo = localtime ( &rawtime ); |
---|
| 740 | std::cout << "End time and date: " << asctime (timeinfo) << std::endl; |
---|
| 741 | exit(1); |
---|
| 742 | } |
---|
| 743 | |
---|
| 744 | glutPostRedisplay(); |
---|
| 745 | } |
---|
| 746 | |
---|
| 747 | /** |
---|
| 748 | * @brief Glut reshape func |
---|
| 749 | * |
---|
| 750 | * @param w numParticles of OpenGL window |
---|
| 751 | * @param h height of OpenGL window |
---|
| 752 | */ |
---|
| 753 | void reShape(int w,int h) |
---|
| 754 | { |
---|
| 755 | glViewport(0,0,w,h); |
---|
| 756 | |
---|
| 757 | glViewport(0,0,w,h); |
---|
| 758 | glMatrixMode(GL_MODELVIEW); |
---|
| 759 | glLoadIdentity(); |
---|
| 760 | gluPerspective(45.0f,w/h,1.0f,1000.0f); |
---|
| 761 | gluLookAt (0.0, 0.0, -2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0); |
---|
| 762 | } |
---|
| 763 | |
---|
| 764 | /** |
---|
| 765 | * @brief OpenGL display function |
---|
| 766 | */ |
---|
| 767 | void displayfunc() |
---|
| 768 | { |
---|
| 769 | glClearColor(0.0 ,0.0, 0.0, 0.0); |
---|
| 770 | glClear(GL_COLOR_BUFFER_BIT); |
---|
| 771 | glClear(GL_DEPTH_BUFFER_BIT); |
---|
| 772 | |
---|
| 773 | glPointSize(1.0); |
---|
| 774 | glBlendFunc(GL_SRC_ALPHA, GL_ONE); |
---|
| 775 | glEnable(GL_BLEND); |
---|
| 776 | glDepthMask(GL_FALSE); |
---|
| 777 | |
---|
| 778 | glColor3f(1.0f,0.6f,0.0f); |
---|
| 779 | |
---|
| 780 | if (curr_step < n_steps){ |
---|
| 781 | if (deviceType.compare("gpu") == 0){ |
---|
| 782 | //Calling kernel for calculatig subsequent positions |
---|
| 783 | ((NBody*)me)->runCLKernels(); |
---|
| 784 | }else |
---|
| 785 | if (deviceType.compare("cpu") == 0){ |
---|
| 786 | ((NBody*)me)->nBodyCPUReference(); |
---|
| 787 | } |
---|
| 788 | } |
---|
| 789 | |
---|
| 790 | glBegin(GL_POINTS); |
---|
| 791 | for(int i=0; i < numBodies; ++i) |
---|
| 792 | { |
---|
| 793 | if (deviceType.compare("gpu") == 0){ |
---|
| 794 | //divided by 300 just for scaling |
---|
| 795 | glVertex3d(pos[i*4+ 0]/300,pos[i*4+1]/300,pos[i*4+2]/300); |
---|
| 796 | }else |
---|
| 797 | if (deviceType.compare("cpu") == 0){ |
---|
| 798 | //divided by 300 just for scaling |
---|
| 799 | glVertex3d(((NBody*)me)->refPos[i*4+ 0]/300,((NBody*)me)->refPos[i*4+1]/300,((NBody*)me)->refPos[i*4+2]/300); |
---|
| 800 | } |
---|
| 801 | } |
---|
| 802 | glEnd(); |
---|
| 803 | |
---|
| 804 | glFlush(); |
---|
| 805 | glutSwapBuffers(); |
---|
| 806 | } |
---|
| 807 | |
---|
| 808 | /* keyboard function */ |
---|
| 809 | void keyboardFunc(unsigned char key, int mouseX, int mouseY) |
---|
| 810 | { |
---|
| 811 | switch(key) |
---|
| 812 | { |
---|
| 813 | /* If the user hits escape or Q, then exit */ |
---|
| 814 | /* ESCAPE_KEY = 27 */ |
---|
| 815 | case 27: |
---|
| 816 | case 'q': |
---|
| 817 | case 'Q': |
---|
| 818 | { |
---|
| 819 | if(((NBody*)me)->cleanup() != 0) // SDK_SUCCESS == 0; |
---|
| 820 | exit(1); |
---|
| 821 | else |
---|
| 822 | exit(0); |
---|
| 823 | } |
---|
| 824 | default: |
---|
| 825 | break; |
---|
| 826 | } |
---|
| 827 | } |
---|
| 828 | |
---|
| 829 | int NBody::run() |
---|
| 830 | { |
---|
| 831 | /* Arguments are set and execution call is enqueued on command buffer */ |
---|
| 832 | if(setupCLKernels()!= 0) // SDK_SUCCESS == 0; |
---|
| 833 | { |
---|
| 834 | return 1; |
---|
| 835 | } |
---|
| 836 | |
---|
| 837 | /* |
---|
| 838 | if(!quiet) |
---|
| 839 | { |
---|
| 840 | // Printeaza pozitiile corpurilor de la inceput: |
---|
| 841 | sampleCommon->printArray<cl_float>("Output", pos, numBodies, 1); |
---|
| 842 | } |
---|
| 843 | */ |
---|
| 844 | return 0; |
---|
| 845 | } |
---|
| 846 | |
---|
| 847 | void NBody::printStats() |
---|
| 848 | { |
---|
| 849 | /* std::string strArray[3] = {"Particles", "Iterations", "Time(sec)"}; |
---|
| 850 | std::string stats[3]; |
---|
| 851 | totalTime = setupTime + kernelTime; |
---|
| 852 | |
---|
| 853 | stats[0] = sampleCommon->toString(numParticles, std::dec); |
---|
| 854 | stats[1] = sampleCommon->toString(ITER, std::dec); |
---|
| 855 | stats[2] = sampleCommon->toString(totalTime, std::dec); |
---|
| 856 | |
---|
| 857 | this->SDKSample::printStats(strArray, stats, 3); |
---|
| 858 | */ |
---|
| 859 | } |
---|
| 860 | |
---|
| 861 | int NBody::cleanup() |
---|
| 862 | { |
---|
| 863 | /* Releases OpenCL resources (Context, Memory etc.) */ |
---|
| 864 | cl_int status; |
---|
| 865 | |
---|
| 866 | status = clReleaseKernel(kernel); |
---|
| 867 | if (status != CL_SUCCESS){ |
---|
| 868 | std::cout << "clReleaseKernel failed." << std::endl; |
---|
| 869 | return 1; |
---|
| 870 | } |
---|
| 871 | |
---|
| 872 | status = clReleaseProgram(program); |
---|
| 873 | if (status != CL_SUCCESS){ |
---|
| 874 | std::cout << "clReleaseProgram failed." << std::endl; |
---|
| 875 | return 1; |
---|
| 876 | } |
---|
| 877 | |
---|
| 878 | status = clReleaseMemObject(updatedPos); |
---|
| 879 | if (status != CL_SUCCESS){ |
---|
| 880 | std::cout << "clReleaseMemObject failed." << std::endl; |
---|
| 881 | return 1; |
---|
| 882 | } |
---|
| 883 | |
---|
| 884 | status = clReleaseMemObject(updatedVel); |
---|
| 885 | if (status != CL_SUCCESS){ |
---|
| 886 | std::cout << "clReleaseMemObject failed." << std::endl; |
---|
| 887 | return 1; |
---|
| 888 | } |
---|
| 889 | |
---|
| 890 | status = clReleaseCommandQueue(commandQueue); |
---|
| 891 | if (status != CL_SUCCESS){ |
---|
| 892 | std::cout << "clReleaseMemObject failed." << std::endl; |
---|
| 893 | return 1; |
---|
| 894 | } |
---|
| 895 | |
---|
| 896 | status = clReleaseContext(context); |
---|
| 897 | if (status != CL_SUCCESS){ |
---|
| 898 | std::cout << "clReleaseMemObject failed." << std::endl; |
---|
| 899 | return 1; |
---|
| 900 | } |
---|
| 901 | |
---|
| 902 | return 0; |
---|
| 903 | } |
---|
| 904 | |
---|
| 905 | NBody::~NBody() |
---|
| 906 | { |
---|
| 907 | /* release program resources */ |
---|
| 908 | if(initPos) |
---|
| 909 | { |
---|
| 910 | free(initPos); |
---|
| 911 | initPos = NULL; |
---|
| 912 | } |
---|
| 913 | |
---|
| 914 | if(initVel) |
---|
| 915 | { |
---|
| 916 | free(initVel); |
---|
| 917 | initVel = NULL; |
---|
| 918 | } |
---|
| 919 | |
---|
| 920 | if(pos) |
---|
| 921 | { |
---|
| 922 | #if defined (_WIN32) |
---|
| 923 | _aligned_free(pos); |
---|
| 924 | #else |
---|
| 925 | free(pos); |
---|
| 926 | #endif |
---|
| 927 | pos = NULL; |
---|
| 928 | } |
---|
| 929 | if(vel) |
---|
| 930 | { |
---|
| 931 | #if defined (_WIN32) |
---|
| 932 | _aligned_free(vel); |
---|
| 933 | #else |
---|
| 934 | free(vel); |
---|
| 935 | #endif |
---|
| 936 | vel = NULL; |
---|
| 937 | } |
---|
| 938 | |
---|
| 939 | if(devices) |
---|
| 940 | { |
---|
| 941 | free(devices); |
---|
| 942 | devices = NULL; |
---|
| 943 | } |
---|
| 944 | |
---|
| 945 | if(refPos) |
---|
| 946 | { |
---|
| 947 | free(refPos); |
---|
| 948 | refPos = NULL; |
---|
| 949 | } |
---|
| 950 | |
---|
| 951 | if(refVel) |
---|
| 952 | { |
---|
| 953 | free(refVel); |
---|
| 954 | refVel = NULL; |
---|
| 955 | } |
---|
| 956 | |
---|
| 957 | if(maxWorkItemSizes) |
---|
| 958 | { |
---|
| 959 | free(maxWorkItemSizes); |
---|
| 960 | maxWorkItemSizes = NULL; |
---|
| 961 | } |
---|
| 962 | } |
---|
| 963 | |
---|
| 964 | |
---|
| 965 | int main(int argc, char * argv[]) |
---|
| 966 | { |
---|
| 967 | NBody clNBody("OpenCL NBody"); |
---|
| 968 | me = &clNBody; |
---|
| 969 | |
---|
| 970 | if(clNBody.setup() != 0) |
---|
| 971 | return 1; |
---|
| 972 | |
---|
| 973 | if(clNBody.run() != 0) |
---|
| 974 | return 1; |
---|
| 975 | |
---|
| 976 | time ( &rawtime ); |
---|
| 977 | timeinfo = localtime ( &rawtime ); |
---|
| 978 | std::cout << "Start time and date: " << asctime (timeinfo) << std::endl; |
---|
| 979 | |
---|
| 980 | if(display) |
---|
| 981 | { |
---|
| 982 | // Run in graphical window if requested |
---|
| 983 | glutInit(&argc, argv); |
---|
| 984 | glutInitWindowPosition(100,10); |
---|
| 985 | glutInitWindowSize(600,600); |
---|
| 986 | glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE ); |
---|
| 987 | glutCreateWindow("NBody simulation"); |
---|
| 988 | GLInit(); |
---|
| 989 | glutDisplayFunc(displayfunc); |
---|
| 990 | glutReshapeFunc(reShape); |
---|
| 991 | glutIdleFunc(idle); |
---|
| 992 | glutKeyboardFunc(keyboardFunc); |
---|
| 993 | glutMainLoop(); |
---|
| 994 | } |
---|
| 995 | |
---|
| 996 | if(clNBody.cleanup() != 0) |
---|
| 997 | return 1; |
---|
| 998 | |
---|
| 999 | clNBody.printStats(); |
---|
| 1000 | |
---|
| 1001 | return 0; |
---|
| 1002 | } |
---|