1 | #include "NBody.hpp" |
---|
2 | |
---|
3 | int numBodies; // No. of particles; |
---|
4 | cl_float* pos; // Output position; |
---|
5 | void* me; // Pointing to NBody class; |
---|
6 | cl_bool display; // If it is true then OpenGL display is used; |
---|
7 | long curr_step = 0; // Reatins the current step in the simulation; |
---|
8 | long n_steps = 110; // Numbuer of steps of the simulation; |
---|
9 | cl_bool verify = false; // Compares the final position vectors resulted from running |
---|
10 | // the code on CPU and GPU; |
---|
11 | |
---|
12 | |
---|
13 | std::string deviceType("cpu"); // It says on which device we want to do the computations; |
---|
14 | |
---|
15 | time_t rawtime; // Variables used for measuring the duration of the run; |
---|
16 | struct tm * timeinfo; |
---|
17 | |
---|
18 | |
---|
19 | float NBody::random(float randMax, float randMin) |
---|
20 | { |
---|
21 | float result; |
---|
22 | result =(float)rand()/(float)RAND_MAX; |
---|
23 | |
---|
24 | return ((1.0f - result) * randMin + result *randMax); |
---|
25 | } |
---|
26 | |
---|
27 | int NBody::setupNBody() |
---|
28 | { |
---|
29 | //-------------------------------------------- |
---|
30 | // make sure numParticles is multiple of group size |
---|
31 | numParticles = (numParticles < GROUP_SIZE) ? GROUP_SIZE : numParticles; |
---|
32 | numParticles = (numParticles / GROUP_SIZE) * GROUP_SIZE; |
---|
33 | |
---|
34 | numBodies = numParticles; |
---|
35 | |
---|
36 | //-------------------------------------------- |
---|
37 | // First we will use initPos and initVel vectors to generate the |
---|
38 | // input data. They will be used only in this function to |
---|
39 | // initialize the other vectors: the pos and vel vectors |
---|
40 | // used in simulation on GPU only, and refPos and refVel used in |
---|
41 | // simulation on CPU only |
---|
42 | initPos = (cl_float*)malloc(numBodies * sizeof(cl_float4)); |
---|
43 | if(initPos == NULL) |
---|
44 | { |
---|
45 | std::cout << "Failed to allocate host memory. (initPos)" << std::endl; |
---|
46 | return 1; |
---|
47 | } |
---|
48 | |
---|
49 | initVel = (cl_float*)malloc(numBodies * sizeof(cl_float4)); |
---|
50 | if(initVel == NULL) |
---|
51 | { |
---|
52 | std::cout << "Failed to allocate host memory. (initVel)" << std::endl; |
---|
53 | return 1; |
---|
54 | } |
---|
55 | |
---|
56 | // initialization of inputs |
---|
57 | for(int i = 0; i < numBodies; ++i) |
---|
58 | { |
---|
59 | int index = 4 * i; |
---|
60 | |
---|
61 | // First 3 values are position in x,y and z direction |
---|
62 | for(int j = 0; j < 3; ++j) |
---|
63 | { |
---|
64 | initPos[index + j] = random(3, 50); |
---|
65 | } |
---|
66 | |
---|
67 | // Mass value |
---|
68 | initPos[index + 3] = random(1, 1000); |
---|
69 | |
---|
70 | // First 3 values are velocity in x,y and z direction |
---|
71 | for(int j = 0; j < 3; ++j) |
---|
72 | { |
---|
73 | initVel[index + j] = 0.0f; |
---|
74 | } |
---|
75 | |
---|
76 | // unused |
---|
77 | initVel[3] = 0.0f; |
---|
78 | } |
---|
79 | |
---|
80 | //-------------------------------------------- |
---|
81 | // Variables used on GPU Running only: |
---|
82 | if(deviceType.compare("gpu") == 0){ |
---|
83 | |
---|
84 | #if defined (_WIN32) |
---|
85 | pos = (cl_float*)_aligned_malloc(numBodies * sizeof(cl_float4), 16); |
---|
86 | #else |
---|
87 | pos = (cl_float*)memalign(16, numBodies * sizeof(cl_float4)); |
---|
88 | #endif |
---|
89 | |
---|
90 | if(pos == NULL) |
---|
91 | { |
---|
92 | std::cout << "Failed to allocate host memory. (pos)" << std::endl; |
---|
93 | return 1; |
---|
94 | } |
---|
95 | |
---|
96 | #if defined (_WIN32) |
---|
97 | vel = (cl_float*)_aligned_malloc(numBodies * sizeof(cl_float4), 16); |
---|
98 | #else |
---|
99 | vel = (cl_float*)memalign(16, numBodies * sizeof(cl_float4)); |
---|
100 | #endif |
---|
101 | |
---|
102 | if(vel == NULL) |
---|
103 | { |
---|
104 | std::cout << "Failed to allocate host memory. (vel)" << std::endl; |
---|
105 | return 1; |
---|
106 | } |
---|
107 | |
---|
108 | // Copy the auxiliary vectors into the pos and vel ones: |
---|
109 | memcpy(pos, initPos, 4 * numBodies * sizeof(cl_float)); |
---|
110 | memcpy(vel, initVel, 4 * numBodies * sizeof(cl_float)); |
---|
111 | } |
---|
112 | |
---|
113 | //-------------------------------------------- |
---|
114 | // Variables used on CPU Running only: |
---|
115 | if(deviceType.compare("cpu") == 0){ |
---|
116 | |
---|
117 | refPos = (cl_float*)malloc(numBodies * sizeof(cl_float4)); |
---|
118 | if(refPos == NULL) |
---|
119 | { |
---|
120 | std::cout << "Failed to allocate host memory. (refPos)" << std::endl; |
---|
121 | return 1; |
---|
122 | } |
---|
123 | |
---|
124 | refVel = (cl_float*)malloc(numBodies * sizeof(cl_float4)); |
---|
125 | if(refVel == NULL) |
---|
126 | { |
---|
127 | std::cout << "Failed to allocate host memory. (refVel)" << std::endl; |
---|
128 | return 1; |
---|
129 | } |
---|
130 | |
---|
131 | // Copy the auxiliary vectors into the refPos and refVel ones: |
---|
132 | memcpy(refPos, initPos, 4 * numBodies * sizeof(cl_float)); |
---|
133 | memcpy(refVel, initVel, 4 * numBodies * sizeof(cl_float)); |
---|
134 | } |
---|
135 | |
---|
136 | return 0; |
---|
137 | } |
---|
138 | |
---|
139 | int NBody::setupCL() |
---|
140 | { |
---|
141 | cl_int status = CL_SUCCESS; |
---|
142 | |
---|
143 | cl_device_type dType; |
---|
144 | |
---|
145 | if(deviceType.compare("cpu") == 0) |
---|
146 | { |
---|
147 | dType = CL_DEVICE_TYPE_CPU; |
---|
148 | } |
---|
149 | else //deviceType = "gpu" |
---|
150 | { |
---|
151 | dType = CL_DEVICE_TYPE_GPU; |
---|
152 | } |
---|
153 | |
---|
154 | /* Create context from given device type */ |
---|
155 | context = clCreateContextFromType( |
---|
156 | 0, |
---|
157 | dType, |
---|
158 | NULL, |
---|
159 | NULL, |
---|
160 | &status); |
---|
161 | /* |
---|
162 | * if opencl fails to open a context on default device GPU |
---|
163 | * then it falls back to CPU |
---|
164 | */ |
---|
165 | if(status != CL_SUCCESS && dType == CL_DEVICE_TYPE_GPU) |
---|
166 | { |
---|
167 | std::cout << "Unsupported GPU device; falling back to CPU ..." << std::endl; |
---|
168 | context = clCreateContextFromType( |
---|
169 | 0, |
---|
170 | CL_DEVICE_TYPE_CPU, |
---|
171 | NULL, |
---|
172 | NULL, |
---|
173 | &status); |
---|
174 | } |
---|
175 | |
---|
176 | if (status != CL_SUCCESS){ |
---|
177 | std::cout << "clCreateContextFromType failed." << std::endl; |
---|
178 | return 1; |
---|
179 | } |
---|
180 | |
---|
181 | size_t deviceListSize; |
---|
182 | |
---|
183 | /* First, get the size of device list data */ |
---|
184 | status = clGetContextInfo( |
---|
185 | context, |
---|
186 | CL_CONTEXT_DEVICES, |
---|
187 | 0, |
---|
188 | NULL, |
---|
189 | &deviceListSize); |
---|
190 | |
---|
191 | if (status != CL_SUCCESS){ |
---|
192 | std::cout << "clGetContextInfo failed." << std::endl; |
---|
193 | return 1; |
---|
194 | } |
---|
195 | |
---|
196 | /* Now allocate memory for device list based on the size we got earlier */ |
---|
197 | devices = (cl_device_id *)malloc(deviceListSize); |
---|
198 | |
---|
199 | if(devices==NULL) { |
---|
200 | std::cout << "Failed to allocate memory (devices)." << std::endl; |
---|
201 | return 1; |
---|
202 | } |
---|
203 | |
---|
204 | /* Now, get the device list data */ |
---|
205 | status = clGetContextInfo( |
---|
206 | context, |
---|
207 | CL_CONTEXT_DEVICES, |
---|
208 | deviceListSize, |
---|
209 | devices, |
---|
210 | NULL); |
---|
211 | |
---|
212 | if (status != CL_SUCCESS){ |
---|
213 | std::cout << "clGetContextInfo failed." << std::endl; |
---|
214 | return 1; |
---|
215 | } |
---|
216 | |
---|
217 | /* Create command queue */ |
---|
218 | |
---|
219 | commandQueue = clCreateCommandQueue( |
---|
220 | context, |
---|
221 | devices[0], |
---|
222 | 0, |
---|
223 | &status); |
---|
224 | |
---|
225 | if (status != CL_SUCCESS){ |
---|
226 | std::cout << "clCreateCommandQueue failed." << std::endl; |
---|
227 | return 1; |
---|
228 | } |
---|
229 | |
---|
230 | /* Get Device specific Information */ |
---|
231 | status = clGetDeviceInfo( |
---|
232 | devices[0], |
---|
233 | CL_DEVICE_MAX_WORK_GROUP_SIZE, |
---|
234 | sizeof(size_t), |
---|
235 | (void*)&maxWorkGroupSize, |
---|
236 | NULL); |
---|
237 | |
---|
238 | if (status != CL_SUCCESS){ |
---|
239 | std::cout << "clGetDeviceInfo CL_DEVICE_MAX_WORK_GROUP_SIZE failed." << std::endl; |
---|
240 | return 1; |
---|
241 | } |
---|
242 | //---------------------------------- |
---|
243 | // Device infos: |
---|
244 | cl_char vendor_name[1024] = {0}; |
---|
245 | cl_char device_name[1024] = {0}; |
---|
246 | |
---|
247 | status = clGetDeviceInfo( |
---|
248 | devices[0], |
---|
249 | CL_DEVICE_VENDOR, |
---|
250 | sizeof(vendor_name), |
---|
251 | vendor_name, |
---|
252 | NULL); |
---|
253 | |
---|
254 | if (status != CL_SUCCESS){ |
---|
255 | std::cout << "clGetDeviceInfo CL_DEVICE_VENDOR failed." << std::endl; |
---|
256 | return 1; |
---|
257 | } |
---|
258 | |
---|
259 | status|= clGetDeviceInfo( |
---|
260 | devices[0], |
---|
261 | CL_DEVICE_NAME, |
---|
262 | sizeof(device_name), |
---|
263 | device_name, |
---|
264 | NULL); |
---|
265 | |
---|
266 | if (status != CL_SUCCESS){ |
---|
267 | std::cout << "clGetDeviceInfo CL_DEVICE_NAME failed." << std::endl; |
---|
268 | return 1; |
---|
269 | } |
---|
270 | |
---|
271 | std::cout << "Connecting to " << vendor_name << ", " << device_name << " ... " << std::endl; |
---|
272 | |
---|
273 | status = clGetDeviceInfo( |
---|
274 | devices[0], |
---|
275 | CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, |
---|
276 | sizeof(cl_uint), |
---|
277 | (void*)&maxDimensions, |
---|
278 | NULL); |
---|
279 | |
---|
280 | if (status != CL_SUCCESS){ |
---|
281 | std::cout << "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed." << std::endl; |
---|
282 | return 1; |
---|
283 | } |
---|
284 | |
---|
285 | maxWorkItemSizes = (size_t*)malloc(maxDimensions * sizeof(size_t)); |
---|
286 | |
---|
287 | status = clGetDeviceInfo( |
---|
288 | devices[0], |
---|
289 | CL_DEVICE_MAX_WORK_ITEM_SIZES, |
---|
290 | sizeof(size_t) * maxDimensions, |
---|
291 | (void*)maxWorkItemSizes, |
---|
292 | NULL); |
---|
293 | |
---|
294 | if (status != CL_SUCCESS){ |
---|
295 | std::cout << "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed." << std::endl; |
---|
296 | return 1; |
---|
297 | } |
---|
298 | |
---|
299 | status = clGetDeviceInfo( |
---|
300 | devices[0], |
---|
301 | CL_DEVICE_LOCAL_MEM_SIZE, |
---|
302 | sizeof(cl_ulong), |
---|
303 | (void *)&totalLocalMemory, |
---|
304 | NULL); |
---|
305 | |
---|
306 | if (status != CL_SUCCESS){ |
---|
307 | std::cout << "clGetDeviceInfo CL_DEVICE_LOCAL_MEM_SIZE failed." << std::endl; |
---|
308 | return 1; |
---|
309 | } |
---|
310 | |
---|
311 | /* |
---|
312 | * Create and initialize memory objects |
---|
313 | */ |
---|
314 | |
---|
315 | /* Create memory objects for position */ |
---|
316 | if(deviceType.compare("gpu") == 0){ |
---|
317 | updatedPos = clCreateBuffer( |
---|
318 | context, |
---|
319 | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, |
---|
320 | numBodies * sizeof(cl_float4), |
---|
321 | pos, |
---|
322 | &status); |
---|
323 | }else |
---|
324 | if(deviceType.compare("cpu") == 0){ |
---|
325 | updatedPos = clCreateBuffer( |
---|
326 | context, |
---|
327 | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, |
---|
328 | numBodies * sizeof(cl_float4), |
---|
329 | refPos, |
---|
330 | &status); |
---|
331 | } |
---|
332 | |
---|
333 | if (status != CL_SUCCESS){ |
---|
334 | std::cout << "clCreateBuffer failed. (updatePos)" << std::endl; |
---|
335 | return 1; |
---|
336 | } |
---|
337 | |
---|
338 | /* Create memory objects for velocity */ |
---|
339 | if(deviceType.compare("gpu") == 0){ |
---|
340 | updatedVel = clCreateBuffer( |
---|
341 | context, |
---|
342 | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, |
---|
343 | numBodies * sizeof(cl_float4), |
---|
344 | vel, |
---|
345 | &status); |
---|
346 | }else |
---|
347 | if(deviceType.compare("cpu") == 0){ |
---|
348 | updatedVel = clCreateBuffer( |
---|
349 | context, |
---|
350 | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, |
---|
351 | numBodies * sizeof(cl_float4), |
---|
352 | refVel, |
---|
353 | &status); |
---|
354 | } |
---|
355 | |
---|
356 | if (status != CL_SUCCESS){ |
---|
357 | std::cout << "clCreateBuffer failed. (updatedVel)" << std::endl; |
---|
358 | return 1; |
---|
359 | } |
---|
360 | |
---|
361 | /* create a CL program using the kernel source */ |
---|
362 | char *program_source = load_program_source(filename); |
---|
363 | size_t sourceSize[] = { strlen(program_source) }; |
---|
364 | |
---|
365 | program = clCreateProgramWithSource( |
---|
366 | context, |
---|
367 | 1, |
---|
368 | (const char**)&program_source, |
---|
369 | sourceSize, |
---|
370 | &status); |
---|
371 | |
---|
372 | if (status != CL_SUCCESS){ |
---|
373 | std::cout << "clCreateProgramWithSource failed." << std::endl; |
---|
374 | return 1; |
---|
375 | } |
---|
376 | |
---|
377 | /* create a cl program executable for all the devices specified */ |
---|
378 | status = clBuildProgram( |
---|
379 | program, |
---|
380 | 1, |
---|
381 | &devices[0], |
---|
382 | NULL, |
---|
383 | NULL, |
---|
384 | NULL); |
---|
385 | |
---|
386 | if(status != CL_SUCCESS) |
---|
387 | { |
---|
388 | if(status == CL_BUILD_PROGRAM_FAILURE) |
---|
389 | { |
---|
390 | cl_int logStatus; |
---|
391 | char * buildLog = NULL; |
---|
392 | size_t buildLogSize = 0; |
---|
393 | logStatus = clGetProgramBuildInfo (program, |
---|
394 | devices[0], |
---|
395 | CL_PROGRAM_BUILD_LOG, |
---|
396 | buildLogSize, |
---|
397 | buildLog, |
---|
398 | &buildLogSize); |
---|
399 | if (logStatus != CL_SUCCESS){ |
---|
400 | std::cout << "clGetProgramBuildInfo failed." << std::endl; |
---|
401 | return 1; |
---|
402 | } |
---|
403 | |
---|
404 | buildLog = (char*)malloc(buildLogSize); |
---|
405 | if(buildLog == NULL) |
---|
406 | { |
---|
407 | std::cout << "Failed to allocate host memory. (buildLog)" << std::endl; |
---|
408 | return 1; |
---|
409 | } |
---|
410 | memset(buildLog, 0, buildLogSize); |
---|
411 | |
---|
412 | logStatus = clGetProgramBuildInfo (program, |
---|
413 | devices[0], |
---|
414 | CL_PROGRAM_BUILD_LOG, |
---|
415 | buildLogSize, |
---|
416 | buildLog, |
---|
417 | NULL); |
---|
418 | if (logStatus != CL_SUCCESS){ |
---|
419 | std::cout << "clGetProgramBuildInfo failed." << std::endl; |
---|
420 | free(buildLog); |
---|
421 | return 1; |
---|
422 | } |
---|
423 | |
---|
424 | |
---|
425 | std::cout << " \n\t\t\tBUILD LOG\n"; |
---|
426 | std::cout << " ************************************************\n"; |
---|
427 | std::cout << buildLog << std::endl; |
---|
428 | std::cout << " ************************************************\n"; |
---|
429 | free(buildLog); |
---|
430 | } |
---|
431 | |
---|
432 | if(status != CL_SUCCESS) |
---|
433 | { |
---|
434 | std::cout << "clBuildProgram failed." << std::endl; |
---|
435 | return 1; |
---|
436 | } |
---|
437 | } |
---|
438 | |
---|
439 | /* get a kernel object handle for a kernel with the given name */ |
---|
440 | kernel = clCreateKernel( |
---|
441 | program, |
---|
442 | "nbody_sim", |
---|
443 | &status); |
---|
444 | |
---|
445 | if(status != CL_SUCCESS) |
---|
446 | { |
---|
447 | std::cout << "clCreateKernel failed." << std::endl; |
---|
448 | return 1; |
---|
449 | } |
---|
450 | |
---|
451 | return 0; |
---|
452 | } |
---|
453 | |
---|
454 | char* NBody::load_program_source(const char *filename) |
---|
455 | { |
---|
456 | FILE *fh; |
---|
457 | char *source; |
---|
458 | int size; |
---|
459 | |
---|
460 | fh = fopen(filename, "r"); |
---|
461 | if (fh == NULL){ |
---|
462 | std::cout << "Reading the source cod for kernel failed." << std::endl; |
---|
463 | exit(1); |
---|
464 | } |
---|
465 | |
---|
466 | fseek (fh, 0, SEEK_END); |
---|
467 | size = ftell (fh); |
---|
468 | rewind(fh); |
---|
469 | |
---|
470 | source = (char *) malloc(size+1); |
---|
471 | |
---|
472 | int result = fread(source, 1, size, fh); |
---|
473 | /*if (result != size){ |
---|
474 | std::cout << "Reading the source cod for kernel failed." << std::endl; |
---|
475 | exit(1); |
---|
476 | }*/ |
---|
477 | source[result] = '\0'; |
---|
478 | |
---|
479 | fclose(fh); |
---|
480 | |
---|
481 | return source; |
---|
482 | } |
---|
483 | |
---|
484 | int NBody::setupCLKernels() |
---|
485 | { |
---|
486 | cl_int status; |
---|
487 | |
---|
488 | /*** Set appropriate arguments to the kernel ***/ |
---|
489 | |
---|
490 | /* Particle positions */ |
---|
491 | status = clSetKernelArg( |
---|
492 | kernel, |
---|
493 | 0, |
---|
494 | sizeof(cl_mem), |
---|
495 | (void *)&updatedPos); |
---|
496 | |
---|
497 | if(status != CL_SUCCESS) |
---|
498 | { |
---|
499 | std::cout << "clSetKernelArg failed. (updatedPos)" << std::endl; |
---|
500 | return 1; |
---|
501 | } |
---|
502 | |
---|
503 | /* Particle velocity */ |
---|
504 | status = clSetKernelArg( |
---|
505 | kernel, |
---|
506 | 1, |
---|
507 | sizeof(cl_mem), |
---|
508 | (void *)&updatedVel); |
---|
509 | |
---|
510 | if(status != CL_SUCCESS) |
---|
511 | { |
---|
512 | std::cout << "clSetKernelArg failed. (updatedVel)" << std::endl; |
---|
513 | return 1; |
---|
514 | } |
---|
515 | |
---|
516 | /* numBodies */ |
---|
517 | status = clSetKernelArg( |
---|
518 | kernel, |
---|
519 | 2, |
---|
520 | sizeof(cl_int), |
---|
521 | (void *)&numBodies); |
---|
522 | |
---|
523 | if(status != CL_SUCCESS) |
---|
524 | { |
---|
525 | std::cout << "clSetKernelArg failed. (numBodies)" << std::endl; |
---|
526 | return 1; |
---|
527 | } |
---|
528 | |
---|
529 | /* time step */ |
---|
530 | status = clSetKernelArg( |
---|
531 | kernel, |
---|
532 | 3, |
---|
533 | sizeof(cl_float), |
---|
534 | (void *)&delT); |
---|
535 | if(status != CL_SUCCESS) |
---|
536 | { |
---|
537 | std::cout << "clSetKernelArg failed. (delT)" << std::endl; |
---|
538 | return 1; |
---|
539 | } |
---|
540 | |
---|
541 | /* upward Pseudoprobability */ |
---|
542 | status = clSetKernelArg( |
---|
543 | kernel, |
---|
544 | 4, |
---|
545 | sizeof(cl_float), |
---|
546 | (void *)&espSqr); |
---|
547 | if(status != CL_SUCCESS) |
---|
548 | { |
---|
549 | std::cout << "clSetKernelArg failed. (espSqr)" << std::endl; |
---|
550 | return 1; |
---|
551 | } |
---|
552 | |
---|
553 | /* local memory */ |
---|
554 | status = clSetKernelArg( |
---|
555 | kernel, |
---|
556 | 5, |
---|
557 | GROUP_SIZE * 4 * sizeof(float), |
---|
558 | NULL); |
---|
559 | if(status != CL_SUCCESS) |
---|
560 | { |
---|
561 | std::cout << "clSetKernelArg failed. (localPos)" << std::endl; |
---|
562 | return 1; |
---|
563 | } |
---|
564 | status = clGetKernelWorkGroupInfo(kernel, |
---|
565 | devices[0], |
---|
566 | CL_KERNEL_LOCAL_MEM_SIZE, |
---|
567 | sizeof(cl_ulong), |
---|
568 | &usedLocalMemory, |
---|
569 | NULL); |
---|
570 | if(status != CL_SUCCESS) |
---|
571 | { |
---|
572 | std::cout << "clGetKernelWorkGroupInfo CL_KERNEL_LOCAL_MEM_SIZE failed." << std::endl; |
---|
573 | return 1; |
---|
574 | } |
---|
575 | |
---|
576 | if(usedLocalMemory > totalLocalMemory) |
---|
577 | { |
---|
578 | std::cout << "Unsupported: Insufficient local memory on device." << std::endl; |
---|
579 | return 1; |
---|
580 | } |
---|
581 | |
---|
582 | return 0; |
---|
583 | } |
---|
584 | |
---|
585 | int NBody::runCLKernels() |
---|
586 | { |
---|
587 | cl_int status; |
---|
588 | cl_event events[1]; |
---|
589 | |
---|
590 | /* |
---|
591 | * Enqueue a kernel run call. |
---|
592 | */ |
---|
593 | size_t globalThreads[] = {numBodies}; |
---|
594 | size_t localThreads[] = {GROUP_SIZE}; |
---|
595 | |
---|
596 | if(localThreads[0] > maxWorkItemSizes[0] || localThreads[0] > maxWorkGroupSize) |
---|
597 | { |
---|
598 | std::cout<<"Unsupported: Device does not support requested number of work items."; |
---|
599 | |
---|
600 | return 1; |
---|
601 | } |
---|
602 | |
---|
603 | status = clEnqueueNDRangeKernel( |
---|
604 | commandQueue, |
---|
605 | kernel, |
---|
606 | 1, |
---|
607 | NULL, |
---|
608 | globalThreads, |
---|
609 | localThreads, |
---|
610 | 0, |
---|
611 | NULL, |
---|
612 | NULL); |
---|
613 | if(status != CL_SUCCESS) |
---|
614 | { |
---|
615 | std::cout << "clEnqueueNDRangeKernel failed." << std::endl; |
---|
616 | return 1; |
---|
617 | } |
---|
618 | |
---|
619 | status = clFinish(commandQueue); |
---|
620 | if(status != CL_SUCCESS) |
---|
621 | { |
---|
622 | std::cout << "clFinish failed." << std::endl; |
---|
623 | return 1; |
---|
624 | } |
---|
625 | |
---|
626 | /* Enqueue readBuffer*/ |
---|
627 | status = clEnqueueReadBuffer( |
---|
628 | commandQueue, |
---|
629 | updatedPos, |
---|
630 | CL_TRUE, |
---|
631 | 0, |
---|
632 | numBodies* sizeof(cl_float4), |
---|
633 | pos, |
---|
634 | 0, |
---|
635 | NULL, |
---|
636 | &events[0]); |
---|
637 | if(status != CL_SUCCESS) |
---|
638 | { |
---|
639 | std::cout << "clEnqueueReadBuffer failed." << std::endl; |
---|
640 | return 1; |
---|
641 | } |
---|
642 | |
---|
643 | /* Wait for the read buffer to finish execution */ |
---|
644 | status = clWaitForEvents(1, &events[0]); |
---|
645 | if(status != CL_SUCCESS) |
---|
646 | { |
---|
647 | std::cout << "clWaitForEvents failed." << std::endl; |
---|
648 | return 1; |
---|
649 | } |
---|
650 | |
---|
651 | clReleaseEvent(events[0]); |
---|
652 | |
---|
653 | return 0; |
---|
654 | } |
---|
655 | |
---|
656 | /* |
---|
657 | * n-body simulation on cpu |
---|
658 | */ |
---|
659 | void NBody::nBodyCPUReference() |
---|
660 | { |
---|
661 | //Iterate for all samples |
---|
662 | for(int i = 0; i < numBodies; ++i) |
---|
663 | { |
---|
664 | int myIndex = 4 * i; |
---|
665 | float acc[3] = {0.0f, 0.0f, 0.0f}; |
---|
666 | for(int j = 0; j < numBodies; ++j) |
---|
667 | { |
---|
668 | float r[3]; |
---|
669 | int index = 4 * j; |
---|
670 | |
---|
671 | float distSqr = 0.0f; |
---|
672 | for(int k = 0; k < 3; ++k) |
---|
673 | { |
---|
674 | r[k] = refPos[index + k] - refPos[myIndex + k]; |
---|
675 | |
---|
676 | distSqr += r[k] * r[k]; |
---|
677 | } |
---|
678 | |
---|
679 | float invDist = 1.0f / sqrt(distSqr + espSqr); |
---|
680 | float invDistCube = invDist * invDist * invDist; |
---|
681 | float s = refPos[index + 3] * invDistCube; |
---|
682 | |
---|
683 | for(int k = 0; k < 3; ++k) |
---|
684 | { |
---|
685 | acc[k] += s * r[k]; |
---|
686 | } |
---|
687 | } |
---|
688 | |
---|
689 | for(int k = 0; k < 3; ++k) |
---|
690 | { |
---|
691 | refPos[myIndex + k] += refVel[myIndex + k] * delT + 0.5f * acc[k] * delT * delT; |
---|
692 | refVel[myIndex + k] += acc[k] * delT; |
---|
693 | } |
---|
694 | } |
---|
695 | } |
---|
696 | |
---|
697 | int NBody::setup() |
---|
698 | { |
---|
699 | if(setupNBody()!= 0) // SDK_SUCCESS == 0; |
---|
700 | return 1; |
---|
701 | /* |
---|
702 | int timer = sampleCommon->createTimer(); |
---|
703 | sampleCommon->resetTimer(timer); |
---|
704 | sampleCommon->startTimer(timer); |
---|
705 | */ |
---|
706 | if(setupCL()!= 0) |
---|
707 | return 1; |
---|
708 | |
---|
709 | /* sampleCommon->stopTimer(timer); |
---|
710 | // Compute setup time |
---|
711 | setupTime = (double)(sampleCommon->readTimer(timer)); |
---|
712 | */ |
---|
713 | display= true; |
---|
714 | |
---|
715 | return 0; |
---|
716 | } |
---|
717 | |
---|
718 | /** |
---|
719 | * @brief Initialize GL |
---|
720 | */ |
---|
721 | void GLInit() |
---|
722 | { |
---|
723 | glClearColor(0.0 ,0.0, 0.0, 0.0); |
---|
724 | glClear(GL_COLOR_BUFFER_BIT); |
---|
725 | glClear(GL_DEPTH_BUFFER_BIT); |
---|
726 | glMatrixMode(GL_PROJECTION); |
---|
727 | glLoadIdentity(); |
---|
728 | } |
---|
729 | |
---|
730 | /** |
---|
731 | * @brief Glut Idle function |
---|
732 | */ |
---|
733 | void idle() |
---|
734 | { |
---|
735 | if (curr_step < n_steps){ |
---|
736 | curr_step++; |
---|
737 | }else{ |
---|
738 | time ( &rawtime ); |
---|
739 | timeinfo = localtime ( &rawtime ); |
---|
740 | std::cout << "End time and date: " << asctime (timeinfo) << std::endl; |
---|
741 | exit(1); |
---|
742 | } |
---|
743 | |
---|
744 | glutPostRedisplay(); |
---|
745 | } |
---|
746 | |
---|
747 | /** |
---|
748 | * @brief Glut reshape func |
---|
749 | * |
---|
750 | * @param w numParticles of OpenGL window |
---|
751 | * @param h height of OpenGL window |
---|
752 | */ |
---|
753 | void reShape(int w,int h) |
---|
754 | { |
---|
755 | glViewport(0,0,w,h); |
---|
756 | |
---|
757 | glViewport(0,0,w,h); |
---|
758 | glMatrixMode(GL_MODELVIEW); |
---|
759 | glLoadIdentity(); |
---|
760 | gluPerspective(45.0f,w/h,1.0f,1000.0f); |
---|
761 | gluLookAt (0.0, 0.0, -2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0); |
---|
762 | } |
---|
763 | |
---|
764 | /** |
---|
765 | * @brief OpenGL display function |
---|
766 | */ |
---|
767 | void displayfunc() |
---|
768 | { |
---|
769 | glClearColor(0.0 ,0.0, 0.0, 0.0); |
---|
770 | glClear(GL_COLOR_BUFFER_BIT); |
---|
771 | glClear(GL_DEPTH_BUFFER_BIT); |
---|
772 | |
---|
773 | glPointSize(1.0); |
---|
774 | glBlendFunc(GL_SRC_ALPHA, GL_ONE); |
---|
775 | glEnable(GL_BLEND); |
---|
776 | glDepthMask(GL_FALSE); |
---|
777 | |
---|
778 | glColor3f(1.0f,0.6f,0.0f); |
---|
779 | |
---|
780 | if (curr_step < n_steps){ |
---|
781 | if (deviceType.compare("gpu") == 0){ |
---|
782 | //Calling kernel for calculatig subsequent positions |
---|
783 | ((NBody*)me)->runCLKernels(); |
---|
784 | }else |
---|
785 | if (deviceType.compare("cpu") == 0){ |
---|
786 | ((NBody*)me)->nBodyCPUReference(); |
---|
787 | } |
---|
788 | } |
---|
789 | |
---|
790 | glBegin(GL_POINTS); |
---|
791 | for(int i=0; i < numBodies; ++i) |
---|
792 | { |
---|
793 | if (deviceType.compare("gpu") == 0){ |
---|
794 | //divided by 300 just for scaling |
---|
795 | glVertex3d(pos[i*4+ 0]/300,pos[i*4+1]/300,pos[i*4+2]/300); |
---|
796 | }else |
---|
797 | if (deviceType.compare("cpu") == 0){ |
---|
798 | //divided by 300 just for scaling |
---|
799 | glVertex3d(((NBody*)me)->refPos[i*4+ 0]/300,((NBody*)me)->refPos[i*4+1]/300,((NBody*)me)->refPos[i*4+2]/300); |
---|
800 | } |
---|
801 | } |
---|
802 | glEnd(); |
---|
803 | |
---|
804 | glFlush(); |
---|
805 | glutSwapBuffers(); |
---|
806 | } |
---|
807 | |
---|
808 | /* keyboard function */ |
---|
809 | void keyboardFunc(unsigned char key, int mouseX, int mouseY) |
---|
810 | { |
---|
811 | switch(key) |
---|
812 | { |
---|
813 | /* If the user hits escape or Q, then exit */ |
---|
814 | /* ESCAPE_KEY = 27 */ |
---|
815 | case 27: |
---|
816 | case 'q': |
---|
817 | case 'Q': |
---|
818 | { |
---|
819 | if(((NBody*)me)->cleanup() != 0) // SDK_SUCCESS == 0; |
---|
820 | exit(1); |
---|
821 | else |
---|
822 | exit(0); |
---|
823 | } |
---|
824 | default: |
---|
825 | break; |
---|
826 | } |
---|
827 | } |
---|
828 | |
---|
829 | int NBody::run() |
---|
830 | { |
---|
831 | /* Arguments are set and execution call is enqueued on command buffer */ |
---|
832 | if(setupCLKernels()!= 0) // SDK_SUCCESS == 0; |
---|
833 | { |
---|
834 | return 1; |
---|
835 | } |
---|
836 | |
---|
837 | /* |
---|
838 | if(!quiet) |
---|
839 | { |
---|
840 | // Printeaza pozitiile corpurilor de la inceput: |
---|
841 | sampleCommon->printArray<cl_float>("Output", pos, numBodies, 1); |
---|
842 | } |
---|
843 | */ |
---|
844 | return 0; |
---|
845 | } |
---|
846 | |
---|
847 | void NBody::printStats() |
---|
848 | { |
---|
849 | /* std::string strArray[3] = {"Particles", "Iterations", "Time(sec)"}; |
---|
850 | std::string stats[3]; |
---|
851 | totalTime = setupTime + kernelTime; |
---|
852 | |
---|
853 | stats[0] = sampleCommon->toString(numParticles, std::dec); |
---|
854 | stats[1] = sampleCommon->toString(ITER, std::dec); |
---|
855 | stats[2] = sampleCommon->toString(totalTime, std::dec); |
---|
856 | |
---|
857 | this->SDKSample::printStats(strArray, stats, 3); |
---|
858 | */ |
---|
859 | } |
---|
860 | |
---|
861 | int NBody::cleanup() |
---|
862 | { |
---|
863 | /* Releases OpenCL resources (Context, Memory etc.) */ |
---|
864 | cl_int status; |
---|
865 | |
---|
866 | status = clReleaseKernel(kernel); |
---|
867 | if (status != CL_SUCCESS){ |
---|
868 | std::cout << "clReleaseKernel failed." << std::endl; |
---|
869 | return 1; |
---|
870 | } |
---|
871 | |
---|
872 | status = clReleaseProgram(program); |
---|
873 | if (status != CL_SUCCESS){ |
---|
874 | std::cout << "clReleaseProgram failed." << std::endl; |
---|
875 | return 1; |
---|
876 | } |
---|
877 | |
---|
878 | status = clReleaseMemObject(updatedPos); |
---|
879 | if (status != CL_SUCCESS){ |
---|
880 | std::cout << "clReleaseMemObject failed." << std::endl; |
---|
881 | return 1; |
---|
882 | } |
---|
883 | |
---|
884 | status = clReleaseMemObject(updatedVel); |
---|
885 | if (status != CL_SUCCESS){ |
---|
886 | std::cout << "clReleaseMemObject failed." << std::endl; |
---|
887 | return 1; |
---|
888 | } |
---|
889 | |
---|
890 | status = clReleaseCommandQueue(commandQueue); |
---|
891 | if (status != CL_SUCCESS){ |
---|
892 | std::cout << "clReleaseMemObject failed." << std::endl; |
---|
893 | return 1; |
---|
894 | } |
---|
895 | |
---|
896 | status = clReleaseContext(context); |
---|
897 | if (status != CL_SUCCESS){ |
---|
898 | std::cout << "clReleaseMemObject failed." << std::endl; |
---|
899 | return 1; |
---|
900 | } |
---|
901 | |
---|
902 | return 0; |
---|
903 | } |
---|
904 | |
---|
905 | NBody::~NBody() |
---|
906 | { |
---|
907 | /* release program resources */ |
---|
908 | if(initPos) |
---|
909 | { |
---|
910 | free(initPos); |
---|
911 | initPos = NULL; |
---|
912 | } |
---|
913 | |
---|
914 | if(initVel) |
---|
915 | { |
---|
916 | free(initVel); |
---|
917 | initVel = NULL; |
---|
918 | } |
---|
919 | |
---|
920 | if(pos) |
---|
921 | { |
---|
922 | #if defined (_WIN32) |
---|
923 | _aligned_free(pos); |
---|
924 | #else |
---|
925 | free(pos); |
---|
926 | #endif |
---|
927 | pos = NULL; |
---|
928 | } |
---|
929 | if(vel) |
---|
930 | { |
---|
931 | #if defined (_WIN32) |
---|
932 | _aligned_free(vel); |
---|
933 | #else |
---|
934 | free(vel); |
---|
935 | #endif |
---|
936 | vel = NULL; |
---|
937 | } |
---|
938 | |
---|
939 | if(devices) |
---|
940 | { |
---|
941 | free(devices); |
---|
942 | devices = NULL; |
---|
943 | } |
---|
944 | |
---|
945 | if(refPos) |
---|
946 | { |
---|
947 | free(refPos); |
---|
948 | refPos = NULL; |
---|
949 | } |
---|
950 | |
---|
951 | if(refVel) |
---|
952 | { |
---|
953 | free(refVel); |
---|
954 | refVel = NULL; |
---|
955 | } |
---|
956 | |
---|
957 | if(maxWorkItemSizes) |
---|
958 | { |
---|
959 | free(maxWorkItemSizes); |
---|
960 | maxWorkItemSizes = NULL; |
---|
961 | } |
---|
962 | } |
---|
963 | |
---|
964 | |
---|
965 | int main(int argc, char * argv[]) |
---|
966 | { |
---|
967 | NBody clNBody("OpenCL NBody"); |
---|
968 | me = &clNBody; |
---|
969 | |
---|
970 | if(clNBody.setup() != 0) |
---|
971 | return 1; |
---|
972 | |
---|
973 | if(clNBody.run() != 0) |
---|
974 | return 1; |
---|
975 | |
---|
976 | time ( &rawtime ); |
---|
977 | timeinfo = localtime ( &rawtime ); |
---|
978 | std::cout << "Start time and date: " << asctime (timeinfo) << std::endl; |
---|
979 | |
---|
980 | if(display) |
---|
981 | { |
---|
982 | // Run in graphical window if requested |
---|
983 | glutInit(&argc, argv); |
---|
984 | glutInitWindowPosition(100,10); |
---|
985 | glutInitWindowSize(600,600); |
---|
986 | glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE ); |
---|
987 | glutCreateWindow("NBody simulation"); |
---|
988 | GLInit(); |
---|
989 | glutDisplayFunc(displayfunc); |
---|
990 | glutReshapeFunc(reShape); |
---|
991 | glutIdleFunc(idle); |
---|
992 | glutKeyboardFunc(keyboardFunc); |
---|
993 | glutMainLoop(); |
---|
994 | } |
---|
995 | |
---|
996 | if(clNBody.cleanup() != 0) |
---|
997 | return 1; |
---|
998 | |
---|
999 | clNBody.printStats(); |
---|
1000 | |
---|
1001 | return 0; |
---|
1002 | } |
---|