1 | #ifndef NBODY_H_ |
---|
2 | #define NBODY_H_ |
---|
3 | |
---|
4 | // standard utility and system includes |
---|
5 | #include <oclUtils.h> |
---|
6 | |
---|
7 | // GLEW and GLUT includes |
---|
8 | #include <GL/glew.h> |
---|
9 | #if defined (__APPLE__) || defined(MACOSX) |
---|
10 | #include <GLUT/glut.h> |
---|
11 | #else |
---|
12 | #include <GL/glut.h> |
---|
13 | #endif |
---|
14 | |
---|
15 | // Extra CL/GL include |
---|
16 | #include <CL/cl_gl.h> |
---|
17 | |
---|
18 | |
---|
19 | #include <iostream> |
---|
20 | #include <iomanip> |
---|
21 | #include <stdio.h> |
---|
22 | #include <stdlib.h> |
---|
23 | #include <assert.h> |
---|
24 | #include <string.h> |
---|
25 | #include <cmath> |
---|
26 | #include <malloc.h> |
---|
27 | #include <time.h> |
---|
28 | |
---|
29 | #include <GL/glut.h> |
---|
30 | |
---|
// Compile-time simulation defaults.
//   GROUP_SIZE    - presumably the OpenCL work-group size; it is not used in
//                   this header, so confirm against the .cpp/.cl sources.
//   NUM_PARTICLES - particle count assigned to NBody::numParticles by the
//                   NBody(const char*) constructor.
//   END_TIME      - end time assigned to NBody::end_time by the
//                   NBody(const char*) constructor.
#define GROUP_SIZE 64
#define NUM_PARTICLES 30000
#define END_TIME 2
---|
34 | |
---|
35 | //---------------------- |
---|
// Mode codes for the diagnostics / snapshot switches declared below.
#define TAKE_DIAGNOSTICS 0
#define DO_NOT_TAKE_DIAGNOSTICS 1
#define TAKE_SNAPSHOTS 2
#define DO_NOT_TAKE_SNAPSHOTS 3

// Current switch settings; both start in their "do not take" state.
// NOTE: these are `static` objects defined in a header, so every
// translation unit that includes this file gets its own private copy.
static int takeSnapshots = DO_NOT_TAKE_SNAPSHOTS;
static int takeDiagnostics = DO_NOT_TAKE_DIAGNOSTICS;
---|
43 | |
---|
44 | //---------------------- |
---|
// Integrator identifiers.
#define SIMPLE 0
#define LEAPFROG 1

// Selected integrator; defaults to SIMPLE. The NBody(const char*)
// constructor loads the leapfrog kernel when this equals LEAPFROG and the
// simple kernel otherwise.
// NOTE: `static` in a header means one independent copy per translation unit.
static int integrator = SIMPLE;
---|
49 | |
---|
50 | //---------------------- |
---|
51 | |
---|
// When true, the OpenGL display is used.
static bool displayOpenGL = true;

// When true, particle information is read from an input file; when false,
// it is generated via random functions.
static bool readInputData = false;

// Names the device on which the computations should be done.
static std::string deviceType = "gpu";

// Variables used for measuring the duration of the run.
static time_t rawtime;
static struct tm *timeinfo;
---|
61 | |
---|
// Default data-file locations. The paths are relative and use backslash
// separators.
// NOTE(review): binding a string literal to a non-const `char*` is deprecated
// in C++ (ill-formed since C++11 without a compiler extension); the type is
// left unchanged here so existing users of these variables keep compiling.

// File that contains the information about the particles
// (mass, position, velocity).
static char *inputDataFileName = "Input-Output\\input_64.txt";

// File into which snapshots of the current system are written on request.
static char *snapshotFileName = "Input-Output\\snapshot.txt";

// File into which information about energy conservation is written from
// time to time.
static char *diagnosticFileName = "Input-Output\\diagnostic.txt";
---|
74 | |
---|
75 | //---------------------- |
---|
76 | |
---|
77 | |
---|
78 | /** |
---|
79 | * NBody |
---|
80 | * Class implements OpenCL NBody sample |
---|
81 | * |
---|
82 | */ |
---|
83 | |
---|
84 | class NBody |
---|
85 | { |
---|
86 | public: |
---|
87 | |
---|
88 | //================================================ |
---|
89 | // General variables used for all integrators |
---|
90 | |
---|
91 | cl_double setupTimeCL; // Time taken to setup OpenCL resources and building kernel; |
---|
92 | cl_double kernelTime; // Time taken to run kernel and read result back; |
---|
93 | |
---|
94 | cl_context context; // CL context; |
---|
95 | cl_device_id *devices; // CL device list; |
---|
96 | |
---|
97 | size_t maxWorkGroupSize; // Max allowed work-items in a group; |
---|
98 | cl_uint maxDimensions; // Max group dimensions allowed; |
---|
99 | size_t* maxWorkItemSizes; // Max work-items sizes in each dimensions; |
---|
100 | cl_ulong totalLocalMemory; // Max local memory allowed; |
---|
101 | cl_ulong usedLocalMemory; // Used local memory; |
---|
102 | |
---|
103 | cl_float* initPos; // Initial position. Used to hold the initial particle position; |
---|
104 | cl_float* initVel; // Initial velocity. Used to hold the initial particle velocity; |
---|
105 | |
---|
106 | cl_float* pos; // This is the buffer for positions that resides on the host |
---|
107 | // side of the application. It is linked with the memory |
---|
108 | // buffer for positions "updatedPos"; |
---|
109 | |
---|
110 | cl_float* vel; // This is the buffer for velocities that resides on the host |
---|
111 | // side of the application. It is linked with the memory |
---|
112 | // buffer for velocities "updatedVel"; |
---|
113 | |
---|
114 | |
---|
115 | cl_mem updatedPos; // This is a memory buffer representing the position of |
---|
116 | // partciles. It is linked with the application buffer |
---|
117 | // for positions "pos". updatedPos is set as an argument to |
---|
118 | // the kernel, and also used to read data from the kernel by |
---|
119 | // enqueueing a read command. It will represent the updated |
---|
120 | // values of positions after running once the kernel; |
---|
121 | |
---|
122 | cl_mem updatedVel; // This is a memory buffer representing the poelocity of |
---|
123 | // partciles. It is linked with the application buffer |
---|
124 | // for velocities "vel". updatedVel is set as an argument to |
---|
125 | // the kernel, and also used to read data from the kernel by |
---|
126 | // enqueueing a read command. It will represent the updated |
---|
127 | // values of velocities after running once the kernel; |
---|
128 | |
---|
129 | cl_command_queue commandQueue; // CL command queue; |
---|
130 | cl_program program; // CL program; |
---|
131 | cl_kernel kernel; // CL kernel; |
---|
132 | |
---|
133 | cl_int numParticles; // Number of particles in the system; |
---|
134 | |
---|
135 | cl_float espSqr; // Softening Factor; |
---|
136 | cl_float delT; // dT (timestep); |
---|
137 | cl_double curr_time_step; // Reatins the current time step in the simulation; |
---|
138 | cl_double end_time; // The total time (end time) of the simulation; |
---|
139 | cl_long taken_steps; // Tne number of steps taken by now (at the curr_time_step); |
---|
140 | cl_double dt_snap; // The interval between two snapshots; |
---|
141 | cl_double dt_diag; // The interval between two diagnostics; |
---|
142 | cl_double curr_snap_time; // This is the current time that must be reached by |
---|
143 | // curr_time_step in order to take a snapshot. After this, |
---|
144 | // curr_snap_time will be incremented by dt_snap; |
---|
145 | cl_double curr_diag_time; // This is the current time that must be reached by |
---|
146 | // curr_time_step in order to make a diagnostication of the |
---|
147 | // system. After this, curr_diag_time will be incremented by dt_diag; |
---|
148 | |
---|
149 | cl_double Etot_init; // Will retain the total energy of the system at the beggining |
---|
150 | // of the simulation. It is used to measure the convergence |
---|
151 | // (error) of the simulation; |
---|
152 | |
---|
153 | bool initFlag; // This flag variable is used to make some initialisations of |
---|
154 | // variables when the simulation starts. For example if |
---|
155 | // initFlag is true, than we are starting the simulation and |
---|
156 | // compute "Etot_init" energy. After this first step of simulation |
---|
157 | // it becomes false; |
---|
158 | |
---|
159 | const char * kernelFileName; // The name of the file that contains the kernel code; |
---|
160 | const char * kernelFunctionName;// The name of the __kernel function in the kernel file; |
---|
161 | |
---|
162 | //================================================ |
---|
163 | // Variables specific only to Leapfrog integrator: |
---|
164 | |
---|
165 | cl_float* acc; // This is the buffer for accelerations that resides on the host |
---|
166 | // side of the application. It is linked with the memory |
---|
167 | // buffer for accelerations "updatedAcc"; |
---|
168 | |
---|
169 | cl_mem updatedAcc; // This is a memory buffer representing the acceleration of |
---|
170 | // partciles. It is linked with the application buffer |
---|
171 | // for acc "acc". updatedAcc is set as an argument to |
---|
172 | // the kernel, and also used to read data from the kernel by |
---|
173 | // enqueueing a read command. It will represent the updated |
---|
174 | // values of accelerations after running once the kernel; |
---|
175 | cl_mem updatedCollTime; |
---|
176 | cl_float* collTime; |
---|
177 | |
---|
178 | cl_int initialAccComputation; |
---|
179 | |
---|
180 | |
---|
181 | |
---|
182 | |
---|
183 | private: |
---|
184 | float random(float randMax, float randMin); |
---|
185 | |
---|
186 | public: |
---|
187 | /** |
---|
188 | * Constructor |
---|
189 | * Initialize member variables |
---|
190 | * @param name name of sample (string) |
---|
191 | */ |
---|
192 | explicit NBody(std::string name) |
---|
193 | { |
---|
194 | setupTimeCL = 0; |
---|
195 | kernelTime = 0; |
---|
196 | delT = 0.005f; |
---|
197 | espSqr = 50.0f; |
---|
198 | initPos = NULL; |
---|
199 | initVel = NULL; |
---|
200 | pos = NULL; |
---|
201 | vel = NULL; |
---|
202 | devices = NULL; |
---|
203 | maxWorkItemSizes = NULL; |
---|
204 | kernelFileName = "Kernels\\simpleIntegratorKernel.cl"; |
---|
205 | kernelFunctionName = "simple_integrator"; |
---|
206 | numParticles = 30; |
---|
207 | curr_time_step = 0; |
---|
208 | end_time = 10; |
---|
209 | initFlag = true; |
---|
210 | dt_snap = 0.1; |
---|
211 | dt_diag = 0.1; |
---|
212 | curr_snap_time = dt_snap; |
---|
213 | curr_diag_time = dt_diag; |
---|
214 | taken_steps = 0; |
---|
215 | } |
---|
216 | |
---|
217 | /** |
---|
218 | * Constructor |
---|
219 | * Initialize member variables |
---|
220 | * @param name name of sample (const char*) |
---|
221 | */ |
---|
222 | explicit NBody(const char* name) |
---|
223 | { |
---|
224 | setupTimeCL = 0; |
---|
225 | kernelTime = 0; |
---|
226 | delT = 0.01f; |
---|
227 | espSqr = 50.0f; |
---|
228 | initPos = NULL; |
---|
229 | initVel = NULL; |
---|
230 | pos = NULL; |
---|
231 | vel = NULL; |
---|
232 | devices = NULL; |
---|
233 | maxWorkItemSizes = NULL; |
---|
234 | if (integrator == LEAPFROG){ |
---|
235 | kernelFileName = "Kernels\\leapfrogIntegratorKernel.cl"; |
---|
236 | kernelFunctionName = "leapfrog_integrator"; |
---|
237 | }else{ |
---|
238 | kernelFileName = "Kernels\\simpleIntegratorKernel.cl"; |
---|
239 | kernelFunctionName = "simple_integrator"; |
---|
240 | } |
---|
241 | numParticles = NUM_PARTICLES; |
---|
242 | curr_time_step = 0; |
---|
243 | end_time = END_TIME; |
---|
244 | initFlag = true; |
---|
245 | dt_snap = 1.0; |
---|
246 | dt_diag = 1.0; |
---|
247 | curr_snap_time = dt_snap; |
---|
248 | curr_diag_time = dt_diag; |
---|
249 | taken_steps = 0; |
---|
250 | } |
---|
251 | |
---|
252 | ~NBody(); |
---|
253 | |
---|
254 | /** |
---|
255 | * Returns information about the device on which the |
---|
256 | * simulation will run. Also some variables will be init |
---|
257 | * here based on the device information |
---|
258 | * @return 1 on success and 0 on failure |
---|
259 | */ |
---|
260 | int getDeviceInfo(); |
---|
261 | |
---|
262 | /** |
---|
263 | * Allocate and initialize host memory array with random values |
---|
264 | * @return 1 on success and 0 on failure |
---|
265 | */ |
---|
266 | int setupNBody(); |
---|
267 | |
---|
268 | /** |
---|
269 | * OpenCL related initialisations. |
---|
270 | * Set up Context, Device list, Command Queue, Memory buffers |
---|
271 | * @return 1 on success and 0 on failure |
---|
272 | */ |
---|
273 | int setupCL(); |
---|
274 | |
---|
275 | /** |
---|
276 | * Build CL kernel program executable |
---|
277 | * @return 1 on success and 0 on failure |
---|
278 | */ |
---|
279 | int setupCLProgram(); |
---|
280 | |
---|
281 | /** |
---|
282 | * Build kernels and set values for kernels' arguments |
---|
283 | * @return 1 on success and 0 on failure |
---|
284 | */ |
---|
285 | int setupCLKernels(); |
---|
286 | |
---|
287 | /** |
---|
288 | * Enqueue calls to the kernels |
---|
289 | * on to the command queue, wait till end of kernel execution. |
---|
290 | * Get kernel start and end time if timing is enabled |
---|
291 | * @return 1 on success and 0 on failure |
---|
292 | */ |
---|
293 | int runCLKernels(); |
---|
294 | |
---|
295 | /** |
---|
296 | * Load a .cl source file as a char* and it will be used |
---|
297 | * for as a parameter for creating a program |
---|
298 | * @return the char vector |
---|
299 | */ |
---|
300 | char * load_program_source(const char *filename); |
---|
301 | |
---|
302 | /** |
---|
303 | * Override from SDKSample |
---|
304 | * Run OpenCL NBody |
---|
305 | */ |
---|
306 | int run(); |
---|
307 | |
---|
308 | /** |
---|
309 | * Override from SDKSample |
---|
310 | * Cleanup memory allocations |
---|
311 | */ |
---|
312 | int cleanup(); |
---|
313 | |
---|
314 | /** |
---|
315 | * Writes a single snapshot on the output snapshot file. |
---|
316 | */ |
---|
317 | void put_snapshot(); |
---|
318 | |
---|
319 | /** |
---|
320 | * Writes diagnostics on the diagnostics file: |
---|
321 | * current time; number of integration steps so far; |
---|
322 | * kinetic, potential, and total energy; absolute and |
---|
323 | * relative energy errors since the start of the run. |
---|
324 | */ |
---|
325 | void write_diagnostics(); |
---|
326 | |
---|
327 | /** |
---|
328 | * Computes the potential energy of a single particle. |
---|
329 | */ |
---|
330 | float epot_particle(int currPart); |
---|
331 | |
---|
332 | int runCLKernelLeapfrog(); |
---|
333 | |
---|
334 | int enqueueKernel(); |
---|
335 | |
---|
336 | }; |
---|
337 | |
---|
338 | #endif // NBODY_H_ |
---|