[34] | 1 | #ifndef NBODY_H_ |
---|
| 2 | #define NBODY_H_ |
---|
| 3 | |
---|
| 4 | // standard utility and system includes |
---|
| 5 | #include <oclUtils.h> |
---|
| 6 | |
---|
| 7 | // GLEW and GLUT includes |
---|
| 8 | #include <GL/glew.h> |
---|
| 9 | #if defined (__APPLE__) || defined(MACOSX) |
---|
| 10 | #include <GLUT/glut.h> |
---|
| 11 | #else |
---|
| 12 | #include <GL/glut.h> |
---|
| 13 | #endif |
---|
| 14 | |
---|
| 15 | // Extra CL/GL include |
---|
| 16 | #include <CL/cl_gl.h> |
---|
| 17 | |
---|
| 18 | |
---|
| 19 | #include <iostream> |
---|
| 20 | #include <iomanip> |
---|
| 21 | #include <stdio.h> |
---|
| 22 | #include <stdlib.h> |
---|
| 23 | #include <assert.h> |
---|
| 24 | #include <string.h> |
---|
| 25 | #include <cmath> |
---|
| 26 | #include <malloc.h> |
---|
| 27 | #include <time.h> |
---|
| 28 | |
---|
| 29 | #include <GL/glut.h> |
---|
| 30 | |
---|
// Compile-time simulation parameters.
#define GROUP_SIZE    64      // presumably the OpenCL work-group size -- confirm against the kernel launch code
#define NUM_PARTICLES 30000   // particle count assigned to numParticles by NBody(const char*)
#define END_TIME      2       // value assigned to end_time by NBody(const char*)
---|
| 34 | |
---|
| 35 | //---------------------- |
---|
// Mode codes for the two optional outputs (diagnostics and snapshots).
// Always compare against the named constant: TAKE_DIAGNOSTICS is 0, so a
// plain truthiness test such as `if (takeDiagnostics)` would be inverted.
#define TAKE_DIAGNOSTICS        0
#define DO_NOT_TAKE_DIAGNOSTICS 1
#define TAKE_SNAPSHOTS          2
#define DO_NOT_TAKE_SNAPSHOTS   3

// Currently selected modes; both outputs start disabled.
// NOTE: these are `static` in a header, so every translation unit that
// includes this file gets its own independent copy of each variable.
static int takeDiagnostics = DO_NOT_TAKE_DIAGNOSTICS;
static int takeSnapshots   = DO_NOT_TAKE_SNAPSHOTS;
---|
| 43 | |
---|
| 44 | //---------------------- |
---|
// Integrator selection codes.
#define SIMPLE   0
#define LEAPFROG 1

// Selected integrator; NBody(const char*) compares it against LEAPFROG to
// pick the kernel file and kernel function. Starts as SIMPLE.
// NOTE: `static` in a header -- each including translation unit has its own copy.
static int integrator = SIMPLE;
---|
| 49 | |
---|
| 50 | //---------------------- |
---|
| 51 | |
---|
// Run-time configuration shared by the sample.
// NOTE: everything below is `static` in a header, so each translation unit
// that includes this file works on its own private copy.

// When true the OpenGL display is used.
static bool displayOpenGL = true;

// When true the particle data is read from an input file; when false it is
// generated with random functions.
static bool readInputData = false;

// Device on which the computation should run.
static std::string deviceType("gpu");

// Variables used for measuring the duration of the run.
static time_t rawtime;
static struct tm *timeinfo;

// The three paths below point at string literals, which are immutable, so
// the pointers are declared `const char*`. (Binding a literal to a plain
// `char*` is deprecated in C++03 and ill-formed from C++11 on.) The pointers
// themselves stay reassignable.

// File holding the particle information (mass, position, velocity).
static const char *inputDataFileName = "Input-Output\\input_64.txt";

// File that receives snapshots of the current system when requested.
static const char *snapshotFileName = "Input-Output\\snapshot.txt";

// File that receives the periodic energy-conservation diagnostics.
static const char *diagnosticFileName = "Input-Output\\diagnostic.txt";
---|
| 74 | |
---|
| 75 | //---------------------- |
---|
| 76 | |
---|
| 77 | |
---|
| 78 | /** |
---|
| 79 | * NBody |
---|
| 80 | * Class implements OpenCL NBody sample |
---|
| 81 | * |
---|
| 82 | */ |
---|
| 83 | |
---|
| 84 | class NBody |
---|
| 85 | { |
---|
| 86 | public: |
---|
| 87 | |
---|
| 88 | //================================================ |
---|
| 89 | // General variables used for all integrators |
---|
| 90 | |
---|
| 91 | cl_double setupTimeCL; // Time taken to setup OpenCL resources and building kernel; |
---|
| 92 | cl_double kernelTime; // Time taken to run kernel and read result back; |
---|
| 93 | |
---|
| 94 | cl_context context; // CL context; |
---|
| 95 | cl_device_id *devices; // CL device list; |
---|
| 96 | |
---|
| 97 | size_t maxWorkGroupSize; // Max allowed work-items in a group; |
---|
| 98 | cl_uint maxDimensions; // Max group dimensions allowed; |
---|
| 99 | size_t* maxWorkItemSizes; // Max work-items sizes in each dimensions; |
---|
| 100 | cl_ulong totalLocalMemory; // Max local memory allowed; |
---|
| 101 | cl_ulong usedLocalMemory; // Used local memory; |
---|
| 102 | |
---|
| 103 | cl_float* initPos; // Initial position. Used to hold the initial particle position; |
---|
| 104 | cl_float* initVel; // Initial velocity. Used to hold the initial particle velocity; |
---|
| 105 | |
---|
| 106 | cl_float* pos; // This is the buffer for positions that resides on the host |
---|
| 107 | // side of the application. It is linked with the memory |
---|
| 108 | // buffer for positions "updatedPos"; |
---|
| 109 | |
---|
| 110 | cl_float* vel; // This is the buffer for velocities that resides on the host |
---|
| 111 | // side of the application. It is linked with the memory |
---|
| 112 | // buffer for velocities "updatedVel"; |
---|
| 113 | |
---|
| 114 | |
---|
| 115 | cl_mem updatedPos; // This is a memory buffer representing the position of |
---|
| 116 | // partciles. It is linked with the application buffer |
---|
| 117 | // for positions "pos". updatedPos is set as an argument to |
---|
| 118 | // the kernel, and also used to read data from the kernel by |
---|
| 119 | // enqueueing a read command. It will represent the updated |
---|
| 120 | // values of positions after running once the kernel; |
---|
| 121 | |
---|
| 122 | cl_mem updatedVel; // This is a memory buffer representing the poelocity of |
---|
| 123 | // partciles. It is linked with the application buffer |
---|
| 124 | // for velocities "vel". updatedVel is set as an argument to |
---|
| 125 | // the kernel, and also used to read data from the kernel by |
---|
| 126 | // enqueueing a read command. It will represent the updated |
---|
| 127 | // values of velocities after running once the kernel; |
---|
| 128 | |
---|
| 129 | cl_command_queue commandQueue; // CL command queue; |
---|
| 130 | cl_program program; // CL program; |
---|
| 131 | cl_kernel kernel; // CL kernel; |
---|
| 132 | |
---|
| 133 | cl_int numParticles; // Number of particles in the system; |
---|
| 134 | |
---|
| 135 | cl_float espSqr; // Softening Factor; |
---|
| 136 | cl_float delT; // dT (timestep); |
---|
| 137 | cl_double curr_time_step; // Reatins the current time step in the simulation; |
---|
| 138 | cl_double end_time; // The total time (end time) of the simulation; |
---|
| 139 | cl_long taken_steps; // Tne number of steps taken by now (at the curr_time_step); |
---|
| 140 | cl_double dt_snap; // The interval between two snapshots; |
---|
| 141 | cl_double dt_diag; // The interval between two diagnostics; |
---|
| 142 | cl_double curr_snap_time; // This is the current time that must be reached by |
---|
| 143 | // curr_time_step in order to take a snapshot. After this, |
---|
| 144 | // curr_snap_time will be incremented by dt_snap; |
---|
| 145 | cl_double curr_diag_time; // This is the current time that must be reached by |
---|
| 146 | // curr_time_step in order to make a diagnostication of the |
---|
| 147 | // system. After this, curr_diag_time will be incremented by dt_diag; |
---|
| 148 | |
---|
| 149 | cl_double Etot_init; // Will retain the total energy of the system at the beggining |
---|
| 150 | // of the simulation. It is used to measure the convergence |
---|
| 151 | // (error) of the simulation; |
---|
| 152 | |
---|
| 153 | bool initFlag; // This flag variable is used to make some initialisations of |
---|
| 154 | // variables when the simulation starts. For example if |
---|
| 155 | // initFlag is true, than we are starting the simulation and |
---|
| 156 | // compute "Etot_init" energy. After this first step of simulation |
---|
| 157 | // it becomes false; |
---|
| 158 | |
---|
| 159 | const char * kernelFileName; // The name of the file that contains the kernel code; |
---|
| 160 | const char * kernelFunctionName;// The name of the __kernel function in the kernel file; |
---|
| 161 | |
---|
| 162 | //================================================ |
---|
| 163 | // Variables specific only to Leapfrog integrator: |
---|
| 164 | |
---|
| 165 | cl_float* acc; // This is the buffer for accelerations that resides on the host |
---|
| 166 | // side of the application. It is linked with the memory |
---|
| 167 | // buffer for accelerations "updatedAcc"; |
---|
| 168 | |
---|
| 169 | cl_mem updatedAcc; // This is a memory buffer representing the acceleration of |
---|
| 170 | // partciles. It is linked with the application buffer |
---|
| 171 | // for acc "acc". updatedAcc is set as an argument to |
---|
| 172 | // the kernel, and also used to read data from the kernel by |
---|
| 173 | // enqueueing a read command. It will represent the updated |
---|
| 174 | // values of accelerations after running once the kernel; |
---|
| 175 | cl_mem updatedCollTime; |
---|
| 176 | cl_float* collTime; |
---|
| 177 | |
---|
| 178 | cl_int initialAccComputation; |
---|
| 179 | |
---|
| 180 | |
---|
| 181 | |
---|
| 182 | |
---|
| 183 | private: |
---|
| 184 | float random(float randMax, float randMin); |
---|
| 185 | |
---|
| 186 | public: |
---|
| 187 | /** |
---|
| 188 | * Constructor |
---|
| 189 | * Initialize member variables |
---|
| 190 | * @param name name of sample (string) |
---|
| 191 | */ |
---|
| 192 | explicit NBody(std::string name) |
---|
| 193 | { |
---|
| 194 | setupTimeCL = 0; |
---|
| 195 | kernelTime = 0; |
---|
| 196 | delT = 0.005f; |
---|
| 197 | espSqr = 50.0f; |
---|
| 198 | initPos = NULL; |
---|
| 199 | initVel = NULL; |
---|
| 200 | pos = NULL; |
---|
| 201 | vel = NULL; |
---|
| 202 | devices = NULL; |
---|
| 203 | maxWorkItemSizes = NULL; |
---|
| 204 | kernelFileName = "Kernels\\simpleIntegratorKernel.cl"; |
---|
| 205 | kernelFunctionName = "simple_integrator"; |
---|
| 206 | numParticles = 30; |
---|
| 207 | curr_time_step = 0; |
---|
| 208 | end_time = 10; |
---|
| 209 | initFlag = true; |
---|
| 210 | dt_snap = 0.1; |
---|
| 211 | dt_diag = 0.1; |
---|
| 212 | curr_snap_time = dt_snap; |
---|
| 213 | curr_diag_time = dt_diag; |
---|
| 214 | taken_steps = 0; |
---|
| 215 | } |
---|
| 216 | |
---|
| 217 | /** |
---|
| 218 | * Constructor |
---|
| 219 | * Initialize member variables |
---|
| 220 | * @param name name of sample (const char*) |
---|
| 221 | */ |
---|
| 222 | explicit NBody(const char* name) |
---|
| 223 | { |
---|
| 224 | setupTimeCL = 0; |
---|
| 225 | kernelTime = 0; |
---|
| 226 | delT = 0.01f; |
---|
| 227 | espSqr = 50.0f; |
---|
| 228 | initPos = NULL; |
---|
| 229 | initVel = NULL; |
---|
| 230 | pos = NULL; |
---|
| 231 | vel = NULL; |
---|
| 232 | devices = NULL; |
---|
| 233 | maxWorkItemSizes = NULL; |
---|
| 234 | if (integrator == LEAPFROG){ |
---|
| 235 | kernelFileName = "Kernels\\leapfrogIntegratorKernel.cl"; |
---|
| 236 | kernelFunctionName = "leapfrog_integrator"; |
---|
| 237 | }else{ |
---|
| 238 | kernelFileName = "Kernels\\simpleIntegratorKernel.cl"; |
---|
| 239 | kernelFunctionName = "simple_integrator"; |
---|
| 240 | } |
---|
| 241 | numParticles = NUM_PARTICLES; |
---|
| 242 | curr_time_step = 0; |
---|
| 243 | end_time = END_TIME; |
---|
| 244 | initFlag = true; |
---|
| 245 | dt_snap = 1.0; |
---|
| 246 | dt_diag = 1.0; |
---|
| 247 | curr_snap_time = dt_snap; |
---|
| 248 | curr_diag_time = dt_diag; |
---|
| 249 | taken_steps = 0; |
---|
| 250 | } |
---|
| 251 | |
---|
| 252 | ~NBody(); |
---|
| 253 | |
---|
| 254 | /** |
---|
| 255 | * Returns information about the device on which the |
---|
| 256 | * simulation will run. Also some variables will be init |
---|
| 257 | * here based on the device information |
---|
| 258 | * @return 1 on success and 0 on failure |
---|
| 259 | */ |
---|
| 260 | int getDeviceInfo(); |
---|
| 261 | |
---|
| 262 | /** |
---|
| 263 | * Allocate and initialize host memory array with random values |
---|
| 264 | * @return 1 on success and 0 on failure |
---|
| 265 | */ |
---|
| 266 | int setupNBody(); |
---|
| 267 | |
---|
| 268 | /** |
---|
| 269 | * OpenCL related initialisations. |
---|
| 270 | * Set up Context, Device list, Command Queue, Memory buffers |
---|
| 271 | * @return 1 on success and 0 on failure |
---|
| 272 | */ |
---|
| 273 | int setupCL(); |
---|
| 274 | |
---|
| 275 | /** |
---|
| 276 | * Build CL kernel program executable |
---|
| 277 | * @return 1 on success and 0 on failure |
---|
| 278 | */ |
---|
| 279 | int setupCLProgram(); |
---|
| 280 | |
---|
| 281 | /** |
---|
| 282 | * Build kernels and set values for kernels' arguments |
---|
| 283 | * @return 1 on success and 0 on failure |
---|
| 284 | */ |
---|
| 285 | int setupCLKernels(); |
---|
| 286 | |
---|
| 287 | /** |
---|
| 288 | * Enqueue calls to the kernels |
---|
| 289 | * on to the command queue, wait till end of kernel execution. |
---|
| 290 | * Get kernel start and end time if timing is enabled |
---|
| 291 | * @return 1 on success and 0 on failure |
---|
| 292 | */ |
---|
| 293 | int runCLKernels(); |
---|
| 294 | |
---|
| 295 | /** |
---|
| 296 | * Load a .cl source file as a char* and it will be used |
---|
| 297 | * for as a parameter for creating a program |
---|
| 298 | * @return the char vector |
---|
| 299 | */ |
---|
| 300 | char * load_program_source(const char *filename); |
---|
| 301 | |
---|
| 302 | /** |
---|
| 303 | * Override from SDKSample |
---|
| 304 | * Run OpenCL NBody |
---|
| 305 | */ |
---|
| 306 | int run(); |
---|
| 307 | |
---|
| 308 | /** |
---|
| 309 | * Override from SDKSample |
---|
| 310 | * Cleanup memory allocations |
---|
| 311 | */ |
---|
| 312 | int cleanup(); |
---|
| 313 | |
---|
| 314 | /** |
---|
| 315 | * Writes a single snapshot on the output snapshot file. |
---|
| 316 | */ |
---|
| 317 | void put_snapshot(); |
---|
| 318 | |
---|
| 319 | /** |
---|
| 320 | * Writes diagnostics on the diagnostics file: |
---|
| 321 | * current time; number of integration steps so far; |
---|
| 322 | * kinetic, potential, and total energy; absolute and |
---|
| 323 | * relative energy errors since the start of the run. |
---|
| 324 | */ |
---|
| 325 | void write_diagnostics(); |
---|
| 326 | |
---|
| 327 | /** |
---|
| 328 | * Computes the potential energy of a single particle. |
---|
| 329 | */ |
---|
| 330 | float epot_particle(int currPart); |
---|
| 331 | |
---|
| 332 | int runCLKernelLeapfrog(); |
---|
| 333 | |
---|
| 334 | int enqueueKernel(); |
---|
| 335 | |
---|
| 336 | }; |
---|
| 337 | |
---|
| 338 | #endif // NBODY_H_ |
---|