#ifndef NBODY_H_
#define NBODY_H_

// NOTE(review): every #include directive below has no target in this copy of
// the file (the header names appear to have been stripped). They are kept
// as found; restore the real header names before building.

// standard utility and system includes
#include

// GLEW and GLUT includes
#include
#if defined (__APPLE__) || defined(MACOSX)
#include
#else
#include
#endif

// Extra CL/GL include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

// GROUP_SIZE is not referenced anywhere else in this header
// (presumably the OpenCL work-group size -- confirm against the .cpp).
#define GROUP_SIZE 64
// Particle count assigned to numParticles by the NBody(const char*) constructor.
#define NUM_PARTICLES 30000
// Simulation end time assigned to end_time by the NBody(const char*) constructor.
#define END_TIME 2

//----------------------
// Values for the takeDiagnostics / takeSnapshots switches below.
#define TAKE_DIAGNOSTICS 0
#define DO_NOT_TAKE_DIAGNOSTICS 1
#define TAKE_SNAPSHOTS 2
#define DO_NOT_TAKE_SNAPSHOTS 3

// NOTE(review): all variables below are `static` at namespace scope in a
// header, so every translation unit that includes this file gets its own
// private copy; a change made in one .cpp is not visible in another.
static int takeDiagnostics = DO_NOT_TAKE_DIAGNOSTICS;   // Diagnostics are off by default;
static int takeSnapshots = DO_NOT_TAKE_SNAPSHOTS;       // Snapshots are off by default;

//----------------------
// Values for the `integrator` switch below.
#define SIMPLE 0
#define LEAPFROG 1

static int integrator = 0;                  // Selected integrator; 0 is the value of SIMPLE.
                                            // Read by the NBody(const char*) constructor to
                                            // choose the kernel file and kernel function;

//----------------------
static bool displayOpenGL = true;           // If it is true then OpenGL display is used;

static bool readInputData = false;          // If true then the particle information will be read
                                            // from an input file. If it is false, then the information
                                            // is generated via random functions;

static std::string deviceType("gpu");       // Says on which device we want to do the computations;

static time_t rawtime;                      // Variables used for measuring the duration of the run;
static struct tm * timeinfo;

// NOTE(review): the file names below are string literals bound to non-const
// `char*`; that conversion is deprecated in C++03 and ill-formed since C++11.
// Consider `const char*` once all users of these names have been checked.
// The paths use Windows-style backslash separators.
static char* inputDataFileName = "Input-Output\\input_64.txt";      // Name of the file which contains the
                                                                    // information about particles (mass, position, vel);

static char* snapshotFileName = "Input-Output\\snapshot.txt";       // Name of the file in which we will take
                                                                    // snapshots of the current system when we require.
static char* diagnosticFileName = "Input-Output\\diagnostic.txt"; // Represents the name of the file in which we will // write information about the energy conservation, // from time to time; //---------------------- /** * NBody * Class implements OpenCL NBody sample * */ class NBody { public: //================================================ // General variables used for all integrators cl_double setupTimeCL; // Time taken to setup OpenCL resources and building kernel; cl_double kernelTime; // Time taken to run kernel and read result back; cl_context context; // CL context; cl_device_id *devices; // CL device list; size_t maxWorkGroupSize; // Max allowed work-items in a group; cl_uint maxDimensions; // Max group dimensions allowed; size_t* maxWorkItemSizes; // Max work-items sizes in each dimensions; cl_ulong totalLocalMemory; // Max local memory allowed; cl_ulong usedLocalMemory; // Used local memory; cl_float* initPos; // Initial position. Used to hold the initial particle position; cl_float* initVel; // Initial velocity. Used to hold the initial particle velocity; cl_float* pos; // This is the buffer for positions that resides on the host // side of the application. It is linked with the memory // buffer for positions "updatedPos"; cl_float* vel; // This is the buffer for velocities that resides on the host // side of the application. It is linked with the memory // buffer for velocities "updatedVel"; cl_mem updatedPos; // This is a memory buffer representing the position of // partciles. It is linked with the application buffer // for positions "pos". updatedPos is set as an argument to // the kernel, and also used to read data from the kernel by // enqueueing a read command. It will represent the updated // values of positions after running once the kernel; cl_mem updatedVel; // This is a memory buffer representing the poelocity of // partciles. It is linked with the application buffer // for velocities "vel". 
updatedVel is set as an argument to // the kernel, and also used to read data from the kernel by // enqueueing a read command. It will represent the updated // values of velocities after running once the kernel; cl_command_queue commandQueue; // CL command queue; cl_program program; // CL program; cl_kernel kernel; // CL kernel; cl_int numParticles; // Number of particles in the system; cl_float espSqr; // Softening Factor; cl_float delT; // dT (timestep); cl_double curr_time_step; // Reatins the current time step in the simulation; cl_double end_time; // The total time (end time) of the simulation; cl_long taken_steps; // Tne number of steps taken by now (at the curr_time_step); cl_double dt_snap; // The interval between two snapshots; cl_double dt_diag; // The interval between two diagnostics; cl_double curr_snap_time; // This is the current time that must be reached by // curr_time_step in order to take a snapshot. After this, // curr_snap_time will be incremented by dt_snap; cl_double curr_diag_time; // This is the current time that must be reached by // curr_time_step in order to make a diagnostication of the // system. After this, curr_diag_time will be incremented by dt_diag; cl_double Etot_init; // Will retain the total energy of the system at the beggining // of the simulation. It is used to measure the convergence // (error) of the simulation; bool initFlag; // This flag variable is used to make some initialisations of // variables when the simulation starts. For example if // initFlag is true, than we are starting the simulation and // compute "Etot_init" energy. 
After this first step of simulation // it becomes false; const char * kernelFileName; // The name of the file that contains the kernel code; const char * kernelFunctionName;// The name of the __kernel function in the kernel file; //================================================ // Variables specific only to Leapfrog integrator: cl_float* acc; // This is the buffer for accelerations that resides on the host // side of the application. It is linked with the memory // buffer for accelerations "updatedAcc"; cl_mem updatedAcc; // This is a memory buffer representing the acceleration of // partciles. It is linked with the application buffer // for acc "acc". updatedAcc is set as an argument to // the kernel, and also used to read data from the kernel by // enqueueing a read command. It will represent the updated // values of accelerations after running once the kernel; cl_mem updatedCollTime; cl_float* collTime; cl_int initialAccComputation; private: float random(float randMax, float randMin); public: /** * Constructor * Initialize member variables * @param name name of sample (string) */ explicit NBody(std::string name) { setupTimeCL = 0; kernelTime = 0; delT = 0.005f; espSqr = 50.0f; initPos = NULL; initVel = NULL; pos = NULL; vel = NULL; devices = NULL; maxWorkItemSizes = NULL; kernelFileName = "Kernels\\simpleIntegratorKernel.cl"; kernelFunctionName = "simple_integrator"; numParticles = 30; curr_time_step = 0; end_time = 10; initFlag = true; dt_snap = 0.1; dt_diag = 0.1; curr_snap_time = dt_snap; curr_diag_time = dt_diag; taken_steps = 0; } /** * Constructor * Initialize member variables * @param name name of sample (const char*) */ explicit NBody(const char* name) { setupTimeCL = 0; kernelTime = 0; delT = 0.01f; espSqr = 50.0f; initPos = NULL; initVel = NULL; pos = NULL; vel = NULL; devices = NULL; maxWorkItemSizes = NULL; if (integrator == LEAPFROG){ kernelFileName = "Kernels\\leapfrogIntegratorKernel.cl"; kernelFunctionName = "leapfrog_integrator"; }else{ 
kernelFileName = "Kernels\\simpleIntegratorKernel.cl"; kernelFunctionName = "simple_integrator"; } numParticles = NUM_PARTICLES; curr_time_step = 0; end_time = END_TIME; initFlag = true; dt_snap = 1.0; dt_diag = 1.0; curr_snap_time = dt_snap; curr_diag_time = dt_diag; taken_steps = 0; } ~NBody(); /** * Returns information about the device on which the * simulation will run. Also some variables will be init * here based on the device information * @return 1 on success and 0 on failure */ int getDeviceInfo(); /** * Allocate and initialize host memory array with random values * @return 1 on success and 0 on failure */ int setupNBody(); /** * OpenCL related initialisations. * Set up Context, Device list, Command Queue, Memory buffers * @return 1 on success and 0 on failure */ int setupCL(); /** * Build CL kernel program executable * @return 1 on success and 0 on failure */ int setupCLProgram(); /** * Build kernels and set values for kernels' arguments * @return 1 on success and 0 on failure */ int setupCLKernels(); /** * Enqueue calls to the kernels * on to the command queue, wait till end of kernel execution. * Get kernel start and end time if timing is enabled * @return 1 on success and 0 on failure */ int runCLKernels(); /** * Load a .cl source file as a char* and it will be used * for as a parameter for creating a program * @return the char vector */ char * load_program_source(const char *filename); /** * Override from SDKSample * Run OpenCL NBody */ int run(); /** * Override from SDKSample * Cleanup memory allocations */ int cleanup(); /** * Writes a single snapshot on the output snapshot file. */ void put_snapshot(); /** * Writes diagnostics on the diagnostics file: * current time; number of integration steps so far; * kinetic, potential, and total energy; absolute and * relative energy errors since the start of the run. */ void write_diagnostics(); /** * Computes the potential energy of a single particle. 
*/ float epot_particle(int currPart); int runCLKernelLeapfrog(); int enqueueKernel(); }; #endif // NBODY_H_