#include "NBody.hpp"

// Body count, defined elsewhere. In this file it sets the kernel's global
// work size and the element count of every device-to-host buffer read.
extern int numBodies;

/////////////////////////////////// NBody::runCLKernels Func \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
//
// Enqueue the kernel onto the command queue, wait until it has finished
// executing, then read the updatedPos buffer back into pos.
//
/////////////////////////////////// NBody::runCLKernels Func \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

int NBody::runCLKernels()
{
    cl_int   status;
    cl_event events[1];

    /* 
     * Enqueue a kernel run call.
     */
    size_t globalThreads[] = {numBodies};
    size_t localThreads[] = {GROUP_SIZE};

	if(localThreads[0] > maxWorkItemSizes[0] || localThreads[0] > maxWorkGroupSize)
	{
		std::cout<<"Unsupported: Device does not support requested number of work items.";
				 
		return 1;
	}

    status = clEnqueueNDRangeKernel(
                 commandQueue,
                 kernel,
                 1,
                 NULL,
                 globalThreads,
                 localThreads,
                 0,
                 NULL,
                 NULL);

	if(status != CL_SUCCESS)
    {
        std::cout << "clEnqueueNDRangeKernel failed." << std::endl;
        return 1;
    }

	status = clFinish(commandQueue);
	if(status != CL_SUCCESS)
    {
        std::cout << "clFinish failed." << std::endl;
        return 1;
    }

    /* Enqueue readBuffer*/
    status = clEnqueueReadBuffer(
                commandQueue,
                updatedPos,
                CL_TRUE,
                0,
                numBodies* sizeof(cl_float4),
                pos,
                0,
                NULL,
                &events[0]);

	if(status != CL_SUCCESS)
    {
        std::cout << "clEnqueueReadBuffer failed." << std::endl;
        return 1;
    }
    
    /* Wait for the read buffer to finish execution */
    status = clWaitForEvents(1, &events[0]);
	if(status != CL_SUCCESS)
    {
        std::cout << "clWaitForEvents failed." << std::endl;
        return 1;
    }
    
    clReleaseEvent(events[0]);

    return 0;
}


/**
 * Advances vel and pos on the host by one step of size delT.
 *
 * Sequence: (1) on the first call only, run enqueueKernel() once so that acc
 * is populated; (2) add acc*dt/2 to vel and then vel*dt to pos; (3) run
 * enqueueKernel() again to refresh acc and collTime; (4) add the second
 * acc*dt/2 to vel.
 *
 * vel, pos and acc are indexed with a stride of 4 floats per particle, of
 * which only the first three components are updated here.
 *
 * NOTE(review): the status returned by enqueueKernel() is discarded, so a
 * failed kernel launch or read-back is not reported to the caller.
 *
 * @return always 0.
 */
int NBody::runCLKernelLeapfrog()
{
    // One-off priming pass; the flag is cleared so it never runs again.
    if (initialAccComputation) {
        enqueueKernel();
        initialAccComputation = 0;
    }

    // Smallest collTime entry. Currently unused: the "* min" scaling of the
    // time step below is commented out.
    float smallest = collTime[0];
    for (int p = 1; p < numParticles; ++p) {
        if (collTime[p] < smallest)
            smallest = collTime[p];
    }

    cl_float dt = static_cast<float>(delT); //* smallest;

    // First half of the velocity update, then the full position update.
    // Each component's position depends only on that component's velocity,
    // so both updates are done in a single pass over the components.
    for (int p = 0; p < numParticles; ++p) {
        const int base = p * 4;
        for (int c = 0; c < 3; ++c) {
            vel[base + c] += acc[base + c] * dt / 2;
            pos[base + c] += vel[base + c] * dt;
        }
    }

    // Refresh acc and collTime from the device.
    enqueueKernel();

    smallest = collTime[0];
    for (int p = 1; p < numParticles; ++p) {
        if (collTime[p] < smallest)
            smallest = collTime[p];
    }

    dt = static_cast<float>(delT); //* smallest;

    // Second half of the velocity update, using the refreshed acc.
    for (int p = 0; p < numParticles; ++p) {
        const int base = p * 4;
        for (int c = 0; c < 3; ++c)
            vel[base + c] += acc[base + c] * dt / 2;
    }

    return 0;
}

int NBody::enqueueKernel()
{
	cl_int   status;
    cl_event events[1];

	/* 
     * Enqueue a kernel run call.
     */
    size_t globalThreads[] = {numBodies};
    size_t localThreads[] = {GROUP_SIZE};

	if(localThreads[0] > maxWorkItemSizes[0] || localThreads[0] > maxWorkGroupSize)
	{
		std::cout << "Unsupported: Device does not support requested number of work items." << std::endl;
				 
		return 1;
	}


	status = clEnqueueNDRangeKernel(
                 commandQueue,
                 kernel,
                 1,
                 NULL,
                 globalThreads,
                 localThreads,
                 0,
                 NULL,
                 NULL);

	if(status != CL_SUCCESS)
    {
        std::cout << "clEnqueueNDRangeKernel failed." << std::endl;
        return 1;
    }

	status = clFinish(commandQueue);
	if(status != CL_SUCCESS)
    {
        std::cout << "clFinish failed." << std::endl;
        return 1;
    }

    /* Enqueue readBuffer*/
    status = clEnqueueReadBuffer(
                commandQueue,
                updatedAcc,
                CL_TRUE,
                0,
                numBodies* sizeof(cl_float4),
                acc,
                0,
                NULL,
                &events[0]);

	if(status != CL_SUCCESS)
    {
        std::cout << "clEnqueueReadBuffer failed." << std::endl;
        return 1;
    }
    
    /* Wait for the read buffer to finish execution */
    status = clWaitForEvents(1, &events[0]);
	if(status != CL_SUCCESS)
    {
        std::cout << "clWaitForEvents failed." << std::endl;
        return 1;
    }

	/* Enqueue readBuffer*/
    status = clEnqueueReadBuffer(
                commandQueue,
                updatedCollTime,
                CL_TRUE,
                0,
                numBodies* sizeof(cl_float),
                collTime,
                0,
                NULL,
                &events[0]);

	if(status != CL_SUCCESS)
    {
        std::cout << "clEnqueueReadBuffer failed." << std::endl;
        return 1;
    }
    
    /* Wait for the read buffer to finish execution */
    status = clWaitForEvents(1, &events[0]);
	if(status != CL_SUCCESS)
    {
        std::cout << "clWaitForEvents failed." << std::endl;
        return 1;
    }
    
    clReleaseEvent(events[0]);
}