Refactoring of tiles opencl implementation:

- Moved methods from NodeOperation to OpenCLDevice
- Added check on Nvidia for local size
This commit is contained in:
2012-06-20 20:05:21 +00:00
parent cf129d8cb5
commit 82bad4bd6c
14 changed files with 185 additions and 171 deletions

View File

@@ -23,11 +23,7 @@
#ifndef _COM_Device_h
#define _COM_Device_h
#include "COM_ExecutionSystem.h"
#include "COM_WorkPackage.h"
#include "COM_NodeOperation.h"
#include "BLI_rect.h"
#include "COM_MemoryBuffer.h"
/**
* @brief Abstract class for device implementations to be used by the Compositor.

View File

@@ -29,6 +29,7 @@
#include "COM_CompositorContext.h"
#include "DNA_node_types.h"
#include "BKE_text.h"
#include "COM_ExecutionSystem.h"
#include <vector>
#include <string>

View File

@@ -140,118 +140,3 @@ bool NodeOperation::determineDependingAreaOfInterest(rcti *input, ReadBufferOper
return false;
}
}
cl_mem NodeOperation::COM_clAttachMemoryBufferToKernelParameter(cl_context context, cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader)
{
cl_int error;
MemoryBuffer *result = (MemoryBuffer *)reader->initializeTileData(NULL, inputMemoryBuffers);
const cl_image_format imageFormat = {
CL_RGBA,
CL_FLOAT
};
cl_mem clBuffer = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, result->getWidth(),
result->getHeight(), 0, result->getBuffer(), &error);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
if (error == CL_SUCCESS) cleanup->push_back(clBuffer);
error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clBuffer);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, offsetIndex, result);
return clBuffer;
}
void NodeOperation::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffer)
{
if (offsetIndex != -1) {
cl_int error;
rcti *rect = memoryBuffer->getRect();
cl_int2 offset = {rect->xmin, rect->ymin};
error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
}
}
void NodeOperation::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex)
{
if (offsetIndex != -1) {
cl_int error;
cl_int2 offset = {this->getWidth(), this->getHeight()};
error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
}
}
void NodeOperation::COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer)
{
cl_int error;
error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
}
void NodeOperation::COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer)
{
cl_int error;
const size_t size[] = {outputMemoryBuffer->getWidth(), outputMemoryBuffer->getHeight()};
error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, size, 0, 0, 0, NULL);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
}
void NodeOperation::COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex)
{
cl_int error;
const int width = outputMemoryBuffer->getWidth();
const int height = outputMemoryBuffer->getHeight();
int offsetx;
int offsety;
const int localSize = 32;
size_t size[2];
cl_int2 offset;
bool breaked = false;
for (offsety = 0; offsety < height && (!breaked); offsety += localSize) {
offset[1] = offsety;
if (offsety + localSize < height) {
size[1] = localSize;
}
else {
size[1] = height - offsety;
}
for (offsetx = 0; offsetx < width && (!breaked); offsetx += localSize) {
if (offsetx + localSize < width) {
size[0] = localSize;
}
else {
size[0] = width - offsetx;
}
offset[0] = offsetx;
error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, size, 0, 0, 0, NULL);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
clFlush(queue);
if (isBreaked()) {
breaked = false;
}
}
}
}
cl_kernel NodeOperation::COM_clCreateKernel(cl_program program, const char *kernelname, list<cl_kernel> *clKernelsToCleanUp)
{
cl_int error;
cl_kernel kernel = clCreateKernel(program, kernelname, &error);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
else {
if (clKernelsToCleanUp) clKernelsToCleanUp->push_back(kernel);
}
return kernel;
}

View File

@@ -22,9 +22,7 @@
#ifndef _COM_NodeOperation_h
#define _COM_NodeOperation_h
class NodeOperation;
class OpenCLDevice;
#include "COM_Node.h"
#include <string>
#include <sstream>
@@ -150,7 +148,7 @@ public:
* @param memoryBuffers all input MemoryBuffer's needed
* @param outputBuffer the outputbuffer to write to
*/
virtual void executeOpenCLRegion(cl_context context, cl_program program, cl_command_queue queue, rcti *rect,
virtual void executeOpenCLRegion(OpenCLDevice* device, rcti *rect,
unsigned int chunkNumber, MemoryBuffer **memoryBuffers, MemoryBuffer *outputBuffer) {}
/**
@@ -165,7 +163,7 @@ public:
* @param clMemToCleanUp all created cl_mem references must be added to this list. Framework will clean this after execution
* @param clKernelsToCleanUp all created cl_kernel references must be added to this list. Framework will clean this after execution
*/
virtual void executeOpenCL(cl_context context, cl_program program, cl_command_queue queue, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp) {}
virtual void executeOpenCL(OpenCLDevice* device, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp) {}
virtual void deinitExecution();
bool isResolutionSet() {
@@ -272,15 +270,6 @@ protected:
* @brief set if this NodeOperation can be scheduled on a OpenCLDevice
*/
void setOpenCL(bool openCL) { this->openCL = openCL; }
static cl_mem COM_clAttachMemoryBufferToKernelParameter(cl_context context, cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader);
static void COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffers);
static void COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer);
void COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex);
static void COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer);
void COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex);
cl_kernel COM_clCreateKernel(cl_program program, const char *kernelname, list<cl_kernel> *clKernelsToCleanUp);
};
#endif

View File

@@ -23,13 +23,15 @@
#include "COM_OpenCLDevice.h"
#include "COM_WorkScheduler.h"
typedef enum COM_VendorID {NVIDIA=0x10DE, AMD=0x1002} COM_VendorID;
OpenCLDevice::OpenCLDevice(cl_context context, cl_device_id device, cl_program program)
OpenCLDevice::OpenCLDevice(cl_context context, cl_device_id device, cl_program program, cl_int vendorId)
{
this->device = device;
this->context = context;
this->program = program;
this->queue = NULL;
this->vendorID = vendorId;
}
bool OpenCLDevice::initialize()
@@ -56,10 +58,126 @@ void OpenCLDevice::execute(WorkPackage *work)
MemoryBuffer **inputBuffers = executionGroup->getInputBuffersOpenCL(chunkNumber);
MemoryBuffer *outputBuffer = executionGroup->allocateOutputBuffer(chunkNumber, &rect);
executionGroup->getOutputNodeOperation()->executeOpenCLRegion(this->context, this->program, this->queue, &rect,
executionGroup->getOutputNodeOperation()->executeOpenCLRegion(this, &rect,
chunkNumber, inputBuffers, outputBuffer);
delete outputBuffer;
executionGroup->finalizeChunkExecution(chunkNumber, inputBuffers);
}
cl_mem OpenCLDevice::COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader)
{
cl_int error;
MemoryBuffer *result = (MemoryBuffer *)reader->initializeTileData(NULL, inputMemoryBuffers);
const cl_image_format imageFormat = {
CL_RGBA,
CL_FLOAT
};
cl_mem clBuffer = clCreateImage2D(this->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, result->getWidth(),
result->getHeight(), 0, result->getBuffer(), &error);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
if (error == CL_SUCCESS) cleanup->push_back(clBuffer);
error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clBuffer);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, offsetIndex, result);
return clBuffer;
}
void OpenCLDevice::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffer)
{
if (offsetIndex != -1) {
cl_int error;
rcti *rect = memoryBuffer->getRect();
cl_int2 offset = {rect->xmin, rect->ymin};
error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
}
}
void OpenCLDevice::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation* operation)
{
if (offsetIndex != -1) {
cl_int error;
cl_int2 offset = {operation->getWidth(), operation->getHeight()};
error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
}
}
void OpenCLDevice::COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer)
{
cl_int error;
error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
}
void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer)
{
cl_int error;
const size_t size[] = {outputMemoryBuffer->getWidth(), outputMemoryBuffer->getHeight()};
error = clEnqueueNDRangeKernel(this->queue, kernel, 2, NULL, size, 0, 0, 0, NULL);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
}
void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex, NodeOperation* operation)
{
cl_int error;
const int width = outputMemoryBuffer->getWidth();
const int height = outputMemoryBuffer->getHeight();
int offsetx;
int offsety;
int localSize = 1024;
size_t size[2];
cl_int2 offset;
if (this->vendorID == NVIDIA){localSize = 32;}
bool breaked = false;
for (offsety = 0; offsety < height && (!breaked); offsety += localSize) {
offset[1] = offsety;
if (offsety + localSize < height) {
size[1] = localSize;
}
else {
size[1] = height - offsety;
}
for (offsetx = 0; offsetx < width && (!breaked); offsetx += localSize) {
if (offsetx + localSize < width) {
size[0] = localSize;
}
else {
size[0] = width - offsetx;
}
offset[0] = offsetx;
error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
error = clEnqueueNDRangeKernel(this->queue, kernel, 2, NULL, size, 0, 0, 0, NULL);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
clFlush(this->queue);
if (operation->isBreaked()) {
breaked = false;
}
}
}
}
cl_kernel OpenCLDevice::COM_clCreateKernel(const char *kernelname, list<cl_kernel> *clKernelsToCleanUp)
{
cl_int error;
cl_kernel kernel = clCreateKernel(this->program, kernelname, &error);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
else {
if (clKernelsToCleanUp) clKernelsToCleanUp->push_back(kernel);
}
return kernel;
}

View File

@@ -29,7 +29,6 @@ class OpenCLDevice;
#include "OCL_opencl.h"
#include "COM_WorkScheduler.h"
/**
* @brief device representing an GPU OpenCL device.
* an instance of this class represents a single cl_device
@@ -55,13 +54,21 @@ private:
* @brief opencl command queue
*/
cl_command_queue queue;
/**
* @brief opencl vendor ID
*/
cl_int vendorID;
public:
/**
* @brief constructor with opencl device
* @param context
* @param device
* @param program
* @param vendorID
*/
OpenCLDevice(cl_context context, cl_device_id device, cl_program program);
OpenCLDevice(cl_context context, cl_device_id device, cl_program program, cl_int vendorId);
/**
@@ -83,6 +90,18 @@ public:
* @param work the WorkPackage to execute
*/
void execute(WorkPackage *work);
cl_context getContext(){return this->context;}
cl_command_queue getQueue(){return this->queue;}
cl_mem COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader);
void COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffers);
void COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer);
void COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation* operation);
void COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer);
void COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex, NodeOperation* operation);
cl_kernel COM_clCreateKernel(const char *kernelname, list<cl_kernel> *clKernelsToCleanUp);
};
#endif

View File

@@ -24,7 +24,7 @@ class WorkPackage;
#ifndef _COM_WorkPackage_h_
#define _COM_WorkPackage_h_
class ExecutionGroup;
#include "COM_ExecutionGroup.h"
/**

View File

@@ -257,7 +257,10 @@ void WorkScheduler::initialize()
unsigned int indexDevices;
for (indexDevices = 0; indexDevices < totalNumberOfDevices; indexDevices++) {
cl_device_id device = cldevices[indexDevices];
OpenCLDevice *clDevice = new OpenCLDevice(context, device, program);
cl_int vendorID = 0;
cl_int error = clGetDeviceInfo(device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, NULL);
if (error!= CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
OpenCLDevice *clDevice = new OpenCLDevice(context, device, program, vendorID);
clDevice->initialize(),
gpudevices.push_back(clDevice);
if (G.f & G_DEBUG) {

View File

@@ -22,6 +22,7 @@
#include "COM_BokehBlurOperation.h"
#include "BLI_math.h"
#include "COM_OpenCLDevice.h"
extern "C" {
#include "RE_pipeline.h"
@@ -160,25 +161,25 @@ bool BokehBlurOperation::determineDependingAreaOfInterest(rcti *input, ReadBuffe
}
static cl_kernel kernel = 0;
void BokehBlurOperation::executeOpenCL(cl_context context, cl_program program, cl_command_queue queue,
void BokehBlurOperation::executeOpenCL(OpenCLDevice* device,
MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
list<cl_kernel> *clKernelsToCleanUp)
{
if (!kernel) {
kernel = COM_clCreateKernel(program, "bokehBlurKernel", NULL);
kernel = device->COM_clCreateKernel("bokehBlurKernel", NULL);
}
cl_int radius = this->getWidth() * this->size / 100.0f;
cl_int step = this->getStep();
COM_clAttachMemoryBufferToKernelParameter(context, kernel, 0, -1, clMemToCleanUp, inputMemoryBuffers, this->inputBoundingBoxReader);
COM_clAttachMemoryBufferToKernelParameter(context, kernel, 1, 4, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
COM_clAttachMemoryBufferToKernelParameter(context, kernel, 2, -1, clMemToCleanUp, inputMemoryBuffers, this->inputBokehProgram);
COM_clAttachOutputMemoryBufferToKernelParameter(kernel, 3, clOutputBuffer);
COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, 5, outputMemoryBuffer);
device->COM_clAttachMemoryBufferToKernelParameter(kernel, 0, -1, clMemToCleanUp, inputMemoryBuffers, this->inputBoundingBoxReader);
device->COM_clAttachMemoryBufferToKernelParameter(kernel, 1, 4, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
device->COM_clAttachMemoryBufferToKernelParameter(kernel, 2, -1, clMemToCleanUp, inputMemoryBuffers, this->inputBokehProgram);
device->COM_clAttachOutputMemoryBufferToKernelParameter(kernel, 3, clOutputBuffer);
device->COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, 5, outputMemoryBuffer);
clSetKernelArg(kernel, 6, sizeof(cl_int), &radius);
clSetKernelArg(kernel, 7, sizeof(cl_int), &step);
COM_clAttachSizeToKernelParameter(kernel, 8);
device->COM_clAttachSizeToKernelParameter(kernel, 8, this);
COM_clEnqueueRange(queue, kernel, outputMemoryBuffer, 9);
device->COM_clEnqueueRange(kernel, outputMemoryBuffer, 9, this);
}

View File

@@ -57,6 +57,6 @@ public:
void setSize(float size) { this->size = size; }
void executeOpenCL(cl_context context, cl_program program, cl_command_queue queue, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp);
void executeOpenCL(OpenCLDevice* device, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp);
};
#endif

View File

@@ -22,6 +22,7 @@
#include "COM_DilateErodeOperation.h"
#include "BLI_math.h"
#include "COM_OpenCLDevice.h"
// DilateErode Distance Threshold
DilateErodeThresholdOperation::DilateErodeThresholdOperation() : NodeOperation()
@@ -234,24 +235,24 @@ bool DilateDistanceOperation::determineDependingAreaOfInterest(rcti *input, Read
}
static cl_kernel dilateKernel = 0;
void DilateDistanceOperation::executeOpenCL(cl_context context, cl_program program, cl_command_queue queue,
void DilateDistanceOperation::executeOpenCL(OpenCLDevice* device,
MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
list<cl_kernel> *clKernelsToCleanUp)
{
if (!dilateKernel) {
dilateKernel = COM_clCreateKernel(program, "dilateKernel", NULL);
dilateKernel = device->COM_clCreateKernel("dilateKernel", NULL);
}
cl_int distanceSquared = this->distance * this->distance;
cl_int scope = this->scope;
COM_clAttachMemoryBufferToKernelParameter(context, dilateKernel, 0, 2, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
COM_clAttachOutputMemoryBufferToKernelParameter(dilateKernel, 1, clOutputBuffer);
COM_clAttachMemoryBufferOffsetToKernelParameter(dilateKernel, 3, outputMemoryBuffer);
device->COM_clAttachMemoryBufferToKernelParameter(dilateKernel, 0, 2, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
device->COM_clAttachOutputMemoryBufferToKernelParameter(dilateKernel, 1, clOutputBuffer);
device->COM_clAttachMemoryBufferOffsetToKernelParameter(dilateKernel, 3, outputMemoryBuffer);
clSetKernelArg(dilateKernel, 4, sizeof(cl_int), &scope);
clSetKernelArg(dilateKernel, 5, sizeof(cl_int), &distanceSquared);
COM_clAttachSizeToKernelParameter(dilateKernel, 6);
COM_clEnqueueRange(queue, dilateKernel, outputMemoryBuffer, 7);
device->COM_clAttachSizeToKernelParameter(dilateKernel, 6, this);
device->COM_clEnqueueRange(dilateKernel, outputMemoryBuffer, 7, this);
}
// Erode Distance
@@ -293,24 +294,24 @@ void ErodeDistanceOperation::executePixel(float *color, int x, int y, MemoryBuff
}
static cl_kernel erodeKernel = 0;
void ErodeDistanceOperation::executeOpenCL(cl_context context, cl_program program, cl_command_queue queue,
void ErodeDistanceOperation::executeOpenCL(OpenCLDevice* device,
MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
list<cl_kernel> *clKernelsToCleanUp)
{
if (!erodeKernel) {
erodeKernel = COM_clCreateKernel(program, "erodeKernel", NULL);
erodeKernel = device->COM_clCreateKernel("erodeKernel", NULL);
}
cl_int distanceSquared = this->distance * this->distance;
cl_int scope = this->scope;
COM_clAttachMemoryBufferToKernelParameter(context, erodeKernel, 0, 2, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
COM_clAttachOutputMemoryBufferToKernelParameter(erodeKernel, 1, clOutputBuffer);
COM_clAttachMemoryBufferOffsetToKernelParameter(erodeKernel, 3, outputMemoryBuffer);
device->COM_clAttachMemoryBufferToKernelParameter(erodeKernel, 0, 2, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
device->COM_clAttachOutputMemoryBufferToKernelParameter(erodeKernel, 1, clOutputBuffer);
device->COM_clAttachMemoryBufferOffsetToKernelParameter(erodeKernel, 3, outputMemoryBuffer);
clSetKernelArg(erodeKernel, 4, sizeof(cl_int), &scope);
clSetKernelArg(erodeKernel, 5, sizeof(cl_int), &distanceSquared);
COM_clAttachSizeToKernelParameter(erodeKernel, 6);
COM_clEnqueueRange(queue, erodeKernel, outputMemoryBuffer, 7);
device->COM_clAttachSizeToKernelParameter(erodeKernel, 6, this);
device->COM_clEnqueueRange(erodeKernel, outputMemoryBuffer, 7, this);
}
// Dilate step

View File

@@ -99,7 +99,7 @@ public:
void setDistance(float distance) { this->distance = distance; }
bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output);
void executeOpenCL(cl_context context, cl_program program, cl_command_queue queue,
void executeOpenCL(OpenCLDevice* device,
MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
list<cl_kernel> *clKernelsToCleanUp);
@@ -113,7 +113,7 @@ public:
*/
void executePixel(float *color, int x, int y, MemoryBuffer * inputBuffers[], void *data);
void executeOpenCL(cl_context context, cl_program program, cl_command_queue queue,
void executeOpenCL(OpenCLDevice* device,
MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
list<cl_kernel> *clKernelsToCleanUp);

View File

@@ -23,6 +23,7 @@
#include "COM_WriteBufferOperation.h"
#include "COM_defines.h"
#include <stdio.h>
#include "COM_OpenCLDevice.h"
WriteBufferOperation::WriteBufferOperation() : NodeOperation()
{
@@ -110,7 +111,7 @@ void WriteBufferOperation::executeRegion(rcti *rect, unsigned int tileNumber, Me
memoryBuffer->setCreatedState();
}
void WriteBufferOperation::executeOpenCLRegion(cl_context context, cl_program program, cl_command_queue queue, rcti *rect, unsigned int chunkNumber, MemoryBuffer **inputMemoryBuffers, MemoryBuffer *outputBuffer)
void WriteBufferOperation::executeOpenCLRegion(OpenCLDevice* device, rcti *rect, unsigned int chunkNumber, MemoryBuffer **inputMemoryBuffers, MemoryBuffer *outputBuffer)
{
float *outputFloatBuffer = outputBuffer->getBuffer();
cl_int error;
@@ -131,7 +132,7 @@ void WriteBufferOperation::executeOpenCLRegion(cl_context context, cl_program pr
CL_FLOAT
};
cl_mem clOutputBuffer = clCreateImage2D(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, outputBufferWidth, outputBufferHeight, 0, outputFloatBuffer, &error);
cl_mem clOutputBuffer = clCreateImage2D(device->getContext(), CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, outputBufferWidth, outputBufferHeight, 0, outputFloatBuffer, &error);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
// STEP 2
@@ -139,7 +140,7 @@ void WriteBufferOperation::executeOpenCLRegion(cl_context context, cl_program pr
clMemToCleanUp->push_back(clOutputBuffer);
list<cl_kernel> *clKernelsToCleanUp = new list<cl_kernel>();
this->input->executeOpenCL(context, program, queue, outputBuffer, clOutputBuffer, inputMemoryBuffers, clMemToCleanUp, clKernelsToCleanUp);
this->input->executeOpenCL(device, outputBuffer, clOutputBuffer, inputMemoryBuffers, clMemToCleanUp, clKernelsToCleanUp);
// STEP 3
@@ -149,9 +150,9 @@ void WriteBufferOperation::executeOpenCLRegion(cl_context context, cl_program pr
// clFlush(queue);
// clFinish(queue);
error = clEnqueueBarrier(queue);
error = clEnqueueBarrier(device->getQueue());
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
error = clEnqueueReadImage(queue, clOutputBuffer, CL_TRUE, origin, region, 0, 0, outputFloatBuffer, 0, NULL, NULL);
error = clEnqueueReadImage(device->getQueue(), clOutputBuffer, CL_TRUE, origin, region, 0, 0, outputFloatBuffer, 0, NULL, NULL);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
this->getMemoryProxy()->getBuffer()->copyContentFrom(outputBuffer);

View File

@@ -44,7 +44,7 @@ public:
void executeRegion(rcti *rect, unsigned int tileNumber, MemoryBuffer **memoryBuffers);
void initExecution();
void deinitExecution();
void executeOpenCLRegion(cl_context context, cl_program program, cl_command_queue queue, rcti *rect, unsigned int chunkNumber, MemoryBuffer **memoryBuffers, MemoryBuffer *outputBuffer);
void executeOpenCLRegion(OpenCLDevice* device, rcti *rect, unsigned int chunkNumber, MemoryBuffer **memoryBuffers, MemoryBuffer *outputBuffer);
void readResolutionFromInputSocket();
};