Refactoring of the tiles OpenCL implementation:
- Moved methods from NodeOperation to OpenCLDevice
- Added check on Nvidia for local size
This commit is contained in:
@@ -23,11 +23,7 @@
|
||||
#ifndef _COM_Device_h
|
||||
#define _COM_Device_h
|
||||
|
||||
#include "COM_ExecutionSystem.h"
|
||||
#include "COM_WorkPackage.h"
|
||||
#include "COM_NodeOperation.h"
|
||||
#include "BLI_rect.h"
|
||||
#include "COM_MemoryBuffer.h"
|
||||
|
||||
/**
|
||||
* @brief Abstract class for device implementations to be used by the Compositor.
|
||||
|
@@ -29,6 +29,7 @@
|
||||
#include "COM_CompositorContext.h"
|
||||
#include "DNA_node_types.h"
|
||||
#include "BKE_text.h"
|
||||
#include "COM_ExecutionSystem.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
|
@@ -140,118 +140,3 @@ bool NodeOperation::determineDependingAreaOfInterest(rcti *input, ReadBufferOper
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Expose the tile data of an input reader to a kernel as a read-only RGBA float image.
 *
 * @param context OpenCL context the image is created in
 * @param kernel kernel that receives the image
 * @param parameterIndex index of the kernel argument that receives the image
 * @param offsetIndex index of the kernel argument that receives the buffer offset (-1: no offset is attached)
 * @param cleanup list the created cl_mem is appended to when the creation succeeded
 * @param inputMemoryBuffers input buffers handed to the reader
 * @param reader reader whose initializeTileData() result supplies the pixel data
 * @return the cl_mem handle returned by clCreateImage2D
 */
cl_mem NodeOperation::COM_clAttachMemoryBufferToKernelParameter(cl_context context, cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader)
{
	MemoryBuffer *tileBuffer = (MemoryBuffer *)reader->initializeTileData(NULL, inputMemoryBuffers);

	const cl_image_format rgbaFloatFormat = {CL_RGBA, CL_FLOAT};

	/* the image uses the host memory of the tile buffer directly (CL_MEM_USE_HOST_PTR) */
	cl_int status;
	cl_mem image = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &rgbaFloatFormat,
	                               tileBuffer->getWidth(), tileBuffer->getHeight(), 0,
	                               tileBuffer->getBuffer(), &status);
	if (status == CL_SUCCESS) {
		cleanup->push_back(image);
	}
	else {
		printf("CLERROR[%d]: %s\n", status, clewErrorString(status));
	}

	status = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &image);
	if (status != CL_SUCCESS) {
		printf("CLERROR[%d]: %s\n", status, clewErrorString(status));
	}

	COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, offsetIndex, tileBuffer);
	return image;
}
|
||||
|
||||
/**
 * @brief Pass the (xmin, ymin) corner of a memory buffer's rect to a kernel argument.
 *
 * @param kernel kernel that receives the offset
 * @param offsetIndex index of the kernel argument; nothing is done when it is -1
 * @param memoryBuffer buffer whose rect supplies the offset
 */
void NodeOperation::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffer)
{
	if (offsetIndex == -1) {
		return;
	}

	rcti *bufferRect = memoryBuffer->getRect();
	cl_int2 origin = {bufferRect->xmin, bufferRect->ymin};

	const cl_int status = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &origin);
	if (status != CL_SUCCESS) {
		printf("CLERROR[%d]: %s\n", status, clewErrorString(status));
	}
}
|
||||
|
||||
/**
 * @brief Pass the width and height of this operation to a kernel argument.
 *
 * @param kernel kernel that receives the size
 * @param offsetIndex index of the kernel argument; nothing is done when it is -1
 */
void NodeOperation::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex)
{
	if (offsetIndex == -1) {
		return;
	}

	cl_int2 dimensions = {this->getWidth(), this->getHeight()};

	const cl_int status = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &dimensions);
	if (status != CL_SUCCESS) {
		printf("CLERROR[%d]: %s\n", status, clewErrorString(status));
	}
}
|
||||
|
||||
/**
 * @brief Bind the output cl_mem to a kernel argument.
 *
 * @param kernel kernel that receives the output buffer
 * @param parameterIndex index of the kernel argument
 * @param clOutputMemoryBuffer the cl_mem handle to bind
 */
void NodeOperation::COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer)
{
	const cl_int status = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer);
	if (status != CL_SUCCESS) {
		printf("CLERROR[%d]: %s\n", status, clewErrorString(status));
	}
}
|
||||
|
||||
/**
 * @brief Enqueue a kernel once over the full width x height of the output buffer.
 *
 * @param queue command queue the kernel is enqueued on
 * @param kernel kernel to enqueue
 * @param outputMemoryBuffer buffer whose width and height form the 2D global work size
 */
void NodeOperation::COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer)
{
	const size_t globalSize[] = {outputMemoryBuffer->getWidth(), outputMemoryBuffer->getHeight()};

	const cl_int status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, 0, 0, 0, NULL);
	if (status != CL_SUCCESS) {
		printf("CLERROR[%d]: %s\n", status, clewErrorString(status));
	}
}
|
||||
|
||||
/**
 * @brief Enqueue a kernel over the output buffer in sub-ranges of at most localSize x localSize.
 *
 * The output area is walked row by row in steps of localSize. For every step the start
 * position (x, y) is written to the kernel argument at offsetIndex, the kernel is enqueued
 * for that sub-range and the queue is flushed. After each sub-range isBreaked() is polled;
 * once it reports true no further sub-ranges are enqueued.
 *
 * @param queue command queue the kernel is enqueued on
 * @param kernel kernel to enqueue
 * @param outputMemoryBuffer buffer whose width and height define the total area
 * @param offsetIndex index of the kernel argument that receives the sub-range start
 */
void NodeOperation::COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex)
{
	cl_int error;
	const int width = outputMemoryBuffer->getWidth();
	const int height = outputMemoryBuffer->getHeight();
	const int localSize = 32;
	size_t size[2];
	cl_int2 offset;

	bool breaked = false;
	for (int offsety = 0; offsety < height && (!breaked); offsety += localSize) {
		offset[1] = offsety;
		/* the last row of sub-ranges may be shorter than localSize */
		size[1] = (offsety + localSize < height) ? localSize : height - offsety;

		for (int offsetx = 0; offsetx < width && (!breaked); offsetx += localSize) {
			/* the last column of sub-ranges may be narrower than localSize */
			size[0] = (offsetx + localSize < width) ? localSize : width - offsetx;
			offset[0] = offsetx;

			error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
			if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
			error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, size, 0, 0, 0, NULL);
			if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
			clFlush(queue);

			if (isBreaked()) {
				/* was 'false', which left the flag unchanged so the loops never stopped early */
				breaked = true;
			}
		}
	}
}
|
||||
|
||||
/**
 * @brief Create a kernel by name from a program.
 *
 * @param program program to create the kernel from
 * @param kernelname name of the kernel function
 * @param clKernelsToCleanUp optional list (may be NULL); a successfully created kernel is appended to it
 * @return the handle returned by clCreateKernel
 */
cl_kernel NodeOperation::COM_clCreateKernel(cl_program program, const char *kernelname, list<cl_kernel> *clKernelsToCleanUp)
{
	cl_int status;
	cl_kernel created = clCreateKernel(program, kernelname, &status);

	if (status != CL_SUCCESS) {
		printf("CLERROR[%d]: %s\n", status, clewErrorString(status));
	}
	else if (clKernelsToCleanUp) {
		clKernelsToCleanUp->push_back(created);
	}

	return created;
}
|
||||
|
@@ -22,9 +22,7 @@
|
||||
|
||||
#ifndef _COM_NodeOperation_h
|
||||
#define _COM_NodeOperation_h
|
||||
|
||||
class NodeOperation;
|
||||
|
||||
class OpenCLDevice;
|
||||
#include "COM_Node.h"
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
@@ -150,7 +148,7 @@ public:
|
||||
* @param memoryBuffers all input MemoryBuffer's needed
|
||||
* @param outputBuffer the outputbuffer to write to
|
||||
*/
|
||||
virtual void executeOpenCLRegion(cl_context context, cl_program program, cl_command_queue queue, rcti *rect,
|
||||
virtual void executeOpenCLRegion(OpenCLDevice* device, rcti *rect,
|
||||
unsigned int chunkNumber, MemoryBuffer **memoryBuffers, MemoryBuffer *outputBuffer) {}
|
||||
|
||||
/**
|
||||
@@ -165,7 +163,7 @@ public:
|
||||
* @param clMemToCleanUp all created cl_mem references must be added to this list. Framework will clean this after execution
|
||||
* @param clKernelsToCleanUp all created cl_kernel references must be added to this list. Framework will clean this after execution
|
||||
*/
|
||||
virtual void executeOpenCL(cl_context context, cl_program program, cl_command_queue queue, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp) {}
|
||||
virtual void executeOpenCL(OpenCLDevice* device, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp) {}
|
||||
virtual void deinitExecution();
|
||||
|
||||
bool isResolutionSet() {
|
||||
@@ -272,15 +270,6 @@ protected:
|
||||
* @brief set if this NodeOperation can be scheduled on a OpenCLDevice
|
||||
*/
|
||||
void setOpenCL(bool openCL) { this->openCL = openCL; }
|
||||
|
||||
static cl_mem COM_clAttachMemoryBufferToKernelParameter(cl_context context, cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader);
|
||||
static void COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffers);
|
||||
static void COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer);
|
||||
void COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex);
|
||||
static void COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer);
|
||||
void COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex);
|
||||
cl_kernel COM_clCreateKernel(cl_program program, const char *kernelname, list<cl_kernel> *clKernelsToCleanUp);
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@@ -23,13 +23,15 @@
|
||||
#include "COM_OpenCLDevice.h"
|
||||
#include "COM_WorkScheduler.h"
|
||||
|
||||
/* Vendor identifiers that are compared against the value queried with CL_DEVICE_VENDOR_ID. */
typedef enum COM_VendorID {NVIDIA=0x10DE, AMD=0x1002} COM_VendorID;
|
||||
|
||||
OpenCLDevice::OpenCLDevice(cl_context context, cl_device_id device, cl_program program)
|
||||
/**
 * @brief Store the OpenCL handles and the vendor id of a device.
 *
 * @param context OpenCL context to store
 * @param device OpenCL device id to store
 * @param program OpenCL program to store
 * @param vendorId vendor identifier to store
 */
OpenCLDevice::OpenCLDevice(cl_context context, cl_device_id device, cl_program program, cl_int vendorId)
{
	/* values handed in by the caller */
	this->context = context;
	this->device = device;
	this->program = program;
	this->vendorID = vendorId;

	/* the queue starts out unset */
	this->queue = NULL;
}
|
||||
|
||||
bool OpenCLDevice::initialize()
|
||||
@@ -56,10 +58,126 @@ void OpenCLDevice::execute(WorkPackage *work)
|
||||
MemoryBuffer **inputBuffers = executionGroup->getInputBuffersOpenCL(chunkNumber);
|
||||
MemoryBuffer *outputBuffer = executionGroup->allocateOutputBuffer(chunkNumber, &rect);
|
||||
|
||||
executionGroup->getOutputNodeOperation()->executeOpenCLRegion(this->context, this->program, this->queue, &rect,
|
||||
executionGroup->getOutputNodeOperation()->executeOpenCLRegion(this, &rect,
|
||||
chunkNumber, inputBuffers, outputBuffer);
|
||||
|
||||
delete outputBuffer;
|
||||
|
||||
executionGroup->finalizeChunkExecution(chunkNumber, inputBuffers);
|
||||
}
|
||||
|
||||
/**
 * @brief Expose the tile data of an input reader to a kernel as a read-only RGBA float image.
 *
 * The image is created in the context of this device.
 *
 * @param kernel kernel that receives the image
 * @param parameterIndex index of the kernel argument that receives the image
 * @param offsetIndex index of the kernel argument that receives the buffer offset (-1: no offset is attached)
 * @param cleanup list the created cl_mem is appended to when the creation succeeded
 * @param inputMemoryBuffers input buffers handed to the reader
 * @param reader reader whose initializeTileData() result supplies the pixel data
 * @return the cl_mem handle returned by clCreateImage2D
 */
cl_mem OpenCLDevice::COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader)
{
	MemoryBuffer *tileBuffer = (MemoryBuffer *)reader->initializeTileData(NULL, inputMemoryBuffers);

	const cl_image_format rgbaFloatFormat = {CL_RGBA, CL_FLOAT};

	/* the image uses the host memory of the tile buffer directly (CL_MEM_USE_HOST_PTR) */
	cl_int status;
	cl_mem image = clCreateImage2D(this->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &rgbaFloatFormat,
	                               tileBuffer->getWidth(), tileBuffer->getHeight(), 0,
	                               tileBuffer->getBuffer(), &status);
	if (status == CL_SUCCESS) {
		cleanup->push_back(image);
	}
	else {
		printf("CLERROR[%d]: %s\n", status, clewErrorString(status));
	}

	status = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &image);
	if (status != CL_SUCCESS) {
		printf("CLERROR[%d]: %s\n", status, clewErrorString(status));
	}

	COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, offsetIndex, tileBuffer);
	return image;
}
|
||||
|
||||
/**
 * @brief Pass the (xmin, ymin) corner of a memory buffer's rect to a kernel argument.
 *
 * @param kernel kernel that receives the offset
 * @param offsetIndex index of the kernel argument; nothing is done when it is -1
 * @param memoryBuffer buffer whose rect supplies the offset
 */
void OpenCLDevice::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffer)
{
	if (offsetIndex == -1) {
		return;
	}

	rcti *bufferRect = memoryBuffer->getRect();
	cl_int2 origin = {bufferRect->xmin, bufferRect->ymin};

	const cl_int status = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &origin);
	if (status != CL_SUCCESS) {
		printf("CLERROR[%d]: %s\n", status, clewErrorString(status));
	}
}
|
||||
|
||||
/**
 * @brief Pass the width and height of an operation to a kernel argument.
 *
 * @param kernel kernel that receives the size
 * @param offsetIndex index of the kernel argument; nothing is done when it is -1
 * @param operation operation whose getWidth()/getHeight() supply the size
 */
void OpenCLDevice::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation* operation)
{
	if (offsetIndex == -1) {
		return;
	}

	cl_int2 dimensions = {operation->getWidth(), operation->getHeight()};

	const cl_int status = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &dimensions);
	if (status != CL_SUCCESS) {
		printf("CLERROR[%d]: %s\n", status, clewErrorString(status));
	}
}
|
||||
|
||||
/**
 * @brief Bind the output cl_mem to a kernel argument.
 *
 * @param kernel kernel that receives the output buffer
 * @param parameterIndex index of the kernel argument
 * @param clOutputMemoryBuffer the cl_mem handle to bind
 */
void OpenCLDevice::COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer)
{
	const cl_int status = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer);
	if (status != CL_SUCCESS) {
		printf("CLERROR[%d]: %s\n", status, clewErrorString(status));
	}
}
|
||||
|
||||
/**
 * @brief Enqueue a kernel once over the full width x height of the output buffer.
 *
 * The kernel is enqueued on the command queue of this device.
 *
 * @param kernel kernel to enqueue
 * @param outputMemoryBuffer buffer whose width and height form the 2D global work size
 */
void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer)
{
	const size_t globalSize[] = {outputMemoryBuffer->getWidth(), outputMemoryBuffer->getHeight()};

	const cl_int status = clEnqueueNDRangeKernel(this->queue, kernel, 2, NULL, globalSize, 0, 0, 0, NULL);
	if (status != CL_SUCCESS) {
		printf("CLERROR[%d]: %s\n", status, clewErrorString(status));
	}
}
|
||||
|
||||
/**
 * @brief Enqueue a kernel over the output buffer in sub-ranges of at most localSize x localSize.
 *
 * The output area is walked row by row in steps of localSize (32 when the vendor id of this
 * device equals NVIDIA, 1024 otherwise). For every step the start position (x, y) is written
 * to the kernel argument at offsetIndex, the kernel is enqueued for that sub-range on the
 * queue of this device and the queue is flushed. After each sub-range operation->isBreaked()
 * is polled; once it reports true no further sub-ranges are enqueued.
 *
 * @param kernel kernel to enqueue
 * @param outputMemoryBuffer buffer whose width and height define the total area
 * @param offsetIndex index of the kernel argument that receives the sub-range start
 * @param operation operation that is polled for cancellation via isBreaked()
 */
void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex, NodeOperation* operation)
{
	cl_int error;
	const int width = outputMemoryBuffer->getWidth();
	const int height = outputMemoryBuffer->getHeight();
	const int localSize = (this->vendorID == NVIDIA) ? 32 : 1024;
	size_t size[2];
	cl_int2 offset;

	bool breaked = false;
	for (int offsety = 0; offsety < height && (!breaked); offsety += localSize) {
		offset[1] = offsety;
		/* the last row of sub-ranges may be shorter than localSize */
		size[1] = (offsety + localSize < height) ? localSize : height - offsety;

		for (int offsetx = 0; offsetx < width && (!breaked); offsetx += localSize) {
			/* the last column of sub-ranges may be narrower than localSize */
			size[0] = (offsetx + localSize < width) ? localSize : width - offsetx;
			offset[0] = offsetx;

			error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
			if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
			error = clEnqueueNDRangeKernel(this->queue, kernel, 2, NULL, size, 0, 0, 0, NULL);
			if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
			clFlush(this->queue);

			if (operation->isBreaked()) {
				/* was 'false', which left the flag unchanged so the loops never stopped early */
				breaked = true;
			}
		}
	}
}
|
||||
|
||||
/**
 * @brief Create a kernel by name from the program of this device.
 *
 * @param kernelname name of the kernel function
 * @param clKernelsToCleanUp optional list (may be NULL); a successfully created kernel is appended to it
 * @return the handle returned by clCreateKernel
 */
cl_kernel OpenCLDevice::COM_clCreateKernel(const char *kernelname, list<cl_kernel> *clKernelsToCleanUp)
{
	cl_int status;
	cl_kernel created = clCreateKernel(this->program, kernelname, &status);

	if (status != CL_SUCCESS) {
		printf("CLERROR[%d]: %s\n", status, clewErrorString(status));
	}
	else if (clKernelsToCleanUp) {
		clKernelsToCleanUp->push_back(created);
	}

	return created;
}
|
||||
|
@@ -29,7 +29,6 @@ class OpenCLDevice;
|
||||
#include "OCL_opencl.h"
|
||||
#include "COM_WorkScheduler.h"
|
||||
|
||||
|
||||
/**
|
||||
* @brief device representing an GPU OpenCL device.
|
||||
* an instance of this class represents a single cl_device
|
||||
@@ -55,13 +54,21 @@ private:
|
||||
* @brief opencl command queue
|
||||
*/
|
||||
cl_command_queue queue;
|
||||
|
||||
/**
|
||||
* @brief opencl vendor ID
|
||||
*/
|
||||
cl_int vendorID;
|
||||
|
||||
public:
|
||||
/**
|
||||
* @brief constructor with opencl device
|
||||
* @param context
|
||||
* @param device
|
||||
* @param program
|
||||
* @param vendorID
|
||||
*/
|
||||
OpenCLDevice(cl_context context, cl_device_id device, cl_program program);
|
||||
OpenCLDevice(cl_context context, cl_device_id device, cl_program program, cl_int vendorId);
|
||||
|
||||
|
||||
/**
|
||||
@@ -83,6 +90,18 @@ public:
|
||||
* @param work the WorkPackage to execute
|
||||
*/
|
||||
void execute(WorkPackage *work);
|
||||
|
||||
/** @brief return the OpenCL context stored in this device */
cl_context getContext(){return this->context;}
|
||||
|
||||
/** @brief return the OpenCL command queue stored in this device */
cl_command_queue getQueue(){return this->queue;}
|
||||
|
||||
cl_mem COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader);
|
||||
void COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffers);
|
||||
void COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer);
|
||||
void COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation* operation);
|
||||
void COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer);
|
||||
void COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex, NodeOperation* operation);
|
||||
cl_kernel COM_clCreateKernel(const char *kernelname, list<cl_kernel> *clKernelsToCleanUp);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@@ -24,7 +24,7 @@ class WorkPackage;
|
||||
|
||||
#ifndef _COM_WorkPackage_h_
|
||||
#define _COM_WorkPackage_h_
|
||||
|
||||
class ExecutionGroup;
|
||||
#include "COM_ExecutionGroup.h"
|
||||
|
||||
/**
|
||||
|
@@ -257,7 +257,10 @@ void WorkScheduler::initialize()
|
||||
unsigned int indexDevices;
|
||||
for (indexDevices = 0; indexDevices < totalNumberOfDevices; indexDevices++) {
|
||||
cl_device_id device = cldevices[indexDevices];
|
||||
OpenCLDevice *clDevice = new OpenCLDevice(context, device, program);
|
||||
cl_int vendorID = 0;
|
||||
cl_int error = clGetDeviceInfo(device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, NULL);
|
||||
if (error!= CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
|
||||
OpenCLDevice *clDevice = new OpenCLDevice(context, device, program, vendorID);
|
||||
clDevice->initialize(),
|
||||
gpudevices.push_back(clDevice);
|
||||
if (G.f & G_DEBUG) {
|
||||
|
@@ -22,6 +22,7 @@
|
||||
|
||||
#include "COM_BokehBlurOperation.h"
|
||||
#include "BLI_math.h"
|
||||
#include "COM_OpenCLDevice.h"
|
||||
|
||||
extern "C" {
|
||||
#include "RE_pipeline.h"
|
||||
@@ -160,25 +161,25 @@ bool BokehBlurOperation::determineDependingAreaOfInterest(rcti *input, ReadBuffe
|
||||
}
|
||||
|
||||
static cl_kernel kernel = 0;
|
||||
void BokehBlurOperation::executeOpenCL(cl_context context, cl_program program, cl_command_queue queue,
|
||||
void BokehBlurOperation::executeOpenCL(OpenCLDevice* device,
|
||||
MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
|
||||
MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
|
||||
list<cl_kernel> *clKernelsToCleanUp)
|
||||
{
|
||||
if (!kernel) {
|
||||
kernel = COM_clCreateKernel(program, "bokehBlurKernel", NULL);
|
||||
kernel = device->COM_clCreateKernel("bokehBlurKernel", NULL);
|
||||
}
|
||||
cl_int radius = this->getWidth() * this->size / 100.0f;
|
||||
cl_int step = this->getStep();
|
||||
|
||||
COM_clAttachMemoryBufferToKernelParameter(context, kernel, 0, -1, clMemToCleanUp, inputMemoryBuffers, this->inputBoundingBoxReader);
|
||||
COM_clAttachMemoryBufferToKernelParameter(context, kernel, 1, 4, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
|
||||
COM_clAttachMemoryBufferToKernelParameter(context, kernel, 2, -1, clMemToCleanUp, inputMemoryBuffers, this->inputBokehProgram);
|
||||
COM_clAttachOutputMemoryBufferToKernelParameter(kernel, 3, clOutputBuffer);
|
||||
COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, 5, outputMemoryBuffer);
|
||||
device->COM_clAttachMemoryBufferToKernelParameter(kernel, 0, -1, clMemToCleanUp, inputMemoryBuffers, this->inputBoundingBoxReader);
|
||||
device->COM_clAttachMemoryBufferToKernelParameter(kernel, 1, 4, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
|
||||
device->COM_clAttachMemoryBufferToKernelParameter(kernel, 2, -1, clMemToCleanUp, inputMemoryBuffers, this->inputBokehProgram);
|
||||
device->COM_clAttachOutputMemoryBufferToKernelParameter(kernel, 3, clOutputBuffer);
|
||||
device->COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, 5, outputMemoryBuffer);
|
||||
clSetKernelArg(kernel, 6, sizeof(cl_int), &radius);
|
||||
clSetKernelArg(kernel, 7, sizeof(cl_int), &step);
|
||||
COM_clAttachSizeToKernelParameter(kernel, 8);
|
||||
device->COM_clAttachSizeToKernelParameter(kernel, 8, this);
|
||||
|
||||
COM_clEnqueueRange(queue, kernel, outputMemoryBuffer, 9);
|
||||
device->COM_clEnqueueRange(kernel, outputMemoryBuffer, 9, this);
|
||||
}
|
||||
|
@@ -57,6 +57,6 @@ public:
|
||||
|
||||
void setSize(float size) { this->size = size; }
|
||||
|
||||
void executeOpenCL(cl_context context, cl_program program, cl_command_queue queue, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp);
|
||||
void executeOpenCL(OpenCLDevice* device, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp);
|
||||
};
|
||||
#endif
|
||||
|
@@ -22,6 +22,7 @@
|
||||
|
||||
#include "COM_DilateErodeOperation.h"
|
||||
#include "BLI_math.h"
|
||||
#include "COM_OpenCLDevice.h"
|
||||
|
||||
// DilateErode Distance Threshold
|
||||
DilateErodeThresholdOperation::DilateErodeThresholdOperation() : NodeOperation()
|
||||
@@ -234,24 +235,24 @@ bool DilateDistanceOperation::determineDependingAreaOfInterest(rcti *input, Read
|
||||
}
|
||||
|
||||
static cl_kernel dilateKernel = 0;
|
||||
void DilateDistanceOperation::executeOpenCL(cl_context context, cl_program program, cl_command_queue queue,
|
||||
void DilateDistanceOperation::executeOpenCL(OpenCLDevice* device,
|
||||
MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
|
||||
MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
|
||||
list<cl_kernel> *clKernelsToCleanUp)
|
||||
{
|
||||
if (!dilateKernel) {
|
||||
dilateKernel = COM_clCreateKernel(program, "dilateKernel", NULL);
|
||||
dilateKernel = device->COM_clCreateKernel("dilateKernel", NULL);
|
||||
}
|
||||
cl_int distanceSquared = this->distance * this->distance;
|
||||
cl_int scope = this->scope;
|
||||
|
||||
COM_clAttachMemoryBufferToKernelParameter(context, dilateKernel, 0, 2, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
|
||||
COM_clAttachOutputMemoryBufferToKernelParameter(dilateKernel, 1, clOutputBuffer);
|
||||
COM_clAttachMemoryBufferOffsetToKernelParameter(dilateKernel, 3, outputMemoryBuffer);
|
||||
device->COM_clAttachMemoryBufferToKernelParameter(dilateKernel, 0, 2, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
|
||||
device->COM_clAttachOutputMemoryBufferToKernelParameter(dilateKernel, 1, clOutputBuffer);
|
||||
device->COM_clAttachMemoryBufferOffsetToKernelParameter(dilateKernel, 3, outputMemoryBuffer);
|
||||
clSetKernelArg(dilateKernel, 4, sizeof(cl_int), &scope);
|
||||
clSetKernelArg(dilateKernel, 5, sizeof(cl_int), &distanceSquared);
|
||||
COM_clAttachSizeToKernelParameter(dilateKernel, 6);
|
||||
COM_clEnqueueRange(queue, dilateKernel, outputMemoryBuffer, 7);
|
||||
device->COM_clAttachSizeToKernelParameter(dilateKernel, 6, this);
|
||||
device->COM_clEnqueueRange(dilateKernel, outputMemoryBuffer, 7, this);
|
||||
}
|
||||
|
||||
// Erode Distance
|
||||
@@ -293,24 +294,24 @@ void ErodeDistanceOperation::executePixel(float *color, int x, int y, MemoryBuff
|
||||
}
|
||||
|
||||
static cl_kernel erodeKernel = 0;
|
||||
void ErodeDistanceOperation::executeOpenCL(cl_context context, cl_program program, cl_command_queue queue,
|
||||
void ErodeDistanceOperation::executeOpenCL(OpenCLDevice* device,
|
||||
MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
|
||||
MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
|
||||
list<cl_kernel> *clKernelsToCleanUp)
|
||||
{
|
||||
if (!erodeKernel) {
|
||||
erodeKernel = COM_clCreateKernel(program, "erodeKernel", NULL);
|
||||
erodeKernel = device->COM_clCreateKernel("erodeKernel", NULL);
|
||||
}
|
||||
cl_int distanceSquared = this->distance * this->distance;
|
||||
cl_int scope = this->scope;
|
||||
|
||||
COM_clAttachMemoryBufferToKernelParameter(context, erodeKernel, 0, 2, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
|
||||
COM_clAttachOutputMemoryBufferToKernelParameter(erodeKernel, 1, clOutputBuffer);
|
||||
COM_clAttachMemoryBufferOffsetToKernelParameter(erodeKernel, 3, outputMemoryBuffer);
|
||||
device->COM_clAttachMemoryBufferToKernelParameter(erodeKernel, 0, 2, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
|
||||
device->COM_clAttachOutputMemoryBufferToKernelParameter(erodeKernel, 1, clOutputBuffer);
|
||||
device->COM_clAttachMemoryBufferOffsetToKernelParameter(erodeKernel, 3, outputMemoryBuffer);
|
||||
clSetKernelArg(erodeKernel, 4, sizeof(cl_int), &scope);
|
||||
clSetKernelArg(erodeKernel, 5, sizeof(cl_int), &distanceSquared);
|
||||
COM_clAttachSizeToKernelParameter(erodeKernel, 6);
|
||||
COM_clEnqueueRange(queue, erodeKernel, outputMemoryBuffer, 7);
|
||||
device->COM_clAttachSizeToKernelParameter(erodeKernel, 6, this);
|
||||
device->COM_clEnqueueRange(erodeKernel, outputMemoryBuffer, 7, this);
|
||||
}
|
||||
|
||||
// Dilate step
|
||||
|
@@ -99,7 +99,7 @@ public:
|
||||
void setDistance(float distance) { this->distance = distance; }
|
||||
bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output);
|
||||
|
||||
void executeOpenCL(cl_context context, cl_program program, cl_command_queue queue,
|
||||
void executeOpenCL(OpenCLDevice* device,
|
||||
MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
|
||||
MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
|
||||
list<cl_kernel> *clKernelsToCleanUp);
|
||||
@@ -113,7 +113,7 @@ public:
|
||||
*/
|
||||
void executePixel(float *color, int x, int y, MemoryBuffer * inputBuffers[], void *data);
|
||||
|
||||
void executeOpenCL(cl_context context, cl_program program, cl_command_queue queue,
|
||||
void executeOpenCL(OpenCLDevice* device,
|
||||
MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
|
||||
MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
|
||||
list<cl_kernel> *clKernelsToCleanUp);
|
||||
|
@@ -23,6 +23,7 @@
|
||||
#include "COM_WriteBufferOperation.h"
|
||||
#include "COM_defines.h"
|
||||
#include <stdio.h>
|
||||
#include "COM_OpenCLDevice.h"
|
||||
|
||||
WriteBufferOperation::WriteBufferOperation() : NodeOperation()
|
||||
{
|
||||
@@ -110,7 +111,7 @@ void WriteBufferOperation::executeRegion(rcti *rect, unsigned int tileNumber, Me
|
||||
memoryBuffer->setCreatedState();
|
||||
}
|
||||
|
||||
void WriteBufferOperation::executeOpenCLRegion(cl_context context, cl_program program, cl_command_queue queue, rcti *rect, unsigned int chunkNumber, MemoryBuffer **inputMemoryBuffers, MemoryBuffer *outputBuffer)
|
||||
void WriteBufferOperation::executeOpenCLRegion(OpenCLDevice* device, rcti *rect, unsigned int chunkNumber, MemoryBuffer **inputMemoryBuffers, MemoryBuffer *outputBuffer)
|
||||
{
|
||||
float *outputFloatBuffer = outputBuffer->getBuffer();
|
||||
cl_int error;
|
||||
@@ -131,7 +132,7 @@ void WriteBufferOperation::executeOpenCLRegion(cl_context context, cl_program pr
|
||||
CL_FLOAT
|
||||
};
|
||||
|
||||
cl_mem clOutputBuffer = clCreateImage2D(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, outputBufferWidth, outputBufferHeight, 0, outputFloatBuffer, &error);
|
||||
cl_mem clOutputBuffer = clCreateImage2D(device->getContext(), CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, outputBufferWidth, outputBufferHeight, 0, outputFloatBuffer, &error);
|
||||
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
|
||||
|
||||
// STEP 2
|
||||
@@ -139,7 +140,7 @@ void WriteBufferOperation::executeOpenCLRegion(cl_context context, cl_program pr
|
||||
clMemToCleanUp->push_back(clOutputBuffer);
|
||||
list<cl_kernel> *clKernelsToCleanUp = new list<cl_kernel>();
|
||||
|
||||
this->input->executeOpenCL(context, program, queue, outputBuffer, clOutputBuffer, inputMemoryBuffers, clMemToCleanUp, clKernelsToCleanUp);
|
||||
this->input->executeOpenCL(device, outputBuffer, clOutputBuffer, inputMemoryBuffers, clMemToCleanUp, clKernelsToCleanUp);
|
||||
|
||||
// STEP 3
|
||||
|
||||
@@ -149,9 +150,9 @@ void WriteBufferOperation::executeOpenCLRegion(cl_context context, cl_program pr
|
||||
// clFlush(queue);
|
||||
// clFinish(queue);
|
||||
|
||||
error = clEnqueueBarrier(queue);
|
||||
error = clEnqueueBarrier(device->getQueue());
|
||||
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
|
||||
error = clEnqueueReadImage(queue, clOutputBuffer, CL_TRUE, origin, region, 0, 0, outputFloatBuffer, 0, NULL, NULL);
|
||||
error = clEnqueueReadImage(device->getQueue(), clOutputBuffer, CL_TRUE, origin, region, 0, 0, outputFloatBuffer, 0, NULL, NULL);
|
||||
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
|
||||
|
||||
this->getMemoryProxy()->getBuffer()->copyContentFrom(outputBuffer);
|
||||
|
@@ -44,7 +44,7 @@ public:
|
||||
void executeRegion(rcti *rect, unsigned int tileNumber, MemoryBuffer **memoryBuffers);
|
||||
void initExecution();
|
||||
void deinitExecution();
|
||||
void executeOpenCLRegion(cl_context context, cl_program program, cl_command_queue queue, rcti *rect, unsigned int chunkNumber, MemoryBuffer **memoryBuffers, MemoryBuffer *outputBuffer);
|
||||
void executeOpenCLRegion(OpenCLDevice* device, rcti *rect, unsigned int chunkNumber, MemoryBuffer **memoryBuffers, MemoryBuffer *outputBuffer);
|
||||
void readResolutionFromInputSocket();
|
||||
|
||||
};
|
||||
|
Reference in New Issue
Block a user