This repository has been archived on 2023-10-09. You can view files and clone it, but cannot push or open issues or pull requests.
Files
blender-archive/source/blender/compositor/intern/COM_WorkScheduler.cpp
Sergey Sharybin 77b7e1fe9a Deduplicate CUDA and OpenCL wranglers
For now it was mainly about OpenCL wrangler being duplicated
between Cycles and Compositor, but with OpenSubdiv work those
wranglers were gonna to be duplicated just once again.

This commit makes it so Cycles and Compositor uses wranglers
from this repositories:

  - https://github.com/CudaWrangler/cuew
  - https://github.com/OpenCLWrangler/clew

This repositories are based on the wranglers we used before
and they'll be likely continued maintaining by us plus some
more players in the market.

Pretty much straightforward change with some tricks in the
CMake/SCons to make this libs being passed to the linker
after all other libraries in order to make OpenSubdiv linked
against those wranglers in the future.

For those who're worrying about Cycles being less standalone,
it's not truth, it's rather more flexible now and in the future
different wranglers might be used in Cycles. For now it'll
just mean those libs would need to be put into Cycles repository
together with some other libs from Blender such as mikkspace.

This is mainly platform maintenance commit, should not be any
changes to the user space.

Reviewers: juicyfruit, dingto, campbellbarton

Reviewed By: juicyfruit, dingto, campbellbarton

Differential Revision: https://developer.blender.org/D707
2014-08-05 13:57:50 +06:00

448 lines
12 KiB
C++

/*
* Copyright 2011, Blender Foundation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Contributor:
* Jeroen Bakker
* Monique Dewanchand
*/
#include <list>
#include <stdio.h>
#include "COM_compositor.h"
#include "COM_WorkScheduler.h"
#include "COM_CPUDevice.h"
#include "COM_OpenCLDevice.h"
#include "COM_OpenCLKernels.cl.h"
#include "clew.h"
#include "COM_WriteBufferOperation.h"
#include "MEM_guardedalloc.h"
#include "PIL_time.h"
#include "BLI_threads.h"
#include "BKE_global.h"
#if COM_CURRENT_THREADING_MODEL == COM_TM_NOTHREAD
# ifndef DEBUG /* test this so we dont get warnings in debug builds */
# warning COM_CURRENT_THREADING_MODEL COM_TM_NOTHREAD is activated. Use only for debugging.
# endif
#elif COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
/* do nothing - default */
#else
# error COM_CURRENT_THREADING_MODEL No threading model selected
#endif
/// @brief list of all CPUDevices. for every hardware thread an instance of CPUDevice is created
static vector<CPUDevice *> g_cpudevices;
#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
/// @brief list of all thread for every CPUDevice in cpudevices a thread exists
static ListBase g_cputhreads;
static bool g_cpuInitialized = false;
/// @brief all scheduled work for the cpu
static ThreadQueue *g_cpuqueue;
static ThreadQueue *g_gpuqueue;
#ifdef COM_OPENCL_ENABLED
static cl_context g_context;
static cl_program g_program;
/// @brief list of all OpenCLDevices. for every OpenCL GPU device an instance of OpenCLDevice is created
static vector<OpenCLDevice *> g_gpudevices;
/// @brief list of all thread for every GPUDevice in cpudevices a thread exists
static ListBase g_gputhreads;
/// @brief all scheduled work for the gpu
#ifdef COM_OPENCL_ENABLED
static bool g_openclActive = false;
static bool g_openclInitialized = false;
#endif
#endif
#endif
#define MAX_HIGHLIGHT 8
static bool g_highlightInitialized = false;
extern "C" {
static int g_highlightIndex;
static void **g_highlightedNodes;
static void **g_highlightedNodesRead;
/* XXX highlighting disabled for now
* This requires pointers back to DNA data (bNodeTree/bNode) in operations, which is bad!
* Instead IF we want to keep this feature it should use a weak reference such as bNodeInstanceKey
*/
#if 0
#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
#define HIGHLIGHT(wp) \
{ \
ExecutionGroup *group = wp->getExecutionGroup(); \
if (group->isComplex()) { \
NodeOperation *operation = group->getOutputOperation(); \
if (operation->isWriteBufferOperation()) { \
WriteBufferOperation *writeOperation = (WriteBufferOperation *)operation; \
NodeOperation *complexOperation = writeOperation->getInput(); \
bNode *node = complexOperation->getbNode(); \
if (node) { \
if (node->original) { \
node = node->original; \
} \
if (g_highlightInitialized && g_highlightedNodes) { \
if (g_highlightIndex < MAX_HIGHLIGHT) { \
g_highlightedNodes[g_highlightIndex++] = node; \
} \
} \
} \
} \
} \
}
#endif /* COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE */
#else
#define HIGHLIGHT(wp) {}
#endif
void COM_startReadHighlights()
{
if (!g_highlightInitialized) {
return;
}
if (g_highlightedNodesRead) {
MEM_freeN(g_highlightedNodesRead);
}
g_highlightedNodesRead = g_highlightedNodes;
g_highlightedNodes = (void **)MEM_callocN(sizeof(void *) * MAX_HIGHLIGHT, __func__);
g_highlightIndex = 0;
}
int COM_isHighlightedbNode(bNode *bnode)
{
if (!g_highlightInitialized) {
return false;
}
if (!g_highlightedNodesRead) {
return false;
}
for (int i = 0; i < MAX_HIGHLIGHT; i++) {
void *p = g_highlightedNodesRead[i];
if (!p) return false;
if (p == bnode) return true;
}
return false;
}
} // end extern "C"
#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
void *WorkScheduler::thread_execute_cpu(void *data)
{
Device *device = (Device *)data;
WorkPackage *work;
while ((work = (WorkPackage *)BLI_thread_queue_pop(g_cpuqueue))) {
HIGHLIGHT(work);
device->execute(work);
delete work;
}
return NULL;
}
void *WorkScheduler::thread_execute_gpu(void *data)
{
Device *device = (Device *)data;
WorkPackage *work;
while ((work = (WorkPackage *)BLI_thread_queue_pop(g_gpuqueue))) {
HIGHLIGHT(work);
device->execute(work);
delete work;
}
return NULL;
}
#endif
void WorkScheduler::schedule(ExecutionGroup *group, int chunkNumber)
{
WorkPackage *package = new WorkPackage(group, chunkNumber);
#if COM_CURRENT_THREADING_MODEL == COM_TM_NOTHREAD
CPUDevice device;
device.execute(package);
delete package;
#elif COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
#ifdef COM_OPENCL_ENABLED
if (group->isOpenCL() && g_openclActive) {
BLI_thread_queue_push(g_gpuqueue, package);
}
else {
BLI_thread_queue_push(g_cpuqueue, package);
}
#else
BLI_thread_queue_push(cpuqueue, package);
#endif
#endif
}
void WorkScheduler::start(CompositorContext &context)
{
#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
unsigned int index;
g_cpuqueue = BLI_thread_queue_init();
BLI_init_threads(&g_cputhreads, thread_execute_cpu, g_cpudevices.size());
for (index = 0; index < g_cpudevices.size(); index++) {
Device *device = g_cpudevices[index];
BLI_insert_thread(&g_cputhreads, device);
}
#ifdef COM_OPENCL_ENABLED
if (context.getHasActiveOpenCLDevices()) {
g_gpuqueue = BLI_thread_queue_init();
BLI_init_threads(&g_gputhreads, thread_execute_gpu, g_gpudevices.size());
for (index = 0; index < g_gpudevices.size(); index++) {
Device *device = g_gpudevices[index];
BLI_insert_thread(&g_gputhreads, device);
}
g_openclActive = true;
}
else {
g_openclActive = false;
}
#endif
#endif
}
void WorkScheduler::finish()
{
#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
#ifdef COM_OPENCL_ENABLED
if (g_openclActive) {
BLI_thread_queue_wait_finish(g_gpuqueue);
BLI_thread_queue_wait_finish(g_cpuqueue);
}
else {
BLI_thread_queue_wait_finish(g_cpuqueue);
}
#else
BLI_thread_queue_wait_finish(cpuqueue);
#endif
#endif
}
void WorkScheduler::stop()
{
#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
BLI_thread_queue_nowait(g_cpuqueue);
BLI_end_threads(&g_cputhreads);
BLI_thread_queue_free(g_cpuqueue);
g_cpuqueue = NULL;
#ifdef COM_OPENCL_ENABLED
if (g_openclActive) {
BLI_thread_queue_nowait(g_gpuqueue);
BLI_end_threads(&g_gputhreads);
BLI_thread_queue_free(g_gpuqueue);
g_gpuqueue = NULL;
}
#endif
#endif
}
bool WorkScheduler::hasGPUDevices()
{
#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
#ifdef COM_OPENCL_ENABLED
return g_gpudevices.size() > 0;
#else
return 0;
#endif
#else
return 0;
#endif
}
static void CL_CALLBACK clContextError(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
printf("OPENCL error: %s\n", errinfo);
}
void WorkScheduler::initialize(bool use_opencl, int num_cpu_threads)
{
/* initialize highlighting */
if (!g_highlightInitialized) {
if (g_highlightedNodesRead) MEM_freeN(g_highlightedNodesRead);
if (g_highlightedNodes) MEM_freeN(g_highlightedNodes);
g_highlightedNodesRead = NULL;
g_highlightedNodes = NULL;
COM_startReadHighlights();
g_highlightInitialized = true;
}
#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
/* deinitialize if number of threads doesn't match */
if (g_cpudevices.size() != num_cpu_threads) {
Device *device;
while (g_cpudevices.size() > 0) {
device = g_cpudevices.back();
g_cpudevices.pop_back();
device->deinitialize();
delete device;
}
g_cpuInitialized = false;
}
/* initialize CPU threads */
if (!g_cpuInitialized) {
for (int index = 0; index < num_cpu_threads; index++) {
CPUDevice *device = new CPUDevice();
device->initialize();
g_cpudevices.push_back(device);
}
g_cpuInitialized = true;
}
#ifdef COM_OPENCL_ENABLED
/* deinitialize OpenCL GPU's */
if (use_opencl && !g_openclInitialized) {
g_context = NULL;
g_program = NULL;
if (clewInit() != CLEW_SUCCESS) /* this will check for errors and skip if already initialized */
return;
if (clCreateContextFromType) {
cl_uint numberOfPlatforms = 0;
cl_int error;
error = clGetPlatformIDs(0, 0, &numberOfPlatforms);
if (error == -1001) { } /* GPU not supported */
else if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
if (G.f & G_DEBUG) printf("%u number of platforms\n", numberOfPlatforms);
cl_platform_id *platforms = (cl_platform_id *)MEM_mallocN(sizeof(cl_platform_id) * numberOfPlatforms, __func__);
error = clGetPlatformIDs(numberOfPlatforms, platforms, 0);
unsigned int indexPlatform;
for (indexPlatform = 0; indexPlatform < numberOfPlatforms; indexPlatform++) {
cl_platform_id platform = platforms[indexPlatform];
cl_uint numberOfDevices = 0;
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, 0, &numberOfDevices);
if (numberOfDevices <= 0)
continue;
cl_device_id *cldevices = (cl_device_id *)MEM_mallocN(sizeof(cl_device_id) * numberOfDevices, __func__);
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numberOfDevices, cldevices, 0);
g_context = clCreateContext(NULL, numberOfDevices, cldevices, clContextError, NULL, &error);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
const char *cl_str[2] = {datatoc_COM_OpenCLKernels_cl, NULL};
g_program = clCreateProgramWithSource(g_context, 1, cl_str, 0, &error);
error = clBuildProgram(g_program, numberOfDevices, cldevices, 0, 0, 0);
if (error != CL_SUCCESS) {
cl_int error2;
size_t ret_val_size = 0;
printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
error2 = clGetProgramBuildInfo(g_program, cldevices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
char *build_log = (char *)MEM_mallocN(sizeof(char) * ret_val_size + 1, __func__);
error2 = clGetProgramBuildInfo(g_program, cldevices[0], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
build_log[ret_val_size] = '\0';
printf("%s", build_log);
MEM_freeN(build_log);
}
else {
unsigned int indexDevices;
for (indexDevices = 0; indexDevices < numberOfDevices; indexDevices++) {
cl_device_id device = cldevices[indexDevices];
cl_int vendorID = 0;
cl_int error2 = clGetDeviceInfo(device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, NULL);
if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error2, clewErrorString(error2)); }
OpenCLDevice *clDevice = new OpenCLDevice(g_context, device, g_program, vendorID);
clDevice->initialize();
g_gpudevices.push_back(clDevice);
}
}
MEM_freeN(cldevices);
}
MEM_freeN(platforms);
}
g_openclInitialized = true;
}
#endif
#endif
}
void WorkScheduler::deinitialize()
{
#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
/* deinitialize CPU threads */
if (g_cpuInitialized) {
Device *device;
while (g_cpudevices.size() > 0) {
device = g_cpudevices.back();
g_cpudevices.pop_back();
device->deinitialize();
delete device;
}
g_cpuInitialized = false;
}
#ifdef COM_OPENCL_ENABLED
/* deinitialize OpenCL GPU's */
if (g_openclInitialized) {
Device *device;
while (g_gpudevices.size() > 0) {
device = g_gpudevices.back();
g_gpudevices.pop_back();
device->deinitialize();
delete device;
}
if (g_program) {
clReleaseProgram(g_program);
g_program = NULL;
}
if (g_context) {
clReleaseContext(g_context);
g_context = NULL;
}
g_openclInitialized = false;
}
#endif
#endif
/* deinitialize highlighting */
if (g_highlightInitialized) {
g_highlightInitialized = false;
if (g_highlightedNodes) {
MEM_freeN(g_highlightedNodes);
g_highlightedNodes = NULL;
}
if (g_highlightedNodesRead) {
MEM_freeN(g_highlightedNodesRead);
g_highlightedNodesRead = NULL;
}
}
}