/* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright 2011, Blender Foundation. */ #include #include #include "COM_CPUDevice.h" #include "COM_OpenCLDevice.h" #include "COM_OpenCLKernels.cl.h" #include "COM_WorkScheduler.h" #include "COM_WriteBufferOperation.h" #include "COM_compositor.h" #include "clew.h" #include "MEM_guardedalloc.h" #include "BLI_threads.h" #include "PIL_time.h" #include "BKE_global.h" #if COM_CURRENT_THREADING_MODEL == COM_TM_NOTHREAD # ifndef DEBUG /* Test this so we don't get warnings in debug builds. */ # warning COM_CURRENT_THREADING_MODEL COM_TM_NOTHREAD is activated. Use only for debugging. # endif #elif COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE /* do nothing - default */ #else # error COM_CURRENT_THREADING_MODEL No threading model selected #endif static ThreadLocal(CPUDevice *) g_thread_device; static struct { /** \brief list of all CPUDevices. for every hardware thread an instance of CPUDevice is created */ std::vector cpu_devices; #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE /** \brief list of all thread for every CPUDevice in cpudevices a thread exists. */ ListBase cpu_threads; bool cpu_initialized = false; /** \brief all scheduled work for the cpu */ ThreadQueue *cpu_queue; ThreadQueue *gpu_queue; # ifdef COM_OPENCL_ENABLED cl_context opencl_context; cl_program opencl_program; /** \brief list of all OpenCLDevices. for every OpenCL GPU device an instance of OpenCLDevice is * created. */ std::vector gpu_devices; /** \brief list of all thread for every GPUDevice in cpudevices a thread exists. */ ListBase gpu_threads; /** \brief all scheduled work for the GPU. */ bool opencl_active = false; bool opencl_initialized = false; # endif #endif } g_work_scheduler; #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE void *WorkScheduler::thread_execute_cpu(void *data) { CPUDevice *device = (CPUDevice *)data; WorkPackage *work; BLI_thread_local_set(g_thread_device, device); while ((work = (WorkPackage *)BLI_thread_queue_pop(g_work_scheduler.cpu_queue))) { device->execute(work); delete work; } return nullptr; } void *WorkScheduler::thread_execute_gpu(void *data) { Device *device = (Device *)data; WorkPackage *work; while ((work = (WorkPackage *)BLI_thread_queue_pop(g_work_scheduler.gpu_queue))) { device->execute(work); delete work; } return nullptr; } #endif void WorkScheduler::schedule(ExecutionGroup *group, int chunkNumber) { WorkPackage *package = new WorkPackage(group, chunkNumber); #if COM_CURRENT_THREADING_MODEL == COM_TM_NOTHREAD CPUDevice device(0); device.execute(package); delete package; #elif COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE # ifdef COM_OPENCL_ENABLED if (group->isOpenCL() && g_work_scheduler.opencl_active) { BLI_thread_queue_push(g_work_scheduler.gpu_queue, package); } else { BLI_thread_queue_push(g_work_scheduler.cpu_queue, package); } # else BLI_thread_queue_push(g_work_scheduler.cpu_queue, package); # endif #endif } void WorkScheduler::start(CompositorContext &context) { #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE unsigned int index; g_work_scheduler.cpu_queue = BLI_thread_queue_init(); BLI_threadpool_init( &g_work_scheduler.cpu_threads, thread_execute_cpu, g_work_scheduler.cpu_devices.size()); for (index = 0; index < g_work_scheduler.cpu_devices.size(); index++) { Device *device = g_work_scheduler.cpu_devices[index]; BLI_threadpool_insert(&g_work_scheduler.cpu_threads, device); } # ifdef COM_OPENCL_ENABLED if (context.getHasActiveOpenCLDevices()) { g_work_scheduler.gpu_queue = BLI_thread_queue_init(); BLI_threadpool_init( &g_work_scheduler.gpu_threads, thread_execute_gpu, g_work_scheduler.gpu_devices.size()); for (index = 0; index < g_work_scheduler.gpu_devices.size(); index++) { Device *device = g_work_scheduler.gpu_devices[index]; BLI_threadpool_insert(&g_work_scheduler.gpu_threads, device); } g_work_scheduler.opencl_active = true; } else { g_work_scheduler.opencl_active = false; } # endif #endif } void WorkScheduler::finish() { #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE # ifdef COM_OPENCL_ENABLED if (g_work_scheduler.opencl_active) { BLI_thread_queue_wait_finish(g_work_scheduler.gpu_queue); BLI_thread_queue_wait_finish(g_work_scheduler.cpu_queue); } else { BLI_thread_queue_wait_finish(g_work_scheduler.cpu_queue); } # else BLI_thread_queue_wait_finish(cpuqueue); # endif #endif } void WorkScheduler::stop() { #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE BLI_thread_queue_nowait(g_work_scheduler.cpu_queue); BLI_threadpool_end(&g_work_scheduler.cpu_threads); BLI_thread_queue_free(g_work_scheduler.cpu_queue); g_work_scheduler.cpu_queue = nullptr; # ifdef COM_OPENCL_ENABLED if (g_work_scheduler.opencl_active) { BLI_thread_queue_nowait(g_work_scheduler.gpu_queue); BLI_threadpool_end(&g_work_scheduler.gpu_threads); BLI_thread_queue_free(g_work_scheduler.gpu_queue); g_work_scheduler.gpu_queue = nullptr; } # endif #endif } bool WorkScheduler::has_gpu_devices() { #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE # ifdef COM_OPENCL_ENABLED return !g_work_scheduler.gpu_devices.empty(); # else return false; # endif #else return false; #endif } #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE static void CL_CALLBACK clContextError(const char *errinfo, const void * /*private_info*/, size_t /*cb*/, void * /*user_data*/) { printf("OPENCL error: %s\n", errinfo); } #endif void WorkScheduler::initialize(bool use_opencl, int num_cpu_threads) { #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE /* deinitialize if number of threads doesn't match */ if (g_work_scheduler.cpu_devices.size() != num_cpu_threads) { Device *device; while (!g_work_scheduler.cpu_devices.empty()) { device = g_work_scheduler.cpu_devices.back(); g_work_scheduler.cpu_devices.pop_back(); device->deinitialize(); delete device; } if (g_work_scheduler.cpu_initialized) { BLI_thread_local_delete(g_thread_device); } g_work_scheduler.cpu_initialized = false; } /* initialize CPU threads */ if (!g_work_scheduler.cpu_initialized) { for (int index = 0; index < num_cpu_threads; index++) { CPUDevice *device = new CPUDevice(index); device->initialize(); g_work_scheduler.cpu_devices.push_back(device); } BLI_thread_local_create(g_thread_device); g_work_scheduler.cpu_initialized = true; } # ifdef COM_OPENCL_ENABLED /* deinitialize OpenCL GPU's */ if (use_opencl && !g_work_scheduler.opencl_initialized) { g_work_scheduler.opencl_context = nullptr; g_work_scheduler.opencl_program = nullptr; /* This will check for errors and skip if already initialized. */ if (clewInit() != CLEW_SUCCESS) { return; } if (clCreateContextFromType) { cl_uint numberOfPlatforms = 0; cl_int error; error = clGetPlatformIDs(0, nullptr, &numberOfPlatforms); if (error == -1001) { } /* GPU not supported */ else if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } if (G.f & G_DEBUG) { printf("%u number of platforms\n", numberOfPlatforms); } cl_platform_id *platforms = (cl_platform_id *)MEM_mallocN( sizeof(cl_platform_id) * numberOfPlatforms, __func__); error = clGetPlatformIDs(numberOfPlatforms, platforms, nullptr); unsigned int indexPlatform; for (indexPlatform = 0; indexPlatform < numberOfPlatforms; indexPlatform++) { cl_platform_id platform = platforms[indexPlatform]; cl_uint numberOfDevices = 0; clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, nullptr, &numberOfDevices); if (numberOfDevices <= 0) { continue; } cl_device_id *cldevices = (cl_device_id *)MEM_mallocN( sizeof(cl_device_id) * numberOfDevices, __func__); clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numberOfDevices, cldevices, nullptr); g_work_scheduler.opencl_context = clCreateContext( nullptr, numberOfDevices, cldevices, clContextError, nullptr, &error); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } const char *cl_str[2] = {datatoc_COM_OpenCLKernels_cl, nullptr}; g_work_scheduler.opencl_program = clCreateProgramWithSource( g_work_scheduler.opencl_context, 1, cl_str, nullptr, &error); error = clBuildProgram(g_work_scheduler.opencl_program, numberOfDevices, cldevices, nullptr, nullptr, nullptr); if (error != CL_SUCCESS) { cl_int error2; size_t ret_val_size = 0; printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); error2 = clGetProgramBuildInfo(g_work_scheduler.opencl_program, cldevices[0], CL_PROGRAM_BUILD_LOG, 0, nullptr, &ret_val_size); if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } char *build_log = (char *)MEM_mallocN(sizeof(char) * ret_val_size + 1, __func__); error2 = clGetProgramBuildInfo(g_work_scheduler.opencl_program, cldevices[0], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, nullptr); if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } build_log[ret_val_size] = '\0'; printf("%s", build_log); MEM_freeN(build_log); } else { unsigned int indexDevices; for (indexDevices = 0; indexDevices < numberOfDevices; indexDevices++) { cl_device_id device = cldevices[indexDevices]; cl_int vendorID = 0; cl_int error2 = clGetDeviceInfo( device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, nullptr); if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error2, clewErrorString(error2)); } OpenCLDevice *clDevice = new OpenCLDevice(g_work_scheduler.opencl_context, device, g_work_scheduler.opencl_program, vendorID); clDevice->initialize(); g_work_scheduler.gpu_devices.push_back(clDevice); } } MEM_freeN(cldevices); } MEM_freeN(platforms); } g_work_scheduler.opencl_initialized = true; } # endif #endif } void WorkScheduler::deinitialize() { #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE /* deinitialize CPU threads */ if (g_work_scheduler.cpu_initialized) { Device *device; while (!g_work_scheduler.cpu_devices.empty()) { device = g_work_scheduler.cpu_devices.back(); g_work_scheduler.cpu_devices.pop_back(); device->deinitialize(); delete device; } BLI_thread_local_delete(g_thread_device); g_work_scheduler.cpu_initialized = false; } # ifdef COM_OPENCL_ENABLED /* deinitialize OpenCL GPU's */ if (g_work_scheduler.opencl_initialized) { Device *device; while (!g_work_scheduler.gpu_devices.empty()) { device = g_work_scheduler.gpu_devices.back(); g_work_scheduler.gpu_devices.pop_back(); device->deinitialize(); delete device; } if (g_work_scheduler.opencl_program) { clReleaseProgram(g_work_scheduler.opencl_program); g_work_scheduler.opencl_program = nullptr; } if (g_work_scheduler.opencl_context) { clReleaseContext(g_work_scheduler.opencl_context); g_work_scheduler.opencl_context = nullptr; } g_work_scheduler.opencl_initialized = false; } # endif #endif } int WorkScheduler::current_thread_id() { CPUDevice *device = (CPUDevice *)BLI_thread_local_get(g_thread_device); return device->thread_id(); }