diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 1cd538d655f..0b9881c0eb5 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -104,12 +104,194 @@ static string opencl_kernel_build_options(const string& platform, const string * if(opencl_kernel_use_debug()) build_options += "-D__KERNEL_OPENCL_DEBUG__ "; - if (opencl_kernel_use_advanced_shading(platform)) + if(opencl_kernel_use_advanced_shading(platform)) build_options += "-D__KERNEL_OPENCL_NEED_ADVANCED_SHADING__ "; return build_options; } +/* thread safe cache for contexts and programs */ +class OpenCLCache +{ + struct Slot + { + thread_mutex *mutex; + cl_context context; + cl_program program; + + Slot() : mutex(NULL), context(NULL), program(NULL) {} + + Slot(const Slot &rhs) + : mutex(rhs.mutex) + , context(rhs.context) + , program(rhs.program) + { + /* copy can only happen in map insert, assert that */ + assert(mutex == NULL); + } + + ~Slot() + { + delete mutex; + mutex = NULL; + } + }; + + /* key is combination of platform ID and device ID */ + typedef pair PlatformDevicePair; + + /* map of Slot objects */ + typedef map CacheMap; + CacheMap cache; + + thread_mutex cache_lock; + + /* lazy instantiate */ + static OpenCLCache &global_instance() + { + static OpenCLCache instance; + return instance; + } + + OpenCLCache() + { + } + + ~OpenCLCache() + { + /* Intel OpenCL bug raises SIGABRT due to pure virtual call + * so this is disabled. It's not necessary to free objects + * at process exit anyway. + * http://software.intel.com/en-us/forums/topic/370083#comments */ + + //flush(); + } + + /* lookup something in the cache. If this returns NULL, slot_locker + * will be holding a lock for the cache. slot_locker should refer to a + * default constructed thread_scoped_lock */ + template + static T get_something(cl_platform_id platform, cl_device_id device, + T Slot::*member, cl_int (*retain_func)(T), thread_scoped_lock &slot_locker) + { + assert(platform != NULL); + + OpenCLCache &self = global_instance(); + + thread_scoped_lock cache_lock(self.cache_lock); + + pair ins = self.cache.insert( + CacheMap::value_type(PlatformDevicePair(platform, device), Slot())); + + Slot &slot = ins.first->second; + + /* create slot lock only while holding cache lock */ + if(!slot.mutex) + slot.mutex = new thread_mutex; + + /* need to unlock cache before locking slot, to allow store to complete */ + cache_lock.unlock(); + + /* lock the slot */ + slot_locker = thread_scoped_lock(*slot.mutex); + + /* If the thing isn't cached */ + if(slot.*member == NULL) { + /* return with the caller's lock holder holding the slot lock */ + return NULL; + } + + /* the item was already cached, release the slot lock */ + slot_locker.unlock(); + + /* caller is going to release it when done with it, so retain it */ + cl_int ciErr = retain_func(slot.*member); + assert(ciErr == CL_SUCCESS); + (void)ciErr; + + return slot.*member; + } + + /* store something in the cache. you MUST have tried to get the item before storing to it */ + template + static void store_something(cl_platform_id platform, cl_device_id device, T thing, + T Slot::*member, cl_int (*retain_func)(T), thread_scoped_lock &slot_locker) + { + assert(platform != NULL); + assert(device != NULL); + assert(thing != NULL); + + OpenCLCache &self = global_instance(); + + thread_scoped_lock cache_lock(self.cache_lock); + CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device)); + cache_lock.unlock(); + + Slot &slot = i->second; + + /* sanity check */ + assert(i != self.cache.end()); + assert(slot.*member == NULL); + + slot.*member = thing; + + /* unlock the slot */ + slot_locker.unlock(); + + /* increment reference count in OpenCL. + * The caller is going to release the object when done with it. */ + cl_int ciErr = retain_func(thing); + assert(ciErr == CL_SUCCESS); + (void)ciErr; + } + +public: + /* see get_something comment */ + static cl_context get_context(cl_platform_id platform, cl_device_id device, + thread_scoped_lock &slot_locker) + { + return get_something(platform, device, &Slot::context, clRetainContext, slot_locker); + } + + /* see get_something comment */ + static cl_program get_program(cl_platform_id platform, cl_device_id device, + thread_scoped_lock &slot_locker) + { + return get_something(platform, device, &Slot::program, clRetainProgram, slot_locker); + } + + /* see store_something comment */ + static void store_context(cl_platform_id platform, cl_device_id device, cl_context context, + thread_scoped_lock &slot_locker) + { + store_something(platform, device, context, &Slot::context, clRetainContext, slot_locker); + } + + /* see store_something comment */ + static void store_program(cl_platform_id platform, cl_device_id device, cl_program program, + thread_scoped_lock &slot_locker) + { + store_something(platform, device, program, &Slot::program, clRetainProgram, slot_locker); + } + + /* discard all cached contexts and programs + * the parameter is a temporary workaround. See OpenCLCache::~OpenCLCache */ + static void flush() + { + OpenCLCache &self = global_instance(); + thread_scoped_lock cache_lock(self.cache_lock); + + foreach(CacheMap::value_type &item, self.cache) { + if(item.second.program != NULL) + clReleaseProgram(item.second.program); + if(item.second.context != NULL) + clReleaseContext(item.second.context); + } + + self.cache.clear(); + } +}; + class OpenCLDevice : public Device { public: @@ -290,21 +472,34 @@ public: opencl_error("OpenCL: no devices found."); return; } - else if (!cdDevice) { + else if(!cdDevice) { opencl_error("OpenCL: specified device not found."); return; } - /* Create context properties array to specify platform */ - const cl_context_properties context_props[] = { - CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform, - 0, 0 - }; + { + /* try to use cached context */ + thread_scoped_lock cache_locker; + cxContext = OpenCLCache::get_context(cpPlatform, cdDevice, cache_locker); - /* create context */ - cxContext = clCreateContext(context_props, 1, &cdDevice, NULL, NULL, &ciErr); - if(opencl_error(ciErr)) - return; + if(cxContext == NULL) { + /* create context properties array to specify platform */ + const cl_context_properties context_props[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform, + 0, 0 + }; + + /* create context */ + cxContext = clCreateContext(context_props, 1, &cdDevice, + context_notify_callback, cdDevice, &ciErr); + + if(opencl_error(ciErr)) + return; + + /* cache it */ + OpenCLCache::store_context(cpPlatform, cdDevice, cxContext, cache_locker); + } + } cqCommandQueue = clCreateCommandQueue(cxContext, cdDevice, 0, &ciErr); if(opencl_error(ciErr)) @@ -317,6 +512,15 @@ public: device_initialized = true; } + static void context_notify_callback(const char *err_info, + const void *private_info, size_t cb, void *user_data) + { + char name[256]; + clGetDeviceInfo((cl_device_id)user_data, CL_DEVICE_NAME, sizeof(name), &name, NULL); + + fprintf(stderr, "OpenCL error (%s): %s\n", name, err_info); + } + bool opencl_version_check() { char version[256]; @@ -436,7 +640,7 @@ public: string source = "#include \"kernel.cl\" // " + kernel_md5 + "\n"; source = path_source_replace_includes(source, kernel_path); - if (debug_src) + if(debug_src) path_write_text(*debug_src, source); size_t source_len = source.size(); @@ -487,39 +691,49 @@ public: return false; } - /* verify we have right opencl version */ - if(!opencl_version_check()) - return false; + /* try to use cached kernel */ + thread_scoped_lock cache_locker; + cpProgram = OpenCLCache::get_program(cpPlatform, cdDevice, cache_locker); - /* md5 hash to detect changes */ - string kernel_path = path_get("kernel"); - string kernel_md5 = path_files_md5_hash(kernel_path); - string device_md5 = device_md5_hash(); - - /* path to cached binary */ - string clbin = string_printf("cycles_kernel_%s_%s.clbin", device_md5.c_str(), kernel_md5.c_str()); - clbin = path_user_get(path_join("cache", clbin)); - - /* path to preprocessed source for debugging */ - string clsrc, *debug_src = NULL; - - if (opencl_kernel_use_debug()) { - clsrc = string_printf("cycles_kernel_%s_%s.cl", device_md5.c_str(), kernel_md5.c_str()); - clsrc = path_user_get(path_join("cache", clsrc)); - debug_src = &clsrc; - } - - /* if exists already, try use it */ - if(path_exists(clbin) && load_binary(kernel_path, clbin, debug_src)) { - /* kernel loaded from binary */ - } - else { - /* if does not exist or loading binary failed, compile kernel */ - if(!compile_kernel(kernel_path, kernel_md5, debug_src)) + if(!cpProgram) { + /* verify we have right opencl version */ + if(!opencl_version_check()) return false; - /* save binary for reuse */ - save_binary(clbin); + /* md5 hash to detect changes */ + string kernel_path = path_get("kernel"); + string kernel_md5 = path_files_md5_hash(kernel_path); + string device_md5 = device_md5_hash(); + + /* path to cached binary */ + string clbin = string_printf("cycles_kernel_%s_%s.clbin", device_md5.c_str(), kernel_md5.c_str()); + clbin = path_user_get(path_join("cache", clbin)); + + /* path to preprocessed source for debugging */ + string clsrc, *debug_src = NULL; + + if(opencl_kernel_use_debug()) { + clsrc = string_printf("cycles_kernel_%s_%s.cl", device_md5.c_str(), kernel_md5.c_str()); + clsrc = path_user_get(path_join("cache", clsrc)); + debug_src = &clsrc; + } + + /* if exists already, try use it */ + if(path_exists(clbin) && load_binary(kernel_path, clbin, debug_src)) { + /* kernel loaded from binary */ + } + else { + /* if does not exist or loading binary failed, compile kernel */ + if(!compile_kernel(kernel_path, kernel_md5, debug_src)) + return false; + + /* save binary for reuse */ + if(!save_binary(clbin)) + return false; + } + + /* cache the program */ + OpenCLCache::store_program(cpPlatform, cdDevice, cpProgram, cache_locker); } /* find kernels */ @@ -563,12 +777,17 @@ public: { size_t size = mem.memory_size(); + cl_mem_flags mem_flag; + void *mem_ptr = NULL; + if(type == MEM_READ_ONLY) - mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, size, NULL, &ciErr); + mem_flag = CL_MEM_READ_ONLY; else if(type == MEM_WRITE_ONLY) - mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_WRITE_ONLY, size, NULL, &ciErr); + mem_flag = CL_MEM_WRITE_ONLY; else - mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_WRITE, size, NULL, &ciErr); + mem_flag = CL_MEM_READ_WRITE; + + mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, mem_flag, size, mem_ptr, &ciErr); opencl_assert(ciErr); @@ -664,7 +883,7 @@ public: size_t local_size[2] = {sqrt_workgroup_size, sqrt_workgroup_size}; /* some implementations have max size 1 on 2nd dimension */ - if (local_size[1] > max_work_items[1]) { + if(local_size[1] > max_work_items[1]) { local_size[0] = workgroup_size/max_work_items[1]; local_size[1] = max_work_items[1]; } @@ -674,7 +893,7 @@ public: /* run kernel */ ciErr = clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, local_size, 0, NULL, NULL); opencl_assert(ciErr); - opencl_assert(clFinish(cqCommandQueue)); + opencl_assert(clFlush(cqCommandQueue)); } void path_trace(RenderTile& rtile, int sample) @@ -789,7 +1008,7 @@ public: int end_sample = tile.start_sample + tile.num_samples; for(int sample = start_sample; sample < end_sample; sample++) { - if (task->get_cancel()) { + if(task->get_cancel()) { if(task->need_finish_queue == false) break; } @@ -798,7 +1017,7 @@ public: tile.sample = sample + 1; - task->update_progress(tile); + //task->update_progress(tile); } task->release_tile(tile); diff --git a/intern/cycles/util/util_opencl.cpp b/intern/cycles/util/util_opencl.cpp index c146c14b10c..1e67afb3fa8 100644 --- a/intern/cycles/util/util_opencl.cpp +++ b/intern/cycles/util/util_opencl.cpp @@ -114,6 +114,7 @@ PFNCLGETEXTENSIONFUNCTIONADDRESS __clewGetExtensionFunctionAddress = NULL; #endif // CLCC_GENERATE_DOCUMENTATION +#if 0 //! \brief Unloads OpenCL dynamic library, should not be called directly static void clewExit(void) { @@ -124,6 +125,7 @@ static void clewExit(void) module = NULL; } } +#endif //! \param path path to dynamic library to load //! \return CLEW_ERROR_OPEN_FAILED if the library could not be opened @@ -138,7 +140,6 @@ int clLibraryInit() #else const char *path = "libOpenCL.so"; #endif - int error = 0; // OpenCL disabled for now, only works with this environment variable set if(!getenv("CYCLES_OPENCL_TEST")) @@ -159,8 +160,11 @@ int clLibraryInit() return 0; } + // Disabled because we retain OpenCL context and it's difficult to ensure + // this will exit after releasing the context +#if 0 // Set unloading - error = atexit(clewExit); + int error = atexit(clewExit); if (error) { @@ -170,6 +174,7 @@ int clLibraryInit() return 0; } +#endif // Determine function entry-points __clewGetPlatformIDs = (PFNCLGETPLATFORMIDS )CLCC_DYNLIB_IMPORT(module, "clGetPlatformIDs"); diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp index 43f15ba0ce6..abcb05561bd 100644 --- a/intern/cycles/util/util_task.cpp +++ b/intern/cycles/util/util_task.cpp @@ -21,6 +21,15 @@ #include "util_system.h" #include "util_task.h" +//#define THREADING_DEBUG_ENABLED + +#ifdef THREADING_DEBUG_ENABLED +#include +#define THREADING_DEBUG(...) do { printf(__VA_ARGS__); fflush(stdout); } while(0) +#else +#define THREADING_DEBUG(...) +#endif + CCL_NAMESPACE_BEGIN /* Task Pool */ @@ -95,8 +104,11 @@ void TaskPool::wait_work() if(num == 0) break; - if(!found_entry) + if(!found_entry) { + THREADING_DEBUG("num==%d, Waiting for condition in TaskPool::wait_work !found_entry\n", num); num_cond.wait(num_lock); + THREADING_DEBUG("num==%d, condition wait done in TaskPool::wait_work !found_entry\n", num); + } } } @@ -109,8 +121,11 @@ void TaskPool::cancel() { thread_scoped_lock num_lock(num_mutex); - while(num) + while(num) { + THREADING_DEBUG("num==%d, Waiting for condition in TaskPool::cancel\n", num); num_cond.wait(num_lock); + THREADING_DEBUG("num==%d condition wait done in TaskPool::cancel\n", num); + } } do_cancel = false; @@ -134,8 +149,10 @@ void TaskPool::num_decrease(int done) num -= done; assert(num >= 0); - if(num == 0) + if(num == 0) { + THREADING_DEBUG("num==%d, notifying all in TaskPool::num_decrease\n", num); num_cond.notify_all(); + } num_mutex.unlock(); } @@ -144,6 +161,7 @@ void TaskPool::num_increase() { thread_scoped_lock num_lock(num_mutex); num++; + THREADING_DEBUG("num==%d, notifying all in TaskPool::num_increase\n", num); num_cond.notify_all(); } diff --git a/intern/cycles/util/util_time.cpp b/intern/cycles/util/util_time.cpp index 5f543fc7f91..4edd59780a2 100644 --- a/intern/cycles/util/util_time.cpp +++ b/intern/cycles/util/util_time.cpp @@ -58,12 +58,23 @@ double time_dt() return now.tv_sec + now.tv_usec*1e-6; } +/* sleep t seconds */ void time_sleep(double t) { - if(t >= 1.0) - sleep((int)t); + /* get whole seconds */ + int s = (int)t; - usleep((int)(t*1e6)); + if(s >= 1) { + sleep(s); + + /* adjust parameter to remove whole seconds */ + t -= s; + } + + /* get microseconds */ + int us = (int)(t * 1e6); + if (us > 0) + usleep(us); } CCL_NAMESPACE_END