Merge branch 'master' into blender2.8

This commit is contained in:
2018-07-05 07:54:47 +02:00
22 changed files with 363 additions and 365 deletions

View File

@@ -21,7 +21,7 @@
# <pep8 compliant> # <pep8 compliant>
bpy_types_Operator_bl_property__doc__ = ( bpy_types_Operator_bl_property__doc__ = (
""" """
The name of a property to use as this operators primary property. The name of a property to use as this operators primary property.
Currently this is only used to select the default property when Currently this is only used to select the default property when
expanding an operator into a menu. expanding an operator into a menu.

View File

@@ -29,7 +29,7 @@ You'll need to specify your user login and password, obviously.
Example usage: Example usage:
./sphinx_doc_update.py --mirror ../../../docs/remote_api_backup/ --source ../.. --blender ../../../build_cmake/bin/blender --user foobar --password barfoo ./sphinx_doc_update.py --mirror ../../../docs/remote_api_backup/ --source ../.. --blender ../../../build_cmake/bin/blender --user foobar --password barfoo
""" """
@@ -107,14 +107,16 @@ def main():
with tempfile.TemporaryDirectory() as tmp_dir: with tempfile.TemporaryDirectory() as tmp_dir:
# II) Generate doc source in temp dir. # II) Generate doc source in temp dir.
doc_gen_cmd = (args.blender, "--background", "-noaudio", "--factory-startup", "--python-exit-code", "1", doc_gen_cmd = (
"--python", "%s/doc/python_api/sphinx_doc_gen.py" % args.source_dir, "--", args.blender, "--background", "-noaudio", "--factory-startup", "--python-exit-code", "1",
"--output", tmp_dir) "--python", "%s/doc/python_api/sphinx_doc_gen.py" % args.source_dir, "--",
"--output", tmp_dir
)
subprocess.run(doc_gen_cmd) subprocess.run(doc_gen_cmd)
# III) Get Blender version info. # III) Get Blender version info.
getver_file = os.path.join(tmp_dir, "blendver.txt") getver_file = os.path.join(tmp_dir, "blendver.txt")
getver_script = ("" getver_script = (
"import sys, bpy\n" "import sys, bpy\n"
"with open(sys.argv[-1], 'w') as f:\n" "with open(sys.argv[-1], 'w') as f:\n"
" is_release = bpy.app.version_cycle in {'rc', 'release'}\n" " is_release = bpy.app.version_cycle in {'rc', 'release'}\n"
@@ -124,7 +126,8 @@ def main():
" f.write('%d.%d%s\\n' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n" " f.write('%d.%d%s\\n' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
" if is_release else '%s\\n' % branch)\n" " if is_release else '%s\\n' % branch)\n"
" f.write('%d_%d%s_release' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n" " f.write('%d_%d%s_release' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
" if is_release else '%d_%d_%d' % bpy.app.version)\n") " if is_release else '%d_%d_%d' % bpy.app.version)\n"
)
get_ver_cmd = (args.blender, "--background", "-noaudio", "--factory-startup", "--python-exit-code", "1", get_ver_cmd = (args.blender, "--background", "-noaudio", "--factory-startup", "--python-exit-code", "1",
"--python-expr", getver_script, "--", getver_file) "--python-expr", getver_script, "--", getver_file)
subprocess.run(get_ver_cmd) subprocess.run(get_ver_cmd)

View File

@@ -179,8 +179,8 @@ public:
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel; KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)> shader_kernel; KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)> shader_kernel;
KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int)> filter_divide_shadow_kernel; KernelFunctions<void(*)(int, TileInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int)> filter_divide_shadow_kernel;
KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int)> filter_get_feature_kernel; KernelFunctions<void(*)(int, TileInfo*, int, int, int, int, float*, float*, int*, int, int)> filter_get_feature_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel; KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel; KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
@@ -459,18 +459,6 @@ public:
} }
}; };
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
{
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.host_pointer;
for(int i = 0; i < 9; i++) {
tiles->buffers[i] = buffers[i];
}
task->tiles_mem.copy_to_device();
return true;
}
bool denoising_non_local_means(device_ptr image_ptr, device_ptr guide_ptr, device_ptr variance_ptr, device_ptr out_ptr, bool denoising_non_local_means(device_ptr image_ptr, device_ptr guide_ptr, device_ptr variance_ptr, device_ptr out_ptr,
DenoisingTask *task) DenoisingTask *task)
{ {
@@ -626,7 +614,7 @@ public:
for(int y = task->rect.y; y < task->rect.w; y++) { for(int y = task->rect.y; y < task->rect.w; y++) {
for(int x = task->rect.x; x < task->rect.z; x++) { for(int x = task->rect.x; x < task->rect.z; x++) {
filter_divide_shadow_kernel()(task->render_buffer.samples, filter_divide_shadow_kernel()(task->render_buffer.samples,
task->tiles, task->tile_info,
x, y, x, y,
(float*) a_ptr, (float*) a_ptr,
(float*) b_ptr, (float*) b_ptr,
@@ -635,7 +623,7 @@ public:
(float*) buffer_variance_ptr, (float*) buffer_variance_ptr,
&task->rect.x, &task->rect.x,
task->render_buffer.pass_stride, task->render_buffer.pass_stride,
task->render_buffer.denoising_data_offset); task->render_buffer.offset);
} }
} }
return true; return true;
@@ -650,7 +638,7 @@ public:
for(int y = task->rect.y; y < task->rect.w; y++) { for(int y = task->rect.y; y < task->rect.w; y++) {
for(int x = task->rect.x; x < task->rect.z; x++) { for(int x = task->rect.x; x < task->rect.z; x++) {
filter_get_feature_kernel()(task->render_buffer.samples, filter_get_feature_kernel()(task->render_buffer.samples,
task->tiles, task->tile_info,
mean_offset, mean_offset,
variance_offset, variance_offset,
x, y, x, y,
@@ -658,7 +646,7 @@ public:
(float*) variance_ptr, (float*) variance_ptr,
&task->rect.x, &task->rect.x,
task->render_buffer.pass_stride, task->render_buffer.pass_stride,
task->render_buffer.denoising_data_offset); task->render_buffer.offset);
} }
} }
return true; return true;
@@ -711,7 +699,7 @@ public:
} }
} }
void denoise(DeviceTask &task, DenoisingTask& denoising, RenderTile &tile) void denoise(DenoisingTask& denoising, RenderTile &tile)
{ {
tile.sample = tile.start_sample + tile.num_samples; tile.sample = tile.start_sample + tile.num_samples;
@@ -722,23 +710,11 @@ public:
denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising); denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
denoising.functions.detect_outliers = function_bind(&CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising); denoising.functions.detect_outliers = function_bind(&CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.functions.set_tiles = function_bind(&CPUDevice::denoising_set_tiles, this, _1, &denoising);
denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h); denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
denoising.render_buffer.samples = tile.sample; denoising.render_buffer.samples = tile.sample;
RenderTile rtiles[9]; denoising.run_denoising(&tile);
rtiles[4] = tile;
task.map_neighbor_tiles(rtiles, this);
denoising.tiles_from_rendertiles(rtiles);
denoising.init_from_devicetask(task);
denoising.run_denoising();
task.unmap_neighbor_tiles(rtiles, this);
task.update_progress(&tile, tile.w*tile.h);
} }
void thread_render(DeviceTask& task) void thread_render(DeviceTask& task)
@@ -766,7 +742,7 @@ public:
} }
RenderTile tile; RenderTile tile;
DenoisingTask denoising(this); DenoisingTask denoising(this, task);
while(task.acquire_tile(this, tile)) { while(task.acquire_tile(this, tile)) {
if(tile.task == RenderTile::PATH_TRACE) { if(tile.task == RenderTile::PATH_TRACE) {
@@ -779,7 +755,9 @@ public:
} }
} }
else if(tile.task == RenderTile::DENOISE) { else if(tile.task == RenderTile::DENOISE) {
denoise(task, denoising, tile); denoise(denoising, tile);
task.update_progress(&tile, tile.w*tile.h);
} }
task.release_tile(tile); task.release_tile(tile);

View File

@@ -1251,18 +1251,6 @@ public:
} }
} }
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
{
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.host_pointer;
for(int i = 0; i < 9; i++) {
tiles->buffers[i] = buffers[i];
}
task->tiles_mem.copy_to_device();
return !have_error();
}
#define CUDA_GET_BLOCKSIZE(func, w, h) \ #define CUDA_GET_BLOCKSIZE(func, w, h) \
int threads_per_block; \ int threads_per_block; \
cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \ cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
@@ -1534,7 +1522,7 @@ public:
task->rect.w-task->rect.y); task->rect.w-task->rect.y);
void *args[] = {&task->render_buffer.samples, void *args[] = {&task->render_buffer.samples,
&task->tiles_mem.device_pointer, &task->tile_info_mem.device_pointer,
&a_ptr, &a_ptr,
&b_ptr, &b_ptr,
&sample_variance_ptr, &sample_variance_ptr,
@@ -1542,7 +1530,7 @@ public:
&buffer_variance_ptr, &buffer_variance_ptr,
&task->rect, &task->rect,
&task->render_buffer.pass_stride, &task->render_buffer.pass_stride,
&task->render_buffer.denoising_data_offset}; &task->render_buffer.offset};
CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args); CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args);
cuda_assert(cuCtxSynchronize()); cuda_assert(cuCtxSynchronize());
@@ -1568,14 +1556,14 @@ public:
task->rect.w-task->rect.y); task->rect.w-task->rect.y);
void *args[] = {&task->render_buffer.samples, void *args[] = {&task->render_buffer.samples,
&task->tiles_mem.device_pointer, &task->tile_info_mem.device_pointer,
&mean_offset, &mean_offset,
&variance_offset, &variance_offset,
&mean_ptr, &mean_ptr,
&variance_ptr, &variance_ptr,
&task->rect, &task->rect,
&task->render_buffer.pass_stride, &task->render_buffer.pass_stride,
&task->render_buffer.denoising_data_offset}; &task->render_buffer.offset};
CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args); CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args);
cuda_assert(cuCtxSynchronize()); cuda_assert(cuCtxSynchronize());
@@ -1613,7 +1601,7 @@ public:
return !have_error(); return !have_error();
} }
void denoise(RenderTile &rtile, DenoisingTask& denoising, const DeviceTask &task) void denoise(RenderTile &rtile, DenoisingTask& denoising)
{ {
denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising); denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising);
denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, &denoising); denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, &denoising);
@@ -1622,21 +1610,11 @@ public:
denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising); denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
denoising.functions.detect_outliers = function_bind(&CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising); denoising.functions.detect_outliers = function_bind(&CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.functions.set_tiles = function_bind(&CUDADevice::denoising_set_tiles, this, _1, &denoising);
denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h); denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
denoising.render_buffer.samples = rtile.sample; denoising.render_buffer.samples = rtile.sample;
RenderTile rtiles[9]; denoising.run_denoising(&rtile);
rtiles[4] = rtile;
task.map_neighbor_tiles(rtiles, this);
denoising.tiles_from_rendertiles(rtiles);
denoising.init_from_devicetask(task);
denoising.run_denoising();
task.unmap_neighbor_tiles(rtiles, this);
} }
void path_trace(DeviceTask& task, RenderTile& rtile, device_vector<WorkTile>& work_tiles) void path_trace(DeviceTask& task, RenderTile& rtile, device_vector<WorkTile>& work_tiles)
@@ -2092,7 +2070,7 @@ public:
/* keep rendering tiles until done */ /* keep rendering tiles until done */
RenderTile tile; RenderTile tile;
DenoisingTask denoising(this); DenoisingTask denoising(this, *task);
while(task->acquire_tile(this, tile)) { while(task->acquire_tile(this, tile)) {
if(tile.task == RenderTile::PATH_TRACE) { if(tile.task == RenderTile::PATH_TRACE) {
@@ -2107,7 +2085,7 @@ public:
else if(tile.task == RenderTile::DENOISE) { else if(tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples; tile.sample = tile.start_sample + tile.num_samples;
denoise(tile, denoising, *task); denoise(tile, denoising);
task->update_progress(&tile, tile.w*tile.h); task->update_progress(&tile, tile.w*tile.h);
} }

View File

@@ -20,12 +20,29 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
DenoisingTask::DenoisingTask(Device *device) DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
: tiles_mem(device, "denoising tiles_mem", MEM_READ_WRITE), : tile_info_mem(device, "denoising tile info mem", MEM_READ_WRITE),
storage(device), storage(device),
buffer(device), buffer(device),
device(device) device(device)
{ {
radius = task.denoising_radius;
nlm_k_2 = powf(2.0f, lerp(-5.0f, 3.0f, task.denoising_strength));
if(task.denoising_relative_pca) {
pca_threshold = -powf(10.0f, lerp(-8.0f, 0.0f, task.denoising_feature_strength));
}
else {
pca_threshold = powf(10.0f, lerp(-5.0f, 3.0f, task.denoising_feature_strength));
}
render_buffer.pass_stride = task.pass_stride;
render_buffer.offset = task.pass_denoising_data;
target_buffer.pass_stride = task.pass_stride;
target_buffer.denoising_clean_offset = task.pass_denoising_clean;
functions.map_neighbor_tiles = function_bind(task.map_neighbor_tiles, _1, device);
functions.unmap_neighbor_tiles = function_bind(task.unmap_neighbor_tiles, _1, device);
} }
DenoisingTask::~DenoisingTask() DenoisingTask::~DenoisingTask()
@@ -38,170 +55,170 @@ DenoisingTask::~DenoisingTask()
storage.temporary_2.free(); storage.temporary_2.free();
storage.temporary_color.free(); storage.temporary_color.free();
buffer.mem.free(); buffer.mem.free();
tiles_mem.free(); tile_info_mem.free();
} }
void DenoisingTask::init_from_devicetask(const DeviceTask &task) void DenoisingTask::set_render_buffer(RenderTile *rtiles)
{ {
radius = task.denoising_radius; tile_info = (TileInfo*) tile_info_mem.alloc(sizeof(TileInfo)/sizeof(int));
nlm_k_2 = powf(2.0f, lerp(-5.0f, 3.0f, task.denoising_strength));
if(task.denoising_relative_pca) {
pca_threshold = -powf(10.0f, lerp(-8.0f, 0.0f, task.denoising_feature_strength));
}
else {
pca_threshold = powf(10.0f, lerp(-5.0f, 3.0f, task.denoising_feature_strength));
}
render_buffer.pass_stride = task.pass_stride; for(int i = 0; i < 9; i++) {
render_buffer.denoising_data_offset = task.pass_denoising_data; tile_info->offsets[i] = rtiles[i].offset;
render_buffer.denoising_clean_offset = task.pass_denoising_clean; tile_info->strides[i] = rtiles[i].stride;
tile_info->buffers[i] = rtiles[i].buffer;
}
tile_info->x[0] = rtiles[3].x;
tile_info->x[1] = rtiles[4].x;
tile_info->x[2] = rtiles[5].x;
tile_info->x[3] = rtiles[5].x + rtiles[5].w;
tile_info->y[0] = rtiles[1].y;
tile_info->y[1] = rtiles[4].y;
tile_info->y[2] = rtiles[7].y;
tile_info->y[3] = rtiles[7].y + rtiles[7].h;
target_buffer.offset = rtiles[9].offset;
target_buffer.stride = rtiles[9].stride;
target_buffer.ptr = rtiles[9].buffer;
tile_info_mem.copy_to_device();
}
void DenoisingTask::setup_denoising_buffer()
{
/* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring tiles */ /* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring tiles */
rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w); rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w);
rect = rect_expand(rect, radius); rect = rect_expand(rect, radius);
rect = rect_clip(rect, make_int4(tiles->x[0], tiles->y[0], tiles->x[3], tiles->y[3])); rect = rect_clip(rect, make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3]));
}
void DenoisingTask::tiles_from_rendertiles(RenderTile *rtiles)
{
tiles = (TilesInfo*) tiles_mem.alloc(sizeof(TilesInfo)/sizeof(int));
device_ptr buffers[9];
for(int i = 0; i < 9; i++) {
buffers[i] = rtiles[i].buffer;
tiles->offsets[i] = rtiles[i].offset;
tiles->strides[i] = rtiles[i].stride;
}
tiles->x[0] = rtiles[3].x;
tiles->x[1] = rtiles[4].x;
tiles->x[2] = rtiles[5].x;
tiles->x[3] = rtiles[5].x + rtiles[5].w;
tiles->y[0] = rtiles[1].y;
tiles->y[1] = rtiles[4].y;
tiles->y[2] = rtiles[7].y;
tiles->y[3] = rtiles[7].y + rtiles[7].h;
render_buffer.offset = rtiles[4].offset;
render_buffer.stride = rtiles[4].stride;
render_buffer.ptr = rtiles[4].buffer;
functions.set_tiles(buffers);
}
bool DenoisingTask::run_denoising()
{
/* Allocate denoising buffer. */
buffer.passes = 14; buffer.passes = 14;
buffer.width = rect.z - rect.x; buffer.width = rect.z - rect.x;
buffer.stride = align_up(buffer.width, 4); buffer.stride = align_up(buffer.width, 4);
buffer.h = rect.w - rect.y; buffer.h = rect.w - rect.y;
buffer.pass_stride = align_up(buffer.stride * buffer.h, divide_up(device->mem_sub_ptr_alignment(), sizeof(float))); int alignment_floats = divide_up(device->mem_sub_ptr_alignment(), sizeof(float));
buffer.mem.alloc_to_device(buffer.pass_stride * buffer.passes, false); buffer.pass_stride = align_up(buffer.stride * buffer.h, alignment_floats);
/* Pad the total size by four floats since the SIMD kernels might go a bit over the end. */
int mem_size = align_up(buffer.pass_stride * buffer.passes + 4, alignment_floats);
buffer.mem.alloc_to_device(mem_size, false);
}
void DenoisingTask::prefilter_shadowing()
{
device_ptr null_ptr = (device_ptr) 0; device_ptr null_ptr = (device_ptr) 0;
/* Prefilter shadow feature. */ device_sub_ptr unfiltered_a (buffer.mem, 0, buffer.pass_stride);
{ device_sub_ptr unfiltered_b (buffer.mem, 1*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr unfiltered_a (buffer.mem, 0, buffer.pass_stride); device_sub_ptr sample_var (buffer.mem, 2*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr unfiltered_b (buffer.mem, 1*buffer.pass_stride, buffer.pass_stride); device_sub_ptr sample_var_var (buffer.mem, 3*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr sample_var (buffer.mem, 2*buffer.pass_stride, buffer.pass_stride); device_sub_ptr buffer_var (buffer.mem, 5*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr sample_var_var (buffer.mem, 3*buffer.pass_stride, buffer.pass_stride); device_sub_ptr filtered_var (buffer.mem, 6*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr buffer_var (buffer.mem, 5*buffer.pass_stride, buffer.pass_stride); device_sub_ptr nlm_temporary_1(buffer.mem, 7*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr filtered_var (buffer.mem, 6*buffer.pass_stride, buffer.pass_stride); device_sub_ptr nlm_temporary_2(buffer.mem, 8*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr nlm_temporary_1(buffer.mem, 7*buffer.pass_stride, buffer.pass_stride); device_sub_ptr nlm_temporary_3(buffer.mem, 9*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr nlm_temporary_2(buffer.mem, 8*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr nlm_temporary_3(buffer.mem, 9*buffer.pass_stride, buffer.pass_stride);
nlm_state.temporary_1_ptr = *nlm_temporary_1; nlm_state.temporary_1_ptr = *nlm_temporary_1;
nlm_state.temporary_2_ptr = *nlm_temporary_2; nlm_state.temporary_2_ptr = *nlm_temporary_2;
nlm_state.temporary_3_ptr = *nlm_temporary_3; nlm_state.temporary_3_ptr = *nlm_temporary_3;
/* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */ /* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var); functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var);
/* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */ /* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
nlm_state.set_parameters(6, 3, 4.0f, 1.0f); nlm_state.set_parameters(6, 3, 4.0f, 1.0f);
functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var); functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var);
/* Reuse memory, the previous data isn't needed anymore. */ /* Reuse memory, the previous data isn't needed anymore. */
device_ptr filtered_a = *buffer_var, device_ptr filtered_a = *buffer_var,
filtered_b = *sample_var; filtered_b = *sample_var;
/* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */ /* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
nlm_state.set_parameters(5, 3, 1.0f, 0.25f); nlm_state.set_parameters(5, 3, 1.0f, 0.25f);
functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a); functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a);
functions.non_local_means(*unfiltered_b, *unfiltered_a, *filtered_var, filtered_b); functions.non_local_means(*unfiltered_b, *unfiltered_a, *filtered_var, filtered_b);
device_ptr residual_var = *sample_var_var; device_ptr residual_var = *sample_var_var;
/* Estimate the residual variance between the two filtered halves. */ /* Estimate the residual variance between the two filtered halves. */
functions.combine_halves(filtered_a, filtered_b, null_ptr, residual_var, 2, rect); functions.combine_halves(filtered_a, filtered_b, null_ptr, residual_var, 2, rect);
device_ptr final_a = *unfiltered_a, device_ptr final_a = *unfiltered_a,
final_b = *unfiltered_b; final_b = *unfiltered_b;
/* Use the residual variance for a second filter pass. */ /* Use the residual variance for a second filter pass. */
nlm_state.set_parameters(4, 2, 1.0f, 0.5f); nlm_state.set_parameters(4, 2, 1.0f, 0.5f);
functions.non_local_means(filtered_a, filtered_b, residual_var, final_a); functions.non_local_means(filtered_a, filtered_b, residual_var, final_a);
functions.non_local_means(filtered_b, filtered_a, residual_var, final_b); functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
/* Combine the two double-filtered halves to a final shadow feature. */ /* Combine the two double-filtered halves to a final shadow feature. */
device_sub_ptr shadow_pass(buffer.mem, 4*buffer.pass_stride, buffer.pass_stride); device_sub_ptr shadow_pass(buffer.mem, 4*buffer.pass_stride, buffer.pass_stride);
functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect); functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect);
}
void DenoisingTask::prefilter_features()
{
device_sub_ptr unfiltered (buffer.mem, 8*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr variance (buffer.mem, 9*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr nlm_temporary_1(buffer.mem, 10*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr nlm_temporary_2(buffer.mem, 11*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr nlm_temporary_3(buffer.mem, 12*buffer.pass_stride, buffer.pass_stride);
nlm_state.temporary_1_ptr = *nlm_temporary_1;
nlm_state.temporary_2_ptr = *nlm_temporary_2;
nlm_state.temporary_3_ptr = *nlm_temporary_3;
int mean_from[] = { 0, 1, 2, 12, 6, 7, 8 };
int variance_from[] = { 3, 4, 5, 13, 9, 10, 11};
int pass_to[] = { 1, 2, 3, 0, 5, 6, 7};
for(int pass = 0; pass < 7; pass++) {
device_sub_ptr feature_pass(buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride);
/* Get the unfiltered pass and its variance from the RenderBuffers. */
functions.get_feature(mean_from[pass], variance_from[pass], *unfiltered, *variance);
/* Smooth the pass and store the result in the denoising buffers. */
nlm_state.set_parameters(2, 2, 1.0f, 0.25f);
functions.non_local_means(*unfiltered, *unfiltered, *variance, *feature_pass);
}
}
void DenoisingTask::prefilter_color()
{
int mean_from[] = {20, 21, 22};
int variance_from[] = {23, 24, 25};
int mean_to[] = { 8, 9, 10};
int variance_to[] = {11, 12, 13};
int num_color_passes = 3;
storage.temporary_color.alloc_to_device(3*buffer.pass_stride, false);
device_sub_ptr nlm_temporary_1(storage.temporary_color, 0*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr nlm_temporary_2(storage.temporary_color, 1*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr nlm_temporary_3(storage.temporary_color, 2*buffer.pass_stride, buffer.pass_stride);
nlm_state.temporary_1_ptr = *nlm_temporary_1;
nlm_state.temporary_2_ptr = *nlm_temporary_2;
nlm_state.temporary_3_ptr = *nlm_temporary_3;
for(int pass = 0; pass < num_color_passes; pass++) {
device_sub_ptr color_pass(storage.temporary_color, pass*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr color_var_pass(buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride);
functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass);
} }
/* Prefilter general features. */ device_sub_ptr depth_pass (buffer.mem, 0, buffer.pass_stride);
{ device_sub_ptr color_var_pass(buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
device_sub_ptr unfiltered (buffer.mem, 8*buffer.pass_stride, buffer.pass_stride); device_sub_ptr output_pass (buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
device_sub_ptr variance (buffer.mem, 9*buffer.pass_stride, buffer.pass_stride); functions.detect_outliers(storage.temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
device_sub_ptr nlm_temporary_1(buffer.mem, 10*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr nlm_temporary_2(buffer.mem, 11*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr nlm_temporary_3(buffer.mem, 12*buffer.pass_stride, buffer.pass_stride);
nlm_state.temporary_1_ptr = *nlm_temporary_1; storage.temporary_color.free();
nlm_state.temporary_2_ptr = *nlm_temporary_2; }
nlm_state.temporary_3_ptr = *nlm_temporary_3;
int mean_from[] = { 0, 1, 2, 12, 6, 7, 8 };
int variance_from[] = { 3, 4, 5, 13, 9, 10, 11};
int pass_to[] = { 1, 2, 3, 0, 5, 6, 7};
for(int pass = 0; pass < 7; pass++) {
device_sub_ptr feature_pass(buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride);
/* Get the unfiltered pass and its variance from the RenderBuffers. */
functions.get_feature(mean_from[pass], variance_from[pass], *unfiltered, *variance);
/* Smooth the pass and store the result in the denoising buffers. */
nlm_state.set_parameters(2, 2, 1.0f, 0.25f);
functions.non_local_means(*unfiltered, *unfiltered, *variance, *feature_pass);
}
}
/* Copy color passes. */
{
int mean_from[] = {20, 21, 22};
int variance_from[] = {23, 24, 25};
int mean_to[] = { 8, 9, 10};
int variance_to[] = {11, 12, 13};
int num_color_passes = 3;
storage.temporary_color.alloc_to_device(3*buffer.pass_stride, false);
for(int pass = 0; pass < num_color_passes; pass++) {
device_sub_ptr color_pass(storage.temporary_color, pass*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr color_var_pass(buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride);
functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass);
}
{
device_sub_ptr depth_pass (buffer.mem, 0, buffer.pass_stride);
device_sub_ptr color_var_pass(buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
device_sub_ptr output_pass (buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
functions.detect_outliers(storage.temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
}
}
void DenoisingTask::construct_transform()
{
storage.w = filter_area.z; storage.w = filter_area.z;
storage.h = filter_area.w; storage.h = filter_area.w;
storage.transform.alloc_to_device(storage.w*storage.h*TRANSFORM_SIZE, false); storage.transform.alloc_to_device(storage.w*storage.h*TRANSFORM_SIZE, false);
storage.rank.alloc_to_device(storage.w*storage.h, false); storage.rank.alloc_to_device(storage.w*storage.h, false);
functions.construct_transform(); functions.construct_transform();
}
void DenoisingTask::reconstruct()
{
device_only_memory<float> temporary_1(device, "Denoising NLM temporary 1"); device_only_memory<float> temporary_1(device, "Denoising NLM temporary 1");
device_only_memory<float> temporary_2(device, "Denoising NLM temporary 2"); device_only_memory<float> temporary_2(device, "Denoising NLM temporary 2");
@@ -214,21 +231,36 @@ bool DenoisingTask::run_denoising()
storage.XtWY.alloc_to_device(storage.w*storage.h*XTWY_SIZE, false); storage.XtWY.alloc_to_device(storage.w*storage.h*XTWY_SIZE, false);
reconstruction_state.filter_window = rect_from_shape(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h); reconstruction_state.filter_window = rect_from_shape(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h);
int tile_coordinate_offset = filter_area.y*render_buffer.stride + filter_area.x; int tile_coordinate_offset = filter_area.y*target_buffer.stride + filter_area.x;
reconstruction_state.buffer_params = make_int4(render_buffer.offset + tile_coordinate_offset, reconstruction_state.buffer_params = make_int4(target_buffer.offset + tile_coordinate_offset,
render_buffer.stride, target_buffer.stride,
render_buffer.pass_stride, target_buffer.pass_stride,
render_buffer.denoising_clean_offset); target_buffer.denoising_clean_offset);
reconstruction_state.source_w = rect.z-rect.x; reconstruction_state.source_w = rect.z-rect.x;
reconstruction_state.source_h = rect.w-rect.y; reconstruction_state.source_h = rect.w-rect.y;
{ device_sub_ptr color_ptr (buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride);
device_sub_ptr color_ptr (buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride); device_sub_ptr color_var_ptr(buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride);
device_sub_ptr color_var_ptr(buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride); functions.reconstruct(*color_ptr, *color_var_ptr, target_buffer.ptr);
functions.reconstruct(*color_ptr, *color_var_ptr, render_buffer.ptr); }
}
return true; void DenoisingTask::run_denoising(RenderTile *tile)
{
RenderTile rtiles[10];
rtiles[4] = *tile;
functions.map_neighbor_tiles(rtiles);
set_render_buffer(rtiles);
setup_denoising_buffer();
prefilter_shadowing();
prefilter_features();
prefilter_color();
construct_transform();
reconstruct();
functions.unmap_neighbor_tiles(rtiles);
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -32,20 +32,24 @@ public:
float nlm_k_2; float nlm_k_2;
float pca_threshold; float pca_threshold;
/* Pointer and parameters of the RenderBuffers. */ /* Parameters of the RenderBuffers. */
struct RenderBuffers { struct RenderBuffers {
int denoising_data_offset;
int denoising_clean_offset;
int pass_stride;
int offset; int offset;
int stride; int pass_stride;
device_ptr ptr;
int samples; int samples;
} render_buffer; } render_buffer;
TilesInfo *tiles; /* Pointer and parameters of the target buffer. */
device_vector<int> tiles_mem; struct TargetBuffer {
void tiles_from_rendertiles(RenderTile *rtiles); int offset;
int stride;
int pass_stride;
int denoising_clean_offset;
device_ptr ptr;
} target_buffer;
TileInfo *tile_info;
device_vector<int> tile_info_mem;
int4 rect; int4 rect;
int4 filter_area; int4 filter_area;
@@ -85,7 +89,8 @@ public:
device_ptr depth_ptr, device_ptr depth_ptr,
device_ptr output_ptr device_ptr output_ptr
)> detect_outliers; )> detect_outliers;
function<bool(device_ptr*)> set_tiles; function<void(RenderTile *rtiles)> map_neighbor_tiles;
function<void(RenderTile *rtiles)> unmap_neighbor_tiles;
} functions; } functions;
/* Stores state of the current Reconstruction operation, /* Stores state of the current Reconstruction operation,
@@ -138,12 +143,10 @@ public:
{} {}
} storage; } storage;
DenoisingTask(Device *device); DenoisingTask(Device *device, const DeviceTask &task);
~DenoisingTask(); ~DenoisingTask();
void init_from_devicetask(const DeviceTask &task); void run_denoising(RenderTile *tile);
bool run_denoising();
struct DenoiseBuffers { struct DenoiseBuffers {
int pass_stride; int pass_stride;
@@ -160,6 +163,14 @@ public:
protected: protected:
Device *device; Device *device;
void set_render_buffer(RenderTile *rtiles);
void setup_denoising_buffer();
void prefilter_shadowing();
void prefilter_features();
void prefilter_color();
void construct_transform();
void reconstruct();
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -104,6 +104,26 @@ void device_memory::device_zero()
} }
} }
void device_memory::swap_device(Device *new_device,
size_t new_device_size,
device_ptr new_device_ptr)
{
original_device = device;
original_device_size = device_size;
original_device_ptr = device_pointer;
device = new_device;
device_size = new_device_size;
device_pointer = new_device_ptr;
}
void device_memory::restore_device()
{
device = original_device;
device_size = original_device_size;
device_pointer = original_device_ptr;
}
/* Device Sub Ptr */ /* Device Sub Ptr */
device_sub_ptr::device_sub_ptr(device_memory& mem, int offset, int size) device_sub_ptr::device_sub_ptr(device_memory& mem, int offset, int size)

View File

@@ -200,6 +200,9 @@ public:
virtual ~device_memory(); virtual ~device_memory();
void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr);
void restore_device();
protected: protected:
friend class CUDADevice; friend class CUDADevice;
@@ -222,6 +225,10 @@ protected:
void device_copy_to(); void device_copy_to();
void device_copy_from(int y, int w, int h, int elem); void device_copy_from(int y, int w, int h, int elem);
void device_zero(); void device_zero();
device_ptr original_device_ptr;
size_t original_device_size;
Device *original_device;
}; };
/* Device Only Memory /* Device Only Memory

View File

@@ -285,26 +285,27 @@ public:
mem.copy_from_device(0, mem.data_size, 1); mem.copy_from_device(0, mem.data_size, 1);
} }
Device *original_device = mem.device; mem.swap_device(sub_device, 0, 0);
device_ptr original_ptr = mem.device_pointer;
size_t original_size = mem.device_size;
mem.device = sub_device;
mem.device_pointer = 0;
mem.device_size = 0;
mem.copy_to_device(); mem.copy_to_device();
tiles[i].buffer = mem.device_pointer; tiles[i].buffer = mem.device_pointer;
tiles[i].device_size = mem.device_size;
mem.device = original_device; mem.restore_device();
mem.device_pointer = original_ptr;
mem.device_size = original_size;
} }
} }
} }
void unmap_neighbor_tiles(Device * sub_device, RenderTile * tiles) void unmap_neighbor_tiles(Device * sub_device, RenderTile * tiles)
{ {
/* Copy denoised result back to the host. */
device_vector<float> &mem = tiles[9].buffers->buffer;
mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
mem.copy_from_device(0, mem.data_size, 1);
mem.restore_device();
/* Copy denoised result to the original device. */
mem.copy_to_device();
for(int i = 0; i < 9; i++) { for(int i = 0; i < 9; i++) {
if(!tiles[i].buffers) { if(!tiles[i].buffers) {
continue; continue;
@@ -312,28 +313,9 @@ public:
device_vector<float> &mem = tiles[i].buffers->buffer; device_vector<float> &mem = tiles[i].buffers->buffer;
if(mem.device != sub_device) { if(mem.device != sub_device) {
Device *original_device = mem.device; mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
device_ptr original_ptr = mem.device_pointer;
size_t original_size = mem.device_size;
mem.device = sub_device;
mem.device_pointer = tiles[i].buffer;
/* Copy denoised tile to the host. */
if(i == 4) {
mem.copy_from_device(0, mem.data_size, 1);
}
sub_device->mem_free(mem); sub_device->mem_free(mem);
mem.restore_device();
mem.device = original_device;
mem.device_pointer = original_ptr;
mem.device_size = original_size;
/* Copy denoised tile to the original device. */
if(i == 4) {
mem.copy_to_device();
}
} }
} }
} }

View File

@@ -362,7 +362,7 @@ public:
void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half); void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half);
void shader(DeviceTask& task); void shader(DeviceTask& task);
void denoise(RenderTile& tile, DenoisingTask& denoising, const DeviceTask& task); void denoise(RenderTile& tile, DenoisingTask& denoising);
class OpenCLDeviceTask : public DeviceTask { class OpenCLDeviceTask : public DeviceTask {
public: public:
@@ -436,8 +436,6 @@ protected:
device_ptr depth_ptr, device_ptr depth_ptr,
device_ptr output_ptr, device_ptr output_ptr,
DenoisingTask *task); DenoisingTask *task);
bool denoising_set_tiles(device_ptr *buffers,
DenoisingTask *task);
device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size); device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size);
void mem_free_sub_ptr(device_ptr ptr); void mem_free_sub_ptr(device_ptr ptr);

View File

@@ -246,7 +246,6 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
denoising_program.add_kernel(ustring("filter_nlm_normalize")); denoising_program.add_kernel(ustring("filter_nlm_normalize"));
denoising_program.add_kernel(ustring("filter_nlm_construct_gramian")); denoising_program.add_kernel(ustring("filter_nlm_construct_gramian"));
denoising_program.add_kernel(ustring("filter_finalize")); denoising_program.add_kernel(ustring("filter_finalize"));
denoising_program.add_kernel(ustring("filter_set_tiles"));
vector<OpenCLProgram*> programs; vector<OpenCLProgram*> programs;
programs.push_back(&base_program); programs.push_back(&base_program);
@@ -977,13 +976,20 @@ bool OpenCLDeviceBase::denoising_divide_shadow(device_ptr a_ptr,
cl_mem sv_variance_mem = CL_MEM_PTR(sv_variance_ptr); cl_mem sv_variance_mem = CL_MEM_PTR(sv_variance_ptr);
cl_mem buffer_variance_mem = CL_MEM_PTR(buffer_variance_ptr); cl_mem buffer_variance_mem = CL_MEM_PTR(buffer_variance_ptr);
cl_mem tiles_mem = CL_MEM_PTR(task->tiles_mem.device_pointer); cl_mem tile_info_mem = CL_MEM_PTR(task->tile_info_mem.device_pointer);
cl_kernel ckFilterDivideShadow = denoising_program(ustring("filter_divide_shadow")); cl_kernel ckFilterDivideShadow = denoising_program(ustring("filter_divide_shadow"));
kernel_set_args(ckFilterDivideShadow, 0, int arg_ofs = kernel_set_args(ckFilterDivideShadow, 0,
task->render_buffer.samples, task->render_buffer.samples,
tiles_mem, tile_info_mem);
cl_mem buffers[9];
for(int i = 0; i < 9; i++) {
buffers[i] = CL_MEM_PTR(task->tile_info->buffers[i]);
arg_ofs += kernel_set_args(ckFilterDivideShadow, arg_ofs,
buffers[i]);
}
kernel_set_args(ckFilterDivideShadow, arg_ofs,
a_mem, a_mem,
b_mem, b_mem,
sample_variance_mem, sample_variance_mem,
@@ -991,7 +997,7 @@ bool OpenCLDeviceBase::denoising_divide_shadow(device_ptr a_ptr,
buffer_variance_mem, buffer_variance_mem,
task->rect, task->rect,
task->render_buffer.pass_stride, task->render_buffer.pass_stride,
task->render_buffer.denoising_data_offset); task->render_buffer.offset);
enqueue_kernel(ckFilterDivideShadow, enqueue_kernel(ckFilterDivideShadow,
task->rect.z-task->rect.x, task->rect.z-task->rect.x,
task->rect.w-task->rect.y); task->rect.w-task->rect.y);
@@ -1008,20 +1014,27 @@ bool OpenCLDeviceBase::denoising_get_feature(int mean_offset,
cl_mem mean_mem = CL_MEM_PTR(mean_ptr); cl_mem mean_mem = CL_MEM_PTR(mean_ptr);
cl_mem variance_mem = CL_MEM_PTR(variance_ptr); cl_mem variance_mem = CL_MEM_PTR(variance_ptr);
cl_mem tiles_mem = CL_MEM_PTR(task->tiles_mem.device_pointer); cl_mem tile_info_mem = CL_MEM_PTR(task->tile_info_mem.device_pointer);
cl_kernel ckFilterGetFeature = denoising_program(ustring("filter_get_feature")); cl_kernel ckFilterGetFeature = denoising_program(ustring("filter_get_feature"));
kernel_set_args(ckFilterGetFeature, 0, int arg_ofs = kernel_set_args(ckFilterGetFeature, 0,
task->render_buffer.samples, task->render_buffer.samples,
tiles_mem, tile_info_mem);
cl_mem buffers[9];
for(int i = 0; i < 9; i++) {
buffers[i] = CL_MEM_PTR(task->tile_info->buffers[i]);
arg_ofs += kernel_set_args(ckFilterGetFeature, arg_ofs,
buffers[i]);
}
kernel_set_args(ckFilterGetFeature, arg_ofs,
mean_offset, mean_offset,
variance_offset, variance_offset,
mean_mem, mean_mem,
variance_mem, variance_mem,
task->rect, task->rect,
task->render_buffer.pass_stride, task->render_buffer.pass_stride,
task->render_buffer.denoising_data_offset); task->render_buffer.offset);
enqueue_kernel(ckFilterGetFeature, enqueue_kernel(ckFilterGetFeature,
task->rect.z-task->rect.x, task->rect.z-task->rect.x,
task->rect.w-task->rect.y); task->rect.w-task->rect.y);
@@ -1056,29 +1069,8 @@ bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr,
return true; return true;
} }
bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers, void OpenCLDeviceBase::denoise(RenderTile &rtile, DenoisingTask& denoising)
DenoisingTask *task)
{ {
task->tiles_mem.copy_to_device();
cl_mem tiles_mem = CL_MEM_PTR(task->tiles_mem.device_pointer);
cl_kernel ckFilterSetTiles = denoising_program(ustring("filter_set_tiles"));
kernel_set_args(ckFilterSetTiles, 0, tiles_mem);
for(int i = 0; i < 9; i++) {
cl_mem buffer_mem = CL_MEM_PTR(buffers[i]);
kernel_set_args(ckFilterSetTiles, i+1, buffer_mem);
}
enqueue_kernel(ckFilterSetTiles, 1, 1);
return true;
}
void OpenCLDeviceBase::denoise(RenderTile &rtile, DenoisingTask& denoising, const DeviceTask &task)
{
denoising.functions.set_tiles = function_bind(&OpenCLDeviceBase::denoising_set_tiles, this, _1, &denoising);
denoising.functions.construct_transform = function_bind(&OpenCLDeviceBase::denoising_construct_transform, this, &denoising); denoising.functions.construct_transform = function_bind(&OpenCLDeviceBase::denoising_construct_transform, this, &denoising);
denoising.functions.reconstruct = function_bind(&OpenCLDeviceBase::denoising_reconstruct, this, _1, _2, _3, &denoising); denoising.functions.reconstruct = function_bind(&OpenCLDeviceBase::denoising_reconstruct, this, _1, _2, _3, &denoising);
denoising.functions.divide_shadow = function_bind(&OpenCLDeviceBase::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); denoising.functions.divide_shadow = function_bind(&OpenCLDeviceBase::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
@@ -1090,16 +1082,7 @@ void OpenCLDeviceBase::denoise(RenderTile &rtile, DenoisingTask& denoising, cons
denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h); denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
denoising.render_buffer.samples = rtile.sample; denoising.render_buffer.samples = rtile.sample;
RenderTile rtiles[9]; denoising.run_denoising(&rtile);
rtiles[4] = rtile;
task.map_neighbor_tiles(rtiles, this);
denoising.tiles_from_rendertiles(rtiles);
denoising.init_from_devicetask(task);
denoising.run_denoising();
task.unmap_neighbor_tiles(rtiles, this);
} }
void OpenCLDeviceBase::shader(DeviceTask& task) void OpenCLDeviceBase::shader(DeviceTask& task)

View File

@@ -107,7 +107,7 @@ public:
} }
else if(task->type == DeviceTask::RENDER) { else if(task->type == DeviceTask::RENDER) {
RenderTile tile; RenderTile tile;
DenoisingTask denoising(this); DenoisingTask denoising(this, *task);
/* Keep rendering tiles until done. */ /* Keep rendering tiles until done. */
while(task->acquire_tile(this, tile)) { while(task->acquire_tile(this, tile)) {
@@ -141,7 +141,7 @@ public:
} }
else if(tile.task == RenderTile::DENOISE) { else if(tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples; tile.sample = tile.start_sample + tile.num_samples;
denoise(tile, denoising, *task); denoise(tile, denoising);
task->update_progress(&tile, tile.w*tile.h); task->update_progress(&tile, tile.w*tile.h);
} }

View File

@@ -129,7 +129,7 @@ public:
} }
else if(task->type == DeviceTask::RENDER) { else if(task->type == DeviceTask::RENDER) {
RenderTile tile; RenderTile tile;
DenoisingTask denoising(this); DenoisingTask denoising(this, *task);
/* Allocate buffer for kernel globals */ /* Allocate buffer for kernel globals */
device_only_memory<KernelGlobalsDummy> kgbuffer(this, "kernel_globals"); device_only_memory<KernelGlobalsDummy> kgbuffer(this, "kernel_globals");
@@ -159,7 +159,7 @@ public:
} }
else if(tile.task == RenderTile::DENOISE) { else if(tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples; tile.sample = tile.start_sample + tile.num_samples;
denoise(tile, denoising, *task); denoise(tile, denoising);
task->update_progress(&tile, tile.w*tile.h); task->update_progress(&tile, tile.w*tile.h);
} }

View File

@@ -22,7 +22,7 @@
#define XTWX_SIZE (((DENOISE_FEATURES+1)*(DENOISE_FEATURES+2))/2) #define XTWX_SIZE (((DENOISE_FEATURES+1)*(DENOISE_FEATURES+2))/2)
#define XTWY_SIZE (DENOISE_FEATURES+1) #define XTWY_SIZE (DENOISE_FEATURES+1)
typedef struct TilesInfo { typedef struct TileInfo {
int offsets[9]; int offsets[9];
int strides[9]; int strides[9];
int x[4]; int x[4];
@@ -33,6 +33,31 @@ typedef struct TilesInfo {
#else #else
long long int buffers[9]; long long int buffers[9];
#endif #endif
} TilesInfo; } TileInfo;
#ifdef __KERNEL_OPENCL__
# define CCL_FILTER_TILE_INFO ccl_global TileInfo* tile_info, \
ccl_global float *tile_buffer_1, \
ccl_global float *tile_buffer_2, \
ccl_global float *tile_buffer_3, \
ccl_global float *tile_buffer_4, \
ccl_global float *tile_buffer_5, \
ccl_global float *tile_buffer_6, \
ccl_global float *tile_buffer_7, \
ccl_global float *tile_buffer_8, \
ccl_global float *tile_buffer_9
# define CCL_FILTER_TILE_INFO_ARG tile_info, \
tile_buffer_1, tile_buffer_2, tile_buffer_3, \
tile_buffer_4, tile_buffer_5, tile_buffer_6, \
tile_buffer_7, tile_buffer_8, tile_buffer_9
# define ccl_get_tile_buffer(id) (tile_buffer_ ## id)
#else
# ifdef __KERNEL_CUDA__
# define CCL_FILTER_TILE_INFO ccl_global TileInfo* tile_info
# else
# define CCL_FILTER_TILE_INFO TileInfo* tile_info
# endif
# define ccl_get_tile_buffer(id) (tile_info->buffers[id])
#endif
#endif /* __FILTER_DEFINES_H__*/ #endif /* __FILTER_DEFINES_H__*/

View File

@@ -26,7 +26,7 @@ CCL_NAMESPACE_BEGIN
* bufferVariance: The buffer-based variance of the shadow feature. Unbiased, but quite noisy. * bufferVariance: The buffer-based variance of the shadow feature. Unbiased, but quite noisy.
*/ */
ccl_device void kernel_filter_divide_shadow(int sample, ccl_device void kernel_filter_divide_shadow(int sample,
ccl_global TilesInfo *tiles, CCL_FILTER_TILE_INFO,
int x, int y, int x, int y,
ccl_global float *unfilteredA, ccl_global float *unfilteredA,
ccl_global float *unfilteredB, ccl_global float *unfilteredB,
@@ -37,13 +37,13 @@ ccl_device void kernel_filter_divide_shadow(int sample,
int buffer_pass_stride, int buffer_pass_stride,
int buffer_denoising_offset) int buffer_denoising_offset)
{ {
int xtile = (x < tiles->x[1])? 0: ((x < tiles->x[2])? 1: 2); int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2);
int ytile = (y < tiles->y[1])? 0: ((y < tiles->y[2])? 1: 2); int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 1: 2);
int tile = ytile*3+xtile; int tile = ytile*3+xtile;
int offset = tiles->offsets[tile]; int offset = tile_info->offsets[tile];
int stride = tiles->strides[tile]; int stride = tile_info->strides[tile];
const ccl_global float *ccl_restrict center_buffer = (ccl_global float*) tiles->buffers[tile]; const ccl_global float *ccl_restrict center_buffer = (ccl_global float*) ccl_get_tile_buffer(tile);
center_buffer += (y*stride + x + offset)*buffer_pass_stride; center_buffer += (y*stride + x + offset)*buffer_pass_stride;
center_buffer += buffer_denoising_offset + 14; center_buffer += buffer_denoising_offset + 14;
@@ -79,7 +79,7 @@ ccl_device void kernel_filter_divide_shadow(int sample,
* - rect: The prefilter area (lower pixels inclusive, upper pixels exclusive). * - rect: The prefilter area (lower pixels inclusive, upper pixels exclusive).
*/ */
ccl_device void kernel_filter_get_feature(int sample, ccl_device void kernel_filter_get_feature(int sample,
ccl_global TilesInfo *tiles, CCL_FILTER_TILE_INFO,
int m_offset, int v_offset, int m_offset, int v_offset,
int x, int y, int x, int y,
ccl_global float *mean, ccl_global float *mean,
@@ -87,10 +87,10 @@ ccl_device void kernel_filter_get_feature(int sample,
int4 rect, int buffer_pass_stride, int4 rect, int buffer_pass_stride,
int buffer_denoising_offset) int buffer_denoising_offset)
{ {
int xtile = (x < tiles->x[1])? 0: ((x < tiles->x[2])? 1: 2); int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2);
int ytile = (y < tiles->y[1])? 0: ((y < tiles->y[2])? 1: 2); int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 1: 2);
int tile = ytile*3+xtile; int tile = ytile*3+xtile;
ccl_global float *center_buffer = ((ccl_global float*) tiles->buffers[tile]) + (tiles->offsets[tile] + y*tiles->strides[tile] + x)*buffer_pass_stride + buffer_denoising_offset; ccl_global float *center_buffer = ((ccl_global float*) ccl_get_tile_buffer(tile)) + (tile_info->offsets[tile] + y*tile_info->strides[tile] + x)*buffer_pass_stride + buffer_denoising_offset;
int buffer_w = align_up(rect.z - rect.x, 4); int buffer_w = align_up(rect.z - rect.x, 4);
int idx = (y-rect.y)*buffer_w + (x - rect.x); int idx = (y-rect.y)*buffer_w + (x - rect.x);

View File

@@ -17,7 +17,7 @@
/* Templated common declaration part of all CPU kernels. */ /* Templated common declaration part of all CPU kernels. */
void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
TilesInfo *tiles, TileInfo *tile_info,
int x, int x,
int y, int y,
float *unfilteredA, float *unfilteredA,
@@ -30,7 +30,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
int buffer_denoising_offset); int buffer_denoising_offset);
void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
TilesInfo *tiles, TileInfo *tile_info,
int m_offset, int m_offset,
int v_offset, int v_offset,
int x, int x,

View File

@@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN
/* Denoise filter */ /* Denoise filter */
void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
TilesInfo *tiles, TileInfo *tile_info,
int x, int x,
int y, int y,
float *unfilteredA, float *unfilteredA,
@@ -49,7 +49,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
#ifdef KERNEL_STUB #ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow); STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
#else #else
kernel_filter_divide_shadow(sample, tiles, kernel_filter_divide_shadow(sample, tile_info,
x, y, x, y,
unfilteredA, unfilteredA,
unfilteredB, unfilteredB,
@@ -63,7 +63,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
} }
void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
TilesInfo *tiles, TileInfo *tile_info,
int m_offset, int m_offset,
int v_offset, int v_offset,
int x, int x,
@@ -76,7 +76,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
#ifdef KERNEL_STUB #ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_get_feature); STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
#else #else
kernel_filter_get_feature(sample, tiles, kernel_filter_get_feature(sample, tile_info,
m_offset, v_offset, m_offset, v_offset,
x, y, x, y,
mean, variance, mean, variance,

View File

@@ -29,7 +29,7 @@
extern "C" __global__ void extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_divide_shadow(int sample, kernel_cuda_filter_divide_shadow(int sample,
TilesInfo *tiles, TileInfo *tile_info,
float *unfilteredA, float *unfilteredA,
float *unfilteredB, float *unfilteredB,
float *sampleVariance, float *sampleVariance,
@@ -43,7 +43,7 @@ kernel_cuda_filter_divide_shadow(int sample,
int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y; int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
if(x < prefilter_rect.z && y < prefilter_rect.w) { if(x < prefilter_rect.z && y < prefilter_rect.w) {
kernel_filter_divide_shadow(sample, kernel_filter_divide_shadow(sample,
tiles, tile_info,
x, y, x, y,
unfilteredA, unfilteredA,
unfilteredB, unfilteredB,
@@ -59,7 +59,7 @@ kernel_cuda_filter_divide_shadow(int sample,
extern "C" __global__ void extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_get_feature(int sample, kernel_cuda_filter_get_feature(int sample,
TilesInfo *tiles, TileInfo *tile_info,
int m_offset, int m_offset,
int v_offset, int v_offset,
float *mean, float *mean,
@@ -72,7 +72,7 @@ kernel_cuda_filter_get_feature(int sample,
int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y; int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
if(x < prefilter_rect.z && y < prefilter_rect.w) { if(x < prefilter_rect.z && y < prefilter_rect.w) {
kernel_filter_get_feature(sample, kernel_filter_get_feature(sample,
tiles, tile_info,
m_offset, v_offset, m_offset, v_offset,
x, y, x, y,
mean, variance, mean, variance,

View File

@@ -23,7 +23,7 @@
/* kernels */ /* kernels */
__kernel void kernel_ocl_filter_divide_shadow(int sample, __kernel void kernel_ocl_filter_divide_shadow(int sample,
ccl_global TilesInfo *tiles, CCL_FILTER_TILE_INFO,
ccl_global float *unfilteredA, ccl_global float *unfilteredA,
ccl_global float *unfilteredB, ccl_global float *unfilteredB,
ccl_global float *sampleVariance, ccl_global float *sampleVariance,
@@ -37,7 +37,7 @@ __kernel void kernel_ocl_filter_divide_shadow(int sample,
int y = prefilter_rect.y + get_global_id(1); int y = prefilter_rect.y + get_global_id(1);
if(x < prefilter_rect.z && y < prefilter_rect.w) { if(x < prefilter_rect.z && y < prefilter_rect.w) {
kernel_filter_divide_shadow(sample, kernel_filter_divide_shadow(sample,
tiles, CCL_FILTER_TILE_INFO_ARG,
x, y, x, y,
unfilteredA, unfilteredA,
unfilteredB, unfilteredB,
@@ -51,7 +51,7 @@ __kernel void kernel_ocl_filter_divide_shadow(int sample,
} }
__kernel void kernel_ocl_filter_get_feature(int sample, __kernel void kernel_ocl_filter_get_feature(int sample,
ccl_global TilesInfo *tiles, CCL_FILTER_TILE_INFO,
int m_offset, int m_offset,
int v_offset, int v_offset,
ccl_global float *mean, ccl_global float *mean,
@@ -64,7 +64,7 @@ __kernel void kernel_ocl_filter_get_feature(int sample,
int y = prefilter_rect.y + get_global_id(1); int y = prefilter_rect.y + get_global_id(1);
if(x < prefilter_rect.z && y < prefilter_rect.w) { if(x < prefilter_rect.z && y < prefilter_rect.w) {
kernel_filter_get_feature(sample, kernel_filter_get_feature(sample,
tiles, CCL_FILTER_TILE_INFO_ARG,
m_offset, v_offset, m_offset, v_offset,
x, y, x, y,
mean, variance, mean, variance,
@@ -276,27 +276,3 @@ __kernel void kernel_ocl_filter_finalize(ccl_global float *buffer,
buffer_params, sample); buffer_params, sample);
} }
} }
__kernel void kernel_ocl_filter_set_tiles(ccl_global TilesInfo* tiles,
ccl_global float *buffer_1,
ccl_global float *buffer_2,
ccl_global float *buffer_3,
ccl_global float *buffer_4,
ccl_global float *buffer_5,
ccl_global float *buffer_6,
ccl_global float *buffer_7,
ccl_global float *buffer_8,
ccl_global float *buffer_9)
{
if((get_global_id(0) == 0) && (get_global_id(1) == 0)) {
tiles->buffers[0] = buffer_1;
tiles->buffers[1] = buffer_2;
tiles->buffers[2] = buffer_3;
tiles->buffers[3] = buffer_4;
tiles->buffers[4] = buffer_5;
tiles->buffers[5] = buffer_6;
tiles->buffers[6] = buffer_7;
tiles->buffers[7] = buffer_8;
tiles->buffers[8] = buffer_9;
}
}

View File

@@ -137,6 +137,7 @@ public:
int tile_index; int tile_index;
device_ptr buffer; device_ptr buffer;
int device_size;
RenderBuffers *buffers; RenderBuffers *buffers;

View File

@@ -502,6 +502,9 @@ void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device)
assert(tiles[4].buffers); assert(tiles[4].buffers);
device->map_neighbor_tiles(tile_device, tiles); device->map_neighbor_tiles(tile_device, tiles);
/* The denoised result is written back to the original tile. */
tiles[9] = tiles[4];
} }
void Session::unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device) void Session::unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device)

View File

@@ -488,11 +488,11 @@ def smpte_from_frame(frame, fps=None, fps_base=None):
return ( return (
"%s%02d:%02d:%02d:%02d" % ( "%s%02d:%02d:%02d:%02d" % (
sign, sign,
int(frame / (3600 * fps)), # HH int(frame / (3600 * fps)), # HH
int((frame / (60 * fps)) % 60), # MM int((frame / (60 * fps)) % 60), # MM
int((frame / fps) % 60), # SS int((frame / fps) % 60), # SS
int(frame % fps), # FF int(frame % fps), # FF
)) ))
@@ -773,6 +773,7 @@ def _blender_default_map():
del _sys.modules["rna_manual_reference"] del _sys.modules["rna_manual_reference"]
return ret return ret
# hooks for doc lookups # hooks for doc lookups
_manual_map = [_blender_default_map] _manual_map = [_blender_default_map]