Merge branch 'master' into blender2.8
This commit is contained in:
@@ -21,7 +21,7 @@
|
|||||||
# <pep8 compliant>
|
# <pep8 compliant>
|
||||||
|
|
||||||
bpy_types_Operator_bl_property__doc__ = (
|
bpy_types_Operator_bl_property__doc__ = (
|
||||||
"""
|
"""
|
||||||
The name of a property to use as this operators primary property.
|
The name of a property to use as this operators primary property.
|
||||||
Currently this is only used to select the default property when
|
Currently this is only used to select the default property when
|
||||||
expanding an operator into a menu.
|
expanding an operator into a menu.
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ You'll need to specify your user login and password, obviously.
|
|||||||
|
|
||||||
Example usage:
|
Example usage:
|
||||||
|
|
||||||
./sphinx_doc_update.py --mirror ../../../docs/remote_api_backup/ --source ../.. --blender ../../../build_cmake/bin/blender --user foobar --password barfoo
|
./sphinx_doc_update.py --mirror ../../../docs/remote_api_backup/ --source ../.. --blender ../../../build_cmake/bin/blender --user foobar --password barfoo
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -107,14 +107,16 @@ def main():
|
|||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
# II) Generate doc source in temp dir.
|
# II) Generate doc source in temp dir.
|
||||||
doc_gen_cmd = (args.blender, "--background", "-noaudio", "--factory-startup", "--python-exit-code", "1",
|
doc_gen_cmd = (
|
||||||
"--python", "%s/doc/python_api/sphinx_doc_gen.py" % args.source_dir, "--",
|
args.blender, "--background", "-noaudio", "--factory-startup", "--python-exit-code", "1",
|
||||||
"--output", tmp_dir)
|
"--python", "%s/doc/python_api/sphinx_doc_gen.py" % args.source_dir, "--",
|
||||||
|
"--output", tmp_dir
|
||||||
|
)
|
||||||
subprocess.run(doc_gen_cmd)
|
subprocess.run(doc_gen_cmd)
|
||||||
|
|
||||||
# III) Get Blender version info.
|
# III) Get Blender version info.
|
||||||
getver_file = os.path.join(tmp_dir, "blendver.txt")
|
getver_file = os.path.join(tmp_dir, "blendver.txt")
|
||||||
getver_script = (""
|
getver_script = (
|
||||||
"import sys, bpy\n"
|
"import sys, bpy\n"
|
||||||
"with open(sys.argv[-1], 'w') as f:\n"
|
"with open(sys.argv[-1], 'w') as f:\n"
|
||||||
" is_release = bpy.app.version_cycle in {'rc', 'release'}\n"
|
" is_release = bpy.app.version_cycle in {'rc', 'release'}\n"
|
||||||
@@ -124,7 +126,8 @@ def main():
|
|||||||
" f.write('%d.%d%s\\n' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
|
" f.write('%d.%d%s\\n' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
|
||||||
" if is_release else '%s\\n' % branch)\n"
|
" if is_release else '%s\\n' % branch)\n"
|
||||||
" f.write('%d_%d%s_release' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
|
" f.write('%d_%d%s_release' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
|
||||||
" if is_release else '%d_%d_%d' % bpy.app.version)\n")
|
" if is_release else '%d_%d_%d' % bpy.app.version)\n"
|
||||||
|
)
|
||||||
get_ver_cmd = (args.blender, "--background", "-noaudio", "--factory-startup", "--python-exit-code", "1",
|
get_ver_cmd = (args.blender, "--background", "-noaudio", "--factory-startup", "--python-exit-code", "1",
|
||||||
"--python-expr", getver_script, "--", getver_file)
|
"--python-expr", getver_script, "--", getver_file)
|
||||||
subprocess.run(get_ver_cmd)
|
subprocess.run(get_ver_cmd)
|
||||||
|
|||||||
@@ -179,8 +179,8 @@ public:
|
|||||||
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
|
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
|
||||||
KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)> shader_kernel;
|
KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)> shader_kernel;
|
||||||
|
|
||||||
KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int)> filter_divide_shadow_kernel;
|
KernelFunctions<void(*)(int, TileInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int)> filter_divide_shadow_kernel;
|
||||||
KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int)> filter_get_feature_kernel;
|
KernelFunctions<void(*)(int, TileInfo*, int, int, int, int, float*, float*, int*, int, int)> filter_get_feature_kernel;
|
||||||
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel;
|
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel;
|
||||||
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
|
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
|
||||||
|
|
||||||
@@ -459,18 +459,6 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
|
|
||||||
{
|
|
||||||
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.host_pointer;
|
|
||||||
for(int i = 0; i < 9; i++) {
|
|
||||||
tiles->buffers[i] = buffers[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
task->tiles_mem.copy_to_device();
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool denoising_non_local_means(device_ptr image_ptr, device_ptr guide_ptr, device_ptr variance_ptr, device_ptr out_ptr,
|
bool denoising_non_local_means(device_ptr image_ptr, device_ptr guide_ptr, device_ptr variance_ptr, device_ptr out_ptr,
|
||||||
DenoisingTask *task)
|
DenoisingTask *task)
|
||||||
{
|
{
|
||||||
@@ -626,7 +614,7 @@ public:
|
|||||||
for(int y = task->rect.y; y < task->rect.w; y++) {
|
for(int y = task->rect.y; y < task->rect.w; y++) {
|
||||||
for(int x = task->rect.x; x < task->rect.z; x++) {
|
for(int x = task->rect.x; x < task->rect.z; x++) {
|
||||||
filter_divide_shadow_kernel()(task->render_buffer.samples,
|
filter_divide_shadow_kernel()(task->render_buffer.samples,
|
||||||
task->tiles,
|
task->tile_info,
|
||||||
x, y,
|
x, y,
|
||||||
(float*) a_ptr,
|
(float*) a_ptr,
|
||||||
(float*) b_ptr,
|
(float*) b_ptr,
|
||||||
@@ -635,7 +623,7 @@ public:
|
|||||||
(float*) buffer_variance_ptr,
|
(float*) buffer_variance_ptr,
|
||||||
&task->rect.x,
|
&task->rect.x,
|
||||||
task->render_buffer.pass_stride,
|
task->render_buffer.pass_stride,
|
||||||
task->render_buffer.denoising_data_offset);
|
task->render_buffer.offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@@ -650,7 +638,7 @@ public:
|
|||||||
for(int y = task->rect.y; y < task->rect.w; y++) {
|
for(int y = task->rect.y; y < task->rect.w; y++) {
|
||||||
for(int x = task->rect.x; x < task->rect.z; x++) {
|
for(int x = task->rect.x; x < task->rect.z; x++) {
|
||||||
filter_get_feature_kernel()(task->render_buffer.samples,
|
filter_get_feature_kernel()(task->render_buffer.samples,
|
||||||
task->tiles,
|
task->tile_info,
|
||||||
mean_offset,
|
mean_offset,
|
||||||
variance_offset,
|
variance_offset,
|
||||||
x, y,
|
x, y,
|
||||||
@@ -658,7 +646,7 @@ public:
|
|||||||
(float*) variance_ptr,
|
(float*) variance_ptr,
|
||||||
&task->rect.x,
|
&task->rect.x,
|
||||||
task->render_buffer.pass_stride,
|
task->render_buffer.pass_stride,
|
||||||
task->render_buffer.denoising_data_offset);
|
task->render_buffer.offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@@ -711,7 +699,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void denoise(DeviceTask &task, DenoisingTask& denoising, RenderTile &tile)
|
void denoise(DenoisingTask& denoising, RenderTile &tile)
|
||||||
{
|
{
|
||||||
tile.sample = tile.start_sample + tile.num_samples;
|
tile.sample = tile.start_sample + tile.num_samples;
|
||||||
|
|
||||||
@@ -722,23 +710,11 @@ public:
|
|||||||
denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
|
denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
|
||||||
denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
|
denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
|
||||||
denoising.functions.detect_outliers = function_bind(&CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
|
denoising.functions.detect_outliers = function_bind(&CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
|
||||||
denoising.functions.set_tiles = function_bind(&CPUDevice::denoising_set_tiles, this, _1, &denoising);
|
|
||||||
|
|
||||||
denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
|
denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
|
||||||
denoising.render_buffer.samples = tile.sample;
|
denoising.render_buffer.samples = tile.sample;
|
||||||
|
|
||||||
RenderTile rtiles[9];
|
denoising.run_denoising(&tile);
|
||||||
rtiles[4] = tile;
|
|
||||||
task.map_neighbor_tiles(rtiles, this);
|
|
||||||
denoising.tiles_from_rendertiles(rtiles);
|
|
||||||
|
|
||||||
denoising.init_from_devicetask(task);
|
|
||||||
|
|
||||||
denoising.run_denoising();
|
|
||||||
|
|
||||||
task.unmap_neighbor_tiles(rtiles, this);
|
|
||||||
|
|
||||||
task.update_progress(&tile, tile.w*tile.h);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void thread_render(DeviceTask& task)
|
void thread_render(DeviceTask& task)
|
||||||
@@ -766,7 +742,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
RenderTile tile;
|
RenderTile tile;
|
||||||
DenoisingTask denoising(this);
|
DenoisingTask denoising(this, task);
|
||||||
|
|
||||||
while(task.acquire_tile(this, tile)) {
|
while(task.acquire_tile(this, tile)) {
|
||||||
if(tile.task == RenderTile::PATH_TRACE) {
|
if(tile.task == RenderTile::PATH_TRACE) {
|
||||||
@@ -779,7 +755,9 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(tile.task == RenderTile::DENOISE) {
|
else if(tile.task == RenderTile::DENOISE) {
|
||||||
denoise(task, denoising, tile);
|
denoise(denoising, tile);
|
||||||
|
|
||||||
|
task.update_progress(&tile, tile.w*tile.h);
|
||||||
}
|
}
|
||||||
|
|
||||||
task.release_tile(tile);
|
task.release_tile(tile);
|
||||||
|
|||||||
@@ -1251,18 +1251,6 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
|
|
||||||
{
|
|
||||||
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.host_pointer;
|
|
||||||
for(int i = 0; i < 9; i++) {
|
|
||||||
tiles->buffers[i] = buffers[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
task->tiles_mem.copy_to_device();
|
|
||||||
|
|
||||||
return !have_error();
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CUDA_GET_BLOCKSIZE(func, w, h) \
|
#define CUDA_GET_BLOCKSIZE(func, w, h) \
|
||||||
int threads_per_block; \
|
int threads_per_block; \
|
||||||
cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
|
cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
|
||||||
@@ -1534,7 +1522,7 @@ public:
|
|||||||
task->rect.w-task->rect.y);
|
task->rect.w-task->rect.y);
|
||||||
|
|
||||||
void *args[] = {&task->render_buffer.samples,
|
void *args[] = {&task->render_buffer.samples,
|
||||||
&task->tiles_mem.device_pointer,
|
&task->tile_info_mem.device_pointer,
|
||||||
&a_ptr,
|
&a_ptr,
|
||||||
&b_ptr,
|
&b_ptr,
|
||||||
&sample_variance_ptr,
|
&sample_variance_ptr,
|
||||||
@@ -1542,7 +1530,7 @@ public:
|
|||||||
&buffer_variance_ptr,
|
&buffer_variance_ptr,
|
||||||
&task->rect,
|
&task->rect,
|
||||||
&task->render_buffer.pass_stride,
|
&task->render_buffer.pass_stride,
|
||||||
&task->render_buffer.denoising_data_offset};
|
&task->render_buffer.offset};
|
||||||
CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args);
|
CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args);
|
||||||
cuda_assert(cuCtxSynchronize());
|
cuda_assert(cuCtxSynchronize());
|
||||||
|
|
||||||
@@ -1568,14 +1556,14 @@ public:
|
|||||||
task->rect.w-task->rect.y);
|
task->rect.w-task->rect.y);
|
||||||
|
|
||||||
void *args[] = {&task->render_buffer.samples,
|
void *args[] = {&task->render_buffer.samples,
|
||||||
&task->tiles_mem.device_pointer,
|
&task->tile_info_mem.device_pointer,
|
||||||
&mean_offset,
|
&mean_offset,
|
||||||
&variance_offset,
|
&variance_offset,
|
||||||
&mean_ptr,
|
&mean_ptr,
|
||||||
&variance_ptr,
|
&variance_ptr,
|
||||||
&task->rect,
|
&task->rect,
|
||||||
&task->render_buffer.pass_stride,
|
&task->render_buffer.pass_stride,
|
||||||
&task->render_buffer.denoising_data_offset};
|
&task->render_buffer.offset};
|
||||||
CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args);
|
CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args);
|
||||||
cuda_assert(cuCtxSynchronize());
|
cuda_assert(cuCtxSynchronize());
|
||||||
|
|
||||||
@@ -1613,7 +1601,7 @@ public:
|
|||||||
return !have_error();
|
return !have_error();
|
||||||
}
|
}
|
||||||
|
|
||||||
void denoise(RenderTile &rtile, DenoisingTask& denoising, const DeviceTask &task)
|
void denoise(RenderTile &rtile, DenoisingTask& denoising)
|
||||||
{
|
{
|
||||||
denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising);
|
denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising);
|
||||||
denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, &denoising);
|
denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, &denoising);
|
||||||
@@ -1622,21 +1610,11 @@ public:
|
|||||||
denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
|
denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
|
||||||
denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
|
denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
|
||||||
denoising.functions.detect_outliers = function_bind(&CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
|
denoising.functions.detect_outliers = function_bind(&CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
|
||||||
denoising.functions.set_tiles = function_bind(&CUDADevice::denoising_set_tiles, this, _1, &denoising);
|
|
||||||
|
|
||||||
denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
|
denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
|
||||||
denoising.render_buffer.samples = rtile.sample;
|
denoising.render_buffer.samples = rtile.sample;
|
||||||
|
|
||||||
RenderTile rtiles[9];
|
denoising.run_denoising(&rtile);
|
||||||
rtiles[4] = rtile;
|
|
||||||
task.map_neighbor_tiles(rtiles, this);
|
|
||||||
denoising.tiles_from_rendertiles(rtiles);
|
|
||||||
|
|
||||||
denoising.init_from_devicetask(task);
|
|
||||||
|
|
||||||
denoising.run_denoising();
|
|
||||||
|
|
||||||
task.unmap_neighbor_tiles(rtiles, this);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void path_trace(DeviceTask& task, RenderTile& rtile, device_vector<WorkTile>& work_tiles)
|
void path_trace(DeviceTask& task, RenderTile& rtile, device_vector<WorkTile>& work_tiles)
|
||||||
@@ -2092,7 +2070,7 @@ public:
|
|||||||
|
|
||||||
/* keep rendering tiles until done */
|
/* keep rendering tiles until done */
|
||||||
RenderTile tile;
|
RenderTile tile;
|
||||||
DenoisingTask denoising(this);
|
DenoisingTask denoising(this, *task);
|
||||||
|
|
||||||
while(task->acquire_tile(this, tile)) {
|
while(task->acquire_tile(this, tile)) {
|
||||||
if(tile.task == RenderTile::PATH_TRACE) {
|
if(tile.task == RenderTile::PATH_TRACE) {
|
||||||
@@ -2107,7 +2085,7 @@ public:
|
|||||||
else if(tile.task == RenderTile::DENOISE) {
|
else if(tile.task == RenderTile::DENOISE) {
|
||||||
tile.sample = tile.start_sample + tile.num_samples;
|
tile.sample = tile.start_sample + tile.num_samples;
|
||||||
|
|
||||||
denoise(tile, denoising, *task);
|
denoise(tile, denoising);
|
||||||
|
|
||||||
task->update_progress(&tile, tile.w*tile.h);
|
task->update_progress(&tile, tile.w*tile.h);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,12 +20,29 @@
|
|||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
DenoisingTask::DenoisingTask(Device *device)
|
DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
|
||||||
: tiles_mem(device, "denoising tiles_mem", MEM_READ_WRITE),
|
: tile_info_mem(device, "denoising tile info mem", MEM_READ_WRITE),
|
||||||
storage(device),
|
storage(device),
|
||||||
buffer(device),
|
buffer(device),
|
||||||
device(device)
|
device(device)
|
||||||
{
|
{
|
||||||
|
radius = task.denoising_radius;
|
||||||
|
nlm_k_2 = powf(2.0f, lerp(-5.0f, 3.0f, task.denoising_strength));
|
||||||
|
if(task.denoising_relative_pca) {
|
||||||
|
pca_threshold = -powf(10.0f, lerp(-8.0f, 0.0f, task.denoising_feature_strength));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
pca_threshold = powf(10.0f, lerp(-5.0f, 3.0f, task.denoising_feature_strength));
|
||||||
|
}
|
||||||
|
|
||||||
|
render_buffer.pass_stride = task.pass_stride;
|
||||||
|
render_buffer.offset = task.pass_denoising_data;
|
||||||
|
|
||||||
|
target_buffer.pass_stride = task.pass_stride;
|
||||||
|
target_buffer.denoising_clean_offset = task.pass_denoising_clean;
|
||||||
|
|
||||||
|
functions.map_neighbor_tiles = function_bind(task.map_neighbor_tiles, _1, device);
|
||||||
|
functions.unmap_neighbor_tiles = function_bind(task.unmap_neighbor_tiles, _1, device);
|
||||||
}
|
}
|
||||||
|
|
||||||
DenoisingTask::~DenoisingTask()
|
DenoisingTask::~DenoisingTask()
|
||||||
@@ -38,170 +55,170 @@ DenoisingTask::~DenoisingTask()
|
|||||||
storage.temporary_2.free();
|
storage.temporary_2.free();
|
||||||
storage.temporary_color.free();
|
storage.temporary_color.free();
|
||||||
buffer.mem.free();
|
buffer.mem.free();
|
||||||
tiles_mem.free();
|
tile_info_mem.free();
|
||||||
}
|
}
|
||||||
|
|
||||||
void DenoisingTask::init_from_devicetask(const DeviceTask &task)
|
void DenoisingTask::set_render_buffer(RenderTile *rtiles)
|
||||||
{
|
{
|
||||||
radius = task.denoising_radius;
|
tile_info = (TileInfo*) tile_info_mem.alloc(sizeof(TileInfo)/sizeof(int));
|
||||||
nlm_k_2 = powf(2.0f, lerp(-5.0f, 3.0f, task.denoising_strength));
|
|
||||||
if(task.denoising_relative_pca) {
|
|
||||||
pca_threshold = -powf(10.0f, lerp(-8.0f, 0.0f, task.denoising_feature_strength));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
pca_threshold = powf(10.0f, lerp(-5.0f, 3.0f, task.denoising_feature_strength));
|
|
||||||
}
|
|
||||||
|
|
||||||
render_buffer.pass_stride = task.pass_stride;
|
for(int i = 0; i < 9; i++) {
|
||||||
render_buffer.denoising_data_offset = task.pass_denoising_data;
|
tile_info->offsets[i] = rtiles[i].offset;
|
||||||
render_buffer.denoising_clean_offset = task.pass_denoising_clean;
|
tile_info->strides[i] = rtiles[i].stride;
|
||||||
|
tile_info->buffers[i] = rtiles[i].buffer;
|
||||||
|
}
|
||||||
|
tile_info->x[0] = rtiles[3].x;
|
||||||
|
tile_info->x[1] = rtiles[4].x;
|
||||||
|
tile_info->x[2] = rtiles[5].x;
|
||||||
|
tile_info->x[3] = rtiles[5].x + rtiles[5].w;
|
||||||
|
tile_info->y[0] = rtiles[1].y;
|
||||||
|
tile_info->y[1] = rtiles[4].y;
|
||||||
|
tile_info->y[2] = rtiles[7].y;
|
||||||
|
tile_info->y[3] = rtiles[7].y + rtiles[7].h;
|
||||||
|
|
||||||
|
target_buffer.offset = rtiles[9].offset;
|
||||||
|
target_buffer.stride = rtiles[9].stride;
|
||||||
|
target_buffer.ptr = rtiles[9].buffer;
|
||||||
|
|
||||||
|
tile_info_mem.copy_to_device();
|
||||||
|
}
|
||||||
|
|
||||||
|
void DenoisingTask::setup_denoising_buffer()
|
||||||
|
{
|
||||||
/* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring tiles */
|
/* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring tiles */
|
||||||
rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w);
|
rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w);
|
||||||
rect = rect_expand(rect, radius);
|
rect = rect_expand(rect, radius);
|
||||||
rect = rect_clip(rect, make_int4(tiles->x[0], tiles->y[0], tiles->x[3], tiles->y[3]));
|
rect = rect_clip(rect, make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3]));
|
||||||
}
|
|
||||||
|
|
||||||
void DenoisingTask::tiles_from_rendertiles(RenderTile *rtiles)
|
|
||||||
{
|
|
||||||
tiles = (TilesInfo*) tiles_mem.alloc(sizeof(TilesInfo)/sizeof(int));
|
|
||||||
|
|
||||||
device_ptr buffers[9];
|
|
||||||
for(int i = 0; i < 9; i++) {
|
|
||||||
buffers[i] = rtiles[i].buffer;
|
|
||||||
tiles->offsets[i] = rtiles[i].offset;
|
|
||||||
tiles->strides[i] = rtiles[i].stride;
|
|
||||||
}
|
|
||||||
tiles->x[0] = rtiles[3].x;
|
|
||||||
tiles->x[1] = rtiles[4].x;
|
|
||||||
tiles->x[2] = rtiles[5].x;
|
|
||||||
tiles->x[3] = rtiles[5].x + rtiles[5].w;
|
|
||||||
tiles->y[0] = rtiles[1].y;
|
|
||||||
tiles->y[1] = rtiles[4].y;
|
|
||||||
tiles->y[2] = rtiles[7].y;
|
|
||||||
tiles->y[3] = rtiles[7].y + rtiles[7].h;
|
|
||||||
|
|
||||||
render_buffer.offset = rtiles[4].offset;
|
|
||||||
render_buffer.stride = rtiles[4].stride;
|
|
||||||
render_buffer.ptr = rtiles[4].buffer;
|
|
||||||
|
|
||||||
functions.set_tiles(buffers);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DenoisingTask::run_denoising()
|
|
||||||
{
|
|
||||||
/* Allocate denoising buffer. */
|
|
||||||
buffer.passes = 14;
|
buffer.passes = 14;
|
||||||
buffer.width = rect.z - rect.x;
|
buffer.width = rect.z - rect.x;
|
||||||
buffer.stride = align_up(buffer.width, 4);
|
buffer.stride = align_up(buffer.width, 4);
|
||||||
buffer.h = rect.w - rect.y;
|
buffer.h = rect.w - rect.y;
|
||||||
buffer.pass_stride = align_up(buffer.stride * buffer.h, divide_up(device->mem_sub_ptr_alignment(), sizeof(float)));
|
int alignment_floats = divide_up(device->mem_sub_ptr_alignment(), sizeof(float));
|
||||||
buffer.mem.alloc_to_device(buffer.pass_stride * buffer.passes, false);
|
buffer.pass_stride = align_up(buffer.stride * buffer.h, alignment_floats);
|
||||||
|
/* Pad the total size by four floats since the SIMD kernels might go a bit over the end. */
|
||||||
|
int mem_size = align_up(buffer.pass_stride * buffer.passes + 4, alignment_floats);
|
||||||
|
buffer.mem.alloc_to_device(mem_size, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DenoisingTask::prefilter_shadowing()
|
||||||
|
{
|
||||||
device_ptr null_ptr = (device_ptr) 0;
|
device_ptr null_ptr = (device_ptr) 0;
|
||||||
|
|
||||||
/* Prefilter shadow feature. */
|
device_sub_ptr unfiltered_a (buffer.mem, 0, buffer.pass_stride);
|
||||||
{
|
device_sub_ptr unfiltered_b (buffer.mem, 1*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr unfiltered_a (buffer.mem, 0, buffer.pass_stride);
|
device_sub_ptr sample_var (buffer.mem, 2*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr unfiltered_b (buffer.mem, 1*buffer.pass_stride, buffer.pass_stride);
|
device_sub_ptr sample_var_var (buffer.mem, 3*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr sample_var (buffer.mem, 2*buffer.pass_stride, buffer.pass_stride);
|
device_sub_ptr buffer_var (buffer.mem, 5*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr sample_var_var (buffer.mem, 3*buffer.pass_stride, buffer.pass_stride);
|
device_sub_ptr filtered_var (buffer.mem, 6*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr buffer_var (buffer.mem, 5*buffer.pass_stride, buffer.pass_stride);
|
device_sub_ptr nlm_temporary_1(buffer.mem, 7*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr filtered_var (buffer.mem, 6*buffer.pass_stride, buffer.pass_stride);
|
device_sub_ptr nlm_temporary_2(buffer.mem, 8*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr nlm_temporary_1(buffer.mem, 7*buffer.pass_stride, buffer.pass_stride);
|
device_sub_ptr nlm_temporary_3(buffer.mem, 9*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr nlm_temporary_2(buffer.mem, 8*buffer.pass_stride, buffer.pass_stride);
|
|
||||||
device_sub_ptr nlm_temporary_3(buffer.mem, 9*buffer.pass_stride, buffer.pass_stride);
|
|
||||||
|
|
||||||
nlm_state.temporary_1_ptr = *nlm_temporary_1;
|
nlm_state.temporary_1_ptr = *nlm_temporary_1;
|
||||||
nlm_state.temporary_2_ptr = *nlm_temporary_2;
|
nlm_state.temporary_2_ptr = *nlm_temporary_2;
|
||||||
nlm_state.temporary_3_ptr = *nlm_temporary_3;
|
nlm_state.temporary_3_ptr = *nlm_temporary_3;
|
||||||
|
|
||||||
/* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
|
/* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
|
||||||
functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var);
|
functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var);
|
||||||
|
|
||||||
/* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
|
/* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
|
||||||
nlm_state.set_parameters(6, 3, 4.0f, 1.0f);
|
nlm_state.set_parameters(6, 3, 4.0f, 1.0f);
|
||||||
functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var);
|
functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var);
|
||||||
|
|
||||||
/* Reuse memory, the previous data isn't needed anymore. */
|
/* Reuse memory, the previous data isn't needed anymore. */
|
||||||
device_ptr filtered_a = *buffer_var,
|
device_ptr filtered_a = *buffer_var,
|
||||||
filtered_b = *sample_var;
|
filtered_b = *sample_var;
|
||||||
/* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
|
/* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
|
||||||
nlm_state.set_parameters(5, 3, 1.0f, 0.25f);
|
nlm_state.set_parameters(5, 3, 1.0f, 0.25f);
|
||||||
functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a);
|
functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a);
|
||||||
functions.non_local_means(*unfiltered_b, *unfiltered_a, *filtered_var, filtered_b);
|
functions.non_local_means(*unfiltered_b, *unfiltered_a, *filtered_var, filtered_b);
|
||||||
|
|
||||||
device_ptr residual_var = *sample_var_var;
|
device_ptr residual_var = *sample_var_var;
|
||||||
/* Estimate the residual variance between the two filtered halves. */
|
/* Estimate the residual variance between the two filtered halves. */
|
||||||
functions.combine_halves(filtered_a, filtered_b, null_ptr, residual_var, 2, rect);
|
functions.combine_halves(filtered_a, filtered_b, null_ptr, residual_var, 2, rect);
|
||||||
|
|
||||||
device_ptr final_a = *unfiltered_a,
|
device_ptr final_a = *unfiltered_a,
|
||||||
final_b = *unfiltered_b;
|
final_b = *unfiltered_b;
|
||||||
/* Use the residual variance for a second filter pass. */
|
/* Use the residual variance for a second filter pass. */
|
||||||
nlm_state.set_parameters(4, 2, 1.0f, 0.5f);
|
nlm_state.set_parameters(4, 2, 1.0f, 0.5f);
|
||||||
functions.non_local_means(filtered_a, filtered_b, residual_var, final_a);
|
functions.non_local_means(filtered_a, filtered_b, residual_var, final_a);
|
||||||
functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
|
functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
|
||||||
|
|
||||||
/* Combine the two double-filtered halves to a final shadow feature. */
|
/* Combine the two double-filtered halves to a final shadow feature. */
|
||||||
device_sub_ptr shadow_pass(buffer.mem, 4*buffer.pass_stride, buffer.pass_stride);
|
device_sub_ptr shadow_pass(buffer.mem, 4*buffer.pass_stride, buffer.pass_stride);
|
||||||
functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect);
|
functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DenoisingTask::prefilter_features()
|
||||||
|
{
|
||||||
|
device_sub_ptr unfiltered (buffer.mem, 8*buffer.pass_stride, buffer.pass_stride);
|
||||||
|
device_sub_ptr variance (buffer.mem, 9*buffer.pass_stride, buffer.pass_stride);
|
||||||
|
device_sub_ptr nlm_temporary_1(buffer.mem, 10*buffer.pass_stride, buffer.pass_stride);
|
||||||
|
device_sub_ptr nlm_temporary_2(buffer.mem, 11*buffer.pass_stride, buffer.pass_stride);
|
||||||
|
device_sub_ptr nlm_temporary_3(buffer.mem, 12*buffer.pass_stride, buffer.pass_stride);
|
||||||
|
|
||||||
|
nlm_state.temporary_1_ptr = *nlm_temporary_1;
|
||||||
|
nlm_state.temporary_2_ptr = *nlm_temporary_2;
|
||||||
|
nlm_state.temporary_3_ptr = *nlm_temporary_3;
|
||||||
|
|
||||||
|
int mean_from[] = { 0, 1, 2, 12, 6, 7, 8 };
|
||||||
|
int variance_from[] = { 3, 4, 5, 13, 9, 10, 11};
|
||||||
|
int pass_to[] = { 1, 2, 3, 0, 5, 6, 7};
|
||||||
|
for(int pass = 0; pass < 7; pass++) {
|
||||||
|
device_sub_ptr feature_pass(buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride);
|
||||||
|
/* Get the unfiltered pass and its variance from the RenderBuffers. */
|
||||||
|
functions.get_feature(mean_from[pass], variance_from[pass], *unfiltered, *variance);
|
||||||
|
/* Smooth the pass and store the result in the denoising buffers. */
|
||||||
|
nlm_state.set_parameters(2, 2, 1.0f, 0.25f);
|
||||||
|
functions.non_local_means(*unfiltered, *unfiltered, *variance, *feature_pass);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DenoisingTask::prefilter_color()
|
||||||
|
{
|
||||||
|
int mean_from[] = {20, 21, 22};
|
||||||
|
int variance_from[] = {23, 24, 25};
|
||||||
|
int mean_to[] = { 8, 9, 10};
|
||||||
|
int variance_to[] = {11, 12, 13};
|
||||||
|
int num_color_passes = 3;
|
||||||
|
|
||||||
|
storage.temporary_color.alloc_to_device(3*buffer.pass_stride, false);
|
||||||
|
device_sub_ptr nlm_temporary_1(storage.temporary_color, 0*buffer.pass_stride, buffer.pass_stride);
|
||||||
|
device_sub_ptr nlm_temporary_2(storage.temporary_color, 1*buffer.pass_stride, buffer.pass_stride);
|
||||||
|
device_sub_ptr nlm_temporary_3(storage.temporary_color, 2*buffer.pass_stride, buffer.pass_stride);
|
||||||
|
|
||||||
|
nlm_state.temporary_1_ptr = *nlm_temporary_1;
|
||||||
|
nlm_state.temporary_2_ptr = *nlm_temporary_2;
|
||||||
|
nlm_state.temporary_3_ptr = *nlm_temporary_3;
|
||||||
|
|
||||||
|
for(int pass = 0; pass < num_color_passes; pass++) {
|
||||||
|
device_sub_ptr color_pass(storage.temporary_color, pass*buffer.pass_stride, buffer.pass_stride);
|
||||||
|
device_sub_ptr color_var_pass(buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride);
|
||||||
|
functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Prefilter general features. */
|
device_sub_ptr depth_pass (buffer.mem, 0, buffer.pass_stride);
|
||||||
{
|
device_sub_ptr color_var_pass(buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
|
||||||
device_sub_ptr unfiltered (buffer.mem, 8*buffer.pass_stride, buffer.pass_stride);
|
device_sub_ptr output_pass (buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
|
||||||
device_sub_ptr variance (buffer.mem, 9*buffer.pass_stride, buffer.pass_stride);
|
functions.detect_outliers(storage.temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
|
||||||
device_sub_ptr nlm_temporary_1(buffer.mem, 10*buffer.pass_stride, buffer.pass_stride);
|
|
||||||
device_sub_ptr nlm_temporary_2(buffer.mem, 11*buffer.pass_stride, buffer.pass_stride);
|
|
||||||
device_sub_ptr nlm_temporary_3(buffer.mem, 12*buffer.pass_stride, buffer.pass_stride);
|
|
||||||
|
|
||||||
nlm_state.temporary_1_ptr = *nlm_temporary_1;
|
storage.temporary_color.free();
|
||||||
nlm_state.temporary_2_ptr = *nlm_temporary_2;
|
}
|
||||||
nlm_state.temporary_3_ptr = *nlm_temporary_3;
|
|
||||||
|
|
||||||
int mean_from[] = { 0, 1, 2, 12, 6, 7, 8 };
|
|
||||||
int variance_from[] = { 3, 4, 5, 13, 9, 10, 11};
|
|
||||||
int pass_to[] = { 1, 2, 3, 0, 5, 6, 7};
|
|
||||||
for(int pass = 0; pass < 7; pass++) {
|
|
||||||
device_sub_ptr feature_pass(buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride);
|
|
||||||
/* Get the unfiltered pass and its variance from the RenderBuffers. */
|
|
||||||
functions.get_feature(mean_from[pass], variance_from[pass], *unfiltered, *variance);
|
|
||||||
/* Smooth the pass and store the result in the denoising buffers. */
|
|
||||||
nlm_state.set_parameters(2, 2, 1.0f, 0.25f);
|
|
||||||
functions.non_local_means(*unfiltered, *unfiltered, *variance, *feature_pass);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copy color passes. */
|
|
||||||
{
|
|
||||||
int mean_from[] = {20, 21, 22};
|
|
||||||
int variance_from[] = {23, 24, 25};
|
|
||||||
int mean_to[] = { 8, 9, 10};
|
|
||||||
int variance_to[] = {11, 12, 13};
|
|
||||||
int num_color_passes = 3;
|
|
||||||
|
|
||||||
storage.temporary_color.alloc_to_device(3*buffer.pass_stride, false);
|
|
||||||
|
|
||||||
for(int pass = 0; pass < num_color_passes; pass++) {
|
|
||||||
device_sub_ptr color_pass(storage.temporary_color, pass*buffer.pass_stride, buffer.pass_stride);
|
|
||||||
device_sub_ptr color_var_pass(buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride);
|
|
||||||
functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass);
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
device_sub_ptr depth_pass (buffer.mem, 0, buffer.pass_stride);
|
|
||||||
device_sub_ptr color_var_pass(buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
|
|
||||||
device_sub_ptr output_pass (buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
|
|
||||||
functions.detect_outliers(storage.temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
void DenoisingTask::construct_transform()
|
||||||
|
{
|
||||||
storage.w = filter_area.z;
|
storage.w = filter_area.z;
|
||||||
storage.h = filter_area.w;
|
storage.h = filter_area.w;
|
||||||
|
|
||||||
storage.transform.alloc_to_device(storage.w*storage.h*TRANSFORM_SIZE, false);
|
storage.transform.alloc_to_device(storage.w*storage.h*TRANSFORM_SIZE, false);
|
||||||
storage.rank.alloc_to_device(storage.w*storage.h, false);
|
storage.rank.alloc_to_device(storage.w*storage.h, false);
|
||||||
|
|
||||||
functions.construct_transform();
|
functions.construct_transform();
|
||||||
|
}
|
||||||
|
|
||||||
|
void DenoisingTask::reconstruct()
|
||||||
|
{
|
||||||
|
|
||||||
device_only_memory<float> temporary_1(device, "Denoising NLM temporary 1");
|
device_only_memory<float> temporary_1(device, "Denoising NLM temporary 1");
|
||||||
device_only_memory<float> temporary_2(device, "Denoising NLM temporary 2");
|
device_only_memory<float> temporary_2(device, "Denoising NLM temporary 2");
|
||||||
@@ -214,21 +231,36 @@ bool DenoisingTask::run_denoising()
|
|||||||
storage.XtWY.alloc_to_device(storage.w*storage.h*XTWY_SIZE, false);
|
storage.XtWY.alloc_to_device(storage.w*storage.h*XTWY_SIZE, false);
|
||||||
|
|
||||||
reconstruction_state.filter_window = rect_from_shape(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h);
|
reconstruction_state.filter_window = rect_from_shape(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h);
|
||||||
int tile_coordinate_offset = filter_area.y*render_buffer.stride + filter_area.x;
|
int tile_coordinate_offset = filter_area.y*target_buffer.stride + filter_area.x;
|
||||||
reconstruction_state.buffer_params = make_int4(render_buffer.offset + tile_coordinate_offset,
|
reconstruction_state.buffer_params = make_int4(target_buffer.offset + tile_coordinate_offset,
|
||||||
render_buffer.stride,
|
target_buffer.stride,
|
||||||
render_buffer.pass_stride,
|
target_buffer.pass_stride,
|
||||||
render_buffer.denoising_clean_offset);
|
target_buffer.denoising_clean_offset);
|
||||||
reconstruction_state.source_w = rect.z-rect.x;
|
reconstruction_state.source_w = rect.z-rect.x;
|
||||||
reconstruction_state.source_h = rect.w-rect.y;
|
reconstruction_state.source_h = rect.w-rect.y;
|
||||||
|
|
||||||
{
|
device_sub_ptr color_ptr (buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride);
|
||||||
device_sub_ptr color_ptr (buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride);
|
device_sub_ptr color_var_ptr(buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride);
|
||||||
device_sub_ptr color_var_ptr(buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride);
|
functions.reconstruct(*color_ptr, *color_var_ptr, target_buffer.ptr);
|
||||||
functions.reconstruct(*color_ptr, *color_var_ptr, render_buffer.ptr);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
void DenoisingTask::run_denoising(RenderTile *tile)
|
||||||
|
{
|
||||||
|
RenderTile rtiles[10];
|
||||||
|
rtiles[4] = *tile;
|
||||||
|
functions.map_neighbor_tiles(rtiles);
|
||||||
|
set_render_buffer(rtiles);
|
||||||
|
|
||||||
|
setup_denoising_buffer();
|
||||||
|
|
||||||
|
prefilter_shadowing();
|
||||||
|
prefilter_features();
|
||||||
|
prefilter_color();
|
||||||
|
|
||||||
|
construct_transform();
|
||||||
|
reconstruct();
|
||||||
|
|
||||||
|
functions.unmap_neighbor_tiles(rtiles);
|
||||||
}
|
}
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
|||||||
@@ -32,20 +32,24 @@ public:
|
|||||||
float nlm_k_2;
|
float nlm_k_2;
|
||||||
float pca_threshold;
|
float pca_threshold;
|
||||||
|
|
||||||
/* Pointer and parameters of the RenderBuffers. */
|
/* Parameters of the RenderBuffers. */
|
||||||
struct RenderBuffers {
|
struct RenderBuffers {
|
||||||
int denoising_data_offset;
|
|
||||||
int denoising_clean_offset;
|
|
||||||
int pass_stride;
|
|
||||||
int offset;
|
int offset;
|
||||||
int stride;
|
int pass_stride;
|
||||||
device_ptr ptr;
|
|
||||||
int samples;
|
int samples;
|
||||||
} render_buffer;
|
} render_buffer;
|
||||||
|
|
||||||
TilesInfo *tiles;
|
/* Pointer and parameters of the target buffer. */
|
||||||
device_vector<int> tiles_mem;
|
struct TargetBuffer {
|
||||||
void tiles_from_rendertiles(RenderTile *rtiles);
|
int offset;
|
||||||
|
int stride;
|
||||||
|
int pass_stride;
|
||||||
|
int denoising_clean_offset;
|
||||||
|
device_ptr ptr;
|
||||||
|
} target_buffer;
|
||||||
|
|
||||||
|
TileInfo *tile_info;
|
||||||
|
device_vector<int> tile_info_mem;
|
||||||
|
|
||||||
int4 rect;
|
int4 rect;
|
||||||
int4 filter_area;
|
int4 filter_area;
|
||||||
@@ -85,7 +89,8 @@ public:
|
|||||||
device_ptr depth_ptr,
|
device_ptr depth_ptr,
|
||||||
device_ptr output_ptr
|
device_ptr output_ptr
|
||||||
)> detect_outliers;
|
)> detect_outliers;
|
||||||
function<bool(device_ptr*)> set_tiles;
|
function<void(RenderTile *rtiles)> map_neighbor_tiles;
|
||||||
|
function<void(RenderTile *rtiles)> unmap_neighbor_tiles;
|
||||||
} functions;
|
} functions;
|
||||||
|
|
||||||
/* Stores state of the current Reconstruction operation,
|
/* Stores state of the current Reconstruction operation,
|
||||||
@@ -138,12 +143,10 @@ public:
|
|||||||
{}
|
{}
|
||||||
} storage;
|
} storage;
|
||||||
|
|
||||||
DenoisingTask(Device *device);
|
DenoisingTask(Device *device, const DeviceTask &task);
|
||||||
~DenoisingTask();
|
~DenoisingTask();
|
||||||
|
|
||||||
void init_from_devicetask(const DeviceTask &task);
|
void run_denoising(RenderTile *tile);
|
||||||
|
|
||||||
bool run_denoising();
|
|
||||||
|
|
||||||
struct DenoiseBuffers {
|
struct DenoiseBuffers {
|
||||||
int pass_stride;
|
int pass_stride;
|
||||||
@@ -160,6 +163,14 @@ public:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
Device *device;
|
Device *device;
|
||||||
|
|
||||||
|
void set_render_buffer(RenderTile *rtiles);
|
||||||
|
void setup_denoising_buffer();
|
||||||
|
void prefilter_shadowing();
|
||||||
|
void prefilter_features();
|
||||||
|
void prefilter_color();
|
||||||
|
void construct_transform();
|
||||||
|
void reconstruct();
|
||||||
};
|
};
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
|||||||
@@ -104,6 +104,26 @@ void device_memory::device_zero()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void device_memory::swap_device(Device *new_device,
|
||||||
|
size_t new_device_size,
|
||||||
|
device_ptr new_device_ptr)
|
||||||
|
{
|
||||||
|
original_device = device;
|
||||||
|
original_device_size = device_size;
|
||||||
|
original_device_ptr = device_pointer;
|
||||||
|
|
||||||
|
device = new_device;
|
||||||
|
device_size = new_device_size;
|
||||||
|
device_pointer = new_device_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void device_memory::restore_device()
|
||||||
|
{
|
||||||
|
device = original_device;
|
||||||
|
device_size = original_device_size;
|
||||||
|
device_pointer = original_device_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
/* Device Sub Ptr */
|
/* Device Sub Ptr */
|
||||||
|
|
||||||
device_sub_ptr::device_sub_ptr(device_memory& mem, int offset, int size)
|
device_sub_ptr::device_sub_ptr(device_memory& mem, int offset, int size)
|
||||||
|
|||||||
@@ -200,6 +200,9 @@ public:
|
|||||||
|
|
||||||
virtual ~device_memory();
|
virtual ~device_memory();
|
||||||
|
|
||||||
|
void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr);
|
||||||
|
void restore_device();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
friend class CUDADevice;
|
friend class CUDADevice;
|
||||||
|
|
||||||
@@ -222,6 +225,10 @@ protected:
|
|||||||
void device_copy_to();
|
void device_copy_to();
|
||||||
void device_copy_from(int y, int w, int h, int elem);
|
void device_copy_from(int y, int w, int h, int elem);
|
||||||
void device_zero();
|
void device_zero();
|
||||||
|
|
||||||
|
device_ptr original_device_ptr;
|
||||||
|
size_t original_device_size;
|
||||||
|
Device *original_device;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Device Only Memory
|
/* Device Only Memory
|
||||||
|
|||||||
@@ -285,26 +285,27 @@ public:
|
|||||||
mem.copy_from_device(0, mem.data_size, 1);
|
mem.copy_from_device(0, mem.data_size, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
Device *original_device = mem.device;
|
mem.swap_device(sub_device, 0, 0);
|
||||||
device_ptr original_ptr = mem.device_pointer;
|
|
||||||
size_t original_size = mem.device_size;
|
|
||||||
|
|
||||||
mem.device = sub_device;
|
|
||||||
mem.device_pointer = 0;
|
|
||||||
mem.device_size = 0;
|
|
||||||
|
|
||||||
mem.copy_to_device();
|
mem.copy_to_device();
|
||||||
tiles[i].buffer = mem.device_pointer;
|
tiles[i].buffer = mem.device_pointer;
|
||||||
|
tiles[i].device_size = mem.device_size;
|
||||||
|
|
||||||
mem.device = original_device;
|
mem.restore_device();
|
||||||
mem.device_pointer = original_ptr;
|
|
||||||
mem.device_size = original_size;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void unmap_neighbor_tiles(Device * sub_device, RenderTile * tiles)
|
void unmap_neighbor_tiles(Device * sub_device, RenderTile * tiles)
|
||||||
{
|
{
|
||||||
|
/* Copy denoised result back to the host. */
|
||||||
|
device_vector<float> &mem = tiles[9].buffers->buffer;
|
||||||
|
mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
|
||||||
|
mem.copy_from_device(0, mem.data_size, 1);
|
||||||
|
mem.restore_device();
|
||||||
|
/* Copy denoised result to the original device. */
|
||||||
|
mem.copy_to_device();
|
||||||
|
|
||||||
for(int i = 0; i < 9; i++) {
|
for(int i = 0; i < 9; i++) {
|
||||||
if(!tiles[i].buffers) {
|
if(!tiles[i].buffers) {
|
||||||
continue;
|
continue;
|
||||||
@@ -312,28 +313,9 @@ public:
|
|||||||
|
|
||||||
device_vector<float> &mem = tiles[i].buffers->buffer;
|
device_vector<float> &mem = tiles[i].buffers->buffer;
|
||||||
if(mem.device != sub_device) {
|
if(mem.device != sub_device) {
|
||||||
Device *original_device = mem.device;
|
mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
|
||||||
device_ptr original_ptr = mem.device_pointer;
|
|
||||||
size_t original_size = mem.device_size;
|
|
||||||
|
|
||||||
mem.device = sub_device;
|
|
||||||
mem.device_pointer = tiles[i].buffer;
|
|
||||||
|
|
||||||
/* Copy denoised tile to the host. */
|
|
||||||
if(i == 4) {
|
|
||||||
mem.copy_from_device(0, mem.data_size, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
sub_device->mem_free(mem);
|
sub_device->mem_free(mem);
|
||||||
|
mem.restore_device();
|
||||||
mem.device = original_device;
|
|
||||||
mem.device_pointer = original_ptr;
|
|
||||||
mem.device_size = original_size;
|
|
||||||
|
|
||||||
/* Copy denoised tile to the original device. */
|
|
||||||
if(i == 4) {
|
|
||||||
mem.copy_to_device();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -362,7 +362,7 @@ public:
|
|||||||
void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half);
|
void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half);
|
||||||
void shader(DeviceTask& task);
|
void shader(DeviceTask& task);
|
||||||
|
|
||||||
void denoise(RenderTile& tile, DenoisingTask& denoising, const DeviceTask& task);
|
void denoise(RenderTile& tile, DenoisingTask& denoising);
|
||||||
|
|
||||||
class OpenCLDeviceTask : public DeviceTask {
|
class OpenCLDeviceTask : public DeviceTask {
|
||||||
public:
|
public:
|
||||||
@@ -436,8 +436,6 @@ protected:
|
|||||||
device_ptr depth_ptr,
|
device_ptr depth_ptr,
|
||||||
device_ptr output_ptr,
|
device_ptr output_ptr,
|
||||||
DenoisingTask *task);
|
DenoisingTask *task);
|
||||||
bool denoising_set_tiles(device_ptr *buffers,
|
|
||||||
DenoisingTask *task);
|
|
||||||
|
|
||||||
device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size);
|
device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size);
|
||||||
void mem_free_sub_ptr(device_ptr ptr);
|
void mem_free_sub_ptr(device_ptr ptr);
|
||||||
|
|||||||
@@ -246,7 +246,6 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
|
|||||||
denoising_program.add_kernel(ustring("filter_nlm_normalize"));
|
denoising_program.add_kernel(ustring("filter_nlm_normalize"));
|
||||||
denoising_program.add_kernel(ustring("filter_nlm_construct_gramian"));
|
denoising_program.add_kernel(ustring("filter_nlm_construct_gramian"));
|
||||||
denoising_program.add_kernel(ustring("filter_finalize"));
|
denoising_program.add_kernel(ustring("filter_finalize"));
|
||||||
denoising_program.add_kernel(ustring("filter_set_tiles"));
|
|
||||||
|
|
||||||
vector<OpenCLProgram*> programs;
|
vector<OpenCLProgram*> programs;
|
||||||
programs.push_back(&base_program);
|
programs.push_back(&base_program);
|
||||||
@@ -977,13 +976,20 @@ bool OpenCLDeviceBase::denoising_divide_shadow(device_ptr a_ptr,
|
|||||||
cl_mem sv_variance_mem = CL_MEM_PTR(sv_variance_ptr);
|
cl_mem sv_variance_mem = CL_MEM_PTR(sv_variance_ptr);
|
||||||
cl_mem buffer_variance_mem = CL_MEM_PTR(buffer_variance_ptr);
|
cl_mem buffer_variance_mem = CL_MEM_PTR(buffer_variance_ptr);
|
||||||
|
|
||||||
cl_mem tiles_mem = CL_MEM_PTR(task->tiles_mem.device_pointer);
|
cl_mem tile_info_mem = CL_MEM_PTR(task->tile_info_mem.device_pointer);
|
||||||
|
|
||||||
cl_kernel ckFilterDivideShadow = denoising_program(ustring("filter_divide_shadow"));
|
cl_kernel ckFilterDivideShadow = denoising_program(ustring("filter_divide_shadow"));
|
||||||
|
|
||||||
kernel_set_args(ckFilterDivideShadow, 0,
|
int arg_ofs = kernel_set_args(ckFilterDivideShadow, 0,
|
||||||
task->render_buffer.samples,
|
task->render_buffer.samples,
|
||||||
tiles_mem,
|
tile_info_mem);
|
||||||
|
cl_mem buffers[9];
|
||||||
|
for(int i = 0; i < 9; i++) {
|
||||||
|
buffers[i] = CL_MEM_PTR(task->tile_info->buffers[i]);
|
||||||
|
arg_ofs += kernel_set_args(ckFilterDivideShadow, arg_ofs,
|
||||||
|
buffers[i]);
|
||||||
|
}
|
||||||
|
kernel_set_args(ckFilterDivideShadow, arg_ofs,
|
||||||
a_mem,
|
a_mem,
|
||||||
b_mem,
|
b_mem,
|
||||||
sample_variance_mem,
|
sample_variance_mem,
|
||||||
@@ -991,7 +997,7 @@ bool OpenCLDeviceBase::denoising_divide_shadow(device_ptr a_ptr,
|
|||||||
buffer_variance_mem,
|
buffer_variance_mem,
|
||||||
task->rect,
|
task->rect,
|
||||||
task->render_buffer.pass_stride,
|
task->render_buffer.pass_stride,
|
||||||
task->render_buffer.denoising_data_offset);
|
task->render_buffer.offset);
|
||||||
enqueue_kernel(ckFilterDivideShadow,
|
enqueue_kernel(ckFilterDivideShadow,
|
||||||
task->rect.z-task->rect.x,
|
task->rect.z-task->rect.x,
|
||||||
task->rect.w-task->rect.y);
|
task->rect.w-task->rect.y);
|
||||||
@@ -1008,20 +1014,27 @@ bool OpenCLDeviceBase::denoising_get_feature(int mean_offset,
|
|||||||
cl_mem mean_mem = CL_MEM_PTR(mean_ptr);
|
cl_mem mean_mem = CL_MEM_PTR(mean_ptr);
|
||||||
cl_mem variance_mem = CL_MEM_PTR(variance_ptr);
|
cl_mem variance_mem = CL_MEM_PTR(variance_ptr);
|
||||||
|
|
||||||
cl_mem tiles_mem = CL_MEM_PTR(task->tiles_mem.device_pointer);
|
cl_mem tile_info_mem = CL_MEM_PTR(task->tile_info_mem.device_pointer);
|
||||||
|
|
||||||
cl_kernel ckFilterGetFeature = denoising_program(ustring("filter_get_feature"));
|
cl_kernel ckFilterGetFeature = denoising_program(ustring("filter_get_feature"));
|
||||||
|
|
||||||
kernel_set_args(ckFilterGetFeature, 0,
|
int arg_ofs = kernel_set_args(ckFilterGetFeature, 0,
|
||||||
task->render_buffer.samples,
|
task->render_buffer.samples,
|
||||||
tiles_mem,
|
tile_info_mem);
|
||||||
|
cl_mem buffers[9];
|
||||||
|
for(int i = 0; i < 9; i++) {
|
||||||
|
buffers[i] = CL_MEM_PTR(task->tile_info->buffers[i]);
|
||||||
|
arg_ofs += kernel_set_args(ckFilterGetFeature, arg_ofs,
|
||||||
|
buffers[i]);
|
||||||
|
}
|
||||||
|
kernel_set_args(ckFilterGetFeature, arg_ofs,
|
||||||
mean_offset,
|
mean_offset,
|
||||||
variance_offset,
|
variance_offset,
|
||||||
mean_mem,
|
mean_mem,
|
||||||
variance_mem,
|
variance_mem,
|
||||||
task->rect,
|
task->rect,
|
||||||
task->render_buffer.pass_stride,
|
task->render_buffer.pass_stride,
|
||||||
task->render_buffer.denoising_data_offset);
|
task->render_buffer.offset);
|
||||||
enqueue_kernel(ckFilterGetFeature,
|
enqueue_kernel(ckFilterGetFeature,
|
||||||
task->rect.z-task->rect.x,
|
task->rect.z-task->rect.x,
|
||||||
task->rect.w-task->rect.y);
|
task->rect.w-task->rect.y);
|
||||||
@@ -1056,29 +1069,8 @@ bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers,
|
void OpenCLDeviceBase::denoise(RenderTile &rtile, DenoisingTask& denoising)
|
||||||
DenoisingTask *task)
|
|
||||||
{
|
{
|
||||||
task->tiles_mem.copy_to_device();
|
|
||||||
|
|
||||||
cl_mem tiles_mem = CL_MEM_PTR(task->tiles_mem.device_pointer);
|
|
||||||
|
|
||||||
cl_kernel ckFilterSetTiles = denoising_program(ustring("filter_set_tiles"));
|
|
||||||
|
|
||||||
kernel_set_args(ckFilterSetTiles, 0, tiles_mem);
|
|
||||||
for(int i = 0; i < 9; i++) {
|
|
||||||
cl_mem buffer_mem = CL_MEM_PTR(buffers[i]);
|
|
||||||
kernel_set_args(ckFilterSetTiles, i+1, buffer_mem);
|
|
||||||
}
|
|
||||||
|
|
||||||
enqueue_kernel(ckFilterSetTiles, 1, 1);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void OpenCLDeviceBase::denoise(RenderTile &rtile, DenoisingTask& denoising, const DeviceTask &task)
|
|
||||||
{
|
|
||||||
denoising.functions.set_tiles = function_bind(&OpenCLDeviceBase::denoising_set_tiles, this, _1, &denoising);
|
|
||||||
denoising.functions.construct_transform = function_bind(&OpenCLDeviceBase::denoising_construct_transform, this, &denoising);
|
denoising.functions.construct_transform = function_bind(&OpenCLDeviceBase::denoising_construct_transform, this, &denoising);
|
||||||
denoising.functions.reconstruct = function_bind(&OpenCLDeviceBase::denoising_reconstruct, this, _1, _2, _3, &denoising);
|
denoising.functions.reconstruct = function_bind(&OpenCLDeviceBase::denoising_reconstruct, this, _1, _2, _3, &denoising);
|
||||||
denoising.functions.divide_shadow = function_bind(&OpenCLDeviceBase::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
|
denoising.functions.divide_shadow = function_bind(&OpenCLDeviceBase::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
|
||||||
@@ -1090,16 +1082,7 @@ void OpenCLDeviceBase::denoise(RenderTile &rtile, DenoisingTask& denoising, cons
|
|||||||
denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
|
denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
|
||||||
denoising.render_buffer.samples = rtile.sample;
|
denoising.render_buffer.samples = rtile.sample;
|
||||||
|
|
||||||
RenderTile rtiles[9];
|
denoising.run_denoising(&rtile);
|
||||||
rtiles[4] = rtile;
|
|
||||||
task.map_neighbor_tiles(rtiles, this);
|
|
||||||
denoising.tiles_from_rendertiles(rtiles);
|
|
||||||
|
|
||||||
denoising.init_from_devicetask(task);
|
|
||||||
|
|
||||||
denoising.run_denoising();
|
|
||||||
|
|
||||||
task.unmap_neighbor_tiles(rtiles, this);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void OpenCLDeviceBase::shader(DeviceTask& task)
|
void OpenCLDeviceBase::shader(DeviceTask& task)
|
||||||
|
|||||||
@@ -107,7 +107,7 @@ public:
|
|||||||
}
|
}
|
||||||
else if(task->type == DeviceTask::RENDER) {
|
else if(task->type == DeviceTask::RENDER) {
|
||||||
RenderTile tile;
|
RenderTile tile;
|
||||||
DenoisingTask denoising(this);
|
DenoisingTask denoising(this, *task);
|
||||||
|
|
||||||
/* Keep rendering tiles until done. */
|
/* Keep rendering tiles until done. */
|
||||||
while(task->acquire_tile(this, tile)) {
|
while(task->acquire_tile(this, tile)) {
|
||||||
@@ -141,7 +141,7 @@ public:
|
|||||||
}
|
}
|
||||||
else if(tile.task == RenderTile::DENOISE) {
|
else if(tile.task == RenderTile::DENOISE) {
|
||||||
tile.sample = tile.start_sample + tile.num_samples;
|
tile.sample = tile.start_sample + tile.num_samples;
|
||||||
denoise(tile, denoising, *task);
|
denoise(tile, denoising);
|
||||||
task->update_progress(&tile, tile.w*tile.h);
|
task->update_progress(&tile, tile.w*tile.h);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -129,7 +129,7 @@ public:
|
|||||||
}
|
}
|
||||||
else if(task->type == DeviceTask::RENDER) {
|
else if(task->type == DeviceTask::RENDER) {
|
||||||
RenderTile tile;
|
RenderTile tile;
|
||||||
DenoisingTask denoising(this);
|
DenoisingTask denoising(this, *task);
|
||||||
|
|
||||||
/* Allocate buffer for kernel globals */
|
/* Allocate buffer for kernel globals */
|
||||||
device_only_memory<KernelGlobalsDummy> kgbuffer(this, "kernel_globals");
|
device_only_memory<KernelGlobalsDummy> kgbuffer(this, "kernel_globals");
|
||||||
@@ -159,7 +159,7 @@ public:
|
|||||||
}
|
}
|
||||||
else if(tile.task == RenderTile::DENOISE) {
|
else if(tile.task == RenderTile::DENOISE) {
|
||||||
tile.sample = tile.start_sample + tile.num_samples;
|
tile.sample = tile.start_sample + tile.num_samples;
|
||||||
denoise(tile, denoising, *task);
|
denoise(tile, denoising);
|
||||||
task->update_progress(&tile, tile.w*tile.h);
|
task->update_progress(&tile, tile.w*tile.h);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -22,7 +22,7 @@
|
|||||||
#define XTWX_SIZE (((DENOISE_FEATURES+1)*(DENOISE_FEATURES+2))/2)
|
#define XTWX_SIZE (((DENOISE_FEATURES+1)*(DENOISE_FEATURES+2))/2)
|
||||||
#define XTWY_SIZE (DENOISE_FEATURES+1)
|
#define XTWY_SIZE (DENOISE_FEATURES+1)
|
||||||
|
|
||||||
typedef struct TilesInfo {
|
typedef struct TileInfo {
|
||||||
int offsets[9];
|
int offsets[9];
|
||||||
int strides[9];
|
int strides[9];
|
||||||
int x[4];
|
int x[4];
|
||||||
@@ -33,6 +33,31 @@ typedef struct TilesInfo {
|
|||||||
#else
|
#else
|
||||||
long long int buffers[9];
|
long long int buffers[9];
|
||||||
#endif
|
#endif
|
||||||
} TilesInfo;
|
} TileInfo;
|
||||||
|
|
||||||
|
#ifdef __KERNEL_OPENCL__
|
||||||
|
# define CCL_FILTER_TILE_INFO ccl_global TileInfo* tile_info, \
|
||||||
|
ccl_global float *tile_buffer_1, \
|
||||||
|
ccl_global float *tile_buffer_2, \
|
||||||
|
ccl_global float *tile_buffer_3, \
|
||||||
|
ccl_global float *tile_buffer_4, \
|
||||||
|
ccl_global float *tile_buffer_5, \
|
||||||
|
ccl_global float *tile_buffer_6, \
|
||||||
|
ccl_global float *tile_buffer_7, \
|
||||||
|
ccl_global float *tile_buffer_8, \
|
||||||
|
ccl_global float *tile_buffer_9
|
||||||
|
# define CCL_FILTER_TILE_INFO_ARG tile_info, \
|
||||||
|
tile_buffer_1, tile_buffer_2, tile_buffer_3, \
|
||||||
|
tile_buffer_4, tile_buffer_5, tile_buffer_6, \
|
||||||
|
tile_buffer_7, tile_buffer_8, tile_buffer_9
|
||||||
|
# define ccl_get_tile_buffer(id) (tile_buffer_ ## id)
|
||||||
|
#else
|
||||||
|
# ifdef __KERNEL_CUDA__
|
||||||
|
# define CCL_FILTER_TILE_INFO ccl_global TileInfo* tile_info
|
||||||
|
# else
|
||||||
|
# define CCL_FILTER_TILE_INFO TileInfo* tile_info
|
||||||
|
# endif
|
||||||
|
# define ccl_get_tile_buffer(id) (tile_info->buffers[id])
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* __FILTER_DEFINES_H__*/
|
#endif /* __FILTER_DEFINES_H__*/
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ CCL_NAMESPACE_BEGIN
|
|||||||
* bufferVariance: The buffer-based variance of the shadow feature. Unbiased, but quite noisy.
|
* bufferVariance: The buffer-based variance of the shadow feature. Unbiased, but quite noisy.
|
||||||
*/
|
*/
|
||||||
ccl_device void kernel_filter_divide_shadow(int sample,
|
ccl_device void kernel_filter_divide_shadow(int sample,
|
||||||
ccl_global TilesInfo *tiles,
|
CCL_FILTER_TILE_INFO,
|
||||||
int x, int y,
|
int x, int y,
|
||||||
ccl_global float *unfilteredA,
|
ccl_global float *unfilteredA,
|
||||||
ccl_global float *unfilteredB,
|
ccl_global float *unfilteredB,
|
||||||
@@ -37,13 +37,13 @@ ccl_device void kernel_filter_divide_shadow(int sample,
|
|||||||
int buffer_pass_stride,
|
int buffer_pass_stride,
|
||||||
int buffer_denoising_offset)
|
int buffer_denoising_offset)
|
||||||
{
|
{
|
||||||
int xtile = (x < tiles->x[1])? 0: ((x < tiles->x[2])? 1: 2);
|
int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2);
|
||||||
int ytile = (y < tiles->y[1])? 0: ((y < tiles->y[2])? 1: 2);
|
int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 1: 2);
|
||||||
int tile = ytile*3+xtile;
|
int tile = ytile*3+xtile;
|
||||||
|
|
||||||
int offset = tiles->offsets[tile];
|
int offset = tile_info->offsets[tile];
|
||||||
int stride = tiles->strides[tile];
|
int stride = tile_info->strides[tile];
|
||||||
const ccl_global float *ccl_restrict center_buffer = (ccl_global float*) tiles->buffers[tile];
|
const ccl_global float *ccl_restrict center_buffer = (ccl_global float*) ccl_get_tile_buffer(tile);
|
||||||
center_buffer += (y*stride + x + offset)*buffer_pass_stride;
|
center_buffer += (y*stride + x + offset)*buffer_pass_stride;
|
||||||
center_buffer += buffer_denoising_offset + 14;
|
center_buffer += buffer_denoising_offset + 14;
|
||||||
|
|
||||||
@@ -79,7 +79,7 @@ ccl_device void kernel_filter_divide_shadow(int sample,
|
|||||||
* - rect: The prefilter area (lower pixels inclusive, upper pixels exclusive).
|
* - rect: The prefilter area (lower pixels inclusive, upper pixels exclusive).
|
||||||
*/
|
*/
|
||||||
ccl_device void kernel_filter_get_feature(int sample,
|
ccl_device void kernel_filter_get_feature(int sample,
|
||||||
ccl_global TilesInfo *tiles,
|
CCL_FILTER_TILE_INFO,
|
||||||
int m_offset, int v_offset,
|
int m_offset, int v_offset,
|
||||||
int x, int y,
|
int x, int y,
|
||||||
ccl_global float *mean,
|
ccl_global float *mean,
|
||||||
@@ -87,10 +87,10 @@ ccl_device void kernel_filter_get_feature(int sample,
|
|||||||
int4 rect, int buffer_pass_stride,
|
int4 rect, int buffer_pass_stride,
|
||||||
int buffer_denoising_offset)
|
int buffer_denoising_offset)
|
||||||
{
|
{
|
||||||
int xtile = (x < tiles->x[1])? 0: ((x < tiles->x[2])? 1: 2);
|
int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2);
|
||||||
int ytile = (y < tiles->y[1])? 0: ((y < tiles->y[2])? 1: 2);
|
int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 1: 2);
|
||||||
int tile = ytile*3+xtile;
|
int tile = ytile*3+xtile;
|
||||||
ccl_global float *center_buffer = ((ccl_global float*) tiles->buffers[tile]) + (tiles->offsets[tile] + y*tiles->strides[tile] + x)*buffer_pass_stride + buffer_denoising_offset;
|
ccl_global float *center_buffer = ((ccl_global float*) ccl_get_tile_buffer(tile)) + (tile_info->offsets[tile] + y*tile_info->strides[tile] + x)*buffer_pass_stride + buffer_denoising_offset;
|
||||||
|
|
||||||
int buffer_w = align_up(rect.z - rect.x, 4);
|
int buffer_w = align_up(rect.z - rect.x, 4);
|
||||||
int idx = (y-rect.y)*buffer_w + (x - rect.x);
|
int idx = (y-rect.y)*buffer_w + (x - rect.x);
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
/* Templated common declaration part of all CPU kernels. */
|
/* Templated common declaration part of all CPU kernels. */
|
||||||
|
|
||||||
void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
|
void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
|
||||||
TilesInfo *tiles,
|
TileInfo *tile_info,
|
||||||
int x,
|
int x,
|
||||||
int y,
|
int y,
|
||||||
float *unfilteredA,
|
float *unfilteredA,
|
||||||
@@ -30,7 +30,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
|
|||||||
int buffer_denoising_offset);
|
int buffer_denoising_offset);
|
||||||
|
|
||||||
void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
|
void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
|
||||||
TilesInfo *tiles,
|
TileInfo *tile_info,
|
||||||
int m_offset,
|
int m_offset,
|
||||||
int v_offset,
|
int v_offset,
|
||||||
int x,
|
int x,
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN
|
|||||||
/* Denoise filter */
|
/* Denoise filter */
|
||||||
|
|
||||||
void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
|
void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
|
||||||
TilesInfo *tiles,
|
TileInfo *tile_info,
|
||||||
int x,
|
int x,
|
||||||
int y,
|
int y,
|
||||||
float *unfilteredA,
|
float *unfilteredA,
|
||||||
@@ -49,7 +49,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
|
|||||||
#ifdef KERNEL_STUB
|
#ifdef KERNEL_STUB
|
||||||
STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
|
STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
|
||||||
#else
|
#else
|
||||||
kernel_filter_divide_shadow(sample, tiles,
|
kernel_filter_divide_shadow(sample, tile_info,
|
||||||
x, y,
|
x, y,
|
||||||
unfilteredA,
|
unfilteredA,
|
||||||
unfilteredB,
|
unfilteredB,
|
||||||
@@ -63,7 +63,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
|
void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
|
||||||
TilesInfo *tiles,
|
TileInfo *tile_info,
|
||||||
int m_offset,
|
int m_offset,
|
||||||
int v_offset,
|
int v_offset,
|
||||||
int x,
|
int x,
|
||||||
@@ -76,7 +76,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
|
|||||||
#ifdef KERNEL_STUB
|
#ifdef KERNEL_STUB
|
||||||
STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
|
STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
|
||||||
#else
|
#else
|
||||||
kernel_filter_get_feature(sample, tiles,
|
kernel_filter_get_feature(sample, tile_info,
|
||||||
m_offset, v_offset,
|
m_offset, v_offset,
|
||||||
x, y,
|
x, y,
|
||||||
mean, variance,
|
mean, variance,
|
||||||
|
|||||||
@@ -29,7 +29,7 @@
|
|||||||
extern "C" __global__ void
|
extern "C" __global__ void
|
||||||
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
|
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
|
||||||
kernel_cuda_filter_divide_shadow(int sample,
|
kernel_cuda_filter_divide_shadow(int sample,
|
||||||
TilesInfo *tiles,
|
TileInfo *tile_info,
|
||||||
float *unfilteredA,
|
float *unfilteredA,
|
||||||
float *unfilteredB,
|
float *unfilteredB,
|
||||||
float *sampleVariance,
|
float *sampleVariance,
|
||||||
@@ -43,7 +43,7 @@ kernel_cuda_filter_divide_shadow(int sample,
|
|||||||
int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
|
int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
|
||||||
if(x < prefilter_rect.z && y < prefilter_rect.w) {
|
if(x < prefilter_rect.z && y < prefilter_rect.w) {
|
||||||
kernel_filter_divide_shadow(sample,
|
kernel_filter_divide_shadow(sample,
|
||||||
tiles,
|
tile_info,
|
||||||
x, y,
|
x, y,
|
||||||
unfilteredA,
|
unfilteredA,
|
||||||
unfilteredB,
|
unfilteredB,
|
||||||
@@ -59,7 +59,7 @@ kernel_cuda_filter_divide_shadow(int sample,
|
|||||||
extern "C" __global__ void
|
extern "C" __global__ void
|
||||||
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
|
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
|
||||||
kernel_cuda_filter_get_feature(int sample,
|
kernel_cuda_filter_get_feature(int sample,
|
||||||
TilesInfo *tiles,
|
TileInfo *tile_info,
|
||||||
int m_offset,
|
int m_offset,
|
||||||
int v_offset,
|
int v_offset,
|
||||||
float *mean,
|
float *mean,
|
||||||
@@ -72,7 +72,7 @@ kernel_cuda_filter_get_feature(int sample,
|
|||||||
int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
|
int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
|
||||||
if(x < prefilter_rect.z && y < prefilter_rect.w) {
|
if(x < prefilter_rect.z && y < prefilter_rect.w) {
|
||||||
kernel_filter_get_feature(sample,
|
kernel_filter_get_feature(sample,
|
||||||
tiles,
|
tile_info,
|
||||||
m_offset, v_offset,
|
m_offset, v_offset,
|
||||||
x, y,
|
x, y,
|
||||||
mean, variance,
|
mean, variance,
|
||||||
|
|||||||
@@ -23,7 +23,7 @@
|
|||||||
/* kernels */
|
/* kernels */
|
||||||
|
|
||||||
__kernel void kernel_ocl_filter_divide_shadow(int sample,
|
__kernel void kernel_ocl_filter_divide_shadow(int sample,
|
||||||
ccl_global TilesInfo *tiles,
|
CCL_FILTER_TILE_INFO,
|
||||||
ccl_global float *unfilteredA,
|
ccl_global float *unfilteredA,
|
||||||
ccl_global float *unfilteredB,
|
ccl_global float *unfilteredB,
|
||||||
ccl_global float *sampleVariance,
|
ccl_global float *sampleVariance,
|
||||||
@@ -37,7 +37,7 @@ __kernel void kernel_ocl_filter_divide_shadow(int sample,
|
|||||||
int y = prefilter_rect.y + get_global_id(1);
|
int y = prefilter_rect.y + get_global_id(1);
|
||||||
if(x < prefilter_rect.z && y < prefilter_rect.w) {
|
if(x < prefilter_rect.z && y < prefilter_rect.w) {
|
||||||
kernel_filter_divide_shadow(sample,
|
kernel_filter_divide_shadow(sample,
|
||||||
tiles,
|
CCL_FILTER_TILE_INFO_ARG,
|
||||||
x, y,
|
x, y,
|
||||||
unfilteredA,
|
unfilteredA,
|
||||||
unfilteredB,
|
unfilteredB,
|
||||||
@@ -51,7 +51,7 @@ __kernel void kernel_ocl_filter_divide_shadow(int sample,
|
|||||||
}
|
}
|
||||||
|
|
||||||
__kernel void kernel_ocl_filter_get_feature(int sample,
|
__kernel void kernel_ocl_filter_get_feature(int sample,
|
||||||
ccl_global TilesInfo *tiles,
|
CCL_FILTER_TILE_INFO,
|
||||||
int m_offset,
|
int m_offset,
|
||||||
int v_offset,
|
int v_offset,
|
||||||
ccl_global float *mean,
|
ccl_global float *mean,
|
||||||
@@ -64,7 +64,7 @@ __kernel void kernel_ocl_filter_get_feature(int sample,
|
|||||||
int y = prefilter_rect.y + get_global_id(1);
|
int y = prefilter_rect.y + get_global_id(1);
|
||||||
if(x < prefilter_rect.z && y < prefilter_rect.w) {
|
if(x < prefilter_rect.z && y < prefilter_rect.w) {
|
||||||
kernel_filter_get_feature(sample,
|
kernel_filter_get_feature(sample,
|
||||||
tiles,
|
CCL_FILTER_TILE_INFO_ARG,
|
||||||
m_offset, v_offset,
|
m_offset, v_offset,
|
||||||
x, y,
|
x, y,
|
||||||
mean, variance,
|
mean, variance,
|
||||||
@@ -276,27 +276,3 @@ __kernel void kernel_ocl_filter_finalize(ccl_global float *buffer,
|
|||||||
buffer_params, sample);
|
buffer_params, sample);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void kernel_ocl_filter_set_tiles(ccl_global TilesInfo* tiles,
|
|
||||||
ccl_global float *buffer_1,
|
|
||||||
ccl_global float *buffer_2,
|
|
||||||
ccl_global float *buffer_3,
|
|
||||||
ccl_global float *buffer_4,
|
|
||||||
ccl_global float *buffer_5,
|
|
||||||
ccl_global float *buffer_6,
|
|
||||||
ccl_global float *buffer_7,
|
|
||||||
ccl_global float *buffer_8,
|
|
||||||
ccl_global float *buffer_9)
|
|
||||||
{
|
|
||||||
if((get_global_id(0) == 0) && (get_global_id(1) == 0)) {
|
|
||||||
tiles->buffers[0] = buffer_1;
|
|
||||||
tiles->buffers[1] = buffer_2;
|
|
||||||
tiles->buffers[2] = buffer_3;
|
|
||||||
tiles->buffers[3] = buffer_4;
|
|
||||||
tiles->buffers[4] = buffer_5;
|
|
||||||
tiles->buffers[5] = buffer_6;
|
|
||||||
tiles->buffers[6] = buffer_7;
|
|
||||||
tiles->buffers[7] = buffer_8;
|
|
||||||
tiles->buffers[8] = buffer_9;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -137,6 +137,7 @@ public:
|
|||||||
int tile_index;
|
int tile_index;
|
||||||
|
|
||||||
device_ptr buffer;
|
device_ptr buffer;
|
||||||
|
int device_size;
|
||||||
|
|
||||||
RenderBuffers *buffers;
|
RenderBuffers *buffers;
|
||||||
|
|
||||||
|
|||||||
@@ -502,6 +502,9 @@ void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device)
|
|||||||
|
|
||||||
assert(tiles[4].buffers);
|
assert(tiles[4].buffers);
|
||||||
device->map_neighbor_tiles(tile_device, tiles);
|
device->map_neighbor_tiles(tile_device, tiles);
|
||||||
|
|
||||||
|
/* The denoised result is written back to the original tile. */
|
||||||
|
tiles[9] = tiles[4];
|
||||||
}
|
}
|
||||||
|
|
||||||
void Session::unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device)
|
void Session::unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device)
|
||||||
|
|||||||
@@ -488,11 +488,11 @@ def smpte_from_frame(frame, fps=None, fps_base=None):
|
|||||||
|
|
||||||
return (
|
return (
|
||||||
"%s%02d:%02d:%02d:%02d" % (
|
"%s%02d:%02d:%02d:%02d" % (
|
||||||
sign,
|
sign,
|
||||||
int(frame / (3600 * fps)), # HH
|
int(frame / (3600 * fps)), # HH
|
||||||
int((frame / (60 * fps)) % 60), # MM
|
int((frame / (60 * fps)) % 60), # MM
|
||||||
int((frame / fps) % 60), # SS
|
int((frame / fps) % 60), # SS
|
||||||
int(frame % fps), # FF
|
int(frame % fps), # FF
|
||||||
))
|
))
|
||||||
|
|
||||||
|
|
||||||
@@ -773,6 +773,7 @@ def _blender_default_map():
|
|||||||
del _sys.modules["rna_manual_reference"]
|
del _sys.modules["rna_manual_reference"]
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
# hooks for doc lookups
|
# hooks for doc lookups
|
||||||
_manual_map = [_blender_default_map]
|
_manual_map = [_blender_default_map]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user