GPv3: Cyclical set operator #111904

Merged
Falk David merged 16 commits from casey-bianco-davis/blender:GPv3-cyclical-set-operator into main 2023-10-20 10:12:34 +02:00
1056 changed files with 30578 additions and 31677 deletions
Showing only changes of commit f280d22437 - Show all commits

View File

@ -514,7 +514,7 @@ check_spelling_shaders: .FORCE
PYTHONIOENCODING=utf_8 $(PYTHON) \
"$(BLENDER_DIR)/tools/check_source/check_spelling.py" \
--cache-file=$(CHECK_SPELLING_CACHE) \
--match=".*\.(osl|msl|glsl)$$" \
--match=".*\.(osl|metal|msl|glsl)$$" \
"$(BLENDER_DIR)/intern/" \
"$(BLENDER_DIR)/source/"

View File

@ -11,7 +11,7 @@
# dependencies have one assigned.
set(ZLIB_VERSION 1.2.13)
set(ZLIB_URI https://zlib.net/zlib-${ZLIB_VERSION}.tar.gz)
set(ZLIB_URI https://github.com/madler/zlib/releases/download/v${ZLIB_VERSION}/zlib-${ZLIB_VERSION}.tar.gz)
set(ZLIB_HASH 9b8aa094c4e5765dabf4da391f00d15c)
set(ZLIB_HASH_TYPE MD5)
set(ZLIB_FILE zlib-${ZLIB_VERSION}.tar.gz)

View File

@ -90,3 +90,24 @@ index 6bb0d175..19f13513 100644
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS" )
diff --git a/src/include/OSL/mask.h b/src/include/OSL/mask.h
index 24197af..b9275f6 100644
--- a/src/include/OSL/mask.h
+++ b/src/include/OSL/mask.h
@@ -4,7 +4,6 @@
#pragma once
-#include <immintrin.h>
#include <type_traits>
#include <OSL/oslconfig.h>
@@ -23,6 +22,8 @@ using std::countr_zero;
#elif OSL_INTEL_CLASSIC_COMPILER_VERSION
+#include <immintrin.h>
+
OSL_FORCEINLINE int popcount(uint32_t x) noexcept { return _mm_popcnt_u32(x);}
OSL_FORCEINLINE int popcount(uint64_t x) noexcept { return _mm_popcnt_u64(x); }
OSL_FORCEINLINE int countr_zero(uint32_t x) noexcept { return _bit_scan_forward(x); }

View File

@ -8,10 +8,13 @@
# HIP_FOUND, if the HIP toolkit is found.
# If `HIP_ROOT_DIR` was defined in the environment, use it.
if(DEFINED HIP_ROOT_DIR)
if(DEFINED HIP_ROOT_DIR AND HIP_ROOT_DIR)
# Pass.
elseif(DEFINED ENV{HIP_ROOT_DIR})
set(HIP_ROOT_DIR $ENV{HIP_ROOT_DIR})
elseif(DEFINED ENV{HIP_PATH})
# Built-in environment variable from SDK.
set(HIP_ROOT_DIR $ENV{HIP_PATH})
else()
set(HIP_ROOT_DIR "")
endif()

View File

@ -8,10 +8,13 @@
# HIPRT_FOUND, if SDK found
# If `HIPRT_ROOT_DIR` was defined in the environment, use it.
if(DEFINED HIPRT_ROOT_DIR)
if(DEFINED HIPRT_ROOT_DIR AND HIPRT_ROOT_DIR)
# Pass.
elseif(DEFINED ENV{HIPRT_ROOT_DIR})
set(HIPRT_ROOT_DIR $ENV{HIPRT_ROOT_DIR})
elseif(DEFINED ENV{HIP_PATH})
# Built-in environment variable from SDK.
set(HIPRT_ROOT_DIR $ENV{HIP_PATH})
else()
set(HIPRT_ROOT_DIR "")
endif()
@ -24,6 +27,7 @@ find_path(HIPRT_INCLUDE_DIR
NAMES
hiprt/hiprt.h
HINTS
${_hiprt_SEARCH_DIRS}/include
${_hiprt_SEARCH_DIRS}
)
@ -36,6 +40,7 @@ if(HIPRT_INCLUDE_DIR)
NAMES
hiprt${_hiprt_version}_amd_lib_win.bc
HINTS
${HIPRT_ROOT_DIR}/bin
${HIPRT_ROOT_DIR}/dist/bin/Release
NO_DEFAULT_PATH
)

View File

@ -12,7 +12,7 @@
# OPTIX_FOUND, If false, do not try to use OptiX.
# If `OPTIX_ROOT_DIR` was defined in the environment, use it.
if(DEFINED OPTIX_ROOT_DIR)
if(DEFINED OPTIX_ROOT_DIR AND OPTIX_ROOT_DIR)
# Pass.
elseif(DEFINED ENV{OPTIX_ROOT_DIR})
set(OPTIX_ROOT_DIR $ENV{OPTIX_ROOT_DIR})

View File

@ -12,8 +12,12 @@
# This can also be an environment variable.
# SYCL_FOUND, If false, then don't try to use SYCL.
if(NOT SYCL_ROOT_DIR AND NOT $ENV{SYCL_ROOT_DIR} STREQUAL "")
if(DEFINED SYCL_ROOT_DIR AND SYCL_ROOT_DIR)
# Pass.
elseif(DEFINED ENV{SYCL_ROOT_DIR} AND NOT $ENV{SYCL_ROOT_DIR} STREQUAL "")
set(SYCL_ROOT_DIR $ENV{SYCL_ROOT_DIR})
else()
set(SYCL_ROOT_DIR "")
endif()
set(_sycl_search_dirs

View File

@ -21,28 +21,28 @@ extern void *AUD_createSet(void);
/**
* Deletes a set.
* \param set The set to delete.
* \param set: The set to delete.
*/
extern void AUD_destroySet(void *set);
/**
* Removes an entry from a set.
* \param set The set work on.
* \param entry The entry to remove.
* \param set: The set work on.
* \param entry: The entry to remove.
* \return Whether the entry was in the set or not.
*/
extern char AUD_removeSet(void *set, void *entry);
/**
* Adds a new entry to a set.
* \param set The set work on.
* \param entry The entry to add.
* \param set: The set work on.
* \param entry: The entry to add.
*/
extern void AUD_addSet(void *set, void *entry);
/**
* Removes one entry from a set and returns it.
* \param set The set work on.
* \param set: The set work on.
* \return The entry or NULL if the set is empty.
*/
extern void *AUD_getSet(void *set);

View File

@ -16,6 +16,8 @@
#include "kernel/sample/lcg.h"
#include "kernel/sample/mapping.h"
#include "kernel/util/color.h"
#include "kernel/closure/bsdf_microfacet.h"
#include <iostream>
@ -39,7 +41,6 @@ static float precompute_ggx_E(float rough, float mu, float3 rand)
float pdf = 0.0f, sampled_eta;
float2 sampled_roughness;
bsdf_microfacet_ggx_sample((ShaderClosure *)&bsdf,
0,
make_float3(0.0f, 0.0f, 1.0f),
make_float3(sqrtf(1.0f - sqr(mu)), 0.0f, mu),
rand,
@ -71,7 +72,6 @@ static float precompute_ggx_glass_E(float rough, float mu, float eta, float3 ran
float pdf = 0.0f, sampled_eta;
float2 sampled_roughness;
bsdf_microfacet_ggx_sample((ShaderClosure *)&bsdf,
0,
make_float3(0.0f, 0.0f, 1.0f),
make_float3(sqrtf(1.0f - sqr(mu)), 0.0f, mu),
rand,
@ -114,7 +114,6 @@ static float precompute_ggx_gen_schlick_s(
float pdf = 0.0f, sampled_eta;
float2 sampled_roughness;
bsdf_microfacet_ggx_sample((ShaderClosure *)&bsdf,
0,
make_float3(0.0f, 0.0f, 1.0f),
make_float3(sqrtf(1.0f - sqr(mu)), 0.0f, mu),
rand,

View File

@ -335,8 +335,8 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
if (attr_normal) {
/* NOTE: the geometry normals are not computed for legacy particle hairs. This hair
* system is expected to be discarded. */
attr_normal->add(make_float3(1.0f, 0.0f, 0.0f));
* system is expected to be deprecated. */
attr_normal->add(make_float3(0.0f, 0.0f, 0.0f));
}
num_curve_keys++;

View File

@ -172,29 +172,27 @@ static PyObject *create_func(PyObject * /*self*/, PyObject *args)
/* RNA */
ID *bScreen = (ID *)PyLong_AsVoidPtr(pyscreen);
PointerRNA engineptr;
RNA_pointer_create(NULL, &RNA_RenderEngine, (void *)PyLong_AsVoidPtr(pyengine), &engineptr);
PointerRNA engineptr = RNA_pointer_create(
NULL, &RNA_RenderEngine, (void *)PyLong_AsVoidPtr(pyengine));
BL::RenderEngine engine(engineptr);
PointerRNA preferencesptr;
RNA_pointer_create(
NULL, &RNA_Preferences, (void *)PyLong_AsVoidPtr(pypreferences), &preferencesptr);
PointerRNA preferencesptr = RNA_pointer_create(
NULL, &RNA_Preferences, (void *)PyLong_AsVoidPtr(pypreferences));
BL::Preferences preferences(preferencesptr);
PointerRNA dataptr;
RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata), &dataptr);
PointerRNA dataptr = RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata));
BL::BlendData data(dataptr);
PointerRNA regionptr;
RNA_pointer_create(bScreen, &RNA_Region, pylong_as_voidptr_typesafe(pyregion), &regionptr);
PointerRNA regionptr = RNA_pointer_create(
bScreen, &RNA_Region, pylong_as_voidptr_typesafe(pyregion));
BL::Region region(regionptr);
PointerRNA v3dptr;
RNA_pointer_create(bScreen, &RNA_SpaceView3D, pylong_as_voidptr_typesafe(pyv3d), &v3dptr);
PointerRNA v3dptr = RNA_pointer_create(
bScreen, &RNA_SpaceView3D, pylong_as_voidptr_typesafe(pyv3d));
BL::SpaceView3D v3d(v3dptr);
PointerRNA rv3dptr;
RNA_pointer_create(bScreen, &RNA_RegionView3D, pylong_as_voidptr_typesafe(pyrv3d), &rv3dptr);
PointerRNA rv3dptr = RNA_pointer_create(
bScreen, &RNA_RegionView3D, pylong_as_voidptr_typesafe(pyrv3d));
BL::RegionView3D rv3d(rv3dptr);
/* create session */
@ -231,8 +229,8 @@ static PyObject *render_func(PyObject * /*self*/, PyObject *args)
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
PointerRNA depsgraphptr;
RNA_pointer_create(NULL, &RNA_Depsgraph, (ID *)PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr);
PointerRNA depsgraphptr = RNA_pointer_create(
NULL, &RNA_Depsgraph, (ID *)PyLong_AsVoidPtr(pydepsgraph));
BL::Depsgraph b_depsgraph(depsgraphptr);
/* Allow Blender to execute other Python scripts. */
@ -277,11 +275,8 @@ static PyObject *draw_func(PyObject * /*self*/, PyObject *args)
ID *b_screen = (ID *)PyLong_AsVoidPtr(py_screen);
PointerRNA b_space_image_ptr;
RNA_pointer_create(b_screen,
&RNA_SpaceImageEditor,
pylong_as_voidptr_typesafe(py_space_image),
&b_space_image_ptr);
PointerRNA b_space_image_ptr = RNA_pointer_create(
b_screen, &RNA_SpaceImageEditor, pylong_as_voidptr_typesafe(py_space_image));
BL::SpaceImageEditor b_space_image(b_space_image_ptr);
session->draw(b_space_image);
@ -309,12 +304,11 @@ static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
PointerRNA depsgraphptr;
RNA_pointer_create(NULL, &RNA_Depsgraph, PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr);
PointerRNA depsgraphptr = RNA_pointer_create(
NULL, &RNA_Depsgraph, PyLong_AsVoidPtr(pydepsgraph));
BL::Depsgraph b_depsgraph(depsgraphptr);
PointerRNA objectptr;
RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyobject), &objectptr);
PointerRNA objectptr = RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyobject));
BL::Object b_object(objectptr);
python_thread_state_save(&session->python_thread_state);
@ -355,12 +349,11 @@ static PyObject *reset_func(PyObject * /*self*/, PyObject *args)
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
PointerRNA dataptr;
RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata), &dataptr);
PointerRNA dataptr = RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata));
BL::BlendData b_data(dataptr);
PointerRNA depsgraphptr;
RNA_pointer_create(NULL, &RNA_Depsgraph, PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr);
PointerRNA depsgraphptr = RNA_pointer_create(
NULL, &RNA_Depsgraph, PyLong_AsVoidPtr(pydepsgraph));
BL::Depsgraph b_depsgraph(depsgraphptr);
python_thread_state_save(&session->python_thread_state);
@ -381,8 +374,8 @@ static PyObject *sync_func(PyObject * /*self*/, PyObject *args)
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
PointerRNA depsgraphptr;
RNA_pointer_create(NULL, &RNA_Depsgraph, PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr);
PointerRNA depsgraphptr = RNA_pointer_create(
NULL, &RNA_Depsgraph, PyLong_AsVoidPtr(pydepsgraph));
BL::Depsgraph b_depsgraph(depsgraphptr);
python_thread_state_save(&session->python_thread_state);
@ -439,15 +432,12 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
return NULL;
/* RNA */
PointerRNA dataptr;
RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata), &dataptr);
PointerRNA dataptr = RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata));
BL::BlendData b_data(dataptr);
PointerRNA nodeptr;
RNA_pointer_create((ID *)PyLong_AsVoidPtr(pynodegroup),
&RNA_ShaderNodeScript,
(void *)PyLong_AsVoidPtr(pynode),
&nodeptr);
PointerRNA nodeptr = RNA_pointer_create((ID *)PyLong_AsVoidPtr(pynodegroup),
&RNA_ShaderNodeScript,
(void *)PyLong_AsVoidPtr(pynode));
BL::ShaderNodeScript b_node(nodeptr);
/* update bytecode hash */
@ -747,23 +737,18 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
}
/* Get device specification from preferences and scene. */
PointerRNA preferencesptr;
RNA_pointer_create(
NULL, &RNA_Preferences, (void *)PyLong_AsVoidPtr(pypreferences), &preferencesptr);
PointerRNA preferencesptr = RNA_pointer_create(
NULL, &RNA_Preferences, (void *)PyLong_AsVoidPtr(pypreferences));
BL::Preferences b_preferences(preferencesptr);
PointerRNA sceneptr;
RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyscene), &sceneptr);
PointerRNA sceneptr = RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyscene));
BL::Scene b_scene(sceneptr);
DeviceInfo device = blender_device_info(b_preferences, b_scene, true, true);
/* Get denoising parameters from view layer. */
PointerRNA viewlayerptr;
RNA_pointer_create((ID *)PyLong_AsVoidPtr(pyscene),
&RNA_ViewLayer,
PyLong_AsVoidPtr(pyviewlayer),
&viewlayerptr);
PointerRNA viewlayerptr = RNA_pointer_create(
(ID *)PyLong_AsVoidPtr(pyscene), &RNA_ViewLayer, PyLong_AsVoidPtr(pyviewlayer));
BL::ViewLayer b_view_layer(viewlayerptr);
DenoiseParams params = BlenderSync::get_denoise_params(b_scene, b_view_layer, true);
@ -851,8 +836,7 @@ static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args)
return NULL;
}
PointerRNA sceneptr;
RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyscene), &sceneptr);
PointerRNA sceneptr = RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyscene));
BL::Scene b_scene(sceneptr);
debug_flags_sync_from_scene(b_scene);

View File

@ -120,8 +120,7 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d
if (geom->is_mesh()) {
Mesh *mesh = static_cast<Mesh *>(geom);
if (mesh->get_subdivision_type() != Mesh::SUBDIVISION_NONE) {
PointerRNA id_ptr;
RNA_id_pointer_create((::ID *)iter.first.id, &id_ptr);
PointerRNA id_ptr = RNA_id_pointer_create((::ID *)iter.first.id);
geometry_map.set_recalc(BL::ID(id_ptr));
}
}

View File

@ -55,6 +55,9 @@ bool device_oneapi_init()
if (getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE") == nullptr) {
_putenv_s("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0");
}
if (getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE") == nullptr) {
_putenv_s("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE", "0");
}
# elif __linux__
setenv("SYCL_CACHE_PERSISTENT", "1", false);
setenv("SYCL_CACHE_THRESHOLD", "0", false);
@ -66,6 +69,7 @@ bool device_oneapi_init()
}
setenv("SYCL_ENABLE_PCI", "1", false);
setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE", "0", false);
# endif
return true;

View File

@ -843,12 +843,12 @@ void OneapiDevice::get_adjusted_global_and_local_sizes(SyclQueue *queue,
kernel == DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY)
{
/* Path array implementation is serial in case of SYCL Host Task execution. */
global_size = 1;
local_size = 1;
kernel_global_size = 1;
kernel_local_size = 1;
}
# endif
// assert(uniformed_kernel_work_size % local_size == 0);
assert(kernel_global_size % kernel_local_size == 0);
}
/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows

View File

@ -139,7 +139,7 @@ class UsdToCycles {
{TfToken("diffuseColor"), ustring("base_color")},
{TfToken("emissiveColor"), ustring("emission")},
{TfToken("specularColor"), ustring("specular")},
{TfToken("clearcoatRoughness"), ustring("clearcoat_roughness")},
{TfToken("clearcoatRoughness"), ustring("coat_roughness")},
{TfToken("opacity"), ustring("alpha")},
// opacityThreshold
// occlusion

View File

@ -90,6 +90,72 @@ bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
return denoise_result;
}
bool DenoiserGPU::denoise_buffer(const DenoiseTask &task)
{
DenoiseContext context(denoiser_device_, task);
if (!denoise_ensure(context)) {
return false;
}
if (!denoise_filter_guiding_preprocess(context)) {
LOG(ERROR) << "Error preprocessing guiding passes.";
return false;
}
/* Passes which will use real albedo when it is available. */
denoise_pass(context, PASS_COMBINED);
denoise_pass(context, PASS_SHADOW_CATCHER_MATTE);
/* Passes which do not need albedo and hence if real is present it needs to become fake. */
denoise_pass(context, PASS_SHADOW_CATCHER);
return true;
}
bool DenoiserGPU::denoise_ensure(DenoiseContext &context)
{
if (!denoise_create_if_needed(context)) {
LOG(ERROR) << "GPU denoiser creation has failed.";
return false;
}
if (!denoise_configure_if_needed(context)) {
LOG(ERROR) << "GPU denoiser configuration has failed.";
return false;
}
return true;
}
bool DenoiserGPU::denoise_filter_guiding_preprocess(const DenoiseContext &context)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.guiding_params.device_pointer,
&context.guiding_params.pass_stride,
&context.guiding_params.pass_albedo,
&context.guiding_params.pass_normal,
&context.guiding_params.pass_flow,
&context.render_buffers->buffer.device_pointer,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&context.pass_sample_count,
&context.pass_denoising_albedo,
&context.pass_denoising_normal,
&context.pass_motion,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&context.num_samples);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
}
Device *DenoiserGPU::ensure_denoiser_device(Progress *progress)
{
Device *denoiser_device = Denoiser::ensure_denoiser_device(progress);

View File

@ -45,6 +45,17 @@ class DenoiserGPU : public Denoiser {
bool allow_inplace_modification;
};
/* Make sure the GPU denoiser is created and configured. */
virtual bool denoise_ensure(DenoiseContext &context);
/* Create GPU denoiser descriptor if needed.
* Will do nothing if the current GPU descriptor is usable for the given parameters.
* If the GPU denoiser descriptor did re-allocate here it is left unconfigured. */
virtual bool denoise_create_if_needed(DenoiseContext &context) = 0;
/* Configure existing GPU denoiser descriptor for the use for the given task. */
virtual bool denoise_configure_if_needed(DenoiseContext &context) = 0;
/* Read input color pass from the render buffer into the memory which corresponds to the noisy
* input within the given context. Pixels are scaled to the number of samples, but are not
* preprocessed yet. */
@ -56,10 +67,17 @@ class DenoiserGPU : public Denoiser {
bool denoise_filter_color_postprocess(const DenoiseContext &context, const DenoisePass &pass);
bool denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context);
/* Read guiding passes from the render buffers, preprocess them in a way which is expected by
* the GPU denoiser and store in the guiding passes memory within the given context.
*
* Pre-processing of the guiding passes is to only happen once per context lifetime. DO not
* preprocess them for every pass which is being denoised. */
bool denoise_filter_guiding_preprocess(const DenoiseContext &context);
void denoise_pass(DenoiseContext &context, PassType pass_type);
/* Returns true if task is fully handled. */
virtual bool denoise_buffer(const DenoiseTask & /*task*/) = 0;
virtual bool denoise_buffer(const DenoiseTask &task);
virtual bool denoise_run(const DenoiseContext &context, const DenoisePass &pass) = 0;
virtual Device *ensure_denoiser_device(Progress *progress) override;

View File

@ -225,68 +225,7 @@ bool OptiXDenoiser::denoise_buffer(const DenoiseTask &task)
const CUDAContextScope scope(optix_device);
DenoiseContext context(optix_device, task);
if (!denoise_ensure(context)) {
return false;
}
if (!denoise_filter_guiding_preprocess(context)) {
LOG(ERROR) << "Error preprocessing guiding passes.";
return false;
}
/* Passes which will use real albedo when it is available. */
denoise_pass(context, PASS_COMBINED);
denoise_pass(context, PASS_SHADOW_CATCHER_MATTE);
/* Passes which do not need albedo and hence if real is present it needs to become fake. */
denoise_pass(context, PASS_SHADOW_CATCHER);
return true;
}
bool OptiXDenoiser::denoise_filter_guiding_preprocess(const DenoiseContext &context)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.guiding_params.device_pointer,
&context.guiding_params.pass_stride,
&context.guiding_params.pass_albedo,
&context.guiding_params.pass_normal,
&context.guiding_params.pass_flow,
&context.render_buffers->buffer.device_pointer,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&context.pass_sample_count,
&context.pass_denoising_albedo,
&context.pass_denoising_normal,
&context.pass_motion,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&context.num_samples);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
}
bool OptiXDenoiser::denoise_ensure(DenoiseContext &context)
{
if (!denoise_create_if_needed(context)) {
LOG(ERROR) << "OptiX denoiser creation has failed.";
return false;
}
if (!denoise_configure_if_needed(context)) {
LOG(ERROR) << "OptiX denoiser configuration has failed.";
return false;
}
return true;
return DenoiserGPU::denoise_buffer(task);
}
bool OptiXDenoiser::denoise_create_if_needed(DenoiseContext &context)

View File

@ -24,29 +24,20 @@ class OptiXDenoiser : public DenoiserGPU {
private:
virtual bool denoise_buffer(const DenoiseTask &task) override;
/* Read guiding passes from the render buffers, preprocess them in a way which is expected by
* OptiX and store in the guiding passes memory within the given context.
*
* Pre-processing of the guiding passes is to only happen once per context lifetime. DO not
* preprocess them for every pass which is being denoised. */
bool denoise_filter_guiding_preprocess(const DenoiseContext &context);
/* Set fake albedo pixels in the albedo guiding pass storage.
* After this point only passes which do not need albedo for denoising can be processed. */
bool denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context);
/* Make sure the OptiX denoiser is created and configured. */
bool denoise_ensure(DenoiseContext &context);
/* Create OptiX denoiser descriptor if needed.
* Will do nothing if the current OptiX descriptor is usable for the given parameters.
* If the OptiX denoiser descriptor did re-allocate here it is left unconfigured. */
bool denoise_create_if_needed(DenoiseContext &context);
virtual bool denoise_create_if_needed(DenoiseContext &context) override;
/* Configure existing OptiX denoiser descriptor for the use for the given task. */
bool denoise_configure_if_needed(DenoiseContext &context);
virtual bool denoise_configure_if_needed(DenoiseContext &context) override;
/* Run configured denoiser. */
bool denoise_run(const DenoiseContext &context, const DenoisePass &pass) override;
virtual bool denoise_run(const DenoiseContext &context, const DenoisePass &pass) override;
OptixDenoiser optix_denoiser_ = nullptr;

View File

@ -1000,6 +1000,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
-L"${WINDOWS_KIT_DIR}/ucrt/x64")
else() # Add Linux specific compiler flags.
list(APPEND sycl_compiler_flags -fPIC)
list(APPEND sycl_compiler_flags -fvisibility=hidden)
# We avoid getting __FAST_MATH__ to be defined when building on CentOS-7 and Rocky-8
# until the compilation issues it triggers at either AoT or JIT stages gets fixed.

View File

@ -147,7 +147,7 @@ ccl_device_inline
}
#ifdef __SHADOW_LINKING__
if (intersection_skip_shadow_link(kg, ray, prim_object)) {
if (intersection_skip_shadow_link(kg, ray->self, prim_object)) {
continue;
}
#endif

View File

@ -132,7 +132,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
}
#ifdef __SHADOW_LINKING__
if (intersection_skip_shadow_link(kg, ray, prim_object)) {
if (intersection_skip_shadow_link(kg, ray->self, prim_object)) {
continue;
}
#endif

View File

@ -233,14 +233,14 @@ ccl_device_inline float intersection_curve_shadow_transparency(
return (1.0f - u) * f0 + u * f1;
}
ccl_device_inline bool intersection_skip_self(ccl_private const RaySelfPrimitives &self,
ccl_device_inline bool intersection_skip_self(ccl_ray_data const RaySelfPrimitives &self,
const int object,
const int prim)
{
return (self.prim == prim) && (self.object == object);
}
ccl_device_inline bool intersection_skip_self_shadow(ccl_private const RaySelfPrimitives &self,
ccl_device_inline bool intersection_skip_self_shadow(ccl_ray_data const RaySelfPrimitives &self,
const int object,
const int prim)
{
@ -248,22 +248,22 @@ ccl_device_inline bool intersection_skip_self_shadow(ccl_private const RaySelfPr
((self.light_prim == prim) && (self.light_object == object));
}
ccl_device_inline bool intersection_skip_self_local(ccl_private const RaySelfPrimitives &self,
ccl_device_inline bool intersection_skip_self_local(ccl_ray_data const RaySelfPrimitives &self,
const int prim)
{
return (self.prim == prim);
}
#ifdef __SHADOW_LINKING__
ccl_device_inline uint64_t ray_get_shadow_set_membership(KernelGlobals kg,
ccl_private const Ray *ray)
ccl_device_inline uint64_t
ray_get_shadow_set_membership(KernelGlobals kg, ccl_ray_data const RaySelfPrimitives &self)
{
if (ray->self.light != LAMP_NONE) {
return kernel_data_fetch(lights, ray->self.light).shadow_set_membership;
if (self.light != LAMP_NONE) {
return kernel_data_fetch(lights, self.light).shadow_set_membership;
}
if (ray->self.light_object != OBJECT_NONE) {
return kernel_data_fetch(objects, ray->self.light_object).shadow_set_membership;
if (self.light_object != OBJECT_NONE) {
return kernel_data_fetch(objects, self.light_object).shadow_set_membership;
}
return LIGHT_LINK_MASK_ALL;
@ -271,7 +271,7 @@ ccl_device_inline uint64_t ray_get_shadow_set_membership(KernelGlobals kg,
#endif
ccl_device_inline bool intersection_skip_shadow_link(KernelGlobals kg,
ccl_private const Ray *ray,
ccl_ray_data const RaySelfPrimitives &self,
const int isect_object)
{
#ifdef __SHADOW_LINKING__
@ -279,7 +279,7 @@ ccl_device_inline bool intersection_skip_shadow_link(KernelGlobals kg,
return false;
}
const uint64_t set_membership = ray_get_shadow_set_membership(kg, ray);
const uint64_t set_membership = ray_get_shadow_set_membership(kg, self);
if (set_membership == LIGHT_LINK_MASK_ALL) {
return false;
}

View File

@ -153,17 +153,16 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
*eta = 1.0f;
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
label = bsdf_microfacet_ggx_sample(
sc, path_flag, Ng, sd->wi, rand, eval, wo, pdf, sampled_roughness, eta);
sc, Ng, sd->wi, rand, eval, wo, pdf, sampled_roughness, eta);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID:
label = bsdf_microfacet_beckmann_sample(
sc, path_flag, Ng, sd->wi, rand, eval, wo, pdf, sampled_roughness, eta);
sc, Ng, sd->wi, rand, eval, wo, pdf, sampled_roughness, eta);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
label = bsdf_ashikhmin_shirley_sample(
@ -284,7 +283,6 @@ ccl_device_inline void bsdf_roughness_eta(const KernelGlobals kg,
*eta = 1.0f;
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
@ -385,7 +383,6 @@ ccl_device_inline int bsdf_label(const KernelGlobals kg,
label = LABEL_TRANSMIT | LABEL_TRANSPARENT;
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
@ -459,6 +456,7 @@ ccl_device_inline
{
Spectrum eval = zero_spectrum();
*pdf = 0.f;
const float3 Ng = (sd->type & PRIMITIVE_CURVE) ? sc->N : sd->Ng;
switch (sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
@ -483,18 +481,17 @@ ccl_device_inline
eval = bsdf_transparent_eval(sc, sd->wi, wo, pdf);
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
eval = bsdf_microfacet_ggx_eval(sc, sd->N, sd->wi, wo, pdf);
eval = bsdf_microfacet_ggx_eval(sc, Ng, sd->wi, wo, pdf);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID:
eval = bsdf_microfacet_beckmann_eval(sc, sd->N, sd->wi, wo, pdf);
eval = bsdf_microfacet_beckmann_eval(sc, Ng, sd->wi, wo, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
eval = bsdf_ashikhmin_shirley_eval(sc, sd->N, sd->wi, wo, pdf);
eval = bsdf_ashikhmin_shirley_eval(sc, Ng, sd->wi, wo, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
eval = bsdf_ashikhmin_velvet_eval(sc, sd->wi, wo, pdf);
@ -554,7 +551,6 @@ ccl_device void bsdf_blur(KernelGlobals kg, ccl_private ShaderClosure *sc, float
#if defined(__SVM__) || defined(__OSL__)
switch (sc->type) {
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:

View File

@ -207,7 +207,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(ccl_private const ShaderClosure *sc
}
else {
/* leave the rest to eval */
*eval = bsdf_ashikhmin_shirley_eval(sc, N, wi, *wo, pdf);
*eval = bsdf_ashikhmin_shirley_eval(sc, Ng, wi, *wo, pdf);
}
return label;

View File

@ -329,7 +329,11 @@ ccl_device_inline void microfacet_ggx_preserve_energy(KernelGlobals kg,
* code for that), but e.g. a reflection-only closure with Fresnel applied can end up having
* a very low overall albedo.
* This is used to adjust the sample weight, as well as for the Diff/Gloss/Trans Color pass
* and the Denoising Albedo pass. */
* and the Denoising Albedo pass.
* Use lookup tables for generalized Schlick. Otherwise assuming that the surface is smooth. */
/* TODO: The Schlick LUT seems to assume energy preservation, which is not true for GGX. if
* energy-preserving then transmission should just be `1 - reflection`. For dielectric we could
* probably split the LUT for multiGGX if smooth assumption is not good enough. */
ccl_device Spectrum bsdf_microfacet_estimate_albedo(KernelGlobals kg,
ccl_private const ShaderData *sd,
ccl_private const MicrofacetBsdf *bsdf,
@ -362,8 +366,6 @@ ccl_device Spectrum bsdf_microfacet_estimate_albedo(KernelGlobals kg,
albedo += mix(fresnel->f0, fresnel->f90, s) * fresnel->reflection_tint;
}
else {
/* If we don't (yet) have a way to estimate albedo in a way that accounts for roughness,
* fall back to assuming that the surface is smooth. */
albedo += microfacet_fresnel(bsdf, sd->wi, bsdf->N, false);
}
}
@ -375,17 +377,6 @@ ccl_device Spectrum bsdf_microfacet_estimate_albedo(KernelGlobals kg,
return albedo;
}
/* Generalized Trowbridge-Reitz for clearcoat. */
ccl_device_forceinline float bsdf_clearcoat_D(float alpha2, float cos_NH)
{
if (alpha2 >= 1.0f) {
return M_1_PI_F;
}
const float t = 1.0f + (alpha2 - 1.0f) * cos_NH * cos_NH;
return (alpha2 - 1.0f) / (M_PI_F * logf(alpha2) * t);
}
/* Smith shadowing-masking term, here in the non-separable form.
* For details, see:
* Understanding the Masking-Shadowing Function in Microfacet-Based BRDFs.
@ -513,6 +504,9 @@ ccl_device Spectrum bsdf_microfacet_eval(ccl_private const ShaderClosure *sc,
}
/* Compute half vector. */
/* TODO: deal with the case when `bsdf->ior` is close to one. */
/* TODO: check if the refraction configuration is valid. See `btdf_ggx()` in
* `eevee_bxdf_lib.glsl`. */
float3 H = is_transmission ? -(bsdf->ior * wo + wi) : (wi + wo);
const float inv_len_H = 1.0f / len(H);
H *= inv_len_H;
@ -520,21 +514,11 @@ ccl_device Spectrum bsdf_microfacet_eval(ccl_private const ShaderClosure *sc,
const float cos_NH = dot(N, H);
float D, lambdaI, lambdaO;
/* TODO: add support for anisotropic transmission. */
/* NOTE: we could add support for anisotropic transmission, although it will make dispersion
* harder to compute. */
if (alpha_x == alpha_y || is_transmission) { /* Isotropic. */
float alpha2 = alpha_x * alpha_y;
if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
D = bsdf_clearcoat_D(alpha2, cos_NH);
/* The masking-shadowing term for clearcoat has a fixed alpha of 0.25
* => alpha2 = 0.25 * 0.25 */
alpha2 = 0.0625f;
}
else {
D = bsdf_D<m_type>(alpha2, cos_NH);
}
D = bsdf_D<m_type>(alpha2, cos_NH);
lambdaI = bsdf_lambda<m_type>(alpha2, cos_NI);
lambdaO = bsdf_lambda<m_type>(alpha2, cos_NO);
}
@ -559,8 +543,7 @@ ccl_device Spectrum bsdf_microfacet_eval(ccl_private const ShaderClosure *sc,
float lobe_pdf = 1.0f;
if (m_glass) {
float fresnel = fresnel_dielectric_cos(dot(H, wi), bsdf->ior);
float reflect_pdf = (fresnel == 1.0f) ? 1.0f : clamp(fresnel, 0.125f, 0.875f);
lobe_pdf = is_transmission ? (1.0f - reflect_pdf) : reflect_pdf;
lobe_pdf = is_transmission ? (1.0f - fresnel) : fresnel;
}
*pdf = common * lobe_pdf / (1.0f + lambdaI);
@ -571,7 +554,6 @@ ccl_device Spectrum bsdf_microfacet_eval(ccl_private const ShaderClosure *sc,
template<MicrofacetType m_type>
ccl_device int bsdf_microfacet_sample(ccl_private const ShaderClosure *sc,
const int path_flag,
float3 Ng,
float3 wi,
const float3 rand,
@ -583,32 +565,29 @@ ccl_device int bsdf_microfacet_sample(ccl_private const ShaderClosure *sc,
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
const float3 N = bsdf->N;
const float cos_NI = dot(N, wi);
if (cos_NI <= 0) {
/* Incident angle from the lower hemisphere is invalid. */
return LABEL_NONE;
}
const float m_eta = bsdf->ior;
const float m_inv_eta = 1.0f / bsdf->ior;
const bool m_refraction = CLOSURE_IS_REFRACTION(bsdf->type);
const bool m_glass = CLOSURE_IS_GLASS(bsdf->type);
const bool m_reflection = !(m_refraction || m_glass);
const float alpha_x = bsdf->alpha_x;
const float alpha_y = bsdf->alpha_y;
bool m_singular = !bsdf_microfacet_eval_flag(bsdf);
const float3 N = bsdf->N;
const float cos_NI = dot(N, wi);
if (cos_NI <= 0) {
*eval = zero_spectrum();
*pdf = 0.0f;
return (m_reflection ? LABEL_REFLECT : LABEL_TRANSMIT) |
(m_singular ? LABEL_SINGULAR : LABEL_GLOSSY);
}
float3 H;
float cos_NH, cos_HI;
float3 local_H, local_I, X, Y; /* Needed for anisotropic microfacets later. */
/* Needed for anisotropic microfacets later. */
float3 local_H, local_I;
if (m_singular) {
H = N;
cos_NH = 1.0f;
cos_HI = cos_NI;
}
else {
float3 X, Y;
if (alpha_x == alpha_y) {
make_orthonormals(N, &X, &Y);
}
@ -628,100 +607,70 @@ ccl_device int bsdf_microfacet_sample(ccl_private const ShaderClosure *sc,
}
H = X * local_H.x + Y * local_H.y + N * local_H.z;
cos_NH = local_H.z;
cos_HI = dot(H, wi);
}
const float cos_HI = dot(H, wi);
bool valid;
/* The angle between the half vector and the refracted ray. Not used when sampling reflection. */
float cos_HO;
bool do_refract;
float lobe_pdf;
if (m_refraction || m_glass) {
bool inside;
float fresnel = fresnel_dielectric(m_eta, H, wi, wo, &inside);
valid = !inside;
float fresnel = fresnel_dielectric(cos_HI, m_eta, &cos_HO);
/* For glass closures, we decide between reflection and refraction here. */
if (m_glass) {
if (fresnel == 1.0f) {
/* TIR, reflection is the only option. */
do_refract = false;
lobe_pdf = 1.0f;
}
else {
/* Decide between reflection and refraction, using defensive sampling to avoid
* excessive noise for reflection highlights. */
float reflect_pdf = (path_flag & PATH_RAY_CAMERA) ? clamp(fresnel, 0.125f, 0.875f) :
fresnel;
do_refract = (rand.z >= reflect_pdf);
lobe_pdf = do_refract ? (1.0f - reflect_pdf) : reflect_pdf;
}
do_refract = (rand.z >= fresnel);
lobe_pdf = do_refract ? (1.0f - fresnel) : fresnel;
}
else {
/* For pure refractive closures, refraction is the only option. */
if (fresnel == 1.0f) {
return LABEL_NONE;
}
do_refract = true;
lobe_pdf = 1.0f;
valid = valid && (fresnel != 1.0f);
}
}
else {
/* Pure reflective closure, reflection is the only option. */
valid = true;
lobe_pdf = 1.0f;
do_refract = false;
}
int label;
if (do_refract) {
/* wo was already set to the refracted direction by fresnel_dielectric. */
// valid = valid && (dot(Ng, *wo) < 0);
label = LABEL_TRANSMIT;
*wo = refract_angle(wi, H, cos_HO, m_inv_eta);
/* If the IOR is close enough to 1.0, just treat the interaction as specular. */
m_singular = m_singular || (fabsf(m_eta - 1.0f) < 1e-4f);
}
else {
/* Eq. 39 - compute actual reflected direction */
*wo = 2 * cos_HI * H - wi;
valid = valid && (dot(Ng, *wo) > 0);
label = LABEL_REFLECT;
}
if (!valid) {
*eval = zero_spectrum();
*pdf = 0.0f;
return label | (m_singular ? LABEL_SINGULAR : LABEL_GLOSSY);
if ((dot(Ng, *wo) < 0) != do_refract) {
return LABEL_NONE;
}
if (m_singular) {
label |= LABEL_SINGULAR;
/* Some high number for MIS. */
*pdf = lobe_pdf * 1e6f;
*eval = make_spectrum(1e6f) * microfacet_fresnel(bsdf, wi, H, do_refract);
}
else {
label |= LABEL_GLOSSY;
float cos_NO = dot(N, *wo);
float D, lambdaI, lambdaO;
/* TODO: add support for anisotropic transmission. */
if (alpha_x == alpha_y || do_refract) { /* Isotropic. */
float alpha2 = alpha_x * alpha_y;
const float cos_NH = dot(N, H);
const float cos_NO = dot(N, *wo);