Cycles: upgrade Embree to version 4.0 #105974

Merged
Xavier Hallade merged 5 commits from xavierh/blender:cycles_embree4 into main 2023-04-05 11:03:19 +02:00
17 changed files with 778 additions and 347 deletions

View File

@ -6,15 +6,24 @@
set(EMBREE_EXTRA_ARGS
-DEMBREE_ISPC_SUPPORT=OFF
-DEMBREE_TUTORIALS=OFF
-DEMBREE_STATIC_LIB=ON
-DEMBREE_STATIC_LIB=OFF
-DEMBREE_RAY_MASK=ON
-DEMBREE_FILTER_FUNCTION=ON
-DEMBREE_BACKFACE_CULLING=OFF
-DEMBREE_BACKFACE_CULLING_CURVES=ON
-DEMBREE_BACKFACE_CULLING_SPHERES=ON
-DEMBREE_TASKING_SYSTEM=TBB
-DEMBREE_TBB_ROOT=${LIBDIR}/tbb
-DTBB_ROOT=${LIBDIR}/tbb
)
if(WIN32)
set(EMBREE_EXTRA_ARGS
${EMBREE_EXTRA_ARGS}
-DCMAKE_DEBUG_POSTFIX=_d
)
endif()
if(NOT BLENDER_PLATFORM_ARM)
set(EMBREE_EXTRA_ARGS
${EMBREE_EXTRA_ARGS}
@ -45,25 +54,19 @@ add_dependencies(
)
if(WIN32)
if(BUILD_MODE STREQUAL Release)
ExternalProject_Add_Step(external_embree after_install
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/embree ${HARVEST_TARGET}/embree
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/embree/include ${HARVEST_TARGET}/embree/include
xavierh marked this conversation as resolved Outdated
  • The dll lives in /bin, not /lib
  • the release branch above now copies a bunch of runtime dlls which is undesirable
  • my copy had some whitespace issues in this block

for convenience here's what this whole block needs to be

# Windows only: add an `after_install` step to external_embree that copies the
# installed Embree files from ${LIBDIR}/embree into ${HARVEST_TARGET}/embree.
# `DEPENDEES install` makes the step depend on the project's install step.
if(WIN32)
  if(BUILD_MODE STREQUAL Release)
    # Release: copy the include, lib and share directories wholesale, but copy
    # only embree4.dll out of bin instead of the whole bin directory.
    ExternalProject_Add_Step(external_embree after_install
      COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/embree/include ${HARVEST_TARGET}/embree/include
      COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/embree/lib ${HARVEST_TARGET}/embree/lib
      COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/embree/share ${HARVEST_TARGET}/embree/share
      COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/bin/embree4.dll ${HARVEST_TARGET}/embree/bin/embree4.dll
      DEPENDEES install
    )
  else()
    # Any other build mode: copy only the `_d`-suffixed DLL (from bin) and the
    # matching `_d`-suffixed .lib (from lib); headers and share are not copied.
    ExternalProject_Add_Step(external_embree after_install
      COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/bin/embree4_d.dll ${HARVEST_TARGET}/embree/bin/embree4_d.dll
      COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_d.lib ${HARVEST_TARGET}/embree/lib/embree4_d.lib
      DEPENDEES install
    )
  endif()
endif()
- The dll lives in /bin, not /lib - the release branch above now copies a bunch of runtime dlls which is undesirable - my copy had some whitespace issues in this block for convenience here's what this whole block needs to be ``` if(WIN32) if(BUILD_MODE STREQUAL Release) ExternalProject_Add_Step(external_embree after_install COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/embree/include ${HARVEST_TARGET}/embree/include COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/embree/lib ${HARVEST_TARGET}/embree/lib COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/embree/share ${HARVEST_TARGET}/embree/share COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/bin/embree4.dll ${HARVEST_TARGET}/embree/bin/embree4.dll DEPENDEES install ) else() ExternalProject_Add_Step(external_embree after_install COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/bin/embree4_d.dll ${HARVEST_TARGET}/embree/bin/embree4_d.dll COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_d.lib ${HARVEST_TARGET}/embree/lib/embree4_d.lib DEPENDEES install ) endif() endif() ```

Good catch. Runtime DLLs appeared when switching back to MSVC and I missed using bin when slicing the patch back to have only embree4 + dynamic library.
Thanks for the block. I've pushed the change.

Good catch. Runtime DLLs appeared when switching back to MSVC and I missed using bin when slicing the patch back to have only embree4 + dynamic library. Thanks for the block. I've pushed the change.
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/embree/lib ${HARVEST_TARGET}/embree/lib
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/embree/share ${HARVEST_TARGET}/embree/share
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/bin/embree4.dll ${HARVEST_TARGET}/embree/bin/embree4.dll
DEPENDEES install
)
else()
ExternalProject_Add_Step(external_embree after_install
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree3.lib ${HARVEST_TARGET}/embree/lib/embree3_d.lib
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree_avx.lib ${HARVEST_TARGET}/embree/lib/embree_avx_d.lib
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree_avx2.lib ${HARVEST_TARGET}/embree/lib/embree_avx2_d.lib
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree_sse42.lib ${HARVEST_TARGET}/embree/lib/embree_sse42_d.lib
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/lexers.lib ${HARVEST_TARGET}/embree/lib/lexers_d.lib
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/math.lib ${HARVEST_TARGET}/embree/lib/math_d.lib
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/simd.lib ${HARVEST_TARGET}/embree/lib/simd_d.lib
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/sys.lib ${HARVEST_TARGET}/embree/lib/sys_d.lib
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/tasking.lib ${HARVEST_TARGET}/embree/lib/tasking_d.lib
DEPENDEES install
)
endif()
ExternalProject_Add_Step(external_embree after_install
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/bin/embree4_d.dll ${HARVEST_TARGET}/embree/bin/embree4_d.dll
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_d.lib ${HARVEST_TARGET}/embree/lib/embree4_d.lib
DEPENDEES install
)
endif()
endif()

View File

@ -478,9 +478,9 @@ set(SQLITE_HASH_TYPE SHA1)
set(SQLITE_FILE sqlite-autoconf-${SQLLITE_LONG_VERSION}.tar.gz)
set(SQLITE_CPE "cpe:2.3:a:sqlite:sqlite:${SQLITE_VERSION}:*:*:*:*:*:*:*")
set(EMBREE_VERSION 3.13.4)
set(EMBREE_VERSION 4.0.1)
set(EMBREE_URI https://github.com/embree/embree/archive/v${EMBREE_VERSION}.zip)
set(EMBREE_HASH 52d0be294d6c88ba7a6c9e046796e7be)
set(EMBREE_HASH dd26617719a587e126b341d1b32f7fd0)
set(EMBREE_HASH_TYPE MD5)
set(EMBREE_FILE embree-v${EMBREE_VERSION}.zip)

View File

@ -1,19 +1,8 @@
diff -Naur org/kernels/rtcore_config.h.in embree-3.13.4/kernels/rtcore_config.h.in
--- org/kernels/rtcore_config.h.in 2022-06-14 22:13:52 -0600
+++ embree-3.13.4/kernels/rtcore_config.h.in 2022-06-24 15:20:12 -0600
@@ -14,6 +14,7 @@
#cmakedefine01 EMBREE_MIN_WIDTH
#define RTC_MIN_WIDTH EMBREE_MIN_WIDTH
+#cmakedefine EMBREE_STATIC_LIB
#cmakedefine EMBREE_API_NAMESPACE
#if defined(EMBREE_API_NAMESPACE)
diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt
index 7c2f43d..106b1d5 100644
--- a/kernels/CMakeLists.txt
+++ b/kernels/CMakeLists.txt
@@ -201,6 +201,12 @@ embree_files(EMBREE_LIBRARY_FILES_AVX512 ${AVX512})
@@ -208,6 +208,12 @@ embree_files(EMBREE_LIBRARY_FILES_AVX512 ${AVX512})
#message("AVX2: ${EMBREE_LIBRARY_FILES_AVX2}")
#message("AVX512: ${EMBREE_LIBRARY_FILES_AVX512}")
@ -26,7 +15,7 @@ index 7c2f43d..106b1d5 100644
# replaces all .cpp files with a dummy file that includes that .cpp file
# this is to work around an ICC name mangling issue related to lambda functions under windows
MACRO (CreateISADummyFiles list isa)
@@ -277,7 +283,7 @@ IF (EMBREE_ISA_AVX AND EMBREE_LIBRARY_FILES_AVX)
@@ -311,7 +317,7 @@ IF (EMBREE_ISA_AVX AND EMBREE_LIBRARY_FILES_AVX)
ENDIF()
ENDIF()
@ -35,3 +24,128 @@ index 7c2f43d..106b1d5 100644
DISABLE_STACK_PROTECTOR_FOR_INTERSECTORS(${EMBREE_LIBRARY_FILES_AVX2})
ADD_LIBRARY(embree_avx2 STATIC ${EMBREE_LIBRARY_FILES_AVX2})
TARGET_LINK_LIBRARIES(embree_avx2 PRIVATE tasking)
diff --git a/include/embree4/rtcore_device.h b/include/embree4/rtcore_device.h
index 45bf95583..62ee7787d 100644
--- a/include/embree4/rtcore_device.h
+++ b/include/embree4/rtcore_device.h
@@ -55,6 +55,7 @@ enum RTCDeviceProperty
RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED = 66,
RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED = 67,
RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED = 68,
+ RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED = 69,
RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED = 96,
RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED = 97,
diff --git a/kernels/common/device.cpp b/kernels/common/device.cpp
index 3ffac7e37..215ccc961 100644
--- a/kernels/common/device.cpp
+++ b/kernels/common/device.cpp
@@ -170,6 +170,9 @@ namespace embree
#if defined (EMBREE_BACKFACE_CULLING_CURVES)
v += "backfacecullingcurves ";
#endif
+#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
+ v += "backfacecullingspheres ";
+#endif
#if defined(EMBREE_FILTER_FUNCTION)
v += "intersection_filter ";
#endif
@@ -477,6 +480,12 @@ namespace embree
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 0;
#endif
+#if defined(EMBREE_BACKFACE_CULLING_SPHERES)
+ case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 1;
+#else
+ case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 0;
+#endif
+
#if defined(EMBREE_COMPACT_POLYS)
case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 1;
#else
diff --git a/kernels/config.h.in b/kernels/config.h.in
index f02c90360..ba9acde56 100644
--- a/kernels/config.h.in
+++ b/kernels/config.h.in
@@ -5,6 +5,7 @@
#cmakedefine EMBREE_STAT_COUNTERS
#cmakedefine EMBREE_BACKFACE_CULLING
#cmakedefine EMBREE_BACKFACE_CULLING_CURVES
+#cmakedefine EMBREE_BACKFACE_CULLING_SPHERES
#cmakedefine EMBREE_FILTER_FUNCTION
#cmakedefine EMBREE_IGNORE_INVALID_RAYS
#cmakedefine EMBREE_GEOMETRY_TRIANGLE
diff --git a/kernels/geometry/sphere_intersector.h b/kernels/geometry/sphere_intersector.h
index 074f910a2..30f490818 100644
--- a/kernels/geometry/sphere_intersector.h
+++ b/kernels/geometry/sphere_intersector.h
@@ -106,8 +106,13 @@ namespace embree
const vbool<M> valid_front = valid & (ray.tnear() <= t_front) & (t_front <= ray.tfar);
const vbool<M> valid_back = valid & (ray.tnear() <= t_back ) & (t_back <= ray.tfar);
+#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
+ /* check if there is a first hit */
+ const vbool<M> valid_first = valid_front;
+#else
/* check if there is a first hit */
const vbool<M> valid_first = valid_front | valid_back;
+#endif
if (unlikely(none(valid_first)))
return false;
@@ -120,7 +125,8 @@ namespace embree
/* invoke intersection filter for first hit */
const bool is_hit_first = epilog(valid_first, hit);
-
+
+#if !defined (EMBREE_BACKFACE_CULLING_SPHERES)
/* check for possible second hits before potentially accepted hit */
const vfloat<M> t_second = t_back;
const vbool<M> valid_second = valid_front & valid_back & (t_second <= ray.tfar);
@@ -131,7 +137,9 @@ namespace embree
const Vec3vf<M> Ng_second = td_back * ray_dir - perp;
hit = SphereIntersectorHitM<M> (t_second, Ng_second);
const bool is_hit_second = epilog(valid_second, hit);
-
+#else
+ constexpr bool is_hit_second = false;
+#endif
return is_hit_first | is_hit_second;
}
@@ -186,8 +194,13 @@ namespace embree
const vbool<M> valid_front = valid & (ray.tnear()[k] <= t_front) & (t_front <= ray.tfar[k]);
const vbool<M> valid_back = valid & (ray.tnear()[k] <= t_back ) & (t_back <= ray.tfar[k]);
+#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
+ /* check if there is a first hit */
+ const vbool<M> valid_first = valid_front;
+#else
/* check if there is a first hit */
const vbool<M> valid_first = valid_front | valid_back;
+#endif
if (unlikely(none(valid_first)))
return false;
@@ -200,7 +213,8 @@ namespace embree
/* invoke intersection filter for first hit */
const bool is_hit_first = epilog(valid_first, hit);
-
+
+#if !defined (EMBREE_BACKFACE_CULLING_SPHERES)
/* check for possible second hits before potentially accepted hit */
const vfloat<M> t_second = t_back;
const vbool<M> valid_second = valid_front & valid_back & (t_second <= ray.tfar[k]);
@@ -211,7 +225,9 @@ namespace embree
const Vec3vf<M> Ng_second = td_back * ray_dir - perp;
hit = SphereIntersectorHitM<M> (t_second, Ng_second);
const bool is_hit_second = epilog(valid_second, hit);
-
+#else
+ constexpr bool is_hit_second = false;
+#endif
return is_hit_first | is_hit_second;
}
};

View File

@ -23,35 +23,81 @@ SET(_embree_SEARCH_DIRS
FIND_PATH(EMBREE_INCLUDE_DIR
NAMES
embree3/rtcore.h
embree4/rtcore.h
HINTS
${_embree_SEARCH_DIRS}
PATH_SUFFIXES
include
)
IF(NOT (("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") OR (APPLE AND ("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64"))))
SET(_embree_SIMD_COMPONENTS
embree_sse42
embree_avx
embree_avx2
IF(EMBREE_INCLUDE_DIR)
SET(EMBREE_MAJOR_VERSION 4)
ELSE()
SET(EMBREE_MAJOR_VERSION 3)
FIND_PATH(EMBREE_INCLUDE_DIR
NAMES
embree3/rtcore.h
HINTS
${_embree_SEARCH_DIRS}
PATH_SUFFIXES
include
)
ENDIF()
SET(_embree_FIND_COMPONENTS
embree3
${_embree_SIMD_COMPONENTS}
lexers
math
simd
sys
tasking
)
IF(EMBREE_INCLUDE_DIR)
xavierh marked this conversation as resolved Outdated

Should this check if EMBREE_INCLUDE_DIR was found, to gracefully handle Embree not being found?

Should this check if EMBREE_INCLUDE_DIR was found, to gracefully handle Embree not being found?

ok yes, I've just pushed this change.

ok yes, I've just pushed this change.
# Read rtcore_config.h from the versioned Embree include directory and derive
# two build properties of the found Embree from the defines it contains.
FILE(READ ${EMBREE_INCLUDE_DIR}/embree${EMBREE_MAJOR_VERSION}/rtcore_config.h _embree_config_header)
# EMBREE_STATIC_LIB is TRUE only when the header contains that #define.
IF(_embree_config_header MATCHES "#define EMBREE_STATIC_LIB")
SET(EMBREE_STATIC_LIB TRUE)
ELSE()
SET(EMBREE_STATIC_LIB FALSE)
ENDIF()
# EMBREE_SYCL_SUPPORT is TRUE only when the header contains that #define.
IF(_embree_config_header MATCHES "#define EMBREE_SYCL_SUPPORT")
SET(EMBREE_SYCL_SUPPORT TRUE)
ELSE()
SET(EMBREE_SYCL_SUPPORT FALSE)
ENDIF()
# The header contents are only needed for the two checks above.
UNSET(_embree_config_header)
ENDIF()
IF(EMBREE_STATIC_LIB)
IF(NOT (("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") OR (APPLE AND ("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64"))))
SET(_embree_SIMD_COMPONENTS
embree_sse42
embree_avx
embree_avx2
)
ENDIF()
IF(EMBREE_SYCL_SUPPORT)
SET(_embree_GPU_COMPONENTS
embree4_sycl
embree_rthwif
)
ENDIF()
SET(_embree_FIND_COMPONENTS
embree${EMBREE_MAJOR_VERSION}
${_embree_SIMD_COMPONENTS}
${_embree_GPU_COMPONENTS}
lexers
math
simd
sys
tasking
xavierh marked this conversation as resolved Outdated

This logic is still rather fuzzy to me. It seems like it will prefer embree3 over embree4, and break before embree_rthwif is found?

Can you detect the Embree version first (e.g. with the existence of a header file, or parsing the version number from a header file), and then set the libraries to search for based on that?

This logic is still rather fuzzy to me. It seems like it will prefer embree3 over embree4, and break before `embree_rthwif` is found? Can you detect the Embree version first (e.g. with the existence of a header file, or parsing the version number from a header file), and then set the libraries to search for based on that?

The whole foreach is for the case in which there are no static libraries, as initially.
In that case, there is no embree_rthwif to find and either EMBREE_EMBREE4_LIBRARY or EMBREE_EMBREE3_LIBRARY will exist but not both.
We can determine embree version with the include_dir search (embree4/rtcore.h vs embree3/rtcore.h)

The whole foreach is for the case in which there are no static libraries, as initially. In that case, there is no embree_rthwif to find and either EMBREE_EMBREE4_LIBRARY or EMBREE_EMBREE3_LIBRARY will exist but not both. We can determine embree version with the include_dir search (embree4/rtcore.h vs embree3/rtcore.h)
)
ELSE()
SET(_embree_FIND_COMPONENTS
embree${EMBREE_MAJOR_VERSION}
)
IF(EMBREE_SYCL_SUPPORT)
LIST(APPEND _embree_FIND_COMPONENTS
embree4_sycl
)
ENDIF()
ENDIF()
SET(_embree_LIBRARIES)
FOREACH(COMPONENT ${_embree_FIND_COMPONENTS})
STRING(TOUPPER ${COMPONENT} UPPERCOMPONENT)
FIND_LIBRARY(EMBREE_${UPPERCOMPONENT}_LIBRARY
NAMES
${COMPONENT}
@ -60,18 +106,9 @@ FOREACH(COMPONENT ${_embree_FIND_COMPONENTS})
PATH_SUFFIXES
lib64 lib
xavierh marked this conversation as resolved Outdated

I'd prefer to have EMBREE_LIBRARIES_GPU instead of this, so we can link to them depending if oneAPI is used.

I'd prefer to have `EMBREE_LIBRARIES_GPU` instead of this, so we can link to them depending if oneAPI is used.

We can't: when built with dpcpp, it will have SYCL references in sys.lib and embree4.lib regardless of whether oneAPI is ON or OFF, so we are going to have to link SYCL.

We can't: when built with dpcpp, it will have SYCL references in sys.lib and embree4.lib regardless of whether oneAPI is ON or OFF, so we are going to have to link SYCL.
)
IF(NOT EMBREE_${UPPERCOMPONENT}_LIBRARY)
IF(EMBREE_EMBREE3_LIBRARY)
# If we can't find all the static libraries, try to fall back to the shared library if found.
# This allows building with a shared embree library
SET(_embree_LIBRARIES ${EMBREE_EMBREE3_LIBRARY})
BREAK()
ENDIF()
ENDIF()
LIST(APPEND _embree_LIBRARIES "${EMBREE_${UPPERCOMPONENT}_LIBRARY}")
ENDFOREACH()
# handle the QUIETLY and REQUIRED arguments and set EMBREE_FOUND to TRUE if
# all listed variables are TRUE
INCLUDE(FindPackageHandleStandardArgs)
@ -85,6 +122,9 @@ ENDIF()
MARK_AS_ADVANCED(
EMBREE_INCLUDE_DIR
EMBREE_MAJOR_VERSION
EMBREE_SYCL_SUPPORT
EMBREE_STATIC_LIB
)
FOREACH(COMPONENT ${_embree_FIND_COMPONENTS})

View File

@ -108,6 +108,7 @@ FIND_PACKAGE_HANDLE_STANDARD_ARGS(SYCL
IF(SYCL_FOUND)
SET(SYCL_INCLUDE_DIR ${SYCL_INCLUDE_DIR} ${SYCL_INCLUDE_DIR}/sycl)
SET(SYCL_LIBRARIES ${SYCL_LIBRARY})
ELSE()
SET(SYCL_SYCL_FOUND FALSE)
ENDIF()

View File

@ -330,6 +330,7 @@ if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
endforeach()
set(EMBREE_LIBRARIES ${_embree_libraries_force_load})
endif()
add_bundled_libraries(embree/lib)
if(WITH_OPENIMAGEDENOISE)
find_package(OpenImageDenoise REQUIRED)

View File

@ -317,7 +317,7 @@ if(WITH_CYCLES AND WITH_CYCLES_OSL)
endif()
endif()
if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
if(WITH_CYCLES AND (WITH_CYCLES_DEVICE_ONEAPI OR (WITH_CYCLES_EMBREE AND EMBREE_SYCL_SUPPORT)))
xavierh marked this conversation as resolved Outdated

Can we not link to embree_rthwif and embree4_sycl instead of this? Or does embree4 have a dependency on this?

Can we not link to `embree_rthwif` and `embree4_sycl` instead of this? Or does `embree4` have a dependency on this?

embree4 depends on embree_rthwif and embree4_sycl when it's built with GPU support.

embree4 depends on embree_rthwif and embree4_sycl when it's built with GPU support.

Ok, it should check WITH_CYCLES_EMBREE as well then.

Ok, it should check `WITH_CYCLES_EMBREE` as well then.
set(CYCLES_LEVEL_ZERO ${LIBDIR}/level-zero CACHE PATH "Path to Level Zero installation")
if(EXISTS ${CYCLES_LEVEL_ZERO} AND NOT LEVEL_ZERO_ROOT_DIR)
set(LEVEL_ZERO_ROOT_DIR ${CYCLES_LEVEL_ZERO})
@ -453,6 +453,7 @@ add_bundled_libraries(opencolorio/lib)
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
find_package(Embree 3.8.0 REQUIRED)
endif()
add_bundled_libraries(embree/lib)
if(WITH_OPENIMAGEDENOISE)
find_package_wrapper(OpenImageDenoise)

View File

@ -850,27 +850,75 @@ endif()
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
windows_find_package(Embree)
if(NOT Embree_FOUND)
set(EMBREE_ROOT_DIR ${LIBDIR}/embree)
set(EMBREE_INCLUDE_DIRS ${LIBDIR}/embree/include)
xavierh marked this conversation as resolved Outdated

Is the plan to switch Embree to be a shared library? On Windows we generally don't keep code for both cases, just for what we actually use.

I'd be fine with that, as we have other shared libraries as well now and maybe it's easier. Not sure if there are any performance implications.

Is the plan to switch Embree to be a shared library? On Windows we generally don't keep code for both cases, just for what we actually use. I'd be fine with that, as we have other shared libraries as well now and maybe it's easier. Not sure if there are any performance implications.

It's not really a plan, but there were already traces of support for Embree as a shared library in the current codebase, and it proved useful to me for debugging an issue coming from static linking, so I kept it working.

It's not really a plan, but there were already traces of support for Embree as a shared library in the current codebase, and it proved useful to me for debugging an issue coming from static linking, so I kept it working.

as discussed, I now let it be built as a shared library.

as discussed, I now let it be built as a shared library.
set(EMBREE_LIBRARIES
optimized ${LIBDIR}/embree/lib/embree3.lib
optimized ${LIBDIR}/embree/lib/embree_avx2.lib
optimized ${LIBDIR}/embree/lib/embree_avx.lib
optimized ${LIBDIR}/embree/lib/embree_sse42.lib
optimized ${LIBDIR}/embree/lib/lexers.lib
optimized ${LIBDIR}/embree/lib/math.lib
optimized ${LIBDIR}/embree/lib/simd.lib
optimized ${LIBDIR}/embree/lib/sys.lib
optimized ${LIBDIR}/embree/lib/tasking.lib
debug ${LIBDIR}/embree/lib/embree3_d.lib
debug ${LIBDIR}/embree/lib/embree_avx2_d.lib
debug ${LIBDIR}/embree/lib/embree_avx_d.lib
debug ${LIBDIR}/embree/lib/embree_sse42_d.lib
debug ${LIBDIR}/embree/lib/lexers_d.lib
debug ${LIBDIR}/embree/lib/math_d.lib
debug ${LIBDIR}/embree/lib/simd_d.lib
debug ${LIBDIR}/embree/lib/sys_d.lib
debug ${LIBDIR}/embree/lib/tasking_d.lib
# Pick the Embree major version from the headers present in LIBDIR: version 4
# if embree4/rtcore_config.h exists, otherwise version 3 is assumed.
if(EXISTS ${LIBDIR}/embree/include/embree4/rtcore_config.h)
set(EMBREE_MAJOR_VERSION 4)
else()
set(EMBREE_MAJOR_VERSION 3)
endif()
# Read the matching rtcore_config.h; the checks below look for defines in it.
file(READ ${LIBDIR}/embree/include/embree${EMBREE_MAJOR_VERSION}/rtcore_config.h _embree_config_header)
# EMBREE_STATIC_LIB is TRUE only when the header contains that #define.
if(_embree_config_header MATCHES "#define EMBREE_STATIC_LIB")
set(EMBREE_STATIC_LIB TRUE)
else()
set(EMBREE_STATIC_LIB FALSE)
endif()
# EMBREE_SYCL_SUPPORT is TRUE only when the header contains that #define.
if(_embree_config_header MATCHES "#define EMBREE_SYCL_SUPPORT")
set(EMBREE_SYCL_SUPPORT TRUE)
else()
set(EMBREE_SYCL_SUPPORT FALSE)
endif()
# Build EMBREE_LIBRARIES as optimized/debug pairs; every debug entry is the
# `_d`-suffixed file of the same name. Start with the versioned main library.
set(EMBREE_LIBRARIES
optimized ${LIBDIR}/embree/lib/embree${EMBREE_MAJOR_VERSION}.lib
debug ${LIBDIR}/embree/lib/embree${EMBREE_MAJOR_VERSION}_d.lib
)
# With SYCL support, embree4_sycl is added in both static and shared builds.
if(EMBREE_SYCL_SUPPORT)
set(EMBREE_LIBRARIES
${EMBREE_LIBRARIES}
optimized ${LIBDIR}/embree/lib/embree4_sycl.lib
debug ${LIBDIR}/embree/lib/embree4_sycl_d.lib
)
endif()
# A static Embree additionally needs its component libraries listed
# explicitly: the avx2/avx/sse42 libraries plus lexers, math, simd, sys and
# tasking.
if(EMBREE_STATIC_LIB)
set(EMBREE_LIBRARIES
${EMBREE_LIBRARIES}
optimized ${LIBDIR}/embree/lib/embree_avx2.lib
optimized ${LIBDIR}/embree/lib/embree_avx.lib
optimized ${LIBDIR}/embree/lib/embree_sse42.lib
optimized ${LIBDIR}/embree/lib/lexers.lib
optimized ${LIBDIR}/embree/lib/math.lib
optimized ${LIBDIR}/embree/lib/simd.lib
optimized ${LIBDIR}/embree/lib/sys.lib
optimized ${LIBDIR}/embree/lib/tasking.lib
debug ${LIBDIR}/embree/lib/embree_avx2_d.lib
debug ${LIBDIR}/embree/lib/embree_avx_d.lib
debug ${LIBDIR}/embree/lib/embree_sse42_d.lib
debug ${LIBDIR}/embree/lib/lexers_d.lib
debug ${LIBDIR}/embree/lib/math_d.lib
debug ${LIBDIR}/embree/lib/simd_d.lib
debug ${LIBDIR}/embree/lib/sys_d.lib
debug ${LIBDIR}/embree/lib/tasking_d.lib
)
# embree_rthwif is only added when the build is both static and has SYCL
# support.
if(EMBREE_SYCL_SUPPORT)
set(EMBREE_LIBRARIES
${EMBREE_LIBRARIES}
optimized ${LIBDIR}/embree/lib/embree_rthwif.lib
debug ${LIBDIR}/embree/lib/embree_rthwif_d.lib
)
endif()
endif()
endif()
if(NOT EMBREE_STATIC_LIB)
list(APPEND PLATFORM_BUNDLED_LIBRARIES
xavierh marked this conversation as resolved Outdated

PLATFORM_BUNDLED_LIBRARIES wasn't really meant to work yet on windows and just dropped the libs right next to the blender binary. I added support for using the blender.shared folder in #106348 , this does however mean that the final filenames need to be known at configure time, which precludes the use of generator expressions.

list(APPEND PLATFORM_BUNDLED_LIBRARIES 
  RELEASE ${EMBREE_ROOT_DIR}/bin/embree${EMBREE_MAJOR_VERSION}.dll
  DEBUG ${EMBREE_ROOT_DIR}/bin/embree${EMBREE_MAJOR_VERSION}_d.dll
)

should do the trick (also merge main, as #106348 only landed minutes ago)

`PLATFORM_BUNDLED_LIBRARIES` wasn't really meant to work yet on windows and just dropped the libs right next to the blender binary. I added support for using the `blender.shared` folder in #106348 , this does however mean that the final filenames need to be known at configure time, which precludes the use of generator expressions. ``` list(APPEND PLATFORM_BUNDLED_LIBRARIES RELEASE ${EMBREE_ROOT_DIR}/bin/embree${EMBREE_MAJOR_VERSION}.dll DEBUG ${EMBREE_ROOT_DIR}/bin/embree${EMBREE_MAJOR_VERSION}_d.dll ) ``` should do the trick (also merge main, as #106348 only landed minutes ago)
RELEASE ${EMBREE_ROOT_DIR}/bin/embree${EMBREE_MAJOR_VERSION}.dll
DEBUG ${EMBREE_ROOT_DIR}/bin/embree${EMBREE_MAJOR_VERSION}_d.dll
)
endif()
endif()
@ -1029,7 +1077,7 @@ endif()
set(ZSTD_INCLUDE_DIRS ${LIBDIR}/zstd/include)
set(ZSTD_LIBRARIES ${LIBDIR}/zstd/lib/zstd_static.lib)
if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
if(WITH_CYCLES AND (WITH_CYCLES_DEVICE_ONEAPI OR (WITH_CYCLES_EMBREE AND EMBREE_SYCL_SUPPORT)))
set(LEVEL_ZERO_ROOT_DIR ${LIBDIR}/level_zero)
set(CYCLES_SYCL ${LIBDIR}/dpcpp CACHE PATH "Path to oneAPI DPC++ compiler")
if(EXISTS ${CYCLES_SYCL} AND NOT SYCL_ROOT_DIR)
@ -1055,6 +1103,8 @@ if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries})
unset(_sycl_runtime_libraries)
set(SYCL_LIBRARIES optimized ${SYCL_LIBRARY} debug ${SYCL_LIBRARY_DEBUG})
endif()

View File

@ -281,6 +281,7 @@ endif()
if(WITH_CYCLES_EMBREE)
add_definitions(-DWITH_EMBREE)
add_definitions(-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION})
include_directories(
SYSTEM
${EMBREE_INCLUDE_DIRS}

View File

@ -61,6 +61,11 @@ if(WITH_CYCLES_EMBREE)
list(APPEND LIB
${EMBREE_LIBRARIES}
)
if(EMBREE_SYCL_SUPPORT)
list(APPEND LIB
${SYCL_LIBRARIES}
)
endif()
endif()
xavierh marked this conversation as resolved Outdated

Can this Windows specific logic be kept in platform_win32.cmake? We normally don't need to do this debug/optimized distinction in places like this.

Can this Windows specific logic be kept in `platform_win32.cmake`? We normally don't need to do this debug/optimized distinction in places like this.
cycles_add_library(cycles_bvh "${LIB}" ${SRC} ${SRC_HEADERS})

View File

@ -17,7 +17,11 @@
#ifdef WITH_EMBREE
# include <embree3/rtcore_geometry.h>
# if EMBREE_MAJOR_VERSION >= 4
# include <embree4/rtcore_geometry.h>
# else
# include <embree3/rtcore_geometry.h>
# endif
# include "bvh/embree.h"
@ -128,7 +132,11 @@ void BVHEmbree::build(Progress &progress, Stats *stats, RTCDevice rtc_device_)
scene = rtcNewScene(rtc_device);
const RTCSceneFlags scene_flags = (dynamic ? RTC_SCENE_FLAG_DYNAMIC : RTC_SCENE_FLAG_NONE) |
(compact ? RTC_SCENE_FLAG_COMPACT : RTC_SCENE_FLAG_NONE) |
RTC_SCENE_FLAG_ROBUST;
RTC_SCENE_FLAG_ROBUST
# if EMBREE_MAJOR_VERSION >= 4
| RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS
# endif
;
rtcSetSceneFlags(scene, scene_flags);
build_quality = dynamic ? RTC_BUILD_QUALITY_LOW :
(params.use_spatial_split ? RTC_BUILD_QUALITY_HIGH :
@ -226,6 +234,9 @@ void BVHEmbree::add_instance(Object *ob, int i)
rtcSetGeometryUserData(geom_id, (void *)instance_bvh->scene);
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
# if EMBREE_MAJOR_VERSION >= 4
rtcSetGeometryEnableFilterFunctionFromArguments(geom_id, true);
# endif
rtcCommitGeometry(geom_id);
rtcAttachGeometryByID(scene, geom_id, i * 2);
@ -267,9 +278,13 @@ void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
set_tri_vertex_buffer(geom_id, mesh, false);
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
# if EMBREE_MAJOR_VERSION >= 4
rtcSetGeometryEnableFilterFunctionFromArguments(geom_id, true);
# else
rtcSetGeometryOccludedFilterFunction(geom_id, kernel_embree_filter_occluded_func);
rtcSetGeometryIntersectFilterFunction(geom_id, kernel_embree_filter_intersection_func);
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
# endif
rtcCommitGeometry(geom_id);
rtcAttachGeometryByID(scene, geom_id, i * 2);
@ -494,9 +509,13 @@ void BVHEmbree::add_points(const Object *ob, const PointCloud *pointcloud, int i
set_point_vertex_buffer(geom_id, pointcloud, false);
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
# if EMBREE_MAJOR_VERSION >= 4
rtcSetGeometryEnableFilterFunctionFromArguments(geom_id, true);
# else
rtcSetGeometryIntersectFilterFunction(geom_id, kernel_embree_filter_func_backface_cull);
rtcSetGeometryOccludedFilterFunction(geom_id, kernel_embree_filter_occluded_func_backface_cull);
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
# endif
rtcCommitGeometry(geom_id);
rtcAttachGeometryByID(scene, geom_id, i * 2);
@ -553,6 +572,10 @@ void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
set_curve_vertex_buffer(geom_id, hair, false);
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
# if EMBREE_MAJOR_VERSION >= 4
rtcSetGeometryEnableFilterFunctionFromArguments(geom_id, true);
# else
if (hair->curve_shape == CURVE_RIBBON) {
rtcSetGeometryIntersectFilterFunction(geom_id, kernel_embree_filter_intersection_func);
rtcSetGeometryOccludedFilterFunction(geom_id, kernel_embree_filter_occluded_func);
@ -562,7 +585,7 @@ void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
rtcSetGeometryOccludedFilterFunction(geom_id,
kernel_embree_filter_occluded_func_backface_cull);
}
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
# endif
rtcCommitGeometry(geom_id);
rtcAttachGeometryByID(scene, geom_id, i * 2 + 1);

View File

@ -6,8 +6,13 @@
#ifdef WITH_EMBREE
# include <embree3/rtcore.h>
# include <embree3/rtcore_scene.h>
# if EMBREE_MAJOR_VERSION >= 4
# include <embree4/rtcore.h>
# include <embree4/rtcore_scene.h>
# else
# include <embree3/rtcore.h>
# include <embree3/rtcore_scene.h>
# endif
# include "bvh/bvh.h"
# include "bvh/params.h"

View File

@ -176,12 +176,10 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
else()
set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/../kernel/libcycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.so)
endif()
list(APPEND LIB ${cycles_kernel_oneapi_lib})
if(WIN32)
list(APPEND LIB debug ${SYCL_LIBRARY_DEBUG} optimized ${SYCL_LIBRARY})
else()
list(APPEND LIB ${SYCL_LIBRARY})
endif()
list(APPEND LIB
${cycles_kernel_oneapi_lib}
${SYCL_LIBRARIES}
)
list(APPEND SRC
${SRC_ONEAPI}
)

View File

@ -14,7 +14,11 @@
#endif
#ifdef WITH_EMBREE
# include <embree3/rtcore.h>
# if EMBREE_MAJOR_VERSION >= 4
# include <embree4/rtcore.h>
# else
# include <embree3/rtcore.h>
# endif
#endif
#include "device/cpu/kernel.h"

View File

@ -11,7 +11,11 @@
#endif
#ifdef WITH_EMBREE
# include <embree3/rtcore.h>
# if EMBREE_MAJOR_VERSION >= 4
# include <embree4/rtcore.h>
# else
# include <embree3/rtcore.h>
# endif
#endif
#include "device/cpu/kernel.h"

View File

@ -5,8 +5,13 @@
#pragma once
#include <embree3/rtcore_ray.h>
#include <embree3/rtcore_scene.h>
#if EMBREE_MAJOR_VERSION >= 4
# include <embree4/rtcore_ray.h>
# include <embree4/rtcore_scene.h>
#else
# include <embree3/rtcore_ray.h>
# include <embree3/rtcore_scene.h>
#endif
#include "kernel/device/cpu/compat.h"
#include "kernel/device/cpu/globals.h"
@ -28,11 +33,84 @@ using numhit_t = uint8_t;
using numhit_t = uint32_t;
#endif
#define CYCLES_EMBREE_USED_FEATURES \
(RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
#define EMBREE_IS_HAIR(x) (x & 1)
#if EMBREE_MAJOR_VERSION < 4
# define rtcGetGeometryUserDataFromScene(scene, id) \
(rtcGetGeometryUserData(rtcGetGeometry(scene, id)))
#endif
/* Intersection context. */
struct CCLIntersectContext {
struct CCLFirstHitContext
#if EMBREE_MAJOR_VERSION >= 4
: public RTCRayQueryContext
#endif
{
KernelGlobals kg;
/* For avoiding self intersections */
const Ray *ray;
};
struct CCLShadowContext
#if EMBREE_MAJOR_VERSION >= 4
: public RTCRayQueryContext
#endif
{
#if EMBREE_MAJOR_VERSION >= 4
KernelGlobals kg;
const Ray *ray;
#endif
IntegratorShadowState isect_s;
float throughput;
bool opaque_hit;
numhit_t max_hits;
numhit_t num_hits;
numhit_t num_recorded_hits;
};
struct CCLLocalContext
#if EMBREE_MAJOR_VERSION >= 4
: public RTCRayQueryContext
#endif
{
#if EMBREE_MAJOR_VERSION >= 4
KernelGlobals kg;
const Ray *ray;
numhit_t max_hits;
#endif
int local_object_id;
LocalIntersection *local_isect;
uint *lcg_state;
bool is_sss;
};
struct CCLVolumeContext
#if EMBREE_MAJOR_VERSION >= 4
: public RTCRayQueryContext
#endif
{
#if EMBREE_MAJOR_VERSION >= 4
KernelGlobals kg;
const Ray *ray;
numhit_t max_hits;
numhit_t num_hits;
#endif
Intersection *vol_isect;
};
#if EMBREE_MAJOR_VERSION < 4
struct CCLIntersectContext : public RTCIntersectContext,
public CCLFirstHitContext,
public CCLShadowContext,
public CCLLocalContext,
public CCLVolumeContext {
typedef enum {
RAY_REGULAR = 0,
RAY_SHADOW_ALL = 1,
@ -41,28 +119,8 @@ struct CCLIntersectContext {
RAY_VOLUME_ALL = 4,
} RayType;
KernelGlobals kg;
RayType type;
/* For avoiding self intersections */
const Ray *ray;
/* for shadow rays */
IntegratorShadowState isect_s;
numhit_t max_hits;
numhit_t num_hits;
numhit_t num_recorded_hits;
float throughput;
bool opaque_hit;
/* for SSS Rays: */
LocalIntersection *local_isect;
int local_object_id;
uint *lcg_state;
/* for Volume */
Intersection *vol_isect;
CCLIntersectContext(KernelGlobals kg_, RayType type_)
{
kg = kg_;
@ -79,17 +137,7 @@ struct CCLIntersectContext {
lcg_state = NULL;
}
};
class IntersectContext {
public:
IntersectContext(CCLIntersectContext *ctx)
{
rtcInitIntersectContext(&context);
userRayExt = ctx;
}
RTCIntersectContext context;
CCLIntersectContext *userRayExt;
};
#endif
/* Utilities. */
@ -173,13 +221,12 @@ ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
{
intptr_t prim_offset;
if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
prim_offset = intptr_t(rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)));
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserDataFromScene(kernel_data.device_bvh,
hit->instID[0]);
prim_offset = intptr_t(rtcGetGeometryUserDataFromScene(inst_scene, hit->geomID));
}
else {
prim_offset = intptr_t(
rtcGetGeometryUserData(rtcGetGeometry(kernel_data.device_bvh, hit->geomID)));
prim_offset = intptr_t(rtcGetGeometryUserDataFromScene(kernel_data.device_bvh, hit->geomID));
}
kernel_embree_convert_hit(kg, ray, hit, isect, prim_offset);
}
@ -211,7 +258,11 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
assert(args->N == 1);
RTCHit *hit = (RTCHit *)args->hit;
CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
#if EMBREE_MAJOR_VERSION >= 4
CCLFirstHitContext *ctx = (CCLFirstHitContext *)(args->context);
#else
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
const KernelGlobalsCPU *kg = ctx->kg;
const Ray *cray = ctx->ray;
@ -226,205 +277,257 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
* as well as filtering for volume objects happen here.
* Cycles' own BVH does that directly inside the traversal calls.
*/
ccl_device void kernel_embree_filter_occluded_func(const RTCFilterFunctionNArguments *args)
/* Occlusion filter callback for shadow rays that gathers transparent hits.
 *
 * `args` are the Embree filter arguments for a single ray (N must be 1). `args->context` is
 * the CCLShadowContext (Embree 4) or CCLIntersectContext (Embree 3) prepared by the caller.
 *
 * Results are reported through the context: `opaque_hit` is set when the ray is fully
 * blocked, `throughput` accumulates baked curve transparency, `num_hits` /
 * `num_recorded_hits` count transparent hits, and recorded hits are written to `isect_s`.
 * Whenever the hit should not terminate the ray, `*args->valid` is set to 0 so that Embree
 * continues tracing; the opaque early-outs leave it untouched. */
ccl_device void kernel_embree_filter_occluded_shadow_all_func(
    const RTCFilterFunctionNArguments *args)
{
  /* Current implementation in Cycles assumes only single-ray intersection queries. */
  assert(args->N == 1);

  RTCRay *ray = (RTCRay *)args->ray;
  RTCHit *hit = (RTCHit *)args->hit;
  /* The context is declared exactly once, with the type matching the Embree version. */
#if EMBREE_MAJOR_VERSION >= 4
  CCLShadowContext *ctx = (CCLShadowContext *)(args->context);
#else
  CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
  const KernelGlobalsCPU *kg = ctx->kg;
  const Ray *cray = ctx->ray;

  Intersection current_isect;
  kernel_embree_convert_hit(
      kg, ray, hit, &current_isect, reinterpret_cast<intptr_t>(args->geometryUserPtr));
  if (intersection_skip_self_shadow(cray->self, current_isect.object, current_isect.prim)) {
    *args->valid = 0;
    return;
  }

  /* If no transparent shadows or max number of hits exceeded, all light is blocked. */
  const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type);
  if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) {
    ctx->opaque_hit = true;
    return;
  }

  ++ctx->num_hits;

  /* Always use baked shadow transparency for curves. */
  if (current_isect.type & PRIMITIVE_CURVE) {
    ctx->throughput *= intersection_curve_shadow_transparency(
        kg, current_isect.object, current_isect.prim, current_isect.type, current_isect.u);

    if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
      ctx->opaque_hit = true;
      return;
    }
    else {
      /* Curve hits are never recorded; keep tracing. */
      *args->valid = 0;
      return;
    }
  }

  /* Test if we need to record this transparent intersection. */
  const numhit_t max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
  if (ctx->num_recorded_hits < max_record_hits) {
    /* If maximum number of hits was reached, replace the intersection with the
     * highest distance. We want to find the N closest intersections. */
    const numhit_t num_recorded_hits = min(ctx->num_recorded_hits, max_record_hits);
    numhit_t isect_index = num_recorded_hits;
    if (num_recorded_hits + 1 >= max_record_hits) {
      /* Find the farthest hit recorded so far. */
      float max_t = INTEGRATOR_STATE_ARRAY(ctx->isect_s, shadow_isect, 0, t);
      numhit_t max_recorded_hit = numhit_t(0);

      for (numhit_t i = numhit_t(1); i < num_recorded_hits; ++i) {
        const float isect_t = INTEGRATOR_STATE_ARRAY(ctx->isect_s, shadow_isect, i, t);
        if (isect_t > max_t) {
          max_recorded_hit = i;
          max_t = isect_t;
        }
      }

      /* NOTE(review): inside the enclosing branch `num_recorded_hits < max_record_hits`
       * always holds, so this replacement looks unreachable - confirm the intended
       * condition before relying on it. */
      if (num_recorded_hits >= max_record_hits) {
        isect_index = max_recorded_hit;
      }

      /* Limit the ray distance and stop counting hits beyond this. */
      ray->tfar = max(current_isect.t, max_t);
    }

    integrator_state_write_shadow_isect(ctx->isect_s, &current_isect, isect_index);
  }

  /* Always increase the number of recorded hits, even beyond the maximum,
   * so that we can detect this and trace another ray if needed. */
  ++ctx->num_recorded_hits;

  /* This tells Embree to continue tracing. */
  *args->valid = 0;
}
/* Occlusion filter callback for local (single object) queries, including the SSS variant.
 *
 * State is read from the CCLLocalContext (Embree 4) or CCLIntersectContext (Embree 3) in
 * `args->context`. Accepted hits are stored in `ctx->local_isect`: with `ctx->lcg_state` set,
 * up to `ctx->max_hits` hits are kept using reservoir sampling; otherwise only the closest
 * hit is kept. Every path except the `max_hits == 0` early-out sets `*args->valid = 0`, which
 * makes Embree continue tracing. */
ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
assert(args->N == 1);
const RTCRay *ray = (RTCRay *)args->ray;
RTCHit *hit = (RTCHit *)args->hit;
/* The context type depends on the Embree major version. */
#if EMBREE_MAJOR_VERSION >= 4
CCLLocalContext *ctx = (CCLLocalContext *)(args->context);
#else
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
const KernelGlobalsCPU *kg = ctx->kg;
const Ray *cray = ctx->ray;
/* Check if it's hitting the correct object. */
Intersection current_isect;
if (ctx->is_sss) {
/* SSS path: the object ID is passed into the conversion, no object comparison is done. */
kernel_embree_convert_sss_hit(kg,
ray,
hit,
&current_isect,
ctx->local_object_id,
reinterpret_cast<intptr_t>(args->geometryUserPtr));
}
else {
kernel_embree_convert_hit(
kg, ray, hit, &current_isect, reinterpret_cast<intptr_t>(args->geometryUserPtr));
if (ctx->local_object_id != current_isect.object) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
return;
}
}
/* Reject hits on the primitive the ray excludes via its `self` data. */
if (intersection_skip_self_local(cray->self, current_isect.prim)) {
*args->valid = 0;
return;
}
/* No intersection information requested, just return a hit. */
if (ctx->max_hits == 0) {
return;
}
/* Ignore curves. */
if (EMBREE_IS_HAIR(hit->geomID)) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
return;
}
LocalIntersection *local_isect = ctx->local_isect;
/* Slot the hit is written to; stays 0 when only the closest hit is recorded. */
int hit_idx = 0;
if (ctx->lcg_state) {
/* See triangle_intersect_subsurface() for the native equivalent. */
/* Skip the hit when one of the stored hits has exactly the same distance. */
for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
if (local_isect->hits[i].t == ray->tfar) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
return;
}
}
/* num_hits counts all candidate hits seen, not only the ones that are stored. */
local_isect->num_hits++;
if (local_isect->num_hits <= ctx->max_hits) {
hit_idx = local_isect->num_hits - 1;
}
else {
/* reservoir sampling: if we are at the maximum number of
 * hits, randomly replace element or skip it */
hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits;
if (hit_idx >= ctx->max_hits) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
return;
}
}
}
else {
/* Record closest intersection only. */
if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) {
*args->valid = 0;
return;
}
local_isect->num_hits = 1;
}
/* record intersection */
local_isect->hits[hit_idx] = current_isect;
/* Store the normalized geometric normal reported by Embree alongside the hit. */
local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z));
/* This tells Embree to continue tracing. */
*args->valid = 0;
}
/* Occlusion filter callback that collects hits on volume objects along a ray.
 *
 * State is read from the CCLVolumeContext (Embree 4) or CCLIntersectContext (Embree 3) in
 * `args->context`. While `ctx->vol_isect` has room, each hit is converted and, if its object
 * has the SD_OBJECT_HAS_VOLUME flag, appended to the array; `*args->valid` is then cleared so
 * that Embree continues tracing. Once the array is full the hit is left untouched. */
ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
    const RTCFilterFunctionNArguments *args)
{
  /* Cycles only issues single-ray queries. */
  assert(args->N == 1);

  const RTCRay *ray = (RTCRay *)args->ray;
  RTCHit *hit = (RTCHit *)args->hit;
#if EMBREE_MAJOR_VERSION >= 4
  CCLVolumeContext *ctx = (CCLVolumeContext *)(args->context);
#else
  CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
#endif
  const KernelGlobalsCPU *kg = ctx->kg;
  const Ray *cray = ctx->ray;

  /* Output array is full: nothing more to collect, leave the hit as it is. */
  if (ctx->num_hits >= ctx->max_hits) {
    return;
  }

  Intersection current_isect;
  kernel_embree_convert_hit(
      kg, ray, hit, &current_isect, reinterpret_cast<intptr_t>(args->geometryUserPtr));

  if (!intersection_skip_self(cray->self, current_isect.object, current_isect.prim)) {
    /* Write into the next free slot; the slot only becomes part of the result when the
     * counter is advanced below. */
    ctx->vol_isect[ctx->num_hits] = current_isect;

    /* Only primitives from volume object. */
    const uint hit_object = current_isect.object;
    const int hit_object_flag = kernel_data_fetch(object_flag, hit_object);
    if ((hit_object_flag & SD_OBJECT_HAS_VOLUME) != 0) {
      ++ctx->num_hits;
    }
  }

  /* This tells Embree to continue tracing. */
  *args->valid = 0;
}
#if EMBREE_MAJOR_VERSION < 4
ccl_device_forceinline void kernel_embree_filter_occluded_func(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
assert(args->N == 1);
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
switch (ctx->type) {
case CCLIntersectContext::RAY_SHADOW_ALL: {
Intersection current_isect;
kernel_embree_convert_hit(
kg, ray, hit, &current_isect, reinterpret_cast<intptr_t>(args->geometryUserPtr));
if (intersection_skip_self_shadow(cray->self, current_isect.object, current_isect.prim)) {
*args->valid = 0;
return;
}
/* If no transparent shadows or max number of hits exceeded, all light is blocked. */
const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type);
if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) {
ctx->opaque_hit = true;
return;
}
++ctx->num_hits;
/* Always use baked shadow transparency for curves. */
if (current_isect.type & PRIMITIVE_CURVE) {
ctx->throughput *= intersection_curve_shadow_transparency(
kg, current_isect.object, current_isect.prim, current_isect.type, current_isect.u);
if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
ctx->opaque_hit = true;
return;
}
else {
*args->valid = 0;
return;
}
}
/* Test if we need to record this transparent intersection. */
const numhit_t max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
if (ctx->num_recorded_hits < max_record_hits) {
/* If maximum number of hits was reached, replace the intersection with the
* highest distance. We want to find the N closest intersections. */
const numhit_t num_recorded_hits = min(ctx->num_recorded_hits, max_record_hits);
numhit_t isect_index = num_recorded_hits;
if (num_recorded_hits + 1 >= max_record_hits) {
float max_t = INTEGRATOR_STATE_ARRAY(ctx->isect_s, shadow_isect, 0, t);
numhit_t max_recorded_hit = numhit_t(0);
for (numhit_t i = numhit_t(1); i < num_recorded_hits; ++i) {
const float isect_t = INTEGRATOR_STATE_ARRAY(ctx->isect_s, shadow_isect, i, t);
if (isect_t > max_t) {
max_recorded_hit = i;
max_t = isect_t;
}
}
if (num_recorded_hits >= max_record_hits) {
isect_index = max_recorded_hit;
}
/* Limit the ray distance and stop counting hits beyond this. */
ray->tfar = max(current_isect.t, max_t);
}
integrator_state_write_shadow_isect(ctx->isect_s, &current_isect, isect_index);
}
/* Always increase the number of recorded hits, even beyond the maximum,
* so that we can detect this and trace another ray if needed. */
++ctx->num_recorded_hits;
/* This tells Embree to continue tracing. */
*args->valid = 0;
case CCLIntersectContext::RAY_SHADOW_ALL:
kernel_embree_filter_occluded_shadow_all_func(args);
break;
}
case CCLIntersectContext::RAY_LOCAL:
case CCLIntersectContext::RAY_SSS: {
/* Check if it's hitting the correct object. */
Intersection current_isect;
if (ctx->type == CCLIntersectContext::RAY_SSS) {
kernel_embree_convert_sss_hit(kg,
ray,
hit,
&current_isect,
ctx->local_object_id,
reinterpret_cast<intptr_t>(args->geometryUserPtr));
}
else {
kernel_embree_convert_hit(
kg, ray, hit, &current_isect, reinterpret_cast<intptr_t>(args->geometryUserPtr));
if (ctx->local_object_id != current_isect.object) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
break;
}
}
if (intersection_skip_self_local(cray->self, current_isect.prim)) {
*args->valid = 0;
return;
}
/* No intersection information requested, just return a hit. */
if (ctx->max_hits == 0) {
break;
}
/* Ignore curves. */
if (EMBREE_IS_HAIR(hit->geomID)) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
break;
}
LocalIntersection *local_isect = ctx->local_isect;
int hit_idx = 0;
if (ctx->lcg_state) {
/* See triangle_intersect_subsurface() for the native equivalent. */
for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
if (local_isect->hits[i].t == ray->tfar) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
return;
}
}
local_isect->num_hits++;
if (local_isect->num_hits <= ctx->max_hits) {
hit_idx = local_isect->num_hits - 1;
}
else {
/* reservoir sampling: if we are at the maximum number of
* hits, randomly replace element or skip it */
hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits;
if (hit_idx >= ctx->max_hits) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
return;
}
}
}
else {
/* Record closest intersection only. */
if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) {
*args->valid = 0;
return;
}
local_isect->num_hits = 1;
}
/* record intersection */
local_isect->hits[hit_idx] = current_isect;
local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z));
/* This tells Embree to continue tracing. */
*args->valid = 0;
case CCLIntersectContext::RAY_SSS:
kernel_embree_filter_occluded_local_func(args);
break;
}
case CCLIntersectContext::RAY_VOLUME_ALL: {
/* Append the intersection to the end of the array. */
if (ctx->num_hits < ctx->max_hits) {
Intersection current_isect;
kernel_embree_convert_hit(
kg, ray, hit, &current_isect, reinterpret_cast<intptr_t>(args->geometryUserPtr));
if (intersection_skip_self(cray->self, current_isect.object, current_isect.prim)) {
*args->valid = 0;
return;
}
Intersection *isect = &ctx->vol_isect[ctx->num_hits];
++ctx->num_hits;
*isect = current_isect;
/* Only primitives from volume object. */
uint tri_object = isect->object;
int object_flag = kernel_data_fetch(object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
--ctx->num_hits;
}
/* This tells Embree to continue tracing. */
*args->valid = 0;
}
case CCLIntersectContext::RAY_VOLUME_ALL:
kernel_embree_filter_occluded_volume_all_func(args);
break;
}
case CCLIntersectContext::RAY_REGULAR:
default:
if (kernel_embree_is_self_intersection(
kg, hit, cray, reinterpret_cast<intptr_t>(args->geometryUserPtr))) {
*args->valid = 0;
return;
}
/* We should never reach this point, because
* REGULAR intersection is handled in intersection filter. */
kernel_assert(false);
break;
}
}
@ -441,7 +544,7 @@ ccl_device void kernel_embree_filter_func_backface_cull(const RTCFilterFunctionN
return;
}
CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
CCLIntersectContext *ctx = ((CCLIntersectContext *)args->context);
const KernelGlobalsCPU *kg = ctx->kg;
const Ray *cray = ctx->ray;
@ -467,6 +570,8 @@ ccl_device void kernel_embree_filter_occluded_func_backface_cull(
kernel_embree_filter_occluded_func(args);
}
#endif
/* Scene intersection. */
ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
@ -475,12 +580,29 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
ccl_private Intersection *isect)
{
isect->t = ray->tmax;
#if EMBREE_MAJOR_VERSION >= 4
CCLFirstHitContext ctx;
rtcInitRayQueryContext(&ctx);
ctx.kg = kg;
#else
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
IntersectContext rtc_ctx(&ctx);
rtcInitIntersectContext(&ctx);
#endif
RTCRayHit ray_hit;
ctx.ray = ray;
kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
rtcIntersect1(kernel_data.device_bvh, &rtc_ctx.context, &ray_hit);
#if EMBREE_MAJOR_VERSION >= 4
RTCIntersectArguments args;
rtcInitIntersectArguments(&args);
args.filter = (RTCFilterFunctionN)kernel_embree_filter_intersection_func;
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcIntersect1(kernel_data.device_bvh, &ray_hit, &args);
#else
rtcIntersect1(kernel_data.device_bvh, &ctx, &ray_hit);
#endif
if (ray_hit.hit.geomID == RTC_INVALID_GEOMETRY_ID ||
ray_hit.hit.primID == RTC_INVALID_GEOMETRY_ID) {
return false;
@ -500,8 +622,16 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
{
const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) &
SD_OBJECT_TRANSFORM_APPLIED);
# if EMBREE_MAJOR_VERSION >= 4
CCLLocalContext ctx;
rtcInitRayQueryContext(&ctx);
ctx.kg = kg;
# else
CCLIntersectContext ctx(kg,
has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
rtcInitIntersectContext(&ctx);
# endif
ctx.is_sss = has_bvh;
ctx.lcg_state = lcg_state;
ctx.max_hits = max_hits;
ctx.ray = ray;
@ -510,36 +640,49 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
local_isect->num_hits = 0;
}
ctx.local_object_id = local_object;
IntersectContext rtc_ctx(&ctx);
RTCRay rtc_ray;
kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
# if EMBREE_MAJOR_VERSION >= 4
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = (RTCFilterFunctionN)(kernel_embree_filter_occluded_local_func);
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
# endif
/* If this object has its own BVH, use it. */
if (has_bvh) {
RTCGeometry geom = rtcGetGeometry(kernel_data.device_bvh, local_object * 2);
if (geom) {
float3 P = ray->P;
float3 dir = ray->D;
float3 idir = ray->D;
bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
float3 P = ray->P;
float3 dir = ray->D;
float3 idir = ray->D;
bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
rtc_ray.org_x = P.x;
rtc_ray.org_y = P.y;
rtc_ray.org_z = P.z;
rtc_ray.dir_x = dir.x;
rtc_ray.dir_y = dir.y;
rtc_ray.dir_z = dir.z;
rtc_ray.tnear = ray->tmin;
rtc_ray.tfar = ray->tmax;
RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
kernel_assert(scene);
if (scene) {
rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
}
rtc_ray.org_x = P.x;
rtc_ray.org_y = P.y;
rtc_ray.org_z = P.z;
rtc_ray.dir_x = dir.x;
rtc_ray.dir_y = dir.y;
rtc_ray.dir_z = dir.z;
rtc_ray.tnear = ray->tmin;
rtc_ray.tfar = ray->tmax;
RTCScene scene = (RTCScene)rtcGetGeometryUserDataFromScene(kernel_data.device_bvh,
local_object * 2);
kernel_assert(scene);
if (scene) {
# if EMBREE_MAJOR_VERSION >= 4
rtcOccluded1(scene, &rtc_ray, &args);
# else
rtcOccluded1(scene, &ctx, &rtc_ray);
# endif
}
}
else {
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
# if EMBREE_MAJOR_VERSION >= 4
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
# else
rtcOccluded1(kernel_data.device_bvh, &ctx, &rtc_ray);
# endif
}
/* rtcOccluded1 sets tfar to -inf if a hit was found. */
@ -556,14 +699,32 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
ccl_private uint *num_recorded_hits,
ccl_private float *throughput)
{
# if EMBREE_MAJOR_VERSION >= 4
CCLShadowContext ctx;
rtcInitRayQueryContext(&ctx);
ctx.kg = kg;
# else
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
rtcInitIntersectContext(&ctx);
# endif
ctx.num_hits = ctx.num_recorded_hits = numhit_t(0);
ctx.throughput = 1.0f;
ctx.opaque_hit = false;
ctx.isect_s = state;
ctx.max_hits = numhit_t(max_hits);
ctx.ray = ray;
IntersectContext rtc_ctx(&ctx);
RTCRay rtc_ray;
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
# if EMBREE_MAJOR_VERSION >= 4
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_shadow_all_func;
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
# else
rtcOccluded1(kernel_data.device_bvh, &ctx, &rtc_ray);
# endif
*num_recorded_hits = ctx.num_recorded_hits;
*throughput = ctx.throughput;
@ -578,15 +739,30 @@ ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
const uint max_hits,
const uint visibility)
{
# if EMBREE_MAJOR_VERSION >= 4
CCLVolumeContext ctx;
rtcInitRayQueryContext(&ctx);
ctx.kg = kg;
# else
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
rtcInitIntersectContext(&ctx);
# endif
ctx.vol_isect = isect;
ctx.max_hits = numhit_t(max_hits);
ctx.num_hits = numhit_t(0);
ctx.ray = ray;
IntersectContext rtc_ctx(&ctx);
RTCRay rtc_ray;
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
# if EMBREE_MAJOR_VERSION >= 4
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_volume_all_func;
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
# else
rtcOccluded1(kernel_data.device_bvh, &ctx, &rtc_ray);
# endif
return ctx.num_hits;
}
#endif

View File

@ -4,8 +4,13 @@
#pragma once
#if !defined(__KERNEL_GPU__) && defined(WITH_EMBREE)
# include <embree3/rtcore.h>
# include <embree3/rtcore_scene.h>
# if EMBREE_MAJOR_VERSION >= 4
# include <embree4/rtcore.h>
# include <embree4/rtcore_scene.h>
# else
# include <embree3/rtcore.h>
# include <embree3/rtcore_scene.h>
# endif
# define __EMBREE__
#endif