Compare commits

..

27 Commits

Author SHA1 Message Date
aa9b976e9f Merge branch 'master' into temp-gpencil-automask 2022-11-08 16:29:56 +01:00
410b87ca78 Merge branch 'master' into temp-gpencil-automask 2022-11-07 16:12:16 +01:00
2b9994257f Merge branch 'master' into temp-gpencil-automask 2022-10-31 09:57:01 +01:00
a035861d9d Cleanup: Format 2022-10-27 16:55:34 +02:00
8712f952e0 Fix merge issues 2022-10-27 16:50:48 +02:00
28de4468d9 Merge branch 'master' into temp-gpencil-automask 2022-10-27 16:48:44 +02:00
9c717dcff8 Merge branch 'master' into temp-gpencil-automask 2022-10-25 09:51:38 +02:00
2c3ee8854f Merge branch 'master' into temp-gpencil-automask 2022-10-21 15:39:53 +02:00
e3ab75d17d Merge branch 'master' into temp-gpencil-automask 2022-10-18 10:15:46 +02:00
67e053b1fb Fix merge error 2022-10-17 16:59:46 +02:00
3150277d0c Merge branch 'master' into temp-gpencil-automask 2022-10-17 16:57:31 +02:00
7437b6b4bc Merge branch 'master' into temp-gpencil-automask 2022-10-14 16:28:18 +02:00
0dd98c54b9 GPencil: Change Pie shortcut to Shift+Alt+A 2022-10-11 11:28:09 +02:00
910b579aff GPencil: New Automasking Pie menu (Ctrl+Alt+A) 2022-10-10 19:59:18 +02:00
f1dbbbbd45 GPencil: Speed up checking 2022-10-10 17:41:42 +02:00
101819bdf9 GPencil: Make automasking to use AND logic
Now, the automasking only allows to sculpt if all masking conditions are met.
2022-10-10 16:47:14 +02:00
62b29f71b9 Merge branch 'master' into temp-gpencil-automask 2022-10-10 16:13:26 +02:00
3d2dc8c897 Merge branch 'master' into temp-gpencil-automask 2022-10-09 11:11:40 +02:00
3ebd01675e Merge branch 'master' into temp-gpencil-automask 2022-10-08 09:58:20 +02:00
9b24e5d54f GPencil: Change Tooltips and Menu Text 2022-10-07 18:07:22 +02:00
535662cd0d GPencil: Hide Advanced panel if empty 2022-10-07 17:24:33 +02:00
dc6281d87f GPencil: Fix bug for automasking stroke layer and material
If both layers modes are selected, the check was wrong.
2022-10-07 17:07:31 +02:00
ad87f81e38 GPencil: Add new Automasking popover
Also removed from Brush settings the automasking
2022-10-07 16:59:30 +02:00
8965118eec Merge branch 'master' into temp-gpencil-automask 2022-10-07 16:21:49 +02:00
71e26f6eff GPencil: New Automasking modes and fix bug
* New Layer and Material of the stroke Masking.
* Fixed Automasking with modifiers.
2022-10-06 22:29:45 +02:00
1e286d7f8f GPencil: Keep old bit value 2022-10-06 18:46:13 +02:00
2ec1e38b91 GPencil: Move the automasking options to ToolSettings
Still pending move out Brush panel
2022-10-06 18:38:19 +02:00
1225 changed files with 26824 additions and 61895 deletions

View File

@@ -162,7 +162,6 @@ PenaltyBreakString: 1000000
ForEachMacros:
- BEGIN_ANIMFILTER_SUBCHANNELS
- BKE_pbvh_vertex_iter_begin
- BKE_pbvh_face_iter_begin
- BLI_FOREACH_SPARSE_RANGE
- BLI_SMALLSTACK_ITER_BEGIN
- BMO_ITER

View File

@@ -257,12 +257,6 @@ if(UNIX AND NOT (APPLE OR HAIKU))
option(WITH_GHOST_WAYLAND_DYNLOAD "Enable runtime dynamic WAYLAND libraries loading" ON)
mark_as_advanced(WITH_GHOST_WAYLAND_DYNLOAD)
set(WITH_GHOST_WAYLAND_APP_ID "" CACHE STRING "\
The application ID used for Blender (use default when an empty string), \
this can be used to differentiate Blender instances by version or branch for example."
)
mark_as_advanced(WITH_GHOST_WAYLAND_APP_ID)
endif()
endif()
@@ -345,12 +339,8 @@ if(APPLE)
else()
set(WITH_COREAUDIO OFF)
endif()
if(NOT WIN32)
if(APPLE)
option(WITH_JACK "Enable JACK Support (http://www.jackaudio.org)" OFF)
else()
option(WITH_JACK "Enable JACK Support (http://www.jackaudio.org)" ON)
endif()
if(UNIX AND NOT APPLE)
option(WITH_JACK "Enable JACK Support (http://www.jackaudio.org)" ON)
option(WITH_JACK_DYNLOAD "Enable runtime dynamic JACK libraries loading" OFF)
else()
set(WITH_JACK OFF)
@@ -467,6 +457,7 @@ if(NOT APPLE)
option(WITH_CYCLES_CUDA_BINARIES "Build Cycles NVIDIA CUDA binaries" OFF)
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF)
option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime (for developers, makes cuda-gdb work)" ON)
@@ -474,6 +465,7 @@ if(NOT APPLE)
set(CYCLES_RUNTIME_OPTIX_ROOT_DIR "" CACHE PATH "Path to the OptiX SDK root directory. When set, this path will be used at runtime to compile OptiX kernels.")
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
mark_as_advanced(WITH_CYCLES_CUBIN_COMPILER)
mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
mark_as_advanced(WITH_CUDA_DYNLOAD)
mark_as_advanced(OPTIX_ROOT_DIR)
@@ -484,7 +476,7 @@ endif()
if(NOT APPLE)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
endif()
@@ -1239,11 +1231,12 @@ if(WITH_OPENGL)
add_definitions(-DWITH_OPENGL)
endif()
#-----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Configure Vulkan.
if(WITH_VULKAN_BACKEND)
list(APPEND BLENDER_GL_LIBRARIES ${VULKAN_LIBRARIES})
add_definitions(-DWITH_VULKAN_BACKEND)
endif()
# -----------------------------------------------------------------------------

View File

@@ -40,15 +40,15 @@ ver-ocio:,ver-oiio:,ver-llvm:,ver-osl:,ver-osd:,ver-openvdb:,ver-xr-openxr:,ver-
force-all,force-python,force-boost,force-tbb,\
force-ocio,force-imath,force-openexr,force-oiio,force-llvm,force-osl,force-osd,force-openvdb,\
force-ffmpeg,force-opencollada,force-alembic,force-embree,force-oidn,force-usd,\
force-xr-openxr,force-level-zero,force-openpgl,\
force-xr-openxr,force-level-zero, force-openpgl,\
build-all,build-python,build-boost,build-tbb,\
build-ocio,build-imath,build-openexr,build-oiio,build-llvm,build-osl,build-osd,build-openvdb,\
build-ffmpeg,build-opencollada,build-alembic,build-embree,build-oidn,build-usd,\
build-xr-openxr,build-level-zero,build-openpgl,\
build-xr-openxr,build-level-zero, build-openpgl,\
skip-python,skip-boost,skip-tbb,\
skip-ocio,skip-imath,skip-openexr,skip-oiio,skip-llvm,skip-osl,skip-osd,skip-openvdb,\
skip-ffmpeg,skip-opencollada,skip-alembic,skip-embree,skip-oidn,skip-usd,\
skip-xr-openxr,skip-level-zero,skip-openpgl \
skip-xr-openxr,skip-level-zero, skip-openpgl \
-- "$@" \
)
@@ -602,9 +602,9 @@ LEVEL_ZERO_FORCE_BUILD=false
LEVEL_ZERO_FORCE_REBUILD=false
LEVEL_ZERO_SKIP=false
OPENPGL_VERSION="0.4.1"
OPENPGL_VERSION="0.4.0"
OPENPGL_VERSION_SHORT="0.4"
OPENPGL_VERSION_MIN="0.4.1"
OPENPGL_VERSION_MIN="0.3.1"
OPENPGL_VERSION_MEX="0.5"
OPENPGL_FORCE_BUILD=false
OPENPGL_FORCE_REBUILD=false

View File

@@ -1,59 +0,0 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2022 Blender Foundation.
# - Find MoltenVK libraries
# Find the MoltenVK includes and libraries
# This module defines
# MOLTENVK_INCLUDE_DIRS, where to find MoltenVK headers, Set when
# MOLTENVK_INCLUDE_DIR is found.
# MOLTENVK_LIBRARIES, libraries to link against to use MoltenVK.
# MOLTENVK_ROOT_DIR, The base directory to search for MoltenVK.
# This can also be an environment variable.
# MOLTENVK_FOUND, If false, do not try to use MoltenVK.
#
# If MOLTENVK_ROOT_DIR was defined in the environment, use it.
IF(NOT MOLTENVK_ROOT_DIR AND NOT $ENV{MOLTENVK_ROOT_DIR} STREQUAL "")
SET(MOLTENVK_ROOT_DIR $ENV{MOLTENVK_ROOT_DIR})
ENDIF()
SET(_moltenvk_SEARCH_DIRS
${MOLTENVK_ROOT_DIR}
${LIBDIR}/vulkan/MoltenVK
)
FIND_PATH(MOLTENVK_INCLUDE_DIR
NAMES
MoltenVK/vk_mvk_moltenvk.h
HINTS
${_moltenvk_SEARCH_DIRS}
PATH_SUFFIXES
include
)
FIND_LIBRARY(MOLTENVK_LIBRARY
NAMES
MoltenVK
HINTS
${_moltenvk_SEARCH_DIRS}
PATH_SUFFIXES
dylib/macOS
)
# handle the QUIETLY and REQUIRED arguments and set MOLTENVK_FOUND to TRUE if
# all listed variables are TRUE
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(MoltenVK DEFAULT_MSG MOLTENVK_LIBRARY MOLTENVK_INCLUDE_DIR)
IF(MOLTENVK_FOUND)
SET(MOLTENVK_LIBRARIES ${MOLTENVK_LIBRARY})
SET(MOLTENVK_INCLUDE_DIRS ${MOLTENVK_INCLUDE_DIR})
ENDIF()
MARK_AS_ADVANCED(
MOLTENVK_INCLUDE_DIR
MOLTENVK_LIBRARY
)
UNSET(_moltenvk_SEARCH_DIRS)

View File

@@ -103,6 +103,10 @@ if(EXISTS ${SOURCE_DIR}/.git)
endif()
endif()
if(MY_WC_BRANCH MATCHES "^blender-v")
set(MY_WC_BRANCH "master")
endif()
unset(_git_below_check)
endif()

View File

@@ -1240,7 +1240,7 @@ endmacro()
macro(set_and_warn_library_found
_library_name _library_found _setting)
if(((NOT ${_library_found}) OR (NOT ${${_library_found}})) AND ${${_setting}})
if(NOT ${${_library_found}} AND ${${_setting}})
if(WITH_STRICT_BUILD_OPTIONS)
message(SEND_ERROR "${_library_name} required but not found")
else()

View File

@@ -106,8 +106,8 @@ if(WIN32)
set(CPACK_WIX_LIGHT_EXTRA_FLAGS -dcl:medium)
endif()
set(CPACK_PACKAGE_EXECUTABLES "blender-launcher" "Blender ${MAJOR_VERSION}.${MINOR_VERSION}")
set(CPACK_CREATE_DESKTOP_LINKS "blender-launcher" "Blender ${MAJOR_VERSION}.${MINOR_VERSION}")
set(CPACK_PACKAGE_EXECUTABLES "blender-launcher" "Blender")
set(CPACK_CREATE_DESKTOP_LINKS "blender-launcher" "Blender")
include(CPack)

View File

@@ -100,23 +100,6 @@ if(WITH_USD)
find_package(USD REQUIRED)
endif()
if(WITH_VULKAN_BACKEND)
find_package(MoltenVK REQUIRED)
if(EXISTS ${LIBDIR}/vulkan)
set(VULKAN_FOUND On)
set(VULKAN_ROOT_DIR ${LIBDIR}/vulkan/macOS)
set(VULKAN_INCLUDE_DIR ${VULKAN_ROOT_DIR}/include)
set(VULKAN_LIBRARY ${VULKAN_ROOT_DIR}/lib/libvulkan.1.dylib)
set(VULKAN_INCLUDE_DIRS ${VULKAN_INCLUDE_DIR} ${MOLTENVK_INCLUDE_DIRS})
set(VULKAN_LIBRARIES ${VULKAN_LIBRARY} ${MOLTENVK_LIBRARIES})
else()
message(WARNING "Vulkan SDK was not found, disabling WITH_VULKAN_BACKEND")
set(WITH_VULKAN_BACKEND OFF)
endif()
endif()
if(WITH_OPENSUBDIV)
find_package(OpenSubdiv)
endif()

View File

@@ -108,10 +108,6 @@ find_package_wrapper(ZLIB REQUIRED)
find_package_wrapper(Zstd REQUIRED)
find_package_wrapper(Epoxy REQUIRED)
if(WITH_VULKAN_BACKEND)
find_package_wrapper(Vulkan REQUIRED)
endif()
function(check_freetype_for_brotli)
include(CheckSymbolExists)
set(CMAKE_REQUIRED_INCLUDES ${FREETYPE_INCLUDE_DIRS})
@@ -326,10 +322,9 @@ if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
file(GLOB _sycl_runtime_libraries
${SYCL_ROOT_DIR}/lib/libsycl.so
${SYCL_ROOT_DIR}/lib/libsycl.so.*
${SYCL_ROOT_DIR}/lib/libpi_*.so
${SYCL_ROOT_DIR}/lib/libpi_level_zero.so
)
list(FILTER _sycl_runtime_libraries EXCLUDE REGEX ".*\.py")
list(REMOVE_ITEM _sycl_runtime_libraries "${SYCL_ROOT_DIR}/lib/libpi_opencl.so")
list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries})
unset(_sycl_runtime_libraries)
endif()
@@ -970,9 +965,16 @@ if(WITH_COMPILER_CCACHE)
endif()
endif()
# Always link with libatomic if available, as it is required for data types
# which don't have intrinsics.
function(configure_atomic_lib_if_needed)
# On some platforms certain atomic operations are not possible with assembly and/or intrinsics and
# they are emulated in software with locks. For example, on armel there is no intrinsics to grant
# 64 bit atomic operations and STL library uses libatomic to offload software emulation of atomics
# to.
# This function will check whether libatomic is required and if so will configure linker flags.
# If atomic operations are possible without libatomic then linker flags are left as-is.
function(CONFIGURE_ATOMIC_LIB_IF_NEEDED)
# Source which is used to enforce situation when software emulation of atomics is required.
# Assume that using 64bit integer gives a definitive answer (as in, if 64bit atomic operations
# are possible using assembly/intrinsics 8, 16, and 32 bit operations will also be possible.
set(_source
"#include <atomic>
#include <cstdint>
@@ -983,12 +985,25 @@ function(configure_atomic_lib_if_needed)
)
include(CheckCXXSourceCompiles)
set(CMAKE_REQUIRED_LIBRARIES atomic)
check_cxx_source_compiles("${_source}" ATOMIC_OPS_WITH_LIBATOMIC)
unset(CMAKE_REQUIRED_LIBRARIES)
check_cxx_source_compiles("${_source}" ATOMIC_OPS_WITHOUT_LIBATOMIC)
if(ATOMIC_OPS_WITH_LIBATOMIC)
set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -latomic" PARENT_SCOPE)
if(NOT ATOMIC_OPS_WITHOUT_LIBATOMIC)
# Compilation of the test program has failed.
# Try it again with -latomic to see if this is what is needed, or whether something else is
# going on.
set(CMAKE_REQUIRED_LIBRARIES atomic)
check_cxx_source_compiles("${_source}" ATOMIC_OPS_WITH_LIBATOMIC)
unset(CMAKE_REQUIRED_LIBRARIES)
if(ATOMIC_OPS_WITH_LIBATOMIC)
set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -latomic" PARENT_SCOPE)
else()
# Atomic operations are required part of Blender and it is not possible to process forward.
# We expect that either standard library or libatomic will make atomics to work. If both
# cases has failed something fishy o na bigger scope is going on.
message(FATAL_ERROR "Failed to detect required configuration for atomic operations")
endif()
endif()
endfunction()

View File

@@ -419,7 +419,7 @@ if(WITH_IMAGE_OPENEXR)
warn_hardcoded_paths(OpenEXR)
set(OPENEXR ${LIBDIR}/openexr)
set(OPENEXR_INCLUDE_DIR ${OPENEXR}/include)
set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${IMATH_INCLUDE_DIRS} ${OPENEXR_INCLUDE_DIR}/OpenEXR)
set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${IMATH_INCLUDE_DIRS} ${OPENEXR}/include/OpenEXR)
set(OPENEXR_LIBPATH ${OPENEXR}/lib)
# Check if the 3.x library name exists
# if not assume this is a 2.x library folder
@@ -568,8 +568,7 @@ if(WITH_OPENIMAGEIO)
if(NOT OpenImageIO_FOUND)
set(OPENIMAGEIO ${LIBDIR}/OpenImageIO)
set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib)
set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO}/include)
set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR})
set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO}/include)
set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib)
set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib)
set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
@@ -786,14 +785,6 @@ if(WITH_CYCLES AND WITH_CYCLES_OSL)
endif()
find_path(OSL_INCLUDE_DIR OSL/oslclosure.h PATHS ${CYCLES_OSL}/include)
find_program(OSL_COMPILER NAMES oslc PATHS ${CYCLES_OSL}/bin)
file(STRINGS "${OSL_INCLUDE_DIR}/OSL/oslversion.h" OSL_LIBRARY_VERSION_MAJOR
REGEX "^[ \t]*#define[ \t]+OSL_LIBRARY_VERSION_MAJOR[ \t]+[0-9]+.*$")
file(STRINGS "${OSL_INCLUDE_DIR}/OSL/oslversion.h" OSL_LIBRARY_VERSION_MINOR
REGEX "^[ \t]*#define[ \t]+OSL_LIBRARY_VERSION_MINOR[ \t]+[0-9]+.*$")
string(REGEX REPLACE ".*#define[ \t]+OSL_LIBRARY_VERSION_MAJOR[ \t]+([.0-9]+).*"
"\\1" OSL_LIBRARY_VERSION_MAJOR ${OSL_LIBRARY_VERSION_MAJOR})
string(REGEX REPLACE ".*#define[ \t]+OSL_LIBRARY_VERSION_MINOR[ \t]+([.0-9]+).*"
"\\1" OSL_LIBRARY_VERSION_MINOR ${OSL_LIBRARY_VERSION_MINOR})
endif()
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
@@ -926,20 +917,6 @@ if(WITH_HARU)
set(HARU_LIBRARIES ${HARU_ROOT_DIR}/lib/libhpdfs.lib)
endif()
if(WITH_VULKAN_BACKEND)
if(EXISTS ${LIBDIR}/vulkan)
set(VULKAN_FOUND On)
set(VULKAN_ROOT_DIR ${LIBDIR}/vulkan)
set(VULKAN_INCLUDE_DIR ${VULKAN_ROOT_DIR}/include)
set(VULKAN_INCLUDE_DIRS ${VULKAN_INCLUDE_DIR})
set(VULKAN_LIBRARY ${VULKAN_ROOT_DIR}/lib/vulkan-1.lib)
set(VULKAN_LIBRARIES ${VULKAN_LIBRARY})
else()
message(WARNING "Vulkan SDK was not found, disabling WITH_VULKAN_BACKEND")
set(WITH_VULKAN_BACKEND OFF)
endif()
endif()
if(WITH_CYCLES AND WITH_CYCLES_PATH_GUIDING)
find_package(openpgl QUIET)
if(openpgl_FOUND)
@@ -972,13 +949,7 @@ if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
endforeach()
unset(_sycl_runtime_libraries_glob)
file(GLOB _sycl_pi_runtime_libraries_glob
${SYCL_ROOT_DIR}/bin/pi_*.dll
)
list(REMOVE_ITEM _sycl_pi_runtime_libraries_glob "${SYCL_ROOT_DIR}/bin/pi_opencl.dll")
list (APPEND _sycl_runtime_libraries ${_sycl_pi_runtime_libraries_glob})
unset(_sycl_pi_runtime_libraries_glob)
list(APPEND _sycl_runtime_libraries ${SYCL_ROOT_DIR}/bin/pi_level_zero.dll)
list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries})
unset(_sycl_runtime_libraries)
endif()

View File

@@ -55,7 +55,7 @@ buildbot:
cuda11:
version: '11.4.1'
hip:
version: '5.3.22480'
version: '5.2.21440'
optix:
version: '7.3.0'
ocloc:

View File

@@ -35,41 +35,10 @@ from typing import (
Tuple,
)
# ------------------------------------------------------------------------------
# Long Description
long_description = """# Blender
[Blender](https://www.blender.org) is the free and open source 3D creation suite. It supports the entirety of the 3D pipeline—modeling, rigging, animation, simulation, rendering, compositing and motion tracking, even video editing.
This package provides Blender as a Python module for use in studio pipelines, web services, scientific research, and more.
## Documentation
* [Blender Python API](https://docs.blender.org/api/current/)
* [Blender as a Python Module](https://docs.blender.org/api/current/info_advanced_blender_as_bpy.html)
## Requirements
[System requirements](https://www.blender.org/download/requirements/) are the same as Blender.
Each Blender release supports one Python version, and the package is only compatible with that version.
## Source Code
* [Releases](https://download.blender.org/source/)
* Repository: [git.blender.org/blender.git](https://git.blender.org/gitweb/gitweb.cgi/blender.git)
## Credits
Created by the [Blender developer community](https://www.blender.org/about/credits/).
Thanks to Tyler Alden Gubala for maintaining the original version of this package."""
# ------------------------------------------------------------------------------
# Generic Functions
def find_dominating_file(
path: str,
search: Sequence[str],
@@ -226,8 +195,6 @@ def main() -> None:
options={"bdist_wheel": {"plat_name": platform_tag}},
description="Blender as a Python module",
long_description=long_description,
long_description_content_type='text/markdown',
license="GPL-3.0",
author="Blender Foundation",
author_email="bf-committers@blender.org",

View File

@@ -59,11 +59,10 @@ def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None
# Checkout precompiled libraries
if sys.platform == 'darwin':
# Check platform.version to detect arm64 with x86_64 python binary.
if platform.machine() == 'arm64' or ('ARM64' in platform.version()):
lib_platform = "darwin_arm64"
elif platform.machine() == 'x86_64':
if platform.machine() == 'x86_64':
lib_platform = "darwin"
elif platform.machine() == 'arm64':
lib_platform = "darwin_arm64"
else:
lib_platform = None
elif sys.platform == 'win32':

View File

@@ -870,26 +870,6 @@ an issue but, due to internal implementation details, currently are:
thus breaking any current iteration over ``Collection.all_objects``.
.. rubric:: Do not:
.. code-block:: python
# `all_objects` is an iterator. Using it directly while performing operations on its members that will update
# the memory accessed by the `all_objects` iterator will lead to invalid memory accesses and crashes.
for object in bpy.data.collections["Collection"].all_objects:
object.hide_viewport = True
.. rubric:: Do:
.. code-block:: python
# `all_objects[:]` is an independent list generated from the iterator. As long as no objects are deleted,
# its content will remain valid even if the data accessed by the `all_objects` iterator is modified.
for object in bpy.data.collections["Collection"].all_objects[:]:
object.hide_viewport = True
sys.exit
========

View File

@@ -1294,7 +1294,6 @@ def pycontext2sphinx(basepath):
type_descr = prop.get_type_description(
class_fmt=":class:`bpy.types.%s`",
mathutils_fmt=":class:`mathutils.%s`",
collection_id=_BPY_PROP_COLLECTION_ID,
enum_descr_override=enum_descr_override,
)
@@ -1447,7 +1446,6 @@ def pyrna2sphinx(basepath):
identifier = " %s" % prop.identifier
kwargs["class_fmt"] = ":class:`%s`"
kwargs["mathutils_fmt"] = ":class:`mathutils.%s`"
kwargs["collection_id"] = _BPY_PROP_COLLECTION_ID
@@ -1567,7 +1565,6 @@ def pyrna2sphinx(basepath):
type_descr = prop.get_type_description(
class_fmt=":class:`%s`",
mathutils_fmt=":class:`mathutils.%s`",
collection_id=_BPY_PROP_COLLECTION_ID,
enum_descr_override=enum_descr_override,
)
@@ -1634,7 +1631,6 @@ def pyrna2sphinx(basepath):
type_descr = prop.get_type_description(
as_ret=True, class_fmt=":class:`%s`",
mathutils_fmt=":class:`mathutils.%s`",
collection_id=_BPY_PROP_COLLECTION_ID,
enum_descr_override=enum_descr_override,
)

View File

@@ -91,7 +91,3 @@ endif()
if(WITH_COMPOSITOR_CPU)
add_subdirectory(smaa_areatex)
endif()
if(WITH_VULKAN_BACKEND)
add_subdirectory(vulkan_memory_allocator)
endif()

View File

@@ -27,7 +27,6 @@
#include <memory>
#include <vector>
#include <unordered_map>
#include <string>
AUD_NAMESPACE_BEGIN

View File

@@ -1,24 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-or-later
# Copyright 2022 Blender Foundation. All rights reserved.
set(INC
.
)
set(INC_SYS
${VULKAN_INCLUDE_DIRS}
)
set(SRC
vk_mem_alloc_impl.cc
vk_mem_alloc.h
)
blender_add_lib(extern_vulkan_memory_allocator "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
target_compile_options(extern_vulkan_memory_allocator
PRIVATE "-Wno-nullability-completeness"
)
endif()

View File

@@ -1,19 +0,0 @@
Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@@ -1,5 +0,0 @@
Project: VulkanMemoryAllocator
URL: https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator
License: MIT
Upstream version: a6bfc23
Local modifications: None

View File

@@ -1,175 +0,0 @@
# Vulkan Memory Allocator
Easy to integrate Vulkan memory allocation library.
**Documentation:** Browse online: [Vulkan Memory Allocator](https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/) (generated from Doxygen-style comments in [include/vk_mem_alloc.h](include/vk_mem_alloc.h))
**License:** MIT. See [LICENSE.txt](LICENSE.txt)
**Changelog:** See [CHANGELOG.md](CHANGELOG.md)
**Product page:** [Vulkan Memory Allocator on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/)
**Build status:**
- Windows: [![Build status](https://ci.appveyor.com/api/projects/status/4vlcrb0emkaio2pn/branch/master?svg=true)](https://ci.appveyor.com/project/adam-sawicki-amd/vulkanmemoryallocator/branch/master)
- Linux: [![Build Status](https://app.travis-ci.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.svg?branch=master)](https://app.travis-ci.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator)
[![Average time to resolve an issue](http://isitmaintained.com/badge/resolution/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.svg)](http://isitmaintained.com/project/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator "Average time to resolve an issue")
# Problem
Memory allocation and resource (buffer and image) creation in Vulkan is difficult (comparing to older graphics APIs, like D3D11 or OpenGL) for several reasons:
- It requires a lot of boilerplate code, just like everything else in Vulkan, because it is a low-level and high-performance API.
- There is additional level of indirection: `VkDeviceMemory` is allocated separately from creating `VkBuffer`/`VkImage` and they must be bound together.
- Driver must be queried for supported memory heaps and memory types. Different GPU vendors provide different types of it.
- It is recommended to allocate bigger chunks of memory and assign parts of them to particular resources, as there is a limit on maximum number of memory blocks that can be allocated.
# Features
This library can help game developers to manage memory allocations and resource creation by offering some higher-level functions:
1. Functions that help to choose correct and optimal memory type based on intended usage of the memory.
- Required or preferred traits of the memory are expressed using higher-level description comparing to Vulkan flags.
2. Functions that allocate memory blocks, reserve and return parts of them (`VkDeviceMemory` + offset + size) to the user.
- Library keeps track of allocated memory blocks, used and unused ranges inside them, finds best matching unused ranges for new allocations, respects all the rules of alignment and buffer/image granularity.
3. Functions that can create an image/buffer, allocate memory for it and bind them together - all in one call.
Additional features:
- Well-documented - description of all functions and structures provided, along with chapters that contain general description and example code.
- Thread-safety: Library is designed to be used in multithreaded code. Access to a single device memory block referred by different buffers and textures (binding, mapping) is synchronized internally. Memory mapping is reference-counted.
- Configuration: Fill optional members of `VmaAllocatorCreateInfo` structure to provide custom CPU memory allocator, pointers to Vulkan functions and other parameters.
- Customization and integration with custom engines: Predefine appropriate macros to provide your own implementation of all external facilities used by the library like assert, mutex, atomic.
- Support for memory mapping, reference-counted internally. Support for persistently mapped memory: Just allocate with appropriate flag and access the pointer to already mapped memory.
- Support for non-coherent memory. Functions that flush/invalidate memory. `nonCoherentAtomSize` is respected automatically.
- Support for resource aliasing (overlap).
- Support for sparse binding and sparse residency: Convenience functions that allocate or free multiple memory pages at once.
- Custom memory pools: Create a pool with desired parameters (e.g. fixed or limited maximum size) and allocate memory out of it.
- Linear allocator: Create a pool with linear algorithm and use it for much faster allocations and deallocations in free-at-once, stack, double stack, or ring buffer fashion.
- Support for Vulkan 1.0, 1.1, 1.2, 1.3.
- Support for extensions (and equivalent functionality included in new Vulkan versions):
- VK_KHR_dedicated_allocation: Just enable it and it will be used automatically by the library.
- VK_KHR_buffer_device_address: Flag `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR` is automatically added to memory allocations where needed.
- VK_EXT_memory_budget: Used internally if available to query for current usage and budget. If not available, it falls back to an estimation based on memory heap sizes.
- VK_EXT_memory_priority: Set `priority` of allocations or custom pools and it will be set automatically using this extension.
- VK_AMD_device_coherent_memory
- Defragmentation of GPU and CPU memory: Let the library move data around to free some memory blocks and make your allocations better compacted.
- Statistics: Obtain brief or detailed statistics about the amount of memory used, unused, number of allocated blocks, number of allocations etc. - globally, per memory heap, and per memory type.
- Debug annotations: Associate custom `void* pUserData` and debug `char* pName` with each allocation.
- JSON dump: Obtain a string in JSON format with detailed map of internal state, including list of allocations, their string names, and gaps between them.
- Convert this JSON dump into a picture to visualize your memory. See [tools/GpuMemDumpVis](tools/GpuMemDumpVis/README.md).
- Debugging incorrect memory usage: Enable initialization of all allocated memory with a bit pattern to detect usage of uninitialized or freed memory. Enable validation of a magic number after every allocation to detect out-of-bounds memory corruption.
- Support for interoperability with OpenGL.
- Virtual allocator: Interface for using core allocation algorithm to allocate any custom data, e.g. pieces of one large buffer.
# Prerequisites
- Self-contained C++ library in single header file. No external dependencies other than standard C and C++ library and of course Vulkan. Some features of C++14 used. STL containers, RTTI, or C++ exceptions are not used.
- Public interface in C, in same convention as Vulkan API. Implementation in C++.
- Error handling implemented by returning `VkResult` error codes - same way as in Vulkan.
- Interface documented using Doxygen-style comments.
- Platform-independent, but developed and tested on Windows using Visual Studio. Continuous integration setup for Windows and Linux. Used also on Android, MacOS, and other platforms.
# Example
Basic usage of this library is very simple. Advanced features are optional. After you created global `VmaAllocator` object, a complete code needed to create a buffer may look like this:
```cpp
VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufferInfo.size = 65536;
bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
VmaAllocationCreateInfo allocInfo = {};
allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
VkBuffer buffer;
VmaAllocation allocation;
vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
```
With this one function call:
1. `VkBuffer` is created.
2. `VkDeviceMemory` block is allocated if needed.
3. An unused region of the memory block is bound to this buffer.
`VmaAllocation` is an object that represents memory assigned to this buffer. It can be queried for parameters like `VkDeviceMemory` handle and offset.
# How to build
On Windows it is recommended to use [CMake UI](https://cmake.org/runningcmake/). Alternatively you can generate a Visual Studio project map using CMake in command line: `cmake -B./build/ -DCMAKE_BUILD_TYPE=Debug -G "Visual Studio 16 2019" -A x64 ./`
On Linux:
```
mkdir build
cd build
cmake ..
make
```
The following targets are available
| Target | Description | CMake option | Default setting |
| ------------- | ------------- | ------------- | ------------- |
| VmaSample | VMA sample application | `VMA_BUILD_SAMPLE` | `OFF` |
| VmaBuildSampleShaders | Shaders for VmaSample | `VMA_BUILD_SAMPLE_SHADERS` | `OFF` |
Please note that while VulkanMemoryAllocator library is supported on other platforms besides Windows, VmaSample is not.
These CMake options are available
| CMake option | Description | Default setting |
| ------------- | ------------- | ------------- |
| `VMA_RECORDING_ENABLED` | Enable VMA memory recording for debugging | `OFF` |
| `VMA_USE_STL_CONTAINERS` | Use C++ STL containers instead of VMA's containers | `OFF` |
| `VMA_STATIC_VULKAN_FUNCTIONS` | Link statically with Vulkan API | `OFF` |
| `VMA_DYNAMIC_VULKAN_FUNCTIONS` | Fetch pointers to Vulkan functions internally (no static linking) | `ON` |
| `VMA_DEBUG_ALWAYS_DEDICATED_MEMORY` | Every allocation will have its own memory block | `OFF` |
| `VMA_DEBUG_INITIALIZE_ALLOCATIONS` | Automatically fill new allocations and destroyed allocations with some bit pattern | `OFF` |
| `VMA_DEBUG_GLOBAL_MUTEX` | Enable single mutex protecting all entry calls to the library | `OFF` |
| `VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT` | Never exceed [VkPhysicalDeviceLimits::maxMemoryAllocationCount](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#limits-maxMemoryAllocationCount) and return error | `OFF` |
# Binaries
The release comes with precompiled binary executable for "VulkanSample" application which contains test suite. It is compiled using Visual Studio 2019, so it requires appropriate libraries to work, including "MSVCP140.dll", "VCRUNTIME140.dll", "VCRUNTIME140_1.dll". If the launch fails with error message telling about those files missing, please download and install [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads), "x64" version.
# Read more
See **[Documentation](https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/)**.
# Software using this library
- **[X-Plane](https://x-plane.com/)**
- **[Detroit: Become Human](https://gpuopen.com/learn/porting-detroit-3/)**
- **[Vulkan Samples](https://github.com/LunarG/VulkanSamples)** - official Khronos Vulkan samples. License: Apache-style.
- **[Anvil](https://github.com/GPUOpen-LibrariesAndSDKs/Anvil)** - cross-platform framework for Vulkan. License: MIT.
- **[Filament](https://github.com/google/filament)** - physically based rendering engine for Android, Windows, Linux and macOS, from Google. Apache License 2.0.
- **[Atypical Games - proprietary game engine](https://developer.samsung.com/galaxy-gamedev/gamedev-blog/infinitejet.html)**
- **[Flax Engine](https://flaxengine.com/)**
- **[Godot Engine](https://github.com/godotengine/godot/)** - multi-platform 2D and 3D game engine. License: MIT.
- **[Lightweight Java Game Library (LWJGL)](https://www.lwjgl.org/)** - includes binding of the library for Java. License: BSD.
- **[PowerVR SDK](https://github.com/powervr-graphics/Native_SDK)** - C++ cross-platform 3D graphics SDK, from Imagination. License: MIT.
- **[Skia](https://github.com/google/skia)** - complete 2D graphic library for drawing Text, Geometries, and Images, from Google.
- **[The Forge](https://github.com/ConfettiFX/The-Forge)** - cross-platform rendering framework. Apache License 2.0.
- **[VK9](https://github.com/disks86/VK9)** - Direct3D 9 compatibility layer using Vulkan. Zlib lincese.
- **[vkDOOM3](https://github.com/DustinHLand/vkDOOM3)** - Vulkan port of GPL DOOM 3 BFG Edition. License: GNU GPL.
- **[vkQuake2](https://github.com/kondrak/vkQuake2)** - vanilla Quake 2 with Vulkan support. License: GNU GPL.
- **[Vulkan Best Practice for Mobile Developers](https://github.com/ARM-software/vulkan_best_practice_for_mobile_developers)** from ARM. License: MIT.
- **[RPCS3](https://github.com/RPCS3/rpcs3)** - PlayStation 3 emulator/debugger. License: GNU GPLv2.
- **[PPSSPP](https://github.com/hrydgard/ppsspp)** - Playstation Portable emulator/debugger. License: GNU GPLv2+.
[Many other projects on GitHub](https://github.com/search?q=AMD_VULKAN_MEMORY_ALLOCATOR_H&type=Code) and some game development studios that use Vulkan in their games.
# See also
- **[D3D12 Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator)** - equivalent library for Direct3D 12. License: MIT.
- **[Awesome Vulkan](https://github.com/vinjn/awesome-vulkan)** - a curated list of awesome Vulkan libraries, debuggers and resources.
- **[VulkanMemoryAllocator-Hpp](https://github.com/malte-v/VulkanMemoryAllocator-Hpp)** - C++ binding for this library. License: CC0-1.0.
- **[PyVMA](https://github.com/realitix/pyvma)** - Python wrapper for this library. Author: Jean-Sébastien B. (@realitix). License: Apache 2.0.
- **[vk-mem](https://github.com/gwihlidal/vk-mem-rs)** - Rust binding for this library. Author: Graham Wihlidal. License: Apache 2.0 or MIT.
- **[Haskell bindings](https://hackage.haskell.org/package/VulkanMemoryAllocator)**, **[github](https://github.com/expipiplus1/vulkan/tree/master/VulkanMemoryAllocator)** - Haskell bindings for this library. Author: Ellie Hermaszewska (@expipiplus1). License BSD-3-Clause.
- **[vma_sample_sdl](https://github.com/rextimmy/vma_sample_sdl)** - SDL port of the sample app of this library (with the goal of running it on multiple platforms, including MacOS). Author: @rextimmy. License: MIT.
- **[vulkan-malloc](https://github.com/dylanede/vulkan-malloc)** - Vulkan memory allocation library for Rust. Based on version 1 of this library. Author: Dylan Ede (@dylanede). License: MIT / Apache 2.0.

File diff suppressed because it is too large Load Diff

View File

@@ -1,12 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2022 Blender Foundation. All rights reserved. */
#ifdef __APPLE__
# include <MoltenVK/vk_mvk_moltenvk.h>
#else
# include <vulkan/vulkan.h>
#endif
#define VMA_IMPLEMENTATION
#include "vk_mem_alloc.h"

View File

@@ -253,33 +253,6 @@ if(WITH_CYCLES_OSL)
)
endif()
if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX)
add_definitions(-DWITH_CUDA)
if(WITH_CUDA_DYNLOAD)
include_directories(
../../extern/cuew/include
)
add_definitions(-DWITH_CUDA_DYNLOAD)
else()
include_directories(
SYSTEM
${CUDA_TOOLKIT_INCLUDE}
)
endif()
endif()
if(WITH_CYCLES_DEVICE_HIP)
add_definitions(-DWITH_HIP)
if(WITH_HIP_DYNLOAD)
include_directories(
../../extern/hipew/include
)
add_definitions(-DWITH_HIP_DYNLOAD)
endif()
endif()
if(WITH_CYCLES_DEVICE_OPTIX)
find_package(OptiX 7.3.0)
@@ -288,16 +261,12 @@ if(WITH_CYCLES_DEVICE_OPTIX)
include_directories(
SYSTEM
${OPTIX_INCLUDE_DIR}
)
)
else()
set_and_warn_library_found("OptiX" OPTIX_FOUND WITH_CYCLES_DEVICE_OPTIX)
endif()
endif()
if(WITH_CYCLES_DEVICE_METAL)
add_definitions(-DWITH_METAL)
endif()
if (WITH_CYCLES_DEVICE_ONEAPI)
add_definitions(-DWITH_ONEAPI)
endif()
@@ -423,7 +392,7 @@ if(WITH_CYCLES_HYDRA_RENDER_DELEGATE AND (NOT WITH_BLENDER) AND (NOT WITH_CYCLES
set(CYCLES_INSTALL_PATH ${CYCLES_INSTALL_PATH}/hdCycles/resources)
endif()
if(WITH_CYCLES_CUDA_BINARIES)
if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
if(MSVC)
set(MAX_MSVC 1800)
if(${CUDA_VERSION} EQUAL "8.0")
@@ -435,7 +404,24 @@ if(WITH_CYCLES_CUDA_BINARIES)
elseif(${CUDA_VERSION} VERSION_GREATER_EQUAL 10.0)
set(MAX_MSVC 1999)
endif()
if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang")
message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
set(WITH_CYCLES_CUBIN_COMPILER ON)
endif()
unset(MAX_MSVC)
elseif(APPLE)
if(NOT (${XCODE_VERSION} VERSION_LESS 10.0))
message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
set(WITH_CYCLES_CUBIN_COMPILER ON)
endif()
endif()
endif()
# NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC.
if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER AND NOT WITH_CYCLES_CUBIN_COMPILER_OVERRRIDE)
if(NOT (${CUDA_VERSION} VERSION_LESS 10.0))
message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.")
set(WITH_CYCLES_CUBIN_COMPILER OFF)
endif()
endif()

View File

@@ -103,3 +103,32 @@ if(WITH_CYCLES_STANDALONE)
$<TARGET_FILE:cycles>
DESTINATION ${CMAKE_INSTALL_PREFIX})
endif()
#####################################################################
# Cycles cubin compiler executable
#####################################################################
if(WITH_CYCLES_CUBIN_COMPILER)
# 32 bit windows is special, nvrtc is not supported on x86, so even
# though we are building 32 bit blender a 64 bit cubin_cc will have
# to be build to compile the cubins.
if(MSVC AND NOT CMAKE_CL_64)
message("Building with CUDA not supported on 32 bit, skipped")
set(WITH_CYCLES_CUDA_BINARIES OFF CACHE BOOL "" FORCE)
else()
set(SRC
cycles_cubin_cc.cpp
)
set(INC
../../../extern/cuew/include
)
set(LIB
)
cycles_external_libraries_append(LIB)
add_executable(cycles_cubin_cc ${SRC})
include_directories(${INC})
target_link_libraries(cycles_cubin_cc PRIVATE ${LIB})
unset(SRC)
unset(INC)
endif()
endif()

View File

@@ -0,0 +1,311 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2017-2022 Blender Foundation */
#include <stdint.h>
#include <stdio.h>
#include <string>
#include <vector>
#include <OpenImageIO/argparse.h>
#include <OpenImageIO/filesystem.h>
#include "cuew.h"
#ifdef _MSC_VER
# include <Windows.h>
#endif
using std::string;
using std::vector;
namespace std {
template<typename T> std::string to_string(const T &n)
{
std::ostringstream s;
s << n;
return s.str();
}
} // namespace std
class CompilationSettings {
public:
CompilationSettings()
: target_arch(0), bits(64), verbose(false), fast_math(false), ptx_only(false)
{
}
string cuda_toolkit_dir;
string input_file;
string output_file;
string ptx_file;
vector<string> defines;
vector<string> includes;
int target_arch;
int bits;
bool verbose;
bool fast_math;
bool ptx_only;
};
static bool compile_cuda(CompilationSettings &settings)
{
const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h", "stddef.h"};
const char *header_content[] = {"\n", "\n", "\n", "\n", "\n"};
printf("Building %s\n", settings.input_file.c_str());
string code;
if (!OIIO::Filesystem::read_text_file(settings.input_file, code)) {
fprintf(stderr, "Error: unable to read %s\n", settings.input_file.c_str());
return false;
}
vector<string> options;
for (size_t i = 0; i < settings.includes.size(); i++) {
options.push_back("-I" + settings.includes[i]);
}
for (size_t i = 0; i < settings.defines.size(); i++) {
options.push_back("-D" + settings.defines[i]);
}
options.push_back("-D__KERNEL_CUDA_VERSION__=" + std::to_string(cuewNvrtcVersion()));
options.push_back("-arch=compute_" + std::to_string(settings.target_arch));
options.push_back("--device-as-default-execution-space");
options.push_back("-DCYCLES_CUBIN_CC");
options.push_back("--std=c++11");
if (settings.fast_math)
options.push_back("--use_fast_math");
nvrtcProgram prog;
nvrtcResult result = nvrtcCreateProgram(&prog,
code.c_str(), // buffer
NULL, // name
sizeof(headers) / sizeof(void *), // numHeaders
header_content, // headers
headers); // includeNames
if (result != NVRTC_SUCCESS) {
fprintf(stderr, "Error: nvrtcCreateProgram failed (%d)\n\n", (int)result);
return false;
}
/* Transfer options to a classic C array. */
vector<const char *> opts(options.size());
for (size_t i = 0; i < options.size(); i++) {
opts[i] = options[i].c_str();
}
result = nvrtcCompileProgram(prog, options.size(), &opts[0]);
if (result != NVRTC_SUCCESS) {
fprintf(stderr, "Error: nvrtcCompileProgram failed (%d)\n\n", (int)result);
size_t log_size;
nvrtcGetProgramLogSize(prog, &log_size);
vector<char> log(log_size);
nvrtcGetProgramLog(prog, &log[0]);
fprintf(stderr, "%s\n", &log[0]);
return false;
}
/* Retrieve the ptx code. */
size_t ptx_size;
result = nvrtcGetPTXSize(prog, &ptx_size);
if (result != NVRTC_SUCCESS) {
fprintf(stderr, "Error: nvrtcGetPTXSize failed (%d)\n\n", (int)result);
return false;
}
vector<char> ptx_code(ptx_size);
result = nvrtcGetPTX(prog, &ptx_code[0]);
if (result != NVRTC_SUCCESS) {
fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result);
return false;
}
if (settings.ptx_only) {
settings.ptx_file = settings.output_file;
}
else {
/* Write a file in the temp folder with the ptx code. */
settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
OIIO::Filesystem::unique_path();
}
FILE *f = fopen(settings.ptx_file.c_str(), "wb");
fwrite(&ptx_code[0], 1, ptx_size, f);
fclose(f);
return true;
}
static bool link_ptxas(CompilationSettings &settings)
{
string cudapath = "";
if (settings.cuda_toolkit_dir.size())
cudapath = settings.cuda_toolkit_dir + "/bin/";
string ptx = "\"" + cudapath + "ptxas\" " + settings.ptx_file + " -o " + settings.output_file +
" --gpu-name sm_" + std::to_string(settings.target_arch) + " -m" +
std::to_string(settings.bits);
if (settings.verbose) {
ptx += " --verbose";
printf("%s\n", ptx.c_str());
}
int pxresult = system(ptx.c_str());
if (pxresult) {
fprintf(stderr, "Error: ptxas failed (%d)\n\n", pxresult);
return false;
}
if (!OIIO::Filesystem::remove(settings.ptx_file)) {
fprintf(stderr, "Error: removing %s\n\n", settings.ptx_file.c_str());
}
return true;
}
static bool init(CompilationSettings &settings)
{
#ifdef _MSC_VER
if (settings.cuda_toolkit_dir.size()) {
SetDllDirectory((settings.cuda_toolkit_dir + "/bin").c_str());
}
#else
(void)settings;
#endif
int cuewresult = cuewInit(CUEW_INIT_NVRTC);
if (cuewresult != CUEW_SUCCESS) {
fprintf(stderr, "Error: cuew init fialed (0x%d)\n\n", cuewresult);
return false;
}
if (cuewNvrtcVersion() < 80) {
fprintf(stderr, "Error: only cuda 8 and higher is supported, %d\n\n", cuewCompilerVersion());
return false;
}
if (!nvrtcCreateProgram) {
fprintf(stderr, "Error: nvrtcCreateProgram not resolved\n");
return false;
}
if (!nvrtcCompileProgram) {
fprintf(stderr, "Error: nvrtcCompileProgram not resolved\n");
return false;
}
if (!nvrtcGetProgramLogSize) {
fprintf(stderr, "Error: nvrtcGetProgramLogSize not resolved\n");
return false;
}
if (!nvrtcGetProgramLog) {
fprintf(stderr, "Error: nvrtcGetProgramLog not resolved\n");
return false;
}
if (!nvrtcGetPTXSize) {
fprintf(stderr, "Error: nvrtcGetPTXSize not resolved\n");
return false;
}
if (!nvrtcGetPTX) {
fprintf(stderr, "Error: nvrtcGetPTX not resolved\n");
return false;
}
return true;
}
static bool parse_parameters(int argc, const char **argv, CompilationSettings &settings)
{
OIIO::ArgParse ap;
ap.options("Usage: cycles_cubin_cc [options]",
"-target %d",
&settings.target_arch,
"target shader model",
"-m %d",
&settings.bits,
"Cuda architecture bits",
"-i %s",
&settings.input_file,
"Input source filename",
"-o %s",
&settings.output_file,
"Output cubin filename",
"-I %L",
&settings.includes,
"Add additional includepath",
"-D %L",
&settings.defines,
"Add additional defines",
"-ptx",
&settings.ptx_only,
"emit PTX code",
"-v",
&settings.verbose,
"Use verbose logging",
"--use_fast_math",
&settings.fast_math,
"Use fast math",
"-cuda-toolkit-dir %s",
&settings.cuda_toolkit_dir,
"path to the cuda toolkit binary directory",
NULL);
if (ap.parse(argc, argv) < 0) {
fprintf(stderr, "%s\n", ap.geterror().c_str());
ap.usage();
return false;
}
if (!settings.output_file.size()) {
fprintf(stderr, "Error: Output file not set(-o), required\n\n");
return false;
}
if (!settings.input_file.size()) {
fprintf(stderr, "Error: Input file not set(-i, required\n\n");
return false;
}
if (!settings.target_arch) {
fprintf(stderr, "Error: target shader model not set (-target), required\n\n");
return false;
}
return true;
}
int main(int argc, const char **argv)
{
CompilationSettings settings;
if (!parse_parameters(argc, argv, settings)) {
fprintf(stderr, "Error: invalid parameters, exiting\n");
exit(EXIT_FAILURE);
}
if (!init(settings)) {
fprintf(stderr, "Error: initialization error, exiting\n");
exit(EXIT_FAILURE);
}
if (!compile_cuda(settings)) {
fprintf(stderr, "Error: compilation error, exiting\n");
exit(EXIT_FAILURE);
}
if (!settings.ptx_only) {
if (!link_ptxas(settings)) {
exit(EXIT_FAILURE);
}
}
return 0;
}

View File

@@ -58,7 +58,7 @@ class CyclesRender(bpy.types.RenderEngine):
if not self.session:
if self.is_preview:
cscene = bpy.context.scene.cycles
use_osl = cscene.shading_system
use_osl = cscene.shading_system and cscene.device == 'CPU'
engine.create(self, data, preview_osl=use_osl)
else:

View File

@@ -156,11 +156,6 @@ def with_osl():
return _cycles.with_osl
def osl_version():
import _cycles
return _cycles.osl_version
def with_path_guiding():
import _cycles
return _cycles.with_path_guiding
@@ -204,6 +199,7 @@ def list_render_passes(scene, srl):
if crl.use_pass_volume_indirect: yield ("VolumeInd", "RGB", 'COLOR')
if srl.use_pass_emit: yield ("Emit", "RGB", 'COLOR')
if srl.use_pass_environment: yield ("Env", "RGB", 'COLOR')
if srl.use_pass_shadow: yield ("Shadow", "RGB", 'COLOR')
if srl.use_pass_ambient_occlusion: yield ("AO", "RGB", 'COLOR')
if crl.use_pass_shadow_catcher: yield ("Shadow Catcher", "RGB", 'COLOR')
# autopep8: on

View File

@@ -114,7 +114,7 @@ class CYCLES_OT_denoise_animation(Operator):
class CYCLES_OT_merge_images(Operator):
"Combine OpenEXR multi-layer images rendered with different sample " \
"Combine OpenEXR multilayer images rendered with different sample " \
"ranges into one image with reduced noise"
bl_idname = "cycles.merge_images"
bl_label = "Merge Images"

View File

@@ -91,7 +91,7 @@ class AddPresetPerformance(AddPresetBase, Operator):
preset_menu = "CYCLES_PT_performance_presets"
preset_defines = [
"render = bpy.context.scene.render",
"render = bpy.context.scene.render"
"cycles = bpy.context.scene.cycles"
]

View File

@@ -86,29 +86,6 @@ enum_sampling_pattern = (
('PROGRESSIVE_MULTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern", 1),
)
enum_emission_sampling = (
('NONE',
'None',
"Do not use this surface as a light for sampling",
0),
('AUTO',
'Auto',
"Automatically determine if the surface should be treated as a light for sampling, based on estimated emission intensity",
1),
('FRONT',
'Front',
"Treat only front side of the surface as a light, usually for closed meshes whose interior is not visible",
2),
('BACK',
'Back',
"Treat only back side of the surface as a light for sampling",
3),
('FRONT_BACK',
'Front and Back',
"Treat surface as a light for sampling, emitting from both the front and back side",
4),
)
enum_volume_sampling = (
('DISTANCE',
"Distance",
@@ -170,6 +147,7 @@ enum_view3d_shading_render_pass = (
('EMISSION', "Emission", "Show the Emission render pass"),
('BACKGROUND', "Background", "Show the Background render pass"),
('AO', "Ambient Occlusion", "Show the Ambient Occlusion render pass"),
('SHADOW', "Shadow", "Show the Shadow render pass"),
('SHADOW_CATCHER', "Shadow Catcher", "Show the Shadow Catcher render pass"),
('', "Light", ""),
@@ -312,7 +290,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
)
shading_system: BoolProperty(
name="Open Shading Language",
description="Use Open Shading Language",
description="Use Open Shading Language (CPU rendering only)",
)
preview_pause: BoolProperty(
@@ -503,12 +481,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default='MULTIPLE_IMPORTANCE_SAMPLING',
)
use_light_tree: BoolProperty(
name="Light Tree",
description="Sample multiple lights more efficiently based on estimated contribution at every shading point",
default=True,
)
min_light_bounces: IntProperty(
name="Min Light Bounces",
description="Minimum number of light bounces. Setting this higher reduces noise in the first bounces, "
@@ -650,7 +622,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
transparent_max_bounces: IntProperty(
name="Transparent Max Bounces",
description="Maximum number of transparent bounces. This is independent of maximum number of other bounces",
description="Maximum number of transparent bounces. This is independent of maximum number of other bounces ",
min=0, max=1024,
default=8,
)
@@ -1071,13 +1043,13 @@ class CyclesCameraSettings(bpy.types.PropertyGroup):
class CyclesMaterialSettings(bpy.types.PropertyGroup):
emission_sampling: EnumProperty(
name="Emission Sampling",
description="Sampling strategy for emissive surfaces",
items=enum_emission_sampling,
default="AUTO",
sample_as_light: BoolProperty(
name="Multiple Importance Sample",
description="Use multiple importance sampling for this material, "
"disabling may reduce overall noise for large "
"objects that emit little light compared to other light sources",
default=True,
)
use_transparent_shadow: BoolProperty(
name="Transparent Shadows",
description="Use transparent shadows for this material if it contains a Transparent BSDF, "
@@ -1670,7 +1642,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.label(text="and Windows driver version 101.3430 or newer", icon='BLANK1')
elif sys.platform.startswith("linux"):
col.label(text="Requires Intel GPU with Xe-HPG architecture and", icon='BLANK1')
col.label(text=" - intel-level-zero-gpu version 1.3.23904 or newer", icon='BLANK1')
col.label(text=" - Linux driver version xx.xx.23904 or newer", icon='BLANK1')
col.label(text=" - oneAPI Level-Zero Loader", icon='BLANK1')
elif device_type == 'METAL':
col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')

View File

@@ -154,9 +154,8 @@ def use_mnee(context):
# The MNEE kernel doesn't compile on macOS < 13.
if use_metal(context):
import platform
version, _, _ = platform.mac_ver()
major_version = version.split(".")[0]
if int(major_version) < 13:
v, _, _ = platform.mac_ver()
if float(v) < 13.0:
return False
return True
@@ -314,11 +313,10 @@ class CYCLES_RENDER_PT_sampling_path_guiding(CyclesButtonsPanel, Panel):
layout.use_property_decorate = False
layout.active = cscene.use_guiding
layout.prop(cscene, "guiding_training_samples")
col = layout.column(align=True)
col.prop(cscene, "use_surface_guiding", text="Surface")
col.prop(cscene, "use_volume_guiding", text="Volume")
col.prop(cscene, "use_surface_guiding")
col.prop(cscene, "use_volume_guiding")
col.prop(cscene, "guiding_training_samples")
class CYCLES_RENDER_PT_sampling_path_guiding_debug(CyclesDebugButtonsPanel, Panel):
@@ -383,6 +381,7 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
col = layout.column(align=True)
col.prop(cscene, "min_light_bounces")
col.prop(cscene, "min_transparent_bounces")
col.prop(cscene, "light_sampling_threshold", text="Light Threshold")
for view_layer in scene.view_layers:
if view_layer.samples > 0:
@@ -391,31 +390,6 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
break
class CYCLES_RENDER_PT_sampling_lights(CyclesButtonsPanel, Panel):
bl_label = "Lights"
bl_parent_id = "CYCLES_RENDER_PT_sampling"
bl_options = {'DEFAULT_CLOSED'}
def draw_header(self, context):
layout = self.layout
scene = context.scene
cscene = scene.cycles
def draw(self, context):
layout = self.layout
layout.use_property_split = True
layout.use_property_decorate = False
scene = context.scene
cscene = scene.cycles
col = layout.column(align=True)
col.prop(cscene, "use_light_tree")
sub = col.row()
sub.prop(cscene, "light_sampling_threshold", text="Light Threshold")
sub.active = not cscene.use_light_tree
class CYCLES_RENDER_PT_subdivision(CyclesButtonsPanel, Panel):
bl_label = "Subdivision"
bl_options = {'DEFAULT_CLOSED'}
@@ -978,6 +952,7 @@ class CYCLES_RENDER_PT_passes_light(CyclesButtonsPanel, Panel):
col = layout.column(heading="Other", align=True)
col.prop(view_layer, "use_pass_emit", text="Emission")
col.prop(view_layer, "use_pass_environment")
col.prop(view_layer, "use_pass_shadow")
col.prop(view_layer, "use_pass_ambient_occlusion", text="Ambient Occlusion")
col.prop(cycles_view_layer, "use_pass_shadow_catcher")
@@ -1855,9 +1830,9 @@ class CYCLES_MATERIAL_PT_settings_surface(CyclesButtonsPanel, Panel):
cmat = mat.cycles
col = layout.column()
col.prop(cmat, "displacement_method", text="Displacement")
col.prop(cmat, "emission_sampling")
col.prop(cmat, "sample_as_light", text="Multiple Importance")
col.prop(cmat, "use_transparent_shadow")
col.prop(cmat, "displacement_method", text="Displacement")
def draw(self, context):
self.draw_shared(self, context.material)
@@ -2330,10 +2305,7 @@ def draw_device(self, context):
col.prop(cscene, "device")
from . import engine
if engine.with_osl() and (
use_cpu(context) or
(use_optix(context) and (engine.osl_version()[1] >= 13 or engine.osl_version()[0] > 1))
):
if engine.with_osl() and use_cpu(context):
col.prop(cscene, "shading_system")
@@ -2389,7 +2361,6 @@ classes = (
CYCLES_RENDER_PT_sampling_render_denoise,
CYCLES_RENDER_PT_sampling_path_guiding,
CYCLES_RENDER_PT_sampling_path_guiding_debug,
CYCLES_RENDER_PT_sampling_lights,
CYCLES_RENDER_PT_sampling_advanced,
CYCLES_RENDER_PT_light_paths,
CYCLES_RENDER_PT_light_paths_max_bounces,

View File

@@ -99,7 +99,7 @@ def do_versions(self):
library_versions.setdefault(library.version, []).append(library)
# Do versioning per library, since they might have different versions.
max_need_versioning = (3, 5, 2)
max_need_versioning = (3, 0, 25)
for version, libraries in library_versions.items():
if version > max_need_versioning:
continue
@@ -297,8 +297,3 @@ def do_versions(self):
cmat = mat.cycles
if not cmat.is_property_set("displacement_method"):
cmat.displacement_method = 'DISPLACEMENT'
if version <= (3, 5, 3):
cmat = mat.cycles
if not cmat.get("sample_as_light", True):
cmat.emission_sampling = 'NONE'

File diff suppressed because it is too large Load Diff

View File

@@ -15,10 +15,6 @@
#include "util/unique_ptr.h"
#include "util/vector.h"
typedef struct GPUContext GPUContext;
typedef struct GPUFence GPUFence;
typedef struct GPUShader GPUShader;
CCL_NAMESPACE_BEGIN
/* Base class of shader used for display driver rendering. */
@@ -33,7 +29,7 @@ class BlenderDisplayShader {
BlenderDisplayShader() = default;
virtual ~BlenderDisplayShader() = default;
virtual GPUShader *bind(int width, int height) = 0;
virtual void bind(int width, int height) = 0;
virtual void unbind() = 0;
/* Get attribute location for position and texture coordinate respectively.
@@ -44,7 +40,7 @@ class BlenderDisplayShader {
protected:
/* Get program of this display shader.
* NOTE: The shader needs to be bound to have access to this. */
virtual GPUShader *get_shader_program() = 0;
virtual uint get_shader_program() = 0;
/* Cached values of various OpenGL resources. */
int position_attribute_location_ = -1;
@@ -55,16 +51,16 @@ class BlenderDisplayShader {
* display space shader. */
class BlenderFallbackDisplayShader : public BlenderDisplayShader {
public:
virtual GPUShader *bind(int width, int height) override;
virtual void bind(int width, int height) override;
virtual void unbind() override;
protected:
virtual GPUShader *get_shader_program() override;
virtual uint get_shader_program() override;
void create_shader_if_needed();
void destroy_shader();
GPUShader *shader_program_ = 0;
uint shader_program_ = 0;
int image_texture_location_ = -1;
int fullscreen_location_ = -1;
@@ -77,17 +73,17 @@ class BlenderDisplaySpaceShader : public BlenderDisplayShader {
public:
BlenderDisplaySpaceShader(BL::RenderEngine &b_engine, BL::Scene &b_scene);
virtual GPUShader *bind(int width, int height) override;
virtual void bind(int width, int height) override;
virtual void unbind() override;
protected:
virtual GPUShader *get_shader_program() override;
virtual uint get_shader_program() override;
BL::RenderEngine b_engine_;
BL::Scene &b_scene_;
/* Cached values of various OpenGL resources. */
GPUShader *shader_program_ = nullptr;
uint shader_program_ = 0;
};
/* Display driver implementation which is specific for Blender viewport integration. */
@@ -126,9 +122,6 @@ class BlenderDisplayDriver : public DisplayDriver {
void gpu_context_lock();
void gpu_context_unlock();
/* Create GPU resources used by the display driver. */
bool gpu_resources_create();
/* Destroy all GPU resources which are being used by this object. */
void gpu_resources_destroy();
@@ -144,8 +137,8 @@ class BlenderDisplayDriver : public DisplayDriver {
struct Tiles;
unique_ptr<Tiles> tiles_;
GPUFence *gpu_render_sync_ = nullptr;
GPUFence *gpu_upload_sync_ = nullptr;
void *gl_render_sync_ = nullptr;
void *gl_upload_sync_ = nullptr;
float2 zoom_ = make_float2(1.0f, 1.0f);
};

View File

@@ -72,11 +72,6 @@ bool BlenderImageLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaDat
metadata.colorspace = u_colorspace_raw;
}
else {
/* In some cases (e.g. T94135), the colorspace setting in Blender gets updated as part of the
* metadata queries in this function, so update the colorspace setting here. */
PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
metadata.colorspace = get_enum_identifier(colorspace_ptr, "name");
if (metadata.channels == 1) {
metadata.type = IMAGE_DATA_TYPE_BYTE;
}

View File

@@ -1085,11 +1085,11 @@ static void create_subd_mesh(Scene *scene,
const int edges_num = b_mesh.edges.length();
if (edges_num != 0 && b_mesh.edge_creases.length() > 0) {
BL::MeshEdgeCreaseLayer creases = b_mesh.edge_creases[0];
size_t num_creases = 0;
const float *creases = static_cast<float *>(b_mesh.edge_creases[0].ptr.data);
for (int i = 0; i < edges_num; i++) {
if (creases.data[i].value() != 0.0f) {
if (creases[i] != 0.0f) {
num_creases++;
}
}
@@ -1098,18 +1098,17 @@ static void create_subd_mesh(Scene *scene,
const MEdge *edges = static_cast<MEdge *>(b_mesh.edges[0].ptr.data);
for (int i = 0; i < edges_num; i++) {
const float crease = creases.data[i].value();
if (crease != 0.0f) {
if (creases[i] != 0.0f) {
const MEdge &b_edge = edges[i];
mesh->add_edge_crease(b_edge.v1, b_edge.v2, crease);
mesh->add_edge_crease(b_edge.v1, b_edge.v2, creases[i]);
}
}
}
for (BL::MeshVertexCreaseLayer &c : b_mesh.vertex_creases) {
for (int i = 0; i < c.data.length(); ++i) {
if (c.data[i].value() != 0.0f) {
mesh->add_vertex_crease(i, c.data[i].value());
for (BL::MeshVertexCreaseLayer &c : b_mesh.vertex_creases) {
for (int i = 0; i < c.data.length(); ++i) {
if (c.data[i].value() != 0.0f) {
mesh->add_vertex_crease(i, c.data[i].value());
}
}
}
}

View File

@@ -18,6 +18,7 @@
#include "util/guiding.h"
#include "util/log.h"
#include "util/md5.h"
#include "util/opengl.h"
#include "util/openimagedenoise.h"
#include "util/path.h"
#include "util/string.h"
@@ -25,8 +26,6 @@
#include "util/tbb.h"
#include "util/types.h"
#include "GPU_state.h"
#ifdef WITH_OSL
# include "scene/osl.h"
@@ -338,7 +337,7 @@ static PyObject *view_draw_func(PyObject * /*self*/, PyObject *args)
if (PyLong_AsVoidPtr(pyrv3d)) {
/* 3d view drawing */
int viewport[4];
GPU_viewport_size_get_i(viewport);
glGetIntegerv(GL_VIEWPORT, viewport);
session->view_draw(viewport[2], viewport[3]);
}
@@ -479,7 +478,6 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
/* Read metadata. */
bool is_bool_param = false;
bool hide_value = !param->validdefault;
ustring param_label = param->name;
for (const OSL::OSLQuery::Parameter &metadata : param->metadata) {
@@ -489,9 +487,6 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
if (metadata.sdefault[0] == "boolean" || metadata.sdefault[0] == "checkBox") {
is_bool_param = true;
}
else if (metadata.sdefault[0] == "null") {
hide_value = true;
}
}
else if (metadata.name == "label") {
/* Socket label. */
@@ -601,9 +596,6 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
if (b_sock.name() != param_label) {
b_sock.name(param_label.string());
}
if (b_sock.hide_value() != hide_value) {
b_sock.hide_value(hide_value);
}
used_sockets.insert(b_sock.ptr.data);
found_existing = true;
}
@@ -643,8 +635,6 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
set_boolean(b_sock.ptr, "default_value", default_boolean);
}
b_sock.hide_value(hide_value);
used_sockets.insert(b_sock.ptr.data);
}
}

View File

@@ -559,6 +559,11 @@ static bool bake_setup_pass(Scene *scene, const string &bake_type_str, const int
0);
integrator->set_use_emission((bake_filter & BL::BakeSettings::pass_filter_EMIT) != 0);
}
/* Shadow pass. */
else if (strcmp(bake_type, "SHADOW") == 0) {
type = PASS_SHADOW;
use_direct_light = true;
}
/* Light component passes. */
else if (strcmp(bake_type, "DIFFUSE") == 0) {
if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) &&

View File

@@ -61,12 +61,6 @@ static DisplacementMethod get_displacement_method(PointerRNA &ptr)
ptr, "displacement_method", DISPLACE_NUM_METHODS, DISPLACE_BUMP);
}
static EmissionSampling get_emission_sampling(PointerRNA &ptr)
{
return (EmissionSampling)get_enum(
ptr, "emission_sampling", EMISSION_SAMPLING_NUM, EMISSION_SAMPLING_AUTO);
}
static int validate_enum_value(int value, int num_values, int default_value)
{
if (value >= num_values) {
@@ -1565,7 +1559,7 @@ void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all)
/* settings */
PointerRNA cmat = RNA_pointer_get(&b_mat.ptr, "cycles");
shader->set_emission_sampling_method(get_emission_sampling(cmat));
shader->set_use_mis(get_boolean(cmat, "sample_as_light"));
shader->set_use_transparent_shadow(get_boolean(cmat, "use_transparent_shadow"));
shader->set_heterogeneous_volume(!get_boolean(cmat, "homogeneous_volume"));
shader->set_volume_sampling_method(get_volume_sampling(cmat));

View File

@@ -26,6 +26,7 @@
#include "util/foreach.h"
#include "util/hash.h"
#include "util/log.h"
#include "util/opengl.h"
#include "util/openimagedenoise.h"
CCL_NAMESPACE_BEGIN
@@ -347,14 +348,7 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
integrator->set_motion_blur(view_layer.use_motion_blur);
}
bool use_light_tree = get_boolean(cscene, "use_light_tree");
integrator->set_use_light_tree(use_light_tree);
integrator->set_light_sampling_threshold(
(use_light_tree) ? 0.0f : get_float(cscene, "light_sampling_threshold"));
if (integrator->use_light_tree_is_modified()) {
scene->light_manager->tag_update(scene, LightManager::UPDATE_ALL);
}
integrator->set_light_sampling_threshold(get_float(cscene, "light_sampling_threshold"));
SamplingPattern sampling_pattern = (SamplingPattern)get_enum(
cscene, "sampling_pattern", SAMPLING_NUM_PATTERNS, SAMPLING_PATTERN_PMJ);
@@ -623,6 +617,7 @@ static bool get_known_pass_type(BL::RenderPass &b_pass, PassType &type, PassMode
MAP_PASS("Emit", PASS_EMISSION, false);
MAP_PASS("Env", PASS_BACKGROUND, false);
MAP_PASS("AO", PASS_AO, false);
MAP_PASS("Shadow", PASS_SHADOW, false);
MAP_PASS("BakePrimitive", PASS_BAKE_PRIMITIVE, false);
MAP_PASS("BakeDifferential", PASS_BAKE_DIFFERENTIAL, false);

View File

@@ -1,6 +1,584 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2011-2022 Blender Foundation
###########################################################################
# Helper macros
###########################################################################
macro(_set_default variable value)
if(NOT ${variable})
set(${variable} ${value})
endif()
endmacro()
###########################################################################
# Precompiled libraries detection
#
# Use precompiled libraries from Blender repository
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY)
if(APPLE)
if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "x86_64")
set(_cycles_lib_dir "${CMAKE_SOURCE_DIR}/../lib/darwin")
else()
set(_cycles_lib_dir "${CMAKE_SOURCE_DIR}/../lib/darwin_arm64")
endif()
# Always use system zlib
find_package(ZLIB REQUIRED)
elseif(WIN32)
if(CMAKE_CL_64)
set(_cycles_lib_dir "${CMAKE_SOURCE_DIR}/../lib/win64_vc15")
else()
message(FATAL_ERROR "Unsupported Visual Studio Version")
endif()
else()
# Path to a locally compiled libraries.
set(LIBDIR_NAME ${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR})
string(TOLOWER ${LIBDIR_NAME} LIBDIR_NAME)
set(LIBDIR_NATIVE_ABI ${CMAKE_SOURCE_DIR}/../lib/${LIBDIR_NAME})
# Path to precompiled libraries with known CentOS 7 ABI.
set(LIBDIR_CENTOS7_ABI ${CMAKE_SOURCE_DIR}/../lib/linux_centos7_x86_64)
# Choose the best suitable libraries.
if(EXISTS ${LIBDIR_NATIVE_ABI})
set(_cycles_lib_dir ${LIBDIR_NATIVE_ABI})
elseif(EXISTS ${LIBDIR_CENTOS7_ABI})
set(_cycles_lib_dir ${LIBDIR_CENTOS7_ABI})
set(WITH_CXX11_ABI OFF)
if(CMAKE_COMPILER_IS_GNUCC AND
CMAKE_C_COMPILER_VERSION VERSION_LESS 9.3)
message(FATAL_ERROR "GCC version must be at least 9.3 for precompiled libraries, found ${CMAKE_C_COMPILER_VERSION}")
endif()
endif()
if(DEFINED _cycles_lib_dir)
message(STATUS "Using precompiled libraries at ${_cycles_lib_dir}")
endif()
# Avoid namespace pollustion.
unset(LIBDIR_NATIVE_ABI)
unset(LIBDIR_CENTOS7_ABI)
endif()
if(EXISTS ${_cycles_lib_dir})
_set_default(ALEMBIC_ROOT_DIR "${_cycles_lib_dir}/alembic")
_set_default(BOOST_ROOT "${_cycles_lib_dir}/boost")
_set_default(BLOSC_ROOT_DIR "${_cycles_lib_dir}/blosc")
_set_default(EMBREE_ROOT_DIR "${_cycles_lib_dir}/embree")
_set_default(EPOXY_ROOT_DIR "${_cycles_lib_dir}/epoxy")
_set_default(IMATH_ROOT_DIR "${_cycles_lib_dir}/imath")
_set_default(GLEW_ROOT_DIR "${_cycles_lib_dir}/glew")
_set_default(JPEG_ROOT "${_cycles_lib_dir}/jpeg")
_set_default(LLVM_ROOT_DIR "${_cycles_lib_dir}/llvm")
_set_default(CLANG_ROOT_DIR "${_cycles_lib_dir}/llvm")
_set_default(NANOVDB_ROOT_DIR "${_cycles_lib_dir}/openvdb")
_set_default(OPENCOLORIO_ROOT_DIR "${_cycles_lib_dir}/opencolorio")
_set_default(OPENEXR_ROOT_DIR "${_cycles_lib_dir}/openexr")
_set_default(OPENIMAGEDENOISE_ROOT_DIR "${_cycles_lib_dir}/openimagedenoise")
_set_default(OPENIMAGEIO_ROOT_DIR "${_cycles_lib_dir}/openimageio")
_set_default(OPENJPEG_ROOT_DIR "${_cycles_lib_dir}/openjpeg")
_set_default(OPENSUBDIV_ROOT_DIR "${_cycles_lib_dir}/opensubdiv")
_set_default(OPENVDB_ROOT_DIR "${_cycles_lib_dir}/openvdb")
_set_default(OSL_ROOT_DIR "${_cycles_lib_dir}/osl")
_set_default(PNG_ROOT "${_cycles_lib_dir}/png")
_set_default(PUGIXML_ROOT_DIR "${_cycles_lib_dir}/pugixml")
_set_default(SDL2_ROOT_DIR "${_cycles_lib_dir}/sdl")
_set_default(TBB_ROOT_DIR "${_cycles_lib_dir}/tbb")
_set_default(TIFF_ROOT "${_cycles_lib_dir}/tiff")
_set_default(USD_ROOT_DIR "${_cycles_lib_dir}/usd")
_set_default(WEBP_ROOT_DIR "${_cycles_lib_dir}/webp")
_set_default(ZLIB_ROOT "${_cycles_lib_dir}/zlib")
if(WIN32)
set(LEVEL_ZERO_ROOT_DIR ${_cycles_lib_dir}/level_zero)
else()
set(LEVEL_ZERO_ROOT_DIR ${_cycles_lib_dir}/level-zero)
endif()
_set_default(SYCL_ROOT_DIR "${_cycles_lib_dir}/dpcpp")
# Ignore system libraries
set(CMAKE_IGNORE_PATH "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES};${CMAKE_SYSTEM_INCLUDE_PATH};${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES};${CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES}")
else()
unset(_cycles_lib_dir)
endif()
endif()
###########################################################################
# Zlib
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(ZLIB_INCLUDE_DIRS ${_cycles_lib_dir}/zlib/include)
set(ZLIB_LIBRARIES ${_cycles_lib_dir}/zlib/lib/libz_st.lib)
set(ZLIB_INCLUDE_DIR ${_cycles_lib_dir}/zlib/include)
set(ZLIB_LIBRARY ${_cycles_lib_dir}/zlib/lib/libz_st.lib)
set(ZLIB_DIR ${_cycles_lib_dir}/zlib)
set(ZLIB_FOUND ON)
elseif(NOT APPLE)
find_package(ZLIB REQUIRED)
endif()
endif()
###########################################################################
# PThreads
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(PTHREADS_LIBRARIES "${_cycles_lib_dir}/pthreads/lib/pthreadVC3.lib")
include_directories("${_cycles_lib_dir}/pthreads/include")
else()
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
find_package(Threads REQUIRED)
set(PTHREADS_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
endif()
endif()
###########################################################################
# OpenImageIO and image libraries
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY)
if(MSVC AND EXISTS ${_cycles_lib_dir})
add_definitions(
# OIIO changed the name of this define in newer versions
# we define both, so it would work with both old and new
# versions.
-DOIIO_STATIC_BUILD
-DOIIO_STATIC_DEFINE
)
set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO_ROOT_DIR}/include)
set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR} ${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO)
# Special exceptions for libraries which needs explicit debug version
set(OPENIMAGEIO_LIBRARIES
optimized ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO.lib
optimized ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO_Util.lib
debug ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO_d.lib
debug ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO_Util_d.lib
)
set(PUGIXML_INCLUDE_DIR ${PUGIXML_ROOT_DIR}/include)
set(PUGIXML_LIBRARIES
optimized ${PUGIXML_ROOT_DIR}/lib/pugixml.lib
debug ${PUGIXML_ROOT_DIR}/lib/pugixml_d.lib
)
else()
find_package(OpenImageIO REQUIRED)
if(OPENIMAGEIO_PUGIXML_FOUND)
set(PUGIXML_INCLUDE_DIR "${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO")
set(PUGIXML_LIBRARIES "")
else()
find_package(PugiXML REQUIRED)
endif()
endif()
# Dependencies
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(OPENJPEG_INCLUDE_DIR ${OPENJPEG}/include/openjpeg-2.3)
set(OPENJPEG_LIBRARIES ${_cycles_lib_dir}/openjpeg/lib/openjp2${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
find_package(OpenJPEG REQUIRED)
endif()
find_package(JPEG REQUIRED)
find_package(TIFF REQUIRED)
find_package(WebP)
if(EXISTS ${_cycles_lib_dir})
set(PNG_NAMES png16 libpng16 png libpng)
endif()
find_package(PNG REQUIRED)
endif()
###########################################################################
# OpenEXR
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(OPENEXR_INCLUDE_DIR ${OPENEXR_ROOT_DIR}/include)
set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${OPENEXR_ROOT_DIR}/include/OpenEXR ${IMATH_ROOT_DIR}/include ${IMATH_ROOT_DIR}/include/Imath)
set(OPENEXR_LIBRARIES
optimized ${OPENEXR_ROOT_DIR}/lib/OpenEXR_s.lib
optimized ${OPENEXR_ROOT_DIR}/lib/OpenEXRCore_s.lib
optimized ${OPENEXR_ROOT_DIR}/lib/Iex_s.lib
optimized ${IMATH_ROOT_DIR}/lib/Imath_s.lib
optimized ${OPENEXR_ROOT_DIR}/lib/IlmThread_s.lib
debug ${OPENEXR_ROOT_DIR}/lib/OpenEXR_s_d.lib
debug ${OPENEXR_ROOT_DIR}/lib/OpenEXRCore_s_d.lib
debug ${OPENEXR_ROOT_DIR}/lib/Iex_s_d.lib
debug ${IMATH_ROOT_DIR}/lib/Imath_s_d.lib
debug ${OPENEXR_ROOT_DIR}/lib/IlmThread_s_d.lib
)
else()
find_package(OpenEXR REQUIRED)
endif()
endif()
###########################################################################
# OpenShadingLanguage & LLVM
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OSL)
if(EXISTS ${_cycles_lib_dir})
set(LLVM_STATIC ON)
endif()
if(MSVC AND EXISTS ${_cycles_lib_dir})
# TODO(sergey): On Windows llvm-config doesn't give proper results for the
# library names, use hardcoded libraries for now.
file(GLOB _llvm_libs_release ${LLVM_ROOT_DIR}/lib/*.lib)
file(GLOB _llvm_libs_debug ${LLVM_ROOT_DIR}/debug/lib/*.lib)
set(_llvm_libs)
foreach(_llvm_lib_path ${_llvm_libs_release})
get_filename_component(_llvm_lib_name ${_llvm_lib_path} ABSOLUTE)
list(APPEND _llvm_libs optimized ${_llvm_lib_name})
endforeach()
foreach(_llvm_lib_path ${_llvm_libs_debug})
get_filename_component(_llvm_lib_name ${_llvm_lib_path} ABSOLUTE)
list(APPEND _llvm_libs debug ${_llvm_lib_name})
endforeach()
set(LLVM_LIBRARY ${_llvm_libs})
unset(_llvm_lib_name)
unset(_llvm_lib_path)
unset(_llvm_libs)
unset(_llvm_libs_debug)
unset(_llvm_libs_release)
set(OSL_INCLUDE_DIR ${OSL_ROOT_DIR}/include)
set(OSL_LIBRARIES
optimized ${OSL_ROOT_DIR}/lib/oslcomp.lib
optimized ${OSL_ROOT_DIR}/lib/oslexec.lib
optimized ${OSL_ROOT_DIR}/lib/oslquery.lib
debug ${OSL_ROOT_DIR}/lib/oslcomp_d.lib
debug ${OSL_ROOT_DIR}/lib/oslexec_d.lib
debug ${OSL_ROOT_DIR}/lib/oslquery_d.lib
${PUGIXML_LIBRARIES}
)
find_program(OSL_COMPILER NAMES oslc PATHS ${OSL_ROOT_DIR}/bin)
else()
find_package(OSL REQUIRED)
find_package(LLVM REQUIRED)
find_package(Clang REQUIRED)
endif()
endif()
###########################################################################
# OpenPGL
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_PATH_GUIDING)
if(NOT openpgl_DIR AND EXISTS ${_cycles_lib_dir})
set(openpgl_DIR ${_cycles_lib_dir}/openpgl/lib/cmake/openpgl)
endif()
find_package(openpgl QUIET)
if(openpgl_FOUND)
if(WIN32)
get_target_property(OPENPGL_LIBRARIES_RELEASE openpgl::openpgl LOCATION_RELEASE)
get_target_property(OPENPGL_LIBRARIES_DEBUG openpgl::openpgl LOCATION_DEBUG)
set(OPENPGL_LIBRARIES optimized ${OPENPGL_LIBRARIES_RELEASE} debug ${OPENPGL_LIBRARIES_DEBUG})
else()
get_target_property(OPENPGL_LIBRARIES openpgl::openpgl LOCATION)
endif()
get_target_property(OPENPGL_INCLUDE_DIR openpgl::openpgl INTERFACE_INCLUDE_DIRECTORIES)
else()
set_and_warn_library_found("OpenPGL" openpgl_FOUND WITH_CYCLES_PATH_GUIDING)
endif()
endif()
###########################################################################
# OpenColorIO
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OPENCOLORIO)
set(WITH_OPENCOLORIO ON)
if(NOT USD_OVERRIDE_OPENCOLORIO)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(OPENCOLORIO_INCLUDE_DIRS ${OPENCOLORIO_ROOT_DIR}/include)
set(OPENCOLORIO_LIBRARIES
optimized ${OPENCOLORIO_ROOT_DIR}/lib/OpenColorIO.lib
optimized ${OPENCOLORIO_ROOT_DIR}/lib/libyaml-cpp.lib
optimized ${OPENCOLORIO_ROOT_DIR}/lib/libexpatMD.lib
optimized ${OPENCOLORIO_ROOT_DIR}/lib/pystring.lib
debug ${OPENCOLORIO_ROOT_DIR}/lib/OpencolorIO_d.lib
debug ${OPENCOLORIO_ROOT_DIR}/lib/libyaml-cpp_d.lib
debug ${OPENCOLORIO_ROOT_DIR}/lib/libexpatdMD.lib
debug ${OPENCOLORIO_ROOT_DIR}/lib/pystring_d.lib
)
else()
find_package(OpenColorIO REQUIRED)
endif()
endif()
endif()
###########################################################################
# Boost
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY)
if(EXISTS ${_cycles_lib_dir})
if(MSVC)
set(Boost_USE_STATIC_RUNTIME OFF)
set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_LIBS ON)
else()
set(BOOST_LIBRARYDIR ${_cycles_lib_dir}/boost/lib)
set(Boost_NO_BOOST_CMAKE ON)
set(Boost_NO_SYSTEM_PATHS ON)
endif()
endif()
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(BOOST_INCLUDE_DIR ${BOOST_ROOT}/include)
set(BOOST_VERSION_HEADER ${BOOST_INCLUDE_DIR}/boost/version.hpp)
if(EXISTS ${BOOST_VERSION_HEADER})
file(STRINGS "${BOOST_VERSION_HEADER}" BOOST_LIB_VERSION REGEX "#define BOOST_LIB_VERSION ")
if(BOOST_LIB_VERSION MATCHES "#define BOOST_LIB_VERSION \"([0-9_]+)\"")
set(BOOST_VERSION "${CMAKE_MATCH_1}")
endif()
endif()
if(NOT BOOST_VERSION)
message(FATAL_ERROR "Unable to determine Boost version")
endif()
set(BOOST_POSTFIX "vc142-mt-x64-${BOOST_VERSION}.lib")
set(BOOST_DEBUG_POSTFIX "vc142-mt-gd-x64-${BOOST_VERSION}.lib")
set(BOOST_LIBRARIES
optimized ${BOOST_ROOT}/lib/libboost_date_time-${BOOST_POSTFIX}
optimized ${BOOST_ROOT}/lib/libboost_iostreams-${BOOST_POSTFIX}
optimized ${BOOST_ROOT}/lib/libboost_filesystem-${BOOST_POSTFIX}
optimized ${BOOST_ROOT}/lib/libboost_regex-${BOOST_POSTFIX}
optimized ${BOOST_ROOT}/lib/libboost_system-${BOOST_POSTFIX}
optimized ${BOOST_ROOT}/lib/libboost_thread-${BOOST_POSTFIX}
optimized ${BOOST_ROOT}/lib/libboost_chrono-${BOOST_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_date_time-${BOOST_DEBUG_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_iostreams-${BOOST_DEBUG_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_filesystem-${BOOST_DEBUG_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_regex-${BOOST_DEBUG_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_system-${BOOST_DEBUG_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_thread-${BOOST_DEBUG_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_chrono-${BOOST_DEBUG_POSTFIX}
)
if(WITH_CYCLES_OSL)
set(BOOST_LIBRARIES ${BOOST_LIBRARIES}
optimized ${BOOST_ROOT}/lib/libboost_wave-${BOOST_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_wave-${BOOST_DEBUG_POSTFIX})
endif()
else()
set(__boost_packages iostreams filesystem regex system thread date_time)
if(WITH_CYCLES_OSL)
list(APPEND __boost_packages wave)
endif()
find_package(Boost 1.48 COMPONENTS ${__boost_packages} REQUIRED)
if(NOT Boost_FOUND)
# Try to find non-multithreaded if -mt not found, this flag
# doesn't matter for us, it has nothing to do with thread
# safety, but keep it to not disturb build setups.
set(Boost_USE_MULTITHREADED OFF)
find_package(Boost 1.48 COMPONENTS ${__boost_packages})
endif()
unset(__boost_packages)
set(BOOST_INCLUDE_DIR ${Boost_INCLUDE_DIRS})
set(BOOST_LIBRARIES ${Boost_LIBRARIES})
set(BOOST_LIBPATH ${Boost_LIBRARY_DIRS})
endif()
set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB ${BOOST_DEFINITIONS}")
endif()
###########################################################################
# Embree
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_EMBREE)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(EMBREE_INCLUDE_DIRS ${EMBREE_ROOT_DIR}/include)
set(EMBREE_LIBRARIES
optimized ${EMBREE_ROOT_DIR}/lib/embree3.lib
optimized ${EMBREE_ROOT_DIR}/lib/embree_avx2.lib
optimized ${EMBREE_ROOT_DIR}/lib/embree_avx.lib
optimized ${EMBREE_ROOT_DIR}/lib/embree_sse42.lib
optimized ${EMBREE_ROOT_DIR}/lib/lexers.lib
optimized ${EMBREE_ROOT_DIR}/lib/math.lib
optimized ${EMBREE_ROOT_DIR}/lib/simd.lib
optimized ${EMBREE_ROOT_DIR}/lib/tasking.lib
optimized ${EMBREE_ROOT_DIR}/lib/sys.lib
debug ${EMBREE_ROOT_DIR}/lib/embree3_d.lib
debug ${EMBREE_ROOT_DIR}/lib/embree_avx2_d.lib
debug ${EMBREE_ROOT_DIR}/lib/embree_avx_d.lib
debug ${EMBREE_ROOT_DIR}/lib/embree_sse42_d.lib
debug ${EMBREE_ROOT_DIR}/lib/lexers_d.lib
debug ${EMBREE_ROOT_DIR}/lib/math_d.lib
debug ${EMBREE_ROOT_DIR}/lib/simd_d.lib
debug ${EMBREE_ROOT_DIR}/lib/sys_d.lib
debug ${EMBREE_ROOT_DIR}/lib/tasking_d.lib
)
else()
find_package(Embree 3.8.0 REQUIRED)
endif()
endif()
###########################################################################
# Logging
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_LOGGING)
find_package(Glog REQUIRED)
find_package(Gflags REQUIRED)
endif()
###########################################################################
# OpenSubdiv
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OPENSUBDIV)
set(WITH_OPENSUBDIV ON)
if(NOT USD_OVERRIDE_OPENSUBDIV)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(OPENSUBDIV_INCLUDE_DIRS ${OPENSUBDIV_ROOT_DIR}/include)
set(OPENSUBDIV_LIBRARIES
optimized ${OPENSUBDIV_ROOT_DIR}/lib/osdCPU.lib
optimized ${OPENSUBDIV_ROOT_DIR}/lib/osdGPU.lib
debug ${OPENSUBDIV_ROOT_DIR}/lib/osdCPU_d.lib
debug ${OPENSUBDIV_ROOT_DIR}/lib/osdGPU_d.lib
)
else()
find_package(OpenSubdiv REQUIRED)
endif()
endif()
endif()
###########################################################################
# OpenVDB
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OPENVDB)
set(WITH_OPENVDB ON)
set(OPENVDB_DEFINITIONS -DNOMINMAX -D_USE_MATH_DEFINES)
if(NOT USD_OVERRIDE_OPENVDB)
find_package(OpenVDB REQUIRED)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(BLOSC_LIBRARY
optimized ${BLOSC_ROOT_DIR}/lib/libblosc.lib
debug ${BLOSC_ROOT_DIR}/lib/libblosc_d.lib
)
else()
find_package(Blosc REQUIRED)
endif()
endif()
endif()
###########################################################################
# NanoVDB
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_NANOVDB)
set(WITH_NANOVDB ON)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(NANOVDB_INCLUDE_DIR ${NANOVDB_ROOT_DIR}/include)
set(NANOVDB_INCLUDE_DIRS ${NANOVDB_INCLUDE_DIR})
else()
find_package(NanoVDB REQUIRED)
endif()
endif()
###########################################################################
# OpenImageDenoise
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OPENIMAGEDENOISE)
set(WITH_OPENIMAGEDENOISE ON)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(OPENIMAGEDENOISE_INCLUDE_DIRS ${OPENIMAGEDENOISE_ROOT_DIR}/include)
set(OPENIMAGEDENOISE_LIBRARIES
optimized ${OPENIMAGEDENOISE_ROOT_DIR}/lib/OpenImageDenoise.lib
optimized ${OPENIMAGEDENOISE_ROOT_DIR}/lib/common.lib
optimized ${OPENIMAGEDENOISE_ROOT_DIR}/lib/dnnl.lib
debug ${OPENIMAGEDENOISE_ROOT_DIR}/lib/OpenImageDenoise_d.lib
debug ${OPENIMAGEDENOISE_ROOT_DIR}/lib/common_d.lib
debug ${OPENIMAGEDENOISE_ROOT_DIR}/lib/dnnl_d.lib
)
else()
find_package(OpenImageDenoise REQUIRED)
endif()
endif()
###########################################################################
# TBB
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY)
if(NOT USD_OVERRIDE_TBB)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(TBB_INCLUDE_DIRS ${TBB_ROOT_DIR}/include)
set(TBB_LIBRARIES
optimized ${TBB_ROOT_DIR}/lib/tbb.lib
debug ${TBB_ROOT_DIR}/lib/tbb_debug.lib
)
else()
find_package(TBB REQUIRED)
endif()
endif()
endif()
###########################################################################
# Epoxy
###########################################################################
if(CYCLES_STANDALONE_REPOSITORY)
if((WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI) OR
WITH_CYCLES_HYDRA_RENDER_DELEGATE)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(Epoxy_LIBRARIES "${_cycles_lib_dir}/epoxy/lib/epoxy.lib")
set(Epoxy_INCLUDE_DIRS "${_cycles_lib_dir}/epoxy/include")
else()
find_package(Epoxy REQUIRED)
endif()
endif()
endif()
###########################################################################
# Alembic
###########################################################################
if(WITH_CYCLES_ALEMBIC)
if(CYCLES_STANDALONE_REPOSITORY)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(ALEMBIC_INCLUDE_DIRS ${_cycles_lib_dir}/alembic/include)
set(ALEMBIC_LIBRARIES
optimized ${_cycles_lib_dir}/alembic/lib/Alembic.lib
debug ${_cycles_lib_dir}/alembic/lib/Alembic_d.lib)
else()
find_package(Alembic REQUIRED)
endif()
set(WITH_ALEMBIC ON)
endif()
endif()
###########################################################################
# System Libraries
###########################################################################
# Detect system libraries again
if(EXISTS ${_cycles_lib_dir})
unset(CMAKE_IGNORE_PATH)
unset(_cycles_lib_dir)
endif()
###########################################################################
# SDL
###########################################################################
@@ -109,3 +687,5 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
set(WITH_CYCLES_DEVICE_ONEAPI OFF)
endif()
endif()
unset(_cycles_lib_dir)

View File

@@ -8,13 +8,28 @@ set(INC
set(INC_SYS )
if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
if(NOT WITH_CUDA_DYNLOAD)
if(WITH_CUDA_DYNLOAD)
list(APPEND INC
../../../extern/cuew/include
)
add_definitions(-DWITH_CUDA_DYNLOAD)
else()
list(APPEND INC_SYS
${CUDA_TOOLKIT_INCLUDE}
)
add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
endif()
add_definitions(-DCYCLES_RUNTIME_OPTIX_ROOT_DIR="${CYCLES_RUNTIME_OPTIX_ROOT_DIR}")
endif()
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
list(APPEND INC
../../../extern/hipew/include
)
add_definitions(-DWITH_HIP_DYNLOAD)
endif()
set(SRC_BASE
device.cpp
denoise.cpp
@@ -153,15 +168,24 @@ if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
)
endif()
if(WITH_CYCLES_DEVICE_CUDA)
add_definitions(-DWITH_CUDA)
endif()
if(WITH_CYCLES_DEVICE_HIP)
add_definitions(-DWITH_HIP)
endif()
if(WITH_CYCLES_DEVICE_OPTIX)
add_definitions(-DWITH_OPTIX)
endif()
if(WITH_CYCLES_DEVICE_METAL)
list(APPEND LIB
${METAL_LIBRARY}
)
add_definitions(-DWITH_METAL)
list(APPEND SRC
${SRC_METAL}
)
endif()
if (WITH_CYCLES_DEVICE_ONEAPI)
if(WITH_CYCLES_ONEAPI_BINARIES)
set(cycles_kernel_oneapi_lib_suffix "_aot")
@@ -179,6 +203,7 @@ if (WITH_CYCLES_DEVICE_ONEAPI)
else()
list(APPEND LIB ${SYCL_LIBRARY})
endif()
add_definitions(-DWITH_ONEAPI)
list(APPEND SRC
${SRC_ONEAPI}
)

View File

@@ -232,7 +232,7 @@ string CUDADevice::compile_kernel_get_common_cflags(const uint kernel_features)
return cflags;
}
string CUDADevice::compile_kernel(const string &common_cflags,
string CUDADevice::compile_kernel(const uint kernel_features,
const char *name,
const char *base,
bool force_ptx)
@@ -281,6 +281,7 @@ string CUDADevice::compile_kernel(const string &common_cflags,
/* We include cflags into md5 so changing cuda toolkit or changing other
* compiler command line arguments makes sure cubin gets re-built.
*/
string common_cflags = compile_kernel_get_common_cflags(kernel_features);
const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
const char *const kernel_ext = force_ptx ? "ptx" : "cubin";
@@ -416,8 +417,7 @@ bool CUDADevice::load_kernels(const uint kernel_features)
/* get kernel */
const char *kernel_name = "kernel";
string cflags = compile_kernel_get_common_cflags(kernel_features);
string cubin = compile_kernel(cflags, kernel_name);
string cubin = compile_kernel(kernel_features, kernel_name);
if (cubin.empty())
return false;

View File

@@ -77,9 +77,9 @@ class CUDADevice : public Device {
bool use_adaptive_compilation();
string compile_kernel_get_common_cflags(const uint kernel_features);
virtual string compile_kernel_get_common_cflags(const uint kernel_features);
string compile_kernel(const string &cflags,
string compile_kernel(const uint kernel_features,
const char *name,
const char *base = "cuda",
bool force_ptx = false);

View File

@@ -38,7 +38,7 @@ class CUDADeviceGraphicsInterop : public DeviceGraphicsInterop {
CUDADevice *device_ = nullptr;
/* OpenGL PBO which is currently registered as the destination for the CUDA buffer. */
int64_t opengl_pbo_id_ = 0;
uint opengl_pbo_id_ = 0;
/* Buffer area in pixels of the corresponding PBO. */
int64_t buffer_area_ = 0;

View File

@@ -78,4 +78,24 @@ class DenoiseParams : public Node {
}
};
/* All the parameters needed to perform buffer denoising on a device.
* Is not really a task in its canonical terms (as in, is not an asynchronous running task). Is
* more like a wrapper for all the arguments and parameters needed to perform denoising. Is a
* single place where they are all listed, so that it's not required to modify all device methods
* when these parameters do change. */
class DeviceDenoiseTask {
public:
DenoiseParams params;
int num_samples;
RenderBuffers *render_buffers;
BufferParams buffer_params;
/* Allow to do in-place modification of the input passes (scaling them down i.e.). This will
* lower the memory footprint of the denoiser but will make input passes "invalid" (from path
* tracer) point of view. */
bool allow_inplace_modification;
};
CCL_NAMESPACE_END

View File

@@ -351,7 +351,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.num = 0;
info.has_nanovdb = true;
info.has_light_tree = true;
info.has_osl = true;
info.has_guiding = true;
info.has_profiling = true;
@@ -400,7 +399,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
/* Accumulate device info. */
info.has_nanovdb &= device.has_nanovdb;
info.has_light_tree &= device.has_light_tree;
info.has_osl &= device.has_osl;
info.has_guiding &= device.has_guiding;
info.has_profiling &= device.has_profiling;

View File

@@ -65,7 +65,6 @@ class DeviceInfo {
int num;
bool display_device; /* GPU is used as a display device. */
bool has_nanovdb; /* Support NanoVDB volumes. */
bool has_light_tree; /* Support light tree. */
bool has_osl; /* Support Open Shading Language. */
bool has_guiding; /* Support path guiding. */
bool has_profiling; /* Supports runtime collection of profiling info. */
@@ -85,7 +84,6 @@ class DeviceInfo {
cpu_threads = 0;
display_device = false;
has_nanovdb = false;
has_light_tree = true;
has_osl = false;
has_guiding = false;
has_profiling = false;
@@ -162,11 +160,6 @@ class Device {
return true;
}
virtual bool load_osl_kernels()
{
return true;
}
/* GPU device only functions.
* These may not be used on CPU or multi-devices. */
@@ -235,6 +228,21 @@ class Device {
return nullptr;
}
/* Buffer denoising. */
/* Returns true if task is fully handled. */
virtual bool denoise_buffer(const DeviceDenoiseTask & /*task*/)
{
LOG(ERROR) << "Request buffer denoising from a device which does not support it.";
return false;
}
virtual DeviceQueue *get_denoise_queue()
{
LOG(ERROR) << "Request denoising queue from a device which does not support it.";
return nullptr;
}
/* Sub-devices */
/* Run given callback for every individual device which will be handling rendering.

View File

@@ -137,7 +137,6 @@ void device_hip_info(vector<DeviceInfo> &devices)
info.num = num;
info.has_nanovdb = true;
info.has_light_tree = false;
info.denoisers = 0;
info.has_gpu_queue = true;

View File

@@ -74,7 +74,7 @@ class HIPDevice : public Device {
bool use_adaptive_compilation();
string compile_kernel_get_common_cflags(const uint kernel_features);
virtual string compile_kernel_get_common_cflags(const uint kernel_features);
string compile_kernel(const uint kernel_features, const char *name, const char *base = "hip");

View File

@@ -36,7 +36,7 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
HIPDevice *device_ = nullptr;
/* OpenGL PBO which is currently registered as the destination for the HIP buffer. */
int64_t opengl_pbo_id_ = 0;
uint opengl_pbo_id_ = 0;
/* Buffer area in pixels of the corresponding PBO. */
int64_t buffer_area_ = 0;

View File

@@ -7,30 +7,6 @@
CCL_NAMESPACE_BEGIN
bool device_kernel_has_shading(DeviceKernel kernel)
{
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW ||
kernel == DEVICE_KERNEL_SHADER_EVAL_DISPLACE ||
kernel == DEVICE_KERNEL_SHADER_EVAL_BACKGROUND ||
kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY);
}
bool device_kernel_has_intersection(DeviceKernel kernel)
{
return (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE);
}
const char *device_kernel_as_string(DeviceKernel kernel)
{
switch (kernel) {

View File

@@ -11,9 +11,6 @@
CCL_NAMESPACE_BEGIN
bool device_kernel_has_shading(DeviceKernel kernel);
bool device_kernel_has_intersection(DeviceKernel kernel);
const char *device_kernel_as_string(DeviceKernel kernel);
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);

View File

@@ -496,7 +496,7 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
num_motion_steps = pointcloud->get_motion_steps();
}
const size_t num_aabbs = num_motion_steps * num_points;
const size_t num_aabbs = num_motion_steps;
MTLResourceOptions storage_mode;
if (device.hasUnifiedMemory) {
@@ -757,10 +757,6 @@ bool BVHMetal::build_TLAS(Progress &progress,
}
}
if (num_instances == 0) {
return false;
}
/*------------------------------------------------*/
BVH_status("Building TLAS | %7d instances", (int)num_instances);
/*------------------------------------------------*/

View File

@@ -117,8 +117,6 @@ class MetalDevice : public Device {
/* ------------------------------------------------------------------ */
/* low-level memory management */
bool max_working_set_exceeded(size_t safety_margin = 8 * 1024 * 1024) const;
MetalMem *generic_alloc(device_memory &mem);
void generic_copy_to(device_memory &mem);

View File

@@ -307,9 +307,6 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
MD5Hash md5;
md5.append(baked_constants);
md5.append(source);
if (use_metalrt) {
md5.append(std::to_string(kernel_features & METALRT_FEATURE_MASK));
}
source_md5[pso_type] = md5.get_hex();
}
@@ -446,14 +443,6 @@ void MetalDevice::erase_allocation(device_memory &mem)
}
}
bool MetalDevice::max_working_set_exceeded(size_t safety_margin) const
{
/* We're allowed to allocate beyond the safe working set size, but then if all resources are made
* resident we will get command buffer failures at render time. */
size_t available = [mtlDevice recommendedMaxWorkingSetSize] - safety_margin;
return (stats.mem_used > available);
}
MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
{
size_t size = mem.memory_size();
@@ -531,11 +520,6 @@ MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
mmem->use_UMA = false;
}
if (max_working_set_exceeded()) {
set_error("System is out of GPU memory");
return nullptr;
}
return mmem;
}
@@ -934,8 +918,9 @@ void MetalDevice::tex_alloc(device_texture &mem)
<< string_human_readable_size(mem.memory_size()) << ")";
mtlTexture = [mtlDevice newTextureWithDescriptor:desc];
assert(mtlTexture);
if (!mtlTexture) {
set_error("System is out of GPU memory");
return;
}
@@ -967,10 +952,7 @@ void MetalDevice::tex_alloc(device_texture &mem)
<< string_human_readable_size(mem.memory_size()) << ")";
mtlTexture = [mtlDevice newTextureWithDescriptor:desc];
if (!mtlTexture) {
set_error("System is out of GPU memory");
return;
}
assert(mtlTexture);
[mtlTexture replaceRegion:MTLRegionMake2D(0, 0, mem.data_width, mem.data_height)
mipmapLevel:0
@@ -1032,10 +1014,6 @@ void MetalDevice::tex_alloc(device_texture &mem)
need_texture_info = true;
texture_info[slot].data = uint64_t(slot) | (sampler_index << 32);
if (max_working_set_exceeded()) {
set_error("System is out of GPU memory");
}
}
void MetalDevice::tex_free(device_texture &mem)
@@ -1096,10 +1074,6 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
}
}
if (max_working_set_exceeded()) {
set_error("System is out of GPU memory");
}
}
CCL_NAMESPACE_END

View File

@@ -54,10 +54,6 @@ enum MetalPipelineType {
PSO_NUM
};
# define METALRT_FEATURE_MASK \
(KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD | \
KERNEL_FEATURE_OBJECT_MOTION)
const char *kernel_type_as_string(MetalPipelineType pso_type);
struct MetalKernelPipeline {
@@ -71,7 +67,9 @@ struct MetalKernelPipeline {
KernelData kernel_data_;
bool use_metalrt;
uint32_t metalrt_features = 0;
bool metalrt_hair;
bool metalrt_hair_thick;
bool metalrt_pointcloud;
int threads_per_threadgroup;

View File

@@ -274,9 +274,12 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
/* metalrt options */
request.pipeline->use_metalrt = device->use_metalrt;
request.pipeline->metalrt_features = device->use_metalrt ?
(device->kernel_features & METALRT_FEATURE_MASK) :
0;
request.pipeline->metalrt_hair = device->use_metalrt &&
(device->kernel_features & KERNEL_FEATURE_HAIR);
request.pipeline->metalrt_hair_thick = device->use_metalrt &&
(device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
request.pipeline->metalrt_pointcloud = device->use_metalrt &&
(device->kernel_features & KERNEL_FEATURE_POINTCLOUD);
{
thread_scoped_lock lock(cache_mutex);
@@ -313,13 +316,9 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
/* metalrt options */
bool use_metalrt = device->use_metalrt;
bool device_metalrt_hair = use_metalrt && device->kernel_features & KERNEL_FEATURE_HAIR;
bool device_metalrt_hair_thick = use_metalrt &&
device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
bool device_metalrt_pointcloud = use_metalrt &&
device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
bool device_metalrt_motion = use_metalrt &&
device->kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
bool metalrt_hair = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR);
bool metalrt_hair_thick = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
bool metalrt_pointcloud = use_metalrt && (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);
MetalKernelPipeline *best_pipeline = nullptr;
for (auto &pipeline : collection) {
@@ -328,16 +327,9 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
continue;
}
bool pipeline_metalrt_hair = pipeline->metalrt_features & KERNEL_FEATURE_HAIR;
bool pipeline_metalrt_hair_thick = pipeline->metalrt_features & KERNEL_FEATURE_HAIR_THICK;
bool pipeline_metalrt_pointcloud = pipeline->metalrt_features & KERNEL_FEATURE_POINTCLOUD;
bool pipeline_metalrt_motion = use_metalrt &&
pipeline->metalrt_features & KERNEL_FEATURE_OBJECT_MOTION;
if (pipeline->use_metalrt != use_metalrt || pipeline_metalrt_hair != device_metalrt_hair ||
pipeline_metalrt_hair_thick != device_metalrt_hair_thick ||
pipeline_metalrt_pointcloud != device_metalrt_pointcloud ||
pipeline_metalrt_motion != device_metalrt_motion) {
if (pipeline->use_metalrt != use_metalrt || pipeline->metalrt_hair != metalrt_hair ||
pipeline->metalrt_hair_thick != metalrt_hair_thick ||
pipeline->metalrt_pointcloud != metalrt_pointcloud) {
/* wrong combination of metalrt options */
continue;
}
@@ -408,8 +400,6 @@ static MTLFunctionConstantValues *GetConstantValues(KernelData const *data = nul
if (!data) {
data = &zero_data;
}
int zero_int = 0;
[constant_values setConstantValue:&zero_int type:MTLDataType_int atIndex:Kernel_DummyConstant];
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
[constant_values setConstantValue:&data->parent.name \
@@ -433,7 +423,10 @@ void MetalKernelPipeline::compile()
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
func_desc.name = entryPoint;
if (pso_type != PSO_GENERIC) {
if (pso_type == PSO_SPECIALIZED_SHADE) {
func_desc.constantValues = GetConstantValues(&kernel_data_);
}
else if (pso_type == PSO_SPECIALIZED_INTERSECT) {
func_desc.constantValues = GetConstantValues(&kernel_data_);
}
else {
@@ -478,13 +471,6 @@ void MetalKernelPipeline::compile()
const char *function_name = function_names[i];
desc.name = [@(function_name) copy];
if (pso_type != PSO_GENERIC) {
desc.constantValues = GetConstantValues(&kernel_data_);
}
else {
desc.constantValues = GetConstantValues();
}
NSError *error = NULL;
rt_intersection_function[i] = [mtlLibrary newFunctionWithDescriptor:desc error:&error];
@@ -505,10 +491,6 @@ void MetalKernelPipeline::compile()
NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
NSArray *linked_functions = nil;
bool metalrt_hair = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR);
bool metalrt_hair_thick = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR_THICK);
bool metalrt_pointcloud = use_metalrt && (metalrt_features & KERNEL_FEATURE_POINTCLOUD);
if (use_metalrt) {
id<MTLFunction> curve_intersect_default = nil;
id<MTLFunction> curve_intersect_shadow = nil;
@@ -636,9 +618,7 @@ void MetalKernelPipeline::compile()
metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
path_create_directories(metalbin_path);
/* Retrieve shader binary from disk, and update the file timestamp for LRU purging to work as
* intended. */
if (use_binary_archive && path_cache_kernel_exists_and_mark_used(metalbin_path)) {
if (path_exists(metalbin_path) && use_binary_archive) {
if (@available(macOS 11.0, *)) {
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
@@ -715,9 +695,6 @@ void MetalKernelPipeline::compile()
metal_printf("Failed to save binary archive, error:\n%s\n",
[[error localizedDescription] UTF8String]);
}
else {
path_cache_kernel_mark_added_and_clear_old(metalbin_path);
}
}
}
};
@@ -753,8 +730,7 @@ void MetalKernelPipeline::compile()
newIntersectionFunctionTableWithDescriptor:ift_desc];
/* Finally write the function handles into this pipeline's table */
int size = (int)[table_functions[table] count];
for (int i = 0; i < size; i++) {
for (int i = 0; i < 2; i++) {
id<MTLFunctionHandle> handle = [pipeline
functionHandleWithFunction:table_functions[table][i]];
[intersection_func_table[table] setFunction:handle atIndex:i];

View File

@@ -138,15 +138,6 @@ class MultiDevice : public Device {
return true;
}
bool load_osl_kernels() override
{
foreach (SubDevice &sub, devices)
if (!sub.device->load_osl_kernels())
return false;
return true;
}
void build_bvh(BVH *bvh, Progress &progress, bool refit) override
{
/* Try to build and share a single acceleration structure, if possible */
@@ -213,12 +204,10 @@ class MultiDevice : public Device {
virtual void *get_cpu_osl_memory() override
{
/* Always return the OSL memory of the CPU device (this works since the constructor above
* guarantees that CPU devices are always added to the back). */
if (devices.size() > 1 && devices.back().device->info.type != DEVICE_CPU) {
if (devices.size() > 1) {
return NULL;
}
return devices.back().device->get_cpu_osl_memory();
return devices.front().device->get_cpu_osl_memory();
}
bool is_resident(device_ptr key, Device *sub_device) override

View File

@@ -31,8 +31,6 @@ bool device_oneapi_init()
* improves stability as of intel/LLVM SYCL-nightly/20220529.
* All these env variable can be set beforehand by end-users and
* will in that case -not- be overwritten. */
/* By default, enable only Level-Zero and if all devices are allowed, also CUDA and HIP.
* OpenCL backend isn't currently well supported. */
# ifdef _WIN32
if (getenv("SYCL_CACHE_PERSISTENT") == nullptr) {
_putenv_s("SYCL_CACHE_PERSISTENT", "1");
@@ -41,12 +39,7 @@ bool device_oneapi_init()
_putenv_s("SYCL_CACHE_THRESHOLD", "0");
}
if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
_putenv_s("SYCL_DEVICE_FILTER", "level_zero");
}
else {
_putenv_s("SYCL_DEVICE_FILTER", "level_zero,cuda,hip");
}
_putenv_s("SYCL_DEVICE_FILTER", "level_zero");
}
if (getenv("SYCL_ENABLE_PCI") == nullptr) {
_putenv_s("SYCL_ENABLE_PCI", "1");
@@ -57,12 +50,7 @@ bool device_oneapi_init()
# elif __linux__
setenv("SYCL_CACHE_PERSISTENT", "1", false);
setenv("SYCL_CACHE_THRESHOLD", "0", false);
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
setenv("SYCL_DEVICE_FILTER", "level_zero", false);
}
else {
setenv("SYCL_DEVICE_FILTER", "level_zero,cuda,hip", false);
}
setenv("SYCL_DEVICE_FILTER", "level_zero", false);
setenv("SYCL_ENABLE_PCI", "1", false);
setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
# endif

View File

@@ -430,9 +430,9 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_
sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context());
(void)usm_type;
assert(usm_type == sycl::usm::alloc::device ||
(usm_type == sycl::usm::alloc::host &&
(allow_host || device_type == sycl::info::device_type::cpu)) ||
usm_type == sycl::usm::alloc::unknown);
((device_type == sycl::info::device_type::cpu || allow_host) &&
usm_type == sycl::usm::alloc::host ||
usm_type == sycl::usm::alloc::unknown));
# else
/* Silence warning about unused arguments. */
(void)queue_;

View File

@@ -9,10 +9,6 @@
#include "util/log.h"
#ifdef WITH_OSL
# include <OSL/oslversion.h>
#endif
#ifdef WITH_OPTIX
# include <optix_function_table_definition.h>
#endif
@@ -69,9 +65,6 @@ void device_optix_info(const vector<DeviceInfo> &cuda_devices, vector<DeviceInfo
info.type = DEVICE_OPTIX;
info.id += "_OptiX";
# if defined(WITH_OSL) && (OSL_VERSION_MINOR >= 13 || OSL_VERSION_MAJOR > 1)
info.has_osl = true;
# endif
info.denoisers |= DENOISER_OPTIX;
devices.push_back(info);

File diff suppressed because it is too large Load Diff

View File

@@ -1,14 +1,16 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2019, NVIDIA Corporation
* Copyright 2019-2022 Blender Foundation */
* Copyright 2019, NVIDIA Corporation.
* Copyright 2019-2022 Blender Foundation. */
#pragma once
#ifdef WITH_OPTIX
# include "device/cuda/device_impl.h"
# include "device/optix/queue.h"
# include "device/optix/util.h"
# include "kernel/osl/globals.h"
# include "kernel/types.h"
# include "util/unique_ptr.h"
CCL_NAMESPACE_BEGIN
@@ -21,16 +23,8 @@ enum {
PG_RGEN_INTERSECT_SHADOW,
PG_RGEN_INTERSECT_SUBSURFACE,
PG_RGEN_INTERSECT_VOLUME_STACK,
PG_RGEN_SHADE_BACKGROUND,
PG_RGEN_SHADE_LIGHT,
PG_RGEN_SHADE_SURFACE,
PG_RGEN_SHADE_SURFACE_RAYTRACE,
PG_RGEN_SHADE_SURFACE_MNEE,
PG_RGEN_SHADE_VOLUME,
PG_RGEN_SHADE_SHADOW,
PG_RGEN_EVAL_DISPLACE,
PG_RGEN_EVAL_BACKGROUND,
PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY,
PG_MISS,
PG_HITD, /* Default hit group. */
PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
@@ -46,14 +40,14 @@ enum {
};
static const int MISS_PROGRAM_GROUP_OFFSET = PG_MISS;
static const int NUM_MISS_PROGRAM_GROUPS = 1;
static const int NUM_MIS_PROGRAM_GROUPS = 1;
static const int HIT_PROGAM_GROUP_OFFSET = PG_HITD;
static const int NUM_HIT_PROGRAM_GROUPS = 8;
static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
static const int NUM_CALLABLE_PROGRAM_GROUPS = 2;
/* List of OptiX pipelines. */
enum { PIP_SHADE, PIP_INTERSECT, NUM_PIPELINES };
enum { PIP_SHADE_RAYTRACE, PIP_SHADE_MNEE, PIP_INTERSECT, NUM_PIPELINES };
/* A single shader binding table entry. */
struct SbtRecord {
@@ -67,35 +61,52 @@ class OptiXDevice : public CUDADevice {
OptixModule optix_module = NULL; /* All necessary OptiX kernels are in one module. */
OptixModule builtin_modules[2] = {};
OptixPipeline pipelines[NUM_PIPELINES] = {};
OptixProgramGroup groups[NUM_PROGRAM_GROUPS] = {};
OptixPipelineCompileOptions pipeline_options = {};
bool motion_blur = false;
device_vector<SbtRecord> sbt_data;
device_only_memory<KernelParamsOptiX> launch_params;
# ifdef WITH_OSL
OSLGlobals osl_globals;
vector<OptixModule> osl_modules;
vector<OptixProgramGroup> osl_groups;
# endif
private:
OptixTraversableHandle tlas_handle = 0;
vector<unique_ptr<device_only_memory<char>>> delayed_free_bvh_memory;
thread_mutex delayed_free_bvh_mutex;
class Denoiser {
public:
explicit Denoiser(OptiXDevice *device);
OptiXDevice *device;
OptiXDeviceQueue queue;
OptixDenoiser optix_denoiser = nullptr;
/* Configuration size, as provided to `optixDenoiserSetup`.
* If the `optixDenoiserSetup()` was never used on the current `optix_denoiser` the
* `is_configured` will be false. */
bool is_configured = false;
int2 configured_size = make_int2(0, 0);
/* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
* The memory layout goes as following: [denoiser state][scratch buffer]. */
device_only_memory<unsigned char> state;
OptixDenoiserSizes sizes = {};
bool use_pass_albedo = false;
bool use_pass_normal = false;
bool use_pass_flow = false;
};
Denoiser denoiser_;
public:
OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
~OptiXDevice();
private:
BVHLayoutMask get_bvh_layout_mask() const override;
string compile_kernel_get_common_cflags(const uint kernel_features);
string compile_kernel_get_common_cflags(const uint kernel_features) override;
bool load_kernels(const uint kernel_features) override;
bool load_osl_kernels() override;
bool build_optix_bvh(BVHOptiX *bvh,
OptixBuildOperation operation,
const OptixBuildInput &build_input,
@@ -112,7 +123,52 @@ class OptiXDevice : public CUDADevice {
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
void *get_cpu_osl_memory() override;
/* --------------------------------------------------------------------
* Denoising.
*/
class DenoiseContext;
class DenoisePass;
virtual bool denoise_buffer(const DeviceDenoiseTask &task) override;
virtual DeviceQueue *get_denoise_queue() override;
/* Read guiding passes from the render buffers, preprocess them in a way which is expected by
* OptiX and store in the guiding passes memory within the given context.
*
* Pre=-processing of the guiding passes is to only happen once per context lifetime. DO not
* preprocess them for every pass which is being denoised. */
bool denoise_filter_guiding_preprocess(DenoiseContext &context);
/* Set fake albedo pixels in the albedo guiding pass storage.
* After this point only passes which do not need albedo for denoising can be processed. */
bool denoise_filter_guiding_set_fake_albedo(DenoiseContext &context);
void denoise_pass(DenoiseContext &context, PassType pass_type);
/* Read input color pass from the render buffer into the memory which corresponds to the noisy
* input within the given context. Pixels are scaled to the number of samples, but are not
* preprocessed yet. */
void denoise_color_read(DenoiseContext &context, const DenoisePass &pass);
/* Run corresponding filter kernels, preparing data for the denoiser or copying data from the
* denoiser result to the render buffer. */
bool denoise_filter_color_preprocess(DenoiseContext &context, const DenoisePass &pass);
bool denoise_filter_color_postprocess(DenoiseContext &context, const DenoisePass &pass);
/* Make sure the OptiX denoiser is created and configured. */
bool denoise_ensure(DenoiseContext &context);
/* Create OptiX denoiser descriptor if needed.
* Will do nothing if the current OptiX descriptor is usable for the given parameters.
* If the OptiX denoiser descriptor did re-allocate here it is left unconfigured. */
bool denoise_create_if_needed(DenoiseContext &context);
/* Configure existing OptiX denoiser descriptor for the use for the given task. */
bool denoise_configure_if_needed(DenoiseContext &context);
/* Run configured denoiser. */
bool denoise_run(DenoiseContext &context, const DenoisePass &pass);
};
CCL_NAMESPACE_END

View File

@@ -24,33 +24,21 @@ void OptiXDeviceQueue::init_execution()
CUDADeviceQueue::init_execution();
}
static bool is_optix_specific_kernel(DeviceKernel kernel, bool use_osl)
static bool is_optix_specific_kernel(DeviceKernel kernel)
{
# ifdef WITH_OSL
/* OSL uses direct callables to execute, so shading needs to be done in OptiX if OSL is used. */
if (use_osl && device_kernel_has_shading(kernel)) {
return true;
}
# else
(void)use_osl;
# endif
return device_kernel_has_intersection(kernel);
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
}
bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args)
{
OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_);
# ifdef WITH_OSL
const bool use_osl = static_cast<OSLGlobals *>(optix_device->get_cpu_osl_memory())->use;
# else
const bool use_osl = false;
# endif
if (!is_optix_specific_kernel(kernel, use_osl)) {
if (!is_optix_specific_kernel(kernel)) {
return CUDADeviceQueue::enqueue(kernel, work_size, args);
}
@@ -62,6 +50,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
const CUDAContextScope scope(cuda_device_);
OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_);
const device_ptr sbt_data_ptr = optix_device->sbt_data.device_pointer;
const device_ptr launch_params_ptr = optix_device->launch_params.device_pointer;
@@ -72,7 +62,9 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
sizeof(device_ptr),
cuda_stream_));
if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || device_kernel_has_shading(kernel)) {
if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) {
cuda_device_assert(
cuda_device_,
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
@@ -80,15 +72,6 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
sizeof(device_ptr),
cuda_stream_));
}
if (kernel == DEVICE_KERNEL_SHADER_EVAL_DISPLACE ||
kernel == DEVICE_KERNEL_SHADER_EVAL_BACKGROUND ||
kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY) {
cuda_device_assert(cuda_device_,
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, offset),
args.values[2], // &d_offset
sizeof(int32_t),
cuda_stream_));
}
cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_));
@@ -96,35 +79,14 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
OptixShaderBindingTable sbt_params = {};
switch (kernel) {
case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_BACKGROUND * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_LIGHT * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
pipeline = optix_device->pipelines[PIP_SHADE];
pipeline = optix_device->pipelines[PIP_SHADE_RAYTRACE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
pipeline = optix_device->pipelines[PIP_SHADE];
pipeline = optix_device->pipelines[PIP_SHADE_MNEE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_VOLUME * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SHADOW * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
pipeline = optix_device->pipelines[PIP_INTERSECT];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);
@@ -142,20 +104,6 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_VOLUME_STACK * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_SHADER_EVAL_DISPLACE:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_DISPLACE * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_BACKGROUND * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr +
PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY * sizeof(SbtRecord);
break;
default:
LOG(ERROR) << "Invalid kernel " << device_kernel_as_string(kernel)
<< " is attempted to be enqueued.";
@@ -164,7 +112,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
sbt_params.missRecordBase = sbt_data_ptr + MISS_PROGRAM_GROUP_OFFSET * sizeof(SbtRecord);
sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
sbt_params.missRecordCount = NUM_MISS_PROGRAM_GROUPS;
sbt_params.missRecordCount = NUM_MIS_PROGRAM_GROUPS;
sbt_params.hitgroupRecordBase = sbt_data_ptr + HIT_PROGAM_GROUP_OFFSET * sizeof(SbtRecord);
sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
sbt_params.hitgroupRecordCount = NUM_HIT_PROGRAM_GROUPS;
@@ -172,12 +120,6 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
sbt_params.callablesRecordCount = NUM_CALLABLE_PROGRAM_GROUPS;
sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord);
# ifdef WITH_OSL
if (use_osl) {
sbt_params.callablesRecordCount += static_cast<unsigned int>(optix_device->osl_groups.size());
}
# endif
/* Launch the ray generation program. */
optix_device_assert(optix_device,
optixLaunch(pipeline,

View File

@@ -66,9 +66,7 @@ struct SocketType {
LINK_NORMAL = (1 << 8),
LINK_POSITION = (1 << 9),
LINK_TANGENT = (1 << 10),
LINK_OSL_INITIALIZER = (1 << 11),
DEFAULT_LINK_MASK = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) |
(1 << 10) | (1 << 11)
DEFAULT_LINK_MASK = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10)
};
ustring name;

View File

@@ -8,7 +8,7 @@ set(INC
set(SRC
adaptive_sampling.cpp
denoiser.cpp
denoiser_gpu.cpp
denoiser_device.cpp
denoiser_oidn.cpp
denoiser_optix.cpp
path_trace.cpp
@@ -30,7 +30,7 @@ set(SRC
set(SRC_HEADERS
adaptive_sampling.h
denoiser.h
denoiser_gpu.h
denoiser_device.h
denoiser_oidn.h
denoiser_optix.h
path_trace.h

View File

@@ -16,11 +16,9 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa
{
DCHECK(params.use);
#ifdef WITH_OPTIX
if (params.type == DENOISER_OPTIX && Device::available_devices(DEVICE_MASK_OPTIX).size()) {
return make_unique<OptiXDenoiser>(path_trace_device, params);
}
#endif
/* Always fallback to OIDN. */
DenoiseParams oidn_params = params;

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#include "integrator/denoiser_gpu.h"
#include "integrator/denoiser_device.h"
#include "device/denoise.h"
#include "device/device.h"
@@ -13,27 +13,27 @@
CCL_NAMESPACE_BEGIN
DenoiserGPU::DenoiserGPU(Device *path_trace_device, const DenoiseParams &params)
DeviceDenoiser::DeviceDenoiser(Device *path_trace_device, const DenoiseParams &params)
: Denoiser(path_trace_device, params)
{
}
DenoiserGPU::~DenoiserGPU()
DeviceDenoiser::~DeviceDenoiser()
{
/* Explicit implementation, to allow forward declaration of Device in the header. */
}
bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
RenderBuffers *render_buffers,
const int num_samples,
bool allow_inplace_modification)
bool DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
RenderBuffers *render_buffers,
const int num_samples,
bool allow_inplace_modification)
{
Device *denoiser_device = get_denoiser_device();
if (!denoiser_device) {
return false;
}
DenoiseTask task;
DeviceDenoiseTask task;
task.params = params_;
task.num_samples = num_samples;
task.buffer_params = buffer_params;
@@ -50,6 +50,8 @@ bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
else {
VLOG_WORK << "Creating temporary buffer on denoiser device.";
DeviceQueue *queue = denoiser_device->get_denoise_queue();
/* Create buffer which is available by the device used by denoiser. */
/* TODO(sergey): Optimize data transfers. For example, only copy denoising related passes,
@@ -68,13 +70,13 @@ bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
render_buffers->buffer.data(),
sizeof(float) * local_render_buffers.buffer.size());
denoiser_queue_->copy_to_device(local_render_buffers.buffer);
queue->copy_to_device(local_render_buffers.buffer);
task.render_buffers = &local_render_buffers;
task.allow_inplace_modification = true;
}
const bool denoise_result = denoise_buffer(task);
const bool denoise_result = denoiser_device->denoise_buffer(task);
if (local_buffer_used) {
local_render_buffers.copy_from_device();
@@ -88,21 +90,4 @@ bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
return denoise_result;
}
Device *DenoiserGPU::ensure_denoiser_device(Progress *progress)
{
Device *denoiser_device = Denoiser::ensure_denoiser_device(progress);
if (!denoiser_device) {
return nullptr;
}
if (!denoiser_queue_) {
denoiser_queue_ = denoiser_device->gpu_queue_create();
if (!denoiser_queue_) {
return nullptr;
}
}
return denoiser_device;
}
CCL_NAMESPACE_END

View File

@@ -0,0 +1,27 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#pragma once
#include "integrator/denoiser.h"
#include "util/unique_ptr.h"
CCL_NAMESPACE_BEGIN
/* Denoiser which uses device-specific denoising implementation, such as OptiX denoiser which are
* implemented as a part of a driver of specific device.
*
* This implementation makes sure the to-be-denoised buffer is available on the denoising device
* and invoke denoising kernel via device API. */
class DeviceDenoiser : public Denoiser {
public:
DeviceDenoiser(Device *path_trace_device, const DenoiseParams &params);
~DeviceDenoiser();
virtual bool denoise_buffer(const BufferParams &buffer_params,
RenderBuffers *render_buffers,
const int num_samples,
bool allow_inplace_modification) override;
};
CCL_NAMESPACE_END

View File

@@ -1,52 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#pragma once
#include "integrator/denoiser.h"
CCL_NAMESPACE_BEGIN
/* Implementation of Denoiser which uses a device-specific denoising implementation, running on a
* GPU device queue. It makes sure the to-be-denoised buffer is available on the denoising device
* and invokes denoising kernels via the device queue API. */
class DenoiserGPU : public Denoiser {
public:
DenoiserGPU(Device *path_trace_device, const DenoiseParams &params);
~DenoiserGPU();
virtual bool denoise_buffer(const BufferParams &buffer_params,
RenderBuffers *render_buffers,
const int num_samples,
bool allow_inplace_modification) override;
protected:
/* All the parameters needed to perform buffer denoising on a device.
* Is not really a task in its canonical terms (as in, is not an asynchronous running task). Is
* more like a wrapper for all the arguments and parameters needed to perform denoising. Is a
* single place where they are all listed, so that it's not required to modify all device methods
* when these parameters do change. */
class DenoiseTask {
public:
DenoiseParams params;
int num_samples;
RenderBuffers *render_buffers;
BufferParams buffer_params;
/* Allow to do in-place modification of the input passes (scaling them down i.e.). This will
* lower the memory footprint of the denoiser but will make input passes "invalid" (from path
* tracer) point of view. */
bool allow_inplace_modification;
};
/* Returns true if task is fully handled. */
virtual bool denoise_buffer(const DenoiseTask & /*task*/) = 0;
virtual Device *ensure_denoiser_device(Progress *progress) override;
unique_ptr<DeviceQueue> denoiser_queue_;
};
CCL_NAMESPACE_END

View File

@@ -1,786 +1,21 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifdef WITH_OPTIX
#include "integrator/denoiser_optix.h"
# include "integrator/denoiser_optix.h"
# include "integrator/pass_accessor_gpu.h"
# include "device/optix/device_impl.h"
# include "device/optix/queue.h"
# include <optix_denoiser_tiling.h>
#include "device/denoise.h"
#include "device/device.h"
CCL_NAMESPACE_BEGIN
# if OPTIX_ABI_VERSION >= 60
using ::optixUtilDenoiserInvokeTiled;
# else
// A minimal copy of functionality `optix_denoiser_tiling.h` which allows to fix integer overflow
// issues without bumping SDK or driver requirement.
//
// The original code is Copyright NVIDIA Corporation, BSD-3-Clause.
static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D &input,
const OptixImage2D &output,
unsigned int overlapWindowSizeInPixels,
unsigned int tileWidth,
unsigned int tileHeight,
std::vector<OptixUtilDenoiserImageTile> &tiles)
{
if (tileWidth == 0 || tileHeight == 0)
return OPTIX_ERROR_INVALID_VALUE;
unsigned int inPixelStride = optixUtilGetPixelStride(input);
unsigned int outPixelStride = optixUtilGetPixelStride(output);
int inp_w = std::min(tileWidth + 2 * overlapWindowSizeInPixels, input.width);
int inp_h = std::min(tileHeight + 2 * overlapWindowSizeInPixels, input.height);
int inp_y = 0, copied_y = 0;
do {
int inputOffsetY = inp_y == 0 ? 0 :
std::max((int)overlapWindowSizeInPixels,
inp_h - ((int)input.height - inp_y));
int copy_y = inp_y == 0 ? std::min(input.height, tileHeight + overlapWindowSizeInPixels) :
std::min(tileHeight, input.height - copied_y);
int inp_x = 0, copied_x = 0;
do {
int inputOffsetX = inp_x == 0 ? 0 :
std::max((int)overlapWindowSizeInPixels,
inp_w - ((int)input.width - inp_x));
int copy_x = inp_x == 0 ? std::min(input.width, tileWidth + overlapWindowSizeInPixels) :
std::min(tileWidth, input.width - copied_x);
OptixUtilDenoiserImageTile tile;
tile.input.data = input.data + (size_t)(inp_y - inputOffsetY) * input.rowStrideInBytes +
+(size_t)(inp_x - inputOffsetX) * inPixelStride;
tile.input.width = inp_w;
tile.input.height = inp_h;
tile.input.rowStrideInBytes = input.rowStrideInBytes;
tile.input.pixelStrideInBytes = input.pixelStrideInBytes;
tile.input.format = input.format;
tile.output.data = output.data + (size_t)inp_y * output.rowStrideInBytes +
(size_t)inp_x * outPixelStride;
tile.output.width = copy_x;
tile.output.height = copy_y;
tile.output.rowStrideInBytes = output.rowStrideInBytes;
tile.output.pixelStrideInBytes = output.pixelStrideInBytes;
tile.output.format = output.format;
tile.inputOffsetX = inputOffsetX;
tile.inputOffsetY = inputOffsetY;
tiles.push_back(tile);
inp_x += inp_x == 0 ? tileWidth + overlapWindowSizeInPixels : tileWidth;
copied_x += copy_x;
} while (inp_x < static_cast<int>(input.width));
inp_y += inp_y == 0 ? tileHeight + overlapWindowSizeInPixels : tileHeight;
copied_y += copy_y;
} while (inp_y < static_cast<int>(input.height));
return OPTIX_SUCCESS;
}
static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
CUstream stream,
const OptixDenoiserParams *params,
CUdeviceptr denoiserState,
size_t denoiserStateSizeInBytes,
const OptixDenoiserGuideLayer *guideLayer,
const OptixDenoiserLayer *layers,
unsigned int numLayers,
CUdeviceptr scratch,
size_t scratchSizeInBytes,
unsigned int overlapWindowSizeInPixels,
unsigned int tileWidth,
unsigned int tileHeight)
{
if (!guideLayer || !layers)
return OPTIX_ERROR_INVALID_VALUE;
std::vector<std::vector<OptixUtilDenoiserImageTile>> tiles(numLayers);
std::vector<std::vector<OptixUtilDenoiserImageTile>> prevTiles(numLayers);
for (unsigned int l = 0; l < numLayers; l++) {
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].input,
layers[l].output,
overlapWindowSizeInPixels,
tileWidth,
tileHeight,
tiles[l]))
return res;
if (layers[l].previousOutput.data) {
OptixImage2D dummyOutput = layers[l].previousOutput;
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].previousOutput,
dummyOutput,
overlapWindowSizeInPixels,
tileWidth,
tileHeight,
prevTiles[l]))
return res;
}
}
std::vector<OptixUtilDenoiserImageTile> albedoTiles;
if (guideLayer->albedo.data) {
OptixImage2D dummyOutput = guideLayer->albedo;
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->albedo,
dummyOutput,
overlapWindowSizeInPixels,
tileWidth,
tileHeight,
albedoTiles))
return res;
}
std::vector<OptixUtilDenoiserImageTile> normalTiles;
if (guideLayer->normal.data) {
OptixImage2D dummyOutput = guideLayer->normal;
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->normal,
dummyOutput,
overlapWindowSizeInPixels,
tileWidth,
tileHeight,
normalTiles))
return res;
}
std::vector<OptixUtilDenoiserImageTile> flowTiles;
if (guideLayer->flow.data) {
OptixImage2D dummyOutput = guideLayer->flow;
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->flow,
dummyOutput,
overlapWindowSizeInPixels,
tileWidth,
tileHeight,
flowTiles))
return res;
}
for (size_t t = 0; t < tiles[0].size(); t++) {
std::vector<OptixDenoiserLayer> tlayers;
for (unsigned int l = 0; l < numLayers; l++) {
OptixDenoiserLayer layer = {};
layer.input = (tiles[l])[t].input;
layer.output = (tiles[l])[t].output;
if (layers[l].previousOutput.data)
layer.previousOutput = (prevTiles[l])[t].input;
tlayers.push_back(layer);
}
OptixDenoiserGuideLayer gl = {};
if (guideLayer->albedo.data)
gl.albedo = albedoTiles[t].input;
if (guideLayer->normal.data)
gl.normal = normalTiles[t].input;
if (guideLayer->flow.data)
gl.flow = flowTiles[t].input;
if (const OptixResult res = optixDenoiserInvoke(denoiser,
stream,
params,
denoiserState,
denoiserStateSizeInBytes,
&gl,
&tlayers[0],
numLayers,
(tiles[0])[t].inputOffsetX,
(tiles[0])[t].inputOffsetY,
scratch,
scratchSizeInBytes))
return res;
}
return OPTIX_SUCCESS;
}
# endif
OptiXDenoiser::OptiXDenoiser(Device *path_trace_device, const DenoiseParams &params)
: DenoiserGPU(path_trace_device, params), state_(path_trace_device, "__denoiser_state", true)
: DeviceDenoiser(path_trace_device, params)
{
}
OptiXDenoiser::~OptiXDenoiser()
{
/* It is important that the OptixDenoiser handle is destroyed before the OptixDeviceContext
* handle, which is guaranteed since the local denoising device owning the OptiX device context
* is deleted as part of the Denoiser class destructor call after this. */
if (optix_denoiser_ != nullptr) {
optixDenoiserDestroy(optix_denoiser_);
}
}
uint OptiXDenoiser::get_device_type_mask() const
{
return DEVICE_MASK_OPTIX;
}
class OptiXDenoiser::DenoiseContext {
public:
explicit DenoiseContext(OptiXDevice *device, const DenoiseTask &task)
: denoise_params(task.params),
render_buffers(task.render_buffers),
buffer_params(task.buffer_params),
guiding_buffer(device, "denoiser guiding passes buffer", true),
num_samples(task.num_samples)
{
num_input_passes = 1;
if (denoise_params.use_pass_albedo) {
num_input_passes += 1;
use_pass_albedo = true;
pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
if (denoise_params.use_pass_normal) {
num_input_passes += 1;
use_pass_normal = true;
pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
}
}
if (denoise_params.temporally_stable) {
prev_output.device_pointer = render_buffers->buffer.device_pointer;
prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
prev_output.stride = buffer_params.stride;
prev_output.pass_stride = buffer_params.pass_stride;
num_input_passes += 1;
use_pass_motion = true;
pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
}
use_guiding_passes = (num_input_passes - 1) > 0;
if (use_guiding_passes) {
if (task.allow_inplace_modification) {
guiding_params.device_pointer = render_buffers->buffer.device_pointer;
guiding_params.pass_albedo = pass_denoising_albedo;
guiding_params.pass_normal = pass_denoising_normal;
guiding_params.pass_flow = pass_motion;
guiding_params.stride = buffer_params.stride;
guiding_params.pass_stride = buffer_params.pass_stride;
}
else {
guiding_params.pass_stride = 0;
if (use_pass_albedo) {
guiding_params.pass_albedo = guiding_params.pass_stride;
guiding_params.pass_stride += 3;
}
if (use_pass_normal) {
guiding_params.pass_normal = guiding_params.pass_stride;
guiding_params.pass_stride += 3;
}
if (use_pass_motion) {
guiding_params.pass_flow = guiding_params.pass_stride;
guiding_params.pass_stride += 2;
}
guiding_params.stride = buffer_params.width;
guiding_buffer.alloc_to_device(buffer_params.width * buffer_params.height *
guiding_params.pass_stride);
guiding_params.device_pointer = guiding_buffer.device_pointer;
}
}
pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
}
const DenoiseParams &denoise_params;
RenderBuffers *render_buffers = nullptr;
const BufferParams &buffer_params;
/* Previous output. */
struct {
device_ptr device_pointer = 0;
int offset = PASS_UNUSED;
int stride = -1;
int pass_stride = -1;
} prev_output;
/* Device-side storage of the guiding passes. */
device_only_memory<float> guiding_buffer;
struct {
device_ptr device_pointer = 0;
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
int pass_albedo = PASS_UNUSED;
int pass_normal = PASS_UNUSED;
int pass_flow = PASS_UNUSED;
int stride = -1;
int pass_stride = -1;
} guiding_params;
/* Number of input passes. Including the color and extra auxiliary passes. */
int num_input_passes = 0;
bool use_guiding_passes = false;
bool use_pass_albedo = false;
bool use_pass_normal = false;
bool use_pass_motion = false;
int num_samples = 0;
int pass_sample_count = PASS_UNUSED;
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
int pass_denoising_albedo = PASS_UNUSED;
int pass_denoising_normal = PASS_UNUSED;
int pass_motion = PASS_UNUSED;
/* For passes which don't need albedo channel for denoising we replace the actual albedo with
* the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
* the fake values and denoising of passes which do need albedo can no longer happen. */
bool albedo_replaced_with_fake = false;
};
class OptiXDenoiser::DenoisePass {
public:
DenoisePass(const PassType type, const BufferParams &buffer_params) : type(type)
{
noisy_offset = buffer_params.get_pass_offset(type, PassMode::NOISY);
denoised_offset = buffer_params.get_pass_offset(type, PassMode::DENOISED);
const PassInfo pass_info = Pass::get_info(type);
num_components = pass_info.num_components;
use_compositing = pass_info.use_compositing;
use_denoising_albedo = pass_info.use_denoising_albedo;
}
PassType type;
int noisy_offset;
int denoised_offset;
int num_components;
bool use_compositing;
bool use_denoising_albedo;
};
bool OptiXDenoiser::denoise_buffer(const DenoiseTask &task)
{
OptiXDevice *const optix_device = static_cast<OptiXDevice *>(denoiser_device_);
const CUDAContextScope scope(optix_device);
DenoiseContext context(optix_device, task);
if (!denoise_ensure(context)) {
return false;
}
if (!denoise_filter_guiding_preprocess(context)) {
LOG(ERROR) << "Error preprocessing guiding passes.";
return false;
}
/* Passes which will use real albedo when it is available. */
denoise_pass(context, PASS_COMBINED);
denoise_pass(context, PASS_SHADOW_CATCHER_MATTE);
/* Passes which do not need albedo and hence if real is present it needs to become fake. */
denoise_pass(context, PASS_SHADOW_CATCHER);
return true;
}
bool OptiXDenoiser::denoise_filter_guiding_preprocess(const DenoiseContext &context)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.guiding_params.device_pointer,
&context.guiding_params.pass_stride,
&context.guiding_params.pass_albedo,
&context.guiding_params.pass_normal,
&context.guiding_params.pass_flow,
&context.render_buffers->buffer.device_pointer,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&context.pass_sample_count,
&context.pass_denoising_albedo,
&context.pass_denoising_normal,
&context.pass_motion,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&context.num_samples);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
}
bool OptiXDenoiser::denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.guiding_params.device_pointer,
&context.guiding_params.pass_stride,
&context.guiding_params.pass_albedo,
&buffer_params.width,
&buffer_params.height);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
}
void OptiXDenoiser::denoise_pass(DenoiseContext &context, PassType pass_type)
{
const BufferParams &buffer_params = context.buffer_params;
const DenoisePass pass(pass_type, buffer_params);
if (pass.noisy_offset == PASS_UNUSED) {
return;
}
if (pass.denoised_offset == PASS_UNUSED) {
LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
return;
}
if (pass.use_denoising_albedo) {
if (context.albedo_replaced_with_fake) {
LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
return;
}
}
else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
context.albedo_replaced_with_fake = true;
if (!denoise_filter_guiding_set_fake_albedo(context)) {
LOG(ERROR) << "Error replacing real albedo with the fake one.";
return;
}
}
/* Read and preprocess noisy color input pass. */
denoise_color_read(context, pass);
if (!denoise_filter_color_preprocess(context, pass)) {
LOG(ERROR) << "Error converting denoising passes to RGB buffer.";
return;
}
if (!denoise_run(context, pass)) {
LOG(ERROR) << "Error running OptiX denoiser.";
return;
}
/* Store result in the combined pass of the render buffer.
*
* This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
if (!denoise_filter_color_postprocess(context, pass)) {
LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
return;
}
denoiser_queue_->synchronize();
}
void OptiXDenoiser::denoise_color_read(const DenoiseContext &context, const DenoisePass &pass)
{
PassAccessor::PassAccessInfo pass_access_info;
pass_access_info.type = pass.type;
pass_access_info.mode = PassMode::NOISY;
pass_access_info.offset = pass.noisy_offset;
/* Denoiser operates on passes which are used to calculate the approximation, and is never used
* on the approximation. The latter is not even possible because OptiX does not support
* denoising of semi-transparent pixels. */
pass_access_info.use_approximate_shadow_catcher = false;
pass_access_info.use_approximate_shadow_catcher_background = false;
pass_access_info.show_active_pixels = false;
/* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
*/
const PassAccessorGPU pass_accessor(
denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);
PassAccessor::Destination destination(pass_access_info.type);
destination.d_pixels = context.render_buffers->buffer.device_pointer +
pass.denoised_offset * sizeof(float);
destination.num_components = 3;
destination.pixel_stride = context.buffer_params.pass_stride;
BufferParams buffer_params = context.buffer_params;
buffer_params.window_x = 0;
buffer_params.window_y = 0;
buffer_params.window_width = buffer_params.width;
buffer_params.window_height = buffer_params.height;
pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
}
bool OptiXDenoiser::denoise_filter_color_preprocess(const DenoiseContext &context,
const DenoisePass &pass)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&pass.denoised_offset);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
}
bool OptiXDenoiser::denoise_filter_color_postprocess(const DenoiseContext &context,
const DenoisePass &pass)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&context.num_samples,
&pass.noisy_offset,
&pass.denoised_offset,
&context.pass_sample_count,
&pass.num_components,
&pass.use_compositing);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
}
bool OptiXDenoiser::denoise_ensure(DenoiseContext &context)
{
if (!denoise_create_if_needed(context)) {
LOG(ERROR) << "OptiX denoiser creation has failed.";
return false;
}
if (!denoise_configure_if_needed(context)) {
LOG(ERROR) << "OptiX denoiser configuration has failed.";
return false;
}
return true;
}
bool OptiXDenoiser::denoise_create_if_needed(DenoiseContext &context)
{
const bool recreate_denoiser = (optix_denoiser_ == nullptr) ||
(use_pass_albedo_ != context.use_pass_albedo) ||
(use_pass_normal_ != context.use_pass_normal) ||
(use_pass_motion_ != context.use_pass_motion);
if (!recreate_denoiser) {
return true;
}
/* Destroy existing handle before creating new one. */
if (optix_denoiser_) {
optixDenoiserDestroy(optix_denoiser_);
}
/* Create OptiX denoiser handle on demand when it is first used. */
OptixDenoiserOptions denoiser_options = {};
denoiser_options.guideAlbedo = context.use_pass_albedo;
denoiser_options.guideNormal = context.use_pass_normal;
OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
if (context.use_pass_motion) {
model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
}
const OptixResult result = optixDenoiserCreate(
static_cast<OptiXDevice *>(denoiser_device_)->context,
model,
&denoiser_options,
&optix_denoiser_);
if (result != OPTIX_SUCCESS) {
denoiser_device_->set_error("Failed to create OptiX denoiser");
return false;
}
/* OptiX denoiser handle was created with the requested number of input passes. */
use_pass_albedo_ = context.use_pass_albedo;
use_pass_normal_ = context.use_pass_normal;
use_pass_motion_ = context.use_pass_motion;
/* OptiX denoiser has been created, but it needs configuration. */
is_configured_ = false;
return true;
}
bool OptiXDenoiser::denoise_configure_if_needed(DenoiseContext &context)
{
/* Limit maximum tile size denoiser can be invoked with. */
const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
min(context.buffer_params.height, 4096));
if (is_configured_ && (configured_size_.x == tile_size.x && configured_size_.y == tile_size.y)) {
return true;
}
optix_device_assert(
denoiser_device_,
optixDenoiserComputeMemoryResources(optix_denoiser_, tile_size.x, tile_size.y, &sizes_));
/* Allocate denoiser state if tile size has changed since last setup. */
state_.device = denoiser_device_;
state_.alloc_to_device(sizes_.stateSizeInBytes + sizes_.withOverlapScratchSizeInBytes);
/* Initialize denoiser state for the current tile size. */
const OptixResult result = optixDenoiserSetup(
optix_denoiser_,
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
* on a stream that is not the default stream. */
tile_size.x + sizes_.overlapWindowSizeInPixels * 2,
tile_size.y + sizes_.overlapWindowSizeInPixels * 2,
state_.device_pointer,
sizes_.stateSizeInBytes,
state_.device_pointer + sizes_.stateSizeInBytes,
sizes_.withOverlapScratchSizeInBytes);
if (result != OPTIX_SUCCESS) {
denoiser_device_->set_error("Failed to set up OptiX denoiser");
return false;
}
cuda_device_assert(denoiser_device_, cuCtxSynchronize());
is_configured_ = true;
configured_size_ = tile_size;
return true;
}
bool OptiXDenoiser::denoise_run(const DenoiseContext &context, const DenoisePass &pass)
{
const BufferParams &buffer_params = context.buffer_params;
const int width = buffer_params.width;
const int height = buffer_params.height;
/* Set up input and output layer information. */
OptixImage2D color_layer = {0};
OptixImage2D albedo_layer = {0};
OptixImage2D normal_layer = {0};
OptixImage2D flow_layer = {0};
OptixImage2D output_layer = {0};
OptixImage2D prev_output_layer = {0};
/* Color pass. */
{
const int pass_denoised = pass.denoised_offset;
const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);
color_layer.data = context.render_buffers->buffer.device_pointer +
pass_denoised * sizeof(float);
color_layer.width = width;
color_layer.height = height;
color_layer.rowStrideInBytes = pass_stride_in_bytes * context.buffer_params.stride;
color_layer.pixelStrideInBytes = pass_stride_in_bytes;
color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
/* Previous output. */
if (context.prev_output.offset != PASS_UNUSED) {
const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);
prev_output_layer.data = context.prev_output.device_pointer +
context.prev_output.offset * sizeof(float);
prev_output_layer.width = width;
prev_output_layer.height = height;
prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
/* Optional albedo and color passes. */
if (context.num_input_passes > 1) {
const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;
if (context.use_pass_albedo) {
albedo_layer.data = d_guiding_buffer + context.guiding_params.pass_albedo * sizeof(float);
albedo_layer.width = width;
albedo_layer.height = height;
albedo_layer.rowStrideInBytes = row_stride_in_bytes;
albedo_layer.pixelStrideInBytes = pixel_stride_in_bytes;
albedo_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
if (context.use_pass_normal) {
normal_layer.data = d_guiding_buffer + context.guiding_params.pass_normal * sizeof(float);
normal_layer.width = width;
normal_layer.height = height;
normal_layer.rowStrideInBytes = row_stride_in_bytes;
normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
if (context.use_pass_motion) {
flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
flow_layer.width = width;
flow_layer.height = height;
flow_layer.rowStrideInBytes = row_stride_in_bytes;
flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
}
}
/* Denoise in-place of the noisy input in the render buffers. */
output_layer = color_layer;
OptixDenoiserGuideLayer guide_layers = {};
guide_layers.albedo = albedo_layer;
guide_layers.normal = normal_layer;
guide_layers.flow = flow_layer;
OptixDenoiserLayer image_layers = {};
image_layers.input = color_layer;
image_layers.previousOutput = prev_output_layer;
image_layers.output = output_layer;
/* Finally run denoising. */
OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
optix_device_assert(denoiser_device_,
ccl::optixUtilDenoiserInvokeTiled(
optix_denoiser_,
static_cast<OptiXDeviceQueue *>(denoiser_queue_.get())->stream(),
&params,
state_.device_pointer,
sizes_.stateSizeInBytes,
&guide_layers,
&image_layers,
1,
state_.device_pointer + sizes_.stateSizeInBytes,
sizes_.withOverlapScratchSizeInBytes,
sizes_.overlapWindowSizeInPixels,
configured_size_.x,
configured_size_.y));
return true;
}
CCL_NAMESPACE_END
#endif

View File

@@ -3,84 +3,16 @@
#pragma once
#ifdef WITH_OPTIX
# include "integrator/denoiser_gpu.h"
# include "device/optix/util.h"
#include "integrator/denoiser_device.h"
CCL_NAMESPACE_BEGIN
/* Implementation of denoising API which uses the OptiX denoiser. */
class OptiXDenoiser : public DenoiserGPU {
class OptiXDenoiser : public DeviceDenoiser {
public:
OptiXDenoiser(Device *path_trace_device, const DenoiseParams &params);
~OptiXDenoiser();
protected:
virtual uint get_device_type_mask() const override;
private:
class DenoiseContext;
class DenoisePass;
virtual bool denoise_buffer(const DenoiseTask &task) override;
/* Read guiding passes from the render buffers, preprocess them in a way which is expected by
* OptiX and store in the guiding passes memory within the given context.
*
* Pre-processing of the guiding passes is to only happen once per context lifetime. DO not
* preprocess them for every pass which is being denoised. */
bool denoise_filter_guiding_preprocess(const DenoiseContext &context);
/* Set fake albedo pixels in the albedo guiding pass storage.
* After this point only passes which do not need albedo for denoising can be processed. */
bool denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context);
void denoise_pass(DenoiseContext &context, PassType pass_type);
/* Read input color pass from the render buffer into the memory which corresponds to the noisy
* input within the given context. Pixels are scaled to the number of samples, but are not
* preprocessed yet. */
void denoise_color_read(const DenoiseContext &context, const DenoisePass &pass);
/* Run corresponding filter kernels, preparing data for the denoiser or copying data from the
* denoiser result to the render buffer. */
bool denoise_filter_color_preprocess(const DenoiseContext &context, const DenoisePass &pass);
bool denoise_filter_color_postprocess(const DenoiseContext &context, const DenoisePass &pass);
/* Make sure the OptiX denoiser is created and configured. */
bool denoise_ensure(DenoiseContext &context);
/* Create OptiX denoiser descriptor if needed.
* Will do nothing if the current OptiX descriptor is usable for the given parameters.
* If the OptiX denoiser descriptor did re-allocate here it is left unconfigured. */
bool denoise_create_if_needed(DenoiseContext &context);
/* Configure existing OptiX denoiser descriptor for the use for the given task. */
bool denoise_configure_if_needed(DenoiseContext &context);
/* Run configured denoiser. */
bool denoise_run(const DenoiseContext &context, const DenoisePass &pass);
OptixDenoiser optix_denoiser_ = nullptr;
/* Configuration size, as provided to `optixDenoiserSetup`.
* If the `optixDenoiserSetup()` was never used on the current `optix_denoiser` the
* `is_configured` will be false. */
bool is_configured_ = false;
int2 configured_size_ = make_int2(0, 0);
/* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
* The memory layout goes as following: [denoiser state][scratch buffer]. */
device_only_memory<unsigned char> state_;
OptixDenoiserSizes sizes_ = {};
bool use_pass_albedo_ = false;
bool use_pass_normal_ = false;
bool use_pass_motion_ = false;
};
CCL_NAMESPACE_END
#endif

View File

@@ -37,14 +37,6 @@ set(SRC_KERNEL_DEVICE_OPTIX
device/optix/kernel_shader_raytrace.cu
)
if(WITH_CYCLES_OSL AND (OSL_LIBRARY_VERSION_MINOR GREATER_EQUAL 13 OR OSL_LIBRARY_VERSION_MAJOR GREATER 1))
set(SRC_KERNEL_DEVICE_OPTIX
${SRC_KERNEL_DEVICE_OPTIX}
osl/services_optix.cu
device/optix/kernel_osl.cu
)
endif()
set(SRC_KERNEL_DEVICE_ONEAPI
device/oneapi/kernel.cpp
)
@@ -189,16 +181,6 @@ set(SRC_KERNEL_SVM_HEADERS
svm/vertex_color.h
)
if(WITH_CYCLES_OSL)
set(SRC_KERNEL_OSL_HEADERS
osl/osl.h
osl/closures_setup.h
osl/closures_template.h
osl/services_gpu.h
osl/types.h
)
endif()
set(SRC_KERNEL_GEOM_HEADERS
geom/geom.h
geom/attribute.h
@@ -285,17 +267,10 @@ set(SRC_KERNEL_INTEGRATOR_HEADERS
)
set(SRC_KERNEL_LIGHT_HEADERS
light/area.h
light/light.h
light/background.h
light/common.h
light/distant.h
light/distribution.h
light/light.h
light/point.h
light/sample.h
light/spot.h
light/tree.h
light/triangle.h
)
set(SRC_KERNEL_SAMPLE_HEADERS
@@ -331,7 +306,6 @@ set(SRC_KERNEL_HEADERS
${SRC_KERNEL_GEOM_HEADERS}
${SRC_KERNEL_INTEGRATOR_HEADERS}
${SRC_KERNEL_LIGHT_HEADERS}
${SRC_KERNEL_OSL_HEADERS}
${SRC_KERNEL_SAMPLE_HEADERS}
${SRC_KERNEL_SVM_HEADERS}
${SRC_KERNEL_TYPES_HEADERS}
@@ -473,29 +447,54 @@ if(WITH_CYCLES_CUDA_BINARIES)
if(WITH_CYCLES_DEBUG)
set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
set(cuda_flags ${cuda_flags} --ptxas-options="-v")
endif()
set(_cuda_nvcc_args
-arch=${arch}
${CUDA_NVCC_FLAGS}
--${format}
${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
${cuda_flags})
if(WITH_CYCLES_CUBIN_COMPILER)
string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)
# Needed to find libnvrtc-builtins.so. Can't do it from inside
# cycles_cubin_cc since the env variable is read before main()
if(APPLE)
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
-E env DYLD_LIBRARY_PATH="${cuda_toolkit_root_dir}/lib")
elseif(UNIX)
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
-E env LD_LIBRARY_PATH="${cuda_toolkit_root_dir}/lib64")
endif()
if(WITH_COMPILER_CCACHE AND CCACHE_PROGRAM)
add_custom_command(
OUTPUT ${cuda_file}
COMMAND ${CCACHE_PROGRAM} ${cuda_nvcc_executable} ${_cuda_nvcc_args}
DEPENDS ${kernel_sources})
COMMAND ${CUBIN_CC_ENV}
"$<TARGET_FILE:cycles_cubin_cc>"
-target ${CUDA_ARCH}
-i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
${cuda_flags}
-v
-cuda-toolkit-dir "${cuda_toolkit_root_dir}"
DEPENDS ${kernel_sources} cycles_cubin_cc)
else()
add_custom_command(
OUTPUT ${cuda_file}
COMMAND ${cuda_nvcc_executable} ${_cuda_nvcc_args}
DEPENDS ${kernel_sources})
endif()
set(_cuda_nvcc_args
-arch=${arch}
${CUDA_NVCC_FLAGS}
--${format}
${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
--ptxas-options="-v"
${cuda_flags})
unset(_cuda_nvcc_args)
if(WITH_COMPILER_CCACHE AND CCACHE_PROGRAM)
add_custom_command(
OUTPUT ${cuda_file}
COMMAND ${CCACHE_PROGRAM} ${cuda_nvcc_executable} ${_cuda_nvcc_args}
DEPENDS ${kernel_sources})
else()
add_custom_command(
OUTPUT ${cuda_file}
COMMAND ${cuda_nvcc_executable} ${_cuda_nvcc_args}
DEPENDS ${kernel_sources})
endif()
unset(_cuda_nvcc_args)
endif()
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND cuda_cubins ${cuda_file})
@@ -571,22 +570,13 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
if(WIN32)
set(hip_command ${CMAKE_COMMAND})
set(hip_flags
-E env "HIP_PATH=${HIP_ROOT_DIR}"
-E env "HIP_PATH=${HIP_ROOT_DIR}" "PATH=${HIP_PERL_DIR}"
${HIP_HIPCC_EXECUTABLE}.bat)
else()
set(hip_command ${HIP_HIPCC_EXECUTABLE})
set(hip_flags)
endif()
# There's a bug in the compiler causing some scenes to fail to render on Vega cards
# A workaround currently is to set -O1 opt level during kernel compilation for these
# cards Remove this when a newer compiler is available with fixes.
if(WIN32 AND (${arch} MATCHES "gfx90[a-z0-9]+"))
set(hip_opt_flags "-O1")
else()
set(hip_opt_flags)
endif()
set(hip_flags
${hip_flags}
--amdgpu-target=${arch}
@@ -603,7 +593,6 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
-Wno-unused-value
--hipcc-func-supp
-ffast-math
${hip_opt_flags}
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
if(WITH_NANOVDB)
@@ -657,25 +646,55 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
endif()
add_custom_command(
OUTPUT
${output}
DEPENDS
${input}
${SRC_KERNEL_HEADERS}
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
${SRC_UTIL_HEADERS}
COMMAND
${CUDA_NVCC_EXECUTABLE}
--ptx
-arch=sm_50
${cuda_flags}
${input}
WORKING_DIRECTORY
"${CMAKE_CURRENT_SOURCE_DIR}")
if(WITH_CYCLES_CUBIN_COMPILER)
# Needed to find libnvrtc-builtins.so. Can't do it from inside
# cycles_cubin_cc since the env variable is read before main()
if(APPLE)
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
-E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib")
elseif(UNIX)
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
-E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
endif()
add_custom_command(
OUTPUT ${output}
DEPENDS
${input}
${SRC_KERNEL_HEADERS}
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
${SRC_UTIL_HEADERS}
COMMAND ${CUBIN_CC_ENV}
"$<TARGET_FILE:cycles_cubin_cc>"
-target 50
-ptx
-i ${CMAKE_CURRENT_SOURCE_DIR}/${input}
${cuda_flags}
-v
-cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
DEPENDS ${kernel_sources} cycles_cubin_cc)
else()
add_custom_command(
OUTPUT
${output}
DEPENDS
${input}
${SRC_KERNEL_HEADERS}
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
${SRC_UTIL_HEADERS}
COMMAND
${CUDA_NVCC_EXECUTABLE}
--ptx
-arch=sm_50
${cuda_flags}
${input}
WORKING_DIRECTORY
"${CMAKE_CURRENT_SOURCE_DIR}")
endif()
list(APPEND optix_ptx ${output})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib)
@@ -689,16 +708,6 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
kernel_optix_shader_raytrace
"device/optix/kernel_shader_raytrace.cu"
"--keep-device-functions")
if(WITH_CYCLES_OSL AND (OSL_LIBRARY_VERSION_MINOR GREATER_EQUAL 13 OR OSL_LIBRARY_VERSION_MAJOR GREATER 1))
CYCLES_OPTIX_KERNEL_ADD(
kernel_optix_osl
"device/optix/kernel_osl.cu"
"--relocatable-device-code=true")
CYCLES_OPTIX_KERNEL_ADD(
kernel_optix_osl_services
"osl/services_optix.cu"
"--relocatable-device-code=true")
endif()
add_custom_target(cycles_kernel_optix ALL DEPENDS ${optix_ptx})
cycles_set_solution_folder(cycles_kernel_optix)
@@ -986,7 +995,6 @@ source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS})
source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS})
source_group("kernel" FILES ${SRC_KERNEL_TYPES_HEADERS})
source_group("light" FILES ${SRC_KERNEL_LIGHT_HEADERS})
source_group("osl" FILES ${SRC_KERNEL_OSL_HEADERS})
source_group("sample" FILES ${SRC_KERNEL_SAMPLE_HEADERS})
source_group("svm" FILES ${SRC_KERNEL_SVM_HEADERS})
source_group("util" FILES ${SRC_KERNEL_UTIL_HEADERS})
@@ -1023,7 +1031,6 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_FILM_HEADERS}" ${CYCLE
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/geom)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_INTEGRATOR_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/integrator)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_LIGHT_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/light)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_OSL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/osl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_SAMPLE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/sample)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/svm)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_TYPES_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel)

View File

@@ -297,10 +297,8 @@ ccl_device_inline void bsdf_roughness_eta(const KernelGlobals kg,
ccl_private float2 *roughness,
ccl_private float *eta)
{
#ifdef __SVM__
bool refractive = false;
float alpha = 1.0f;
#endif
switch (sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
*roughness = one_float2();

View File

@@ -69,7 +69,7 @@ ccl_device int bsdf_diffuse_sample(ccl_private const ShaderClosure *sc,
ccl_device int bsdf_translucent_setup(ccl_private DiffuseBsdf *bsdf)
{
bsdf->type = CLOSURE_BSDF_TRANSLUCENT_ID;
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device Spectrum bsdf_translucent_eval(ccl_private const ShaderClosure *sc,

View File

@@ -34,7 +34,7 @@ ccl_device int bsdf_hair_transmission_setup(ccl_private HairBsdf *bsdf)
bsdf->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID;
bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f);
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device Spectrum bsdf_hair_reflection_eval(ccl_private const ShaderClosure *sc,

View File

@@ -196,7 +196,7 @@ ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd,
bsdf->extra->geom = make_float4(Y.x, Y.y, Y.z, h);
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG | SD_BSDF_HAS_TRANSMISSION;
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
}
#endif /* __HAIR__ */

View File

@@ -346,7 +346,7 @@ ccl_device int bsdf_microfacet_ggx_refraction_setup(ccl_private MicrofacetBsdf *
bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device void bsdf_microfacet_ggx_blur(ccl_private ShaderClosure *sc, float roughness)
@@ -776,7 +776,7 @@ ccl_device int bsdf_microfacet_beckmann_refraction_setup(ccl_private MicrofacetB
bsdf->alpha_y = bsdf->alpha_x;
bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device void bsdf_microfacet_beckmann_blur(ccl_private ShaderClosure *sc, float roughness)

View File

@@ -559,7 +559,7 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(ccl_private MicrofacetBsdf
bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID;
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG | SD_BSDF_HAS_TRANSMISSION;
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
}
ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(ccl_private MicrofacetBsdf *bsdf,

View File

@@ -60,13 +60,6 @@ KERNEL_DATA_ARRAY(KernelLight, lights)
KERNEL_DATA_ARRAY(float2, light_background_marginal_cdf)
KERNEL_DATA_ARRAY(float2, light_background_conditional_cdf)
/* light tree */
KERNEL_DATA_ARRAY(KernelLightTreeNode, light_tree_nodes)
KERNEL_DATA_ARRAY(KernelLightTreeEmitter, light_tree_emitters)
KERNEL_DATA_ARRAY(uint, light_to_tree)
KERNEL_DATA_ARRAY(uint, object_lookup_offset)
KERNEL_DATA_ARRAY(uint, triangle_to_tree)
/* particles */
KERNEL_DATA_ARRAY(KernelParticle, particles)

View File

@@ -23,19 +23,24 @@ KERNEL_STRUCT_MEMBER(background, int, volume_shader)
KERNEL_STRUCT_MEMBER(background, float, volume_step_size)
KERNEL_STRUCT_MEMBER(background, int, transparent)
KERNEL_STRUCT_MEMBER(background, float, transparent_roughness_squared_threshold)
/* Portal sampling. */
KERNEL_STRUCT_MEMBER(background, float, portal_weight)
KERNEL_STRUCT_MEMBER(background, int, num_portals)
KERNEL_STRUCT_MEMBER(background, int, portal_offset)
/* Sun sampling. */
KERNEL_STRUCT_MEMBER(background, float, sun_weight)
/* Importance map sampling. */
KERNEL_STRUCT_MEMBER(background, float, map_weight)
KERNEL_STRUCT_MEMBER(background, float, portal_weight)
KERNEL_STRUCT_MEMBER(background, int, map_res_x)
KERNEL_STRUCT_MEMBER(background, int, map_res_y)
/* Multiple importance sampling. */
KERNEL_STRUCT_MEMBER(background, int, use_mis)
/* Lightgroup. */
KERNEL_STRUCT_MEMBER(background, int, lightgroup)
/* Light Index. */
KERNEL_STRUCT_MEMBER(background, int, light_index)
/* Padding. */
KERNEL_STRUCT_MEMBER(background, int, pad1)
KERNEL_STRUCT_MEMBER(background, int, pad2)
KERNEL_STRUCT_MEMBER(background, int, pad3)
KERNEL_STRUCT_END(KernelBackground)
/* BVH: own BVH2 if no native device acceleration struct used. */
@@ -44,11 +49,11 @@ KERNEL_STRUCT_BEGIN(KernelBVH, bvh)
KERNEL_STRUCT_MEMBER(bvh, int, root)
KERNEL_STRUCT_MEMBER(bvh, int, have_motion)
KERNEL_STRUCT_MEMBER(bvh, int, have_curves)
KERNEL_STRUCT_MEMBER(bvh, int, have_points)
KERNEL_STRUCT_MEMBER(bvh, int, have_volumes)
KERNEL_STRUCT_MEMBER(bvh, int, bvh_layout)
KERNEL_STRUCT_MEMBER(bvh, int, use_bvh_steps)
KERNEL_STRUCT_MEMBER(bvh, int, curve_subdivisions)
KERNEL_STRUCT_MEMBER(bvh, int, pad1)
KERNEL_STRUCT_MEMBER(bvh, int, pad2)
KERNEL_STRUCT_END(KernelBVH)
/* Film. */
@@ -97,6 +102,8 @@ KERNEL_STRUCT_MEMBER(film, int, pass_emission)
KERNEL_STRUCT_MEMBER(film, int, pass_background)
KERNEL_STRUCT_MEMBER(film, int, pass_ao)
KERNEL_STRUCT_MEMBER(film, float, pass_alpha_threshold)
KERNEL_STRUCT_MEMBER(film, int, pass_shadow)
KERNEL_STRUCT_MEMBER(film, float, pass_shadow_scale)
KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher)
KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_sample_count)
KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_matte)
@@ -130,6 +137,9 @@ KERNEL_STRUCT_MEMBER(film, int, use_approximate_shadow_catcher)
KERNEL_STRUCT_MEMBER(film, int, pass_guiding_color)
KERNEL_STRUCT_MEMBER(film, int, pass_guiding_probability)
KERNEL_STRUCT_MEMBER(film, int, pass_guiding_avg_roughness)
/* Padding. */
KERNEL_STRUCT_MEMBER(film, int, pad1)
KERNEL_STRUCT_MEMBER(film, int, pad2)
KERNEL_STRUCT_END(KernelFilm)
/* Integrator. */
@@ -137,18 +147,10 @@ KERNEL_STRUCT_END(KernelFilm)
KERNEL_STRUCT_BEGIN(KernelIntegrator, integrator)
/* Emission. */
KERNEL_STRUCT_MEMBER(integrator, int, use_direct_light)
KERNEL_STRUCT_MEMBER(integrator, int, use_light_mis)
KERNEL_STRUCT_MEMBER(integrator, int, use_light_tree)
KERNEL_STRUCT_MEMBER(integrator, int, num_lights)
KERNEL_STRUCT_MEMBER(integrator, int, num_distant_lights)
KERNEL_STRUCT_MEMBER(integrator, int, num_background_lights)
/* Portal sampling. */
KERNEL_STRUCT_MEMBER(integrator, int, num_portals)
KERNEL_STRUCT_MEMBER(integrator, int, portal_offset)
/* Flat light distribution. */
KERNEL_STRUCT_MEMBER(integrator, int, num_distribution)
KERNEL_STRUCT_MEMBER(integrator, float, distribution_pdf_triangles)
KERNEL_STRUCT_MEMBER(integrator, float, distribution_pdf_lights)
KERNEL_STRUCT_MEMBER(integrator, int, num_all_lights)
KERNEL_STRUCT_MEMBER(integrator, float, pdf_triangles)
KERNEL_STRUCT_MEMBER(integrator, float, pdf_lights)
KERNEL_STRUCT_MEMBER(integrator, float, light_inv_rr_threshold)
/* Bounces. */
KERNEL_STRUCT_MEMBER(integrator, int, min_bounce)
@@ -175,11 +177,12 @@ KERNEL_STRUCT_MEMBER(integrator, int, seed)
/* Clamp. */
KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_direct)
KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_indirect)
/* MIS. */
KERNEL_STRUCT_MEMBER(integrator, int, use_lamp_mis)
/* Caustics. */
KERNEL_STRUCT_MEMBER(integrator, int, use_caustics)
/* Sampling pattern. */
KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern)
KERNEL_STRUCT_MEMBER(integrator, int, pmj_sequence_size)
KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
/* Volume render. */
KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
@@ -191,6 +194,7 @@ KERNEL_STRUCT_MEMBER(integrator, int, has_shadow_catcher)
KERNEL_STRUCT_MEMBER(integrator, int, filter_closures)
/* MIS debugging. */
KERNEL_STRUCT_MEMBER(integrator, int, direct_light_sampling_type)
/* Path Guiding */
KERNEL_STRUCT_MEMBER(integrator, float, surface_guiding_probability)
KERNEL_STRUCT_MEMBER(integrator, float, volume_guiding_probability)
@@ -201,10 +205,6 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_surface_guiding)
KERNEL_STRUCT_MEMBER(integrator, int, use_volume_guiding)
KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_direct_light)
KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_mis_weights)
/* Padding. */
KERNEL_STRUCT_MEMBER(integrator, int, pad1)
KERNEL_STRUCT_MEMBER(integrator, int, pad2)
KERNEL_STRUCT_END(KernelIntegrator)
/* SVM. For shader specialization. */

View File

@@ -30,7 +30,6 @@ typedef unsigned long long uint64_t;
/* Qualifiers */
#define ccl_device __device__ __inline__
#define ccl_device_extern extern "C" __device__
#if __CUDA_ARCH__ < 500
# define ccl_device_inline __device__ __forceinline__
# define ccl_device_forceinline __device__ __forceinline__
@@ -110,14 +109,14 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D
typedef unsigned short half;
ccl_device_forceinline half __float2half(const float f)
__device__ half __float2half(const float f)
{
half val;
asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
return val;
}
ccl_device_forceinline float __half2float(const half h)
__device__ float __half2float(const half h)
{
float val;
asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));

View File

@@ -314,7 +314,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
int kernel_index);
ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index;
gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
num_states,
indices,
num_indices,
ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix
@@ -329,7 +333,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
int kernel_index);
ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index;
gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
num_states,
indices,
num_indices,
ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix
@@ -341,7 +349,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
{
ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) != 0);
gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
num_states,
indices,
num_indices,
ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix
@@ -354,8 +366,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
{
ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) == 0);
gpu_parallel_active_index_array(
num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass);
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
num_states,
indices + indices_offset,
num_indices,
ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix
@@ -368,8 +383,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
{
ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0);
gpu_parallel_active_index_array(
num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass);
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
num_states,
indices + indices_offset,
num_indices,
ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix
@@ -413,7 +431,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
int num_active_paths);
ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths;
gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
num_states,
indices,
num_indices,
ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix
@@ -447,7 +469,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
int num_active_paths);
ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths;
gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
num_states,
indices,
num_indices,
ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix

View File

@@ -56,7 +56,7 @@ void gpu_parallel_active_index_array_impl(const uint num_states,
const uint is_active = (state_index < num_states) ? is_active_op(state_index) : 0;
#else /* !__KERNEL__ONEAPI__ */
# ifndef __KERNEL_METAL__
template<typename IsActiveOp>
template<uint blocksize, typename IsActiveOp>
__device__
# endif
void
@@ -79,10 +79,6 @@ __device__
{
extern ccl_gpu_shared int warp_offset[];
# ifndef __KERNEL_METAL__
const uint blocksize = ccl_gpu_block_dim_x;
# endif
const uint thread_index = ccl_gpu_thread_idx_x;
const uint thread_warp = thread_index % ccl_gpu_warp_size;
@@ -153,7 +149,7 @@ __device__
#ifdef __KERNEL_METAL__
# define gpu_parallel_active_index_array(num_states, indices, num_indices, is_active_op) \
# define gpu_parallel_active_index_array(dummy, num_states, indices, num_indices, is_active_op) \
const uint is_active = (ccl_gpu_global_id_x() < num_states) ? \
is_active_op(ccl_gpu_global_id_x()) : \
0; \
@@ -171,13 +167,15 @@ __device__
simdgroup_offset)
#elif defined(__KERNEL_ONEAPI__)
# define gpu_parallel_active_index_array(num_states, indices, num_indices, is_active_op) \
# define gpu_parallel_active_index_array( \
blocksize, num_states, indices, num_indices, is_active_op) \
gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op)
#else
# define gpu_parallel_active_index_array(num_states, indices, num_indices, is_active_op) \
gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op)
# define gpu_parallel_active_index_array( \
blocksize, num_states, indices, num_indices, is_active_op) \
gpu_parallel_active_index_array_impl<blocksize>(num_states, indices, num_indices, is_active_op)
#endif

View File

@@ -28,7 +28,6 @@ typedef unsigned long long uint64_t;
/* Qualifiers */
#define ccl_device __device__ __inline__
#define ccl_device_extern extern "C" __device__
#define ccl_device_inline __device__ __inline__
#define ccl_device_forceinline __device__ __forceinline__
#define ccl_device_noinline __device__ __noinline__

View File

@@ -79,8 +79,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
metalrt_intersector_type metalrt_intersect;
bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
if (triangle_only) {
if (!kernel_data.bvh.have_curves) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}
@@ -178,9 +177,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
metalrt_intersector_type metalrt_intersect;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
if (triangle_only) {
if (!kernel_data.bvh.have_curves) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}
@@ -208,9 +205,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
if (lcg_state) {
*lcg_state = payload.lcg_state;
}
if (local_isect) {
*local_isect = payload.local_isect;
}
*local_isect = payload.local_isect;
return payload.result;
}
@@ -245,9 +240,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
metalrt_intersector_type metalrt_intersect;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
if (triangle_only) {
if (!kernel_data.bvh.have_curves) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}
@@ -314,9 +307,7 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
metalrt_intersector_type metalrt_intersect;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
if (triangle_only) {
if (!kernel_data.bvh.have_curves) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}

View File

@@ -38,7 +38,6 @@ using namespace metal::raytracing;
# define ccl_device_noinline ccl_device __attribute__((noinline))
#endif
#define ccl_device_extern extern "C"
#define ccl_device_noinline_cpu ccl_device
#define ccl_device_inline_method ccl_device
#define ccl_global device

View File

@@ -182,20 +182,20 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
const float u = barycentrics.x;
const float v = barycentrics.y;
const int prim_type = kernel_data_fetch(objects, object).primitive_type;
int type = prim_type;
int type = 0;
if (intersection_type == METALRT_HIT_TRIANGLE) {
type = kernel_data_fetch(objects, object).primitive_type;
}
# ifdef __HAIR__
if (intersection_type != METALRT_HIT_TRIANGLE) {
if ( (prim_type == PRIMITIVE_CURVE_THICK || prim_type == PRIMITIVE_CURVE_RIBBON)) {
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
type = segment.type;
prim = segment.prim;
/* Filter out curve endcaps */
if (u == 0.0f || u == 1.0f) {
/* continue search */
return true;
}
else {
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
type = segment.type;
prim = segment.prim;
/* Filter out curve endcaps */
if (u == 0.0f || u == 1.0f) {
/* continue search */
return true;
}
}
# endif
@@ -279,7 +279,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
/* Continue tracing. */
# endif /* __TRANSPARENT_SHADOWS__ */
#endif /* __SHADOW_RECORD_ALL__ */
@@ -327,8 +327,7 @@ inline TReturnType metalrt_visibility_test(
TReturnType result;
#ifdef __HAIR__
const int type = kernel_data_fetch(objects, object).primitive_type;
if (intersection_type == METALRT_HIT_BOUNDING_BOX && (type == PRIMITIVE_CURVE_THICK || type == PRIMITIVE_CURVE_RIBBON)) {
if (intersection_type == METALRT_HIT_BOUNDING_BOX) {
/* Filter out curve endcaps. */
if (u == 0.0f || u == 1.0f) {
result.accept = false;
@@ -464,12 +463,7 @@ ccl_device_inline void metalrt_intersection_curve_shadow(
const float ray_tmax,
thread BoundingBoxIntersectionResult &result)
{
# ifdef __VISIBILITY_FLAG__
const uint visibility = payload.visibility;
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
return;
}
# endif
Intersection isect;
isect.t = ray_tmax;
@@ -691,12 +685,7 @@ ccl_device_inline void metalrt_intersection_point_shadow(
const float ray_tmax,
thread BoundingBoxIntersectionResult &result)
{
# ifdef __VISIBILITY_FLAG__
const uint visibility = payload.visibility;
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
return;
}
# endif
Intersection isect;
isect.t = ray_tmax;

View File

@@ -28,7 +28,6 @@
/* Qualifier wrappers for different names on different devices */
#define ccl_device
#define ccl_device_extern extern "C"
#define ccl_global
#define ccl_always_inline __attribute__((always_inline))
#define ccl_device_inline inline

View File

@@ -33,16 +33,14 @@ typedef unsigned long long uint64_t;
#endif
#define ccl_device \
static __device__ \
__forceinline__ // Function calls are bad for OptiX performance, so inline everything
#define ccl_device_extern extern "C" __device__
__device__ __forceinline__ // Function calls are bad for OptiX performance, so inline everything
#define ccl_device_inline ccl_device
#define ccl_device_forceinline ccl_device
#define ccl_device_inline_method __device__ __forceinline__
#define ccl_device_noinline static __device__ __noinline__
#define ccl_device_inline_method ccl_device
#define ccl_device_noinline __device__ __noinline__
#define ccl_device_noinline_cpu ccl_device
#define ccl_global
#define ccl_inline_constant static __constant__
#define ccl_inline_constant __constant__
#define ccl_device_constant __constant__ __device__
#define ccl_constant const
#define ccl_gpu_shared __shared__
@@ -59,6 +57,23 @@ typedef unsigned long long uint64_t;
#define kernel_assert(cond)
/* GPU thread, block, grid size and index */
#define ccl_gpu_thread_idx_x (threadIdx.x)
#define ccl_gpu_block_dim_x (blockDim.x)
#define ccl_gpu_block_idx_x (blockIdx.x)
#define ccl_gpu_grid_dim_x (gridDim.x)
#define ccl_gpu_warp_size (warpSize)
#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
/* GPU warp synchronization. */
#define ccl_gpu_syncthreads() __syncthreads()
#define ccl_gpu_ballot(predicate) __ballot_sync(0xFFFFFFFF, predicate)
/* GPU texture objects */
typedef unsigned long long CUtexObject;
@@ -86,14 +101,14 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D
typedef unsigned short half;
ccl_device_forceinline half __float2half(const float f)
__device__ half __float2half(const float f)
{
half val;
asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
return val;
}
ccl_device_forceinline float __half2float(const half h)
__device__ float __half2float(const half h)
{
float val;
asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));

View File

@@ -25,7 +25,6 @@ struct KernelParamsOptiX {
/* Kernel arguments */
const int *path_index_array;
float *render_buffer;
int offset;
/* Global scene data and textures */
KernelData data;
@@ -37,11 +36,7 @@ struct KernelParamsOptiX {
};
#ifdef __NVCC__
extern "C"
# ifndef __CUDACC_RDC__
static
# endif
__constant__ KernelParamsOptiX kernel_params;
extern "C" static __constant__ KernelParamsOptiX kernel_params;
#endif
/* Abstraction macros */

View File

@@ -1,83 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#define WITH_OSL
/* Copy of the regular OptiX kernels with additional OSL support. */
#include "kernel/device/optix/kernel_shader_raytrace.cu"
#include "kernel/bake/bake.h"
#include "kernel/integrator/shade_background.h"
#include "kernel/integrator/shade_light.h"
#include "kernel/integrator/shade_shadow.h"
#include "kernel/integrator/shade_volume.h"
extern "C" __global__ void __raygen__kernel_optix_integrator_shade_background()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (kernel_params.path_index_array) ?
kernel_params.path_index_array[global_index] :
global_index;
integrator_shade_background(nullptr, path_index, kernel_params.render_buffer);
}
extern "C" __global__ void __raygen__kernel_optix_integrator_shade_light()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (kernel_params.path_index_array) ?
kernel_params.path_index_array[global_index] :
global_index;
integrator_shade_light(nullptr, path_index, kernel_params.render_buffer);
}
extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (kernel_params.path_index_array) ?
kernel_params.path_index_array[global_index] :
global_index;
integrator_shade_surface(nullptr, path_index, kernel_params.render_buffer);
}
extern "C" __global__ void __raygen__kernel_optix_integrator_shade_volume()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (kernel_params.path_index_array) ?
kernel_params.path_index_array[global_index] :
global_index;
integrator_shade_volume(nullptr, path_index, kernel_params.render_buffer);
}
extern "C" __global__ void __raygen__kernel_optix_integrator_shade_shadow()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (kernel_params.path_index_array) ?
kernel_params.path_index_array[global_index] :
global_index;
integrator_shade_shadow(nullptr, path_index, kernel_params.render_buffer);
}
extern "C" __global__ void __raygen__kernel_optix_shader_eval_displace()
{
KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
float *const output = kernel_params.render_buffer;
const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
kernel_displace_evaluate(nullptr, input, output, global_index);
}
extern "C" __global__ void __raygen__kernel_optix_shader_eval_background()
{
KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
float *const output = kernel_params.render_buffer;
const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
kernel_background_evaluate(nullptr, input, output, global_index);
}
extern "C" __global__ void __raygen__kernel_optix_shader_eval_curve_shadow_transparency()
{
KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
float *const output = kernel_params.render_buffer;
const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
kernel_curve_shadow_transparency_evaluate(nullptr, input, output, global_index);
}

View File

@@ -58,29 +58,13 @@ ccl_device bool film_adaptive_sampling_convergence_check(KernelGlobals kg,
const float4 I = kernel_read_pass_float4(buffer + kernel_data.film.pass_combined);
const float sample = __float_as_uint(buffer[kernel_data.film.pass_sample_count]);
const float intensity_scale = kernel_data.film.exposure / sample;
const float inv_sample = 1.0f / sample;
/* The per pixel error as seen in section 2.1 of
* "A hierarchical automatic stopping condition for Monte Carlo global illumination" */
const float error_difference = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) *
intensity_scale;
const float intensity = (I.x + I.y + I.z) * intensity_scale;
/* Anything with R+G+B > 1 is highly exposed - even in sRGB it's a range that
* some displays aren't even able to display without significant losses in
* detalization. Everything with R+G+B > 3 is overexposed and should receive
* even less samples. Filmic-like curves need maximum sampling rate at
* intensity near 0.1-0.2, so threshold of 1 for R+G+B leaves an additional
* fstop in case it is needed for compositing.
*/
float error_normalize;
if (intensity < 1.0f) {
error_normalize = sqrtf(intensity);
}
else {
error_normalize = intensity;
}
inv_sample;
const float error_normalize = sqrtf((I.x + I.y + I.z) * inv_sample);
/* A small epsilon is added to the divisor to prevent division by zero. */
const float error = error_difference / (0.0001f + error_normalize);
const bool did_converge = (error < threshold);

View File

@@ -42,27 +42,27 @@ ccl_device_inline void film_write_data_passes(KernelGlobals kg,
ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
if (INTEGRATOR_STATE(state, path, sample) == 0) {
if (flag & PASSMASK(DEPTH)) {
const float depth = camera_z_depth(kg, sd->P);
film_overwrite_pass_float(buffer + kernel_data.film.pass_depth, depth);
}
if (flag & PASSMASK(OBJECT_ID)) {
const float id = object_pass_id(kg, sd->object);
film_overwrite_pass_float(buffer + kernel_data.film.pass_object_id, id);
}
if (flag & PASSMASK(MATERIAL_ID)) {
const float id = shader_pass_id(kg, sd);
film_overwrite_pass_float(buffer + kernel_data.film.pass_material_id, id);
}
if (flag & PASSMASK(POSITION)) {
const float3 position = sd->P;
film_overwrite_pass_float3(buffer + kernel_data.film.pass_position, position);
}
}
if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f ||
average(surface_shader_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) {
if (INTEGRATOR_STATE(state, path, sample) == 0) {
if (flag & PASSMASK(DEPTH)) {
const float depth = camera_z_depth(kg, sd->P);
film_write_pass_float(buffer + kernel_data.film.pass_depth, depth);
}
if (flag & PASSMASK(OBJECT_ID)) {
const float id = object_pass_id(kg, sd->object);
film_write_pass_float(buffer + kernel_data.film.pass_object_id, id);
}
if (flag & PASSMASK(MATERIAL_ID)) {
const float id = shader_pass_id(kg, sd);
film_write_pass_float(buffer + kernel_data.film.pass_material_id, id);
}
if (flag & PASSMASK(POSITION)) {
const float3 position = sd->P;
film_write_pass_float3(buffer + kernel_data.film.pass_position, position);
}
}
if (flag & PASSMASK(NORMAL)) {
const float3 normal = surface_shader_average_normal(kg, sd);
film_write_pass_float3(buffer + kernel_data.film.pass_normal, normal);
@@ -157,47 +157,4 @@ ccl_device_inline void film_write_data_passes(KernelGlobals kg,
#endif
}
ccl_device_inline void film_write_data_passes_background(
KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
{
#ifdef __PASSES__
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
if (!(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
return;
}
/* Don't write data passes for paths that were split off for shadow catchers
* to avoid double-counting. */
if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) {
return;
}
const int flag = kernel_data.film.pass_flag;
if (!(flag & PASS_ANY)) {
return;
}
if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
if (INTEGRATOR_STATE(state, path, sample) == 0) {
if (flag & PASSMASK(DEPTH)) {
film_overwrite_pass_float(buffer + kernel_data.film.pass_depth, 0.0f);
}
if (flag & PASSMASK(OBJECT_ID)) {
film_overwrite_pass_float(buffer + kernel_data.film.pass_object_id, 0.0f);
}
if (flag & PASSMASK(MATERIAL_ID)) {
film_overwrite_pass_float(buffer + kernel_data.film.pass_material_id, 0.0f);
}
if (flag & PASSMASK(POSITION)) {
film_overwrite_pass_float3(buffer + kernel_data.film.pass_position, zero_float3());
}
}
}
#endif
}
CCL_NAMESPACE_END

View File

@@ -527,6 +527,17 @@ ccl_device_inline void film_write_direct_light(KernelGlobals kg,
film_write_pass_spectrum(buffer + pass_offset, contribution);
}
}
/* Write shadow pass. */
if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) &&
(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
const Spectrum unshadowed_throughput = INTEGRATOR_STATE(
state, shadow_path, unshadowed_throughput);
const Spectrum shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
const Spectrum shadow = safe_divide(shadowed_throughput, unshadowed_throughput) *
kernel_data.film.pass_shadow_scale;
film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow, shadow);
}
}
#endif
}

View File

@@ -12,7 +12,6 @@
CCL_NAMESPACE_BEGIN
/* Get pointer to pixel in render buffer. */
ccl_device_forceinline ccl_global float *film_pass_pixel_render_buffer(
KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer)
{
@@ -22,8 +21,7 @@ ccl_device_forceinline ccl_global float *film_pass_pixel_render_buffer(
return render_buffer + render_buffer_offset;
}
/* Accumulate in passes. */
/* Write to pixel. */
ccl_device_inline void film_write_pass_float(ccl_global float *ccl_restrict buffer, float value)
{
#ifdef __ATOMIC_PASS_WRITE__
@@ -76,25 +74,6 @@ ccl_device_inline void film_write_pass_float4(ccl_global float *ccl_restrict buf
#endif
}
/* Overwrite for passes that only write on sample 0. This assumes only a single thread will write
* to this pixel and no atomics are needed. */
ccl_device_inline void film_overwrite_pass_float(ccl_global float *ccl_restrict buffer,
float value)
{
*buffer = value;
}
ccl_device_inline void film_overwrite_pass_float3(ccl_global float *ccl_restrict buffer,
float3 value)
{
buffer[0] = value.x;
buffer[1] = value.y;
buffer[2] = value.z;
}
/* Read back from passes. */
ccl_device_inline float kernel_read_pass_float(ccl_global float *ccl_restrict buffer)
{
return *buffer;

View File

@@ -24,8 +24,8 @@ ccl_device void displacement_shader_eval(KernelGlobals kg,
/* this will modify sd->P */
#ifdef __OSL__
if (kernel_data.kernel_features & KERNEL_FEATURE_OSL) {
osl_eval_nodes<SHADER_TYPE_DISPLACEMENT>(kg, state, sd, 0);
if (kg->osl) {
OSLShader::eval_displacement(kg, state, sd);
}
else
#endif

View File

@@ -156,13 +156,6 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
u = v;
v = 1.0f - tmp - v;
const float tmpdx = dudx;
const float tmpdy = dudy;
dudx = dvdx;
dudy = dvdy;
dvdx = -tmpdx - dvdx;
dvdy = -tmpdy - dvdy;
/* Position and normal on triangle. */
const int object = kernel_data.bake.object_index;
float3 P, Ng;

Some files were not shown because too many files have changed in this diff Show More