Compare commits
27 Commits
temp-image...temp-gpenc
- aa9b976e9f
- 410b87ca78
- 2b9994257f
- a035861d9d
- 8712f952e0
- 28de4468d9
- 9c717dcff8
- 2c3ee8854f
- e3ab75d17d
- 67e053b1fb
- 3150277d0c
- 7437b6b4bc
- 0dd98c54b9
- 910b579aff
- f1dbbbbd45
- 101819bdf9
- 62b29f71b9
- 3d2dc8c897
- 3ebd01675e
- 9b24e5d54f
- 535662cd0d
- dc6281d87f
- ad87f81e38
- 8965118eec
- 71e26f6eff
- 1e286d7f8f
- 2ec1e38b91
@@ -162,7 +162,6 @@ PenaltyBreakString: 1000000
ForEachMacros:
- BEGIN_ANIMFILTER_SUBCHANNELS
- BKE_pbvh_vertex_iter_begin
- BKE_pbvh_face_iter_begin
- BLI_FOREACH_SPARSE_RANGE
- BLI_SMALLSTACK_ITER_BEGIN
- BMO_ITER
@@ -257,12 +257,6 @@ if(UNIX AND NOT (APPLE OR HAIKU))
option(WITH_GHOST_WAYLAND_DYNLOAD "Enable runtime dynamic WAYLAND libraries loading" ON)
mark_as_advanced(WITH_GHOST_WAYLAND_DYNLOAD)
set(WITH_GHOST_WAYLAND_APP_ID "" CACHE STRING "\
The application ID used for Blender (use default when an empty string), \
this can be used to differentiate Blender instances by version or branch for example."
)
mark_as_advanced(WITH_GHOST_WAYLAND_APP_ID)
endif()
endif()
@@ -345,12 +339,8 @@ if(APPLE)
else()
set(WITH_COREAUDIO OFF)
endif()
if(NOT WIN32)
if(APPLE)
option(WITH_JACK "Enable JACK Support (http://www.jackaudio.org)" OFF)
else()
option(WITH_JACK "Enable JACK Support (http://www.jackaudio.org)" ON)
endif()
if(UNIX AND NOT APPLE)
option(WITH_JACK "Enable JACK Support (http://www.jackaudio.org)" ON)
option(WITH_JACK_DYNLOAD "Enable runtime dynamic JACK libraries loading" OFF)
else()
set(WITH_JACK OFF)
@@ -467,6 +457,7 @@ if(NOT APPLE)
option(WITH_CYCLES_CUDA_BINARIES "Build Cycles NVIDIA CUDA binaries" OFF)
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF)
option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime (for developers, makes cuda-gdb work)" ON)
@@ -474,6 +465,7 @@ if(NOT APPLE)
set(CYCLES_RUNTIME_OPTIX_ROOT_DIR "" CACHE PATH "Path to the OptiX SDK root directory. When set, this path will be used at runtime to compile OptiX kernels.")
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
mark_as_advanced(WITH_CYCLES_CUBIN_COMPILER)
mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
mark_as_advanced(WITH_CUDA_DYNLOAD)
mark_as_advanced(OPTIX_ROOT_DIR)
@@ -484,7 +476,7 @@ endif()
if(NOT APPLE)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
endif()
@@ -1239,11 +1231,12 @@ if(WITH_OPENGL)
add_definitions(-DWITH_OPENGL)
endif()
#-----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Configure Vulkan.
if(WITH_VULKAN_BACKEND)
list(APPEND BLENDER_GL_LIBRARIES ${VULKAN_LIBRARIES})
add_definitions(-DWITH_VULKAN_BACKEND)
endif()
# -----------------------------------------------------------------------------
@@ -40,15 +40,15 @@ ver-ocio:,ver-oiio:,ver-llvm:,ver-osl:,ver-osd:,ver-openvdb:,ver-xr-openxr:,ver-
force-all,force-python,force-boost,force-tbb,\
force-ocio,force-imath,force-openexr,force-oiio,force-llvm,force-osl,force-osd,force-openvdb,\
force-ffmpeg,force-opencollada,force-alembic,force-embree,force-oidn,force-usd,\
force-xr-openxr,force-level-zero,force-openpgl,\
force-xr-openxr,force-level-zero, force-openpgl,\
build-all,build-python,build-boost,build-tbb,\
build-ocio,build-imath,build-openexr,build-oiio,build-llvm,build-osl,build-osd,build-openvdb,\
build-ffmpeg,build-opencollada,build-alembic,build-embree,build-oidn,build-usd,\
build-xr-openxr,build-level-zero,build-openpgl,\
build-xr-openxr,build-level-zero, build-openpgl,\
skip-python,skip-boost,skip-tbb,\
skip-ocio,skip-imath,skip-openexr,skip-oiio,skip-llvm,skip-osl,skip-osd,skip-openvdb,\
skip-ffmpeg,skip-opencollada,skip-alembic,skip-embree,skip-oidn,skip-usd,\
skip-xr-openxr,skip-level-zero,skip-openpgl \
skip-xr-openxr,skip-level-zero, skip-openpgl \
-- "$@" \
)
@@ -602,9 +602,9 @@ LEVEL_ZERO_FORCE_BUILD=false
LEVEL_ZERO_FORCE_REBUILD=false
LEVEL_ZERO_SKIP=false
OPENPGL_VERSION="0.4.1"
OPENPGL_VERSION="0.4.0"
OPENPGL_VERSION_SHORT="0.4"
OPENPGL_VERSION_MIN="0.4.1"
OPENPGL_VERSION_MIN="0.3.1"
OPENPGL_VERSION_MEX="0.5"
OPENPGL_FORCE_BUILD=false
OPENPGL_FORCE_REBUILD=false
@@ -1,59 +0,0 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2022 Blender Foundation.
# - Find MoltenVK libraries
# Find the MoltenVK includes and libraries
# This module defines
# MOLTENVK_INCLUDE_DIRS, where to find MoltenVK headers, Set when
# MOLTENVK_INCLUDE_DIR is found.
# MOLTENVK_LIBRARIES, libraries to link against to use MoltenVK.
# MOLTENVK_ROOT_DIR, The base directory to search for MoltenVK.
# This can also be an environment variable.
# MOLTENVK_FOUND, If false, do not try to use MoltenVK.
#
# If MOLTENVK_ROOT_DIR was defined in the environment, use it.
IF(NOT MOLTENVK_ROOT_DIR AND NOT $ENV{MOLTENVK_ROOT_DIR} STREQUAL "")
SET(MOLTENVK_ROOT_DIR $ENV{MOLTENVK_ROOT_DIR})
ENDIF()
SET(_moltenvk_SEARCH_DIRS
${MOLTENVK_ROOT_DIR}
${LIBDIR}/vulkan/MoltenVK
)
FIND_PATH(MOLTENVK_INCLUDE_DIR
NAMES
MoltenVK/vk_mvk_moltenvk.h
HINTS
${_moltenvk_SEARCH_DIRS}
PATH_SUFFIXES
include
)
FIND_LIBRARY(MOLTENVK_LIBRARY
NAMES
MoltenVK
HINTS
${_moltenvk_SEARCH_DIRS}
PATH_SUFFIXES
dylib/macOS
)
# handle the QUIETLY and REQUIRED arguments and set MOLTENVK_FOUND to TRUE if
# all listed variables are TRUE
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(MoltenVK DEFAULT_MSG MOLTENVK_LIBRARY MOLTENVK_INCLUDE_DIR)
IF(MOLTENVK_FOUND)
SET(MOLTENVK_LIBRARIES ${MOLTENVK_LIBRARY})
SET(MOLTENVK_INCLUDE_DIRS ${MOLTENVK_INCLUDE_DIR})
ENDIF()
MARK_AS_ADVANCED(
MOLTENVK_INCLUDE_DIR
MOLTENVK_LIBRARY
)
UNSET(_moltenvk_SEARCH_DIRS)
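The removed find module above follows the stock CMake find-module convention: locate the header and library, run FIND_PACKAGE_HANDLE_STANDARD_ARGS, and expose MOLTENVK_INCLUDE_DIRS / MOLTENVK_LIBRARIES to callers. A minimal consumer sketch (hypothetical target and source names; assumes the module directory is on CMAKE_MODULE_PATH):

```cmake
# Hypothetical consumer of the FindMoltenVK module above; the target and
# source file names are illustrative, not part of the Blender build.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/build_files/cmake/Modules")  # assumed module location
find_package(MoltenVK REQUIRED)

add_library(gpu_backend_sketch STATIC gpu_backend_sketch.cc)
target_include_directories(gpu_backend_sketch PRIVATE ${MOLTENVK_INCLUDE_DIRS})
target_link_libraries(gpu_backend_sketch PRIVATE ${MOLTENVK_LIBRARIES})
```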
@@ -103,6 +103,10 @@ if(EXISTS ${SOURCE_DIR}/.git)
endif()
endif()
if(MY_WC_BRANCH MATCHES "^blender-v")
set(MY_WC_BRANCH "master")
endif()
unset(_git_below_check)
endif()
@@ -1240,7 +1240,7 @@ endmacro()
macro(set_and_warn_library_found
_library_name _library_found _setting)
if(((NOT ${_library_found}) OR (NOT ${${_library_found}})) AND ${${_setting}})
if(NOT ${${_library_found}} AND ${${_setting}})
if(WITH_STRICT_BUILD_OPTIONS)
message(SEND_ERROR "${_library_name} required but not found")
else()
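The revised condition also handles the case where the found-variable is not defined at all. The macro is invoked later in this comparison for OptiX, so a minimal usage sketch of the pattern looks like this (names taken from the Cycles hunk further down):

```cmake
# Usage pattern for set_and_warn_library_found(): if WITH_CYCLES_DEVICE_OPTIX is ON
# but OPTIX_FOUND is false, the macro raises SEND_ERROR under WITH_STRICT_BUILD_OPTIONS;
# the non-strict branch is truncated in the hunk above.
find_package(OptiX 7.3.0)
set_and_warn_library_found("OptiX" OPTIX_FOUND WITH_CYCLES_DEVICE_OPTIX)
```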
@@ -106,8 +106,8 @@ if(WIN32)
set(CPACK_WIX_LIGHT_EXTRA_FLAGS -dcl:medium)
endif()
set(CPACK_PACKAGE_EXECUTABLES "blender-launcher" "Blender ${MAJOR_VERSION}.${MINOR_VERSION}")
set(CPACK_CREATE_DESKTOP_LINKS "blender-launcher" "Blender ${MAJOR_VERSION}.${MINOR_VERSION}")
set(CPACK_PACKAGE_EXECUTABLES "blender-launcher" "Blender")
set(CPACK_CREATE_DESKTOP_LINKS "blender-launcher" "Blender")
include(CPack)
@@ -100,23 +100,6 @@ if(WITH_USD)
find_package(USD REQUIRED)
endif()
if(WITH_VULKAN_BACKEND)
find_package(MoltenVK REQUIRED)
if(EXISTS ${LIBDIR}/vulkan)
set(VULKAN_FOUND On)
set(VULKAN_ROOT_DIR ${LIBDIR}/vulkan/macOS)
set(VULKAN_INCLUDE_DIR ${VULKAN_ROOT_DIR}/include)
set(VULKAN_LIBRARY ${VULKAN_ROOT_DIR}/lib/libvulkan.1.dylib)
set(VULKAN_INCLUDE_DIRS ${VULKAN_INCLUDE_DIR} ${MOLTENVK_INCLUDE_DIRS})
set(VULKAN_LIBRARIES ${VULKAN_LIBRARY} ${MOLTENVK_LIBRARIES})
else()
message(WARNING "Vulkan SDK was not found, disabling WITH_VULKAN_BACKEND")
set(WITH_VULKAN_BACKEND OFF)
endif()
endif()
if(WITH_OPENSUBDIV)
find_package(OpenSubdiv)
endif()
@@ -108,10 +108,6 @@ find_package_wrapper(ZLIB REQUIRED)
find_package_wrapper(Zstd REQUIRED)
find_package_wrapper(Epoxy REQUIRED)
if(WITH_VULKAN_BACKEND)
find_package_wrapper(Vulkan REQUIRED)
endif()
function(check_freetype_for_brotli)
include(CheckSymbolExists)
set(CMAKE_REQUIRED_INCLUDES ${FREETYPE_INCLUDE_DIRS})
@@ -326,10 +322,9 @@ if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
file(GLOB _sycl_runtime_libraries
${SYCL_ROOT_DIR}/lib/libsycl.so
${SYCL_ROOT_DIR}/lib/libsycl.so.*
${SYCL_ROOT_DIR}/lib/libpi_*.so
${SYCL_ROOT_DIR}/lib/libpi_level_zero.so
)
list(FILTER _sycl_runtime_libraries EXCLUDE REGEX ".*\.py")
list(REMOVE_ITEM _sycl_runtime_libraries "${SYCL_ROOT_DIR}/lib/libpi_opencl.so")
list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries})
unset(_sycl_runtime_libraries)
endif()
@@ -970,9 +965,16 @@ if(WITH_COMPILER_CCACHE)
endif()
endif()
# Always link with libatomic if available, as it is required for data types
# which don't have intrinsics.
function(configure_atomic_lib_if_needed)
# On some platforms certain atomic operations are not possible with assembly and/or intrinsics and
# they are emulated in software with locks. For example, on armel there is no intrinsics to grant
# 64 bit atomic operations and STL library uses libatomic to offload software emulation of atomics
# to.
# This function will check whether libatomic is required and if so will configure linker flags.
# If atomic operations are possible without libatomic then linker flags are left as-is.
function(CONFIGURE_ATOMIC_LIB_IF_NEEDED)
# Source which is used to enforce situation when software emulation of atomics is required.
# Assume that using 64bit integer gives a definitive answer (as in, if 64bit atomic operations
# are possible using assembly/intrinsics 8, 16, and 32 bit operations will also be possible.
set(_source
"#include <atomic>
#include <cstdint>

@@ -983,12 +985,25 @@ function(configure_atomic_lib_if_needed)
)
include(CheckCXXSourceCompiles)
set(CMAKE_REQUIRED_LIBRARIES atomic)
check_cxx_source_compiles("${_source}" ATOMIC_OPS_WITH_LIBATOMIC)
unset(CMAKE_REQUIRED_LIBRARIES)
check_cxx_source_compiles("${_source}" ATOMIC_OPS_WITHOUT_LIBATOMIC)
if(ATOMIC_OPS_WITH_LIBATOMIC)
set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -latomic" PARENT_SCOPE)
if(NOT ATOMIC_OPS_WITHOUT_LIBATOMIC)
# Compilation of the test program has failed.
# Try it again with -latomic to see if this is what is needed, or whether something else is
# going on.
set(CMAKE_REQUIRED_LIBRARIES atomic)
check_cxx_source_compiles("${_source}" ATOMIC_OPS_WITH_LIBATOMIC)
unset(CMAKE_REQUIRED_LIBRARIES)
if(ATOMIC_OPS_WITH_LIBATOMIC)
set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -latomic" PARENT_SCOPE)
else()
# Atomic operations are required part of Blender and it is not possible to process forward.
# We expect that either standard library or libatomic will make atomics to work. If both
# cases has failed something fishy o na bigger scope is going on.
message(FATAL_ERROR "Failed to detect required configuration for atomic operations")
endif()
endif()
endfunction()
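The function above probes once without extra link libraries and retries with -latomic only when the first check fails. A self-contained sketch of the same probe, outside the Blender build (project name and failure message are illustrative):

```cmake
# Minimal sketch of the libatomic probe used above. check_cxx_source_compiles()
# caches its result, so the two checks use distinct result variables.
cmake_minimum_required(VERSION 3.10)
project(atomic_probe CXX)  # illustrative

include(CheckCXXSourceCompiles)
set(_source "
#include <atomic>
#include <cstdint>
int main() { std::atomic<uint64_t> v(0); return static_cast<int>(v.fetch_add(1)); }
")

check_cxx_source_compiles("${_source}" ATOMIC_OPS_WITHOUT_LIBATOMIC)
if(NOT ATOMIC_OPS_WITHOUT_LIBATOMIC)
  # CMAKE_REQUIRED_LIBRARIES adds libraries to the link line of the test program.
  set(CMAKE_REQUIRED_LIBRARIES atomic)
  check_cxx_source_compiles("${_source}" ATOMIC_OPS_WITH_LIBATOMIC)
  unset(CMAKE_REQUIRED_LIBRARIES)
  if(ATOMIC_OPS_WITH_LIBATOMIC)
    link_libraries(atomic)
  else()
    message(FATAL_ERROR "64-bit atomics require libatomic, which was not found")
  endif()
endif()
```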
@@ -419,7 +419,7 @@ if(WITH_IMAGE_OPENEXR)
warn_hardcoded_paths(OpenEXR)
set(OPENEXR ${LIBDIR}/openexr)
set(OPENEXR_INCLUDE_DIR ${OPENEXR}/include)
set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${IMATH_INCLUDE_DIRS} ${OPENEXR_INCLUDE_DIR}/OpenEXR)
set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${IMATH_INCLUDE_DIRS} ${OPENEXR}/include/OpenEXR)
set(OPENEXR_LIBPATH ${OPENEXR}/lib)
# Check if the 3.x library name exists
# if not assume this is a 2.x library folder
@@ -568,8 +568,7 @@ if(WITH_OPENIMAGEIO)
if(NOT OpenImageIO_FOUND)
set(OPENIMAGEIO ${LIBDIR}/OpenImageIO)
set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib)
set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO}/include)
set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR})
set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO}/include)
set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib)
set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib)
set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
@@ -786,14 +785,6 @@ if(WITH_CYCLES AND WITH_CYCLES_OSL)
endif()
find_path(OSL_INCLUDE_DIR OSL/oslclosure.h PATHS ${CYCLES_OSL}/include)
find_program(OSL_COMPILER NAMES oslc PATHS ${CYCLES_OSL}/bin)
file(STRINGS "${OSL_INCLUDE_DIR}/OSL/oslversion.h" OSL_LIBRARY_VERSION_MAJOR
REGEX "^[ \t]*#define[ \t]+OSL_LIBRARY_VERSION_MAJOR[ \t]+[0-9]+.*$")
file(STRINGS "${OSL_INCLUDE_DIR}/OSL/oslversion.h" OSL_LIBRARY_VERSION_MINOR
REGEX "^[ \t]*#define[ \t]+OSL_LIBRARY_VERSION_MINOR[ \t]+[0-9]+.*$")
string(REGEX REPLACE ".*#define[ \t]+OSL_LIBRARY_VERSION_MAJOR[ \t]+([.0-9]+).*"
"\\1" OSL_LIBRARY_VERSION_MAJOR ${OSL_LIBRARY_VERSION_MAJOR})
string(REGEX REPLACE ".*#define[ \t]+OSL_LIBRARY_VERSION_MINOR[ \t]+([.0-9]+).*"
"\\1" OSL_LIBRARY_VERSION_MINOR ${OSL_LIBRARY_VERSION_MINOR})
endif()
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
@@ -926,20 +917,6 @@ if(WITH_HARU)
set(HARU_LIBRARIES ${HARU_ROOT_DIR}/lib/libhpdfs.lib)
endif()
if(WITH_VULKAN_BACKEND)
if(EXISTS ${LIBDIR}/vulkan)
set(VULKAN_FOUND On)
set(VULKAN_ROOT_DIR ${LIBDIR}/vulkan)
set(VULKAN_INCLUDE_DIR ${VULKAN_ROOT_DIR}/include)
set(VULKAN_INCLUDE_DIRS ${VULKAN_INCLUDE_DIR})
set(VULKAN_LIBRARY ${VULKAN_ROOT_DIR}/lib/vulkan-1.lib)
set(VULKAN_LIBRARIES ${VULKAN_LIBRARY})
else()
message(WARNING "Vulkan SDK was not found, disabling WITH_VULKAN_BACKEND")
set(WITH_VULKAN_BACKEND OFF)
endif()
endif()
if(WITH_CYCLES AND WITH_CYCLES_PATH_GUIDING)
find_package(openpgl QUIET)
if(openpgl_FOUND)
@@ -972,13 +949,7 @@ if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
endforeach()
unset(_sycl_runtime_libraries_glob)
file(GLOB _sycl_pi_runtime_libraries_glob
${SYCL_ROOT_DIR}/bin/pi_*.dll
)
list(REMOVE_ITEM _sycl_pi_runtime_libraries_glob "${SYCL_ROOT_DIR}/bin/pi_opencl.dll")
list (APPEND _sycl_runtime_libraries ${_sycl_pi_runtime_libraries_glob})
unset(_sycl_pi_runtime_libraries_glob)
list(APPEND _sycl_runtime_libraries ${SYCL_ROOT_DIR}/bin/pi_level_zero.dll)
list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries})
unset(_sycl_runtime_libraries)
endif()
@@ -55,7 +55,7 @@ buildbot:
cuda11:
version: '11.4.1'
hip:
version: '5.3.22480'
version: '5.2.21440'
optix:
version: '7.3.0'
ocloc:
@@ -35,41 +35,10 @@ from typing import (
Tuple,
)
# ------------------------------------------------------------------------------
# Long Description
long_description = """# Blender
[Blender](https://www.blender.org) is the free and open source 3D creation suite. It supports the entirety of the 3D pipeline—modeling, rigging, animation, simulation, rendering, compositing and motion tracking, even video editing.
This package provides Blender as a Python module for use in studio pipelines, web services, scientific research, and more.
## Documentation
* [Blender Python API](https://docs.blender.org/api/current/)
* [Blender as a Python Module](https://docs.blender.org/api/current/info_advanced_blender_as_bpy.html)
## Requirements
[System requirements](https://www.blender.org/download/requirements/) are the same as Blender.
Each Blender release supports one Python version, and the package is only compatible with that version.
## Source Code
* [Releases](https://download.blender.org/source/)
* Repository: [git.blender.org/blender.git](https://git.blender.org/gitweb/gitweb.cgi/blender.git)
## Credits
Created by the [Blender developer community](https://www.blender.org/about/credits/).
Thanks to Tyler Alden Gubala for maintaining the original version of this package."""
# ------------------------------------------------------------------------------
# Generic Functions
def find_dominating_file(
path: str,
search: Sequence[str],
@@ -226,8 +195,6 @@ def main() -> None:
options={"bdist_wheel": {"plat_name": platform_tag}},
description="Blender as a Python module",
long_description=long_description,
long_description_content_type='text/markdown',
license="GPL-3.0",
author="Blender Foundation",
author_email="bf-committers@blender.org",
@@ -59,11 +59,10 @@ def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None
# Checkout precompiled libraries
if sys.platform == 'darwin':
# Check platform.version to detect arm64 with x86_64 python binary.
if platform.machine() == 'arm64' or ('ARM64' in platform.version()):
lib_platform = "darwin_arm64"
elif platform.machine() == 'x86_64':
if platform.machine() == 'x86_64':
lib_platform = "darwin"
elif platform.machine() == 'arm64':
lib_platform = "darwin_arm64"
else:
lib_platform = None
elif sys.platform == 'win32':
@@ -870,26 +870,6 @@ an issue but, due to internal implementation details, currently are:
thus breaking any current iteration over ``Collection.all_objects``.
.. rubric:: Do not:
.. code-block:: python
# `all_objects` is an iterator. Using it directly while performing operations on its members that will update
# the memory accessed by the `all_objects` iterator will lead to invalid memory accesses and crashes.
for object in bpy.data.collections["Collection"].all_objects:
object.hide_viewport = True
.. rubric:: Do:
.. code-block:: python
# `all_objects[:]` is an independent list generated from the iterator. As long as no objects are deleted,
# its content will remain valid even if the data accessed by the `all_objects` iterator is modified.
for object in bpy.data.collections["Collection"].all_objects[:]:
object.hide_viewport = True
sys.exit
========
@@ -1294,7 +1294,6 @@ def pycontext2sphinx(basepath):
type_descr = prop.get_type_description(
class_fmt=":class:`bpy.types.%s`",
mathutils_fmt=":class:`mathutils.%s`",
collection_id=_BPY_PROP_COLLECTION_ID,
enum_descr_override=enum_descr_override,
)
@@ -1447,7 +1446,6 @@ def pyrna2sphinx(basepath):
identifier = " %s" % prop.identifier
kwargs["class_fmt"] = ":class:`%s`"
kwargs["mathutils_fmt"] = ":class:`mathutils.%s`"
kwargs["collection_id"] = _BPY_PROP_COLLECTION_ID
@@ -1567,7 +1565,6 @@ def pyrna2sphinx(basepath):
type_descr = prop.get_type_description(
class_fmt=":class:`%s`",
mathutils_fmt=":class:`mathutils.%s`",
collection_id=_BPY_PROP_COLLECTION_ID,
enum_descr_override=enum_descr_override,
)
@@ -1634,7 +1631,6 @@ def pyrna2sphinx(basepath):
type_descr = prop.get_type_description(
as_ret=True, class_fmt=":class:`%s`",
mathutils_fmt=":class:`mathutils.%s`",
collection_id=_BPY_PROP_COLLECTION_ID,
enum_descr_override=enum_descr_override,
)
extern/CMakeLists.txt (vendored, 4 changes)
@@ -91,7 +91,3 @@ endif()
if(WITH_COMPOSITOR_CPU)
add_subdirectory(smaa_areatex)
endif()
if(WITH_VULKAN_BACKEND)
add_subdirectory(vulkan_memory_allocator)
endif()
@@ -27,7 +27,6 @@
#include <memory>
#include <vector>
#include <unordered_map>
#include <string>
AUD_NAMESPACE_BEGIN
extern/vulkan_memory_allocator/CMakeLists.txt (vendored, 24 changes)
@@ -1,24 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-or-later
# Copyright 2022 Blender Foundation. All rights reserved.
set(INC
.
)
set(INC_SYS
${VULKAN_INCLUDE_DIRS}
)
set(SRC
vk_mem_alloc_impl.cc
vk_mem_alloc.h
)
blender_add_lib(extern_vulkan_memory_allocator "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
target_compile_options(extern_vulkan_memory_allocator
PRIVATE "-Wno-nullability-completeness"
)
endif()
extern/vulkan_memory_allocator/LICENSE.txt (vendored, 19 changes)
@@ -1,19 +0,0 @@
Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
@@ -1,5 +0,0 @@
Project: VulkanMemoryAllocator
URL: https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator
License: MIT
Upstream version: a6bfc23
Local modifications: None
extern/vulkan_memory_allocator/README.md (vendored, 175 changes)
@@ -1,175 +0,0 @@
# Vulkan Memory Allocator
|
||||
|
||||
Easy to integrate Vulkan memory allocation library.
|
||||
|
||||
**Documentation:** Browse online: [Vulkan Memory Allocator](https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/) (generated from Doxygen-style comments in [include/vk_mem_alloc.h](include/vk_mem_alloc.h))
|
||||
|
||||
**License:** MIT. See [LICENSE.txt](LICENSE.txt)
|
||||
|
||||
**Changelog:** See [CHANGELOG.md](CHANGELOG.md)
|
||||
|
||||
**Product page:** [Vulkan Memory Allocator on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/)
|
||||
|
||||
**Build status:**
|
||||
|
||||
- Windows: [](https://ci.appveyor.com/project/adam-sawicki-amd/vulkanmemoryallocator/branch/master)
|
||||
- Linux: [](https://app.travis-ci.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator)
|
||||
|
||||
[](http://isitmaintained.com/project/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator "Average time to resolve an issue")
|
||||
|
||||
# Problem
|
||||
|
||||
Memory allocation and resource (buffer and image) creation in Vulkan is difficult (comparing to older graphics APIs, like D3D11 or OpenGL) for several reasons:
|
||||
|
||||
- It requires a lot of boilerplate code, just like everything else in Vulkan, because it is a low-level and high-performance API.
|
||||
- There is additional level of indirection: `VkDeviceMemory` is allocated separately from creating `VkBuffer`/`VkImage` and they must be bound together.
|
||||
- Driver must be queried for supported memory heaps and memory types. Different GPU vendors provide different types of it.
|
||||
- It is recommended to allocate bigger chunks of memory and assign parts of them to particular resources, as there is a limit on maximum number of memory blocks that can be allocated.
|
||||
|
||||
# Features
|
||||
|
||||
This library can help game developers to manage memory allocations and resource creation by offering some higher-level functions:
|
||||
|
||||
1. Functions that help to choose correct and optimal memory type based on intended usage of the memory.
|
||||
- Required or preferred traits of the memory are expressed using higher-level description comparing to Vulkan flags.
|
||||
2. Functions that allocate memory blocks, reserve and return parts of them (`VkDeviceMemory` + offset + size) to the user.
|
||||
- Library keeps track of allocated memory blocks, used and unused ranges inside them, finds best matching unused ranges for new allocations, respects all the rules of alignment and buffer/image granularity.
|
||||
3. Functions that can create an image/buffer, allocate memory for it and bind them together - all in one call.
|
||||
|
||||
Additional features:
|
||||
|
||||
- Well-documented - description of all functions and structures provided, along with chapters that contain general description and example code.
|
||||
- Thread-safety: Library is designed to be used in multithreaded code. Access to a single device memory block referred by different buffers and textures (binding, mapping) is synchronized internally. Memory mapping is reference-counted.
|
||||
- Configuration: Fill optional members of `VmaAllocatorCreateInfo` structure to provide custom CPU memory allocator, pointers to Vulkan functions and other parameters.
|
||||
- Customization and integration with custom engines: Predefine appropriate macros to provide your own implementation of all external facilities used by the library like assert, mutex, atomic.
|
||||
- Support for memory mapping, reference-counted internally. Support for persistently mapped memory: Just allocate with appropriate flag and access the pointer to already mapped memory.
|
||||
- Support for non-coherent memory. Functions that flush/invalidate memory. `nonCoherentAtomSize` is respected automatically.
|
||||
- Support for resource aliasing (overlap).
|
||||
- Support for sparse binding and sparse residency: Convenience functions that allocate or free multiple memory pages at once.
|
||||
- Custom memory pools: Create a pool with desired parameters (e.g. fixed or limited maximum size) and allocate memory out of it.
|
||||
- Linear allocator: Create a pool with linear algorithm and use it for much faster allocations and deallocations in free-at-once, stack, double stack, or ring buffer fashion.
|
||||
- Support for Vulkan 1.0, 1.1, 1.2, 1.3.
|
||||
- Support for extensions (and equivalent functionality included in new Vulkan versions):
|
||||
- VK_KHR_dedicated_allocation: Just enable it and it will be used automatically by the library.
|
||||
- VK_KHR_buffer_device_address: Flag `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR` is automatically added to memory allocations where needed.
|
||||
- VK_EXT_memory_budget: Used internally if available to query for current usage and budget. If not available, it falls back to an estimation based on memory heap sizes.
|
||||
- VK_EXT_memory_priority: Set `priority` of allocations or custom pools and it will be set automatically using this extension.
|
||||
- VK_AMD_device_coherent_memory
|
||||
- Defragmentation of GPU and CPU memory: Let the library move data around to free some memory blocks and make your allocations better compacted.
|
||||
- Statistics: Obtain brief or detailed statistics about the amount of memory used, unused, number of allocated blocks, number of allocations etc. - globally, per memory heap, and per memory type.
|
||||
- Debug annotations: Associate custom `void* pUserData` and debug `char* pName` with each allocation.
|
||||
- JSON dump: Obtain a string in JSON format with detailed map of internal state, including list of allocations, their string names, and gaps between them.
|
||||
- Convert this JSON dump into a picture to visualize your memory. See [tools/GpuMemDumpVis](tools/GpuMemDumpVis/README.md).
|
||||
- Debugging incorrect memory usage: Enable initialization of all allocated memory with a bit pattern to detect usage of uninitialized or freed memory. Enable validation of a magic number after every allocation to detect out-of-bounds memory corruption.
|
||||
- Support for interoperability with OpenGL.
|
||||
- Virtual allocator: Interface for using core allocation algorithm to allocate any custom data, e.g. pieces of one large buffer.
|
||||
|
||||
# Prerequisites
|
||||
|
||||
- Self-contained C++ library in single header file. No external dependencies other than standard C and C++ library and of course Vulkan. Some features of C++14 used. STL containers, RTTI, or C++ exceptions are not used.
|
||||
- Public interface in C, in same convention as Vulkan API. Implementation in C++.
|
||||
- Error handling implemented by returning `VkResult` error codes - same way as in Vulkan.
|
||||
- Interface documented using Doxygen-style comments.
|
||||
- Platform-independent, but developed and tested on Windows using Visual Studio. Continuous integration setup for Windows and Linux. Used also on Android, MacOS, and other platforms.
|
||||
|
||||
# Example
|
||||
|
||||
Basic usage of this library is very simple. Advanced features are optional. After you created global `VmaAllocator` object, a complete code needed to create a buffer may look like this:
|
||||
|
||||
```cpp
|
||||
VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
|
||||
bufferInfo.size = 65536;
|
||||
bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
||||
|
||||
VmaAllocationCreateInfo allocInfo = {};
|
||||
allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
|
||||
|
||||
VkBuffer buffer;
|
||||
VmaAllocation allocation;
|
||||
vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
|
||||
```
|
||||
|
||||
With this one function call:
|
||||
|
||||
1. `VkBuffer` is created.
|
||||
2. `VkDeviceMemory` block is allocated if needed.
|
||||
3. An unused region of the memory block is bound to this buffer.
|
||||
|
||||
`VmaAllocation` is an object that represents memory assigned to this buffer. It can be queried for parameters like `VkDeviceMemory` handle and offset.
|
||||
|
||||
# How to build
|
||||
|
||||
On Windows it is recommended to use [CMake UI](https://cmake.org/runningcmake/). Alternatively you can generate a Visual Studio project map using CMake in command line: `cmake -B./build/ -DCMAKE_BUILD_TYPE=Debug -G "Visual Studio 16 2019" -A x64 ./`
|
||||
|
||||
On Linux:
|
||||
|
||||
```
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make
|
||||
```
|
||||
|
||||
The following targets are available
|
||||
|
||||
| Target | Description | CMake option | Default setting |
|
||||
| ------------- | ------------- | ------------- | ------------- |
|
||||
| VmaSample | VMA sample application | `VMA_BUILD_SAMPLE` | `OFF` |
|
||||
| VmaBuildSampleShaders | Shaders for VmaSample | `VMA_BUILD_SAMPLE_SHADERS` | `OFF` |
|
||||
|
||||
Please note that while VulkanMemoryAllocator library is supported on other platforms besides Windows, VmaSample is not.
|
||||
|
||||
These CMake options are available
|
||||
|
||||
| CMake option | Description | Default setting |
|
||||
| ------------- | ------------- | ------------- |
|
||||
| `VMA_RECORDING_ENABLED` | Enable VMA memory recording for debugging | `OFF` |
|
||||
| `VMA_USE_STL_CONTAINERS` | Use C++ STL containers instead of VMA's containers | `OFF` |
|
||||
| `VMA_STATIC_VULKAN_FUNCTIONS` | Link statically with Vulkan API | `OFF` |
|
||||
| `VMA_DYNAMIC_VULKAN_FUNCTIONS` | Fetch pointers to Vulkan functions internally (no static linking) | `ON` |
|
||||
| `VMA_DEBUG_ALWAYS_DEDICATED_MEMORY` | Every allocation will have its own memory block | `OFF` |
|
||||
| `VMA_DEBUG_INITIALIZE_ALLOCATIONS` | Automatically fill new allocations and destroyed allocations with some bit pattern | `OFF` |
|
||||
| `VMA_DEBUG_GLOBAL_MUTEX` | Enable single mutex protecting all entry calls to the library | `OFF` |
|
||||
| `VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT` | Never exceed [VkPhysicalDeviceLimits::maxMemoryAllocationCount](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#limits-maxMemoryAllocationCount) and return error | `OFF` |
|
||||
|
||||
# Binaries
|
||||
|
||||
The release comes with precompiled binary executable for "VulkanSample" application which contains test suite. It is compiled using Visual Studio 2019, so it requires appropriate libraries to work, including "MSVCP140.dll", "VCRUNTIME140.dll", "VCRUNTIME140_1.dll". If the launch fails with error message telling about those files missing, please download and install [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads), "x64" version.
|
||||
|
||||
# Read more
|
||||
|
||||
See **[Documentation](https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/)**.
|
||||
|
||||
# Software using this library
|
||||
|
||||
- **[X-Plane](https://x-plane.com/)**
|
||||
- **[Detroit: Become Human](https://gpuopen.com/learn/porting-detroit-3/)**
|
||||
- **[Vulkan Samples](https://github.com/LunarG/VulkanSamples)** - official Khronos Vulkan samples. License: Apache-style.
|
||||
- **[Anvil](https://github.com/GPUOpen-LibrariesAndSDKs/Anvil)** - cross-platform framework for Vulkan. License: MIT.
|
||||
- **[Filament](https://github.com/google/filament)** - physically based rendering engine for Android, Windows, Linux and macOS, from Google. Apache License 2.0.
|
||||
- **[Atypical Games - proprietary game engine](https://developer.samsung.com/galaxy-gamedev/gamedev-blog/infinitejet.html)**
|
||||
- **[Flax Engine](https://flaxengine.com/)**
|
||||
- **[Godot Engine](https://github.com/godotengine/godot/)** - multi-platform 2D and 3D game engine. License: MIT.
|
||||
- **[Lightweight Java Game Library (LWJGL)](https://www.lwjgl.org/)** - includes binding of the library for Java. License: BSD.
|
||||
- **[PowerVR SDK](https://github.com/powervr-graphics/Native_SDK)** - C++ cross-platform 3D graphics SDK, from Imagination. License: MIT.
|
||||
- **[Skia](https://github.com/google/skia)** - complete 2D graphic library for drawing Text, Geometries, and Images, from Google.
|
||||
- **[The Forge](https://github.com/ConfettiFX/The-Forge)** - cross-platform rendering framework. Apache License 2.0.
|
||||
- **[VK9](https://github.com/disks86/VK9)** - Direct3D 9 compatibility layer using Vulkan. Zlib lincese.
|
||||
- **[vkDOOM3](https://github.com/DustinHLand/vkDOOM3)** - Vulkan port of GPL DOOM 3 BFG Edition. License: GNU GPL.
|
||||
- **[vkQuake2](https://github.com/kondrak/vkQuake2)** - vanilla Quake 2 with Vulkan support. License: GNU GPL.
|
||||
- **[Vulkan Best Practice for Mobile Developers](https://github.com/ARM-software/vulkan_best_practice_for_mobile_developers)** from ARM. License: MIT.
|
||||
- **[RPCS3](https://github.com/RPCS3/rpcs3)** - PlayStation 3 emulator/debugger. License: GNU GPLv2.
|
||||
- **[PPSSPP](https://github.com/hrydgard/ppsspp)** - Playstation Portable emulator/debugger. License: GNU GPLv2+.
|
||||
|
||||
[Many other projects on GitHub](https://github.com/search?q=AMD_VULKAN_MEMORY_ALLOCATOR_H&type=Code) and some game development studios that use Vulkan in their games.
|
||||
|
||||
# See also
|
||||
|
||||
- **[D3D12 Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator)** - equivalent library for Direct3D 12. License: MIT.
|
||||
- **[Awesome Vulkan](https://github.com/vinjn/awesome-vulkan)** - a curated list of awesome Vulkan libraries, debuggers and resources.
|
||||
- **[VulkanMemoryAllocator-Hpp](https://github.com/malte-v/VulkanMemoryAllocator-Hpp)** - C++ binding for this library. License: CC0-1.0.
|
||||
- **[PyVMA](https://github.com/realitix/pyvma)** - Python wrapper for this library. Author: Jean-Sébastien B. (@realitix). License: Apache 2.0.
|
||||
- **[vk-mem](https://github.com/gwihlidal/vk-mem-rs)** - Rust binding for this library. Author: Graham Wihlidal. License: Apache 2.0 or MIT.
|
||||
- **[Haskell bindings](https://hackage.haskell.org/package/VulkanMemoryAllocator)**, **[github](https://github.com/expipiplus1/vulkan/tree/master/VulkanMemoryAllocator)** - Haskell bindings for this library. Author: Ellie Hermaszewska (@expipiplus1). License BSD-3-Clause.
|
||||
- **[vma_sample_sdl](https://github.com/rextimmy/vma_sample_sdl)** - SDL port of the sample app of this library (with the goal of running it on multiple platforms, including MacOS). Author: @rextimmy. License: MIT.
|
||||
- **[vulkan-malloc](https://github.com/dylanede/vulkan-malloc)** - Vulkan memory allocation library for Rust. Based on version 1 of this library. Author: Dylan Ede (@dylanede). License: MIT / Apache 2.0.
|
extern/vulkan_memory_allocator/vk_mem_alloc.h (vendored, 19558 lines)
File diff suppressed because it is too large.
@@ -1,12 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2022 Blender Foundation. All rights reserved. */
#ifdef __APPLE__
# include <MoltenVK/vk_mvk_moltenvk.h>
#else
# include <vulkan/vulkan.h>
#endif
#define VMA_IMPLEMENTATION
#include "vk_mem_alloc.h"
@@ -253,33 +253,6 @@ if(WITH_CYCLES_OSL)
)
endif()
if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX)
add_definitions(-DWITH_CUDA)
if(WITH_CUDA_DYNLOAD)
include_directories(
../../extern/cuew/include
)
add_definitions(-DWITH_CUDA_DYNLOAD)
else()
include_directories(
SYSTEM
${CUDA_TOOLKIT_INCLUDE}
)
endif()
endif()
if(WITH_CYCLES_DEVICE_HIP)
add_definitions(-DWITH_HIP)
if(WITH_HIP_DYNLOAD)
include_directories(
../../extern/hipew/include
)
add_definitions(-DWITH_HIP_DYNLOAD)
endif()
endif()
if(WITH_CYCLES_DEVICE_OPTIX)
find_package(OptiX 7.3.0)
@@ -288,16 +261,12 @@ if(WITH_CYCLES_DEVICE_OPTIX)
include_directories(
SYSTEM
${OPTIX_INCLUDE_DIR}
)
)
else()
set_and_warn_library_found("OptiX" OPTIX_FOUND WITH_CYCLES_DEVICE_OPTIX)
endif()
endif()
if(WITH_CYCLES_DEVICE_METAL)
add_definitions(-DWITH_METAL)
endif()
if (WITH_CYCLES_DEVICE_ONEAPI)
add_definitions(-DWITH_ONEAPI)
endif()
@@ -423,7 +392,7 @@ if(WITH_CYCLES_HYDRA_RENDER_DELEGATE AND (NOT WITH_BLENDER) AND (NOT WITH_CYCLES
set(CYCLES_INSTALL_PATH ${CYCLES_INSTALL_PATH}/hdCycles/resources)
endif()
if(WITH_CYCLES_CUDA_BINARIES)
if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
if(MSVC)
set(MAX_MSVC 1800)
if(${CUDA_VERSION} EQUAL "8.0")
@@ -435,7 +404,24 @@ if(WITH_CYCLES_CUDA_BINARIES)
elseif(${CUDA_VERSION} VERSION_GREATER_EQUAL 10.0)
set(MAX_MSVC 1999)
endif()
if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang")
message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
set(WITH_CYCLES_CUBIN_COMPILER ON)
endif()
unset(MAX_MSVC)
elseif(APPLE)
if(NOT (${XCODE_VERSION} VERSION_LESS 10.0))
message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
set(WITH_CYCLES_CUBIN_COMPILER ON)
endif()
endif()
endif()
# NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC.
if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER AND NOT WITH_CYCLES_CUBIN_COMPILER_OVERRRIDE)
if(NOT (${CUDA_VERSION} VERSION_LESS 10.0))
message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.")
set(WITH_CYCLES_CUBIN_COMPILER OFF)
endif()
endif()
@@ -103,3 +103,32 @@ if(WITH_CYCLES_STANDALONE)
$<TARGET_FILE:cycles>
DESTINATION ${CMAKE_INSTALL_PREFIX})
endif()
#####################################################################
# Cycles cubin compiler executable
#####################################################################
if(WITH_CYCLES_CUBIN_COMPILER)
# 32 bit windows is special, nvrtc is not supported on x86, so even
# though we are building 32 bit blender a 64 bit cubin_cc will have
# to be build to compile the cubins.
if(MSVC AND NOT CMAKE_CL_64)
message("Building with CUDA not supported on 32 bit, skipped")
set(WITH_CYCLES_CUDA_BINARIES OFF CACHE BOOL "" FORCE)
else()
set(SRC
cycles_cubin_cc.cpp
)
set(INC
../../../extern/cuew/include
)
set(LIB
)
cycles_external_libraries_append(LIB)
add_executable(cycles_cubin_cc ${SRC})
include_directories(${INC})
target_link_libraries(cycles_cubin_cc PRIVATE ${LIB})
unset(SRC)
unset(INC)
endif()
endif()
intern/cycles/app/cycles_cubin_cc.cpp (new file, 311 lines)
@@ -0,0 +1,311 @@
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2017-2022 Blender Foundation */
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <OpenImageIO/argparse.h>
|
||||
#include <OpenImageIO/filesystem.h>
|
||||
|
||||
#include "cuew.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include <Windows.h>
|
||||
#endif
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
namespace std {
|
||||
template<typename T> std::string to_string(const T &n)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << n;
|
||||
return s.str();
|
||||
}
|
||||
} // namespace std
|
||||
|
||||
class CompilationSettings {
|
||||
public:
|
||||
CompilationSettings()
|
||||
: target_arch(0), bits(64), verbose(false), fast_math(false), ptx_only(false)
|
||||
{
|
||||
}
|
||||
|
||||
string cuda_toolkit_dir;
|
||||
string input_file;
|
||||
string output_file;
|
||||
string ptx_file;
|
||||
vector<string> defines;
|
||||
vector<string> includes;
|
||||
int target_arch;
|
||||
int bits;
|
||||
bool verbose;
|
||||
bool fast_math;
|
||||
bool ptx_only;
|
||||
};
|
||||
|
||||
static bool compile_cuda(CompilationSettings &settings)
|
||||
{
|
||||
const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h", "stddef.h"};
|
||||
const char *header_content[] = {"\n", "\n", "\n", "\n", "\n"};
|
||||
|
||||
printf("Building %s\n", settings.input_file.c_str());
|
||||
|
||||
string code;
|
||||
if (!OIIO::Filesystem::read_text_file(settings.input_file, code)) {
|
||||
fprintf(stderr, "Error: unable to read %s\n", settings.input_file.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<string> options;
|
||||
for (size_t i = 0; i < settings.includes.size(); i++) {
|
||||
options.push_back("-I" + settings.includes[i]);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < settings.defines.size(); i++) {
|
||||
options.push_back("-D" + settings.defines[i]);
|
||||
}
|
||||
options.push_back("-D__KERNEL_CUDA_VERSION__=" + std::to_string(cuewNvrtcVersion()));
|
||||
options.push_back("-arch=compute_" + std::to_string(settings.target_arch));
|
||||
options.push_back("--device-as-default-execution-space");
|
||||
options.push_back("-DCYCLES_CUBIN_CC");
|
||||
options.push_back("--std=c++11");
|
||||
if (settings.fast_math)
|
||||
options.push_back("--use_fast_math");
|
||||
|
||||
nvrtcProgram prog;
|
||||
nvrtcResult result = nvrtcCreateProgram(&prog,
|
||||
code.c_str(), // buffer
|
||||
NULL, // name
|
||||
sizeof(headers) / sizeof(void *), // numHeaders
|
||||
header_content, // headers
|
||||
headers); // includeNames
|
||||
|
||||
if (result != NVRTC_SUCCESS) {
|
||||
fprintf(stderr, "Error: nvrtcCreateProgram failed (%d)\n\n", (int)result);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Transfer options to a classic C array. */
|
||||
vector<const char *> opts(options.size());
|
||||
for (size_t i = 0; i < options.size(); i++) {
|
||||
opts[i] = options[i].c_str();
|
||||
}
|
||||
|
||||
result = nvrtcCompileProgram(prog, options.size(), &opts[0]);
|
||||
|
||||
if (result != NVRTC_SUCCESS) {
|
||||
fprintf(stderr, "Error: nvrtcCompileProgram failed (%d)\n\n", (int)result);
|
||||
|
||||
size_t log_size;
|
||||
nvrtcGetProgramLogSize(prog, &log_size);
|
||||
|
||||
vector<char> log(log_size);
|
||||
nvrtcGetProgramLog(prog, &log[0]);
|
||||
fprintf(stderr, "%s\n", &log[0]);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Retrieve the ptx code. */
|
||||
size_t ptx_size;
|
||||
result = nvrtcGetPTXSize(prog, &ptx_size);
|
||||
if (result != NVRTC_SUCCESS) {
|
||||
fprintf(stderr, "Error: nvrtcGetPTXSize failed (%d)\n\n", (int)result);
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<char> ptx_code(ptx_size);
|
||||
result = nvrtcGetPTX(prog, &ptx_code[0]);
|
||||
if (result != NVRTC_SUCCESS) {
|
||||
fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result);
|
||||
return false;
|
||||
}
|
||||
if (settings.ptx_only) {
|
||||
settings.ptx_file = settings.output_file;
|
||||
}
|
||||
else {
|
||||
/* Write a file in the temp folder with the ptx code. */
|
||||
settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
|
||||
OIIO::Filesystem::unique_path();
|
||||
}
|
||||
FILE *f = fopen(settings.ptx_file.c_str(), "wb");
|
||||
fwrite(&ptx_code[0], 1, ptx_size, f);
|
||||
fclose(f);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool link_ptxas(CompilationSettings &settings)
|
||||
{
|
||||
string cudapath = "";
|
||||
if (settings.cuda_toolkit_dir.size())
|
||||
cudapath = settings.cuda_toolkit_dir + "/bin/";
|
||||
|
||||
string ptx = "\"" + cudapath + "ptxas\" " + settings.ptx_file + " -o " + settings.output_file +
|
||||
" --gpu-name sm_" + std::to_string(settings.target_arch) + " -m" +
|
||||
std::to_string(settings.bits);
|
||||
|
||||
if (settings.verbose) {
|
||||
ptx += " --verbose";
|
||||
printf("%s\n", ptx.c_str());
|
||||
}
|
||||
|
||||
int pxresult = system(ptx.c_str());
|
||||
if (pxresult) {
|
||||
fprintf(stderr, "Error: ptxas failed (%d)\n\n", pxresult);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!OIIO::Filesystem::remove(settings.ptx_file)) {
|
||||
fprintf(stderr, "Error: removing %s\n\n", settings.ptx_file.c_str());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool init(CompilationSettings &settings)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
if (settings.cuda_toolkit_dir.size()) {
|
||||
SetDllDirectory((settings.cuda_toolkit_dir + "/bin").c_str());
|
||||
}
|
||||
#else
|
||||
(void)settings;
|
||||
#endif
|
||||
|
||||
int cuewresult = cuewInit(CUEW_INIT_NVRTC);
|
||||
if (cuewresult != CUEW_SUCCESS) {
|
||||
fprintf(stderr, "Error: cuew init fialed (0x%d)\n\n", cuewresult);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (cuewNvrtcVersion() < 80) {
|
||||
fprintf(stderr, "Error: only cuda 8 and higher is supported, %d\n\n", cuewCompilerVersion());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nvrtcCreateProgram) {
|
||||
fprintf(stderr, "Error: nvrtcCreateProgram not resolved\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nvrtcCompileProgram) {
|
||||
fprintf(stderr, "Error: nvrtcCompileProgram not resolved\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nvrtcGetProgramLogSize) {
|
||||
fprintf(stderr, "Error: nvrtcGetProgramLogSize not resolved\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nvrtcGetProgramLog) {
|
||||
fprintf(stderr, "Error: nvrtcGetProgramLog not resolved\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nvrtcGetPTXSize) {
|
||||
fprintf(stderr, "Error: nvrtcGetPTXSize not resolved\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nvrtcGetPTX) {
|
||||
fprintf(stderr, "Error: nvrtcGetPTX not resolved\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool parse_parameters(int argc, const char **argv, CompilationSettings &settings)
|
||||
{
|
||||
OIIO::ArgParse ap;
|
||||
ap.options("Usage: cycles_cubin_cc [options]",
|
||||
"-target %d",
|
||||
&settings.target_arch,
|
||||
"target shader model",
|
||||
"-m %d",
|
||||
&settings.bits,
|
||||
"Cuda architecture bits",
|
||||
"-i %s",
|
||||
&settings.input_file,
|
||||
"Input source filename",
|
||||
"-o %s",
|
||||
&settings.output_file,
|
||||
"Output cubin filename",
|
||||
"-I %L",
|
||||
&settings.includes,
|
||||
"Add additional includepath",
|
||||
"-D %L",
|
||||
&settings.defines,
|
||||
"Add additional defines",
|
||||
"-ptx",
|
||||
&settings.ptx_only,
|
||||
"emit PTX code",
|
||||
"-v",
|
||||
&settings.verbose,
|
||||
"Use verbose logging",
|
||||
"--use_fast_math",
|
||||
&settings.fast_math,
|
||||
"Use fast math",
|
||||
"-cuda-toolkit-dir %s",
|
||||
&settings.cuda_toolkit_dir,
|
||||
"path to the cuda toolkit binary directory",
|
||||
NULL);
|
||||
|
||||
if (ap.parse(argc, argv) < 0) {
|
||||
fprintf(stderr, "%s\n", ap.geterror().c_str());
|
||||
ap.usage();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!settings.output_file.size()) {
|
||||
fprintf(stderr, "Error: Output file not set(-o), required\n\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!settings.input_file.size()) {
|
||||
fprintf(stderr, "Error: Input file not set(-i, required\n\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!settings.target_arch) {
|
||||
fprintf(stderr, "Error: target shader model not set (-target), required\n\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
CompilationSettings settings;
|
||||
|
||||
if (!parse_parameters(argc, argv, settings)) {
|
||||
fprintf(stderr, "Error: invalid parameters, exiting\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (!init(settings)) {
|
||||
fprintf(stderr, "Error: initialization error, exiting\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (!compile_cuda(settings)) {
|
||||
fprintf(stderr, "Error: compilation error, exiting\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (!settings.ptx_only) {
|
||||
if (!link_ptxas(settings)) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@@ -58,7 +58,7 @@ class CyclesRender(bpy.types.RenderEngine):
if not self.session:
if self.is_preview:
cscene = bpy.context.scene.cycles
use_osl = cscene.shading_system
use_osl = cscene.shading_system and cscene.device == 'CPU'
engine.create(self, data, preview_osl=use_osl)
else:
@@ -156,11 +156,6 @@ def with_osl():
return _cycles.with_osl
def osl_version():
import _cycles
return _cycles.osl_version
def with_path_guiding():
import _cycles
return _cycles.with_path_guiding
@@ -204,6 +199,7 @@ def list_render_passes(scene, srl):
if crl.use_pass_volume_indirect: yield ("VolumeInd", "RGB", 'COLOR')
if srl.use_pass_emit: yield ("Emit", "RGB", 'COLOR')
if srl.use_pass_environment: yield ("Env", "RGB", 'COLOR')
if srl.use_pass_shadow: yield ("Shadow", "RGB", 'COLOR')
if srl.use_pass_ambient_occlusion: yield ("AO", "RGB", 'COLOR')
if crl.use_pass_shadow_catcher: yield ("Shadow Catcher", "RGB", 'COLOR')
# autopep8: on
@@ -114,7 +114,7 @@ class CYCLES_OT_denoise_animation(Operator):
class CYCLES_OT_merge_images(Operator):
"Combine OpenEXR multi-layer images rendered with different sample " \
"Combine OpenEXR multilayer images rendered with different sample " \
"ranges into one image with reduced noise"
bl_idname = "cycles.merge_images"
bl_label = "Merge Images"
@@ -91,7 +91,7 @@ class AddPresetPerformance(AddPresetBase, Operator):
preset_menu = "CYCLES_PT_performance_presets"
preset_defines = [
"render = bpy.context.scene.render",
"render = bpy.context.scene.render"
"cycles = bpy.context.scene.cycles"
]
@@ -86,29 +86,6 @@ enum_sampling_pattern = (
('PROGRESSIVE_MULTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern", 1),
)
enum_emission_sampling = (
('NONE',
'None',
"Do not use this surface as a light for sampling",
0),
('AUTO',
'Auto',
"Automatically determine if the surface should be treated as a light for sampling, based on estimated emission intensity",
1),
('FRONT',
'Front',
"Treat only front side of the surface as a light, usually for closed meshes whose interior is not visible",
2),
('BACK',
'Back',
"Treat only back side of the surface as a light for sampling",
3),
('FRONT_BACK',
'Front and Back',
"Treat surface as a light for sampling, emitting from both the front and back side",
4),
)
enum_volume_sampling = (
('DISTANCE',
"Distance",
@@ -170,6 +147,7 @@ enum_view3d_shading_render_pass = (
('EMISSION', "Emission", "Show the Emission render pass"),
('BACKGROUND', "Background", "Show the Background render pass"),
('AO', "Ambient Occlusion", "Show the Ambient Occlusion render pass"),
('SHADOW', "Shadow", "Show the Shadow render pass"),
('SHADOW_CATCHER', "Shadow Catcher", "Show the Shadow Catcher render pass"),
('', "Light", ""),
@@ -312,7 +290,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
)
shading_system: BoolProperty(
name="Open Shading Language",
description="Use Open Shading Language",
description="Use Open Shading Language (CPU rendering only)",
)
preview_pause: BoolProperty(
@@ -503,12 +481,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
default='MULTIPLE_IMPORTANCE_SAMPLING',
|
||||
)
|
||||
|
||||
use_light_tree: BoolProperty(
|
||||
name="Light Tree",
|
||||
description="Sample multiple lights more efficiently based on estimated contribution at every shading point",
|
||||
default=True,
|
||||
)
|
||||
|
||||
min_light_bounces: IntProperty(
|
||||
name="Min Light Bounces",
|
||||
description="Minimum number of light bounces. Setting this higher reduces noise in the first bounces, "
|
||||
@@ -650,7 +622,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
|
||||
transparent_max_bounces: IntProperty(
|
||||
name="Transparent Max Bounces",
|
||||
description="Maximum number of transparent bounces. This is independent of maximum number of other bounces",
|
||||
description="Maximum number of transparent bounces. This is independent of maximum number of other bounces ",
|
||||
min=0, max=1024,
|
||||
default=8,
|
||||
)
|
||||
@@ -1071,13 +1043,13 @@ class CyclesCameraSettings(bpy.types.PropertyGroup):
|
||||
|
||||
class CyclesMaterialSettings(bpy.types.PropertyGroup):
|
||||
|
||||
emission_sampling: EnumProperty(
|
||||
name="Emission Sampling",
|
||||
description="Sampling strategy for emissive surfaces",
|
||||
items=enum_emission_sampling,
|
||||
default="AUTO",
|
||||
sample_as_light: BoolProperty(
|
||||
name="Multiple Importance Sample",
|
||||
description="Use multiple importance sampling for this material, "
|
||||
"disabling may reduce overall noise for large "
|
||||
"objects that emit little light compared to other light sources",
|
||||
default=True,
|
||||
)
|
||||
|
||||
use_transparent_shadow: BoolProperty(
|
||||
name="Transparent Shadows",
|
||||
description="Use transparent shadows for this material if it contains a Transparent BSDF, "
|
||||
@@ -1670,7 +1642,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
col.label(text="and Windows driver version 101.3430 or newer", icon='BLANK1')
|
||||
elif sys.platform.startswith("linux"):
|
||||
col.label(text="Requires Intel GPU with Xe-HPG architecture and", icon='BLANK1')
|
||||
col.label(text=" - intel-level-zero-gpu version 1.3.23904 or newer", icon='BLANK1')
|
||||
col.label(text=" - Linux driver version xx.xx.23904 or newer", icon='BLANK1')
|
||||
col.label(text=" - oneAPI Level-Zero Loader", icon='BLANK1')
|
||||
elif device_type == 'METAL':
|
||||
col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')
|
||||
|
@@ -154,9 +154,8 @@ def use_mnee(context):
# The MNEE kernel doesn't compile on macOS < 13.
if use_metal(context):
import platform
version, _, _ = platform.mac_ver()
major_version = version.split(".")[0]
if int(major_version) < 13:
v, _, _ = platform.mac_ver()
if float(v) < 13.0:
return False
return True

@@ -314,11 +313,10 @@ class CYCLES_RENDER_PT_sampling_path_guiding(CyclesButtonsPanel, Panel):
layout.use_property_decorate = False
layout.active = cscene.use_guiding

layout.prop(cscene, "guiding_training_samples")

col = layout.column(align=True)
col.prop(cscene, "use_surface_guiding", text="Surface")
col.prop(cscene, "use_volume_guiding", text="Volume")
col.prop(cscene, "use_surface_guiding")
col.prop(cscene, "use_volume_guiding")
col.prop(cscene, "guiding_training_samples")


class CYCLES_RENDER_PT_sampling_path_guiding_debug(CyclesDebugButtonsPanel, Panel):

@@ -383,6 +381,7 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
col = layout.column(align=True)
col.prop(cscene, "min_light_bounces")
col.prop(cscene, "min_transparent_bounces")
col.prop(cscene, "light_sampling_threshold", text="Light Threshold")

for view_layer in scene.view_layers:
if view_layer.samples > 0:

@@ -391,31 +390,6 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
break


class CYCLES_RENDER_PT_sampling_lights(CyclesButtonsPanel, Panel):
bl_label = "Lights"
bl_parent_id = "CYCLES_RENDER_PT_sampling"
bl_options = {'DEFAULT_CLOSED'}

def draw_header(self, context):
layout = self.layout
scene = context.scene
cscene = scene.cycles

def draw(self, context):
layout = self.layout
layout.use_property_split = True
layout.use_property_decorate = False

scene = context.scene
cscene = scene.cycles

col = layout.column(align=True)
col.prop(cscene, "use_light_tree")
sub = col.row()
sub.prop(cscene, "light_sampling_threshold", text="Light Threshold")
sub.active = not cscene.use_light_tree


class CYCLES_RENDER_PT_subdivision(CyclesButtonsPanel, Panel):
bl_label = "Subdivision"
bl_options = {'DEFAULT_CLOSED'}

@@ -978,6 +952,7 @@ class CYCLES_RENDER_PT_passes_light(CyclesButtonsPanel, Panel):
col = layout.column(heading="Other", align=True)
col.prop(view_layer, "use_pass_emit", text="Emission")
col.prop(view_layer, "use_pass_environment")
col.prop(view_layer, "use_pass_shadow")
col.prop(view_layer, "use_pass_ambient_occlusion", text="Ambient Occlusion")
col.prop(cycles_view_layer, "use_pass_shadow_catcher")

@@ -1855,9 +1830,9 @@ class CYCLES_MATERIAL_PT_settings_surface(CyclesButtonsPanel, Panel):
cmat = mat.cycles

col = layout.column()
col.prop(cmat, "displacement_method", text="Displacement")
col.prop(cmat, "emission_sampling")
col.prop(cmat, "sample_as_light", text="Multiple Importance")
col.prop(cmat, "use_transparent_shadow")
col.prop(cmat, "displacement_method", text="Displacement")

def draw(self, context):
self.draw_shared(self, context.material)

@@ -2330,10 +2305,7 @@ def draw_device(self, context):
col.prop(cscene, "device")

from . import engine
if engine.with_osl() and (
use_cpu(context) or
(use_optix(context) and (engine.osl_version()[1] >= 13 or engine.osl_version()[0] > 1))
):
if engine.with_osl() and use_cpu(context):
col.prop(cscene, "shading_system")

@@ -2389,7 +2361,6 @@ classes = (
CYCLES_RENDER_PT_sampling_render_denoise,
CYCLES_RENDER_PT_sampling_path_guiding,
CYCLES_RENDER_PT_sampling_path_guiding_debug,
CYCLES_RENDER_PT_sampling_lights,
CYCLES_RENDER_PT_sampling_advanced,
CYCLES_RENDER_PT_light_paths,
CYCLES_RENDER_PT_light_paths_max_bounces,
@@ -99,7 +99,7 @@ def do_versions(self):
library_versions.setdefault(library.version, []).append(library)

# Do versioning per library, since they might have different versions.
max_need_versioning = (3, 5, 2)
max_need_versioning = (3, 0, 25)
for version, libraries in library_versions.items():
if version > max_need_versioning:
continue

@@ -297,8 +297,3 @@ def do_versions(self):
cmat = mat.cycles
if not cmat.is_property_set("displacement_method"):
cmat.displacement_method = 'DISPLACEMENT'

if version <= (3, 5, 3):
cmat = mat.cycles
if not cmat.get("sample_as_light", True):
cmat.emission_sampling = 'NONE'
File diff suppressed because it is too large
@@ -15,10 +15,6 @@
#include "util/unique_ptr.h"
#include "util/vector.h"

typedef struct GPUContext GPUContext;
typedef struct GPUFence GPUFence;
typedef struct GPUShader GPUShader;

CCL_NAMESPACE_BEGIN

/* Base class of shader used for display driver rendering. */

@@ -33,7 +29,7 @@ class BlenderDisplayShader {
BlenderDisplayShader() = default;
virtual ~BlenderDisplayShader() = default;

virtual GPUShader *bind(int width, int height) = 0;
virtual void bind(int width, int height) = 0;
virtual void unbind() = 0;

/* Get attribute location for position and texture coordinate respectively.

@@ -44,7 +40,7 @@ class BlenderDisplayShader {
protected:
/* Get program of this display shader.
* NOTE: The shader needs to be bound to have access to this. */
virtual GPUShader *get_shader_program() = 0;
virtual uint get_shader_program() = 0;

/* Cached values of various OpenGL resources. */
int position_attribute_location_ = -1;

@@ -55,16 +51,16 @@
* display space shader. */
class BlenderFallbackDisplayShader : public BlenderDisplayShader {
public:
virtual GPUShader *bind(int width, int height) override;
virtual void bind(int width, int height) override;
virtual void unbind() override;

protected:
virtual GPUShader *get_shader_program() override;
virtual uint get_shader_program() override;

void create_shader_if_needed();
void destroy_shader();

GPUShader *shader_program_ = 0;
uint shader_program_ = 0;
int image_texture_location_ = -1;
int fullscreen_location_ = -1;

@@ -77,17 +73,17 @@ class BlenderDisplaySpaceShader : public BlenderDisplayShader {
public:
BlenderDisplaySpaceShader(BL::RenderEngine &b_engine, BL::Scene &b_scene);

virtual GPUShader *bind(int width, int height) override;
virtual void bind(int width, int height) override;
virtual void unbind() override;

protected:
virtual GPUShader *get_shader_program() override;
virtual uint get_shader_program() override;

BL::RenderEngine b_engine_;
BL::Scene &b_scene_;

/* Cached values of various OpenGL resources. */
GPUShader *shader_program_ = nullptr;
uint shader_program_ = 0;
};

/* Display driver implementation which is specific for Blender viewport integration. */

@@ -126,9 +122,6 @@ class BlenderDisplayDriver : public DisplayDriver {
void gpu_context_lock();
void gpu_context_unlock();

/* Create GPU resources used by the display driver. */
bool gpu_resources_create();

/* Destroy all GPU resources which are being used by this object. */
void gpu_resources_destroy();

@@ -144,8 +137,8 @@ class BlenderDisplayDriver : public DisplayDriver {
struct Tiles;
unique_ptr<Tiles> tiles_;

GPUFence *gpu_render_sync_ = nullptr;
GPUFence *gpu_upload_sync_ = nullptr;
void *gl_render_sync_ = nullptr;
void *gl_upload_sync_ = nullptr;

float2 zoom_ = make_float2(1.0f, 1.0f);
};
@@ -72,11 +72,6 @@ bool BlenderImageLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaDat
metadata.colorspace = u_colorspace_raw;
}
else {
/* In some cases (e.g. T94135), the colorspace setting in Blender gets updated as part of the
* metadata queries in this function, so update the colorspace setting here. */
PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
metadata.colorspace = get_enum_identifier(colorspace_ptr, "name");

if (metadata.channels == 1) {
metadata.type = IMAGE_DATA_TYPE_BYTE;
}
@@ -1085,11 +1085,11 @@ static void create_subd_mesh(Scene *scene,
const int edges_num = b_mesh.edges.length();

if (edges_num != 0 && b_mesh.edge_creases.length() > 0) {
BL::MeshEdgeCreaseLayer creases = b_mesh.edge_creases[0];

size_t num_creases = 0;
const float *creases = static_cast<float *>(b_mesh.edge_creases[0].ptr.data);

for (int i = 0; i < edges_num; i++) {
if (creases.data[i].value() != 0.0f) {
if (creases[i] != 0.0f) {
num_creases++;
}
}

@@ -1098,18 +1098,17 @@ static void create_subd_mesh(Scene *scene,

const MEdge *edges = static_cast<MEdge *>(b_mesh.edges[0].ptr.data);
for (int i = 0; i < edges_num; i++) {
const float crease = creases.data[i].value();
if (crease != 0.0f) {
if (creases[i] != 0.0f) {
const MEdge &b_edge = edges[i];
mesh->add_edge_crease(b_edge.v1, b_edge.v2, crease);
mesh->add_edge_crease(b_edge.v1, b_edge.v2, creases[i]);
}
}
}

for (BL::MeshVertexCreaseLayer &c : b_mesh.vertex_creases) {
for (int i = 0; i < c.data.length(); ++i) {
if (c.data[i].value() != 0.0f) {
mesh->add_vertex_crease(i, c.data[i].value());
for (BL::MeshVertexCreaseLayer &c : b_mesh.vertex_creases) {
for (int i = 0; i < c.data.length(); ++i) {
if (c.data[i].value() != 0.0f) {
mesh->add_vertex_crease(i, c.data[i].value());
}
}
}
}
@@ -18,6 +18,7 @@
#include "util/guiding.h"
#include "util/log.h"
#include "util/md5.h"
#include "util/opengl.h"
#include "util/openimagedenoise.h"
#include "util/path.h"
#include "util/string.h"

@@ -25,8 +26,6 @@
#include "util/tbb.h"
#include "util/types.h"

#include "GPU_state.h"

#ifdef WITH_OSL
# include "scene/osl.h"

@@ -338,7 +337,7 @@ static PyObject *view_draw_func(PyObject * /*self*/, PyObject *args)
if (PyLong_AsVoidPtr(pyrv3d)) {
/* 3d view drawing */
int viewport[4];
GPU_viewport_size_get_i(viewport);
glGetIntegerv(GL_VIEWPORT, viewport);

session->view_draw(viewport[2], viewport[3]);
}

@@ -479,7 +478,6 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)

/* Read metadata. */
bool is_bool_param = false;
bool hide_value = !param->validdefault;
ustring param_label = param->name;

for (const OSL::OSLQuery::Parameter &metadata : param->metadata) {

@@ -489,9 +487,6 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
if (metadata.sdefault[0] == "boolean" || metadata.sdefault[0] == "checkBox") {
is_bool_param = true;
}
else if (metadata.sdefault[0] == "null") {
hide_value = true;
}
}
else if (metadata.name == "label") {
/* Socket label. */

@@ -601,9 +596,6 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
if (b_sock.name() != param_label) {
b_sock.name(param_label.string());
}
if (b_sock.hide_value() != hide_value) {
b_sock.hide_value(hide_value);
}
used_sockets.insert(b_sock.ptr.data);
found_existing = true;
}

@@ -643,8 +635,6 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
set_boolean(b_sock.ptr, "default_value", default_boolean);
}

b_sock.hide_value(hide_value);

used_sockets.insert(b_sock.ptr.data);
}
}
@@ -559,6 +559,11 @@ static bool bake_setup_pass(Scene *scene, const string &bake_type_str, const int
0);
integrator->set_use_emission((bake_filter & BL::BakeSettings::pass_filter_EMIT) != 0);
}
/* Shadow pass. */
else if (strcmp(bake_type, "SHADOW") == 0) {
type = PASS_SHADOW;
use_direct_light = true;
}
/* Light component passes. */
else if (strcmp(bake_type, "DIFFUSE") == 0) {
if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) &&
@@ -61,12 +61,6 @@ static DisplacementMethod get_displacement_method(PointerRNA &ptr)
ptr, "displacement_method", DISPLACE_NUM_METHODS, DISPLACE_BUMP);
}

static EmissionSampling get_emission_sampling(PointerRNA &ptr)
{
return (EmissionSampling)get_enum(
ptr, "emission_sampling", EMISSION_SAMPLING_NUM, EMISSION_SAMPLING_AUTO);
}

static int validate_enum_value(int value, int num_values, int default_value)
{
if (value >= num_values) {

@@ -1565,7 +1559,7 @@ void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all)

/* settings */
PointerRNA cmat = RNA_pointer_get(&b_mat.ptr, "cycles");
shader->set_emission_sampling_method(get_emission_sampling(cmat));
shader->set_use_mis(get_boolean(cmat, "sample_as_light"));
shader->set_use_transparent_shadow(get_boolean(cmat, "use_transparent_shadow"));
shader->set_heterogeneous_volume(!get_boolean(cmat, "homogeneous_volume"));
shader->set_volume_sampling_method(get_volume_sampling(cmat));
@@ -26,6 +26,7 @@
#include "util/foreach.h"
#include "util/hash.h"
#include "util/log.h"
#include "util/opengl.h"
#include "util/openimagedenoise.h"

CCL_NAMESPACE_BEGIN

@@ -347,14 +348,7 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
integrator->set_motion_blur(view_layer.use_motion_blur);
}

bool use_light_tree = get_boolean(cscene, "use_light_tree");
integrator->set_use_light_tree(use_light_tree);
integrator->set_light_sampling_threshold(
(use_light_tree) ? 0.0f : get_float(cscene, "light_sampling_threshold"));

if (integrator->use_light_tree_is_modified()) {
scene->light_manager->tag_update(scene, LightManager::UPDATE_ALL);
}
integrator->set_light_sampling_threshold(get_float(cscene, "light_sampling_threshold"));

SamplingPattern sampling_pattern = (SamplingPattern)get_enum(
cscene, "sampling_pattern", SAMPLING_NUM_PATTERNS, SAMPLING_PATTERN_PMJ);

@@ -623,6 +617,7 @@ static bool get_known_pass_type(BL::RenderPass &b_pass, PassType &type, PassMode
MAP_PASS("Emit", PASS_EMISSION, false);
MAP_PASS("Env", PASS_BACKGROUND, false);
MAP_PASS("AO", PASS_AO, false);
MAP_PASS("Shadow", PASS_SHADOW, false);

MAP_PASS("BakePrimitive", PASS_BAKE_PRIMITIVE, false);
MAP_PASS("BakeDifferential", PASS_BAKE_DIFFERENTIAL, false);
@@ -1,6 +1,584 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2011-2022 Blender Foundation

###########################################################################
# Helper macros
###########################################################################

macro(_set_default variable value)
if(NOT ${variable})
set(${variable} ${value})
endif()
endmacro()

###########################################################################
# Precompiled libraries detection
#
# Use precompiled libraries from Blender repository
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY)
if(APPLE)
if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "x86_64")
set(_cycles_lib_dir "${CMAKE_SOURCE_DIR}/../lib/darwin")
else()
set(_cycles_lib_dir "${CMAKE_SOURCE_DIR}/../lib/darwin_arm64")
endif()

# Always use system zlib
find_package(ZLIB REQUIRED)
elseif(WIN32)
if(CMAKE_CL_64)
set(_cycles_lib_dir "${CMAKE_SOURCE_DIR}/../lib/win64_vc15")
else()
message(FATAL_ERROR "Unsupported Visual Studio Version")
endif()
else()
# Path to a locally compiled libraries.
set(LIBDIR_NAME ${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR})
string(TOLOWER ${LIBDIR_NAME} LIBDIR_NAME)
set(LIBDIR_NATIVE_ABI ${CMAKE_SOURCE_DIR}/../lib/${LIBDIR_NAME})

# Path to precompiled libraries with known CentOS 7 ABI.
set(LIBDIR_CENTOS7_ABI ${CMAKE_SOURCE_DIR}/../lib/linux_centos7_x86_64)

# Choose the best suitable libraries.
if(EXISTS ${LIBDIR_NATIVE_ABI})
set(_cycles_lib_dir ${LIBDIR_NATIVE_ABI})
elseif(EXISTS ${LIBDIR_CENTOS7_ABI})
set(_cycles_lib_dir ${LIBDIR_CENTOS7_ABI})
set(WITH_CXX11_ABI OFF)

if(CMAKE_COMPILER_IS_GNUCC AND
CMAKE_C_COMPILER_VERSION VERSION_LESS 9.3)
message(FATAL_ERROR "GCC version must be at least 9.3 for precompiled libraries, found ${CMAKE_C_COMPILER_VERSION}")
endif()
endif()

if(DEFINED _cycles_lib_dir)
message(STATUS "Using precompiled libraries at ${_cycles_lib_dir}")
endif()

# Avoid namespace pollution.
unset(LIBDIR_NATIVE_ABI)
unset(LIBDIR_CENTOS7_ABI)
endif()

if(EXISTS ${_cycles_lib_dir})
_set_default(ALEMBIC_ROOT_DIR "${_cycles_lib_dir}/alembic")
_set_default(BOOST_ROOT "${_cycles_lib_dir}/boost")
_set_default(BLOSC_ROOT_DIR "${_cycles_lib_dir}/blosc")
_set_default(EMBREE_ROOT_DIR "${_cycles_lib_dir}/embree")
_set_default(EPOXY_ROOT_DIR "${_cycles_lib_dir}/epoxy")
_set_default(IMATH_ROOT_DIR "${_cycles_lib_dir}/imath")
_set_default(GLEW_ROOT_DIR "${_cycles_lib_dir}/glew")
_set_default(JPEG_ROOT "${_cycles_lib_dir}/jpeg")
_set_default(LLVM_ROOT_DIR "${_cycles_lib_dir}/llvm")
_set_default(CLANG_ROOT_DIR "${_cycles_lib_dir}/llvm")
_set_default(NANOVDB_ROOT_DIR "${_cycles_lib_dir}/openvdb")
_set_default(OPENCOLORIO_ROOT_DIR "${_cycles_lib_dir}/opencolorio")
_set_default(OPENEXR_ROOT_DIR "${_cycles_lib_dir}/openexr")
_set_default(OPENIMAGEDENOISE_ROOT_DIR "${_cycles_lib_dir}/openimagedenoise")
_set_default(OPENIMAGEIO_ROOT_DIR "${_cycles_lib_dir}/openimageio")
_set_default(OPENJPEG_ROOT_DIR "${_cycles_lib_dir}/openjpeg")
_set_default(OPENSUBDIV_ROOT_DIR "${_cycles_lib_dir}/opensubdiv")
_set_default(OPENVDB_ROOT_DIR "${_cycles_lib_dir}/openvdb")
_set_default(OSL_ROOT_DIR "${_cycles_lib_dir}/osl")
_set_default(PNG_ROOT "${_cycles_lib_dir}/png")
_set_default(PUGIXML_ROOT_DIR "${_cycles_lib_dir}/pugixml")
_set_default(SDL2_ROOT_DIR "${_cycles_lib_dir}/sdl")
_set_default(TBB_ROOT_DIR "${_cycles_lib_dir}/tbb")
_set_default(TIFF_ROOT "${_cycles_lib_dir}/tiff")
_set_default(USD_ROOT_DIR "${_cycles_lib_dir}/usd")
_set_default(WEBP_ROOT_DIR "${_cycles_lib_dir}/webp")
_set_default(ZLIB_ROOT "${_cycles_lib_dir}/zlib")
if(WIN32)
set(LEVEL_ZERO_ROOT_DIR ${_cycles_lib_dir}/level_zero)
else()
set(LEVEL_ZERO_ROOT_DIR ${_cycles_lib_dir}/level-zero)
endif()
_set_default(SYCL_ROOT_DIR "${_cycles_lib_dir}/dpcpp")

# Ignore system libraries
set(CMAKE_IGNORE_PATH "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES};${CMAKE_SYSTEM_INCLUDE_PATH};${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES};${CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES}")
else()
unset(_cycles_lib_dir)
endif()
endif()
###########################################################################
# Zlib
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(ZLIB_INCLUDE_DIRS ${_cycles_lib_dir}/zlib/include)
set(ZLIB_LIBRARIES ${_cycles_lib_dir}/zlib/lib/libz_st.lib)
set(ZLIB_INCLUDE_DIR ${_cycles_lib_dir}/zlib/include)
set(ZLIB_LIBRARY ${_cycles_lib_dir}/zlib/lib/libz_st.lib)
set(ZLIB_DIR ${_cycles_lib_dir}/zlib)
set(ZLIB_FOUND ON)
elseif(NOT APPLE)
find_package(ZLIB REQUIRED)
endif()
endif()

###########################################################################
# PThreads
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(PTHREADS_LIBRARIES "${_cycles_lib_dir}/pthreads/lib/pthreadVC3.lib")
include_directories("${_cycles_lib_dir}/pthreads/include")
else()
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
find_package(Threads REQUIRED)
set(PTHREADS_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
endif()
endif()

###########################################################################
# OpenImageIO and image libraries
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY)
if(MSVC AND EXISTS ${_cycles_lib_dir})
add_definitions(
# OIIO changed the name of this define in newer versions
# we define both, so it would work with both old and new
# versions.
-DOIIO_STATIC_BUILD
-DOIIO_STATIC_DEFINE
)

set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO_ROOT_DIR}/include)
set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR} ${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO)
# Special exceptions for libraries which needs explicit debug version
set(OPENIMAGEIO_LIBRARIES
optimized ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO.lib
optimized ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO_Util.lib
debug ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO_d.lib
debug ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO_Util_d.lib
)

set(PUGIXML_INCLUDE_DIR ${PUGIXML_ROOT_DIR}/include)
set(PUGIXML_LIBRARIES
optimized ${PUGIXML_ROOT_DIR}/lib/pugixml.lib
debug ${PUGIXML_ROOT_DIR}/lib/pugixml_d.lib
)
else()
find_package(OpenImageIO REQUIRED)
if(OPENIMAGEIO_PUGIXML_FOUND)
set(PUGIXML_INCLUDE_DIR "${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO")
set(PUGIXML_LIBRARIES "")
else()
find_package(PugiXML REQUIRED)
endif()
endif()

# Dependencies
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(OPENJPEG_INCLUDE_DIR ${OPENJPEG}/include/openjpeg-2.3)
set(OPENJPEG_LIBRARIES ${_cycles_lib_dir}/openjpeg/lib/openjp2${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
find_package(OpenJPEG REQUIRED)
endif()

find_package(JPEG REQUIRED)
find_package(TIFF REQUIRED)
find_package(WebP)

if(EXISTS ${_cycles_lib_dir})
set(PNG_NAMES png16 libpng16 png libpng)
endif()
find_package(PNG REQUIRED)
endif()

###########################################################################
# OpenEXR
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(OPENEXR_INCLUDE_DIR ${OPENEXR_ROOT_DIR}/include)
set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${OPENEXR_ROOT_DIR}/include/OpenEXR ${IMATH_ROOT_DIR}/include ${IMATH_ROOT_DIR}/include/Imath)
set(OPENEXR_LIBRARIES
optimized ${OPENEXR_ROOT_DIR}/lib/OpenEXR_s.lib
optimized ${OPENEXR_ROOT_DIR}/lib/OpenEXRCore_s.lib
optimized ${OPENEXR_ROOT_DIR}/lib/Iex_s.lib
optimized ${IMATH_ROOT_DIR}/lib/Imath_s.lib
optimized ${OPENEXR_ROOT_DIR}/lib/IlmThread_s.lib
debug ${OPENEXR_ROOT_DIR}/lib/OpenEXR_s_d.lib
debug ${OPENEXR_ROOT_DIR}/lib/OpenEXRCore_s_d.lib
debug ${OPENEXR_ROOT_DIR}/lib/Iex_s_d.lib
debug ${IMATH_ROOT_DIR}/lib/Imath_s_d.lib
debug ${OPENEXR_ROOT_DIR}/lib/IlmThread_s_d.lib
)
else()
find_package(OpenEXR REQUIRED)
endif()
endif()

###########################################################################
# OpenShadingLanguage & LLVM
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OSL)
if(EXISTS ${_cycles_lib_dir})
set(LLVM_STATIC ON)
endif()

if(MSVC AND EXISTS ${_cycles_lib_dir})
# TODO(sergey): On Windows llvm-config doesn't give proper results for the
# library names, use hardcoded libraries for now.
file(GLOB _llvm_libs_release ${LLVM_ROOT_DIR}/lib/*.lib)
file(GLOB _llvm_libs_debug ${LLVM_ROOT_DIR}/debug/lib/*.lib)
set(_llvm_libs)
foreach(_llvm_lib_path ${_llvm_libs_release})
get_filename_component(_llvm_lib_name ${_llvm_lib_path} ABSOLUTE)
list(APPEND _llvm_libs optimized ${_llvm_lib_name})
endforeach()
foreach(_llvm_lib_path ${_llvm_libs_debug})
get_filename_component(_llvm_lib_name ${_llvm_lib_path} ABSOLUTE)
list(APPEND _llvm_libs debug ${_llvm_lib_name})
endforeach()
set(LLVM_LIBRARY ${_llvm_libs})
unset(_llvm_lib_name)
unset(_llvm_lib_path)
unset(_llvm_libs)
unset(_llvm_libs_debug)
unset(_llvm_libs_release)

set(OSL_INCLUDE_DIR ${OSL_ROOT_DIR}/include)
set(OSL_LIBRARIES
optimized ${OSL_ROOT_DIR}/lib/oslcomp.lib
optimized ${OSL_ROOT_DIR}/lib/oslexec.lib
optimized ${OSL_ROOT_DIR}/lib/oslquery.lib
debug ${OSL_ROOT_DIR}/lib/oslcomp_d.lib
debug ${OSL_ROOT_DIR}/lib/oslexec_d.lib
debug ${OSL_ROOT_DIR}/lib/oslquery_d.lib
${PUGIXML_LIBRARIES}
)

find_program(OSL_COMPILER NAMES oslc PATHS ${OSL_ROOT_DIR}/bin)
else()
find_package(OSL REQUIRED)
find_package(LLVM REQUIRED)
find_package(Clang REQUIRED)
endif()
endif()
###########################################################################
# OpenPGL
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_PATH_GUIDING)
if(NOT openpgl_DIR AND EXISTS ${_cycles_lib_dir})
set(openpgl_DIR ${_cycles_lib_dir}/openpgl/lib/cmake/openpgl)
endif()

find_package(openpgl QUIET)
if(openpgl_FOUND)
if(WIN32)
get_target_property(OPENPGL_LIBRARIES_RELEASE openpgl::openpgl LOCATION_RELEASE)
get_target_property(OPENPGL_LIBRARIES_DEBUG openpgl::openpgl LOCATION_DEBUG)
set(OPENPGL_LIBRARIES optimized ${OPENPGL_LIBRARIES_RELEASE} debug ${OPENPGL_LIBRARIES_DEBUG})
else()
get_target_property(OPENPGL_LIBRARIES openpgl::openpgl LOCATION)
endif()
get_target_property(OPENPGL_INCLUDE_DIR openpgl::openpgl INTERFACE_INCLUDE_DIRECTORIES)
else()
set_and_warn_library_found("OpenPGL" openpgl_FOUND WITH_CYCLES_PATH_GUIDING)
endif()
endif()

###########################################################################
# OpenColorIO
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OPENCOLORIO)
set(WITH_OPENCOLORIO ON)

if(NOT USD_OVERRIDE_OPENCOLORIO)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(OPENCOLORIO_INCLUDE_DIRS ${OPENCOLORIO_ROOT_DIR}/include)
set(OPENCOLORIO_LIBRARIES
optimized ${OPENCOLORIO_ROOT_DIR}/lib/OpenColorIO.lib
optimized ${OPENCOLORIO_ROOT_DIR}/lib/libyaml-cpp.lib
optimized ${OPENCOLORIO_ROOT_DIR}/lib/libexpatMD.lib
optimized ${OPENCOLORIO_ROOT_DIR}/lib/pystring.lib
debug ${OPENCOLORIO_ROOT_DIR}/lib/OpencolorIO_d.lib
debug ${OPENCOLORIO_ROOT_DIR}/lib/libyaml-cpp_d.lib
debug ${OPENCOLORIO_ROOT_DIR}/lib/libexpatdMD.lib
debug ${OPENCOLORIO_ROOT_DIR}/lib/pystring_d.lib
)
else()
find_package(OpenColorIO REQUIRED)
endif()
endif()
endif()

###########################################################################
# Boost
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY)
if(EXISTS ${_cycles_lib_dir})
if(MSVC)
set(Boost_USE_STATIC_RUNTIME OFF)
set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_LIBS ON)
else()
set(BOOST_LIBRARYDIR ${_cycles_lib_dir}/boost/lib)
set(Boost_NO_BOOST_CMAKE ON)
set(Boost_NO_SYSTEM_PATHS ON)
endif()
endif()

if(MSVC AND EXISTS ${_cycles_lib_dir})
set(BOOST_INCLUDE_DIR ${BOOST_ROOT}/include)
set(BOOST_VERSION_HEADER ${BOOST_INCLUDE_DIR}/boost/version.hpp)
if(EXISTS ${BOOST_VERSION_HEADER})
file(STRINGS "${BOOST_VERSION_HEADER}" BOOST_LIB_VERSION REGEX "#define BOOST_LIB_VERSION ")
if(BOOST_LIB_VERSION MATCHES "#define BOOST_LIB_VERSION \"([0-9_]+)\"")
set(BOOST_VERSION "${CMAKE_MATCH_1}")
endif()
endif()
if(NOT BOOST_VERSION)
message(FATAL_ERROR "Unable to determine Boost version")
endif()
set(BOOST_POSTFIX "vc142-mt-x64-${BOOST_VERSION}.lib")
set(BOOST_DEBUG_POSTFIX "vc142-mt-gd-x64-${BOOST_VERSION}.lib")
set(BOOST_LIBRARIES
optimized ${BOOST_ROOT}/lib/libboost_date_time-${BOOST_POSTFIX}
optimized ${BOOST_ROOT}/lib/libboost_iostreams-${BOOST_POSTFIX}
optimized ${BOOST_ROOT}/lib/libboost_filesystem-${BOOST_POSTFIX}
optimized ${BOOST_ROOT}/lib/libboost_regex-${BOOST_POSTFIX}
optimized ${BOOST_ROOT}/lib/libboost_system-${BOOST_POSTFIX}
optimized ${BOOST_ROOT}/lib/libboost_thread-${BOOST_POSTFIX}
optimized ${BOOST_ROOT}/lib/libboost_chrono-${BOOST_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_date_time-${BOOST_DEBUG_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_iostreams-${BOOST_DEBUG_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_filesystem-${BOOST_DEBUG_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_regex-${BOOST_DEBUG_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_system-${BOOST_DEBUG_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_thread-${BOOST_DEBUG_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_chrono-${BOOST_DEBUG_POSTFIX}
)
if(WITH_CYCLES_OSL)
set(BOOST_LIBRARIES ${BOOST_LIBRARIES}
optimized ${BOOST_ROOT}/lib/libboost_wave-${BOOST_POSTFIX}
debug ${BOOST_ROOT}/lib/libboost_wave-${BOOST_DEBUG_POSTFIX})
endif()
else()
set(__boost_packages iostreams filesystem regex system thread date_time)
if(WITH_CYCLES_OSL)
list(APPEND __boost_packages wave)
endif()
find_package(Boost 1.48 COMPONENTS ${__boost_packages} REQUIRED)
if(NOT Boost_FOUND)
# Try to find non-multithreaded if -mt not found, this flag
# doesn't matter for us, it has nothing to do with thread
# safety, but keep it to not disturb build setups.
set(Boost_USE_MULTITHREADED OFF)
find_package(Boost 1.48 COMPONENTS ${__boost_packages})
endif()
unset(__boost_packages)

set(BOOST_INCLUDE_DIR ${Boost_INCLUDE_DIRS})
set(BOOST_LIBRARIES ${Boost_LIBRARIES})
set(BOOST_LIBPATH ${Boost_LIBRARY_DIRS})
endif()

set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB ${BOOST_DEFINITIONS}")
endif()
###########################################################################
# Embree
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_EMBREE)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(EMBREE_INCLUDE_DIRS ${EMBREE_ROOT_DIR}/include)
set(EMBREE_LIBRARIES
optimized ${EMBREE_ROOT_DIR}/lib/embree3.lib
optimized ${EMBREE_ROOT_DIR}/lib/embree_avx2.lib
optimized ${EMBREE_ROOT_DIR}/lib/embree_avx.lib
optimized ${EMBREE_ROOT_DIR}/lib/embree_sse42.lib
optimized ${EMBREE_ROOT_DIR}/lib/lexers.lib
optimized ${EMBREE_ROOT_DIR}/lib/math.lib
optimized ${EMBREE_ROOT_DIR}/lib/simd.lib
optimized ${EMBREE_ROOT_DIR}/lib/tasking.lib
optimized ${EMBREE_ROOT_DIR}/lib/sys.lib
debug ${EMBREE_ROOT_DIR}/lib/embree3_d.lib
debug ${EMBREE_ROOT_DIR}/lib/embree_avx2_d.lib
debug ${EMBREE_ROOT_DIR}/lib/embree_avx_d.lib
debug ${EMBREE_ROOT_DIR}/lib/embree_sse42_d.lib
debug ${EMBREE_ROOT_DIR}/lib/lexers_d.lib
debug ${EMBREE_ROOT_DIR}/lib/math_d.lib
debug ${EMBREE_ROOT_DIR}/lib/simd_d.lib
debug ${EMBREE_ROOT_DIR}/lib/sys_d.lib
debug ${EMBREE_ROOT_DIR}/lib/tasking_d.lib
)
else()
find_package(Embree 3.8.0 REQUIRED)
endif()
endif()

###########################################################################
# Logging
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_LOGGING)
find_package(Glog REQUIRED)
find_package(Gflags REQUIRED)
endif()

###########################################################################
# OpenSubdiv
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OPENSUBDIV)
set(WITH_OPENSUBDIV ON)

if(NOT USD_OVERRIDE_OPENSUBDIV)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(OPENSUBDIV_INCLUDE_DIRS ${OPENSUBDIV_ROOT_DIR}/include)
set(OPENSUBDIV_LIBRARIES
optimized ${OPENSUBDIV_ROOT_DIR}/lib/osdCPU.lib
optimized ${OPENSUBDIV_ROOT_DIR}/lib/osdGPU.lib
debug ${OPENSUBDIV_ROOT_DIR}/lib/osdCPU_d.lib
debug ${OPENSUBDIV_ROOT_DIR}/lib/osdGPU_d.lib
)
else()
find_package(OpenSubdiv REQUIRED)
endif()
endif()
endif()

###########################################################################
# OpenVDB
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OPENVDB)
set(WITH_OPENVDB ON)
set(OPENVDB_DEFINITIONS -DNOMINMAX -D_USE_MATH_DEFINES)

if(NOT USD_OVERRIDE_OPENVDB)
find_package(OpenVDB REQUIRED)

if(MSVC AND EXISTS ${_cycles_lib_dir})
set(BLOSC_LIBRARY
optimized ${BLOSC_ROOT_DIR}/lib/libblosc.lib
debug ${BLOSC_ROOT_DIR}/lib/libblosc_d.lib
)
else()
find_package(Blosc REQUIRED)
endif()
endif()
endif()
###########################################################################
# NanoVDB
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_NANOVDB)
set(WITH_NANOVDB ON)

if(MSVC AND EXISTS ${_cycles_lib_dir})
set(NANOVDB_INCLUDE_DIR ${NANOVDB_ROOT_DIR}/include)
set(NANOVDB_INCLUDE_DIRS ${NANOVDB_INCLUDE_DIR})
else()
find_package(NanoVDB REQUIRED)
endif()
endif()

###########################################################################
# OpenImageDenoise
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OPENIMAGEDENOISE)
set(WITH_OPENIMAGEDENOISE ON)

if(MSVC AND EXISTS ${_cycles_lib_dir})
set(OPENIMAGEDENOISE_INCLUDE_DIRS ${OPENIMAGEDENOISE_ROOT_DIR}/include)
set(OPENIMAGEDENOISE_LIBRARIES
optimized ${OPENIMAGEDENOISE_ROOT_DIR}/lib/OpenImageDenoise.lib
optimized ${OPENIMAGEDENOISE_ROOT_DIR}/lib/common.lib
optimized ${OPENIMAGEDENOISE_ROOT_DIR}/lib/dnnl.lib
debug ${OPENIMAGEDENOISE_ROOT_DIR}/lib/OpenImageDenoise_d.lib
debug ${OPENIMAGEDENOISE_ROOT_DIR}/lib/common_d.lib
debug ${OPENIMAGEDENOISE_ROOT_DIR}/lib/dnnl_d.lib
)
else()
find_package(OpenImageDenoise REQUIRED)
endif()
endif()

###########################################################################
# TBB
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY)
if(NOT USD_OVERRIDE_TBB)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(TBB_INCLUDE_DIRS ${TBB_ROOT_DIR}/include)
set(TBB_LIBRARIES
optimized ${TBB_ROOT_DIR}/lib/tbb.lib
debug ${TBB_ROOT_DIR}/lib/tbb_debug.lib
)
else()
find_package(TBB REQUIRED)
endif()
endif()
endif()

###########################################################################
# Epoxy
###########################################################################

if(CYCLES_STANDALONE_REPOSITORY)
if((WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI) OR
WITH_CYCLES_HYDRA_RENDER_DELEGATE)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(Epoxy_LIBRARIES "${_cycles_lib_dir}/epoxy/lib/epoxy.lib")
set(Epoxy_INCLUDE_DIRS "${_cycles_lib_dir}/epoxy/include")
else()
find_package(Epoxy REQUIRED)
endif()
endif()
endif()

###########################################################################
# Alembic
###########################################################################

if(WITH_CYCLES_ALEMBIC)
if(CYCLES_STANDALONE_REPOSITORY)
if(MSVC AND EXISTS ${_cycles_lib_dir})
set(ALEMBIC_INCLUDE_DIRS ${_cycles_lib_dir}/alembic/include)
set(ALEMBIC_LIBRARIES
optimized ${_cycles_lib_dir}/alembic/lib/Alembic.lib
debug ${_cycles_lib_dir}/alembic/lib/Alembic_d.lib)
else()
find_package(Alembic REQUIRED)
endif()

set(WITH_ALEMBIC ON)
endif()
endif()

###########################################################################
# System Libraries
###########################################################################

# Detect system libraries again
if(EXISTS ${_cycles_lib_dir})
unset(CMAKE_IGNORE_PATH)
unset(_cycles_lib_dir)
endif()

###########################################################################
# SDL
###########################################################################

@@ -109,3 +687,5 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
set(WITH_CYCLES_DEVICE_ONEAPI OFF)
endif()
endif()

unset(_cycles_lib_dir)
@@ -8,13 +8,28 @@ set(INC
set(INC_SYS )

if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
if(NOT WITH_CUDA_DYNLOAD)
if(WITH_CUDA_DYNLOAD)
list(APPEND INC
../../../extern/cuew/include
)
add_definitions(-DWITH_CUDA_DYNLOAD)
else()
list(APPEND INC_SYS
${CUDA_TOOLKIT_INCLUDE}
)
add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
endif()

add_definitions(-DCYCLES_RUNTIME_OPTIX_ROOT_DIR="${CYCLES_RUNTIME_OPTIX_ROOT_DIR}")
endif()

if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
list(APPEND INC
../../../extern/hipew/include
)
add_definitions(-DWITH_HIP_DYNLOAD)
endif()

set(SRC_BASE
device.cpp
denoise.cpp

@@ -153,15 +168,24 @@ if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
)
endif()

if(WITH_CYCLES_DEVICE_CUDA)
add_definitions(-DWITH_CUDA)
endif()
if(WITH_CYCLES_DEVICE_HIP)
add_definitions(-DWITH_HIP)
endif()
if(WITH_CYCLES_DEVICE_OPTIX)
add_definitions(-DWITH_OPTIX)
endif()
if(WITH_CYCLES_DEVICE_METAL)
list(APPEND LIB
${METAL_LIBRARY}
)
add_definitions(-DWITH_METAL)
list(APPEND SRC
${SRC_METAL}
)
endif()

if (WITH_CYCLES_DEVICE_ONEAPI)
if(WITH_CYCLES_ONEAPI_BINARIES)
set(cycles_kernel_oneapi_lib_suffix "_aot")

@@ -179,6 +203,7 @@ if (WITH_CYCLES_DEVICE_ONEAPI)
else()
list(APPEND LIB ${SYCL_LIBRARY})
endif()
add_definitions(-DWITH_ONEAPI)
list(APPEND SRC
${SRC_ONEAPI}
)
@@ -232,7 +232,7 @@ string CUDADevice::compile_kernel_get_common_cflags(const uint kernel_features)
return cflags;
}

string CUDADevice::compile_kernel(const string &common_cflags,
string CUDADevice::compile_kernel(const uint kernel_features,
const char *name,
const char *base,
bool force_ptx)

@@ -281,6 +281,7 @@ string CUDADevice::compile_kernel(const string &common_cflags,
/* We include cflags into md5 so changing cuda toolkit or changing other
* compiler command line arguments makes sure cubin gets re-built.
*/
string common_cflags = compile_kernel_get_common_cflags(kernel_features);
const string kernel_md5 = util_md5_string(source_md5 + common_cflags);

const char *const kernel_ext = force_ptx ? "ptx" : "cubin";

@@ -416,8 +417,7 @@ bool CUDADevice::load_kernels(const uint kernel_features)

/* get kernel */
const char *kernel_name = "kernel";
string cflags = compile_kernel_get_common_cflags(kernel_features);
string cubin = compile_kernel(cflags, kernel_name);
string cubin = compile_kernel(kernel_features, kernel_name);
if (cubin.empty())
return false;
@@ -77,9 +77,9 @@ class CUDADevice : public Device {

bool use_adaptive_compilation();

string compile_kernel_get_common_cflags(const uint kernel_features);
virtual string compile_kernel_get_common_cflags(const uint kernel_features);

string compile_kernel(const string &cflags,
string compile_kernel(const uint kernel_features,
const char *name,
const char *base = "cuda",
bool force_ptx = false);
@@ -38,7 +38,7 @@ class CUDADeviceGraphicsInterop : public DeviceGraphicsInterop {
CUDADevice *device_ = nullptr;

/* OpenGL PBO which is currently registered as the destination for the CUDA buffer. */
int64_t opengl_pbo_id_ = 0;
uint opengl_pbo_id_ = 0;
/* Buffer area in pixels of the corresponding PBO. */
int64_t buffer_area_ = 0;
@@ -78,4 +78,24 @@ class DenoiseParams : public Node {
}
};

/* All the parameters needed to perform buffer denoising on a device.
* Is not really a task in its canonical terms (as in, is not an asynchronous running task). Is
* more like a wrapper for all the arguments and parameters needed to perform denoising. Is a
* single place where they are all listed, so that it's not required to modify all device methods
* when these parameters do change. */
class DeviceDenoiseTask {
public:
DenoiseParams params;

int num_samples;

RenderBuffers *render_buffers;
BufferParams buffer_params;

/* Allow to do in-place modification of the input passes (scaling them down i.e.). This will
* lower the memory footprint of the denoiser but will make input passes "invalid" (from path
* tracer) point of view. */
bool allow_inplace_modification;
};

CCL_NAMESPACE_END
@@ -351,7 +351,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.num = 0;

info.has_nanovdb = true;
info.has_light_tree = true;
info.has_osl = true;
info.has_guiding = true;
info.has_profiling = true;

@@ -400,7 +399,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,

/* Accumulate device info. */
info.has_nanovdb &= device.has_nanovdb;
info.has_light_tree &= device.has_light_tree;
info.has_osl &= device.has_osl;
info.has_guiding &= device.has_guiding;
info.has_profiling &= device.has_profiling;
@@ -65,7 +65,6 @@ class DeviceInfo {
int num;
bool display_device; /* GPU is used as a display device. */
bool has_nanovdb; /* Support NanoVDB volumes. */
bool has_light_tree; /* Support light tree. */
bool has_osl; /* Support Open Shading Language. */
bool has_guiding; /* Support path guiding. */
bool has_profiling; /* Supports runtime collection of profiling info. */

@@ -85,7 +84,6 @@ class DeviceInfo {
cpu_threads = 0;
display_device = false;
has_nanovdb = false;
has_light_tree = true;
has_osl = false;
has_guiding = false;
has_profiling = false;

@@ -162,11 +160,6 @@ class Device {
return true;
}

virtual bool load_osl_kernels()
{
return true;
}

/* GPU device only functions.
* These may not be used on CPU or multi-devices. */

@@ -235,6 +228,21 @@ class Device {
return nullptr;
}

/* Buffer denoising. */

/* Returns true if task is fully handled. */
virtual bool denoise_buffer(const DeviceDenoiseTask & /*task*/)
{
LOG(ERROR) << "Request buffer denoising from a device which does not support it.";
return false;
}

virtual DeviceQueue *get_denoise_queue()
{
LOG(ERROR) << "Request denoising queue from a device which does not support it.";
return nullptr;
}

/* Sub-devices */

/* Run given callback for every individual device which will be handling rendering.
@@ -137,7 +137,6 @@ void device_hip_info(vector<DeviceInfo> &devices)
info.num = num;

info.has_nanovdb = true;
info.has_light_tree = false;
info.denoisers = 0;

info.has_gpu_queue = true;
@@ -74,7 +74,7 @@ class HIPDevice : public Device {

bool use_adaptive_compilation();

string compile_kernel_get_common_cflags(const uint kernel_features);
virtual string compile_kernel_get_common_cflags(const uint kernel_features);

string compile_kernel(const uint kernel_features, const char *name, const char *base = "hip");
@@ -36,7 +36,7 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
HIPDevice *device_ = nullptr;

/* OpenGL PBO which is currently registered as the destination for the HIP buffer. */
int64_t opengl_pbo_id_ = 0;
uint opengl_pbo_id_ = 0;
/* Buffer area in pixels of the corresponding PBO. */
int64_t buffer_area_ = 0;
@@ -7,30 +7,6 @@

CCL_NAMESPACE_BEGIN

bool device_kernel_has_shading(DeviceKernel kernel)
{
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW ||
kernel == DEVICE_KERNEL_SHADER_EVAL_DISPLACE ||
kernel == DEVICE_KERNEL_SHADER_EVAL_BACKGROUND ||
kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY);
}

bool device_kernel_has_intersection(DeviceKernel kernel)
{
return (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE);
}

const char *device_kernel_as_string(DeviceKernel kernel)
{
switch (kernel) {
@@ -11,9 +11,6 @@

CCL_NAMESPACE_BEGIN

bool device_kernel_has_shading(DeviceKernel kernel);
bool device_kernel_has_intersection(DeviceKernel kernel);

const char *device_kernel_as_string(DeviceKernel kernel);
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);
@@ -496,7 +496,7 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
num_motion_steps = pointcloud->get_motion_steps();
}

const size_t num_aabbs = num_motion_steps * num_points;
const size_t num_aabbs = num_motion_steps;

MTLResourceOptions storage_mode;
if (device.hasUnifiedMemory) {

@@ -757,10 +757,6 @@ bool BVHMetal::build_TLAS(Progress &progress,
}
}

if (num_instances == 0) {
return false;
}

/*------------------------------------------------*/
BVH_status("Building TLAS | %7d instances", (int)num_instances);
/*------------------------------------------------*/
@@ -117,8 +117,6 @@ class MetalDevice : public Device {
/* ------------------------------------------------------------------ */
/* low-level memory management */

bool max_working_set_exceeded(size_t safety_margin = 8 * 1024 * 1024) const;

MetalMem *generic_alloc(device_memory &mem);

void generic_copy_to(device_memory &mem);
@@ -307,9 +307,6 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
MD5Hash md5;
md5.append(baked_constants);
md5.append(source);
if (use_metalrt) {
md5.append(std::to_string(kernel_features & METALRT_FEATURE_MASK));
}
source_md5[pso_type] = md5.get_hex();
}

@@ -446,14 +443,6 @@ void MetalDevice::erase_allocation(device_memory &mem)
}
}

bool MetalDevice::max_working_set_exceeded(size_t safety_margin) const
{
/* We're allowed to allocate beyond the safe working set size, but then if all resources are made
* resident we will get command buffer failures at render time. */
size_t available = [mtlDevice recommendedMaxWorkingSetSize] - safety_margin;
return (stats.mem_used > available);
}

MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
{
size_t size = mem.memory_size();

@@ -531,11 +520,6 @@ MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
mmem->use_UMA = false;
}

if (max_working_set_exceeded()) {
set_error("System is out of GPU memory");
return nullptr;
}

return mmem;
}

@@ -934,8 +918,9 @@ void MetalDevice::tex_alloc(device_texture &mem)
<< string_human_readable_size(mem.memory_size()) << ")";

mtlTexture = [mtlDevice newTextureWithDescriptor:desc];
assert(mtlTexture);

if (!mtlTexture) {
set_error("System is out of GPU memory");
return;
}

@@ -967,10 +952,7 @@ void MetalDevice::tex_alloc(device_texture &mem)
<< string_human_readable_size(mem.memory_size()) << ")";

mtlTexture = [mtlDevice newTextureWithDescriptor:desc];
if (!mtlTexture) {
set_error("System is out of GPU memory");
return;
}
assert(mtlTexture);

[mtlTexture replaceRegion:MTLRegionMake2D(0, 0, mem.data_width, mem.data_height)
mipmapLevel:0

@@ -1032,10 +1014,6 @@ void MetalDevice::tex_alloc(device_texture &mem)
need_texture_info = true;

texture_info[slot].data = uint64_t(slot) | (sampler_index << 32);

if (max_working_set_exceeded()) {
set_error("System is out of GPU memory");
}
}

void MetalDevice::tex_free(device_texture &mem)

@@ -1096,10 +1074,6 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
}
}

if (max_working_set_exceeded()) {
set_error("System is out of GPU memory");
}
}

CCL_NAMESPACE_END
|
@@ -54,10 +54,6 @@ enum MetalPipelineType {
|
||||
PSO_NUM
|
||||
};
|
||||
|
||||
# define METALRT_FEATURE_MASK \
|
||||
(KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD | \
|
||||
KERNEL_FEATURE_OBJECT_MOTION)
|
||||
|
||||
const char *kernel_type_as_string(MetalPipelineType pso_type);
|
||||
|
||||
struct MetalKernelPipeline {
|
||||
@@ -71,7 +67,9 @@ struct MetalKernelPipeline {
|
||||
|
||||
KernelData kernel_data_;
|
||||
bool use_metalrt;
|
||||
uint32_t metalrt_features = 0;
|
||||
bool metalrt_hair;
|
||||
bool metalrt_hair_thick;
|
||||
bool metalrt_pointcloud;
|
||||
|
||||
int threads_per_threadgroup;
|
||||
|
||||
|
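The struct above switches between two representations of the MetalRT configuration: a packed feature mask versus one boolean per feature. A mask keeps the pipeline cache key compact and lets new features be added without new fields; the individual flags are then derived by testing bits. A small sketch of that idea (the feature values below are placeholders for the real KERNEL_FEATURE_* bits in kernel/types.h):

#include <cstdint>

/* Illustrative feature bits; the real values live in kernel/types.h. */
enum : uint32_t {
  FEATURE_HAIR = 1u << 0,
  FEATURE_HAIR_THICK = 1u << 1,
  FEATURE_POINTCLOUD = 1u << 2,
  FEATURE_OBJECT_MOTION = 1u << 3,
};

constexpr uint32_t RT_FEATURE_MASK = FEATURE_HAIR | FEATURE_HAIR_THICK |
                                     FEATURE_POINTCLOUD | FEATURE_OBJECT_MOTION;

struct PipelineKey {
  bool use_rt = false;
  uint32_t rt_features = 0; /* only bits inside RT_FEATURE_MASK are meaningful */
};

static PipelineKey make_key(bool use_rt, uint32_t kernel_features)
{
  /* Mask once at key construction; later comparisons are plain equality. */
  return {use_rt, use_rt ? (kernel_features & RT_FEATURE_MASK) : 0u};
}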
@@ -274,9 +274,12 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,

  /* metalrt options */
  request.pipeline->use_metalrt = device->use_metalrt;
  request.pipeline->metalrt_features = device->use_metalrt ?
                                           (device->kernel_features & METALRT_FEATURE_MASK) :
                                           0;
  request.pipeline->metalrt_hair = device->use_metalrt &&
                                   (device->kernel_features & KERNEL_FEATURE_HAIR);
  request.pipeline->metalrt_hair_thick = device->use_metalrt &&
                                         (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
  request.pipeline->metalrt_pointcloud = device->use_metalrt &&
                                         (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);

  {
    thread_scoped_lock lock(cache_mutex);

@@ -313,13 +316,9 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M

  /* metalrt options */
  bool use_metalrt = device->use_metalrt;
  bool device_metalrt_hair = use_metalrt && device->kernel_features & KERNEL_FEATURE_HAIR;
  bool device_metalrt_hair_thick = use_metalrt &&
                                   device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
  bool device_metalrt_pointcloud = use_metalrt &&
                                   device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
  bool device_metalrt_motion = use_metalrt &&
                               device->kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
  bool metalrt_hair = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR);
  bool metalrt_hair_thick = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
  bool metalrt_pointcloud = use_metalrt && (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);

  MetalKernelPipeline *best_pipeline = nullptr;
  for (auto &pipeline : collection) {
@@ -328,16 +327,9 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
      continue;
    }

    bool pipeline_metalrt_hair = pipeline->metalrt_features & KERNEL_FEATURE_HAIR;
    bool pipeline_metalrt_hair_thick = pipeline->metalrt_features & KERNEL_FEATURE_HAIR_THICK;
    bool pipeline_metalrt_pointcloud = pipeline->metalrt_features & KERNEL_FEATURE_POINTCLOUD;
    bool pipeline_metalrt_motion = use_metalrt &&
                                   pipeline->metalrt_features & KERNEL_FEATURE_OBJECT_MOTION;

    if (pipeline->use_metalrt != use_metalrt || pipeline_metalrt_hair != device_metalrt_hair ||
        pipeline_metalrt_hair_thick != device_metalrt_hair_thick ||
        pipeline_metalrt_pointcloud != device_metalrt_pointcloud ||
        pipeline_metalrt_motion != device_metalrt_motion) {
    if (pipeline->use_metalrt != use_metalrt || pipeline->metalrt_hair != metalrt_hair ||
        pipeline->metalrt_hair_thick != metalrt_hair_thick ||
        pipeline->metalrt_pointcloud != metalrt_pointcloud) {
      /* wrong combination of metalrt options */
      continue;
    }
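Both variants of get_best_pipeline above reject cached pipelines whose ray-tracing options differ from what the device currently needs; with the mask representation the test collapses to an equality check on the masked bits. A compact sketch of that lookup (CachedPipeline and find_matching are illustrative names, not the patch's API):

#include <cstdint>
#include <vector>

struct CachedPipeline {
  bool use_rt = false;
  uint32_t rt_features = 0;
};

/* Return the first cached pipeline whose ray-tracing configuration matches the
 * requested one; the real code keeps scanning to score occupancy as well. */
static const CachedPipeline *find_matching(const std::vector<CachedPipeline> &cache,
                                           bool use_rt,
                                           uint32_t wanted_features)
{
  for (const CachedPipeline &p : cache) {
    if (p.use_rt != use_rt || p.rt_features != wanted_features) {
      continue; /* wrong combination of ray-tracing options */
    }
    return &p;
  }
  return nullptr;
}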
@@ -408,8 +400,6 @@ static MTLFunctionConstantValues *GetConstantValues(KernelData const *data = nul
  if (!data) {
    data = &zero_data;
  }
  int zero_int = 0;
  [constant_values setConstantValue:&zero_int type:MTLDataType_int atIndex:Kernel_DummyConstant];

# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
  [constant_values setConstantValue:&data->parent.name \
@@ -433,7 +423,10 @@ void MetalKernelPipeline::compile()
  MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
  func_desc.name = entryPoint;

  if (pso_type != PSO_GENERIC) {
  if (pso_type == PSO_SPECIALIZED_SHADE) {
    func_desc.constantValues = GetConstantValues(&kernel_data_);
  }
  else if (pso_type == PSO_SPECIALIZED_INTERSECT) {
    func_desc.constantValues = GetConstantValues(&kernel_data_);
  }
  else {
@@ -478,13 +471,6 @@ void MetalKernelPipeline::compile()
      const char *function_name = function_names[i];
      desc.name = [@(function_name) copy];

      if (pso_type != PSO_GENERIC) {
        desc.constantValues = GetConstantValues(&kernel_data_);
      }
      else {
        desc.constantValues = GetConstantValues();
      }

      NSError *error = NULL;
      rt_intersection_function[i] = [mtlLibrary newFunctionWithDescriptor:desc error:&error];

@@ -505,10 +491,6 @@ void MetalKernelPipeline::compile()
  NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
  NSArray *linked_functions = nil;

  bool metalrt_hair = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR);
  bool metalrt_hair_thick = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR_THICK);
  bool metalrt_pointcloud = use_metalrt && (metalrt_features & KERNEL_FEATURE_POINTCLOUD);

  if (use_metalrt) {
    id<MTLFunction> curve_intersect_default = nil;
    id<MTLFunction> curve_intersect_shadow = nil;
@@ -636,9 +618,7 @@ void MetalKernelPipeline::compile()
    metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
    path_create_directories(metalbin_path);

    /* Retrieve shader binary from disk, and update the file timestamp for LRU purging to work as
     * intended. */
    if (use_binary_archive && path_cache_kernel_exists_and_mark_used(metalbin_path)) {
    if (path_exists(metalbin_path) && use_binary_archive) {
      if (@available(macOS 11.0, *)) {
        MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
        archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
@@ -715,9 +695,6 @@ void MetalKernelPipeline::compile()
          metal_printf("Failed to save binary archive, error:\n%s\n",
                       [[error localizedDescription] UTF8String]);
        }
        else {
          path_cache_kernel_mark_added_and_clear_old(metalbin_path);
        }
      }
    }
  };
@@ -753,8 +730,7 @@ void MetalKernelPipeline::compile()
          newIntersectionFunctionTableWithDescriptor:ift_desc];

      /* Finally write the function handles into this pipeline's table */
      int size = (int)[table_functions[table] count];
      for (int i = 0; i < size; i++) {
      for (int i = 0; i < 2; i++) {
        id<MTLFunctionHandle> handle = [pipeline
            functionHandleWithFunction:table_functions[table][i]];
        [intersection_func_table[table] setFunction:handle atIndex:i];

@@ -138,15 +138,6 @@ class MultiDevice : public Device {
    return true;
  }

  bool load_osl_kernels() override
  {
    foreach (SubDevice &sub, devices)
      if (!sub.device->load_osl_kernels())
        return false;

    return true;
  }

  void build_bvh(BVH *bvh, Progress &progress, bool refit) override
  {
    /* Try to build and share a single acceleration structure, if possible */
@@ -213,12 +204,10 @@ class MultiDevice : public Device {

  virtual void *get_cpu_osl_memory() override
  {
    /* Always return the OSL memory of the CPU device (this works since the constructor above
     * guarantees that CPU devices are always added to the back). */
    if (devices.size() > 1 && devices.back().device->info.type != DEVICE_CPU) {
    if (devices.size() > 1) {
      return NULL;
    }
    return devices.back().device->get_cpu_osl_memory();
    return devices.front().device->get_cpu_osl_memory();
  }

  bool is_resident(device_ptr key, Device *sub_device) override

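The new get_cpu_osl_memory relies on an ordering invariant rather than searching: the MultiDevice constructor appends the CPU device last, so devices.back() is the CPU device whenever one exists, and the extra type check guards the GPU-only case. A tiny sketch of that invariant (SubDevice and DeviceType here are simplified stand-ins for the real classes):

#include <vector>

enum SketchDeviceType { DEVICE_CPU_SKETCH, DEVICE_GPU_SKETCH };

struct SubDeviceSketch {
  SketchDeviceType type;
};

/* The CPU device, if present, is appended last, so checking the back of the
 * list is enough, provided it really is a CPU device. */
static SubDeviceSketch *cpu_device_or_null(std::vector<SubDeviceSketch> &devices)
{
  if (devices.empty() || devices.back().type != DEVICE_CPU_SKETCH) {
    return nullptr;
  }
  return &devices.back();
}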
@@ -31,8 +31,6 @@ bool device_oneapi_init()
   * improves stability as of intel/LLVM SYCL-nightly/20220529.
   * All these env variable can be set beforehand by end-users and
   * will in that case -not- be overwritten. */
  /* By default, enable only Level-Zero and if all devices are allowed, also CUDA and HIP.
   * OpenCL backend isn't currently well supported. */
# ifdef _WIN32
  if (getenv("SYCL_CACHE_PERSISTENT") == nullptr) {
    _putenv_s("SYCL_CACHE_PERSISTENT", "1");
@@ -41,12 +39,7 @@ bool device_oneapi_init()
    _putenv_s("SYCL_CACHE_THRESHOLD", "0");
  }
  if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
    if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
      _putenv_s("SYCL_DEVICE_FILTER", "level_zero");
    }
    else {
      _putenv_s("SYCL_DEVICE_FILTER", "level_zero,cuda,hip");
    }
    _putenv_s("SYCL_DEVICE_FILTER", "level_zero");
  }
  if (getenv("SYCL_ENABLE_PCI") == nullptr) {
    _putenv_s("SYCL_ENABLE_PCI", "1");
@@ -57,12 +50,7 @@ bool device_oneapi_init()
# elif __linux__
  setenv("SYCL_CACHE_PERSISTENT", "1", false);
  setenv("SYCL_CACHE_THRESHOLD", "0", false);
  if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
    setenv("SYCL_DEVICE_FILTER", "level_zero", false);
  }
  else {
    setenv("SYCL_DEVICE_FILTER", "level_zero,cuda,hip", false);
  }
  setenv("SYCL_DEVICE_FILTER", "level_zero", false);
  setenv("SYCL_ENABLE_PCI", "1", false);
  setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
# endif

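Both platform branches above repeat the same "set the variable only when the user has not already set it" idiom, via getenv() + _putenv_s() on Windows and setenv(..., 0) on Linux. A small wrapper keeps the two paths identical; the helper name below is an illustration, not something the patch introduces:

#include <cstdlib>

/* Set an environment variable only if the user has not already set it.
 * Mirrors setenv(name, value, 0) on POSIX (overwrite disabled) and the
 * getenv() + _putenv_s() sequence used on Windows. */
static void setenv_if_unset(const char *name, const char *value)
{
#ifdef _WIN32
  if (std::getenv(name) == nullptr) {
    _putenv_s(name, value);
  }
#else
  setenv(name, value, 0);
#endif
}

/* Usage mirroring the patch: prefer Level-Zero unless all backends are requested. */
// setenv_if_unset("SYCL_DEVICE_FILTER",
//                 std::getenv("CYCLES_ONEAPI_ALL_DEVICES") ? "level_zero,cuda,hip" : "level_zero");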
@@ -430,9 +430,9 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_
  sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context());
  (void)usm_type;
  assert(usm_type == sycl::usm::alloc::device ||
         (usm_type == sycl::usm::alloc::host &&
          (allow_host || device_type == sycl::info::device_type::cpu)) ||
         usm_type == sycl::usm::alloc::unknown);
         ((device_type == sycl::info::device_type::cpu || allow_host) &&
              usm_type == sycl::usm::alloc::host ||
          usm_type == sycl::usm::alloc::unknown));
# else
  /* Silence warning about unused arguments. */
  (void)queue_;

@@ -9,10 +9,6 @@

#include "util/log.h"

#ifdef WITH_OSL
# include <OSL/oslversion.h>
#endif

#ifdef WITH_OPTIX
# include <optix_function_table_definition.h>
#endif
@@ -69,9 +65,6 @@ void device_optix_info(const vector<DeviceInfo> &cuda_devices, vector<DeviceInfo

    info.type = DEVICE_OPTIX;
    info.id += "_OptiX";
# if defined(WITH_OSL) && (OSL_VERSION_MINOR >= 13 || OSL_VERSION_MAJOR > 1)
    info.has_osl = true;
# endif
    info.denoisers |= DENOISER_OPTIX;

    devices.push_back(info);

File diff suppressed because it is too large
@@ -1,14 +1,16 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2019, NVIDIA Corporation
|
||||
* Copyright 2019-2022 Blender Foundation */
|
||||
* Copyright 2019, NVIDIA Corporation.
|
||||
* Copyright 2019-2022 Blender Foundation. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_OPTIX
|
||||
|
||||
# include "device/cuda/device_impl.h"
|
||||
# include "device/optix/queue.h"
|
||||
# include "device/optix/util.h"
|
||||
# include "kernel/osl/globals.h"
|
||||
# include "kernel/types.h"
|
||||
# include "util/unique_ptr.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -21,16 +23,8 @@ enum {
|
||||
PG_RGEN_INTERSECT_SHADOW,
|
||||
PG_RGEN_INTERSECT_SUBSURFACE,
|
||||
PG_RGEN_INTERSECT_VOLUME_STACK,
|
||||
PG_RGEN_SHADE_BACKGROUND,
|
||||
PG_RGEN_SHADE_LIGHT,
|
||||
PG_RGEN_SHADE_SURFACE,
|
||||
PG_RGEN_SHADE_SURFACE_RAYTRACE,
|
||||
PG_RGEN_SHADE_SURFACE_MNEE,
|
||||
PG_RGEN_SHADE_VOLUME,
|
||||
PG_RGEN_SHADE_SHADOW,
|
||||
PG_RGEN_EVAL_DISPLACE,
|
||||
PG_RGEN_EVAL_BACKGROUND,
|
||||
PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY,
|
||||
PG_MISS,
|
||||
PG_HITD, /* Default hit group. */
|
||||
PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
|
||||
@@ -46,14 +40,14 @@ enum {
|
||||
};
|
||||
|
||||
static const int MISS_PROGRAM_GROUP_OFFSET = PG_MISS;
|
||||
static const int NUM_MISS_PROGRAM_GROUPS = 1;
|
||||
static const int NUM_MIS_PROGRAM_GROUPS = 1;
|
||||
static const int HIT_PROGAM_GROUP_OFFSET = PG_HITD;
|
||||
static const int NUM_HIT_PROGRAM_GROUPS = 8;
|
||||
static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
|
||||
static const int NUM_CALLABLE_PROGRAM_GROUPS = 2;
|
||||
|
||||
/* List of OptiX pipelines. */
|
||||
enum { PIP_SHADE, PIP_INTERSECT, NUM_PIPELINES };
|
||||
enum { PIP_SHADE_RAYTRACE, PIP_SHADE_MNEE, PIP_INTERSECT, NUM_PIPELINES };
|
||||
|
||||
/* A single shader binding table entry. */
|
||||
struct SbtRecord {
|
||||
@@ -67,35 +61,52 @@ class OptiXDevice : public CUDADevice {
|
||||
OptixModule optix_module = NULL; /* All necessary OptiX kernels are in one module. */
|
||||
OptixModule builtin_modules[2] = {};
|
||||
OptixPipeline pipelines[NUM_PIPELINES] = {};
|
||||
OptixProgramGroup groups[NUM_PROGRAM_GROUPS] = {};
|
||||
OptixPipelineCompileOptions pipeline_options = {};
|
||||
|
||||
bool motion_blur = false;
|
||||
device_vector<SbtRecord> sbt_data;
|
||||
device_only_memory<KernelParamsOptiX> launch_params;
|
||||
|
||||
# ifdef WITH_OSL
|
||||
OSLGlobals osl_globals;
|
||||
vector<OptixModule> osl_modules;
|
||||
vector<OptixProgramGroup> osl_groups;
|
||||
# endif
|
||||
|
||||
private:
|
||||
OptixTraversableHandle tlas_handle = 0;
|
||||
|
||||
vector<unique_ptr<device_only_memory<char>>> delayed_free_bvh_memory;
|
||||
thread_mutex delayed_free_bvh_mutex;
|
||||
|
||||
class Denoiser {
|
||||
public:
|
||||
explicit Denoiser(OptiXDevice *device);
|
||||
|
||||
OptiXDevice *device;
|
||||
OptiXDeviceQueue queue;
|
||||
|
||||
OptixDenoiser optix_denoiser = nullptr;
|
||||
|
||||
/* Configuration size, as provided to `optixDenoiserSetup`.
|
||||
* If the `optixDenoiserSetup()` was never used on the current `optix_denoiser` the
|
||||
* `is_configured` will be false. */
|
||||
bool is_configured = false;
|
||||
int2 configured_size = make_int2(0, 0);
|
||||
|
||||
/* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
|
||||
* The memory layout goes as following: [denoiser state][scratch buffer]. */
|
||||
device_only_memory<unsigned char> state;
|
||||
OptixDenoiserSizes sizes = {};
|
||||
|
||||
bool use_pass_albedo = false;
|
||||
bool use_pass_normal = false;
|
||||
bool use_pass_flow = false;
|
||||
};
|
||||
Denoiser denoiser_;
|
||||
|
||||
public:
|
||||
OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||
~OptiXDevice();
|
||||
|
||||
private:
|
||||
BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
|
||||
string compile_kernel_get_common_cflags(const uint kernel_features);
|
||||
string compile_kernel_get_common_cflags(const uint kernel_features) override;
|
||||
|
||||
bool load_kernels(const uint kernel_features) override;
|
||||
|
||||
bool load_osl_kernels() override;
|
||||
|
||||
bool build_optix_bvh(BVHOptiX *bvh,
|
||||
OptixBuildOperation operation,
|
||||
const OptixBuildInput &build_input,
|
||||
@@ -112,7 +123,52 @@ class OptiXDevice : public CUDADevice {
|
||||
|
||||
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
|
||||
|
||||
void *get_cpu_osl_memory() override;
|
||||
/* --------------------------------------------------------------------
|
||||
* Denoising.
|
||||
*/
|
||||
|
||||
class DenoiseContext;
|
||||
class DenoisePass;
|
||||
|
||||
virtual bool denoise_buffer(const DeviceDenoiseTask &task) override;
|
||||
virtual DeviceQueue *get_denoise_queue() override;
|
||||
|
||||
/* Read guiding passes from the render buffers, preprocess them in a way which is expected by
 * OptiX and store in the guiding passes memory within the given context.
 *
 * Pre-processing of the guiding passes is to only happen once per context lifetime. Do not
 * preprocess them for every pass which is being denoised. */
|
||||
bool denoise_filter_guiding_preprocess(DenoiseContext &context);
|
||||
|
||||
/* Set fake albedo pixels in the albedo guiding pass storage.
|
||||
* After this point only passes which do not need albedo for denoising can be processed. */
|
||||
bool denoise_filter_guiding_set_fake_albedo(DenoiseContext &context);
|
||||
|
||||
void denoise_pass(DenoiseContext &context, PassType pass_type);
|
||||
|
||||
/* Read input color pass from the render buffer into the memory which corresponds to the noisy
|
||||
* input within the given context. Pixels are scaled to the number of samples, but are not
|
||||
* preprocessed yet. */
|
||||
void denoise_color_read(DenoiseContext &context, const DenoisePass &pass);
|
||||
|
||||
/* Run corresponding filter kernels, preparing data for the denoiser or copying data from the
|
||||
* denoiser result to the render buffer. */
|
||||
bool denoise_filter_color_preprocess(DenoiseContext &context, const DenoisePass &pass);
|
||||
bool denoise_filter_color_postprocess(DenoiseContext &context, const DenoisePass &pass);
|
||||
|
||||
/* Make sure the OptiX denoiser is created and configured. */
|
||||
bool denoise_ensure(DenoiseContext &context);
|
||||
|
||||
/* Create OptiX denoiser descriptor if needed.
|
||||
* Will do nothing if the current OptiX descriptor is usable for the given parameters.
|
||||
* If the OptiX denoiser descriptor did re-allocate here it is left unconfigured. */
|
||||
bool denoise_create_if_needed(DenoiseContext &context);
|
||||
|
||||
/* Configure existing OptiX denoiser descriptor for the use for the given task. */
|
||||
bool denoise_configure_if_needed(DenoiseContext &context);
|
||||
|
||||
/* Run configured denoiser. */
|
||||
bool denoise_run(DenoiseContext &context, const DenoisePass &pass);
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
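The denoising methods declared in the class above imply a fixed call order, which the denoiser implementation further down in this patch follows: ensure the OptiX denoiser exists and is configured, preprocess the guiding passes once, then denoise albedo-dependent passes before the albedo guide is replaced with the fake constant. A condensed, illustrative driver of that flow (Ctx and the pass constants are simplified stand-ins for DenoiseContext and PassType):

/* Condensed driver mirroring OptiXDenoiser::denoise_buffer() later in this patch. */
struct Ctx { bool albedo_replaced_with_fake = false; };
enum SketchPass { PASS_COMBINED_SKETCH, PASS_SHADOW_CATCHER_MATTE_SKETCH, PASS_SHADOW_CATCHER_SKETCH };

static bool denoise_ensure(Ctx &) { return true; }                    /* create + configure */
static bool denoise_filter_guiding_preprocess(Ctx &) { return true; } /* once per context */
static void denoise_pass(Ctx &, SketchPass) {}

static bool denoise_buffer_flow(Ctx &context)
{
  if (!denoise_ensure(context) || !denoise_filter_guiding_preprocess(context)) {
    return false;
  }
  /* Passes that use the real albedo guide must run first ... */
  denoise_pass(context, PASS_COMBINED_SKETCH);
  denoise_pass(context, PASS_SHADOW_CATCHER_MATTE_SKETCH);
  /* ... then passes that replace the albedo guide with a fake constant. */
  denoise_pass(context, PASS_SHADOW_CATCHER_SKETCH);
  return true;
}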
|
||||
|
@@ -24,33 +24,21 @@ void OptiXDeviceQueue::init_execution()
|
||||
CUDADeviceQueue::init_execution();
|
||||
}
|
||||
|
||||
static bool is_optix_specific_kernel(DeviceKernel kernel, bool use_osl)
|
||||
static bool is_optix_specific_kernel(DeviceKernel kernel)
|
||||
{
|
||||
# ifdef WITH_OSL
|
||||
/* OSL uses direct callables to execute, so shading needs to be done in OptiX if OSL is used. */
|
||||
if (use_osl && device_kernel_has_shading(kernel)) {
|
||||
return true;
|
||||
}
|
||||
# else
|
||||
(void)use_osl;
|
||||
# endif
|
||||
|
||||
return device_kernel_has_intersection(kernel);
|
||||
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
||||
}
|
||||
|
||||
bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
const int work_size,
|
||||
DeviceKernelArguments const &args)
|
||||
{
|
||||
OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_);
|
||||
|
||||
# ifdef WITH_OSL
|
||||
const bool use_osl = static_cast<OSLGlobals *>(optix_device->get_cpu_osl_memory())->use;
|
||||
# else
|
||||
const bool use_osl = false;
|
||||
# endif
|
||||
|
||||
if (!is_optix_specific_kernel(kernel, use_osl)) {
|
||||
if (!is_optix_specific_kernel(kernel)) {
|
||||
return CUDADeviceQueue::enqueue(kernel, work_size, args);
|
||||
}
|
||||
|
||||
@@ -62,6 +50,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
|
||||
const CUDAContextScope scope(cuda_device_);
|
||||
|
||||
OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_);
|
||||
|
||||
const device_ptr sbt_data_ptr = optix_device->sbt_data.device_pointer;
|
||||
const device_ptr launch_params_ptr = optix_device->launch_params.device_pointer;
|
||||
|
||||
@@ -72,7 +62,9 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
sizeof(device_ptr),
|
||||
cuda_stream_));
|
||||
|
||||
if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || device_kernel_has_shading(kernel)) {
|
||||
if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) {
|
||||
cuda_device_assert(
|
||||
cuda_device_,
|
||||
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
|
||||
@@ -80,15 +72,6 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
sizeof(device_ptr),
|
||||
cuda_stream_));
|
||||
}
|
||||
if (kernel == DEVICE_KERNEL_SHADER_EVAL_DISPLACE ||
|
||||
kernel == DEVICE_KERNEL_SHADER_EVAL_BACKGROUND ||
|
||||
kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY) {
|
||||
cuda_device_assert(cuda_device_,
|
||||
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, offset),
|
||||
args.values[2], // &d_offset
|
||||
sizeof(int32_t),
|
||||
cuda_stream_));
|
||||
}
|
||||
|
||||
cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_));
|
||||
|
||||
@@ -96,35 +79,14 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
OptixShaderBindingTable sbt_params = {};
|
||||
|
||||
switch (kernel) {
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
|
||||
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_BACKGROUND * sizeof(SbtRecord);
|
||||
break;
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT:
|
||||
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_LIGHT * sizeof(SbtRecord);
|
||||
break;
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
|
||||
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE * sizeof(SbtRecord);
|
||||
break;
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
|
||||
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||
pipeline = optix_device->pipelines[PIP_SHADE_RAYTRACE];
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
|
||||
break;
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
|
||||
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||
pipeline = optix_device->pipelines[PIP_SHADE_MNEE];
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord);
|
||||
break;
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
|
||||
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_VOLUME * sizeof(SbtRecord);
|
||||
break;
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW:
|
||||
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SHADOW * sizeof(SbtRecord);
|
||||
break;
|
||||
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
|
||||
pipeline = optix_device->pipelines[PIP_INTERSECT];
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);
|
||||
@@ -142,20 +104,6 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_VOLUME_STACK * sizeof(SbtRecord);
|
||||
break;
|
||||
|
||||
case DEVICE_KERNEL_SHADER_EVAL_DISPLACE:
|
||||
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_DISPLACE * sizeof(SbtRecord);
|
||||
break;
|
||||
case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND:
|
||||
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_BACKGROUND * sizeof(SbtRecord);
|
||||
break;
|
||||
case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
|
||||
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||
sbt_params.raygenRecord = sbt_data_ptr +
|
||||
PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY * sizeof(SbtRecord);
|
||||
break;
|
||||
|
||||
default:
|
||||
LOG(ERROR) << "Invalid kernel " << device_kernel_as_string(kernel)
|
||||
<< " is attempted to be enqueued.";
|
||||
@@ -164,7 +112,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
|
||||
sbt_params.missRecordBase = sbt_data_ptr + MISS_PROGRAM_GROUP_OFFSET * sizeof(SbtRecord);
|
||||
sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
|
||||
sbt_params.missRecordCount = NUM_MISS_PROGRAM_GROUPS;
|
||||
sbt_params.missRecordCount = NUM_MIS_PROGRAM_GROUPS;
|
||||
sbt_params.hitgroupRecordBase = sbt_data_ptr + HIT_PROGAM_GROUP_OFFSET * sizeof(SbtRecord);
|
||||
sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
|
||||
sbt_params.hitgroupRecordCount = NUM_HIT_PROGRAM_GROUPS;
|
||||
@@ -172,12 +120,6 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
sbt_params.callablesRecordCount = NUM_CALLABLE_PROGRAM_GROUPS;
|
||||
sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord);
|
||||
|
||||
# ifdef WITH_OSL
|
||||
if (use_osl) {
|
||||
sbt_params.callablesRecordCount += static_cast<unsigned int>(optix_device->osl_groups.size());
|
||||
}
|
||||
# endif
|
||||
|
||||
/* Launch the ray generation program. */
|
||||
optix_device_assert(optix_device,
|
||||
optixLaunch(pipeline,
|
||||
|
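Every case of the switch above computes the ray-generation record the same way: base address of the shader binding table buffer plus the program-group index times the record size. A compact sketch of that arithmetic, with the record type simplified (the real SbtRecord carries OptiX's aligned 32-byte header plus user data):

#include <cstddef>
#include <cstdint>

/* Simplified SBT record for illustration only. */
struct SbtRecordSketch {
  alignas(16) char header[32];
};

/* Address of the ray-generation record for a given program group index,
 * matching sbt_data_ptr + PG_RGEN_* * sizeof(SbtRecord) in the code above. */
static uint64_t raygen_record_address(uint64_t sbt_base, int program_group_index)
{
  return sbt_base + uint64_t(program_group_index) * sizeof(SbtRecordSketch);
}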
@@ -66,9 +66,7 @@ struct SocketType {
|
||||
LINK_NORMAL = (1 << 8),
|
||||
LINK_POSITION = (1 << 9),
|
||||
LINK_TANGENT = (1 << 10),
|
||||
LINK_OSL_INITIALIZER = (1 << 11),
|
||||
DEFAULT_LINK_MASK = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) |
|
||||
(1 << 10) | (1 << 11)
|
||||
DEFAULT_LINK_MASK = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10)
|
||||
};
|
||||
|
||||
ustring name;
|
||||
|
@@ -8,7 +8,7 @@ set(INC
|
||||
set(SRC
|
||||
adaptive_sampling.cpp
|
||||
denoiser.cpp
|
||||
denoiser_gpu.cpp
|
||||
denoiser_device.cpp
|
||||
denoiser_oidn.cpp
|
||||
denoiser_optix.cpp
|
||||
path_trace.cpp
|
||||
@@ -30,7 +30,7 @@ set(SRC
|
||||
set(SRC_HEADERS
|
||||
adaptive_sampling.h
|
||||
denoiser.h
|
||||
denoiser_gpu.h
|
||||
denoiser_device.h
|
||||
denoiser_oidn.h
|
||||
denoiser_optix.h
|
||||
path_trace.h
|
||||
|
@@ -16,11 +16,9 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa
|
||||
{
|
||||
DCHECK(params.use);
|
||||
|
||||
#ifdef WITH_OPTIX
|
||||
if (params.type == DENOISER_OPTIX && Device::available_devices(DEVICE_MASK_OPTIX).size()) {
|
||||
return make_unique<OptiXDenoiser>(path_trace_device, params);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Always fallback to OIDN. */
|
||||
DenoiseParams oidn_params = params;
|
||||
|
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#include "integrator/denoiser_gpu.h"
|
||||
#include "integrator/denoiser_device.h"
|
||||
|
||||
#include "device/denoise.h"
|
||||
#include "device/device.h"
|
||||
@@ -13,27 +13,27 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
DenoiserGPU::DenoiserGPU(Device *path_trace_device, const DenoiseParams ¶ms)
|
||||
DeviceDenoiser::DeviceDenoiser(Device *path_trace_device, const DenoiseParams ¶ms)
|
||||
: Denoiser(path_trace_device, params)
|
||||
{
|
||||
}
|
||||
|
||||
DenoiserGPU::~DenoiserGPU()
|
||||
DeviceDenoiser::~DeviceDenoiser()
|
||||
{
|
||||
/* Explicit implementation, to allow forward declaration of Device in the header. */
|
||||
}
|
||||
|
||||
bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
|
||||
RenderBuffers *render_buffers,
|
||||
const int num_samples,
|
||||
bool allow_inplace_modification)
|
||||
bool DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
|
||||
RenderBuffers *render_buffers,
|
||||
const int num_samples,
|
||||
bool allow_inplace_modification)
|
||||
{
|
||||
Device *denoiser_device = get_denoiser_device();
|
||||
if (!denoiser_device) {
|
||||
return false;
|
||||
}
|
||||
|
||||
DenoiseTask task;
|
||||
DeviceDenoiseTask task;
|
||||
task.params = params_;
|
||||
task.num_samples = num_samples;
|
||||
task.buffer_params = buffer_params;
|
||||
@@ -50,6 +50,8 @@ bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
|
||||
else {
|
||||
VLOG_WORK << "Creating temporary buffer on denoiser device.";
|
||||
|
||||
DeviceQueue *queue = denoiser_device->get_denoise_queue();
|
||||
|
||||
/* Create buffer which is available by the device used by denoiser. */
|
||||
|
||||
/* TODO(sergey): Optimize data transfers. For example, only copy denoising related passes,
|
||||
@@ -68,13 +70,13 @@ bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
|
||||
render_buffers->buffer.data(),
|
||||
sizeof(float) * local_render_buffers.buffer.size());
|
||||
|
||||
denoiser_queue_->copy_to_device(local_render_buffers.buffer);
|
||||
queue->copy_to_device(local_render_buffers.buffer);
|
||||
|
||||
task.render_buffers = &local_render_buffers;
|
||||
task.allow_inplace_modification = true;
|
||||
}
|
||||
|
||||
const bool denoise_result = denoise_buffer(task);
|
||||
const bool denoise_result = denoiser_device->denoise_buffer(task);
|
||||
|
||||
if (local_buffer_used) {
|
||||
local_render_buffers.copy_from_device();
|
||||
@@ -88,21 +90,4 @@ bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
|
||||
return denoise_result;
|
||||
}
|
||||
|
||||
Device *DenoiserGPU::ensure_denoiser_device(Progress *progress)
|
||||
{
|
||||
Device *denoiser_device = Denoiser::ensure_denoiser_device(progress);
|
||||
if (!denoiser_device) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!denoiser_queue_) {
|
||||
denoiser_queue_ = denoiser_device->gpu_queue_create();
|
||||
if (!denoiser_queue_) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
return denoiser_device;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
intern/cycles/integrator/denoiser_device.h (new file, 27 lines)
@@ -0,0 +1,27 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "integrator/denoiser.h"
|
||||
#include "util/unique_ptr.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Denoiser which uses device-specific denoising implementation, such as OptiX denoiser which are
|
||||
* implemented as a part of a driver of specific device.
|
||||
*
|
||||
* This implementation makes sure the to-be-denoised buffer is available on the denoising device
|
||||
* and invoke denoising kernel via device API. */
|
||||
class DeviceDenoiser : public Denoiser {
|
||||
public:
|
||||
DeviceDenoiser(Device *path_trace_device, const DenoiseParams ¶ms);
|
||||
~DeviceDenoiser();
|
||||
|
||||
virtual bool denoise_buffer(const BufferParams &buffer_params,
|
||||
RenderBuffers *render_buffers,
|
||||
const int num_samples,
|
||||
bool allow_inplace_modification) override;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -1,52 +0,0 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "integrator/denoiser.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Implementation of Denoiser which uses a device-specific denoising implementation, running on a
|
||||
* GPU device queue. It makes sure the to-be-denoised buffer is available on the denoising device
|
||||
* and invokes denoising kernels via the device queue API. */
|
||||
class DenoiserGPU : public Denoiser {
|
||||
public:
|
||||
DenoiserGPU(Device *path_trace_device, const DenoiseParams ¶ms);
|
||||
~DenoiserGPU();
|
||||
|
||||
virtual bool denoise_buffer(const BufferParams &buffer_params,
|
||||
RenderBuffers *render_buffers,
|
||||
const int num_samples,
|
||||
bool allow_inplace_modification) override;
|
||||
|
||||
protected:
|
||||
/* All the parameters needed to perform buffer denoising on a device.
|
||||
* Is not really a task in its canonical terms (as in, is not an asynchronous running task). Is
|
||||
* more like a wrapper for all the arguments and parameters needed to perform denoising. Is a
|
||||
* single place where they are all listed, so that it's not required to modify all device methods
|
||||
* when these parameters do change. */
|
||||
class DenoiseTask {
|
||||
public:
|
||||
DenoiseParams params;
|
||||
|
||||
int num_samples;
|
||||
|
||||
RenderBuffers *render_buffers;
|
||||
BufferParams buffer_params;
|
||||
|
||||
/* Allow to do in-place modification of the input passes (scaling them down i.e.). This will
|
||||
* lower the memory footprint of the denoiser but will make input passes "invalid" (from path
|
||||
* tracer) point of view. */
|
||||
bool allow_inplace_modification;
|
||||
};
|
||||
|
||||
/* Returns true if task is fully handled. */
|
||||
virtual bool denoise_buffer(const DenoiseTask & /*task*/) = 0;
|
||||
|
||||
virtual Device *ensure_denoiser_device(Progress *progress) override;
|
||||
|
||||
unique_ptr<DeviceQueue> denoiser_queue_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -1,786 +1,21 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#ifdef WITH_OPTIX
|
||||
#include "integrator/denoiser_optix.h"
|
||||
|
||||
# include "integrator/denoiser_optix.h"
|
||||
# include "integrator/pass_accessor_gpu.h"
|
||||
|
||||
# include "device/optix/device_impl.h"
|
||||
# include "device/optix/queue.h"
|
||||
|
||||
# include <optix_denoiser_tiling.h>
|
||||
#include "device/denoise.h"
|
||||
#include "device/device.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
# if OPTIX_ABI_VERSION >= 60
|
||||
using ::optixUtilDenoiserInvokeTiled;
|
||||
# else
|
||||
// A minimal copy of functionality `optix_denoiser_tiling.h` which allows to fix integer overflow
|
||||
// issues without bumping SDK or driver requirement.
|
||||
//
|
||||
// The original code is Copyright NVIDIA Corporation, BSD-3-Clause.
|
||||
static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D &input,
|
||||
const OptixImage2D &output,
|
||||
unsigned int overlapWindowSizeInPixels,
|
||||
unsigned int tileWidth,
|
||||
unsigned int tileHeight,
|
||||
std::vector<OptixUtilDenoiserImageTile> &tiles)
|
||||
{
|
||||
if (tileWidth == 0 || tileHeight == 0)
|
||||
return OPTIX_ERROR_INVALID_VALUE;
|
||||
|
||||
unsigned int inPixelStride = optixUtilGetPixelStride(input);
|
||||
unsigned int outPixelStride = optixUtilGetPixelStride(output);
|
||||
|
||||
int inp_w = std::min(tileWidth + 2 * overlapWindowSizeInPixels, input.width);
|
||||
int inp_h = std::min(tileHeight + 2 * overlapWindowSizeInPixels, input.height);
|
||||
int inp_y = 0, copied_y = 0;
|
||||
|
||||
do {
|
||||
int inputOffsetY = inp_y == 0 ? 0 :
|
||||
std::max((int)overlapWindowSizeInPixels,
|
||||
inp_h - ((int)input.height - inp_y));
|
||||
int copy_y = inp_y == 0 ? std::min(input.height, tileHeight + overlapWindowSizeInPixels) :
|
||||
std::min(tileHeight, input.height - copied_y);
|
||||
|
||||
int inp_x = 0, copied_x = 0;
|
||||
do {
|
||||
int inputOffsetX = inp_x == 0 ? 0 :
|
||||
std::max((int)overlapWindowSizeInPixels,
|
||||
inp_w - ((int)input.width - inp_x));
|
||||
int copy_x = inp_x == 0 ? std::min(input.width, tileWidth + overlapWindowSizeInPixels) :
|
||||
std::min(tileWidth, input.width - copied_x);
|
||||
|
||||
OptixUtilDenoiserImageTile tile;
|
||||
tile.input.data = input.data + (size_t)(inp_y - inputOffsetY) * input.rowStrideInBytes +
|
||||
+(size_t)(inp_x - inputOffsetX) * inPixelStride;
|
||||
tile.input.width = inp_w;
|
||||
tile.input.height = inp_h;
|
||||
tile.input.rowStrideInBytes = input.rowStrideInBytes;
|
||||
tile.input.pixelStrideInBytes = input.pixelStrideInBytes;
|
||||
tile.input.format = input.format;
|
||||
|
||||
tile.output.data = output.data + (size_t)inp_y * output.rowStrideInBytes +
|
||||
(size_t)inp_x * outPixelStride;
|
||||
tile.output.width = copy_x;
|
||||
tile.output.height = copy_y;
|
||||
tile.output.rowStrideInBytes = output.rowStrideInBytes;
|
||||
tile.output.pixelStrideInBytes = output.pixelStrideInBytes;
|
||||
tile.output.format = output.format;
|
||||
|
||||
tile.inputOffsetX = inputOffsetX;
|
||||
tile.inputOffsetY = inputOffsetY;
|
||||
tiles.push_back(tile);
|
||||
|
||||
inp_x += inp_x == 0 ? tileWidth + overlapWindowSizeInPixels : tileWidth;
|
||||
copied_x += copy_x;
|
||||
} while (inp_x < static_cast<int>(input.width));
|
||||
|
||||
inp_y += inp_y == 0 ? tileHeight + overlapWindowSizeInPixels : tileHeight;
|
||||
copied_y += copy_y;
|
||||
} while (inp_y < static_cast<int>(input.height));
|
||||
|
||||
return OPTIX_SUCCESS;
|
||||
}
|
||||
|
||||
static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
|
||||
CUstream stream,
|
||||
const OptixDenoiserParams *params,
|
||||
CUdeviceptr denoiserState,
|
||||
size_t denoiserStateSizeInBytes,
|
||||
const OptixDenoiserGuideLayer *guideLayer,
|
||||
const OptixDenoiserLayer *layers,
|
||||
unsigned int numLayers,
|
||||
CUdeviceptr scratch,
|
||||
size_t scratchSizeInBytes,
|
||||
unsigned int overlapWindowSizeInPixels,
|
||||
unsigned int tileWidth,
|
||||
unsigned int tileHeight)
|
||||
{
|
||||
if (!guideLayer || !layers)
|
||||
return OPTIX_ERROR_INVALID_VALUE;
|
||||
|
||||
std::vector<std::vector<OptixUtilDenoiserImageTile>> tiles(numLayers);
|
||||
std::vector<std::vector<OptixUtilDenoiserImageTile>> prevTiles(numLayers);
|
||||
for (unsigned int l = 0; l < numLayers; l++) {
|
||||
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].input,
|
||||
layers[l].output,
|
||||
overlapWindowSizeInPixels,
|
||||
tileWidth,
|
||||
tileHeight,
|
||||
tiles[l]))
|
||||
return res;
|
||||
|
||||
if (layers[l].previousOutput.data) {
|
||||
OptixImage2D dummyOutput = layers[l].previousOutput;
|
||||
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].previousOutput,
|
||||
dummyOutput,
|
||||
overlapWindowSizeInPixels,
|
||||
tileWidth,
|
||||
tileHeight,
|
||||
prevTiles[l]))
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<OptixUtilDenoiserImageTile> albedoTiles;
|
||||
if (guideLayer->albedo.data) {
|
||||
OptixImage2D dummyOutput = guideLayer->albedo;
|
||||
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->albedo,
|
||||
dummyOutput,
|
||||
overlapWindowSizeInPixels,
|
||||
tileWidth,
|
||||
tileHeight,
|
||||
albedoTiles))
|
||||
return res;
|
||||
}
|
||||
|
||||
std::vector<OptixUtilDenoiserImageTile> normalTiles;
|
||||
if (guideLayer->normal.data) {
|
||||
OptixImage2D dummyOutput = guideLayer->normal;
|
||||
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->normal,
|
||||
dummyOutput,
|
||||
overlapWindowSizeInPixels,
|
||||
tileWidth,
|
||||
tileHeight,
|
||||
normalTiles))
|
||||
return res;
|
||||
}
|
||||
std::vector<OptixUtilDenoiserImageTile> flowTiles;
|
||||
if (guideLayer->flow.data) {
|
||||
OptixImage2D dummyOutput = guideLayer->flow;
|
||||
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->flow,
|
||||
dummyOutput,
|
||||
overlapWindowSizeInPixels,
|
||||
tileWidth,
|
||||
tileHeight,
|
||||
flowTiles))
|
||||
return res;
|
||||
}
|
||||
|
||||
for (size_t t = 0; t < tiles[0].size(); t++) {
|
||||
std::vector<OptixDenoiserLayer> tlayers;
|
||||
for (unsigned int l = 0; l < numLayers; l++) {
|
||||
OptixDenoiserLayer layer = {};
|
||||
layer.input = (tiles[l])[t].input;
|
||||
layer.output = (tiles[l])[t].output;
|
||||
if (layers[l].previousOutput.data)
|
||||
layer.previousOutput = (prevTiles[l])[t].input;
|
||||
tlayers.push_back(layer);
|
||||
}
|
||||
|
||||
OptixDenoiserGuideLayer gl = {};
|
||||
if (guideLayer->albedo.data)
|
||||
gl.albedo = albedoTiles[t].input;
|
||||
|
||||
if (guideLayer->normal.data)
|
||||
gl.normal = normalTiles[t].input;
|
||||
|
||||
if (guideLayer->flow.data)
|
||||
gl.flow = flowTiles[t].input;
|
||||
|
||||
if (const OptixResult res = optixDenoiserInvoke(denoiser,
|
||||
stream,
|
||||
params,
|
||||
denoiserState,
|
||||
denoiserStateSizeInBytes,
|
||||
&gl,
|
||||
&tlayers[0],
|
||||
numLayers,
|
||||
(tiles[0])[t].inputOffsetX,
|
||||
(tiles[0])[t].inputOffsetY,
|
||||
scratch,
|
||||
scratchSizeInBytes))
|
||||
return res;
|
||||
}
|
||||
return OPTIX_SUCCESS;
|
||||
}
|
||||
# endif
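As the comment above notes, this local copy of the SDK tiling helper exists so the tile data offsets can be computed in 64-bit (note the explicit (size_t) casts before multiplying by the row stride), avoiding integer overflow on very large frame buffers without requiring a newer OptiX SDK or driver. A minimal illustration of the difference, with hypothetical parameter names:

#include <cstddef>
#include <cstdint>

/* With a wide float buffer the per-row stride alone is hundreds of kilobytes,
 * so row * row_stride quickly exceeds what fits in 32-bit arithmetic. Casting
 * before the multiply keeps the intermediate result in 64-bit, matching the
 * (size_t)(inp_y - inputOffsetY) * rowStrideInBytes expression above. */
static uint64_t tile_offset_bytes(int row, int col, size_t row_stride, size_t pixel_stride)
{
  return uint64_t(size_t(row) * row_stride + size_t(col) * pixel_stride);
}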
|
||||
|
||||
OptiXDenoiser::OptiXDenoiser(Device *path_trace_device, const DenoiseParams ¶ms)
|
||||
: DenoiserGPU(path_trace_device, params), state_(path_trace_device, "__denoiser_state", true)
|
||||
: DeviceDenoiser(path_trace_device, params)
|
||||
{
|
||||
}
|
||||
|
||||
OptiXDenoiser::~OptiXDenoiser()
|
||||
{
|
||||
/* It is important that the OptixDenoiser handle is destroyed before the OptixDeviceContext
|
||||
* handle, which is guaranteed since the local denoising device owning the OptiX device context
|
||||
* is deleted as part of the Denoiser class destructor call after this. */
|
||||
if (optix_denoiser_ != nullptr) {
|
||||
optixDenoiserDestroy(optix_denoiser_);
|
||||
}
|
||||
}
|
||||
|
||||
uint OptiXDenoiser::get_device_type_mask() const
|
||||
{
|
||||
return DEVICE_MASK_OPTIX;
|
||||
}
|
||||
|
||||
class OptiXDenoiser::DenoiseContext {
|
||||
public:
|
||||
explicit DenoiseContext(OptiXDevice *device, const DenoiseTask &task)
|
||||
: denoise_params(task.params),
|
||||
render_buffers(task.render_buffers),
|
||||
buffer_params(task.buffer_params),
|
||||
guiding_buffer(device, "denoiser guiding passes buffer", true),
|
||||
num_samples(task.num_samples)
|
||||
{
|
||||
num_input_passes = 1;
|
||||
if (denoise_params.use_pass_albedo) {
|
||||
num_input_passes += 1;
|
||||
use_pass_albedo = true;
|
||||
pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
|
||||
if (denoise_params.use_pass_normal) {
|
||||
num_input_passes += 1;
|
||||
use_pass_normal = true;
|
||||
pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
|
||||
}
|
||||
}
|
||||
|
||||
if (denoise_params.temporally_stable) {
|
||||
prev_output.device_pointer = render_buffers->buffer.device_pointer;
|
||||
|
||||
prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
|
||||
|
||||
prev_output.stride = buffer_params.stride;
|
||||
prev_output.pass_stride = buffer_params.pass_stride;
|
||||
|
||||
num_input_passes += 1;
|
||||
use_pass_motion = true;
|
||||
pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
|
||||
}
|
||||
|
||||
use_guiding_passes = (num_input_passes - 1) > 0;
|
||||
|
||||
if (use_guiding_passes) {
|
||||
if (task.allow_inplace_modification) {
|
||||
guiding_params.device_pointer = render_buffers->buffer.device_pointer;
|
||||
|
||||
guiding_params.pass_albedo = pass_denoising_albedo;
|
||||
guiding_params.pass_normal = pass_denoising_normal;
|
||||
guiding_params.pass_flow = pass_motion;
|
||||
|
||||
guiding_params.stride = buffer_params.stride;
|
||||
guiding_params.pass_stride = buffer_params.pass_stride;
|
||||
}
|
||||
else {
|
||||
guiding_params.pass_stride = 0;
|
||||
if (use_pass_albedo) {
|
||||
guiding_params.pass_albedo = guiding_params.pass_stride;
|
||||
guiding_params.pass_stride += 3;
|
||||
}
|
||||
if (use_pass_normal) {
|
||||
guiding_params.pass_normal = guiding_params.pass_stride;
|
||||
guiding_params.pass_stride += 3;
|
||||
}
|
||||
if (use_pass_motion) {
|
||||
guiding_params.pass_flow = guiding_params.pass_stride;
|
||||
guiding_params.pass_stride += 2;
|
||||
}
|
||||
|
||||
guiding_params.stride = buffer_params.width;
|
||||
|
||||
guiding_buffer.alloc_to_device(buffer_params.width * buffer_params.height *
|
||||
guiding_params.pass_stride);
|
||||
guiding_params.device_pointer = guiding_buffer.device_pointer;
|
||||
}
|
||||
}
|
||||
|
||||
pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
|
||||
}
|
||||
|
||||
const DenoiseParams &denoise_params;
|
||||
|
||||
RenderBuffers *render_buffers = nullptr;
|
||||
const BufferParams &buffer_params;
|
||||
|
||||
/* Previous output. */
|
||||
struct {
|
||||
device_ptr device_pointer = 0;
|
||||
|
||||
int offset = PASS_UNUSED;
|
||||
|
||||
int stride = -1;
|
||||
int pass_stride = -1;
|
||||
} prev_output;
|
||||
|
||||
/* Device-side storage of the guiding passes. */
|
||||
device_only_memory<float> guiding_buffer;
|
||||
|
||||
struct {
|
||||
device_ptr device_pointer = 0;
|
||||
|
||||
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
|
||||
int pass_albedo = PASS_UNUSED;
|
||||
int pass_normal = PASS_UNUSED;
|
||||
int pass_flow = PASS_UNUSED;
|
||||
|
||||
int stride = -1;
|
||||
int pass_stride = -1;
|
||||
} guiding_params;
|
||||
|
||||
/* Number of input passes. Including the color and extra auxiliary passes. */
|
||||
int num_input_passes = 0;
|
||||
bool use_guiding_passes = false;
|
||||
bool use_pass_albedo = false;
|
||||
bool use_pass_normal = false;
|
||||
bool use_pass_motion = false;
|
||||
|
||||
int num_samples = 0;
|
||||
|
||||
int pass_sample_count = PASS_UNUSED;
|
||||
|
||||
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
|
||||
int pass_denoising_albedo = PASS_UNUSED;
|
||||
int pass_denoising_normal = PASS_UNUSED;
|
||||
int pass_motion = PASS_UNUSED;
|
||||
|
||||
/* For passes which don't need albedo channel for denoising we replace the actual albedo with
|
||||
* the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
|
||||
* the fake values and denoising of passes which do need albedo can no longer happen. */
|
||||
bool albedo_replaced_with_fake = false;
|
||||
};
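The DenoiseContext constructor above packs the enabled guiding channels into one interleaved buffer: albedo takes three floats, normal three, and motion two, each claiming its offset within the per-pixel stride, and the buffer is then sized as width * height * pass_stride floats. A small sketch of that layout computation (GuidingLayout and make_guiding_layout are illustrative names, not the patch's API):

/* Channels are interleaved per pixel; each enabled guide claims its offset in turn. */
struct GuidingLayout {
  int pass_albedo = -1;
  int pass_normal = -1;
  int pass_flow = -1;
  int pass_stride = 0;
};

static GuidingLayout make_guiding_layout(bool use_albedo, bool use_normal, bool use_flow)
{
  GuidingLayout layout;
  if (use_albedo) {
    layout.pass_albedo = layout.pass_stride;
    layout.pass_stride += 3; /* RGB */
  }
  if (use_normal) {
    layout.pass_normal = layout.pass_stride;
    layout.pass_stride += 3; /* XYZ */
  }
  if (use_flow) {
    layout.pass_flow = layout.pass_stride;
    layout.pass_stride += 2; /* 2D motion vector */
  }
  return layout;
}

/* Total allocation, in floats: width * height * layout.pass_stride. */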
|
||||
|
||||
class OptiXDenoiser::DenoisePass {
|
||||
public:
|
||||
DenoisePass(const PassType type, const BufferParams &buffer_params) : type(type)
|
||||
{
|
||||
noisy_offset = buffer_params.get_pass_offset(type, PassMode::NOISY);
|
||||
denoised_offset = buffer_params.get_pass_offset(type, PassMode::DENOISED);
|
||||
|
||||
const PassInfo pass_info = Pass::get_info(type);
|
||||
num_components = pass_info.num_components;
|
||||
use_compositing = pass_info.use_compositing;
|
||||
use_denoising_albedo = pass_info.use_denoising_albedo;
|
||||
}
|
||||
|
||||
PassType type;
|
||||
|
||||
int noisy_offset;
|
||||
int denoised_offset;
|
||||
|
||||
int num_components;
|
||||
bool use_compositing;
|
||||
bool use_denoising_albedo;
|
||||
};
|
||||
|
||||
bool OptiXDenoiser::denoise_buffer(const DenoiseTask &task)
|
||||
{
|
||||
OptiXDevice *const optix_device = static_cast<OptiXDevice *>(denoiser_device_);
|
||||
|
||||
const CUDAContextScope scope(optix_device);
|
||||
|
||||
DenoiseContext context(optix_device, task);
|
||||
|
||||
if (!denoise_ensure(context)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!denoise_filter_guiding_preprocess(context)) {
|
||||
LOG(ERROR) << "Error preprocessing guiding passes.";
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Passes which will use real albedo when it is available. */
|
||||
denoise_pass(context, PASS_COMBINED);
|
||||
denoise_pass(context, PASS_SHADOW_CATCHER_MATTE);
|
||||
|
||||
/* Passes which do not need albedo and hence if real is present it needs to become fake. */
|
||||
denoise_pass(context, PASS_SHADOW_CATCHER);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OptiXDenoiser::denoise_filter_guiding_preprocess(const DenoiseContext &context)
|
||||
{
|
||||
const BufferParams &buffer_params = context.buffer_params;
|
||||
|
||||
const int work_size = buffer_params.width * buffer_params.height;
|
||||
|
||||
DeviceKernelArguments args(&context.guiding_params.device_pointer,
|
||||
&context.guiding_params.pass_stride,
|
||||
&context.guiding_params.pass_albedo,
|
||||
&context.guiding_params.pass_normal,
|
||||
&context.guiding_params.pass_flow,
|
||||
&context.render_buffers->buffer.device_pointer,
|
||||
&buffer_params.offset,
|
||||
&buffer_params.stride,
|
||||
&buffer_params.pass_stride,
|
||||
&context.pass_sample_count,
|
||||
&context.pass_denoising_albedo,
|
||||
&context.pass_denoising_normal,
|
||||
&context.pass_motion,
|
||||
&buffer_params.full_x,
|
||||
&buffer_params.full_y,
|
||||
&buffer_params.width,
|
||||
&buffer_params.height,
|
||||
&context.num_samples);
|
||||
|
||||
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
|
||||
}
|
||||
|
||||
bool OptiXDenoiser::denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context)
|
||||
{
|
||||
const BufferParams &buffer_params = context.buffer_params;
|
||||
|
||||
const int work_size = buffer_params.width * buffer_params.height;
|
||||
|
||||
DeviceKernelArguments args(&context.guiding_params.device_pointer,
|
||||
&context.guiding_params.pass_stride,
|
||||
&context.guiding_params.pass_albedo,
|
||||
&buffer_params.width,
|
||||
&buffer_params.height);
|
||||
|
||||
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
|
||||
}
|
||||
|
||||
void OptiXDenoiser::denoise_pass(DenoiseContext &context, PassType pass_type)
|
||||
{
|
||||
const BufferParams &buffer_params = context.buffer_params;
|
||||
|
||||
const DenoisePass pass(pass_type, buffer_params);
|
||||
|
||||
if (pass.noisy_offset == PASS_UNUSED) {
|
||||
return;
|
||||
}
|
||||
if (pass.denoised_offset == PASS_UNUSED) {
|
||||
LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
|
||||
return;
|
||||
}
|
||||
|
||||
if (pass.use_denoising_albedo) {
|
||||
if (context.albedo_replaced_with_fake) {
|
||||
LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
|
||||
context.albedo_replaced_with_fake = true;
|
||||
if (!denoise_filter_guiding_set_fake_albedo(context)) {
|
||||
LOG(ERROR) << "Error replacing real albedo with the fake one.";
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Read and preprocess noisy color input pass. */
|
||||
denoise_color_read(context, pass);
|
||||
if (!denoise_filter_color_preprocess(context, pass)) {
|
||||
LOG(ERROR) << "Error converting denoising passes to RGB buffer.";
|
||||
return;
|
||||
}
|
||||
|
||||
if (!denoise_run(context, pass)) {
|
||||
LOG(ERROR) << "Error running OptiX denoiser.";
|
||||
return;
|
||||
}
|
||||
|
||||
/* Store result in the combined pass of the render buffer.
|
||||
*
|
||||
* This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
|
||||
if (!denoise_filter_color_postprocess(context, pass)) {
|
||||
LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
|
||||
return;
|
||||
}
|
||||
|
||||
denoiser_queue_->synchronize();
|
||||
}
|
||||
|
||||
void OptiXDenoiser::denoise_color_read(const DenoiseContext &context, const DenoisePass &pass)
|
||||
{
|
||||
PassAccessor::PassAccessInfo pass_access_info;
|
||||
pass_access_info.type = pass.type;
|
||||
pass_access_info.mode = PassMode::NOISY;
|
||||
pass_access_info.offset = pass.noisy_offset;
|
||||
|
||||
/* Denoiser operates on passes which are used to calculate the approximation, and is never used
|
||||
* on the approximation. The latter is not even possible because OptiX does not support
|
||||
* denoising of semi-transparent pixels. */
|
||||
pass_access_info.use_approximate_shadow_catcher = false;
|
||||
pass_access_info.use_approximate_shadow_catcher_background = false;
|
||||
pass_access_info.show_active_pixels = false;
|
||||
|
||||
/* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
|
||||
*/
|
||||
const PassAccessorGPU pass_accessor(
|
||||
denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);
|
||||
|
||||
PassAccessor::Destination destination(pass_access_info.type);
|
||||
destination.d_pixels = context.render_buffers->buffer.device_pointer +
|
||||
pass.denoised_offset * sizeof(float);
|
||||
destination.num_components = 3;
|
||||
destination.pixel_stride = context.buffer_params.pass_stride;
|
||||
|
||||
BufferParams buffer_params = context.buffer_params;
|
||||
buffer_params.window_x = 0;
|
||||
buffer_params.window_y = 0;
|
||||
buffer_params.window_width = buffer_params.width;
|
||||
buffer_params.window_height = buffer_params.height;
|
||||
|
||||
pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
|
||||
}
|
||||
|
||||
bool OptiXDenoiser::denoise_filter_color_preprocess(const DenoiseContext &context,
|
||||
const DenoisePass &pass)
|
||||
{
|
||||
const BufferParams &buffer_params = context.buffer_params;
|
||||
|
||||
const int work_size = buffer_params.width * buffer_params.height;
|
||||
|
||||
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
|
||||
&buffer_params.full_x,
|
||||
&buffer_params.full_y,
|
||||
&buffer_params.width,
|
||||
&buffer_params.height,
|
||||
&buffer_params.offset,
|
||||
&buffer_params.stride,
|
||||
&buffer_params.pass_stride,
|
||||
&pass.denoised_offset);
|
||||
|
||||
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
|
||||
}
|
||||
|
||||
bool OptiXDenoiser::denoise_filter_color_postprocess(const DenoiseContext &context,
|
||||
const DenoisePass &pass)
|
||||
{
|
||||
const BufferParams &buffer_params = context.buffer_params;

const int work_size = buffer_params.width * buffer_params.height;

DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&context.num_samples,
&pass.noisy_offset,
&pass.denoised_offset,
&context.pass_sample_count,
&pass.num_components,
&pass.use_compositing);

return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
}

bool OptiXDenoiser::denoise_ensure(DenoiseContext &context)
{
if (!denoise_create_if_needed(context)) {
LOG(ERROR) << "OptiX denoiser creation has failed.";
return false;
}

if (!denoise_configure_if_needed(context)) {
LOG(ERROR) << "OptiX denoiser configuration has failed.";
return false;
}

return true;
}

bool OptiXDenoiser::denoise_create_if_needed(DenoiseContext &context)
{
const bool recreate_denoiser = (optix_denoiser_ == nullptr) ||
(use_pass_albedo_ != context.use_pass_albedo) ||
(use_pass_normal_ != context.use_pass_normal) ||
(use_pass_motion_ != context.use_pass_motion);
if (!recreate_denoiser) {
return true;
}

/* Destroy existing handle before creating new one. */
if (optix_denoiser_) {
optixDenoiserDestroy(optix_denoiser_);
}

/* Create OptiX denoiser handle on demand when it is first used. */
OptixDenoiserOptions denoiser_options = {};
denoiser_options.guideAlbedo = context.use_pass_albedo;
denoiser_options.guideNormal = context.use_pass_normal;

OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
if (context.use_pass_motion) {
model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
}

const OptixResult result = optixDenoiserCreate(
static_cast<OptiXDevice *>(denoiser_device_)->context,
model,
&denoiser_options,
&optix_denoiser_);

if (result != OPTIX_SUCCESS) {
denoiser_device_->set_error("Failed to create OptiX denoiser");
return false;
}

/* OptiX denoiser handle was created with the requested number of input passes. */
use_pass_albedo_ = context.use_pass_albedo;
use_pass_normal_ = context.use_pass_normal;
use_pass_motion_ = context.use_pass_motion;

/* OptiX denoiser has been created, but it needs configuration. */
is_configured_ = false;

return true;
}

bool OptiXDenoiser::denoise_configure_if_needed(DenoiseContext &context)
{
/* Limit maximum tile size denoiser can be invoked with. */
const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
min(context.buffer_params.height, 4096));

if (is_configured_ && (configured_size_.x == tile_size.x && configured_size_.y == tile_size.y)) {
return true;
}

optix_device_assert(
denoiser_device_,
optixDenoiserComputeMemoryResources(optix_denoiser_, tile_size.x, tile_size.y, &sizes_));

/* Allocate denoiser state if tile size has changed since last setup. */
state_.device = denoiser_device_;
state_.alloc_to_device(sizes_.stateSizeInBytes + sizes_.withOverlapScratchSizeInBytes);

/* Initialize denoiser state for the current tile size. */
const OptixResult result = optixDenoiserSetup(
optix_denoiser_,
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
* on a stream that is not the default stream. */
tile_size.x + sizes_.overlapWindowSizeInPixels * 2,
tile_size.y + sizes_.overlapWindowSizeInPixels * 2,
state_.device_pointer,
sizes_.stateSizeInBytes,
state_.device_pointer + sizes_.stateSizeInBytes,
sizes_.withOverlapScratchSizeInBytes);
if (result != OPTIX_SUCCESS) {
denoiser_device_->set_error("Failed to set up OptiX denoiser");
return false;
}

cuda_device_assert(denoiser_device_, cuCtxSynchronize());

is_configured_ = true;
configured_size_ = tile_size;

return true;
}

bool OptiXDenoiser::denoise_run(const DenoiseContext &context, const DenoisePass &pass)
{
const BufferParams &buffer_params = context.buffer_params;
const int width = buffer_params.width;
const int height = buffer_params.height;

/* Set up input and output layer information. */
OptixImage2D color_layer = {0};
OptixImage2D albedo_layer = {0};
OptixImage2D normal_layer = {0};
OptixImage2D flow_layer = {0};

OptixImage2D output_layer = {0};
OptixImage2D prev_output_layer = {0};

/* Color pass. */
{
const int pass_denoised = pass.denoised_offset;
const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);

color_layer.data = context.render_buffers->buffer.device_pointer +
pass_denoised * sizeof(float);
color_layer.width = width;
color_layer.height = height;
color_layer.rowStrideInBytes = pass_stride_in_bytes * context.buffer_params.stride;
color_layer.pixelStrideInBytes = pass_stride_in_bytes;
color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}

/* Previous output. */
if (context.prev_output.offset != PASS_UNUSED) {
const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);

prev_output_layer.data = context.prev_output.device_pointer +
context.prev_output.offset * sizeof(float);
prev_output_layer.width = width;
prev_output_layer.height = height;
prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}

/* Optional albedo and color passes. */
if (context.num_input_passes > 1) {
const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;

if (context.use_pass_albedo) {
albedo_layer.data = d_guiding_buffer + context.guiding_params.pass_albedo * sizeof(float);
albedo_layer.width = width;
albedo_layer.height = height;
albedo_layer.rowStrideInBytes = row_stride_in_bytes;
albedo_layer.pixelStrideInBytes = pixel_stride_in_bytes;
albedo_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}

if (context.use_pass_normal) {
normal_layer.data = d_guiding_buffer + context.guiding_params.pass_normal * sizeof(float);
normal_layer.width = width;
normal_layer.height = height;
normal_layer.rowStrideInBytes = row_stride_in_bytes;
normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}

if (context.use_pass_motion) {
flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
flow_layer.width = width;
flow_layer.height = height;
flow_layer.rowStrideInBytes = row_stride_in_bytes;
flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
}
}

/* Denoise in-place of the noisy input in the render buffers. */
output_layer = color_layer;

OptixDenoiserGuideLayer guide_layers = {};
guide_layers.albedo = albedo_layer;
guide_layers.normal = normal_layer;
guide_layers.flow = flow_layer;

OptixDenoiserLayer image_layers = {};
image_layers.input = color_layer;
image_layers.previousOutput = prev_output_layer;
image_layers.output = output_layer;

/* Finally run denoising. */
OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */

optix_device_assert(denoiser_device_,
ccl::optixUtilDenoiserInvokeTiled(
optix_denoiser_,
static_cast<OptiXDeviceQueue *>(denoiser_queue_.get())->stream(),
&params,
state_.device_pointer,
sizes_.stateSizeInBytes,
&guide_layers,
&image_layers,
1,
state_.device_pointer + sizes_.stateSizeInBytes,
sizes_.withOverlapScratchSizeInBytes,
sizes_.overlapWindowSizeInPixels,
configured_size_.x,
configured_size_.y));

return true;
}

CCL_NAMESPACE_END

#endif
@@ -3,84 +3,16 @@

#pragma once

#ifdef WITH_OPTIX

# include "integrator/denoiser_gpu.h"

# include "device/optix/util.h"
#include "integrator/denoiser_device.h"

CCL_NAMESPACE_BEGIN

/* Implementation of denoising API which uses the OptiX denoiser. */
class OptiXDenoiser : public DenoiserGPU {
class OptiXDenoiser : public DeviceDenoiser {
public:
OptiXDenoiser(Device *path_trace_device, const DenoiseParams &params);
~OptiXDenoiser();

protected:
virtual uint get_device_type_mask() const override;

private:
class DenoiseContext;
class DenoisePass;

virtual bool denoise_buffer(const DenoiseTask &task) override;

/* Read guiding passes from the render buffers, preprocess them in a way which is expected by
* OptiX and store in the guiding passes memory within the given context.
*
* Pre-processing of the guiding passes is to only happen once per context lifetime. DO not
* preprocess them for every pass which is being denoised. */
bool denoise_filter_guiding_preprocess(const DenoiseContext &context);

/* Set fake albedo pixels in the albedo guiding pass storage.
* After this point only passes which do not need albedo for denoising can be processed. */
bool denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context);

void denoise_pass(DenoiseContext &context, PassType pass_type);

/* Read input color pass from the render buffer into the memory which corresponds to the noisy
* input within the given context. Pixels are scaled to the number of samples, but are not
* preprocessed yet. */
void denoise_color_read(const DenoiseContext &context, const DenoisePass &pass);

/* Run corresponding filter kernels, preparing data for the denoiser or copying data from the
* denoiser result to the render buffer. */
bool denoise_filter_color_preprocess(const DenoiseContext &context, const DenoisePass &pass);
bool denoise_filter_color_postprocess(const DenoiseContext &context, const DenoisePass &pass);

/* Make sure the OptiX denoiser is created and configured. */
bool denoise_ensure(DenoiseContext &context);

/* Create OptiX denoiser descriptor if needed.
* Will do nothing if the current OptiX descriptor is usable for the given parameters.
* If the OptiX denoiser descriptor did re-allocate here it is left unconfigured. */
bool denoise_create_if_needed(DenoiseContext &context);

/* Configure existing OptiX denoiser descriptor for the use for the given task. */
bool denoise_configure_if_needed(DenoiseContext &context);

/* Run configured denoiser. */
bool denoise_run(const DenoiseContext &context, const DenoisePass &pass);

OptixDenoiser optix_denoiser_ = nullptr;

/* Configuration size, as provided to `optixDenoiserSetup`.
* If the `optixDenoiserSetup()` was never used on the current `optix_denoiser` the
* `is_configured` will be false. */
bool is_configured_ = false;
int2 configured_size_ = make_int2(0, 0);

/* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
* The memory layout goes as following: [denoiser state][scratch buffer]. */
device_only_memory<unsigned char> state_;
OptixDenoiserSizes sizes_ = {};

bool use_pass_albedo_ = false;
bool use_pass_normal_ = false;
bool use_pass_motion_ = false;
};

CCL_NAMESPACE_END

#endif
@@ -37,14 +37,6 @@ set(SRC_KERNEL_DEVICE_OPTIX
device/optix/kernel_shader_raytrace.cu
)

if(WITH_CYCLES_OSL AND (OSL_LIBRARY_VERSION_MINOR GREATER_EQUAL 13 OR OSL_LIBRARY_VERSION_MAJOR GREATER 1))
set(SRC_KERNEL_DEVICE_OPTIX
${SRC_KERNEL_DEVICE_OPTIX}
osl/services_optix.cu
device/optix/kernel_osl.cu
)
endif()

set(SRC_KERNEL_DEVICE_ONEAPI
device/oneapi/kernel.cpp
)
@@ -189,16 +181,6 @@ set(SRC_KERNEL_SVM_HEADERS
svm/vertex_color.h
)

if(WITH_CYCLES_OSL)
set(SRC_KERNEL_OSL_HEADERS
osl/osl.h
osl/closures_setup.h
osl/closures_template.h
osl/services_gpu.h
osl/types.h
)
endif()

set(SRC_KERNEL_GEOM_HEADERS
geom/geom.h
geom/attribute.h
@@ -285,17 +267,10 @@ set(SRC_KERNEL_INTEGRATOR_HEADERS
)

set(SRC_KERNEL_LIGHT_HEADERS
light/area.h
light/light.h
light/background.h
light/common.h
light/distant.h
light/distribution.h
light/light.h
light/point.h
light/sample.h
light/spot.h
light/tree.h
light/triangle.h
)

set(SRC_KERNEL_SAMPLE_HEADERS
@@ -331,7 +306,6 @@ set(SRC_KERNEL_HEADERS
${SRC_KERNEL_GEOM_HEADERS}
${SRC_KERNEL_INTEGRATOR_HEADERS}
${SRC_KERNEL_LIGHT_HEADERS}
${SRC_KERNEL_OSL_HEADERS}
${SRC_KERNEL_SAMPLE_HEADERS}
${SRC_KERNEL_SVM_HEADERS}
${SRC_KERNEL_TYPES_HEADERS}
@@ -473,29 +447,54 @@ if(WITH_CYCLES_CUDA_BINARIES)

if(WITH_CYCLES_DEBUG)
set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
set(cuda_flags ${cuda_flags} --ptxas-options="-v")
endif()

set(_cuda_nvcc_args
-arch=${arch}
${CUDA_NVCC_FLAGS}
--${format}
${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
${cuda_flags})
if(WITH_CYCLES_CUBIN_COMPILER)
string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)

# Needed to find libnvrtc-builtins.so. Can't do it from inside
# cycles_cubin_cc since the env variable is read before main()
if(APPLE)
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
-E env DYLD_LIBRARY_PATH="${cuda_toolkit_root_dir}/lib")
elseif(UNIX)
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
-E env LD_LIBRARY_PATH="${cuda_toolkit_root_dir}/lib64")
endif()

if(WITH_COMPILER_CCACHE AND CCACHE_PROGRAM)
add_custom_command(
OUTPUT ${cuda_file}
COMMAND ${CCACHE_PROGRAM} ${cuda_nvcc_executable} ${_cuda_nvcc_args}
DEPENDS ${kernel_sources})
COMMAND ${CUBIN_CC_ENV}
"$<TARGET_FILE:cycles_cubin_cc>"
-target ${CUDA_ARCH}
-i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
${cuda_flags}
-v
-cuda-toolkit-dir "${cuda_toolkit_root_dir}"
DEPENDS ${kernel_sources} cycles_cubin_cc)
else()
add_custom_command(
OUTPUT ${cuda_file}
COMMAND ${cuda_nvcc_executable} ${_cuda_nvcc_args}
DEPENDS ${kernel_sources})
endif()
set(_cuda_nvcc_args
-arch=${arch}
${CUDA_NVCC_FLAGS}
--${format}
${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
--ptxas-options="-v"
${cuda_flags})

unset(_cuda_nvcc_args)
if(WITH_COMPILER_CCACHE AND CCACHE_PROGRAM)
add_custom_command(
OUTPUT ${cuda_file}
COMMAND ${CCACHE_PROGRAM} ${cuda_nvcc_executable} ${_cuda_nvcc_args}
DEPENDS ${kernel_sources})
else()
add_custom_command(
OUTPUT ${cuda_file}
COMMAND ${cuda_nvcc_executable} ${_cuda_nvcc_args}
DEPENDS ${kernel_sources})
endif()

unset(_cuda_nvcc_args)
endif()
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND cuda_cubins ${cuda_file})

@@ -571,22 +570,13 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
if(WIN32)
set(hip_command ${CMAKE_COMMAND})
set(hip_flags
-E env "HIP_PATH=${HIP_ROOT_DIR}"
-E env "HIP_PATH=${HIP_ROOT_DIR}" "PATH=${HIP_PERL_DIR}"
${HIP_HIPCC_EXECUTABLE}.bat)
else()
set(hip_command ${HIP_HIPCC_EXECUTABLE})
set(hip_flags)
endif()

# There's a bug in the compiler causing some scenes to fail to render on Vega cards
# A workaround currently is to set -O1 opt level during kernel compilation for these
# cards Remove this when a newer compiler is available with fixes.
if(WIN32 AND (${arch} MATCHES "gfx90[a-z0-9]+"))
set(hip_opt_flags "-O1")
else()
set(hip_opt_flags)
endif()

set(hip_flags
${hip_flags}
--amdgpu-target=${arch}
@@ -603,7 +593,6 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
-Wno-unused-value
--hipcc-func-supp
-ffast-math
${hip_opt_flags}
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})

if(WITH_NANOVDB)
@@ -657,25 +646,55 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
endif()

add_custom_command(
OUTPUT
${output}
DEPENDS
${input}
${SRC_KERNEL_HEADERS}
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
${SRC_UTIL_HEADERS}
COMMAND
${CUDA_NVCC_EXECUTABLE}
--ptx
-arch=sm_50
${cuda_flags}
${input}
WORKING_DIRECTORY
"${CMAKE_CURRENT_SOURCE_DIR}")
if(WITH_CYCLES_CUBIN_COMPILER)
# Needed to find libnvrtc-builtins.so. Can't do it from inside
# cycles_cubin_cc since the env variable is read before main()
if(APPLE)
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
-E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib")
elseif(UNIX)
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
-E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
endif()

add_custom_command(
OUTPUT ${output}
DEPENDS
${input}
${SRC_KERNEL_HEADERS}
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
${SRC_UTIL_HEADERS}
COMMAND ${CUBIN_CC_ENV}
"$<TARGET_FILE:cycles_cubin_cc>"
-target 50
-ptx
-i ${CMAKE_CURRENT_SOURCE_DIR}/${input}
${cuda_flags}
-v
-cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
DEPENDS ${kernel_sources} cycles_cubin_cc)
else()
add_custom_command(
OUTPUT
${output}
DEPENDS
${input}
${SRC_KERNEL_HEADERS}
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
${SRC_UTIL_HEADERS}
COMMAND
${CUDA_NVCC_EXECUTABLE}
--ptx
-arch=sm_50
${cuda_flags}
${input}
WORKING_DIRECTORY
"${CMAKE_CURRENT_SOURCE_DIR}")
endif()
list(APPEND optix_ptx ${output})

delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib)
@@ -689,16 +708,6 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
kernel_optix_shader_raytrace
"device/optix/kernel_shader_raytrace.cu"
"--keep-device-functions")
if(WITH_CYCLES_OSL AND (OSL_LIBRARY_VERSION_MINOR GREATER_EQUAL 13 OR OSL_LIBRARY_VERSION_MAJOR GREATER 1))
CYCLES_OPTIX_KERNEL_ADD(
kernel_optix_osl
"device/optix/kernel_osl.cu"
"--relocatable-device-code=true")
CYCLES_OPTIX_KERNEL_ADD(
kernel_optix_osl_services
"osl/services_optix.cu"
"--relocatable-device-code=true")
endif()

add_custom_target(cycles_kernel_optix ALL DEPENDS ${optix_ptx})
cycles_set_solution_folder(cycles_kernel_optix)
@@ -986,7 +995,6 @@ source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS})
source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS})
source_group("kernel" FILES ${SRC_KERNEL_TYPES_HEADERS})
source_group("light" FILES ${SRC_KERNEL_LIGHT_HEADERS})
source_group("osl" FILES ${SRC_KERNEL_OSL_HEADERS})
source_group("sample" FILES ${SRC_KERNEL_SAMPLE_HEADERS})
source_group("svm" FILES ${SRC_KERNEL_SVM_HEADERS})
source_group("util" FILES ${SRC_KERNEL_UTIL_HEADERS})
@@ -1023,7 +1031,6 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_FILM_HEADERS}" ${CYCLE
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/geom)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_INTEGRATOR_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/integrator)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_LIGHT_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/light)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_OSL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/osl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_SAMPLE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/sample)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/svm)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_TYPES_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel)
@@ -297,10 +297,8 @@ ccl_device_inline void bsdf_roughness_eta(const KernelGlobals kg,
ccl_private float2 *roughness,
ccl_private float *eta)
{
#ifdef __SVM__
bool refractive = false;
float alpha = 1.0f;
#endif
switch (sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
*roughness = one_float2();

@@ -69,7 +69,7 @@ ccl_device int bsdf_diffuse_sample(ccl_private const ShaderClosure *sc,
ccl_device int bsdf_translucent_setup(ccl_private DiffuseBsdf *bsdf)
{
bsdf->type = CLOSURE_BSDF_TRANSLUCENT_ID;
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
return SD_BSDF | SD_BSDF_HAS_EVAL;
}

ccl_device Spectrum bsdf_translucent_eval(ccl_private const ShaderClosure *sc,

@@ -34,7 +34,7 @@ ccl_device int bsdf_hair_transmission_setup(ccl_private HairBsdf *bsdf)
bsdf->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID;
bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f);
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
return SD_BSDF | SD_BSDF_HAS_EVAL;
}

ccl_device Spectrum bsdf_hair_reflection_eval(ccl_private const ShaderClosure *sc,

@@ -196,7 +196,7 @@ ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd,

bsdf->extra->geom = make_float4(Y.x, Y.y, Y.z, h);

return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG | SD_BSDF_HAS_TRANSMISSION;
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
}

#endif /* __HAIR__ */

@@ -346,7 +346,7 @@ ccl_device int bsdf_microfacet_ggx_refraction_setup(ccl_private MicrofacetBsdf *

bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;

return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
return SD_BSDF | SD_BSDF_HAS_EVAL;
}

ccl_device void bsdf_microfacet_ggx_blur(ccl_private ShaderClosure *sc, float roughness)
@@ -776,7 +776,7 @@ ccl_device int bsdf_microfacet_beckmann_refraction_setup(ccl_private MicrofacetB
bsdf->alpha_y = bsdf->alpha_x;

bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
return SD_BSDF | SD_BSDF_HAS_EVAL;
}

ccl_device void bsdf_microfacet_beckmann_blur(ccl_private ShaderClosure *sc, float roughness)

@@ -559,7 +559,7 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(ccl_private MicrofacetBsdf

bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID;

return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG | SD_BSDF_HAS_TRANSMISSION;
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
}

ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(ccl_private MicrofacetBsdf *bsdf,
@@ -60,13 +60,6 @@ KERNEL_DATA_ARRAY(KernelLight, lights)
KERNEL_DATA_ARRAY(float2, light_background_marginal_cdf)
KERNEL_DATA_ARRAY(float2, light_background_conditional_cdf)

/* light tree */
KERNEL_DATA_ARRAY(KernelLightTreeNode, light_tree_nodes)
KERNEL_DATA_ARRAY(KernelLightTreeEmitter, light_tree_emitters)
KERNEL_DATA_ARRAY(uint, light_to_tree)
KERNEL_DATA_ARRAY(uint, object_lookup_offset)
KERNEL_DATA_ARRAY(uint, triangle_to_tree)

/* particles */
KERNEL_DATA_ARRAY(KernelParticle, particles)

@@ -23,19 +23,24 @@ KERNEL_STRUCT_MEMBER(background, int, volume_shader)
KERNEL_STRUCT_MEMBER(background, float, volume_step_size)
KERNEL_STRUCT_MEMBER(background, int, transparent)
KERNEL_STRUCT_MEMBER(background, float, transparent_roughness_squared_threshold)
/* Portal sampling. */
KERNEL_STRUCT_MEMBER(background, float, portal_weight)
KERNEL_STRUCT_MEMBER(background, int, num_portals)
KERNEL_STRUCT_MEMBER(background, int, portal_offset)
/* Sun sampling. */
KERNEL_STRUCT_MEMBER(background, float, sun_weight)
/* Importance map sampling. */
KERNEL_STRUCT_MEMBER(background, float, map_weight)
KERNEL_STRUCT_MEMBER(background, float, portal_weight)
KERNEL_STRUCT_MEMBER(background, int, map_res_x)
KERNEL_STRUCT_MEMBER(background, int, map_res_y)
/* Multiple importance sampling. */
KERNEL_STRUCT_MEMBER(background, int, use_mis)
/* Lightgroup. */
KERNEL_STRUCT_MEMBER(background, int, lightgroup)
/* Light Index. */
KERNEL_STRUCT_MEMBER(background, int, light_index)
/* Padding. */
KERNEL_STRUCT_MEMBER(background, int, pad1)
KERNEL_STRUCT_MEMBER(background, int, pad2)
KERNEL_STRUCT_MEMBER(background, int, pad3)
KERNEL_STRUCT_END(KernelBackground)

/* BVH: own BVH2 if no native device acceleration struct used. */
@@ -44,11 +49,11 @@ KERNEL_STRUCT_BEGIN(KernelBVH, bvh)
KERNEL_STRUCT_MEMBER(bvh, int, root)
KERNEL_STRUCT_MEMBER(bvh, int, have_motion)
KERNEL_STRUCT_MEMBER(bvh, int, have_curves)
KERNEL_STRUCT_MEMBER(bvh, int, have_points)
KERNEL_STRUCT_MEMBER(bvh, int, have_volumes)
KERNEL_STRUCT_MEMBER(bvh, int, bvh_layout)
KERNEL_STRUCT_MEMBER(bvh, int, use_bvh_steps)
KERNEL_STRUCT_MEMBER(bvh, int, curve_subdivisions)
KERNEL_STRUCT_MEMBER(bvh, int, pad1)
KERNEL_STRUCT_MEMBER(bvh, int, pad2)
KERNEL_STRUCT_END(KernelBVH)

/* Film. */
@@ -97,6 +102,8 @@ KERNEL_STRUCT_MEMBER(film, int, pass_emission)
KERNEL_STRUCT_MEMBER(film, int, pass_background)
KERNEL_STRUCT_MEMBER(film, int, pass_ao)
KERNEL_STRUCT_MEMBER(film, float, pass_alpha_threshold)
KERNEL_STRUCT_MEMBER(film, int, pass_shadow)
KERNEL_STRUCT_MEMBER(film, float, pass_shadow_scale)
KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher)
KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_sample_count)
KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_matte)
@@ -130,6 +137,9 @@ KERNEL_STRUCT_MEMBER(film, int, use_approximate_shadow_catcher)
KERNEL_STRUCT_MEMBER(film, int, pass_guiding_color)
KERNEL_STRUCT_MEMBER(film, int, pass_guiding_probability)
KERNEL_STRUCT_MEMBER(film, int, pass_guiding_avg_roughness)
/* Padding. */
KERNEL_STRUCT_MEMBER(film, int, pad1)
KERNEL_STRUCT_MEMBER(film, int, pad2)
KERNEL_STRUCT_END(KernelFilm)

/* Integrator. */
@@ -137,18 +147,10 @@ KERNEL_STRUCT_END(KernelFilm)
KERNEL_STRUCT_BEGIN(KernelIntegrator, integrator)
/* Emission. */
KERNEL_STRUCT_MEMBER(integrator, int, use_direct_light)
KERNEL_STRUCT_MEMBER(integrator, int, use_light_mis)
KERNEL_STRUCT_MEMBER(integrator, int, use_light_tree)
KERNEL_STRUCT_MEMBER(integrator, int, num_lights)
KERNEL_STRUCT_MEMBER(integrator, int, num_distant_lights)
KERNEL_STRUCT_MEMBER(integrator, int, num_background_lights)
/* Portal sampling. */
KERNEL_STRUCT_MEMBER(integrator, int, num_portals)
KERNEL_STRUCT_MEMBER(integrator, int, portal_offset)
/* Flat light distribution. */
KERNEL_STRUCT_MEMBER(integrator, int, num_distribution)
KERNEL_STRUCT_MEMBER(integrator, float, distribution_pdf_triangles)
KERNEL_STRUCT_MEMBER(integrator, float, distribution_pdf_lights)
KERNEL_STRUCT_MEMBER(integrator, int, num_all_lights)
KERNEL_STRUCT_MEMBER(integrator, float, pdf_triangles)
KERNEL_STRUCT_MEMBER(integrator, float, pdf_lights)
KERNEL_STRUCT_MEMBER(integrator, float, light_inv_rr_threshold)
/* Bounces. */
KERNEL_STRUCT_MEMBER(integrator, int, min_bounce)
@@ -175,11 +177,12 @@ KERNEL_STRUCT_MEMBER(integrator, int, seed)
/* Clamp. */
KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_direct)
KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_indirect)
/* MIS. */
KERNEL_STRUCT_MEMBER(integrator, int, use_lamp_mis)
/* Caustics. */
KERNEL_STRUCT_MEMBER(integrator, int, use_caustics)
/* Sampling pattern. */
KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern)
KERNEL_STRUCT_MEMBER(integrator, int, pmj_sequence_size)
KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
/* Volume render. */
KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
@@ -191,6 +194,7 @@ KERNEL_STRUCT_MEMBER(integrator, int, has_shadow_catcher)
KERNEL_STRUCT_MEMBER(integrator, int, filter_closures)
/* MIS debugging. */
KERNEL_STRUCT_MEMBER(integrator, int, direct_light_sampling_type)

/* Path Guiding */
KERNEL_STRUCT_MEMBER(integrator, float, surface_guiding_probability)
KERNEL_STRUCT_MEMBER(integrator, float, volume_guiding_probability)
@@ -201,10 +205,6 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_surface_guiding)
KERNEL_STRUCT_MEMBER(integrator, int, use_volume_guiding)
KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_direct_light)
KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_mis_weights)

/* Padding. */
KERNEL_STRUCT_MEMBER(integrator, int, pad1)
KERNEL_STRUCT_MEMBER(integrator, int, pad2)
KERNEL_STRUCT_END(KernelIntegrator)

/* SVM. For shader specialization. */
@@ -30,7 +30,6 @@ typedef unsigned long long uint64_t;
/* Qualifiers */

#define ccl_device __device__ __inline__
#define ccl_device_extern extern "C" __device__
#if __CUDA_ARCH__ < 500
# define ccl_device_inline __device__ __forceinline__
# define ccl_device_forceinline __device__ __forceinline__
@@ -110,14 +109,14 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D

typedef unsigned short half;

ccl_device_forceinline half __float2half(const float f)
__device__ half __float2half(const float f)
{
half val;
asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
return val;
}

ccl_device_forceinline float __half2float(const half h)
__device__ float __half2float(const half h)
{
float val;
asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));

@@ -314,7 +314,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
int kernel_index);
ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index;

gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
num_states,
indices,
num_indices,
ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix

@@ -329,7 +333,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
int kernel_index);
ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index;

gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
num_states,
indices,
num_indices,
ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix

@@ -341,7 +349,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
{
ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) != 0);

gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
num_states,
indices,
num_indices,
ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix

@@ -354,8 +366,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
{
ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) == 0);

gpu_parallel_active_index_array(
num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass);
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
num_states,
indices + indices_offset,
num_indices,
ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix

@@ -368,8 +383,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
{
ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0);

gpu_parallel_active_index_array(
num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass);
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
num_states,
indices + indices_offset,
num_indices,
ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix

@@ -413,7 +431,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
int num_active_paths);
ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths;

gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
num_states,
indices,
num_indices,
ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix

@@ -447,7 +469,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
int num_active_paths);
ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths;

gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
num_states,
indices,
num_indices,
ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix

@@ -56,7 +56,7 @@ void gpu_parallel_active_index_array_impl(const uint num_states,
const uint is_active = (state_index < num_states) ? is_active_op(state_index) : 0;
#else /* !__KERNEL__ONEAPI__ */
# ifndef __KERNEL_METAL__
template<typename IsActiveOp>
template<uint blocksize, typename IsActiveOp>
__device__
# endif
void
@@ -79,10 +79,6 @@ __device__
{
extern ccl_gpu_shared int warp_offset[];

# ifndef __KERNEL_METAL__
const uint blocksize = ccl_gpu_block_dim_x;
# endif

const uint thread_index = ccl_gpu_thread_idx_x;
const uint thread_warp = thread_index % ccl_gpu_warp_size;

@@ -153,7 +149,7 @@ __device__

#ifdef __KERNEL_METAL__

# define gpu_parallel_active_index_array(num_states, indices, num_indices, is_active_op) \
# define gpu_parallel_active_index_array(dummy, num_states, indices, num_indices, is_active_op) \
const uint is_active = (ccl_gpu_global_id_x() < num_states) ? \
is_active_op(ccl_gpu_global_id_x()) : \
0; \
@@ -171,13 +167,15 @@ __device__
simdgroup_offset)
#elif defined(__KERNEL_ONEAPI__)

# define gpu_parallel_active_index_array(num_states, indices, num_indices, is_active_op) \
# define gpu_parallel_active_index_array( \
blocksize, num_states, indices, num_indices, is_active_op) \
gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op)

#else

# define gpu_parallel_active_index_array(num_states, indices, num_indices, is_active_op) \
gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op)
# define gpu_parallel_active_index_array( \
blocksize, num_states, indices, num_indices, is_active_op) \
gpu_parallel_active_index_array_impl<blocksize>(num_states, indices, num_indices, is_active_op)

#endif

@@ -28,7 +28,6 @@ typedef unsigned long long uint64_t;
/* Qualifiers */

#define ccl_device __device__ __inline__
#define ccl_device_extern extern "C" __device__
#define ccl_device_inline __device__ __inline__
#define ccl_device_forceinline __device__ __forceinline__
#define ccl_device_noinline __device__ __noinline__

@@ -79,8 +79,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
metalrt_intersector_type metalrt_intersect;

bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
if (triangle_only) {
if (!kernel_data.bvh.have_curves) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}

@@ -178,9 +177,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
metalrt_intersector_type metalrt_intersect;

metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);

bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
if (triangle_only) {
if (!kernel_data.bvh.have_curves) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}

@@ -208,9 +205,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
if (lcg_state) {
*lcg_state = payload.lcg_state;
}
if (local_isect) {
*local_isect = payload.local_isect;
}
*local_isect = payload.local_isect;

return payload.result;
}
@@ -245,9 +240,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
metalrt_intersector_type metalrt_intersect;

metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);

bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
if (triangle_only) {
if (!kernel_data.bvh.have_curves) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}

@@ -314,9 +307,7 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
metalrt_intersector_type metalrt_intersect;

metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);

bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
if (triangle_only) {
if (!kernel_data.bvh.have_curves) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}

@@ -38,7 +38,6 @@ using namespace metal::raytracing;
# define ccl_device_noinline ccl_device __attribute__((noinline))
#endif

#define ccl_device_extern extern "C"
#define ccl_device_noinline_cpu ccl_device
#define ccl_device_inline_method ccl_device
#define ccl_global device

@@ -182,20 +182,20 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,

const float u = barycentrics.x;
const float v = barycentrics.y;
const int prim_type = kernel_data_fetch(objects, object).primitive_type;
int type = prim_type;
int type = 0;
if (intersection_type == METALRT_HIT_TRIANGLE) {
type = kernel_data_fetch(objects, object).primitive_type;
}
# ifdef __HAIR__
if (intersection_type != METALRT_HIT_TRIANGLE) {
if ( (prim_type == PRIMITIVE_CURVE_THICK || prim_type == PRIMITIVE_CURVE_RIBBON)) {
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
type = segment.type;
prim = segment.prim;

/* Filter out curve endcaps */
if (u == 0.0f || u == 1.0f) {
/* continue search */
return true;
}
else {
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
type = segment.type;
prim = segment.prim;

/* Filter out curve endcaps */
if (u == 0.0f || u == 1.0f) {
/* continue search */
return true;
}
}
# endif
@@ -279,7 +279,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;

/* Continue tracing. */
# endif /* __TRANSPARENT_SHADOWS__ */
#endif /* __SHADOW_RECORD_ALL__ */
@@ -327,8 +327,7 @@ inline TReturnType metalrt_visibility_test(
TReturnType result;

#ifdef __HAIR__
const int type = kernel_data_fetch(objects, object).primitive_type;
if (intersection_type == METALRT_HIT_BOUNDING_BOX && (type == PRIMITIVE_CURVE_THICK || type == PRIMITIVE_CURVE_RIBBON)) {
if (intersection_type == METALRT_HIT_BOUNDING_BOX) {
/* Filter out curve endcaps. */
if (u == 0.0f || u == 1.0f) {
result.accept = false;
@@ -464,12 +463,7 @@ ccl_device_inline void metalrt_intersection_curve_shadow(
const float ray_tmax,
thread BoundingBoxIntersectionResult &result)
{
# ifdef __VISIBILITY_FLAG__
const uint visibility = payload.visibility;
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
return;
}
# endif

Intersection isect;
isect.t = ray_tmax;
@@ -691,12 +685,7 @@ ccl_device_inline void metalrt_intersection_point_shadow(
const float ray_tmax,
thread BoundingBoxIntersectionResult &result)
{
# ifdef __VISIBILITY_FLAG__
const uint visibility = payload.visibility;
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
return;
}
# endif

Intersection isect;
isect.t = ray_tmax;
@@ -28,7 +28,6 @@
/* Qualifier wrappers for different names on different devices */

#define ccl_device
#define ccl_device_extern extern "C"
#define ccl_global
#define ccl_always_inline __attribute__((always_inline))
#define ccl_device_inline inline

@@ -33,16 +33,14 @@ typedef unsigned long long uint64_t;
#endif

#define ccl_device \
static __device__ \
__forceinline__ // Function calls are bad for OptiX performance, so inline everything
#define ccl_device_extern extern "C" __device__
__device__ __forceinline__ // Function calls are bad for OptiX performance, so inline everything
#define ccl_device_inline ccl_device
#define ccl_device_forceinline ccl_device
#define ccl_device_inline_method __device__ __forceinline__
#define ccl_device_noinline static __device__ __noinline__
#define ccl_device_inline_method ccl_device
#define ccl_device_noinline __device__ __noinline__
#define ccl_device_noinline_cpu ccl_device
#define ccl_global
#define ccl_inline_constant static __constant__
#define ccl_inline_constant __constant__
#define ccl_device_constant __constant__ __device__
#define ccl_constant const
#define ccl_gpu_shared __shared__
@@ -59,6 +57,23 @@ typedef unsigned long long uint64_t;

#define kernel_assert(cond)

/* GPU thread, block, grid size and index */

#define ccl_gpu_thread_idx_x (threadIdx.x)
#define ccl_gpu_block_dim_x (blockDim.x)
#define ccl_gpu_block_idx_x (blockIdx.x)
#define ccl_gpu_grid_dim_x (gridDim.x)
#define ccl_gpu_warp_size (warpSize)
#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))

#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)

/* GPU warp synchronization. */

#define ccl_gpu_syncthreads() __syncthreads()
#define ccl_gpu_ballot(predicate) __ballot_sync(0xFFFFFFFF, predicate)

/* GPU texture objects */

typedef unsigned long long CUtexObject;
@@ -86,14 +101,14 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D

typedef unsigned short half;

ccl_device_forceinline half __float2half(const float f)
__device__ half __float2half(const float f)
{
half val;
asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
return val;
}

ccl_device_forceinline float __half2float(const half h)
__device__ float __half2float(const half h)
{
float val;
asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));

@@ -25,7 +25,6 @@ struct KernelParamsOptiX {
/* Kernel arguments */
const int *path_index_array;
float *render_buffer;
int offset;

/* Global scene data and textures */
KernelData data;
@@ -37,11 +36,7 @@ struct KernelParamsOptiX {
};

#ifdef __NVCC__
extern "C"
# ifndef __CUDACC_RDC__
static
# endif
__constant__ KernelParamsOptiX kernel_params;
extern "C" static __constant__ KernelParamsOptiX kernel_params;
#endif

/* Abstraction macros */

@@ -1,83 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */

#define WITH_OSL

/* Copy of the regular OptiX kernels with additional OSL support. */

#include "kernel/device/optix/kernel_shader_raytrace.cu"

#include "kernel/bake/bake.h"
#include "kernel/integrator/shade_background.h"
#include "kernel/integrator/shade_light.h"
#include "kernel/integrator/shade_shadow.h"
#include "kernel/integrator/shade_volume.h"

extern "C" __global__ void __raygen__kernel_optix_integrator_shade_background()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (kernel_params.path_index_array) ?
kernel_params.path_index_array[global_index] :
global_index;
integrator_shade_background(nullptr, path_index, kernel_params.render_buffer);
}

extern "C" __global__ void __raygen__kernel_optix_integrator_shade_light()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (kernel_params.path_index_array) ?
kernel_params.path_index_array[global_index] :
global_index;
integrator_shade_light(nullptr, path_index, kernel_params.render_buffer);
}

extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (kernel_params.path_index_array) ?
kernel_params.path_index_array[global_index] :
global_index;
integrator_shade_surface(nullptr, path_index, kernel_params.render_buffer);
}

extern "C" __global__ void __raygen__kernel_optix_integrator_shade_volume()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (kernel_params.path_index_array) ?
kernel_params.path_index_array[global_index] :
global_index;
integrator_shade_volume(nullptr, path_index, kernel_params.render_buffer);
}

extern "C" __global__ void __raygen__kernel_optix_integrator_shade_shadow()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (kernel_params.path_index_array) ?
kernel_params.path_index_array[global_index] :
global_index;
integrator_shade_shadow(nullptr, path_index, kernel_params.render_buffer);
}

extern "C" __global__ void __raygen__kernel_optix_shader_eval_displace()
{
KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
float *const output = kernel_params.render_buffer;
const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
kernel_displace_evaluate(nullptr, input, output, global_index);
}

extern "C" __global__ void __raygen__kernel_optix_shader_eval_background()
{
KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
float *const output = kernel_params.render_buffer;
const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
kernel_background_evaluate(nullptr, input, output, global_index);
}

extern "C" __global__ void __raygen__kernel_optix_shader_eval_curve_shadow_transparency()
{
KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
float *const output = kernel_params.render_buffer;
const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
kernel_curve_shadow_transparency_evaluate(nullptr, input, output, global_index);
}
@@ -58,29 +58,13 @@ ccl_device bool film_adaptive_sampling_convergence_check(KernelGlobals kg,
const float4 I = kernel_read_pass_float4(buffer + kernel_data.film.pass_combined);

const float sample = __float_as_uint(buffer[kernel_data.film.pass_sample_count]);
const float intensity_scale = kernel_data.film.exposure / sample;
const float inv_sample = 1.0f / sample;

/* The per pixel error as seen in section 2.1 of
* "A hierarchical automatic stopping condition for Monte Carlo global illumination" */
const float error_difference = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) *
intensity_scale;
const float intensity = (I.x + I.y + I.z) * intensity_scale;

/* Anything with R+G+B > 1 is highly exposed - even in sRGB it's a range that
* some displays aren't even able to display without significant losses in
* detalization. Everything with R+G+B > 3 is overexposed and should receive
* even less samples. Filmic-like curves need maximum sampling rate at
* intensity near 0.1-0.2, so threshold of 1 for R+G+B leaves an additional
* fstop in case it is needed for compositing.
*/
float error_normalize;
if (intensity < 1.0f) {
error_normalize = sqrtf(intensity);
}
else {
error_normalize = intensity;
}

inv_sample;
const float error_normalize = sqrtf((I.x + I.y + I.z) * inv_sample);
/* A small epsilon is added to the divisor to prevent division by zero. */
const float error = error_difference / (0.0001f + error_normalize);
const bool did_converge = (error < threshold);

@@ -42,27 +42,27 @@ ccl_device_inline void film_write_data_passes(KernelGlobals kg,
ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);

if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
if (INTEGRATOR_STATE(state, path, sample) == 0) {
if (flag & PASSMASK(DEPTH)) {
const float depth = camera_z_depth(kg, sd->P);
film_overwrite_pass_float(buffer + kernel_data.film.pass_depth, depth);
}
if (flag & PASSMASK(OBJECT_ID)) {
const float id = object_pass_id(kg, sd->object);
film_overwrite_pass_float(buffer + kernel_data.film.pass_object_id, id);
}
if (flag & PASSMASK(MATERIAL_ID)) {
const float id = shader_pass_id(kg, sd);
film_overwrite_pass_float(buffer + kernel_data.film.pass_material_id, id);
}
if (flag & PASSMASK(POSITION)) {
const float3 position = sd->P;
film_overwrite_pass_float3(buffer + kernel_data.film.pass_position, position);
}
}

if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f ||
average(surface_shader_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) {
if (INTEGRATOR_STATE(state, path, sample) == 0) {
if (flag & PASSMASK(DEPTH)) {
const float depth = camera_z_depth(kg, sd->P);
film_write_pass_float(buffer + kernel_data.film.pass_depth, depth);
}
if (flag & PASSMASK(OBJECT_ID)) {
const float id = object_pass_id(kg, sd->object);
film_write_pass_float(buffer + kernel_data.film.pass_object_id, id);
}
if (flag & PASSMASK(MATERIAL_ID)) {
const float id = shader_pass_id(kg, sd);
film_write_pass_float(buffer + kernel_data.film.pass_material_id, id);
}
if (flag & PASSMASK(POSITION)) {
const float3 position = sd->P;
film_write_pass_float3(buffer + kernel_data.film.pass_position, position);
}
}

if (flag & PASSMASK(NORMAL)) {
const float3 normal = surface_shader_average_normal(kg, sd);
film_write_pass_float3(buffer + kernel_data.film.pass_normal, normal);
@@ -157,47 +157,4 @@ ccl_device_inline void film_write_data_passes(KernelGlobals kg,
#endif
}

ccl_device_inline void film_write_data_passes_background(
KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
{
#ifdef __PASSES__
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);

if (!(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
return;
}

/* Don't write data passes for paths that were split off for shadow catchers
* to avoid double-counting. */
if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) {
return;
}

const int flag = kernel_data.film.pass_flag;

if (!(flag & PASS_ANY)) {
return;
}

if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);

if (INTEGRATOR_STATE(state, path, sample) == 0) {
if (flag & PASSMASK(DEPTH)) {
film_overwrite_pass_float(buffer + kernel_data.film.pass_depth, 0.0f);
}
if (flag & PASSMASK(OBJECT_ID)) {
film_overwrite_pass_float(buffer + kernel_data.film.pass_object_id, 0.0f);
}
if (flag & PASSMASK(MATERIAL_ID)) {
film_overwrite_pass_float(buffer + kernel_data.film.pass_material_id, 0.0f);
}
if (flag & PASSMASK(POSITION)) {
film_overwrite_pass_float3(buffer + kernel_data.film.pass_position, zero_float3());
}
}
}
#endif
}

CCL_NAMESPACE_END

@@ -527,6 +527,17 @@ ccl_device_inline void film_write_direct_light(KernelGlobals kg,
film_write_pass_spectrum(buffer + pass_offset, contribution);
}
}

/* Write shadow pass. */
if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) &&
(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
const Spectrum unshadowed_throughput = INTEGRATOR_STATE(
state, shadow_path, unshadowed_throughput);
const Spectrum shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
const Spectrum shadow = safe_divide(shadowed_throughput, unshadowed_throughput) *
kernel_data.film.pass_shadow_scale;
film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow, shadow);
}
}
#endif
}

@@ -12,7 +12,6 @@
CCL_NAMESPACE_BEGIN

/* Get pointer to pixel in render buffer. */

ccl_device_forceinline ccl_global float *film_pass_pixel_render_buffer(
KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer)
{
@@ -22,8 +21,7 @@ ccl_device_forceinline ccl_global float *film_pass_pixel_render_buffer(
return render_buffer + render_buffer_offset;
}

/* Accumulate in passes. */

/* Write to pixel. */
ccl_device_inline void film_write_pass_float(ccl_global float *ccl_restrict buffer, float value)
{
#ifdef __ATOMIC_PASS_WRITE__
@@ -76,25 +74,6 @@ ccl_device_inline void film_write_pass_float4(ccl_global float *ccl_restrict buf
#endif
}

/* Overwrite for passes that only write on sample 0. This assumes only a single thread will write
* to this pixel and no atomics are needed. */

ccl_device_inline void film_overwrite_pass_float(ccl_global float *ccl_restrict buffer,
float value)
{
*buffer = value;
}

ccl_device_inline void film_overwrite_pass_float3(ccl_global float *ccl_restrict buffer,
float3 value)
{
buffer[0] = value.x;
buffer[1] = value.y;
buffer[2] = value.z;
}

/* Read back from passes. */

ccl_device_inline float kernel_read_pass_float(ccl_global float *ccl_restrict buffer)
{
return *buffer;

@@ -24,8 +24,8 @@ ccl_device void displacement_shader_eval(KernelGlobals kg,

/* this will modify sd->P */
#ifdef __OSL__
if (kernel_data.kernel_features & KERNEL_FEATURE_OSL) {
osl_eval_nodes<SHADER_TYPE_DISPLACEMENT>(kg, state, sd, 0);
if (kg->osl) {
OSLShader::eval_displacement(kg, state, sd);
}
else
#endif

@@ -156,13 +156,6 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
u = v;
v = 1.0f - tmp - v;

const float tmpdx = dudx;
const float tmpdy = dudy;
dudx = dvdx;
dudy = dvdy;
dvdx = -tmpdx - dvdx;
dvdy = -tmpdy - dvdy;

/* Position and normal on triangle. */
const int object = kernel_data.bake.object_index;
float3 P, Ng;
Some files were not shown because too many files have changed in this diff.