Compare commits
80 Commits
temp-pytho
...
soc-2021-u
Author | SHA1 | Date | |
---|---|---|---|
560e015a5b | |||
f87ed1547c | |||
1b9dd08d02 | |||
0c2bc843f5 | |||
98deceb5c1 | |||
da8963af2d | |||
72ee339710 | |||
1c74abfa4b | |||
f6e594f1ee | |||
8c3128baae | |||
798214c1f6 | |||
e981df9fbd | |||
112a532f41 | |||
aa0b05ef70 | |||
c3e996a118 | |||
c7a2086253 | |||
0ac3da0e45 | |||
![]() |
a39edbfb19 | ||
e6a68e9511 | |||
1ea018eb4d | |||
08697cff10 | |||
395056561b | |||
6669c559e3 | |||
cacfdcd041 | |||
4362228562 | |||
4ea6595af0 | |||
8506edc0b8 | |||
5c4bd878a2 | |||
![]() |
ccf4103c1a | ||
76e4ffdb90 | |||
97f82a2224 | |||
2581039d55 | |||
1e6fef3aa1 | |||
446f488685 | |||
91a2f5583e | |||
![]() |
aa33073004 | ||
![]() |
bb5373ad7b | ||
![]() |
791cb92b96 | ||
3f0d85de05 | |||
506a2f43b6 | |||
1f13ff614d | |||
c6e1b2f015 | |||
790c6740dc | |||
16cd543e4f | |||
![]() |
f36013a78d | ||
11ef414ba3 | |||
39df796ec9 | |||
e17731fc9c | |||
65c36dc583 | |||
8804c698eb | |||
d6ddacabc1 | |||
9974edc857 | |||
7d5ed35602 | |||
cd75125c48 | |||
e1abd5947f | |||
8d642bbba6 | |||
5777ec9af9 | |||
86023928ba | |||
708f375f76 | |||
28c85e60cb | |||
d615f4d65e | |||
eb88ce5146 | |||
1f65001cae | |||
8a57e48a8f | |||
be270d8c8a | |||
39aa006260 | |||
1965df11f4 | |||
ad5983895a | |||
37e185980a | |||
7e1e4889c6 | |||
62d2e8130f | |||
2b37edebdf | |||
e5ab28d392 | |||
2771b931b5 | |||
f76eca3af2 | |||
279a67ecc8 | |||
1a89001151 | |||
2126fc815d | |||
790d11899a | |||
6a422b6624 |
@@ -180,7 +180,6 @@ ForEachMacros:
|
||||
- CTX_DATA_BEGIN_WITH_ID
|
||||
- DEG_OBJECT_ITER_BEGIN
|
||||
- DEG_OBJECT_ITER_FOR_RENDER_ENGINE_BEGIN
|
||||
- DRW_ENABLED_ENGINE_ITER
|
||||
- DRIVER_TARGETS_LOOPER_BEGIN
|
||||
- DRIVER_TARGETS_USED_LOOPER_BEGIN
|
||||
- FOREACH_BASE_IN_EDIT_MODE_BEGIN
|
||||
|
@@ -388,10 +388,6 @@ if(WITH_PYTHON_INSTALL)
|
||||
set(PYTHON_REQUESTS_PATH "" CACHE PATH "Path to python site-packages or dist-packages containing 'requests' module")
|
||||
mark_as_advanced(PYTHON_REQUESTS_PATH)
|
||||
endif()
|
||||
|
||||
option(WITH_PYTHON_INSTALL_ZSTANDARD "Copy zstandard into the blender install folder" ON)
|
||||
set(PYTHON_ZSTANDARD_PATH "" CACHE PATH "Path to python site-packages or dist-packages containing 'zstandard' module")
|
||||
mark_as_advanced(PYTHON_ZSTANDARD_PATH)
|
||||
endif()
|
||||
|
||||
option(WITH_CPU_SIMD "Enable SIMD instruction if they're detected on the host machine" ON)
|
||||
@@ -410,7 +406,6 @@ mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
|
||||
set(CYCLES_TEST_DEVICES CPU CACHE STRING "Run regression tests on the specified device types (CPU CUDA OPTIX)" )
|
||||
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
|
||||
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
|
||||
option(WITH_CYCLES_HIP_BINARIES "Build Cycles HIP binaries" OFF)
|
||||
unset(PLATFORM_DEFAULT)
|
||||
option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON)
|
||||
option(WITH_CYCLES_DEBUG_NAN "Build Cycles with additional asserts for detecting NaNs and invalid values" OFF)
|
||||
@@ -424,8 +419,6 @@ mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
|
||||
|
||||
option(WITH_CYCLES_DEVICE_CUDA "Enable Cycles CUDA compute support" ON)
|
||||
option(WITH_CYCLES_DEVICE_OPTIX "Enable Cycles OptiX support" ON)
|
||||
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles HIP support" OFF)
|
||||
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
|
||||
mark_as_advanced(WITH_CYCLES_DEVICE_CUDA)
|
||||
|
||||
option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime" ON)
|
||||
@@ -828,11 +821,6 @@ if(NOT WITH_CUDA_DYNLOAD)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEVICE_HIP)
|
||||
# Currently HIP must be dynamically loaded, this may change in future toolkits
|
||||
set(WITH_HIP_DYNLOAD ON)
|
||||
endif()
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Check check if submodules are cloned
|
||||
|
||||
@@ -1728,12 +1716,6 @@ if(WITH_PYTHON)
|
||||
elseif(WITH_PYTHON_INSTALL_REQUESTS)
|
||||
find_python_package(requests "")
|
||||
endif()
|
||||
|
||||
if(WIN32 OR APPLE)
|
||||
# pass, we have this in lib/python/site-packages
|
||||
elseif(WITH_PYTHON_INSTALL_ZSTANDARD)
|
||||
find_python_package(zstandard "")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Select C++17 as the standard for C++ projects.
|
||||
@@ -1868,9 +1850,6 @@ elseif(WITH_CYCLES_STANDALONE)
|
||||
if(WITH_CUDA_DYNLOAD)
|
||||
add_subdirectory(extern/cuew)
|
||||
endif()
|
||||
if(WITH_HIP_DYNLOAD)
|
||||
add_subdirectory(extern/hipew)
|
||||
endif()
|
||||
if(NOT WITH_SYSTEM_GLEW)
|
||||
add_subdirectory(extern/glew)
|
||||
endif()
|
||||
@@ -2005,7 +1984,6 @@ if(FIRST_RUN)
|
||||
endif()
|
||||
info_cfg_option(WITH_PYTHON_INSTALL)
|
||||
info_cfg_option(WITH_PYTHON_INSTALL_NUMPY)
|
||||
info_cfg_option(WITH_PYTHON_INSTALL_ZSTANDARD)
|
||||
info_cfg_option(WITH_PYTHON_MODULE)
|
||||
info_cfg_option(WITH_PYTHON_SAFETY)
|
||||
|
||||
|
@@ -38,6 +38,7 @@ ExternalProject_Add(external_numpy
|
||||
PREFIX ${BUILD_DIR}/numpy
|
||||
PATCH_COMMAND ${NUMPY_PATCH}
|
||||
CONFIGURE_COMMAND ""
|
||||
PATCH_COMMAND COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/numpy/src/external_numpy < ${PATCH_DIR}/numpy.diff
|
||||
LOG_BUILD 1
|
||||
BUILD_COMMAND ${PYTHON_BINARY} ${BUILD_DIR}/numpy/src/external_numpy/setup.py build ${NUMPY_BUILD_OPTION} install --old-and-unmanageable
|
||||
INSTALL_COMMAND ""
|
||||
|
@@ -25,7 +25,7 @@ ExternalProject_Add(external_python_site_packages
|
||||
CONFIGURE_COMMAND ""
|
||||
BUILD_COMMAND ""
|
||||
PREFIX ${BUILD_DIR}/site_packages
|
||||
INSTALL_COMMAND ${PYTHON_BINARY} -m pip install ${SITE_PACKAGES_EXTRA} cython==${CYTHON_VERSION} idna==${IDNA_VERSION} charset-normalizer==${CHARSET_NORMALIZER_VERSION} urllib3==${URLLIB3_VERSION} certifi==${CERTIFI_VERSION} requests==${REQUESTS_VERSION} zstandard==${ZSTANDARD_VERSION} --no-binary :all:
|
||||
INSTALL_COMMAND ${PYTHON_BINARY} -m pip install ${SITE_PACKAGES_EXTRA} cython==${CYTHON_VERSION} idna==${IDNA_VERSION} chardet==${CHARDET_VERSION} urllib3==${URLLIB3_VERSION} certifi==${CERTIFI_VERSION} requests==${REQUESTS_VERSION} --no-binary :all:
|
||||
)
|
||||
|
||||
if(USE_PIP_NUMPY)
|
||||
|
@@ -189,11 +189,11 @@ set(OSL_HASH 1abd7ce40481771a9fa937f19595d2f2)
|
||||
set(OSL_HASH_TYPE MD5)
|
||||
set(OSL_FILE OpenShadingLanguage-${OSL_VERSION}.tar.gz)
|
||||
|
||||
set(PYTHON_VERSION 3.9.7)
|
||||
set(PYTHON_VERSION 3.9.2)
|
||||
set(PYTHON_SHORT_VERSION 3.9)
|
||||
set(PYTHON_SHORT_VERSION_NO_DOTS 39)
|
||||
set(PYTHON_URI https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tar.xz)
|
||||
set(PYTHON_HASH fddb060b483bc01850a3f412eea1d954)
|
||||
set(PYTHON_HASH f0dc9000312abeb16de4eccce9a870ab)
|
||||
set(PYTHON_HASH_TYPE MD5)
|
||||
set(PYTHON_FILE Python-${PYTHON_VERSION}.tar.xz)
|
||||
|
||||
@@ -215,18 +215,17 @@ set(NANOVDB_HASH e7b9e863ec2f3b04ead171dec2322807)
|
||||
set(NANOVDB_HASH_TYPE MD5)
|
||||
set(NANOVDB_FILE nano-vdb-${NANOVDB_GIT_UID}.tar.gz)
|
||||
|
||||
set(IDNA_VERSION 3.2)
|
||||
set(CHARSET_NORMALIZER_VERSION 2.0.6)
|
||||
set(URLLIB3_VERSION 1.26.7)
|
||||
set(CERTIFI_VERSION 2021.10.8)
|
||||
set(REQUESTS_VERSION 2.26.0)
|
||||
set(CYTHON_VERSION 0.29.24)
|
||||
set(ZSTANDARD_VERSION 0.15.2 )
|
||||
set(IDNA_VERSION 2.10)
|
||||
set(CHARDET_VERSION 4.0.0)
|
||||
set(URLLIB3_VERSION 1.26.3)
|
||||
set(CERTIFI_VERSION 2020.12.5)
|
||||
set(REQUESTS_VERSION 2.25.1)
|
||||
set(CYTHON_VERSION 0.29.21)
|
||||
|
||||
set(NUMPY_VERSION 1.21.2)
|
||||
set(NUMPY_SHORT_VERSION 1.21)
|
||||
set(NUMPY_VERSION 1.19.5)
|
||||
set(NUMPY_SHORT_VERSION 1.19)
|
||||
set(NUMPY_URI https://github.com/numpy/numpy/releases/download/v${NUMPY_VERSION}/numpy-${NUMPY_VERSION}.zip)
|
||||
set(NUMPY_HASH 5638d5dae3ca387be562912312db842e)
|
||||
set(NUMPY_HASH f6a1b48717c552bbc18f1adc3cc1fe0e)
|
||||
set(NUMPY_HASH_TYPE MD5)
|
||||
set(NUMPY_FILE numpy-${NUMPY_VERSION}.zip)
|
||||
|
||||
|
@@ -379,7 +379,7 @@ USE_CXX11=true
|
||||
CLANG_FORMAT_VERSION_MIN="6.0"
|
||||
CLANG_FORMAT_VERSION_MAX="10.0"
|
||||
|
||||
PYTHON_VERSION="3.9.7"
|
||||
PYTHON_VERSION="3.9.2"
|
||||
PYTHON_VERSION_SHORT="3.9"
|
||||
PYTHON_VERSION_MIN="3.7"
|
||||
PYTHON_VERSION_MAX="3.11"
|
||||
@@ -389,24 +389,24 @@ PYTHON_FORCE_REBUILD=false
|
||||
PYTHON_SKIP=false
|
||||
|
||||
# Additional Python modules.
|
||||
PYTHON_IDNA_VERSION="3.2"
|
||||
PYTHON_IDNA_VERSION="2.9"
|
||||
PYTHON_IDNA_VERSION_MIN="2.0"
|
||||
PYTHON_IDNA_VERSION_MAX="3.2"
|
||||
PYTHON_IDNA_VERSION_MAX="3.0"
|
||||
PYTHON_IDNA_NAME="idna"
|
||||
|
||||
PYTHON_CHARSET_NORMALIZER_VERSION="2.0.6"
|
||||
PYTHON_CHARSET_NORMALIZER_VERSION_MIN="2.0.6"
|
||||
PYTHON_CHARSET_NORMALIZER_VERSION_MAX="2.1.0" # requests uses `charset_normalizer~=2.0.0`
|
||||
PYTHON_CHARSET_NORMALIZER_NAME="charset-normalizer"
|
||||
PYTHON_CHARDET_VERSION="3.0.4"
|
||||
PYTHON_CHARDET_VERSION_MIN="3.0"
|
||||
PYTHON_CHARDET_VERSION_MAX="5.0"
|
||||
PYTHON_CHARDET_NAME="chardet"
|
||||
|
||||
PYTHON_URLLIB3_VERSION="1.26.7"
|
||||
PYTHON_URLLIB3_VERSION="1.25.9"
|
||||
PYTHON_URLLIB3_VERSION_MIN="1.0"
|
||||
PYTHON_URLLIB3_VERSION_MAX="2.0"
|
||||
PYTHON_URLLIB3_NAME="urllib3"
|
||||
|
||||
PYTHON_CERTIFI_VERSION="2021.10.8"
|
||||
PYTHON_CERTIFI_VERSION_MIN="2021.0"
|
||||
PYTHON_CERTIFI_VERSION_MAX="2023.0"
|
||||
PYTHON_CERTIFI_VERSION="2020.4.5.2"
|
||||
PYTHON_CERTIFI_VERSION_MIN="2020.0"
|
||||
PYTHON_CERTIFI_VERSION_MAX="2021.0"
|
||||
PYTHON_CERTIFI_NAME="certifi"
|
||||
|
||||
PYTHON_REQUESTS_VERSION="2.23.0"
|
||||
@@ -414,12 +414,7 @@ PYTHON_REQUESTS_VERSION_MIN="2.0"
|
||||
PYTHON_REQUESTS_VERSION_MAX="3.0"
|
||||
PYTHON_REQUESTS_NAME="requests"
|
||||
|
||||
PYTHON_ZSTANDARD_VERSION="0.15.2"
|
||||
PYTHON_ZSTANDARD_VERSION_MIN="0.15.2"
|
||||
PYTHON_ZSTANDARD_VERSION_MAX="0.16.0"
|
||||
PYTHON_ZSTANDARD_NAME="zstandard"
|
||||
|
||||
PYTHON_NUMPY_VERSION="1.21.2"
|
||||
PYTHON_NUMPY_VERSION="1.19.5"
|
||||
PYTHON_NUMPY_VERSION_MIN="1.14"
|
||||
PYTHON_NUMPY_VERSION_MAX="2.0"
|
||||
PYTHON_NUMPY_NAME="numpy"
|
||||
@@ -427,22 +422,20 @@ PYTHON_NUMPY_NAME="numpy"
|
||||
# As package-ready parameters (only used with distro packages).
|
||||
PYTHON_MODULES_PACKAGES=(
|
||||
"$PYTHON_IDNA_NAME $PYTHON_IDNA_VERSION_MIN $PYTHON_IDNA_VERSION_MAX"
|
||||
"$PYTHON_CHARSET_NORMALIZER_NAME $PYTHON_CHARSET_NORMALIZER_VERSION_MIN $PYTHON_CHARSET_NORMALIZER_VERSION_MAX"
|
||||
"$PYTHON_CHARDET_NAME $PYTHON_CHARDET_VERSION_MIN $PYTHON_CHARDET_VERSION_MAX"
|
||||
"$PYTHON_URLLIB3_NAME $PYTHON_URLLIB3_VERSION_MIN $PYTHON_URLLIB3_VERSION_MAX"
|
||||
"$PYTHON_CERTIFI_NAME $PYTHON_CERTIFI_VERSION_MIN $PYTHON_CERTIFI_VERSION_MAX"
|
||||
"$PYTHON_REQUESTS_NAME $PYTHON_REQUESTS_VERSION_MIN $PYTHON_REQUESTS_VERSION_MAX"
|
||||
"$PYTHON_ZSTANDARD_NAME $PYTHON_ZSTANDARD_VERSION_MIN $PYTHON_ZSTANDARD_VERSION_MAX"
|
||||
"$PYTHON_NUMPY_NAME $PYTHON_NUMPY_VERSION_MIN $PYTHON_NUMPY_VERSION_MAX"
|
||||
)
|
||||
|
||||
# As pip-ready parameters (only used when building python).
|
||||
PYTHON_MODULES_PIP=(
|
||||
"$PYTHON_IDNA_NAME==$PYTHON_IDNA_VERSION"
|
||||
"$PYTHON_CHARSET_NORMALIZER_NAME==$PYTHON_CHARSET_NORMALIZER_VERSION"
|
||||
"$PYTHON_CHARDET_NAME==$PYTHON_CHARDET_VERSION"
|
||||
"$PYTHON_URLLIB3_NAME==$PYTHON_URLLIB3_VERSION"
|
||||
"$PYTHON_CERTIFI_NAME==$PYTHON_CERTIFI_VERSION"
|
||||
"$PYTHON_REQUESTS_NAME==$PYTHON_REQUESTS_VERSION"
|
||||
"$PYTHON_ZSTANDARD_NAME==$PYTHON_ZSTANDARD_VERSION"
|
||||
"$PYTHON_NUMPY_NAME==$PYTHON_NUMPY_VERSION"
|
||||
)
|
||||
|
||||
@@ -1148,11 +1141,10 @@ You may also want to build them yourself (optional ones are [between brackets]):
|
||||
|
||||
* Python $PYTHON_VERSION (from $PYTHON_SOURCE).
|
||||
** [IDNA $PYTHON_IDNA_VERSION] (use pip).
|
||||
** [Chardet $PYTHON_CHARSET_NORMALIZER_VERSION] (use pip).
|
||||
** [Chardet $PYTHON_CHARDET_VERSION] (use pip).
|
||||
** [Urllib3 $PYTHON_URLLIB3_VERSION] (use pip).
|
||||
** [Certifi $PYTHON_CERTIFI_VERSION] (use pip).
|
||||
** [Requests $PYTHON_REQUESTS_VERSION] (use pip).
|
||||
** [ZStandard $PYTHON_ZSTANDARD_VERSION] (use pip).
|
||||
** [NumPy $PYTHON_NUMPY_VERSION] (use pip).
|
||||
* Boost $BOOST_VERSION (from $BOOST_SOURCE, modules: $BOOST_BUILD_MODULES).
|
||||
* TBB $TBB_VERSION (from $TBB_SOURCE).
|
||||
@@ -2021,7 +2013,7 @@ compile_OIIO() {
|
||||
fi
|
||||
|
||||
# To be changed each time we make edits that would modify the compiled result!
|
||||
oiio_magic=18
|
||||
oiio_magic=17
|
||||
_init_oiio
|
||||
|
||||
# Force having own builds for the dependencies.
|
||||
@@ -2096,7 +2088,6 @@ compile_OIIO() {
|
||||
cmake_d="$cmake_d -D USE_PYTHON=OFF"
|
||||
cmake_d="$cmake_d -D USE_FFMPEG=OFF"
|
||||
cmake_d="$cmake_d -D USE_OPENCV=OFF"
|
||||
cmake_d="$cmake_d -D USE_OPENVDB=OFF"
|
||||
cmake_d="$cmake_d -D BUILD_TESTING=OFF"
|
||||
cmake_d="$cmake_d -D OIIO_BUILD_TESTS=OFF"
|
||||
cmake_d="$cmake_d -D OIIO_BUILD_TOOLS=OFF"
|
||||
|
@@ -70,18 +70,16 @@
|
||||
}
|
||||
--- a/libavcodec/rl.c
|
||||
+++ b/libavcodec/rl.c
|
||||
@@ -71,17 +71,19 @@
|
||||
@@ -71,7 +71,7 @@ av_cold void ff_rl_init(RLTable *rl,
|
||||
av_cold void ff_rl_init_vlc(RLTable *rl, unsigned static_size)
|
||||
{
|
||||
int i, q;
|
||||
- VLC_TYPE table[1500][2] = {{0}};
|
||||
+ VLC_TYPE (*table)[2] = av_calloc(sizeof(VLC_TYPE), 1500 * 2);
|
||||
VLC vlc = { .table = table, .table_allocated = static_size };
|
||||
- av_assert0(static_size <= FF_ARRAY_ELEMS(table));
|
||||
+ av_assert0(static_size < 1500);
|
||||
av_assert0(static_size <= FF_ARRAY_ELEMS(table));
|
||||
init_vlc(&vlc, 9, rl->n + 1, &rl->table_vlc[0][1], 4, 2, &rl->table_vlc[0][0], 4, 2, INIT_VLC_USE_NEW_STATIC);
|
||||
|
||||
for (q = 0; q < 32; q++) {
|
||||
@@ -80,8 +80,10 @@ av_cold void ff_rl_init_vlc(RLTable *rl, unsigned static_size)
|
||||
int qmul = q * 2;
|
||||
int qadd = (q - 1) | 1;
|
||||
|
||||
@@ -93,7 +91,7 @@
|
||||
|
||||
if (q == 0) {
|
||||
qmul = 1;
|
||||
@@ -113,4 +115,5 @@
|
||||
@@ -113,4 +115,5 @@ av_cold void ff_rl_init_vlc(RLTable *rl, unsigned static_size)
|
||||
rl->rl_vlc[q][i].run = run;
|
||||
}
|
||||
}
|
||||
|
27
build_files/build_environment/patches/numpy.diff
Normal file
27
build_files/build_environment/patches/numpy.diff
Normal file
@@ -0,0 +1,27 @@
|
||||
diff --git a/numpy/distutils/system_info.py b/numpy/distutils/system_info.py
|
||||
index ba2b1f4..b10f7df 100644
|
||||
--- a/numpy/distutils/system_info.py
|
||||
+++ b/numpy/distutils/system_info.py
|
||||
@@ -2164,8 +2164,8 @@ class accelerate_info(system_info):
|
||||
'accelerate' in libraries):
|
||||
if intel:
|
||||
args.extend(['-msse3'])
|
||||
- else:
|
||||
- args.extend(['-faltivec'])
|
||||
+# else:
|
||||
+# args.extend(['-faltivec'])
|
||||
args.extend([
|
||||
'-I/System/Library/Frameworks/vecLib.framework/Headers'])
|
||||
link_args.extend(['-Wl,-framework', '-Wl,Accelerate'])
|
||||
@@ -2174,8 +2174,8 @@ class accelerate_info(system_info):
|
||||
'veclib' in libraries):
|
||||
if intel:
|
||||
args.extend(['-msse3'])
|
||||
- else:
|
||||
- args.extend(['-faltivec'])
|
||||
+# else:
|
||||
+# args.extend(['-faltivec'])
|
||||
args.extend([
|
||||
'-I/System/Library/Frameworks/vecLib.framework/Headers'])
|
||||
link_args.extend(['-Wl,-framework', '-Wl,vecLib'])
|
||||
|
@@ -1,81 +0,0 @@
|
||||
# - Find HIP compiler
|
||||
#
|
||||
# This module defines
|
||||
# HIP_HIPCC_EXECUTABLE, the full path to the hipcc executable
|
||||
# HIP_VERSION, the HIP compiler version
|
||||
#
|
||||
# HIP_FOUND, if the HIP toolkit is found.
|
||||
|
||||
#=============================================================================
|
||||
# Copyright 2021 Blender Foundation.
|
||||
#
|
||||
# Distributed under the OSI-approved BSD 3-Clause License,
|
||||
# see accompanying file BSD-3-Clause-license.txt for details.
|
||||
#=============================================================================
|
||||
|
||||
# If HIP_ROOT_DIR was defined in the environment, use it.
|
||||
if(NOT HIP_ROOT_DIR AND NOT $ENV{HIP_ROOT_DIR} STREQUAL "")
|
||||
set(HIP_ROOT_DIR $ENV{HIP_ROOT_DIR})
|
||||
endif()
|
||||
|
||||
set(_hip_SEARCH_DIRS
|
||||
${HIP_ROOT_DIR}
|
||||
)
|
||||
|
||||
find_program(HIP_HIPCC_EXECUTABLE
|
||||
NAMES
|
||||
hipcc
|
||||
HINTS
|
||||
${_hip_SEARCH_DIRS}
|
||||
PATH_SUFFIXES
|
||||
bin
|
||||
)
|
||||
|
||||
if(HIP_HIPCC_EXECUTABLE AND NOT EXISTS ${HIP_HIPCC_EXECUTABLE})
|
||||
message(WARNING "Cached or directly specified hipcc executable does not exist.")
|
||||
set(HIP_FOUND FALSE)
|
||||
elseif(HIP_HIPCC_EXECUTABLE)
|
||||
set(HIP_FOUND TRUE)
|
||||
|
||||
set(HIP_VERSION_MAJOR 0)
|
||||
set(HIP_VERSION_MINOR 0)
|
||||
set(HIP_VERSION_PATCH 0)
|
||||
|
||||
# Get version from the output.
|
||||
execute_process(COMMAND ${HIP_HIPCC_EXECUTABLE} --version
|
||||
OUTPUT_VARIABLE HIP_VERSION_RAW
|
||||
ERROR_QUIET
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
# Parse parts.
|
||||
if(HIP_VERSION_RAW MATCHES "HIP version: .*")
|
||||
# Strip the HIP prefix and get list of individual version components.
|
||||
string(REGEX REPLACE
|
||||
".*HIP version: ([.0-9]+).*" "\\1"
|
||||
HIP_SEMANTIC_VERSION "${HIP_VERSION_RAW}")
|
||||
string(REPLACE "." ";" HIP_VERSION_PARTS "${HIP_SEMANTIC_VERSION}")
|
||||
list(LENGTH HIP_VERSION_PARTS NUM_HIP_VERSION_PARTS)
|
||||
|
||||
# Extract components into corresponding variables.
|
||||
if(NUM_HIP_VERSION_PARTS GREATER 0)
|
||||
list(GET HIP_VERSION_PARTS 0 HIP_VERSION_MAJOR)
|
||||
endif()
|
||||
if(NUM_HIP_VERSION_PARTS GREATER 1)
|
||||
list(GET HIP_VERSION_PARTS 1 HIP_VERSION_MINOR)
|
||||
endif()
|
||||
if(NUM_HIP_VERSION_PARTS GREATER 2)
|
||||
list(GET HIP_VERSION_PARTS 2 HIP_VERSION_PATCH)
|
||||
endif()
|
||||
|
||||
# Unset temp variables.
|
||||
unset(NUM_HIP_VERSION_PARTS)
|
||||
unset(HIP_SEMANTIC_VERSION)
|
||||
unset(HIP_VERSION_PARTS)
|
||||
endif()
|
||||
|
||||
# Construct full semantic version.
|
||||
set(HIP_VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}")
|
||||
unset(HIP_VERSION_RAW)
|
||||
else()
|
||||
set(HIP_FOUND FALSE)
|
||||
endif()
|
@@ -24,7 +24,6 @@ import project_source_info
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from typing import (
|
||||
Any,
|
||||
@@ -36,6 +35,7 @@ USE_QUIET = (os.environ.get("QUIET", None) is not None)
|
||||
|
||||
CHECKER_IGNORE_PREFIX = [
|
||||
"extern",
|
||||
"intern/moto",
|
||||
]
|
||||
|
||||
CHECKER_BIN = "cppcheck"
|
||||
@@ -47,19 +47,13 @@ CHECKER_ARGS = [
|
||||
"--max-configs=1", # speeds up execution
|
||||
# "--check-config", # when includes are missing
|
||||
"--enable=all", # if you want sixty hundred pedantic suggestions
|
||||
|
||||
# Quiet output, otherwise all defines/includes are printed (overly verbose).
|
||||
# Only enable this for troubleshooting (if defines are not set as expected for example).
|
||||
"--quiet",
|
||||
|
||||
# NOTE: `--cppcheck-build-dir=<dir>` is added later as a temporary directory.
|
||||
]
|
||||
|
||||
if USE_QUIET:
|
||||
CHECKER_ARGS.append("--quiet")
|
||||
|
||||
|
||||
def cppcheck() -> None:
|
||||
def main() -> None:
|
||||
source_info = project_source_info.build_info(ignore_prefix_list=CHECKER_IGNORE_PREFIX)
|
||||
source_defines = project_source_info.build_defines_as_args()
|
||||
|
||||
@@ -84,10 +78,7 @@ def cppcheck() -> None:
|
||||
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
|
||||
|
||||
sys.stdout.flush()
|
||||
sys.stdout.write("%s %s\n" % (
|
||||
percent_str,
|
||||
os.path.relpath(c, project_source_info.SOURCE_DIR)
|
||||
))
|
||||
sys.stdout.write("%s " % percent_str)
|
||||
|
||||
return subprocess.Popen(cmd)
|
||||
|
||||
@@ -99,11 +90,5 @@ def cppcheck() -> None:
|
||||
print("Finished!")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
CHECKER_ARGS.append("--cppcheck-build-dir=" + temp_dir)
|
||||
cppcheck()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
@@ -243,9 +243,7 @@ def build_defines_as_args() -> List[str]:
|
||||
# use this module.
|
||||
def queue_processes(
|
||||
process_funcs: Sequence[Tuple[Callable[..., subprocess.Popen[Any]], Tuple[Any, ...]]],
|
||||
*,
|
||||
job_total: int =-1,
|
||||
sleep: float = 0.1,
|
||||
) -> None:
|
||||
""" Takes a list of function arg pairs, each function must return a process
|
||||
"""
|
||||
@@ -273,20 +271,14 @@ def queue_processes(
|
||||
|
||||
if len(processes) <= job_total:
|
||||
break
|
||||
time.sleep(sleep)
|
||||
else:
|
||||
time.sleep(0.1)
|
||||
|
||||
sys.stdout.flush()
|
||||
sys.stderr.flush()
|
||||
|
||||
processes.append(func(*args))
|
||||
|
||||
# Don't return until all jobs have finished.
|
||||
while 1:
|
||||
processes[:] = [p for p in processes if p.poll() is None]
|
||||
if not processes:
|
||||
break
|
||||
time.sleep(sleep)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
if not os.path.exists(join(CMAKE_DIR, "CMakeCache.txt")):
|
||||
|
@@ -1,40 +0,0 @@
|
||||
"""
|
||||
This method enables conversions between Local and Pose space for bones in
|
||||
the middle of updating the armature without having to update dependencies
|
||||
after each change, by manually carrying updated matrices in a recursive walk.
|
||||
"""
|
||||
|
||||
def set_pose_matrices(obj, matrix_map):
|
||||
"Assign pose space matrices of all bones at once, ignoring constraints."
|
||||
|
||||
def rec(pbone, parent_matrix):
|
||||
matrix = matrix_map[pbone.name]
|
||||
|
||||
## Instead of:
|
||||
# pbone.matrix = matrix
|
||||
# bpy.context.view_layer.update()
|
||||
|
||||
# Compute and assign local matrix, using the new parent matrix
|
||||
if pbone.parent:
|
||||
pbone.matrix_basis = pbone.bone.convert_local_to_pose(
|
||||
matrix,
|
||||
pbone.bone.matrix_local,
|
||||
parent_matrix=parent_matrix,
|
||||
parent_matrix_local=pbone.parent.bone.matrix_local,
|
||||
invert=True
|
||||
)
|
||||
else:
|
||||
pbone.matrix_basis = pbone.bone.convert_local_to_pose(
|
||||
matrix,
|
||||
pbone.bone.matrix_local,
|
||||
invert=True
|
||||
)
|
||||
|
||||
# Recursively process children, passing the new matrix through
|
||||
for child in pbone.children:
|
||||
rec(child, matrix)
|
||||
|
||||
# Scan all bone trees from their roots
|
||||
for pbone in obj.pose.bones:
|
||||
if not pbone.parent:
|
||||
rec(pbone, None)
|
@@ -1101,7 +1101,6 @@ context_type_map = {
|
||||
"scene": ("Scene", False),
|
||||
"sculpt_object": ("Object", False),
|
||||
"selectable_objects": ("Object", True),
|
||||
"selected_asset_files": ("FileSelectEntry", True),
|
||||
"selected_bones": ("EditBone", True),
|
||||
"selected_editable_bones": ("EditBone", True),
|
||||
"selected_editable_fcurves": ("FCurve", True),
|
||||
|
5
extern/CMakeLists.txt
vendored
5
extern/CMakeLists.txt
vendored
@@ -67,12 +67,9 @@ endif()
|
||||
|
||||
if(WITH_CYCLES OR WITH_COMPOSITOR OR WITH_OPENSUBDIV)
|
||||
add_subdirectory(clew)
|
||||
if((WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX) AND WITH_CUDA_DYNLOAD)
|
||||
if(WITH_CUDA_DYNLOAD)
|
||||
add_subdirectory(cuew)
|
||||
endif()
|
||||
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
|
||||
add_subdirectory(hipew)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_GHOST_X11 AND WITH_GHOST_XDND)
|
||||
|
38
extern/audaspace/bindings/C/AUD_Sound.cpp
vendored
38
extern/audaspace/bindings/C/AUD_Sound.cpp
vendored
@@ -102,30 +102,26 @@ AUD_API int AUD_Sound_getFileStreams(AUD_Sound* sound, AUD_StreamInfo **stream_i
|
||||
|
||||
if(file)
|
||||
{
|
||||
try
|
||||
{
|
||||
auto streams = file->queryStreams();
|
||||
|
||||
size_t size = sizeof(AUD_StreamInfo) * streams.size();
|
||||
|
||||
if(!size)
|
||||
{
|
||||
*stream_infos = nullptr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
*stream_infos = reinterpret_cast<AUD_StreamInfo*>(std::malloc(size));
|
||||
std::memcpy(*stream_infos, streams.data(), size);
|
||||
|
||||
return streams.size();
|
||||
}
|
||||
catch(Exception&)
|
||||
auto streams = file->queryStreams();
|
||||
|
||||
size_t size = sizeof(AUD_StreamInfo) * streams.size();
|
||||
|
||||
if(!size)
|
||||
{
|
||||
*stream_infos = nullptr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
*stream_infos = reinterpret_cast<AUD_StreamInfo*>(std::malloc(size));
|
||||
std::memcpy(*stream_infos, streams.data(), size);
|
||||
|
||||
return streams.size();
|
||||
}
|
||||
else
|
||||
{
|
||||
*stream_infos = nullptr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
*stream_infos = nullptr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
AUD_API sample_t* AUD_Sound_data(AUD_Sound* sound, int* length, AUD_Specs* specs)
|
||||
|
1
extern/cuew/include/cuew.h
vendored
1
extern/cuew/include/cuew.h
vendored
@@ -609,7 +609,6 @@ typedef enum cudaError_enum {
|
||||
CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,
|
||||
CUDA_ERROR_NVLINK_UNCORRECTABLE = 220,
|
||||
CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221,
|
||||
CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222,
|
||||
CUDA_ERROR_INVALID_SOURCE = 300,
|
||||
CUDA_ERROR_FILE_NOT_FOUND = 301,
|
||||
CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
|
||||
|
1
extern/cuew/src/cuew.c
vendored
1
extern/cuew/src/cuew.c
vendored
@@ -736,7 +736,6 @@ const char *cuewErrorString(CUresult result) {
|
||||
case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics context";
|
||||
case CUDA_ERROR_NVLINK_UNCORRECTABLE: return "Nvlink uncorrectable";
|
||||
case CUDA_ERROR_JIT_COMPILER_NOT_FOUND: return "Jit compiler not found";
|
||||
case CUDA_ERROR_UNSUPPORTED_PTX_VERSION: return "Unsupported PTX version";
|
||||
case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
|
||||
case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
|
||||
case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve";
|
||||
|
39
extern/hipew/CMakeLists.txt
vendored
39
extern/hipew/CMakeLists.txt
vendored
@@ -1,39 +0,0 @@
|
||||
# ***** BEGIN GPL LICENSE BLOCK *****
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software Foundation,
|
||||
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
#
|
||||
# The Original Code is Copyright (C) 2021, Blender Foundation
|
||||
# All rights reserved.
|
||||
# ***** END GPL LICENSE BLOCK *****
|
||||
|
||||
set(INC
|
||||
.
|
||||
include
|
||||
)
|
||||
|
||||
set(INC_SYS
|
||||
|
||||
)
|
||||
|
||||
set(SRC
|
||||
src/hipew.c
|
||||
|
||||
include/hipew.h
|
||||
)
|
||||
|
||||
set(LIB
|
||||
)
|
||||
|
||||
blender_add_lib(extern_hipew "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
|
1207
extern/hipew/include/hipew.h
vendored
1207
extern/hipew/include/hipew.h
vendored
File diff suppressed because it is too large
Load Diff
533
extern/hipew/src/hipew.c
vendored
533
extern/hipew/src/hipew.c
vendored
@@ -1,533 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License
|
||||
*/
|
||||
#ifdef _MSC_VER
|
||||
# if _MSC_VER < 1900
|
||||
# define snprintf _snprintf
|
||||
# endif
|
||||
# define popen _popen
|
||||
# define pclose _pclose
|
||||
# define _CRT_SECURE_NO_WARNINGS
|
||||
#endif
|
||||
|
||||
#include <hipew.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
# define WIN32_LEAN_AND_MEAN
|
||||
# define VC_EXTRALEAN
|
||||
# include <windows.h>
|
||||
|
||||
/* Utility macros. */
|
||||
|
||||
typedef HMODULE DynamicLibrary;
|
||||
|
||||
# define dynamic_library_open(path) LoadLibraryA(path)
|
||||
# define dynamic_library_close(lib) FreeLibrary(lib)
|
||||
# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
|
||||
#else
|
||||
# include <dlfcn.h>
|
||||
|
||||
typedef void* DynamicLibrary;
|
||||
|
||||
# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
|
||||
# define dynamic_library_close(lib) dlclose(lib)
|
||||
# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
|
||||
#endif
|
||||
|
||||
#define _LIBRARY_FIND_CHECKED(lib, name) \
|
||||
name = (t##name *)dynamic_library_find(lib, #name); \
|
||||
assert(name);
|
||||
|
||||
#define _LIBRARY_FIND(lib, name) \
|
||||
name = (t##name *)dynamic_library_find(lib, #name);
|
||||
|
||||
#define HIP_LIBRARY_FIND_CHECKED(name) \
|
||||
_LIBRARY_FIND_CHECKED(hip_lib, name)
|
||||
#define HIP_LIBRARY_FIND(name) _LIBRARY_FIND(hip_lib, name)
|
||||
|
||||
|
||||
static DynamicLibrary hip_lib;
|
||||
|
||||
/* Function definitions. */
|
||||
thipGetErrorName *hipGetErrorName;
|
||||
thipInit *hipInit;
|
||||
thipDriverGetVersion *hipDriverGetVersion;
|
||||
thipGetDevice *hipGetDevice;
|
||||
thipGetDeviceCount *hipGetDeviceCount;
|
||||
thipDeviceGetName *hipDeviceGetName;
|
||||
thipDeviceGetAttribute *hipDeviceGetAttribute;
|
||||
thipDeviceComputeCapability *hipDeviceComputeCapability;
|
||||
thipDevicePrimaryCtxRetain *hipDevicePrimaryCtxRetain;
|
||||
thipDevicePrimaryCtxRelease *hipDevicePrimaryCtxRelease;
|
||||
thipDevicePrimaryCtxSetFlags *hipDevicePrimaryCtxSetFlags;
|
||||
thipDevicePrimaryCtxGetState *hipDevicePrimaryCtxGetState;
|
||||
thipDevicePrimaryCtxReset *hipDevicePrimaryCtxReset;
|
||||
thipCtxCreate *hipCtxCreate;
|
||||
thipCtxDestroy *hipCtxDestroy;
|
||||
thipCtxPushCurrent *hipCtxPushCurrent;
|
||||
thipCtxPopCurrent *hipCtxPopCurrent;
|
||||
thipCtxSetCurrent *hipCtxSetCurrent;
|
||||
thipCtxGetCurrent *hipCtxGetCurrent;
|
||||
thipCtxGetDevice *hipCtxGetDevice;
|
||||
thipCtxGetFlags *hipCtxGetFlags;
|
||||
thipCtxSynchronize *hipCtxSynchronize;
|
||||
thipDeviceSynchronize *hipDeviceSynchronize;
|
||||
thipCtxGetCacheConfig *hipCtxGetCacheConfig;
|
||||
thipCtxSetCacheConfig *hipCtxSetCacheConfig;
|
||||
thipCtxGetSharedMemConfig *hipCtxGetSharedMemConfig;
|
||||
thipCtxSetSharedMemConfig *hipCtxSetSharedMemConfig;
|
||||
thipCtxGetApiVersion *hipCtxGetApiVersion;
|
||||
thipModuleLoad *hipModuleLoad;
|
||||
thipModuleLoadData *hipModuleLoadData;
|
||||
thipModuleLoadDataEx *hipModuleLoadDataEx;
|
||||
thipModuleUnload *hipModuleUnload;
|
||||
thipModuleGetFunction *hipModuleGetFunction;
|
||||
thipModuleGetGlobal *hipModuleGetGlobal;
|
||||
thipModuleGetTexRef *hipModuleGetTexRef;
|
||||
thipMemGetInfo *hipMemGetInfo;
|
||||
thipMalloc *hipMalloc;
|
||||
thipMemAllocPitch *hipMemAllocPitch;
|
||||
thipFree *hipFree;
|
||||
thipMemGetAddressRange *hipMemGetAddressRange;
|
||||
thipHostMalloc *hipHostMalloc;
|
||||
thipHostFree *hipHostFree;
|
||||
thipHostGetDevicePointer *hipHostGetDevicePointer;
|
||||
thipHostGetFlags *hipHostGetFlags;
|
||||
thipMallocManaged *hipMallocManaged;
|
||||
thipDeviceGetByPCIBusId *hipDeviceGetByPCIBusId;
|
||||
thipDeviceGetPCIBusId *hipDeviceGetPCIBusId;
|
||||
thipMemcpyPeer *hipMemcpyPeer;
|
||||
thipMemcpyHtoD *hipMemcpyHtoD;
|
||||
thipMemcpyDtoH *hipMemcpyDtoH;
|
||||
thipMemcpyDtoD *hipMemcpyDtoD;
|
||||
thipDrvMemcpy2DUnaligned *hipDrvMemcpy2DUnaligned;
|
||||
thipMemcpyParam2D *hipMemcpyParam2D;
|
||||
thipDrvMemcpy3D *hipDrvMemcpy3D;
|
||||
thipMemcpyHtoDAsync *hipMemcpyHtoDAsync;
|
||||
thipMemcpyDtoHAsync *hipMemcpyDtoHAsync;
|
||||
thipMemcpyParam2DAsync *hipMemcpyParam2DAsync;
|
||||
thipDrvMemcpy3DAsync *hipDrvMemcpy3DAsync;
|
||||
thipMemsetD8 *hipMemsetD8;
|
||||
thipMemsetD16 *hipMemsetD16;
|
||||
thipMemsetD32 *hipMemsetD32;
|
||||
thipMemsetD8Async *hipMemsetD8Async;
|
||||
thipMemsetD16Async *hipMemsetD16Async;
|
||||
thipMemsetD32Async *hipMemsetD32Async;
|
||||
thipArrayCreate *hipArrayCreate;
|
||||
thipArrayDestroy *hipArrayDestroy;
|
||||
thipArray3DCreate *hipArray3DCreate;
|
||||
thipStreamCreateWithFlags *hipStreamCreateWithFlags;
|
||||
thipStreamCreateWithPriority *hipStreamCreateWithPriority;
|
||||
thipStreamGetPriority *hipStreamGetPriority;
|
||||
thipStreamGetFlags *hipStreamGetFlags;
|
||||
thipStreamWaitEvent *hipStreamWaitEvent;
|
||||
thipStreamAddCallback *hipStreamAddCallback;
|
||||
thipStreamQuery *hipStreamQuery;
|
||||
thipStreamSynchronize *hipStreamSynchronize;
|
||||
thipStreamDestroy *hipStreamDestroy;
|
||||
thipEventCreateWithFlags *hipEventCreateWithFlags;
|
||||
thipEventRecord *hipEventRecord;
|
||||
thipEventQuery *hipEventQuery;
|
||||
thipEventSynchronize *hipEventSynchronize;
|
||||
thipEventDestroy *hipEventDestroy;
|
||||
thipEventElapsedTime *hipEventElapsedTime;
|
||||
thipFuncGetAttribute *hipFuncGetAttribute;
|
||||
thipFuncSetCacheConfig *hipFuncSetCacheConfig;
|
||||
thipModuleLaunchKernel *hipModuleLaunchKernel;
|
||||
thipDrvOccupancyMaxActiveBlocksPerMultiprocessor *hipDrvOccupancyMaxActiveBlocksPerMultiprocessor;
|
||||
thipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
|
||||
thipModuleOccupancyMaxPotentialBlockSize *hipModuleOccupancyMaxPotentialBlockSize;
|
||||
thipTexRefSetArray *hipTexRefSetArray;
|
||||
thipTexRefSetAddress *hipTexRefSetAddress;
|
||||
thipTexRefSetAddress2D *hipTexRefSetAddress2D;
|
||||
thipTexRefSetFormat *hipTexRefSetFormat;
|
||||
thipTexRefSetAddressMode *hipTexRefSetAddressMode;
|
||||
thipTexRefSetFilterMode *hipTexRefSetFilterMode;
|
||||
thipTexRefSetFlags *hipTexRefSetFlags;
|
||||
thipTexRefGetAddress *hipTexRefGetAddress;
|
||||
thipTexRefGetArray *hipTexRefGetArray;
|
||||
thipTexRefGetAddressMode *hipTexRefGetAddressMode;
|
||||
thipTexObjectCreate *hipTexObjectCreate;
|
||||
thipTexObjectDestroy *hipTexObjectDestroy;
|
||||
thipDeviceCanAccessPeer *hipDeviceCanAccessPeer;
|
||||
|
||||
thipCtxEnablePeerAccess *hipCtxEnablePeerAccess;
|
||||
thipCtxDisablePeerAccess *hipCtxDisablePeerAccess;
|
||||
thipDeviceGetP2PAttribute *hipDeviceGetP2PAttribute;
|
||||
thipGraphicsUnregisterResource *hipGraphicsUnregisterResource;
|
||||
thipGraphicsMapResources *hipGraphicsMapResources;
|
||||
thipGraphicsUnmapResources *hipGraphicsUnmapResources;
|
||||
thipGraphicsResourceGetMappedPointer *hipGraphicsResourceGetMappedPointer;
|
||||
|
||||
thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
|
||||
thipGLGetDevices *hipGLGetDevices;
|
||||
|
||||
|
||||
|
||||
static DynamicLibrary dynamic_library_open_find(const char **paths) {
|
||||
int i = 0;
|
||||
while (paths[i] != NULL) {
|
||||
DynamicLibrary lib = dynamic_library_open(paths[i]);
|
||||
if (lib != NULL) {
|
||||
return lib;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Implementation function. */
|
||||
static void hipewHipExit(void) {
|
||||
if (hip_lib != NULL) {
|
||||
/* Ignore errors. */
|
||||
dynamic_library_close(hip_lib);
|
||||
hip_lib = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static int hipewHipInit(void) {
|
||||
/* Library paths. */
|
||||
#ifdef _WIN32
|
||||
/* Expected in c:/windows/system or similar, no path needed. */
|
||||
const char *hip_paths[] = {"amdhip64.dll", NULL};
|
||||
#elif defined(__APPLE__)
|
||||
/* Default installation path. */
|
||||
const char *hip_paths[] = {"", NULL};
|
||||
#else
|
||||
const char *hip_paths[] = {"/opt/rocm/hip/lib/libamdhip64.so", NULL};
|
||||
#endif
|
||||
static int initialized = 0;
|
||||
static int result = 0;
|
||||
int error, driver_version;
|
||||
|
||||
if (initialized) {
|
||||
return result;
|
||||
}
|
||||
|
||||
initialized = 1;
|
||||
|
||||
error = atexit(hipewHipExit);
|
||||
if (error) {
|
||||
result = HIPEW_ERROR_ATEXIT_FAILED;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Load library. */
|
||||
hip_lib = dynamic_library_open_find(hip_paths);
|
||||
|
||||
if (hip_lib == NULL) {
|
||||
result = HIPEW_ERROR_OPEN_FAILED;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Fetch all function pointers. */
|
||||
HIP_LIBRARY_FIND_CHECKED(hipGetErrorName);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipInit);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDriverGetVersion);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRetain);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRelease);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxSetFlags);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxGetState);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxReset);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxCreate);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxDestroy);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxPushCurrent);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxPopCurrent);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxSetCurrent);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxGetCurrent);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxGetDevice);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxGetFlags);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxSynchronize);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceSynchronize);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxGetCacheConfig);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxSetCacheConfig);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxGetSharedMemConfig);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxSetSharedMemConfig);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxGetApiVersion);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipModuleLoad);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipModuleLoadData);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipModuleLoadDataEx);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipModuleUnload);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipModuleGetFunction);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipModuleGetGlobal);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipModuleGetTexRef);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemGetInfo);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMalloc);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemAllocPitch);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipFree);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemGetAddressRange);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipHostMalloc);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipHostFree);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipHostGetDevicePointer);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipHostGetFlags);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMallocManaged);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetByPCIBusId);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetPCIBusId);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemcpyPeer);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoD);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoH);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoD);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2D);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3D);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoDAsync);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoHAsync);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy2DUnaligned);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2DAsync);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3DAsync);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemsetD8);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemsetD16);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemsetD32);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemsetD8Async);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemsetD16Async);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipMemsetD32Async);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipArrayCreate);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipArrayDestroy);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipArray3DCreate);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithFlags);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithPriority);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipStreamGetPriority);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipStreamGetFlags);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipStreamWaitEvent);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipStreamAddCallback);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipStreamQuery);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipStreamSynchronize);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipStreamDestroy);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipEventCreateWithFlags);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipEventRecord);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipEventQuery);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipEventSynchronize);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipEventDestroy);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipEventElapsedTime);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipFuncGetAttribute);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipFuncSetCacheConfig);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipModuleLaunchKernel);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipModuleOccupancyMaxPotentialBlockSize);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetArray);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddress);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddress2D);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFormat);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddressMode);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFilterMode);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFlags);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipTexRefGetAddress);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipTexRefGetAddressMode);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipTexObjectCreate);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipTexObjectDestroy);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceCanAccessPeer);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxEnablePeerAccess);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipCtxDisablePeerAccess);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetP2PAttribute);
|
||||
#ifdef _WIN32
|
||||
HIP_LIBRARY_FIND_CHECKED(hipGraphicsUnregisterResource);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipGraphicsMapResources);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipGraphicsUnmapResources);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipGraphicsResourceGetMappedPointer);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipGraphicsGLRegisterBuffer);
|
||||
HIP_LIBRARY_FIND_CHECKED(hipGLGetDevices);
|
||||
#endif
|
||||
result = HIPEW_SUCCESS;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Public entry point: initialize the requested hipew subsystems.
 *
 * When `flags` contains HIPEW_INIT_HIP the HIP runtime is loaded and its
 * symbols resolved.  Returns HIPEW_SUCCESS, or the first error code
 * reported by a subsystem initializer. */
int hipewInit(hipuint32_t flags) {
  if ((flags & HIPEW_INIT_HIP) != 0) {
    const int status = hipewHipInit();
    if (status != HIPEW_SUCCESS) {
      return status;
    }
  }
  return HIPEW_SUCCESS;
}
|
||||
|
||||
|
||||
/* Map a HIP error code to a short human-readable description.
 *
 * Returns a pointer to a static string literal; the caller must not free
 * or modify it.  Unrecognized codes yield "Unknown HIP error value". */
const char *hipewErrorString(hipError_t result) {
  switch (result) {
    case hipSuccess: return "No errors";
    case hipErrorInvalidValue: return "Invalid value";
    case hipErrorOutOfMemory: return "Out of memory";
    case hipErrorNotInitialized: return "Driver not initialized";
    case hipErrorDeinitialized: return "Driver deinitialized";
    case hipErrorProfilerDisabled: return "Profiler disabled";
    case hipErrorProfilerNotInitialized: return "Profiler not initialized";
    case hipErrorProfilerAlreadyStarted: return "Profiler already started";
    case hipErrorProfilerAlreadyStopped: return "Profiler already stopped";
    case hipErrorNoDevice: return "No HIP-capable device available";
    case hipErrorInvalidDevice: return "Invalid device";
    case hipErrorInvalidImage: return "Invalid kernel image";
    case hipErrorInvalidContext: return "Invalid context";
    case hipErrorContextAlreadyCurrent: return "Context already current";
    case hipErrorMapFailed: return "Map failed";
    case hipErrorUnmapFailed: return "Unmap failed";
    case hipErrorArrayIsMapped: return "Array is mapped";
    case hipErrorAlreadyMapped: return "Already mapped";
    case hipErrorNoBinaryForGpu: return "No binary for GPU";
    case hipErrorAlreadyAcquired: return "Already acquired";
    case hipErrorNotMapped: return "Not mapped";
    case hipErrorNotMappedAsArray: return "Mapped resource not available for access as an array";
    case hipErrorNotMappedAsPointer: return "Mapped resource not available for access as a pointer";
    case hipErrorECCNotCorrectable: return "Uncorrectable ECC error detected";
    case hipErrorUnsupportedLimit: return "hipLimit_t not supported by device";
    case hipErrorContextAlreadyInUse: return "Context already in use";
    case hipErrorPeerAccessUnsupported: return "Peer access unsupported";
    case hipErrorInvalidKernelFile: return "Invalid ptx";
    case hipErrorInvalidGraphicsContext: return "Invalid graphics context";
    case hipErrorInvalidSource: return "Invalid source";
    case hipErrorFileNotFound: return "File not found";
    case hipErrorSharedObjectSymbolNotFound: return "Link to a shared object failed to resolve";
    case hipErrorSharedObjectInitFailed: return "Shared object initialization failed";
    case hipErrorOperatingSystem: return "Operating system";
    case hipErrorInvalidHandle: return "Invalid handle";
    case hipErrorNotFound: return "Not found";
    case hipErrorNotReady: return "HIP not ready";
    case hipErrorIllegalAddress: return "Illegal address";
    case hipErrorLaunchOutOfResources: return "Launch exceeded resources";
    case hipErrorLaunchTimeOut: return "Launch exceeded timeout";
    case hipErrorPeerAccessAlreadyEnabled: return "Peer access already enabled";
    case hipErrorPeerAccessNotEnabled: return "Peer access not enabled";
    case hipErrorSetOnActiveProcess: return "Primary context active";
    case hipErrorAssert: return "Assert";
    case hipErrorHostMemoryAlreadyRegistered: return "Host memory already registered";
    case hipErrorHostMemoryNotRegistered: return "Host memory not registered";
    case hipErrorLaunchFailure: return "Launch failed";
    case hipErrorCooperativeLaunchTooLarge: return "Cooperative launch too large";
    case hipErrorNotSupported: return "Not supported";
    case hipErrorUnknown: return "Unknown error";
    /* Fallback for codes added to HIP after this table was written. */
    default: return "Unknown HIP error value";
  }
}
|
||||
|
||||
/* Join two path components with the platform separator into `result`,
 * writing at most `maxlen` bytes and always NUL-terminating. */
static void path_join(const char *path1,
                      const char *path2,
                      int maxlen,
                      char *result) {
#if defined(WIN32) || defined(_WIN32)
  const char separator = '\\';
#else
  const char separator = '/';
#endif
  const int written = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
  if (written == -1 || written >= maxlen) {
    /* Truncated (or a pre-C99 _snprintf failure): terminate manually,
     * since old implementations do not guarantee a trailing NUL. */
    result[maxlen - 1] = '\0';
  }
  else {
    result[written] = '\0';
  }
}
|
||||
|
||||
/* Return 1 when `path` exists on the filesystem (stat() succeeds),
 * 0 otherwise. */
static int path_exists(const char *path) {
  struct stat st;
  return (stat(path, &st) == 0) ? 1 : 0;
}
|
||||
|
||||
/* Locate the hipcc compiler and return the command string used to invoke
 * it, or NULL when it cannot be found.
 *
 * The returned pointer refers to static storage (or a string literal) and
 * must not be freed.  Lookup order: the platform's default install
 * location first, then whatever `which`/`where` finds on PATH.
 *
 * Fixes: the snprintf bound now matches the destination buffer
 * `finalCommand` (was sizeof(hipcc)), and the Linux default path is now
 * absolute ("/opt/rocm/hip/bin"; the old relative "opt/rocm/hip/bin"
 * could never match the standard install). */
const char *hipewCompilerPath(void) {
#ifdef _WIN32
  const char *hipPath = getenv("HIP_ROCCLR_HOME");
  const char *windowsCommand = "perl ";
  const char *executable = "bin/hipcc";

  static char hipcc[65536];
  static char finalCommand[65536];
  if (hipPath) {
    path_join(hipPath, executable, sizeof(hipcc), hipcc);
    if (path_exists(hipcc)) {
      /* hipcc is a perl script on Windows, so prefix the interpreter. */
      snprintf(finalCommand, sizeof(finalCommand), "%s %s", windowsCommand, hipcc);
      return finalCommand;
    }
    else {
      printf("Could not find hipcc. Make sure HIP_ROCCLR_HOME points to the directory holding /bin/hipcc");
    }
  }
#else
  const char *hipPath = "/opt/rocm/hip/bin";
  const char *executable = "hipcc";

  static char hipcc[65536];
  path_join(hipPath, executable, sizeof(hipcc), hipcc);
  if (path_exists(hipcc)) {
    return hipcc;
  }
#endif

  /* Fall back to searching PATH through the shell. */
  {
#ifdef _WIN32
    FILE *handle = popen("where hipcc", "r");
#else
    FILE *handle = popen("which hipcc", "r");
#endif
    if (handle) {
      char buffer[4096] = {0};
      size_t len = fread(buffer, 1, sizeof(buffer) - 1, handle);
      buffer[len] = '\0';
      pclose(handle);
      /* Any output at all means the shell resolved the name. */
      if (buffer[0]) {
        return "hipcc";
      }
    }
  }

  return NULL;
}
|
||||
|
||||
/* Report the version of the hipcc compiler found by hipewCompilerPath().
 *
 * Runs `"<path>" --version` and drains its output to confirm the compiler
 * is executable.  Version parsing of the captured output is not
 * implemented yet, so a fixed value of 40 is reported on success.
 * Returns 0 when the compiler cannot be located or executed.
 *
 * Fixes: the command is now built with one bounded snprintf (the old
 * strncat bounds ignored the bytes already in `command`, so a long path
 * could overflow it); removed the unused locals `marker`, `major`,
 * `minor` and `versionstr`. */
int hipewCompilerVersion(void) {
  const char *path = hipewCompilerPath();
  FILE *pipe;
  char buf[128];
  char output[65536] = "\0";
  char command[65536] = "\0";

  if (path == NULL) {
    return 0;
  }

  /* Build `"<path>" --version`; snprintf guarantees termination and never
   * writes past the buffer. */
  snprintf(command, sizeof(command), "\"%s\" --version", path);

  pipe = popen(command, "r");
  if (!pipe) {
    fprintf(stderr, "HIP: failed to run compiler to retrieve version");
    return 0;
  }

  /* Drain the compiler's stdout so the child can exit cleanly. */
  while (!feof(pipe)) {
    if (fgets(buf, sizeof(buf), pipe) != NULL) {
      strncat(output, buf, sizeof(output) - strlen(output) - 1);
    }
  }

  pclose(pipe);
  /* TODO: parse `output` for the actual version instead of hardcoding. */
  return 40;
}
|
2
extern/smaa_areatex/smaa_areatex.cpp
vendored
2
extern/smaa_areatex/smaa_areatex.cpp
vendored
@@ -574,8 +574,6 @@ Dbl2 AreaDiag::area(Dbl2 p1, Dbl2 p2, int left)
|
||||
Dbl2 d = p2 - p1;
|
||||
if (d.x == 0.0)
|
||||
return Dbl2(0.0, 1.0);
|
||||
if (d.y == 0.0)
|
||||
return Dbl2(1.0, 0.0);
|
||||
|
||||
double x1 = (double)(1 + left);
|
||||
double x2 = x1 + 1.0;
|
||||
|
3
extern/tinygltf/README.blender
vendored
3
extern/tinygltf/README.blender
vendored
@@ -2,5 +2,4 @@ Project: TinyGLTF
|
||||
URL: https://github.com/syoyo/tinygltf
|
||||
License: MIT
|
||||
Upstream version: 2.5.0, 19a41d20ec0
|
||||
Local modifications:
|
||||
* Silence "enum value not handled in switch" warnings due to JSON dependency.
|
||||
Local modifications: None
|
||||
|
1
extern/tinygltf/tiny_gltf.h
vendored
1
extern/tinygltf/tiny_gltf.h
vendored
@@ -3201,7 +3201,6 @@ static bool ParseJsonAsValue(Value *ret, const json &o) {
|
||||
val = Value(o.get<double>());
|
||||
break;
|
||||
case json::value_t::null:
|
||||
case json::value_t::binary:
|
||||
case json::value_t::discarded:
|
||||
// default:
|
||||
break;
|
||||
|
@@ -297,7 +297,6 @@ endif()
|
||||
|
||||
if(WITH_CYCLES_STANDALONE)
|
||||
set(WITH_CYCLES_DEVICE_CUDA TRUE)
|
||||
set(WITH_CYCLES_DEVICE_HIP TRUE)
|
||||
endif()
|
||||
# TODO(sergey): Consider removing it, only causes confusion in interface.
|
||||
set(WITH_CYCLES_DEVICE_MULTI TRUE)
|
||||
|
@@ -64,8 +64,6 @@ if(WITH_CYCLES_STANDALONE)
|
||||
cycles_standalone.cpp
|
||||
cycles_xml.cpp
|
||||
cycles_xml.h
|
||||
oiio_output_driver.cpp
|
||||
oiio_output_driver.h
|
||||
)
|
||||
add_executable(cycles ${SRC} ${INC} ${INC_SYS})
|
||||
unset(SRC)
|
||||
@@ -75,7 +73,7 @@ if(WITH_CYCLES_STANDALONE)
|
||||
|
||||
if(APPLE)
|
||||
if(WITH_OPENCOLORIO)
|
||||
set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework IOKit -framework Carbon")
|
||||
set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework IOKit")
|
||||
endif()
|
||||
if(WITH_OPENIMAGEDENOISE AND "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
|
||||
# OpenImageDenoise uses BNNS from the Accelerate framework.
|
||||
|
@@ -36,9 +36,6 @@
|
||||
#include "util/util_unique_ptr.h"
|
||||
#include "util/util_version.h"
|
||||
|
||||
#include "app/cycles_xml.h"
|
||||
#include "app/oiio_output_driver.h"
|
||||
|
||||
#ifdef WITH_CYCLES_STANDALONE_GUI
|
||||
# include "util/util_view.h"
|
||||
#endif
|
||||
@@ -56,8 +53,7 @@ struct Options {
|
||||
SessionParams session_params;
|
||||
bool quiet;
|
||||
bool show_help, interactive, pause;
|
||||
string output_filepath;
|
||||
string output_pass;
|
||||
string output_path;
|
||||
} options;
|
||||
|
||||
static void session_print(const string &str)
|
||||
@@ -93,6 +89,30 @@ static void session_print_status()
|
||||
session_print(status);
|
||||
}
|
||||
|
||||
static bool write_render(const uchar *pixels, int w, int h, int channels)
|
||||
{
|
||||
string msg = string_printf("Writing image %s", options.output_path.c_str());
|
||||
session_print(msg);
|
||||
|
||||
unique_ptr<ImageOutput> out = unique_ptr<ImageOutput>(ImageOutput::create(options.output_path));
|
||||
if (!out) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ImageSpec spec(w, h, channels, TypeDesc::UINT8);
|
||||
if (!out->open(options.output_path, spec)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* conversion for different top/bottom convention */
|
||||
out->write_image(
|
||||
TypeDesc::UINT8, pixels + (h - 1) * w * channels, AutoStride, -w * channels, AutoStride);
|
||||
|
||||
out->close();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static BufferParams &session_buffer_params()
|
||||
{
|
||||
static BufferParams buffer_params;
|
||||
@@ -127,14 +147,9 @@ static void scene_init()
|
||||
|
||||
static void session_init()
|
||||
{
|
||||
options.output_pass = "combined";
|
||||
options.session_params.write_render_cb = write_render;
|
||||
options.session = new Session(options.session_params, options.scene_params);
|
||||
|
||||
if (!options.output_filepath.empty()) {
|
||||
options.session->set_output_driver(make_unique<OIIOOutputDriver>(
|
||||
options.output_filepath, options.output_pass, session_print));
|
||||
}
|
||||
|
||||
if (options.session_params.background && !options.quiet)
|
||||
options.session->progress.set_update_callback(function_bind(&session_print_status));
|
||||
#ifdef WITH_CYCLES_STANDALONE_GUI
|
||||
@@ -145,12 +160,7 @@ static void session_init()
|
||||
/* load scene */
|
||||
scene_init();
|
||||
|
||||
/* add pass for output. */
|
||||
Pass *pass = options.scene->create_node<Pass>();
|
||||
pass->set_name(ustring(options.output_pass.c_str()));
|
||||
pass->set_type(PASS_COMBINED);
|
||||
|
||||
options.session->reset(options.session_params, session_buffer_params());
|
||||
options.session->reset(session_buffer_params(), options.session_params.samples);
|
||||
options.session->start();
|
||||
}
|
||||
|
||||
@@ -212,7 +222,9 @@ static void display_info(Progress &progress)
|
||||
|
||||
static void display()
|
||||
{
|
||||
options.session->draw();
|
||||
static DeviceDrawParams draw_params = DeviceDrawParams();
|
||||
|
||||
options.session->draw(session_buffer_params(), draw_params);
|
||||
|
||||
display_info(options.session->progress);
|
||||
}
|
||||
@@ -242,7 +254,7 @@ static void motion(int x, int y, int button)
|
||||
options.session->scene->camera->need_flags_update = true;
|
||||
options.session->scene->camera->need_device_update = true;
|
||||
|
||||
options.session->reset(options.session_params, session_buffer_params());
|
||||
options.session->reset(session_buffer_params(), options.session_params.samples);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -259,7 +271,7 @@ static void resize(int width, int height)
|
||||
options.session->scene->camera->need_flags_update = true;
|
||||
options.session->scene->camera->need_device_update = true;
|
||||
|
||||
options.session->reset(options.session_params, session_buffer_params());
|
||||
options.session->reset(session_buffer_params(), options.session_params.samples);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -271,7 +283,7 @@ static void keyboard(unsigned char key)
|
||||
|
||||
/* Reset */
|
||||
else if (key == 'r')
|
||||
options.session->reset(options.session_params, session_buffer_params());
|
||||
options.session->reset(session_buffer_params(), options.session_params.samples);
|
||||
|
||||
/* Cancel */
|
||||
else if (key == 27) // escape
|
||||
@@ -308,7 +320,7 @@ static void keyboard(unsigned char key)
|
||||
options.session->scene->camera->need_flags_update = true;
|
||||
options.session->scene->camera->need_device_update = true;
|
||||
|
||||
options.session->reset(options.session_params, session_buffer_params());
|
||||
options.session->reset(session_buffer_params(), options.session_params.samples);
|
||||
}
|
||||
|
||||
/* Set Max Bounces */
|
||||
@@ -334,7 +346,7 @@ static void keyboard(unsigned char key)
|
||||
|
||||
options.session->scene->integrator->set_max_bounce(bounce);
|
||||
|
||||
options.session->reset(options.session_params, session_buffer_params());
|
||||
options.session->reset(session_buffer_params(), options.session_params.samples);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -349,13 +361,11 @@ static int files_parse(int argc, const char *argv[])
|
||||
|
||||
static void options_parse(int argc, const char **argv)
|
||||
{
|
||||
options.width = 1024;
|
||||
options.height = 512;
|
||||
options.width = 0;
|
||||
options.height = 0;
|
||||
options.filepath = "";
|
||||
options.session = NULL;
|
||||
options.quiet = false;
|
||||
options.session_params.use_auto_tile = false;
|
||||
options.session_params.tile_size = 0;
|
||||
|
||||
/* device names */
|
||||
string device_names = "";
|
||||
@@ -401,7 +411,7 @@ static void options_parse(int argc, const char **argv)
|
||||
&options.session_params.samples,
|
||||
"Number of samples to render",
|
||||
"--output %s",
|
||||
&options.output_filepath,
|
||||
&options.output_path,
|
||||
"File path to write output image",
|
||||
"--threads %d",
|
||||
&options.session_params.threads,
|
||||
@@ -412,9 +422,12 @@ static void options_parse(int argc, const char **argv)
|
||||
"--height %d",
|
||||
&options.height,
|
||||
"Window height in pixel",
|
||||
"--tile-size %d",
|
||||
&options.session_params.tile_size,
|
||||
"Tile size in pixels",
|
||||
"--tile-width %d",
|
||||
&options.session_params.tile_size.x,
|
||||
"Tile width in pixels",
|
||||
"--tile-height %d",
|
||||
&options.session_params.tile_size.y,
|
||||
"Tile height in pixels",
|
||||
"--list-devices",
|
||||
&list,
|
||||
"List information about all available devices",
|
||||
@@ -476,9 +489,8 @@ static void options_parse(int argc, const char **argv)
|
||||
options.session_params.background = true;
|
||||
#endif
|
||||
|
||||
if (options.session_params.tile_size > 0) {
|
||||
options.session_params.use_auto_tile = true;
|
||||
}
|
||||
/* Use progressive rendering */
|
||||
options.session_params.progressive = true;
|
||||
|
||||
/* find matching device */
|
||||
DeviceType device_type = Device::type_from_string(devicename.c_str());
|
||||
|
@@ -333,7 +333,6 @@ static void xml_read_shader_graph(XMLReadState &state, Shader *shader, xml_node
|
||||
}
|
||||
|
||||
snode = (ShaderNode *)node_type->create(node_type);
|
||||
snode->set_owner(graph);
|
||||
}
|
||||
|
||||
xml_read_node(graph_reader, snode, node);
|
||||
|
@@ -1,71 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "app/oiio_output_driver.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
OIIOOutputDriver::OIIOOutputDriver(const string_view filepath,
|
||||
const string_view pass,
|
||||
LogFunction log)
|
||||
: filepath_(filepath), pass_(pass), log_(log)
|
||||
{
|
||||
}
|
||||
|
||||
OIIOOutputDriver::~OIIOOutputDriver()
|
||||
{
|
||||
}
|
||||
|
||||
void OIIOOutputDriver::write_render_tile(const Tile &tile)
|
||||
{
|
||||
/* Only write the full buffer, no intermediate tiles. */
|
||||
if (!(tile.size == tile.full_size)) {
|
||||
return;
|
||||
}
|
||||
|
||||
log_(string_printf("Writing image %s", filepath_.c_str()));
|
||||
|
||||
unique_ptr<ImageOutput> image_output(ImageOutput::create(filepath_));
|
||||
if (image_output == nullptr) {
|
||||
log_("Failed to create image file");
|
||||
return;
|
||||
}
|
||||
|
||||
const int width = tile.size.x;
|
||||
const int height = tile.size.y;
|
||||
|
||||
ImageSpec spec(width, height, 4, TypeDesc::FLOAT);
|
||||
if (!image_output->open(filepath_, spec)) {
|
||||
log_("Failed to create image file");
|
||||
return;
|
||||
}
|
||||
|
||||
vector<float> pixels(width * height * 4);
|
||||
if (!tile.get_pass_pixels(pass_, 4, pixels.data())) {
|
||||
log_("Failed to read render pass pixels");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Manipulate offset and stride to convert from bottom-up to top-down convention. */
|
||||
image_output->write_image(TypeDesc::FLOAT,
|
||||
pixels.data() + (height - 1) * width * 4,
|
||||
AutoStride,
|
||||
-width * 4 * sizeof(float),
|
||||
AutoStride);
|
||||
image_output->close();
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "render/output_driver.h"
|
||||
|
||||
#include "util/util_function.h"
|
||||
#include "util/util_image.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_unique_ptr.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class OIIOOutputDriver : public OutputDriver {
|
||||
public:
|
||||
typedef function<void(const string &)> LogFunction;
|
||||
|
||||
OIIOOutputDriver(const string_view filepath, const string_view pass, LogFunction log);
|
||||
virtual ~OIIOOutputDriver();
|
||||
|
||||
void write_render_tile(const Tile &tile) override;
|
||||
|
||||
protected:
|
||||
string filepath_;
|
||||
string pass_;
|
||||
LogFunction log_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -31,14 +31,13 @@ set(INC_SYS
|
||||
set(SRC
|
||||
blender_camera.cpp
|
||||
blender_device.cpp
|
||||
blender_display_driver.cpp
|
||||
blender_image.cpp
|
||||
blender_geometry.cpp
|
||||
blender_gpu_display.cpp
|
||||
blender_light.cpp
|
||||
blender_mesh.cpp
|
||||
blender_object.cpp
|
||||
blender_object_cull.cpp
|
||||
blender_output_driver.cpp
|
||||
blender_particles.cpp
|
||||
blender_curves.cpp
|
||||
blender_logging.cpp
|
||||
@@ -52,11 +51,10 @@ set(SRC
|
||||
|
||||
CCL_api.h
|
||||
blender_device.h
|
||||
blender_display_driver.h
|
||||
blender_gpu_display.h
|
||||
blender_id_map.h
|
||||
blender_image.h
|
||||
blender_object_cull.h
|
||||
blender_output_driver.h
|
||||
blender_sync.h
|
||||
blender_session.h
|
||||
blender_texture.h
|
||||
@@ -97,9 +95,6 @@ set(ADDON_FILES
|
||||
|
||||
add_definitions(${GL_DEFINITIONS})
|
||||
|
||||
if(WITH_CYCLES_DEVICE_HIP)
|
||||
add_definitions(-DWITH_HIP)
|
||||
endif()
|
||||
if(WITH_MOD_FLUID)
|
||||
add_definitions(-DWITH_FLUID)
|
||||
endif()
|
||||
|
@@ -28,7 +28,7 @@ def _configure_argument_parser():
|
||||
action='store_true')
|
||||
parser.add_argument("--cycles-device",
|
||||
help="Set the device to use for Cycles, overriding user preferences and the scene setting."
|
||||
"Valid options are 'CPU', 'CUDA', 'OPTIX', or 'HIP'"
|
||||
"Valid options are 'CPU', 'CUDA' or 'OPTIX'."
|
||||
"Additionally, you can append '+CPU' to any GPU type for hybrid rendering.",
|
||||
default=None)
|
||||
return parser
|
||||
@@ -211,6 +211,7 @@ def list_render_passes(scene, srl):
|
||||
if crl.use_pass_shadow_catcher: yield ("Shadow Catcher", "RGB", 'COLOR')
|
||||
|
||||
# Debug passes.
|
||||
if crl.pass_debug_render_time: yield ("Debug Render Time", "X", 'VALUE')
|
||||
if crl.pass_debug_sample_count: yield ("Debug Sample Count", "X", 'VALUE')
|
||||
|
||||
# Cryptomatte passes.
|
||||
|
@@ -111,7 +111,6 @@ enum_device_type = (
|
||||
('CPU', "CPU", "CPU", 0),
|
||||
('CUDA', "CUDA", "CUDA", 1),
|
||||
('OPTIX', "OptiX", "OptiX", 3),
|
||||
("HIP", "HIP", "HIP", 4)
|
||||
)
|
||||
|
||||
enum_texture_limit = (
|
||||
@@ -124,7 +123,7 @@ enum_texture_limit = (
|
||||
('4096', "4096", "Limit texture size to 4096 pixels", 6),
|
||||
('8192', "8192", "Limit texture size to 8192 pixels", 7),
|
||||
)
|
||||
|
||||
|
||||
# NOTE: Identifiers are expected to be an upper case version of identifiers from `Pass::get_type_enum()`
|
||||
enum_view3d_shading_render_pass = (
|
||||
('', "General", ""),
|
||||
@@ -740,14 +739,14 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
|
||||
use_auto_tile: BoolProperty(
|
||||
name="Auto Tiles",
|
||||
description="Automatically render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
|
||||
description="Automatically split image into tiles",
|
||||
default=True,
|
||||
)
|
||||
tile_size: IntProperty(
|
||||
name="Tile Size",
|
||||
default=2048,
|
||||
description="",
|
||||
min=8, max=16384,
|
||||
min=0, max=16384,
|
||||
)
|
||||
|
||||
# Various fine-tuning debug flags
|
||||
@@ -1197,6 +1196,12 @@ class CyclesCurveRenderSettings(bpy.types.PropertyGroup):
|
||||
|
||||
class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
|
||||
|
||||
pass_debug_render_time: BoolProperty(
|
||||
name="Debug Render Time",
|
||||
description="Render time in milliseconds per sample and pixel",
|
||||
default=False,
|
||||
update=update_render_passes,
|
||||
)
|
||||
pass_debug_sample_count: BoolProperty(
|
||||
name="Debug Sample Count",
|
||||
description="Number of samples/camera rays per pixel",
|
||||
@@ -1261,16 +1266,12 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
|
||||
def get_device_types(self, context):
|
||||
import _cycles
|
||||
has_cuda, has_optix, has_hip = _cycles.get_device_types()
|
||||
|
||||
has_cuda, has_optix = _cycles.get_device_types()
|
||||
list = [('NONE', "None", "Don't use compute device", 0)]
|
||||
if has_cuda:
|
||||
list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
|
||||
if has_optix:
|
||||
list.append(('OPTIX', "OptiX", "Use OptiX for GPU acceleration", 3))
|
||||
if has_hip:
|
||||
list.append(('HIP', "HIP", "Use HIP for GPU acceleration", 4))
|
||||
|
||||
return list
|
||||
|
||||
compute_device_type: EnumProperty(
|
||||
@@ -1295,7 +1296,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
|
||||
def update_device_entries(self, device_list):
|
||||
for device in device_list:
|
||||
if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP'}:
|
||||
if not device[1] in {'CUDA', 'OPTIX', 'CPU'}:
|
||||
continue
|
||||
# Try to find existing Device entry
|
||||
entry = self.find_existing_device_entry(device)
|
||||
@@ -1329,7 +1330,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
elif entry.type == 'CPU':
|
||||
cpu_devices.append(entry)
|
||||
# Extend all GPU devices with CPU.
|
||||
if compute_device_type != 'CPU' and compute_device_type != 'HIP':
|
||||
if compute_device_type != 'CPU':
|
||||
devices.extend(cpu_devices)
|
||||
return devices
|
||||
|
||||
@@ -1339,7 +1340,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
import _cycles
|
||||
# Ensure `self.devices` is not re-allocated when the second call to
|
||||
# get_devices_for_type is made, freeing items from the first list.
|
||||
for device_type in ('CUDA', 'OPTIX', 'HIP'):
|
||||
for device_type in ('CUDA', 'OPTIX', 'OPENCL'):
|
||||
self.update_device_entries(_cycles.available_devices(device_type))
|
||||
|
||||
# Deprecated: use refresh_devices instead.
|
||||
|
@@ -99,11 +99,6 @@ def use_cuda(context):
|
||||
return (get_device_type(context) == 'CUDA' and cscene.device == 'GPU')
|
||||
|
||||
|
||||
def use_hip(context):
|
||||
cscene = context.scene.cycles
|
||||
|
||||
return (get_device_type(context) == 'HIP' and cscene.device == 'GPU')
|
||||
|
||||
def use_optix(context):
|
||||
cscene = context.scene.cycles
|
||||
|
||||
@@ -618,8 +613,8 @@ class CYCLES_RENDER_PT_performance_threads(CyclesButtonsPanel, Panel):
|
||||
sub.prop(rd, "threads")
|
||||
|
||||
|
||||
class CYCLES_RENDER_PT_performance_memory(CyclesButtonsPanel, Panel):
|
||||
bl_label = "Memory"
|
||||
class CYCLES_RENDER_PT_performance_tiles(CyclesButtonsPanel, Panel):
|
||||
bl_label = "Tiles"
|
||||
bl_parent_id = "CYCLES_RENDER_PT_performance"
|
||||
|
||||
def draw(self, context):
|
||||
@@ -792,6 +787,7 @@ class CYCLES_RENDER_PT_passes_data(CyclesButtonsPanel, Panel):
|
||||
col.prop(view_layer, "use_pass_material_index")
|
||||
|
||||
col = layout.column(heading="Debug", align=True)
|
||||
col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
|
||||
col.prop(cycles_view_layer, "pass_debug_sample_count", text="Sample Count")
|
||||
|
||||
layout.prop(view_layer, "pass_alpha_threshold")
|
||||
@@ -2111,7 +2107,7 @@ classes = (
|
||||
CYCLES_RENDER_PT_film_transparency,
|
||||
CYCLES_RENDER_PT_performance,
|
||||
CYCLES_RENDER_PT_performance_threads,
|
||||
CYCLES_RENDER_PT_performance_memory,
|
||||
CYCLES_RENDER_PT_performance_tiles,
|
||||
CYCLES_RENDER_PT_performance_acceleration_structure,
|
||||
CYCLES_RENDER_PT_performance_final_render,
|
||||
CYCLES_RENDER_PT_performance_viewport,
|
||||
|
@@ -927,9 +927,6 @@ BufferParams BlenderSync::get_buffer_params(
|
||||
params.height = height;
|
||||
}
|
||||
|
||||
params.window_width = params.width;
|
||||
params.window_height = params.height;
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
|
@@ -283,13 +283,10 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
|
||||
return;
|
||||
|
||||
Attribute *attr_intercept = NULL;
|
||||
Attribute *attr_length = NULL;
|
||||
Attribute *attr_random = NULL;
|
||||
|
||||
if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT))
|
||||
attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT);
|
||||
if (hair->need_attribute(scene, ATTR_STD_CURVE_LENGTH))
|
||||
attr_length = hair->attributes.add(ATTR_STD_CURVE_LENGTH);
|
||||
if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM))
|
||||
attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM);
|
||||
|
||||
@@ -339,10 +336,6 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
|
||||
num_curve_keys++;
|
||||
}
|
||||
|
||||
if (attr_length != NULL) {
|
||||
attr_length->add(CData->curve_length[curve]);
|
||||
}
|
||||
|
||||
if (attr_random != NULL) {
|
||||
attr_random->add(hash_uint2_to_float(num_curves, 0));
|
||||
}
|
||||
@@ -664,15 +657,11 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)
|
||||
|
||||
/* Add requested attributes. */
|
||||
Attribute *attr_intercept = NULL;
|
||||
Attribute *attr_length = NULL;
|
||||
Attribute *attr_random = NULL;
|
||||
|
||||
if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) {
|
||||
attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT);
|
||||
}
|
||||
if (hair->need_attribute(scene, ATTR_STD_CURVE_LENGTH)) {
|
||||
attr_length = hair->attributes.add(ATTR_STD_CURVE_LENGTH);
|
||||
}
|
||||
if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM)) {
|
||||
attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM);
|
||||
}
|
||||
@@ -725,10 +714,6 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)
|
||||
}
|
||||
}
|
||||
|
||||
if (attr_length) {
|
||||
attr_length->add(length);
|
||||
}
|
||||
|
||||
/* Random number per curve. */
|
||||
if (attr_random != NULL) {
|
||||
attr_random->add(hash_uint2_to_float(b_curve.index(), 0));
|
||||
|
@@ -26,7 +26,6 @@ enum ComputeDevice {
|
||||
COMPUTE_DEVICE_CPU = 0,
|
||||
COMPUTE_DEVICE_CUDA = 1,
|
||||
COMPUTE_DEVICE_OPTIX = 3,
|
||||
COMPUTE_DEVICE_HIP = 4,
|
||||
|
||||
COMPUTE_DEVICE_NUM
|
||||
};
|
||||
@@ -82,9 +81,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
|
||||
else if (compute_device == COMPUTE_DEVICE_OPTIX) {
|
||||
mask |= DEVICE_MASK_OPTIX;
|
||||
}
|
||||
else if (compute_device == COMPUTE_DEVICE_HIP) {
|
||||
mask |= DEVICE_MASK_HIP;
|
||||
}
|
||||
vector<DeviceInfo> devices = Device::available_devices(mask);
|
||||
|
||||
/* Match device preferences and available devices. */
|
||||
|
@@ -80,11 +80,7 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
|
||||
{
|
||||
/* Test if we can instance or if the object is modified. */
|
||||
Geometry::Type geom_type = determine_geom_type(b_ob_info, use_particle_hair);
|
||||
BL::ID b_key_id = (b_ob_info.is_real_object_data() &&
|
||||
BKE_object_is_modified(b_ob_info.real_object)) ?
|
||||
b_ob_info.real_object :
|
||||
b_ob_info.object_data;
|
||||
GeometryKey key(b_key_id.ptr.data, geom_type);
|
||||
GeometryKey key(b_ob_info.object_data, geom_type);
|
||||
|
||||
/* Find shader indices. */
|
||||
array<Node *> used_shaders = find_used_shaders(b_ob_info.iter_object);
|
||||
@@ -114,7 +110,7 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
|
||||
}
|
||||
else {
|
||||
/* Test if we need to update existing geometry. */
|
||||
sync = geometry_map.update(geom, b_key_id);
|
||||
sync = geometry_map.update(geom, b_ob_info.object_data);
|
||||
}
|
||||
|
||||
if (!sync) {
|
||||
|
@@ -14,7 +14,7 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "blender/blender_display_driver.h"
|
||||
#include "blender/blender_gpu_display.h"
|
||||
|
||||
#include "device/device.h"
|
||||
#include "util/util_logging.h"
|
||||
@@ -273,17 +273,17 @@ uint BlenderDisplaySpaceShader::get_shader_program()
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* BlenderDisplayDriver.
|
||||
* BlenderGPUDisplay.
|
||||
*/
|
||||
|
||||
BlenderDisplayDriver::BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene)
|
||||
BlenderGPUDisplay::BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene)
|
||||
: b_engine_(b_engine), display_shader_(BlenderDisplayShader::create(b_engine, b_scene))
|
||||
{
|
||||
/* Create context while on the main thread. */
|
||||
gl_context_create();
|
||||
}
|
||||
|
||||
BlenderDisplayDriver::~BlenderDisplayDriver()
|
||||
BlenderGPUDisplay::~BlenderGPUDisplay()
|
||||
{
|
||||
gl_resources_destroy();
|
||||
}
|
||||
@@ -292,18 +292,19 @@ BlenderDisplayDriver::~BlenderDisplayDriver()
|
||||
* Update procedure.
|
||||
*/
|
||||
|
||||
bool BlenderDisplayDriver::update_begin(const Params ¶ms,
|
||||
bool BlenderGPUDisplay::do_update_begin(const GPUDisplayParams ¶ms,
|
||||
int texture_width,
|
||||
int texture_height)
|
||||
{
|
||||
/* Note that it's the responsibility of BlenderDisplayDriver to ensure updating and drawing
|
||||
/* Note that it's the responsibility of BlenderGPUDisplay to ensure updating and drawing
|
||||
* the texture does not happen at the same time. This is achieved indirectly.
|
||||
*
|
||||
* When enabling the OpenGL context, it uses an internal mutex lock DST.gl_context_lock.
|
||||
* This same lock is also held when do_draw() is called, which together ensure mutual
|
||||
* exclusion.
|
||||
*
|
||||
* This locking is not performed on the Cycles side, because that would cause lock inversion. */
|
||||
* This locking is not performed at the GPU display level, because that would cause lock
|
||||
* inversion. */
|
||||
if (!gl_context_enable()) {
|
||||
return false;
|
||||
}
|
||||
@@ -360,7 +361,7 @@ bool BlenderDisplayDriver::update_begin(const Params ¶ms,
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlenderDisplayDriver::update_end()
|
||||
void BlenderGPUDisplay::do_update_end()
|
||||
{
|
||||
gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
glFlush();
|
||||
@@ -368,18 +369,54 @@ void BlenderDisplayDriver::update_end()
|
||||
gl_context_disable();
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Texture update from CPU buffer.
|
||||
*/
|
||||
|
||||
void BlenderGPUDisplay::do_copy_pixels_to_texture(
|
||||
const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height)
|
||||
{
|
||||
/* This call copies pixels to a Pixel Buffer Object (PBO) which is much cheaper from CPU time
|
||||
* point of view than to copy data directly to the OpenGL texture.
|
||||
*
|
||||
* The possible downside of this approach is that it might require a higher peak memory when
|
||||
* doing partial updates of the texture (although, in practice even partial updates might peak
|
||||
* with a full-frame buffer stored on the CPU if the GPU is currently occupied). */
|
||||
|
||||
half4 *mapped_rgba_pixels = map_texture_buffer();
|
||||
if (!mapped_rgba_pixels) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (texture_x == 0 && texture_y == 0 && pixels_width == texture_.width &&
|
||||
pixels_height == texture_.height) {
|
||||
const size_t size_in_bytes = sizeof(half4) * texture_.width * texture_.height;
|
||||
memcpy(mapped_rgba_pixels, rgba_pixels, size_in_bytes);
|
||||
}
|
||||
else {
|
||||
const half4 *rgba_row = rgba_pixels;
|
||||
half4 *mapped_rgba_row = mapped_rgba_pixels + texture_y * texture_.width + texture_x;
|
||||
for (int y = 0; y < pixels_height;
|
||||
++y, rgba_row += pixels_width, mapped_rgba_row += texture_.width) {
|
||||
memcpy(mapped_rgba_row, rgba_row, sizeof(half4) * pixels_width);
|
||||
}
|
||||
}
|
||||
|
||||
unmap_texture_buffer();
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Texture buffer mapping.
|
||||
*/
|
||||
|
||||
half4 *BlenderDisplayDriver::map_texture_buffer()
|
||||
half4 *BlenderGPUDisplay::do_map_texture_buffer()
|
||||
{
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
|
||||
|
||||
half4 *mapped_rgba_pixels = reinterpret_cast<half4 *>(
|
||||
glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY));
|
||||
if (!mapped_rgba_pixels) {
|
||||
LOG(ERROR) << "Error mapping BlenderDisplayDriver pixel buffer object.";
|
||||
LOG(ERROR) << "Error mapping BlenderGPUDisplay pixel buffer object.";
|
||||
}
|
||||
|
||||
if (texture_.need_clear) {
|
||||
@@ -394,7 +431,7 @@ half4 *BlenderDisplayDriver::map_texture_buffer()
|
||||
return mapped_rgba_pixels;
|
||||
}
|
||||
|
||||
void BlenderDisplayDriver::unmap_texture_buffer()
|
||||
void BlenderGPUDisplay::do_unmap_texture_buffer()
|
||||
{
|
||||
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
|
||||
|
||||
@@ -405,9 +442,9 @@ void BlenderDisplayDriver::unmap_texture_buffer()
|
||||
* Graphics interoperability.
|
||||
*/
|
||||
|
||||
BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get()
|
||||
DeviceGraphicsInteropDestination BlenderGPUDisplay::do_graphics_interop_get()
|
||||
{
|
||||
GraphicsInterop interop_dst;
|
||||
DeviceGraphicsInteropDestination interop_dst;
|
||||
|
||||
interop_dst.buffer_width = texture_.buffer_width;
|
||||
interop_dst.buffer_height = texture_.buffer_height;
|
||||
@@ -419,12 +456,12 @@ BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get
|
||||
return interop_dst;
|
||||
}
|
||||
|
||||
void BlenderDisplayDriver::graphics_interop_activate()
|
||||
void BlenderGPUDisplay::graphics_interop_activate()
|
||||
{
|
||||
gl_context_enable();
|
||||
}
|
||||
|
||||
void BlenderDisplayDriver::graphics_interop_deactivate()
|
||||
void BlenderGPUDisplay::graphics_interop_deactivate()
|
||||
{
|
||||
gl_context_disable();
|
||||
}
|
||||
@@ -433,21 +470,27 @@ void BlenderDisplayDriver::graphics_interop_deactivate()
|
||||
* Drawing.
|
||||
*/
|
||||
|
||||
void BlenderDisplayDriver::clear()
|
||||
void BlenderGPUDisplay::clear()
|
||||
{
|
||||
texture_.need_clear = true;
|
||||
}
|
||||
|
||||
void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y)
|
||||
void BlenderGPUDisplay::set_zoom(float zoom_x, float zoom_y)
|
||||
{
|
||||
zoom_ = make_float2(zoom_x, zoom_y);
|
||||
}
|
||||
|
||||
void BlenderDisplayDriver::draw(const Params ¶ms)
|
||||
void BlenderGPUDisplay::do_draw(const GPUDisplayParams ¶ms)
|
||||
{
|
||||
/* See do_update_begin() for why no locking is required here. */
|
||||
const bool transparent = true; // TODO(sergey): Derive this from Film.
|
||||
|
||||
if (texture_.need_clear) {
|
||||
/* Texture is requested to be cleared and was not yet cleared.
|
||||
* Do early return which should be equivalent of drawing all-zero texture. */
|
||||
return;
|
||||
}
|
||||
|
||||
if (!gl_draw_resources_ensure()) {
|
||||
return;
|
||||
}
|
||||
@@ -456,16 +499,6 @@ void BlenderDisplayDriver::draw(const Params ¶ms)
|
||||
gl_context_mutex_.lock();
|
||||
}
|
||||
|
||||
if (texture_.need_clear) {
|
||||
/* Texture is requested to be cleared and was not yet cleared.
|
||||
*
|
||||
* Do early return which should be equivalent of drawing all-zero texture.
|
||||
* Watch out for the lock though so that the clear happening during update is properly
|
||||
* synchronized here. */
|
||||
gl_context_mutex_.unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
if (gl_upload_sync_) {
|
||||
glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED);
|
||||
}
|
||||
@@ -491,7 +524,7 @@ void BlenderDisplayDriver::draw(const Params ¶ms)
|
||||
const float zoomed_width = params.size.x * zoom_.x;
|
||||
const float zoomed_height = params.size.y * zoom_.y;
|
||||
if (texture_.width != params.size.x || texture_.height != params.size.y) {
|
||||
/* Resolution divider is different from 1, force nearest interpolation. */
|
||||
/* Resolution divider is different from 1, force enarest interpolation. */
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
}
|
||||
else if (zoomed_width - params.size.x > 0.5f || zoomed_height - params.size.y > 0.5f) {
|
||||
@@ -547,7 +580,7 @@ void BlenderDisplayDriver::draw(const Params ¶ms)
|
||||
}
|
||||
}
|
||||
|
||||
void BlenderDisplayDriver::gl_context_create()
|
||||
void BlenderGPUDisplay::gl_context_create()
|
||||
{
|
||||
/* When rendering in viewport there is no render context available via engine.
|
||||
* Check whether own context is to be created here.
|
||||
@@ -576,7 +609,7 @@ void BlenderDisplayDriver::gl_context_create()
|
||||
}
|
||||
}
|
||||
|
||||
bool BlenderDisplayDriver::gl_context_enable()
|
||||
bool BlenderGPUDisplay::gl_context_enable()
|
||||
{
|
||||
if (use_gl_context_) {
|
||||
if (!gl_context_) {
|
||||
@@ -591,7 +624,7 @@ bool BlenderDisplayDriver::gl_context_enable()
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlenderDisplayDriver::gl_context_disable()
|
||||
void BlenderGPUDisplay::gl_context_disable()
|
||||
{
|
||||
if (use_gl_context_) {
|
||||
if (gl_context_) {
|
||||
@@ -604,7 +637,7 @@ void BlenderDisplayDriver::gl_context_disable()
|
||||
RE_engine_render_context_disable(reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));
|
||||
}
|
||||
|
||||
void BlenderDisplayDriver::gl_context_dispose()
|
||||
void BlenderGPUDisplay::gl_context_dispose()
|
||||
{
|
||||
if (gl_context_) {
|
||||
const bool drw_state = DRW_opengl_context_release();
|
||||
@@ -616,7 +649,7 @@ void BlenderDisplayDriver::gl_context_dispose()
|
||||
}
|
||||
}
|
||||
|
||||
bool BlenderDisplayDriver::gl_draw_resources_ensure()
|
||||
bool BlenderGPUDisplay::gl_draw_resources_ensure()
|
||||
{
|
||||
if (!texture_.gl_id) {
|
||||
/* If there is no texture allocated, there is nothing to draw. Inform the draw call that it can
|
||||
@@ -643,7 +676,7 @@ bool BlenderDisplayDriver::gl_draw_resources_ensure()
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlenderDisplayDriver::gl_resources_destroy()
|
||||
void BlenderGPUDisplay::gl_resources_destroy()
|
||||
{
|
||||
gl_context_enable();
|
||||
|
||||
@@ -666,7 +699,7 @@ void BlenderDisplayDriver::gl_resources_destroy()
|
||||
gl_context_dispose();
|
||||
}
|
||||
|
||||
bool BlenderDisplayDriver::gl_texture_resources_ensure()
|
||||
bool BlenderGPUDisplay::gl_texture_resources_ensure()
|
||||
{
|
||||
if (texture_.creation_attempted) {
|
||||
return texture_.is_created;
|
||||
@@ -703,7 +736,7 @@ bool BlenderDisplayDriver::gl_texture_resources_ensure()
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlenderDisplayDriver::texture_update_if_needed()
|
||||
void BlenderGPUDisplay::texture_update_if_needed()
|
||||
{
|
||||
if (!texture_.need_update) {
|
||||
return;
|
||||
@@ -717,7 +750,7 @@ void BlenderDisplayDriver::texture_update_if_needed()
|
||||
texture_.need_update = false;
|
||||
}
|
||||
|
||||
void BlenderDisplayDriver::vertex_buffer_update(const Params ¶ms)
|
||||
void BlenderGPUDisplay::vertex_buffer_update(const GPUDisplayParams ¶ms)
|
||||
{
|
||||
/* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be
|
||||
* rendered. */
|
||||
@@ -730,23 +763,23 @@ void BlenderDisplayDriver::vertex_buffer_update(const Params ¶ms)
|
||||
|
||||
vpointer[0] = 0.0f;
|
||||
vpointer[1] = 0.0f;
|
||||
vpointer[2] = params.full_offset.x;
|
||||
vpointer[3] = params.full_offset.y;
|
||||
vpointer[2] = params.offset.x;
|
||||
vpointer[3] = params.offset.y;
|
||||
|
||||
vpointer[4] = 1.0f;
|
||||
vpointer[5] = 0.0f;
|
||||
vpointer[6] = (float)params.size.x + params.full_offset.x;
|
||||
vpointer[7] = params.full_offset.y;
|
||||
vpointer[6] = (float)params.size.x + params.offset.x;
|
||||
vpointer[7] = params.offset.y;
|
||||
|
||||
vpointer[8] = 1.0f;
|
||||
vpointer[9] = 1.0f;
|
||||
vpointer[10] = (float)params.size.x + params.full_offset.x;
|
||||
vpointer[11] = (float)params.size.y + params.full_offset.y;
|
||||
vpointer[10] = (float)params.size.x + params.offset.x;
|
||||
vpointer[11] = (float)params.size.y + params.offset.y;
|
||||
|
||||
vpointer[12] = 0.0f;
|
||||
vpointer[13] = 1.0f;
|
||||
vpointer[14] = params.full_offset.x;
|
||||
vpointer[15] = (float)params.size.y + params.full_offset.y;
|
||||
vpointer[14] = params.offset.x;
|
||||
vpointer[15] = (float)params.size.y + params.offset.y;
|
||||
|
||||
glUnmapBuffer(GL_ARRAY_BUFFER);
|
||||
}
|
@@ -22,14 +22,12 @@
|
||||
|
||||
#include "RNA_blender_cpp.h"
|
||||
|
||||
#include "render/display_driver.h"
|
||||
|
||||
#include "util/util_thread.h"
|
||||
#include "render/gpu_display.h"
|
||||
#include "util/util_unique_ptr.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Base class of shader used for display driver rendering. */
|
||||
/* Base class of shader used for GPU display rendering. */
|
||||
class BlenderDisplayShader {
|
||||
public:
|
||||
static constexpr const char *position_attribute_name = "pos";
|
||||
@@ -98,11 +96,11 @@ class BlenderDisplaySpaceShader : public BlenderDisplayShader {
|
||||
uint shader_program_ = 0;
|
||||
};
|
||||
|
||||
/* Display driver implementation which is specific for Blender viewport integration. */
|
||||
class BlenderDisplayDriver : public DisplayDriver {
|
||||
/* GPU display implementation which is specific for Blender viewport integration. */
|
||||
class BlenderGPUDisplay : public GPUDisplay {
|
||||
public:
|
||||
BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene);
|
||||
~BlenderDisplayDriver();
|
||||
BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene);
|
||||
~BlenderGPUDisplay();
|
||||
|
||||
virtual void graphics_interop_activate() override;
|
||||
virtual void graphics_interop_deactivate() override;
|
||||
@@ -112,15 +110,22 @@ class BlenderDisplayDriver : public DisplayDriver {
|
||||
void set_zoom(float zoom_x, float zoom_y);
|
||||
|
||||
protected:
|
||||
virtual bool update_begin(const Params ¶ms, int texture_width, int texture_height) override;
|
||||
virtual void update_end() override;
|
||||
virtual bool do_update_begin(const GPUDisplayParams ¶ms,
|
||||
int texture_width,
|
||||
int texture_height) override;
|
||||
virtual void do_update_end() override;
|
||||
|
||||
virtual half4 *map_texture_buffer() override;
|
||||
virtual void unmap_texture_buffer() override;
|
||||
virtual void do_copy_pixels_to_texture(const half4 *rgba_pixels,
|
||||
int texture_x,
|
||||
int texture_y,
|
||||
int pixels_width,
|
||||
int pixels_height) override;
|
||||
virtual void do_draw(const GPUDisplayParams ¶ms) override;
|
||||
|
||||
virtual GraphicsInterop graphics_interop_get() override;
|
||||
virtual half4 *do_map_texture_buffer() override;
|
||||
virtual void do_unmap_texture_buffer() override;
|
||||
|
||||
virtual void draw(const Params ¶ms) override;
|
||||
virtual DeviceGraphicsInteropDestination do_graphics_interop_get() override;
|
||||
|
||||
/* Helper function which allocates new GPU context. */
|
||||
void gl_context_create();
|
||||
@@ -147,13 +152,13 @@ class BlenderDisplayDriver : public DisplayDriver {
|
||||
* This buffer is used to render texture in the viewport.
|
||||
*
|
||||
* NOTE: The buffer needs to be bound. */
|
||||
void vertex_buffer_update(const Params ¶ms);
|
||||
void vertex_buffer_update(const GPUDisplayParams ¶ms);
|
||||
|
||||
BL::RenderEngine b_engine_;
|
||||
|
||||
/* OpenGL context which is used the render engine doesn't have its own. */
|
||||
void *gl_context_ = nullptr;
|
||||
/* The when Blender RenderEngine side context is not available and the DisplayDriver is to create
|
||||
/* The when Blender RenderEngine side context is not available and the GPUDisplay is to create
|
||||
* its own context. */
|
||||
bool use_gl_context_ = false;
|
||||
/* Mutex used to guard the `gl_context_`. */
|
@@ -1,126 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "blender/blender_output_driver.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
BlenderOutputDriver::BlenderOutputDriver(BL::RenderEngine &b_engine) : b_engine_(b_engine)
|
||||
{
|
||||
}
|
||||
|
||||
BlenderOutputDriver::~BlenderOutputDriver()
|
||||
{
|
||||
}
|
||||
|
||||
bool BlenderOutputDriver::read_render_tile(const Tile &tile)
|
||||
{
|
||||
/* Get render result. */
|
||||
BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
|
||||
tile.offset.y,
|
||||
tile.size.x,
|
||||
tile.size.y,
|
||||
tile.layer.c_str(),
|
||||
tile.view.c_str());
|
||||
|
||||
/* Can happen if the intersected rectangle gives 0 width or height. */
|
||||
if (b_rr.ptr.data == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
BL::RenderResult::layers_iterator b_single_rlay;
|
||||
b_rr.layers.begin(b_single_rlay);
|
||||
|
||||
/* layer will be missing if it was disabled in the UI */
|
||||
if (b_single_rlay == b_rr.layers.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
BL::RenderLayer b_rlay = *b_single_rlay;
|
||||
|
||||
vector<float> pixels(tile.size.x * tile.size.y * 4);
|
||||
|
||||
/* Copy each pass.
|
||||
* TODO:copy only the required ones for better performance? */
|
||||
for (BL::RenderPass &b_pass : b_rlay.passes) {
|
||||
tile.set_pass_pixels(b_pass.name(), b_pass.channels(), (float *)b_pass.rect());
|
||||
}
|
||||
|
||||
b_engine_.end_result(b_rr, false, false, false);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BlenderOutputDriver::update_render_tile(const Tile &tile)
|
||||
{
|
||||
/* Use final write for preview renders, otherwise render result wouldn't be be updated
|
||||
* quickly on Blender side. For all other cases we use the display driver. */
|
||||
if (b_engine_.is_preview()) {
|
||||
write_render_tile(tile);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Don't highlight full-frame tile. */
|
||||
if (!(tile.size == tile.full_size)) {
|
||||
b_engine_.tile_highlight_clear_all();
|
||||
b_engine_.tile_highlight_set(tile.offset.x, tile.offset.y, tile.size.x, tile.size.y, true);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void BlenderOutputDriver::write_render_tile(const Tile &tile)
|
||||
{
|
||||
b_engine_.tile_highlight_clear_all();
|
||||
|
||||
/* Get render result. */
|
||||
BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
|
||||
tile.offset.y,
|
||||
tile.size.x,
|
||||
tile.size.y,
|
||||
tile.layer.c_str(),
|
||||
tile.view.c_str());
|
||||
|
||||
/* Can happen if the intersected rectangle gives 0 width or height. */
|
||||
if (b_rr.ptr.data == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
BL::RenderResult::layers_iterator b_single_rlay;
|
||||
b_rr.layers.begin(b_single_rlay);
|
||||
|
||||
/* Layer will be missing if it was disabled in the UI. */
|
||||
if (b_single_rlay == b_rr.layers.end()) {
|
||||
return;
|
||||
}
|
||||
|
||||
BL::RenderLayer b_rlay = *b_single_rlay;
|
||||
|
||||
vector<float> pixels(tile.size.x * tile.size.y * 4);
|
||||
|
||||
/* Copy each pass. */
|
||||
for (BL::RenderPass &b_pass : b_rlay.passes) {
|
||||
if (!tile.get_pass_pixels(b_pass.name(), b_pass.channels(), &pixels[0])) {
|
||||
memset(&pixels[0], 0, pixels.size() * sizeof(float));
|
||||
}
|
||||
|
||||
b_pass.rect(&pixels[0]);
|
||||
}
|
||||
|
||||
b_engine_.end_result(b_rr, true, false, true);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -1,40 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "MEM_guardedalloc.h"
|
||||
|
||||
#include "RNA_blender_cpp.h"
|
||||
|
||||
#include "render/output_driver.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class BlenderOutputDriver : public OutputDriver {
|
||||
public:
|
||||
explicit BlenderOutputDriver(BL::RenderEngine &b_engine);
|
||||
~BlenderOutputDriver();
|
||||
|
||||
virtual void write_render_tile(const Tile &tile) override;
|
||||
virtual bool update_render_tile(const Tile &tile) override;
|
||||
virtual bool read_render_tile(const Tile &tile) override;
|
||||
|
||||
protected:
|
||||
BL::RenderEngine b_engine_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -911,16 +911,14 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
|
||||
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
|
||||
{
|
||||
vector<DeviceType> device_types = Device::available_types();
|
||||
bool has_cuda = false, has_optix = false, has_hip = false;
|
||||
bool has_cuda = false, has_optix = false;
|
||||
foreach (DeviceType device_type, device_types) {
|
||||
has_cuda |= (device_type == DEVICE_CUDA);
|
||||
has_optix |= (device_type == DEVICE_OPTIX);
|
||||
has_hip |= (device_type == DEVICE_HIP);
|
||||
}
|
||||
PyObject *list = PyTuple_New(3);
|
||||
PyObject *list = PyTuple_New(2);
|
||||
PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
|
||||
PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
|
||||
PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
|
||||
return list;
|
||||
}
|
||||
|
||||
@@ -946,9 +944,6 @@ static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg)
|
||||
else if (override == "OPTIX") {
|
||||
BlenderSession::device_override = DEVICE_MASK_OPTIX;
|
||||
}
|
||||
else if (override == "HIP") {
|
||||
BlenderSession::device_override = DEVICE_MASK_HIP;
|
||||
}
|
||||
else {
|
||||
printf("\nError: %s is not a valid Cycles device.\n", override.c_str());
|
||||
Py_RETURN_FALSE;
|
||||
|
@@ -42,8 +42,7 @@
|
||||
#include "util/util_progress.h"
|
||||
#include "util/util_time.h"
|
||||
|
||||
#include "blender/blender_display_driver.h"
|
||||
#include "blender/blender_output_driver.h"
|
||||
#include "blender/blender_gpu_display.h"
|
||||
#include "blender/blender_session.h"
|
||||
#include "blender/blender_sync.h"
|
||||
#include "blender/blender_util.h"
|
||||
@@ -72,8 +71,7 @@ BlenderSession::BlenderSession(BL::RenderEngine &b_engine,
|
||||
width(0),
|
||||
height(0),
|
||||
preview_osl(preview_osl),
|
||||
python_thread_state(NULL),
|
||||
use_developer_ui(false)
|
||||
python_thread_state(NULL)
|
||||
{
|
||||
/* offline render */
|
||||
background = true;
|
||||
@@ -158,13 +156,11 @@ void BlenderSession::create_session()
|
||||
b_v3d, b_rv3d, scene->camera, width, height);
|
||||
session->reset(session_params, buffer_params);
|
||||
|
||||
/* Create GPU display.
|
||||
* TODO(sergey): Investigate whether DisplayDriver can be used for the preview as well. */
|
||||
/* Create GPU display. */
|
||||
if (!b_engine.is_preview() && !headless) {
|
||||
unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(b_engine,
|
||||
b_scene);
|
||||
display_driver_ = display_driver.get();
|
||||
session->set_display_driver(move(display_driver));
|
||||
unique_ptr<BlenderGPUDisplay> gpu_display = make_unique<BlenderGPUDisplay>(b_engine, b_scene);
|
||||
gpu_display_ = gpu_display.get();
|
||||
session->set_gpu_display(move(gpu_display));
|
||||
}
|
||||
|
||||
/* Viewport and preview (as in, material preview) does not do tiled rendering, so can inform
|
||||
@@ -281,6 +277,94 @@ void BlenderSession::free_session()
|
||||
session = nullptr;
|
||||
}
|
||||
|
||||
void BlenderSession::read_render_tile()
|
||||
{
|
||||
const int2 tile_offset = session->get_render_tile_offset();
|
||||
const int2 tile_size = session->get_render_tile_size();
|
||||
|
||||
/* get render result */
|
||||
BL::RenderResult b_rr = b_engine.begin_result(tile_offset.x,
|
||||
tile_offset.y,
|
||||
tile_size.x,
|
||||
tile_size.y,
|
||||
b_rlay_name.c_str(),
|
||||
b_rview_name.c_str());
|
||||
|
||||
/* can happen if the intersected rectangle gives 0 width or height */
|
||||
if (b_rr.ptr.data == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
BL::RenderResult::layers_iterator b_single_rlay;
|
||||
b_rr.layers.begin(b_single_rlay);
|
||||
|
||||
/* layer will be missing if it was disabled in the UI */
|
||||
if (b_single_rlay == b_rr.layers.end())
|
||||
return;
|
||||
|
||||
BL::RenderLayer b_rlay = *b_single_rlay;
|
||||
|
||||
vector<float> pixels(tile_size.x * tile_size.y * 4);
|
||||
|
||||
/* Copy each pass.
|
||||
* TODO:copy only the required ones for better performance? */
|
||||
for (BL::RenderPass &b_pass : b_rlay.passes) {
|
||||
session->set_render_tile_pixels(b_pass.name(), b_pass.channels(), (float *)b_pass.rect());
|
||||
}
|
||||
}
|
||||
|
||||
void BlenderSession::write_render_tile()
|
||||
{
|
||||
const int2 tile_offset = session->get_render_tile_offset();
|
||||
const int2 tile_size = session->get_render_tile_size();
|
||||
|
||||
const string_view render_layer_name = session->get_render_tile_layer();
|
||||
const string_view render_view_name = session->get_render_tile_view();
|
||||
|
||||
b_engine.tile_highlight_clear_all();
|
||||
|
||||
/* get render result */
|
||||
BL::RenderResult b_rr = b_engine.begin_result(tile_offset.x,
|
||||
tile_offset.y,
|
||||
tile_size.x,
|
||||
tile_size.y,
|
||||
render_layer_name.c_str(),
|
||||
render_view_name.c_str());
|
||||
|
||||
/* can happen if the intersected rectangle gives 0 width or height */
|
||||
if (b_rr.ptr.data == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
BL::RenderResult::layers_iterator b_single_rlay;
|
||||
b_rr.layers.begin(b_single_rlay);
|
||||
|
||||
/* layer will be missing if it was disabled in the UI */
|
||||
if (b_single_rlay == b_rr.layers.end()) {
|
||||
return;
|
||||
}
|
||||
|
||||
BL::RenderLayer b_rlay = *b_single_rlay;
|
||||
|
||||
write_render_result(b_rlay);
|
||||
|
||||
b_engine.end_result(b_rr, true, false, true);
|
||||
}
|
||||
|
||||
void BlenderSession::update_render_tile()
|
||||
{
|
||||
if (!session->has_multiple_render_tiles()) {
|
||||
/* Don't highlight full-frame tile. */
|
||||
return;
|
||||
}
|
||||
|
||||
const int2 tile_offset = session->get_render_tile_offset();
|
||||
const int2 tile_size = session->get_render_tile_size();
|
||||
|
||||
b_engine.tile_highlight_clear_all();
|
||||
b_engine.tile_highlight_set(tile_offset.x, tile_offset.y, tile_size.x, tile_size.y, true);
|
||||
}
|
||||
|
||||
void BlenderSession::full_buffer_written(string_view filename)
|
||||
{
|
||||
full_buffer_files_.emplace_back(filename);
|
||||
@@ -354,8 +438,18 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Create driver to write out render results. */
|
||||
session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
|
||||
/* set callback to write out render results */
|
||||
session->write_render_tile_cb = [&]() { write_render_tile(); };
|
||||
|
||||
/* Use final write for preview renders, otherwise render result wouldn't be be updated on Blender
|
||||
* side. */
|
||||
/* TODO(sergey): Investigate whether GPUDisplay can be used for the preview as well. */
|
||||
if (b_engine.is_preview()) {
|
||||
session->update_render_tile_cb = [&]() { write_render_tile(); };
|
||||
}
|
||||
else {
|
||||
session->update_render_tile_cb = [&]() { update_render_tile(); };
|
||||
}
|
||||
|
||||
session->full_buffer_written_cb = [&](string_view filename) { full_buffer_written(filename); };
|
||||
|
||||
@@ -463,11 +557,6 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
|
||||
/* free result without merging */
|
||||
b_engine.end_result(b_rr, true, false, false);
|
||||
|
||||
/* When tiled rendering is used there will be no "write" done for the tile. Forcefully clear
|
||||
* highlighted tiles now, so that the highlight will be removed while processing full frame from
|
||||
* file. */
|
||||
b_engine.tile_highlight_clear_all();
|
||||
|
||||
double total_time, render_time;
|
||||
session->progress.get_time(total_time, render_time);
|
||||
VLOG(1) << "Total render time: " << total_time;
|
||||
@@ -492,22 +581,13 @@ void BlenderSession::render_frame_finish()
|
||||
|
||||
for (string_view filename : full_buffer_files_) {
|
||||
session->process_full_buffer_from_disk(filename);
|
||||
if (check_and_report_session_error()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (string_view filename : full_buffer_files_) {
|
||||
path_remove(filename);
|
||||
}
|
||||
|
||||
/* Clear driver. */
|
||||
session->set_output_driver(nullptr);
|
||||
/* clear callback */
|
||||
session->write_render_tile_cb = function_null;
|
||||
session->update_render_tile_cb = function_null;
|
||||
session->full_buffer_written_cb = function_null;
|
||||
|
||||
/* All the files are handled.
|
||||
* Clear the list so that this session can be re-used by Persistent Data. */
|
||||
full_buffer_files_.clear();
|
||||
}
|
||||
|
||||
static PassType bake_type_to_pass(const string &bake_type_str, const int bake_filter)
|
||||
@@ -612,8 +692,9 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
|
||||
pass->set_type(bake_type_to_pass(bake_type, bake_filter));
|
||||
pass->set_include_albedo((bake_filter & BL::BakeSettings::pass_filter_COLOR));
|
||||
|
||||
session->set_display_driver(nullptr);
|
||||
session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
|
||||
session->read_render_tile_cb = [&]() { read_render_tile(); };
|
||||
session->write_render_tile_cb = [&]() { write_render_tile(); };
|
||||
session->set_gpu_display(nullptr);
|
||||
|
||||
if (!session->progress.get_cancel()) {
|
||||
/* Sync scene. */
|
||||
@@ -656,7 +737,43 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
|
||||
session->wait();
|
||||
}
|
||||
|
||||
session->set_output_driver(nullptr);
|
||||
session->read_render_tile_cb = function_null;
|
||||
session->write_render_tile_cb = function_null;
|
||||
}
|
||||
|
||||
void BlenderSession::write_render_result(BL::RenderLayer &b_rlay)
|
||||
{
|
||||
if (!session->copy_render_tile_from_device()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const int2 tile_size = session->get_render_tile_size();
|
||||
vector<float> pixels(tile_size.x * tile_size.y * 4);
|
||||
|
||||
/* Copy each pass. */
|
||||
for (BL::RenderPass &b_pass : b_rlay.passes) {
|
||||
if (!session->get_render_tile_pixels(b_pass.name(), b_pass.channels(), &pixels[0])) {
|
||||
memset(&pixels[0], 0, pixels.size() * sizeof(float));
|
||||
}
|
||||
|
||||
b_pass.rect(&pixels[0]);
|
||||
}
|
||||
}
|
||||
|
||||
void BlenderSession::update_render_result(BL::RenderLayer &b_rlay)
|
||||
{
|
||||
if (!session->copy_render_tile_from_device()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const int2 tile_size = session->get_render_tile_size();
|
||||
vector<float> pixels(tile_size.x * tile_size.y * 4);
|
||||
|
||||
/* Copy combined pass. */
|
||||
BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str()));
|
||||
if (session->get_render_tile_pixels("Combined", b_combined_pass.channels(), &pixels[0])) {
|
||||
b_combined_pass.rect(&pixels[0]);
|
||||
}
|
||||
}
|
||||
|
||||
void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
|
||||
@@ -764,7 +881,7 @@ void BlenderSession::draw(BL::SpaceImageEditor &space_image)
|
||||
}
|
||||
|
||||
BL::Array<float, 2> zoom = space_image.zoom();
|
||||
display_driver_->set_zoom(zoom[0], zoom[1]);
|
||||
gpu_display_->set_zoom(zoom[0], zoom[1]);
|
||||
|
||||
session->draw();
|
||||
}
|
||||
@@ -871,9 +988,8 @@ void BlenderSession::update_status_progress()
|
||||
get_status(status, substatus);
|
||||
get_progress(progress, total_time, render_time);
|
||||
|
||||
if (progress > 0) {
|
||||
remaining_time = session->get_estimated_remaining_time();
|
||||
}
|
||||
if (progress > 0)
|
||||
remaining_time = (1.0 - (double)progress) * (render_time / (double)progress);
|
||||
|
||||
if (background) {
|
||||
if (scene)
|
||||
@@ -911,27 +1027,20 @@ void BlenderSession::update_status_progress()
|
||||
last_progress = progress;
|
||||
}
|
||||
|
||||
check_and_report_session_error();
|
||||
}
|
||||
|
||||
bool BlenderSession::check_and_report_session_error()
|
||||
{
|
||||
if (!session->progress.get_error()) {
|
||||
return false;
|
||||
if (session->progress.get_error()) {
|
||||
string error = session->progress.get_error_message();
|
||||
if (error != last_error) {
|
||||
/* TODO(sergey): Currently C++ RNA API doesn't let us to
|
||||
* use mnemonic name for the variable. Would be nice to
|
||||
* have this figured out.
|
||||
*
|
||||
* For until then, 1 << 5 means RPT_ERROR.
|
||||
*/
|
||||
b_engine.report(1 << 5, error.c_str());
|
||||
b_engine.error_set(error.c_str());
|
||||
last_error = error;
|
||||
}
|
||||
}
|
||||
|
||||
const string error = session->progress.get_error_message();
|
||||
if (error != last_error) {
|
||||
/* TODO(sergey): Currently C++ RNA API doesn't let us to use mnemonic name for the variable.
|
||||
* Would be nice to have this figured out.
|
||||
*
|
||||
* For until then, 1 << 5 means RPT_ERROR. */
|
||||
b_engine.report(1 << 5, error.c_str());
|
||||
b_engine.error_set(error.c_str());
|
||||
last_error = error;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlenderSession::tag_update()
|
||||
|
@@ -29,7 +29,7 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class BlenderDisplayDriver;
|
||||
class BlenderGPUDisplay;
|
||||
class BlenderSync;
|
||||
class ImageMetaData;
|
||||
class Scene;
|
||||
@@ -70,7 +70,20 @@ class BlenderSession {
|
||||
const int bake_width,
|
||||
const int bake_height);
|
||||
|
||||
void write_render_result(BL::RenderLayer &b_rlay);
|
||||
void write_render_tile();
|
||||
|
||||
void update_render_tile();
|
||||
|
||||
void full_buffer_written(string_view filename);
|
||||
|
||||
/* update functions are used to update display buffer only after sample was rendered
|
||||
* only needed for better visual feedback */
|
||||
void update_render_result(BL::RenderLayer &b_rlay);
|
||||
|
||||
/* read functions for baking input */
|
||||
void read_render_tile();
|
||||
|
||||
/* interactive updates */
|
||||
void synchronize(BL::Depsgraph &b_depsgraph);
|
||||
|
||||
@@ -97,7 +110,8 @@ class BlenderSession {
|
||||
BL::RenderSettings b_render;
|
||||
BL::Depsgraph b_depsgraph;
|
||||
/* NOTE: Blender's scene might become invalid after call
|
||||
* #free_blender_memory_if_possible(). */
|
||||
* free_blender_memory_if_possible().
|
||||
*/
|
||||
BL::Scene b_scene;
|
||||
BL::SpaceView3D b_v3d;
|
||||
BL::RegionView3D b_rv3d;
|
||||
@@ -133,11 +147,6 @@ class BlenderSession {
|
||||
protected:
|
||||
void stamp_view_layer_metadata(Scene *scene, const string &view_layer_name);
|
||||
|
||||
/* Check whether session error happened.
|
||||
* If so, it is reported to the render engine and true is returned.
|
||||
* Otherwise false is returned. */
|
||||
bool check_and_report_session_error();
|
||||
|
||||
void builtin_images_load();
|
||||
|
||||
/* Is used after each render layer synchronization is done with the goal
|
||||
@@ -151,8 +160,8 @@ class BlenderSession {
|
||||
int last_pass_index = -1;
|
||||
} draw_state_;
|
||||
|
||||
/* NOTE: The BlenderSession references the display driver. */
|
||||
BlenderDisplayDriver *display_driver_ = nullptr;
|
||||
/* NOTE: The BlenderSession references the GPU display. */
|
||||
BlenderGPUDisplay *gpu_display_ = nullptr;
|
||||
|
||||
vector<string> full_buffer_files_;
|
||||
};
|
||||
|
@@ -279,7 +279,7 @@ static ShaderNode *add_node(Scene *scene,
|
||||
array<float3> curve_mapping_curves;
|
||||
float min_x, max_x;
|
||||
curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, true);
|
||||
curvemapping_minmax(mapping, 4, &min_x, &max_x);
|
||||
curvemapping_minmax(mapping, true, &min_x, &max_x);
|
||||
curves->set_min_x(min_x);
|
||||
curves->set_max_x(max_x);
|
||||
curves->set_curves(curve_mapping_curves);
|
||||
@@ -292,25 +292,12 @@ static ShaderNode *add_node(Scene *scene,
|
||||
array<float3> curve_mapping_curves;
|
||||
float min_x, max_x;
|
||||
curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, false);
|
||||
curvemapping_minmax(mapping, 3, &min_x, &max_x);
|
||||
curvemapping_minmax(mapping, false, &min_x, &max_x);
|
||||
curves->set_min_x(min_x);
|
||||
curves->set_max_x(max_x);
|
||||
curves->set_curves(curve_mapping_curves);
|
||||
node = curves;
|
||||
}
|
||||
else if (b_node.is_a(&RNA_ShaderNodeFloatCurve)) {
|
||||
BL::ShaderNodeFloatCurve b_curve_node(b_node);
|
||||
BL::CurveMapping mapping(b_curve_node.mapping());
|
||||
FloatCurveNode *curve = graph->create_node<FloatCurveNode>();
|
||||
array<float> curve_mapping_curve;
|
||||
float min_x, max_x;
|
||||
curvemapping_float_to_array(mapping, curve_mapping_curve, RAMP_TABLE_SIZE);
|
||||
curvemapping_minmax(mapping, 1, &min_x, &max_x);
|
||||
curve->set_min_x(min_x);
|
||||
curve->set_max_x(max_x);
|
||||
curve->set_curve(curve_mapping_curve);
|
||||
node = curve;
|
||||
}
|
||||
else if (b_node.is_a(&RNA_ShaderNodeValToRGB)) {
|
||||
RGBRampNode *ramp = graph->create_node<RGBRampNode>();
|
||||
BL::ShaderNodeValToRGB b_ramp_node(b_node);
|
||||
|
@@ -545,6 +545,8 @@ static PassType get_blender_pass_type(BL::RenderPass &b_pass)
|
||||
MAP_PASS("Shadow Catcher", PASS_SHADOW_CATCHER);
|
||||
MAP_PASS("Noisy Shadow Catcher", PASS_SHADOW_CATCHER);
|
||||
|
||||
MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
|
||||
|
||||
MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER);
|
||||
MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT);
|
||||
|
||||
@@ -602,6 +604,10 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
|
||||
PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
|
||||
|
||||
/* Debug passes. */
|
||||
if (get_boolean(crl, "pass_debug_render_time")) {
|
||||
b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str());
|
||||
pass_add(scene, PASS_RENDER_TIME, "Debug Render Time");
|
||||
}
|
||||
if (get_boolean(crl, "pass_debug_sample_count")) {
|
||||
b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
|
||||
pass_add(scene, PASS_SAMPLE_COUNT, "Debug Sample Count");
|
||||
@@ -849,7 +855,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
|
||||
|
||||
if (background) {
|
||||
params.use_auto_tile = RNA_boolean_get(&cscene, "use_auto_tile");
|
||||
params.tile_size = max(get_int(cscene, "tile_size"), 8);
|
||||
params.tile_size = get_int(cscene, "tile_size");
|
||||
}
|
||||
else {
|
||||
params.use_auto_tile = false;
|
||||
|
@@ -90,27 +90,26 @@ static inline BL::Mesh object_to_mesh(BL::BlendData & /*data*/,
|
||||
}
|
||||
#endif
|
||||
|
||||
BL::Mesh mesh = (b_ob_info.object_data.is_a(&RNA_Mesh)) ? BL::Mesh(b_ob_info.object_data) :
|
||||
BL::Mesh(PointerRNA_NULL);
|
||||
BL::Mesh mesh(PointerRNA_NULL);
|
||||
if (b_ob_info.object_data.is_a(&RNA_Mesh)) {
|
||||
/* TODO: calc_undeformed is not used. */
|
||||
mesh = BL::Mesh(b_ob_info.object_data);
|
||||
|
||||
if (b_ob_info.is_real_object_data()) {
|
||||
if (mesh) {
|
||||
/* Make a copy to split faces if we use autosmooth, otherwise not needed.
|
||||
* Also in edit mode do we need to make a copy, to ensure data layers like
|
||||
* UV are not empty. */
|
||||
if (mesh.is_editmode() ||
|
||||
(mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
|
||||
BL::Depsgraph depsgraph(PointerRNA_NULL);
|
||||
mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Make a copy to split faces if we use autosmooth, otherwise not needed.
|
||||
* Also in edit mode do we need to make a copy, to ensure data layers like
|
||||
* UV are not empty. */
|
||||
if (mesh.is_editmode() ||
|
||||
(mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
|
||||
BL::Depsgraph depsgraph(PointerRNA_NULL);
|
||||
assert(b_ob_info.is_real_object_data());
|
||||
mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* TODO: what to do about non-mesh geometry instances? */
|
||||
BL::Depsgraph depsgraph(PointerRNA_NULL);
|
||||
if (b_ob_info.is_real_object_data()) {
|
||||
mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
@@ -171,11 +170,12 @@ static inline void curvemap_minmax_curve(/*const*/ BL::CurveMap &curve, float *m
|
||||
}
|
||||
|
||||
static inline void curvemapping_minmax(/*const*/ BL::CurveMapping &cumap,
|
||||
int num_curves,
|
||||
bool rgb_curve,
|
||||
float *min_x,
|
||||
float *max_x)
|
||||
{
|
||||
// const int num_curves = cumap.curves.length(); /* Gives linking error so far. */
|
||||
const int num_curves = rgb_curve ? 4 : 3;
|
||||
*min_x = FLT_MAX;
|
||||
*max_x = -FLT_MAX;
|
||||
for (int i = 0; i < num_curves; ++i) {
|
||||
@@ -195,28 +195,6 @@ static inline void curvemapping_to_array(BL::CurveMapping &cumap, array<float> &
|
||||
}
|
||||
}
|
||||
|
||||
static inline void curvemapping_float_to_array(BL::CurveMapping &cumap,
|
||||
array<float> &data,
|
||||
int size)
|
||||
{
|
||||
float min = 0.0f, max = 1.0f;
|
||||
|
||||
curvemapping_minmax(cumap, 1, &min, &max);
|
||||
|
||||
const float range = max - min;
|
||||
|
||||
cumap.update();
|
||||
|
||||
BL::CurveMap map = cumap.curves[0];
|
||||
|
||||
data.resize(size);
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
float t = min + (float)i / (float)(size - 1) * range;
|
||||
data[i] = cumap.evaluate(map, t);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
|
||||
array<float3> &data,
|
||||
int size,
|
||||
@@ -235,8 +213,7 @@ static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
|
||||
*
|
||||
* There might be some better estimations here tho.
|
||||
*/
|
||||
const int num_curves = rgb_curve ? 4 : 3;
|
||||
curvemapping_minmax(cumap, num_curves, &min_x, &max_x);
|
||||
curvemapping_minmax(cumap, rgb_curve, &min_x, &max_x);
|
||||
|
||||
const float range_x = max_x - min_x;
|
||||
|
||||
|
@@ -50,6 +50,10 @@ struct PackedBVH {
|
||||
array<int4> leaf_nodes;
|
||||
/* object index to BVH node index mapping for instances */
|
||||
array<int> object_node;
|
||||
/* Mapping from primitive index to index in triangle array. */
|
||||
array<uint> prim_tri_index;
|
||||
/* Continuous storage of triangle vertices. */
|
||||
array<float4> prim_tri_verts;
|
||||
/* primitive type - triangle or strand */
|
||||
array<int> prim_type;
|
||||
/* visibility visibilitys for primitives */
|
||||
|
@@ -439,20 +439,61 @@ void BVH2::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility
|
||||
|
||||
/* Triangles */
|
||||
|
||||
void BVH2::pack_triangle(int idx, float4 tri_verts[3])
|
||||
{
|
||||
int tob = pack.prim_object[idx];
|
||||
assert(tob >= 0 && tob < objects.size());
|
||||
const Mesh *mesh = static_cast<const Mesh *>(objects[tob]->get_geometry());
|
||||
|
||||
int tidx = pack.prim_index[idx];
|
||||
Mesh::Triangle t = mesh->get_triangle(tidx);
|
||||
const float3 *vpos = &mesh->verts[0];
|
||||
float3 v0 = vpos[t.v[0]];
|
||||
float3 v1 = vpos[t.v[1]];
|
||||
float3 v2 = vpos[t.v[2]];
|
||||
|
||||
tri_verts[0] = float3_to_float4(v0);
|
||||
tri_verts[1] = float3_to_float4(v1);
|
||||
tri_verts[2] = float3_to_float4(v2);
|
||||
}
|
||||
|
||||
void BVH2::pack_primitives()
|
||||
{
|
||||
const size_t tidx_size = pack.prim_index.size();
|
||||
size_t num_prim_triangles = 0;
|
||||
/* Count number of triangles primitives in BVH. */
|
||||
for (unsigned int i = 0; i < tidx_size; i++) {
|
||||
if ((pack.prim_index[i] != -1)) {
|
||||
if ((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
|
||||
++num_prim_triangles;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Reserve size for arrays. */
|
||||
pack.prim_tri_index.clear();
|
||||
pack.prim_tri_index.resize(tidx_size);
|
||||
pack.prim_tri_verts.clear();
|
||||
pack.prim_tri_verts.resize(num_prim_triangles * 3);
|
||||
pack.prim_visibility.clear();
|
||||
pack.prim_visibility.resize(tidx_size);
|
||||
/* Fill in all the arrays. */
|
||||
size_t prim_triangle_index = 0;
|
||||
for (unsigned int i = 0; i < tidx_size; i++) {
|
||||
if (pack.prim_index[i] != -1) {
|
||||
int tob = pack.prim_object[i];
|
||||
Object *ob = objects[tob];
|
||||
if ((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
|
||||
pack_triangle(i, (float4 *)&pack.prim_tri_verts[3 * prim_triangle_index]);
|
||||
pack.prim_tri_index[i] = 3 * prim_triangle_index;
|
||||
++prim_triangle_index;
|
||||
}
|
||||
else {
|
||||
pack.prim_tri_index[i] = -1;
|
||||
}
|
||||
pack.prim_visibility[i] = ob->visibility_for_tracing();
|
||||
}
|
||||
else {
|
||||
pack.prim_tri_index[i] = -1;
|
||||
pack.prim_visibility[i] = 0;
|
||||
}
|
||||
}
|
||||
@@ -481,8 +522,10 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
|
||||
/* reserve */
|
||||
size_t prim_index_size = pack.prim_index.size();
|
||||
size_t prim_tri_verts_size = pack.prim_tri_verts.size();
|
||||
|
||||
size_t pack_prim_index_offset = prim_index_size;
|
||||
size_t pack_prim_tri_verts_offset = prim_tri_verts_size;
|
||||
size_t pack_nodes_offset = nodes_size;
|
||||
size_t pack_leaf_nodes_offset = leaf_nodes_size;
|
||||
size_t object_offset = 0;
|
||||
@@ -492,6 +535,7 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
|
||||
if (geom->need_build_bvh(params.bvh_layout)) {
|
||||
prim_index_size += bvh->pack.prim_index.size();
|
||||
prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
|
||||
nodes_size += bvh->pack.nodes.size();
|
||||
leaf_nodes_size += bvh->pack.leaf_nodes.size();
|
||||
}
|
||||
@@ -501,6 +545,8 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
pack.prim_type.resize(prim_index_size);
|
||||
pack.prim_object.resize(prim_index_size);
|
||||
pack.prim_visibility.resize(prim_index_size);
|
||||
pack.prim_tri_verts.resize(prim_tri_verts_size);
|
||||
pack.prim_tri_index.resize(prim_index_size);
|
||||
pack.nodes.resize(nodes_size);
|
||||
pack.leaf_nodes.resize(leaf_nodes_size);
|
||||
pack.object_node.resize(objects.size());
|
||||
@@ -513,6 +559,8 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
int *pack_prim_type = (pack.prim_type.size()) ? &pack.prim_type[0] : NULL;
|
||||
int *pack_prim_object = (pack.prim_object.size()) ? &pack.prim_object[0] : NULL;
|
||||
uint *pack_prim_visibility = (pack.prim_visibility.size()) ? &pack.prim_visibility[0] : NULL;
|
||||
float4 *pack_prim_tri_verts = (pack.prim_tri_verts.size()) ? &pack.prim_tri_verts[0] : NULL;
|
||||
uint *pack_prim_tri_index = (pack.prim_tri_index.size()) ? &pack.prim_tri_index[0] : NULL;
|
||||
int4 *pack_nodes = (pack.nodes.size()) ? &pack.nodes[0] : NULL;
|
||||
int4 *pack_leaf_nodes = (pack.leaf_nodes.size()) ? &pack.leaf_nodes[0] : NULL;
|
||||
float2 *pack_prim_time = (pack.prim_time.size()) ? &pack.prim_time[0] : NULL;
|
||||
@@ -561,14 +609,18 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
int *bvh_prim_index = &bvh->pack.prim_index[0];
|
||||
int *bvh_prim_type = &bvh->pack.prim_type[0];
|
||||
uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0];
|
||||
uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0];
|
||||
float2 *bvh_prim_time = bvh->pack.prim_time.size() ? &bvh->pack.prim_time[0] : NULL;
|
||||
|
||||
for (size_t i = 0; i < bvh_prim_index_size; i++) {
|
||||
if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
|
||||
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
|
||||
pack_prim_tri_index[pack_prim_index_offset] = -1;
|
||||
}
|
||||
else {
|
||||
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
|
||||
pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] +
|
||||
pack_prim_tri_verts_offset;
|
||||
}
|
||||
|
||||
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
|
||||
@@ -581,6 +633,15 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
}
|
||||
}
|
||||
|
||||
/* Merge triangle vertices data. */
|
||||
if (bvh->pack.prim_tri_verts.size()) {
|
||||
const size_t prim_tri_size = bvh->pack.prim_tri_verts.size();
|
||||
memcpy(pack_prim_tri_verts + pack_prim_tri_verts_offset,
|
||||
&bvh->pack.prim_tri_verts[0],
|
||||
prim_tri_size * sizeof(float4));
|
||||
pack_prim_tri_verts_offset += prim_tri_size;
|
||||
}
|
||||
|
||||
/* merge nodes */
|
||||
if (bvh->pack.leaf_nodes.size()) {
|
||||
int4 *leaf_nodes_offset = &bvh->pack.leaf_nodes[0];
|
||||
|
@@ -67,12 +67,8 @@ BVHBuild::~BVHBuild()
|
||||
|
||||
/* Adding References */
|
||||
|
||||
void BVHBuild::add_reference_triangles(BoundBox &root,
|
||||
BoundBox ¢er,
|
||||
Mesh *mesh,
|
||||
int object_index)
|
||||
void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *mesh, int i)
|
||||
{
|
||||
const PrimitiveType primitive_type = mesh->primitive_type();
|
||||
const Attribute *attr_mP = NULL;
|
||||
if (mesh->has_motion_blur()) {
|
||||
attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
@@ -85,7 +81,7 @@ void BVHBuild::add_reference_triangles(BoundBox &root,
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
t.bounds_grow(verts, bounds);
|
||||
if (bounds.valid() && t.valid(verts)) {
|
||||
references.push_back(BVHReference(bounds, j, object_index, primitive_type));
|
||||
references.push_back(BVHReference(bounds, j, i, PRIMITIVE_TRIANGLE));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
@@ -105,7 +101,7 @@ void BVHBuild::add_reference_triangles(BoundBox &root,
|
||||
t.bounds_grow(vert_steps + step * num_verts, bounds);
|
||||
}
|
||||
if (bounds.valid()) {
|
||||
references.push_back(BVHReference(bounds, j, object_index, primitive_type));
|
||||
references.push_back(BVHReference(bounds, j, i, PRIMITIVE_MOTION_TRIANGLE));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
@@ -144,7 +140,7 @@ void BVHBuild::add_reference_triangles(BoundBox &root,
|
||||
if (bounds.valid()) {
|
||||
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
|
||||
references.push_back(
|
||||
BVHReference(bounds, j, object_index, primitive_type, prev_time, curr_time));
|
||||
BVHReference(bounds, j, i, PRIMITIVE_MOTION_TRIANGLE, prev_time, curr_time));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
@@ -157,14 +153,18 @@ void BVHBuild::add_reference_triangles(BoundBox &root,
|
||||
}
|
||||
}
|
||||
|
||||
void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair, int object_index)
|
||||
void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair, int i)
|
||||
{
|
||||
const Attribute *curve_attr_mP = NULL;
|
||||
if (hair->has_motion_blur()) {
|
||||
curve_attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
}
|
||||
|
||||
const PrimitiveType primitive_type = hair->primitive_type();
|
||||
const PrimitiveType primitive_type =
|
||||
(curve_attr_mP != NULL) ?
|
||||
((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_MOTION_CURVE_RIBBON :
|
||||
PRIMITIVE_MOTION_CURVE_THICK) :
|
||||
((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_CURVE_RIBBON : PRIMITIVE_CURVE_THICK);
|
||||
|
||||
const size_t num_curves = hair->num_curves();
|
||||
for (uint j = 0; j < num_curves; j++) {
|
||||
@@ -177,7 +177,7 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair
|
||||
curve.bounds_grow(k, &hair->get_curve_keys()[0], curve_radius, bounds);
|
||||
if (bounds.valid()) {
|
||||
int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
|
||||
references.push_back(BVHReference(bounds, j, object_index, packed_type));
|
||||
references.push_back(BVHReference(bounds, j, i, packed_type));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
@@ -198,7 +198,7 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair
|
||||
}
|
||||
if (bounds.valid()) {
|
||||
int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
|
||||
references.push_back(BVHReference(bounds, j, object_index, packed_type));
|
||||
references.push_back(BVHReference(bounds, j, i, packed_type));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
@@ -254,8 +254,7 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair
|
||||
if (bounds.valid()) {
|
||||
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
|
||||
int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
|
||||
references.push_back(
|
||||
BVHReference(bounds, j, object_index, packed_type, prev_time, curr_time));
|
||||
references.push_back(BVHReference(bounds, j, i, packed_type, prev_time, curr_time));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
@@ -269,18 +268,15 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair
|
||||
}
|
||||
}
|
||||
|
||||
void BVHBuild::add_reference_geometry(BoundBox &root,
|
||||
BoundBox ¢er,
|
||||
Geometry *geom,
|
||||
int object_index)
|
||||
void BVHBuild::add_reference_geometry(BoundBox &root, BoundBox ¢er, Geometry *geom, int i)
|
||||
{
|
||||
if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) {
|
||||
Mesh *mesh = static_cast<Mesh *>(geom);
|
||||
add_reference_triangles(root, center, mesh, object_index);
|
||||
add_reference_triangles(root, center, mesh, i);
|
||||
}
|
||||
else if (geom->geometry_type == Geometry::HAIR) {
|
||||
Hair *hair = static_cast<Hair *>(geom);
|
||||
add_reference_curves(root, center, hair, object_index);
|
||||
add_reference_curves(root, center, hair, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -89,9 +89,20 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
|
||||
|
||||
/* Test if we need to record this transparent intersection. */
|
||||
if (ctx->num_hits < ctx->max_hits || ray->tfar < ctx->max_t) {
|
||||
/* Skip already recorded intersections. */
|
||||
int num_recorded_hits = min(ctx->num_hits, ctx->max_hits);
|
||||
|
||||
for (int i = 0; i < num_recorded_hits; ++i) {
|
||||
if (current_isect.object == ctx->isect_s[i].object &&
|
||||
current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
|
||||
/* This intersection was already recorded, skip it. */
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* If maximum number of hits was reached, replace the intersection with the
|
||||
* highest distance. We want to find the N closest intersections. */
|
||||
const int num_recorded_hits = min(ctx->num_hits, ctx->max_hits);
|
||||
int isect_index = num_recorded_hits;
|
||||
if (num_recorded_hits + 1 >= ctx->max_hits) {
|
||||
float max_t = ctx->isect_s[0].t;
|
||||
@@ -136,7 +147,10 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
|
||||
}
|
||||
else {
|
||||
kernel_embree_convert_hit(kg, ray, hit, ¤t_isect);
|
||||
if (ctx->local_object_id != current_isect.object) {
|
||||
int object = (current_isect.object == OBJECT_NONE) ?
|
||||
kernel_tex_fetch(__prim_object, current_isect.prim) :
|
||||
current_isect.object;
|
||||
if (ctx->local_object_id != object) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
@@ -155,49 +169,41 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
|
||||
break;
|
||||
}
|
||||
|
||||
LocalIntersection *local_isect = ctx->local_isect;
|
||||
/* See triangle_intersect_subsurface() for the native equivalent. */
|
||||
for (int i = min(ctx->max_hits, ctx->local_isect->num_hits) - 1; i >= 0; --i) {
|
||||
if (ctx->local_isect->hits[i].t == ray->tfar) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int hit_idx = 0;
|
||||
|
||||
if (ctx->lcg_state) {
|
||||
/* See triangle_intersect_subsurface() for the native equivalent. */
|
||||
for (int i = min(ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
|
||||
if (local_isect->hits[i].t == ray->tfar) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
local_isect->num_hits++;
|
||||
|
||||
if (local_isect->num_hits <= ctx->max_hits) {
|
||||
hit_idx = local_isect->num_hits - 1;
|
||||
++ctx->local_isect->num_hits;
|
||||
if (ctx->local_isect->num_hits <= ctx->max_hits) {
|
||||
hit_idx = ctx->local_isect->num_hits - 1;
|
||||
}
|
||||
else {
|
||||
/* reservoir sampling: if we are at the maximum number of
|
||||
* hits, randomly replace element or skip it */
|
||||
hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits;
|
||||
hit_idx = lcg_step_uint(ctx->lcg_state) % ctx->local_isect->num_hits;
|
||||
|
||||
if (hit_idx >= ctx->max_hits) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
return;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Record closest intersection only. */
|
||||
if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
local_isect->num_hits = 1;
|
||||
ctx->local_isect->num_hits = 1;
|
||||
}
|
||||
|
||||
/* record intersection */
|
||||
local_isect->hits[hit_idx] = current_isect;
|
||||
local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z));
|
||||
ctx->local_isect->hits[hit_idx] = current_isect;
|
||||
ctx->local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z));
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
@@ -207,11 +213,21 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
|
||||
if (ctx->num_hits < ctx->max_hits) {
|
||||
Intersection current_isect;
|
||||
kernel_embree_convert_hit(kg, ray, hit, ¤t_isect);
|
||||
for (size_t i = 0; i < ctx->max_hits; ++i) {
|
||||
if (current_isect.object == ctx->isect_s[i].object &&
|
||||
current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
|
||||
/* This intersection was already recorded, skip it. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
Intersection *isect = &ctx->isect_s[ctx->num_hits];
|
||||
++ctx->num_hits;
|
||||
*isect = current_isect;
|
||||
/* Only primitives from volume object. */
|
||||
uint tri_object = isect->object;
|
||||
uint tri_object = (isect->object == OBJECT_NONE) ?
|
||||
kernel_tex_fetch(__prim_object, isect->prim) :
|
||||
isect->object;
|
||||
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
|
||||
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
|
||||
--ctx->num_hits;
|
||||
@@ -233,7 +249,7 @@ static void rtc_filter_func_thick_curve(const RTCFilterFunctionNArguments *args)
|
||||
const RTCRay *ray = (RTCRay *)args->ray;
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
|
||||
/* Always ignore back-facing intersections. */
|
||||
/* Always ignore backfacing intersections. */
|
||||
if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
|
||||
make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
|
||||
*args->valid = 0;
|
||||
@@ -246,7 +262,7 @@ static void rtc_filter_occluded_func_thick_curve(const RTCFilterFunctionNArgumen
|
||||
const RTCRay *ray = (RTCRay *)args->ray;
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
|
||||
/* Always ignore back-facing intersections. */
|
||||
/* Always ignore backfacing intersections. */
|
||||
if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
|
||||
make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
|
||||
*args->valid = 0;
|
||||
@@ -440,7 +456,7 @@ void BVHEmbree::add_instance(Object *ob, int i)
|
||||
|
||||
void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
|
||||
{
|
||||
size_t prim_offset = mesh->prim_offset;
|
||||
size_t prim_offset = mesh->optix_prim_offset;
|
||||
|
||||
const Attribute *attr_mP = NULL;
|
||||
size_t num_motion_steps = 1;
|
||||
@@ -609,7 +625,7 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c
|
||||
|
||||
void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
|
||||
{
|
||||
size_t prim_offset = hair->curve_segment_offset;
|
||||
size_t prim_offset = hair->optix_prim_offset;
|
||||
|
||||
const Attribute *attr_mP = NULL;
|
||||
size_t num_motion_steps = 1;
|
||||
@@ -686,7 +702,7 @@ void BVHEmbree::refit(Progress &progress)
|
||||
if (mesh->num_triangles() > 0) {
|
||||
RTCGeometry geom = rtcGetGeometry(scene, geom_id);
|
||||
set_tri_vertex_buffer(geom, mesh, true);
|
||||
rtcSetGeometryUserData(geom, (void *)mesh->prim_offset);
|
||||
rtcSetGeometryUserData(geom, (void *)mesh->optix_prim_offset);
|
||||
rtcCommitGeometry(geom);
|
||||
}
|
||||
}
|
||||
@@ -695,7 +711,7 @@ void BVHEmbree::refit(Progress &progress)
|
||||
if (hair->num_curves() > 0) {
|
||||
RTCGeometry geom = rtcGetGeometry(scene, geom_id + 1);
|
||||
set_curve_vertex_buffer(geom, hair, true);
|
||||
rtcSetGeometryUserData(geom, (void *)hair->curve_segment_offset);
|
||||
rtcSetGeometryUserData(geom, (void *)hair->optix_prim_offset);
|
||||
rtcCommitGeometry(geom);
|
||||
}
|
||||
}
|
||||
|
@@ -521,7 +521,7 @@ endif()
|
||||
if(WITH_CYCLES_CUDA_BINARIES OR NOT WITH_CUDA_DYNLOAD)
|
||||
find_package(CUDA) # Try to auto locate CUDA toolkit
|
||||
if(CUDA_FOUND)
|
||||
message(STATUS "Found CUDA ${CUDA_NVCC_EXECUTABLE} (${CUDA_VERSION})")
|
||||
message(STATUS "CUDA nvcc = ${CUDA_NVCC_EXECUTABLE}")
|
||||
else()
|
||||
message(STATUS "CUDA compiler not found, disabling WITH_CYCLES_CUDA_BINARIES")
|
||||
set(WITH_CYCLES_CUDA_BINARIES OFF)
|
||||
@@ -532,23 +532,4 @@ if(WITH_CYCLES_CUDA_BINARIES OR NOT WITH_CUDA_DYNLOAD)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
###########################################################################
|
||||
# HIP
|
||||
###########################################################################
|
||||
|
||||
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||
find_package(HIP)
|
||||
if(HIP_FOUND)
|
||||
message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
|
||||
else()
|
||||
message(STATUS "HIP compiler not found, disabling WITH_CYCLES_HIP_BINARIES")
|
||||
set(WITH_CYCLES_HIP_BINARIES OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT WITH_HIP_DYNLOAD)
|
||||
set(WITH_HIP_DYNLOAD ON)
|
||||
endif()
|
||||
|
||||
unset(_cycles_lib_dir)
|
||||
|
@@ -156,16 +156,10 @@ macro(cycles_target_link_libraries target)
|
||||
${PLATFORM_LINKLIBS}
|
||||
)
|
||||
|
||||
if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX)
|
||||
if(WITH_CUDA_DYNLOAD)
|
||||
target_link_libraries(${target} extern_cuew)
|
||||
else()
|
||||
target_link_libraries(${target} ${CUDA_CUDA_LIBRARY})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
|
||||
target_link_libraries(${target} extern_hipew)
|
||||
if(WITH_CUDA_DYNLOAD)
|
||||
target_link_libraries(${target} extern_cuew)
|
||||
else()
|
||||
target_link_libraries(${target} ${CUDA_CUDA_LIBRARY})
|
||||
endif()
|
||||
|
||||
if(CYCLES_STANDALONE_REPOSITORY)
|
||||
|
@@ -22,25 +22,16 @@ set(INC_SYS
|
||||
../../../extern/clew/include
|
||||
)
|
||||
|
||||
if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
|
||||
if(WITH_CUDA_DYNLOAD)
|
||||
list(APPEND INC
|
||||
../../../extern/cuew/include
|
||||
)
|
||||
add_definitions(-DWITH_CUDA_DYNLOAD)
|
||||
else()
|
||||
list(APPEND INC_SYS
|
||||
${CUDA_TOOLKIT_INCLUDE}
|
||||
)
|
||||
add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
|
||||
if(WITH_CUDA_DYNLOAD)
|
||||
list(APPEND INC
|
||||
../../../extern/hipew/include
|
||||
../../../extern/cuew/include
|
||||
)
|
||||
add_definitions(-DWITH_HIP_DYNLOAD)
|
||||
add_definitions(-DWITH_CUDA_DYNLOAD)
|
||||
else()
|
||||
list(APPEND INC_SYS
|
||||
${CUDA_TOOLKIT_INCLUDE}
|
||||
)
|
||||
add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
|
||||
endif()
|
||||
|
||||
set(SRC
|
||||
@@ -79,21 +70,6 @@ set(SRC_CUDA
|
||||
cuda/util.h
|
||||
)
|
||||
|
||||
set(SRC_HIP
|
||||
hip/device.cpp
|
||||
hip/device.h
|
||||
hip/device_impl.cpp
|
||||
hip/device_impl.h
|
||||
hip/graphics_interop.cpp
|
||||
hip/graphics_interop.h
|
||||
hip/kernel.cpp
|
||||
hip/kernel.h
|
||||
hip/queue.cpp
|
||||
hip/queue.h
|
||||
hip/util.cpp
|
||||
hip/util.h
|
||||
)
|
||||
|
||||
set(SRC_DUMMY
|
||||
dummy/device.cpp
|
||||
dummy/device.h
|
||||
@@ -129,21 +105,13 @@ set(LIB
|
||||
${CYCLES_GL_LIBRARIES}
|
||||
)
|
||||
|
||||
if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
|
||||
if(WITH_CUDA_DYNLOAD)
|
||||
list(APPEND LIB
|
||||
extern_cuew
|
||||
)
|
||||
else()
|
||||
list(APPEND LIB
|
||||
${CUDA_CUDA_LIBRARY}
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
|
||||
if(WITH_CUDA_DYNLOAD)
|
||||
list(APPEND LIB
|
||||
extern_hipew
|
||||
extern_cuew
|
||||
)
|
||||
else()
|
||||
list(APPEND LIB
|
||||
${CUDA_CUDA_LIBRARY}
|
||||
)
|
||||
endif()
|
||||
|
||||
@@ -152,9 +120,6 @@ add_definitions(${GL_DEFINITIONS})
|
||||
if(WITH_CYCLES_DEVICE_CUDA)
|
||||
add_definitions(-DWITH_CUDA)
|
||||
endif()
|
||||
if(WITH_CYCLES_DEVICE_HIP)
|
||||
add_definitions(-DWITH_HIP)
|
||||
endif()
|
||||
if(WITH_CYCLES_DEVICE_OPTIX)
|
||||
add_definitions(-DWITH_OPTIX)
|
||||
endif()
|
||||
@@ -175,7 +140,6 @@ cycles_add_library(cycles_device "${LIB}"
|
||||
${SRC}
|
||||
${SRC_CPU}
|
||||
${SRC_CUDA}
|
||||
${SRC_HIP}
|
||||
${SRC_DUMMY}
|
||||
${SRC_MULTI}
|
||||
${SRC_OPTIX}
|
||||
|
@@ -54,6 +54,7 @@
|
||||
#include "util/util_function.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_opengl.h"
|
||||
#include "util/util_openimagedenoise.h"
|
||||
#include "util/util_optimization.h"
|
||||
#include "util/util_progress.h"
|
||||
@@ -169,7 +170,7 @@ void CPUDevice::mem_copy_to(device_memory &mem)
|
||||
}
|
||||
|
||||
void CPUDevice::mem_copy_from(
|
||||
device_memory & /*mem*/, size_t /*y*/, size_t /*w*/, size_t /*h*/, size_t /*elem*/)
|
||||
device_memory & /*mem*/, int /*y*/, int /*w*/, int /*h*/, int /*elem*/)
|
||||
{
|
||||
/* no-op */
|
||||
}
|
||||
@@ -203,7 +204,7 @@ void CPUDevice::mem_free(device_memory &mem)
|
||||
}
|
||||
}
|
||||
|
||||
device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
|
||||
device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
|
||||
{
|
||||
return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
|
||||
}
|
||||
@@ -297,6 +298,154 @@ void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
Device::build_bvh(bvh, progress, refit);
|
||||
}
|
||||
|
||||
#if 0
|
||||
void CPUDevice::render(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
|
||||
{
|
||||
const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
|
||||
|
||||
scoped_timer timer(&tile.buffers->render_time);
|
||||
|
||||
Coverage coverage(kg, tile);
|
||||
if (use_coverage) {
|
||||
coverage.init_path_trace();
|
||||
}
|
||||
|
||||
float *render_buffer = (float *)tile.buffer;
|
||||
int start_sample = tile.start_sample;
|
||||
int end_sample = tile.start_sample + tile.num_samples;
|
||||
|
||||
/* Needed for Embree. */
|
||||
SIMD_SET_FLUSH_TO_ZERO;
|
||||
|
||||
for (int sample = start_sample; sample < end_sample; sample++) {
|
||||
if (task.get_cancel() || TaskPool::canceled()) {
|
||||
if (task.need_finish_queue == false)
|
||||
break;
|
||||
}
|
||||
|
||||
if (tile.stealing_state == RenderTile::CAN_BE_STOLEN && task.get_tile_stolen()) {
|
||||
tile.stealing_state = RenderTile::WAS_STOLEN;
|
||||
break;
|
||||
}
|
||||
|
||||
if (tile.task == RenderTile::PATH_TRACE) {
|
||||
for (int y = tile.y; y < tile.y + tile.h; y++) {
|
||||
for (int x = tile.x; x < tile.x + tile.w; x++) {
|
||||
if (use_coverage) {
|
||||
coverage.init_pixel(x, y);
|
||||
}
|
||||
kernels.path_trace(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int y = tile.y; y < tile.y + tile.h; y++) {
|
||||
for (int x = tile.x; x < tile.x + tile.w; x++) {
|
||||
kernels.bake(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
tile.sample = sample + 1;
|
||||
|
||||
if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(sample)) {
|
||||
const bool stop = adaptive_sampling_filter(kg, tile, sample);
|
||||
if (stop) {
|
||||
const int num_progress_samples = end_sample - sample;
|
||||
tile.sample = end_sample;
|
||||
task.update_progress(&tile, tile.w * tile.h * num_progress_samples);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
task.update_progress(&tile, tile.w * tile.h);
|
||||
}
|
||||
if (use_coverage) {
|
||||
coverage.finalize();
|
||||
}
|
||||
|
||||
if (task.adaptive_sampling.use && (tile.stealing_state != RenderTile::WAS_STOLEN)) {
|
||||
adaptive_sampling_post(tile, kg);
|
||||
}
|
||||
}
|
||||
|
||||
void CPUDevice::thread_render(DeviceTask &task)
|
||||
{
|
||||
if (TaskPool::canceled()) {
|
||||
if (task.need_finish_queue == false)
|
||||
return;
|
||||
}
|
||||
|
||||
/* allocate buffer for kernel globals */
|
||||
CPUKernelThreadGlobals kg(kernel_globals, get_cpu_osl_memory());
|
||||
|
||||
profiler.add_state(&kg.profiler);
|
||||
|
||||
/* NLM denoiser. */
|
||||
DenoisingTask *denoising = NULL;
|
||||
|
||||
/* OpenImageDenoise: we can only denoise with one thread at a time, so to
|
||||
* avoid waiting with mutex locks in the denoiser, we let only a single
|
||||
* thread acquire denoising tiles. */
|
||||
uint tile_types = task.tile_types;
|
||||
bool hold_denoise_lock = false;
|
||||
if ((tile_types & RenderTile::DENOISE) && task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
|
||||
if (!oidn_task_lock.try_lock()) {
|
||||
tile_types &= ~RenderTile::DENOISE;
|
||||
hold_denoise_lock = true;
|
||||
}
|
||||
}
|
||||
|
||||
RenderTile tile;
|
||||
while (task.acquire_tile(this, tile, tile_types)) {
|
||||
if (tile.task == RenderTile::PATH_TRACE) {
|
||||
render(task, tile, &kg);
|
||||
}
|
||||
else if (tile.task == RenderTile::BAKE) {
|
||||
render(task, tile, &kg);
|
||||
}
|
||||
else if (tile.task == RenderTile::DENOISE) {
|
||||
denoise_openimagedenoise(task, tile);
|
||||
task.update_progress(&tile, tile.w * tile.h);
|
||||
}
|
||||
|
||||
task.release_tile(tile);
|
||||
|
||||
if (TaskPool::canceled()) {
|
||||
if (task.need_finish_queue == false)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (hold_denoise_lock) {
|
||||
oidn_task_lock.unlock();
|
||||
}
|
||||
|
||||
profiler.remove_state(&kg.profiler);
|
||||
|
||||
delete denoising;
|
||||
}
|
||||
|
||||
void CPUDevice::thread_denoise(DeviceTask &task)
|
||||
{
|
||||
RenderTile tile;
|
||||
tile.x = task.x;
|
||||
tile.y = task.y;
|
||||
tile.w = task.w;
|
||||
tile.h = task.h;
|
||||
tile.buffer = task.buffer;
|
||||
tile.sample = task.sample + task.num_samples;
|
||||
tile.num_samples = task.num_samples;
|
||||
tile.start_sample = task.sample;
|
||||
tile.offset = task.offset;
|
||||
tile.stride = task.stride;
|
||||
tile.buffers = task.buffers;
|
||||
|
||||
denoise_openimagedenoise(task, tile);
|
||||
|
||||
task.update_progress(&tile, tile.w * tile.h);
|
||||
}
|
||||
#endif
|
||||
|
||||
const CPUKernels *CPUDevice::get_cpu_kernels() const
|
||||
{
|
||||
return &kernels;
|
||||
|
@@ -72,13 +72,10 @@ class CPUDevice : public Device {
|
||||
|
||||
virtual void mem_alloc(device_memory &mem) override;
|
||||
virtual void mem_copy_to(device_memory &mem) override;
|
||||
virtual void mem_copy_from(
|
||||
device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
|
||||
virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
|
||||
virtual void mem_zero(device_memory &mem) override;
|
||||
virtual void mem_free(device_memory &mem) override;
|
||||
virtual device_ptr mem_alloc_sub_ptr(device_memory &mem,
|
||||
size_t offset,
|
||||
size_t /*size*/) override;
|
||||
virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
|
||||
|
||||
virtual void const_copy_to(const char *name, void *host, size_t size) override;
|
||||
|
||||
|
@@ -31,6 +31,7 @@
|
||||
# include "util/util_logging.h"
|
||||
# include "util/util_map.h"
|
||||
# include "util/util_md5.h"
|
||||
# include "util/util_opengl.h"
|
||||
# include "util/util_path.h"
|
||||
# include "util/util_string.h"
|
||||
# include "util/util_system.h"
|
||||
@@ -836,7 +837,7 @@ void CUDADevice::mem_copy_to(device_memory &mem)
|
||||
}
|
||||
}
|
||||
|
||||
void CUDADevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
|
||||
void CUDADevice::mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
|
||||
{
|
||||
if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
|
||||
assert(!"mem_copy_from not supported for textures.");
|
||||
@@ -890,7 +891,7 @@ void CUDADevice::mem_free(device_memory &mem)
|
||||
}
|
||||
}
|
||||
|
||||
device_ptr CUDADevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
|
||||
device_ptr CUDADevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
|
||||
{
|
||||
return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
|
||||
}
|
||||
@@ -1168,6 +1169,141 @@ void CUDADevice::tex_free(device_texture &mem)
|
||||
}
|
||||
}
|
||||
|
||||
# if 0
|
||||
void CUDADevice::render(DeviceTask &task,
|
||||
RenderTile &rtile,
|
||||
device_vector<KernelWorkTile> &work_tiles)
|
||||
{
|
||||
scoped_timer timer(&rtile.buffers->render_time);
|
||||
|
||||
if (have_error())
|
||||
return;
|
||||
|
||||
CUDAContextScope scope(this);
|
||||
CUfunction cuRender;
|
||||
|
||||
/* Get kernel function. */
|
||||
if (rtile.task == RenderTile::BAKE) {
|
||||
cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
|
||||
}
|
||||
else {
|
||||
cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
|
||||
}
|
||||
|
||||
if (have_error()) {
|
||||
return;
|
||||
}
|
||||
|
||||
cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
|
||||
|
||||
/* Allocate work tile. */
|
||||
work_tiles.alloc(1);
|
||||
|
||||
KernelWorkTile *wtile = work_tiles.data();
|
||||
wtile->x = rtile.x;
|
||||
wtile->y = rtile.y;
|
||||
wtile->w = rtile.w;
|
||||
wtile->h = rtile.h;
|
||||
wtile->offset = rtile.offset;
|
||||
wtile->stride = rtile.stride;
|
||||
wtile->buffer = (float *)(CUdeviceptr)rtile.buffer;
|
||||
|
||||
/* Prepare work size. More step samples render faster, but for now we
|
||||
* remain conservative for GPUs connected to a display to avoid driver
|
||||
* timeouts and display freezing. */
|
||||
int min_blocks, num_threads_per_block;
|
||||
cuda_assert(
|
||||
cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
|
||||
if (!info.display_device) {
|
||||
min_blocks *= 8;
|
||||
}
|
||||
|
||||
uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
|
||||
|
||||
/* Render all samples. */
|
||||
uint start_sample = rtile.start_sample;
|
||||
uint end_sample = rtile.start_sample + rtile.num_samples;
|
||||
|
||||
for (int sample = start_sample; sample < end_sample;) {
|
||||
/* Setup and copy work tile to device. */
|
||||
wtile->start_sample = sample;
|
||||
wtile->num_samples = step_samples;
|
||||
if (task.adaptive_sampling.use) {
|
||||
wtile->num_samples = task.adaptive_sampling.align_samples(sample, step_samples);
|
||||
}
|
||||
wtile->num_samples = min(wtile->num_samples, end_sample - sample);
|
||||
work_tiles.copy_to_device();
|
||||
|
||||
CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
|
||||
uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
|
||||
uint num_blocks = divide_up(total_work_size, num_threads_per_block);
|
||||
|
||||
/* Launch kernel. */
|
||||
void *args[] = {&d_work_tiles, &total_work_size};
|
||||
|
||||
cuda_assert(
|
||||
cuLaunchKernel(cuRender, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
|
||||
|
||||
/* Run the adaptive sampling kernels at selected samples aligned to step samples. */
|
||||
uint filter_sample = sample + wtile->num_samples - 1;
|
||||
if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) {
|
||||
adaptive_sampling_filter(filter_sample, wtile, d_work_tiles);
|
||||
}
|
||||
|
||||
cuda_assert(cuCtxSynchronize());
|
||||
|
||||
/* Update progress. */
|
||||
sample += wtile->num_samples;
|
||||
rtile.sample = sample;
|
||||
task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
|
||||
|
||||
if (task.get_cancel()) {
|
||||
if (task.need_finish_queue == false)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Finalize adaptive sampling. */
|
||||
if (task.adaptive_sampling.use) {
|
||||
CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
|
||||
adaptive_sampling_post(rtile, wtile, d_work_tiles);
|
||||
cuda_assert(cuCtxSynchronize());
|
||||
task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
|
||||
}
|
||||
}
|
||||
|
||||
void CUDADevice::thread_run(DeviceTask &task)
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
|
||||
if (task.type == DeviceTask::RENDER) {
|
||||
device_vector<KernelWorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
|
||||
|
||||
/* keep rendering tiles until done */
|
||||
RenderTile tile;
|
||||
DenoisingTask denoising(this, task);
|
||||
|
||||
while (task.acquire_tile(this, tile, task.tile_types)) {
|
||||
if (tile.task == RenderTile::PATH_TRACE) {
|
||||
render(task, tile, work_tiles);
|
||||
}
|
||||
else if (tile.task == RenderTile::BAKE) {
|
||||
render(task, tile, work_tiles);
|
||||
}
|
||||
|
||||
task.release_tile(tile);
|
||||
|
||||
if (task.get_cancel()) {
|
||||
if (task.need_finish_queue == false)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
work_tiles.free();
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
unique_ptr<DeviceQueue> CUDADevice::gpu_queue_create()
|
||||
{
|
||||
return make_unique<CUDADeviceQueue>(this);
|
||||
|
@@ -26,6 +26,7 @@
|
||||
# ifdef WITH_CUDA_DYNLOAD
|
||||
# include "cuew.h"
|
||||
# else
|
||||
# include "util/util_opengl.h"
|
||||
# include <cuda.h>
|
||||
# include <cudaGL.h>
|
||||
# endif
|
||||
@@ -119,13 +120,13 @@ class CUDADevice : public Device {
|
||||
|
||||
void mem_copy_to(device_memory &mem) override;
|
||||
|
||||
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
|
||||
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
|
||||
|
||||
void mem_zero(device_memory &mem) override;
|
||||
|
||||
void mem_free(device_memory &mem) override;
|
||||
|
||||
device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
|
||||
device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
|
||||
|
||||
virtual void const_copy_to(const char *name, void *host, size_t size) override;
|
||||
|
||||
|
@@ -37,15 +37,14 @@ CUDADeviceGraphicsInterop::~CUDADeviceGraphicsInterop()
|
||||
}
|
||||
}
|
||||
|
||||
void CUDADeviceGraphicsInterop::set_display_interop(
|
||||
const DisplayDriver::GraphicsInterop &display_interop)
|
||||
void CUDADeviceGraphicsInterop::set_destination(
|
||||
const DeviceGraphicsInteropDestination &destination)
|
||||
{
|
||||
const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
|
||||
display_interop.buffer_height;
|
||||
const int64_t new_buffer_area = int64_t(destination.buffer_width) * destination.buffer_height;
|
||||
|
||||
need_clear_ = display_interop.need_clear;
|
||||
need_clear_ = destination.need_clear;
|
||||
|
||||
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
|
||||
if (opengl_pbo_id_ == destination.opengl_pbo_id && buffer_area_ == new_buffer_area) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -56,12 +55,12 @@ void CUDADeviceGraphicsInterop::set_display_interop(
|
||||
}
|
||||
|
||||
const CUresult result = cuGraphicsGLRegisterBuffer(
|
||||
&cu_graphics_resource_, display_interop.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
|
||||
&cu_graphics_resource_, destination.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
|
||||
if (result != CUDA_SUCCESS) {
|
||||
LOG(ERROR) << "Error registering OpenGL buffer: " << cuewErrorString(result);
|
||||
}
|
||||
|
||||
opengl_pbo_id_ = display_interop.opengl_pbo_id;
|
||||
opengl_pbo_id_ = destination.opengl_pbo_id;
|
||||
buffer_area_ = new_buffer_area;
|
||||
}
|
||||
|
||||
|
@@ -41,7 +41,7 @@ class CUDADeviceGraphicsInterop : public DeviceGraphicsInterop {
|
||||
CUDADeviceGraphicsInterop &operator=(const CUDADeviceGraphicsInterop &other) = delete;
|
||||
CUDADeviceGraphicsInterop &operator=(CUDADeviceGraphicsInterop &&other) = delete;
|
||||
|
||||
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
|
||||
virtual void set_destination(const DeviceGraphicsInteropDestination &destination) override;
|
||||
|
||||
virtual device_ptr map() override;
|
||||
virtual void unmap() override;
|
||||
|
@@ -116,18 +116,18 @@ bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *ar
|
||||
}
|
||||
|
||||
/* Launch kernel. */
|
||||
assert_success(cuLaunchKernel(cuda_kernel.function,
|
||||
num_blocks,
|
||||
1,
|
||||
1,
|
||||
num_threads_per_block,
|
||||
1,
|
||||
1,
|
||||
shared_mem_bytes,
|
||||
cuda_stream_,
|
||||
args,
|
||||
0),
|
||||
"enqueue");
|
||||
cuda_device_assert(cuda_device_,
|
||||
cuLaunchKernel(cuda_kernel.function,
|
||||
num_blocks,
|
||||
1,
|
||||
1,
|
||||
num_threads_per_block,
|
||||
1,
|
||||
1,
|
||||
shared_mem_bytes,
|
||||
cuda_stream_,
|
||||
args,
|
||||
0));
|
||||
|
||||
return !(cuda_device_->have_error());
|
||||
}
|
||||
@@ -139,8 +139,7 @@ bool CUDADeviceQueue::synchronize()
|
||||
}
|
||||
|
||||
const CUDAContextScope scope(cuda_device_);
|
||||
assert_success(cuStreamSynchronize(cuda_stream_), "synchronize");
|
||||
|
||||
cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_));
|
||||
debug_synchronize();
|
||||
|
||||
return !(cuda_device_->have_error());
|
||||
@@ -163,9 +162,9 @@ void CUDADeviceQueue::zero_to_device(device_memory &mem)
|
||||
assert(mem.device_pointer != 0);
|
||||
|
||||
const CUDAContextScope scope(cuda_device_);
|
||||
assert_success(
|
||||
cuMemsetD8Async((CUdeviceptr)mem.device_pointer, 0, mem.memory_size(), cuda_stream_),
|
||||
"zero_to_device");
|
||||
cuda_device_assert(
|
||||
cuda_device_,
|
||||
cuMemsetD8Async((CUdeviceptr)mem.device_pointer, 0, mem.memory_size(), cuda_stream_));
|
||||
}
|
||||
|
||||
void CUDADeviceQueue::copy_to_device(device_memory &mem)
|
||||
@@ -186,10 +185,10 @@ void CUDADeviceQueue::copy_to_device(device_memory &mem)
|
||||
|
||||
/* Copy memory to device. */
|
||||
const CUDAContextScope scope(cuda_device_);
|
||||
assert_success(
|
||||
cuda_device_assert(
|
||||
cuda_device_,
|
||||
cuMemcpyHtoDAsync(
|
||||
(CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size(), cuda_stream_),
|
||||
"copy_to_device");
|
||||
(CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size(), cuda_stream_));
|
||||
}
|
||||
|
||||
void CUDADeviceQueue::copy_from_device(device_memory &mem)
|
||||
@@ -205,19 +204,10 @@ void CUDADeviceQueue::copy_from_device(device_memory &mem)
|
||||
|
||||
/* Copy memory from device. */
|
||||
const CUDAContextScope scope(cuda_device_);
|
||||
assert_success(
|
||||
cuda_device_assert(
|
||||
cuda_device_,
|
||||
cuMemcpyDtoHAsync(
|
||||
mem.host_pointer, (CUdeviceptr)mem.device_pointer, mem.memory_size(), cuda_stream_),
|
||||
"copy_from_device");
|
||||
}
|
||||
|
||||
void CUDADeviceQueue::assert_success(CUresult result, const char *operation)
|
||||
{
|
||||
if (result != CUDA_SUCCESS) {
|
||||
const char *name = cuewErrorString(result);
|
||||
cuda_device_->set_error(string_printf(
|
||||
"%s in CUDA queue %s (%s)", name, operation, debug_active_kernels().c_str()));
|
||||
}
|
||||
mem.host_pointer, (CUdeviceptr)mem.device_pointer, mem.memory_size(), cuda_stream_));
|
||||
}
|
||||
|
||||
unique_ptr<DeviceGraphicsInterop> CUDADeviceQueue::graphics_interop_create()
|
||||
|
@@ -60,8 +60,6 @@ class CUDADeviceQueue : public DeviceQueue {
|
||||
protected:
|
||||
CUDADevice *cuda_device_;
|
||||
CUstream cuda_stream_;
|
||||
|
||||
void assert_success(CUresult result, const char *operation);
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -25,7 +25,6 @@
|
||||
#include "device/cpu/device.h"
|
||||
#include "device/cuda/device.h"
|
||||
#include "device/dummy/device.h"
|
||||
#include "device/hip/device.h"
|
||||
#include "device/multi/device.h"
|
||||
#include "device/optix/device.h"
|
||||
|
||||
@@ -33,6 +32,7 @@
|
||||
#include "util/util_half.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_math.h"
|
||||
#include "util/util_opengl.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_system.h"
|
||||
#include "util/util_time.h"
|
||||
@@ -47,7 +47,6 @@ thread_mutex Device::device_mutex;
|
||||
vector<DeviceInfo> Device::cuda_devices;
|
||||
vector<DeviceInfo> Device::optix_devices;
|
||||
vector<DeviceInfo> Device::cpu_devices;
|
||||
vector<DeviceInfo> Device::hip_devices;
|
||||
uint Device::devices_initialized_mask = 0;
|
||||
|
||||
/* Device */
|
||||
@@ -98,14 +97,6 @@ Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
device = device_optix_create(info, stats, profiler);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#ifdef WITH_HIP
|
||||
case DEVICE_HIP:
|
||||
if (device_hip_init())
|
||||
device = device_hip_create(info, stats, profiler);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -127,8 +118,6 @@ DeviceType Device::type_from_string(const char *name)
|
||||
return DEVICE_OPTIX;
|
||||
else if (strcmp(name, "MULTI") == 0)
|
||||
return DEVICE_MULTI;
|
||||
else if (strcmp(name, "HIP") == 0)
|
||||
return DEVICE_HIP;
|
||||
|
||||
return DEVICE_NONE;
|
||||
}
|
||||
@@ -143,8 +132,6 @@ string Device::string_from_type(DeviceType type)
|
||||
return "OPTIX";
|
||||
else if (type == DEVICE_MULTI)
|
||||
return "MULTI";
|
||||
else if (type == DEVICE_HIP)
|
||||
return "HIP";
|
||||
|
||||
return "";
|
||||
}
|
||||
@@ -159,10 +146,6 @@ vector<DeviceType> Device::available_types()
|
||||
#ifdef WITH_OPTIX
|
||||
types.push_back(DEVICE_OPTIX);
|
||||
#endif
|
||||
#ifdef WITH_HIP
|
||||
types.push_back(DEVICE_HIP);
|
||||
#endif
|
||||
|
||||
return types;
|
||||
}
|
||||
|
||||
@@ -204,20 +187,6 @@ vector<DeviceInfo> Device::available_devices(uint mask)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef WITH_HIP
|
||||
if (mask & DEVICE_MASK_HIP) {
|
||||
if (!(devices_initialized_mask & DEVICE_MASK_HIP)) {
|
||||
if (device_hip_init()) {
|
||||
device_hip_info(hip_devices);
|
||||
}
|
||||
devices_initialized_mask |= DEVICE_MASK_HIP;
|
||||
}
|
||||
foreach (DeviceInfo &info, hip_devices) {
|
||||
devices.push_back(info);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (mask & DEVICE_MASK_CPU) {
|
||||
if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
|
||||
device_cpu_info(cpu_devices);
|
||||
@@ -258,15 +227,6 @@ string Device::device_capabilities(uint mask)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef WITH_HIP
|
||||
if (mask & DEVICE_MASK_HIP) {
|
||||
if (device_hip_init()) {
|
||||
capabilities += "\nHIP device capabilities:\n";
|
||||
capabilities += device_hip_capabilities();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return capabilities;
|
||||
}
|
||||
|
||||
@@ -355,7 +315,6 @@ void Device::free_memory()
|
||||
devices_initialized_mask = 0;
|
||||
cuda_devices.free_memory();
|
||||
optix_devices.free_memory();
|
||||
hip_devices.free_memory();
|
||||
cpu_devices.free_memory();
|
||||
}
|
||||
|
||||
|
@@ -51,7 +51,6 @@ enum DeviceType {
|
||||
DEVICE_CUDA,
|
||||
DEVICE_MULTI,
|
||||
DEVICE_OPTIX,
|
||||
DEVICE_HIP,
|
||||
DEVICE_DUMMY,
|
||||
};
|
||||
|
||||
@@ -59,7 +58,6 @@ enum DeviceTypeMask {
|
||||
DEVICE_MASK_CPU = (1 << DEVICE_CPU),
|
||||
DEVICE_MASK_CUDA = (1 << DEVICE_CUDA),
|
||||
DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
|
||||
DEVICE_MASK_HIP = (1 << DEVICE_HIP),
|
||||
DEVICE_MASK_ALL = ~0
|
||||
};
|
||||
|
||||
@@ -121,7 +119,7 @@ class Device {
|
||||
|
||||
string error_msg;
|
||||
|
||||
virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, size_t /*offset*/, size_t /*size*/)
|
||||
virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, int /*offset*/, int /*size*/)
|
||||
{
|
||||
/* Only required for devices that implement denoising. */
|
||||
assert(false);
|
||||
@@ -275,7 +273,7 @@ class Device {
|
||||
|
||||
virtual void mem_alloc(device_memory &mem) = 0;
|
||||
virtual void mem_copy_to(device_memory &mem) = 0;
|
||||
virtual void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) = 0;
|
||||
virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) = 0;
|
||||
virtual void mem_zero(device_memory &mem) = 0;
|
||||
virtual void mem_free(device_memory &mem) = 0;
|
||||
|
||||
@@ -286,7 +284,6 @@ class Device {
|
||||
static vector<DeviceInfo> cuda_devices;
|
||||
static vector<DeviceInfo> optix_devices;
|
||||
static vector<DeviceInfo> cpu_devices;
|
||||
static vector<DeviceInfo> hip_devices;
|
||||
static uint devices_initialized_mask;
|
||||
};
|
||||
|
||||
|
@@ -16,12 +16,25 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "render/display_driver.h"
|
||||
|
||||
#include "util/util_types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Information about interoperability destination.
|
||||
* Is provided by the GPUDisplay. */
|
||||
class DeviceGraphicsInteropDestination {
|
||||
public:
|
||||
/* Dimensions of the buffer, in pixels. */
|
||||
int buffer_width = 0;
|
||||
int buffer_height = 0;
|
||||
|
||||
/* OpenGL pixel buffer object. */
|
||||
int opengl_pbo_id = 0;
|
||||
|
||||
/* Clear the entire destination before doing partial write to it. */
|
||||
bool need_clear = false;
|
||||
};
|
||||
|
||||
/* Device-side graphics interoperability support.
|
||||
*
|
||||
* Takes care of holding all the handlers needed by the device to implement interoperability with
|
||||
@@ -33,7 +46,7 @@ class DeviceGraphicsInterop {
|
||||
|
||||
/* Update this device-side graphics interoperability object with the given destination resource
|
||||
* information. */
|
||||
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) = 0;
|
||||
virtual void set_destination(const DeviceGraphicsInteropDestination &destination) = 0;
|
||||
|
||||
virtual device_ptr map() = 0;
|
||||
virtual void unmap() = 0;
|
||||
|
@@ -136,7 +136,7 @@ void device_memory::device_copy_to()
|
||||
}
|
||||
}
|
||||
|
||||
void device_memory::device_copy_from(size_t y, size_t w, size_t h, size_t elem)
|
||||
void device_memory::device_copy_from(int y, int w, int h, int elem)
|
||||
{
|
||||
assert(type != MEM_TEXTURE && type != MEM_READ_ONLY && type != MEM_GLOBAL);
|
||||
device->mem_copy_from(*this, y, w, h, elem);
|
||||
@@ -181,7 +181,7 @@ bool device_memory::is_resident(Device *sub_device) const
|
||||
|
||||
/* Device Sub Ptr */
|
||||
|
||||
device_sub_ptr::device_sub_ptr(device_memory &mem, size_t offset, size_t size) : device(mem.device)
|
||||
device_sub_ptr::device_sub_ptr(device_memory &mem, int offset, int size) : device(mem.device)
|
||||
{
|
||||
ptr = device->mem_alloc_sub_ptr(mem, offset, size);
|
||||
}
|
||||
|
@@ -81,154 +81,154 @@ static constexpr size_t datatype_size(DataType datatype)
|
||||
|
||||
template<typename T> struct device_type_traits {
|
||||
static const DataType data_type = TYPE_UNKNOWN;
|
||||
static const size_t num_elements_cpu = sizeof(T);
|
||||
static const size_t num_elements_gpu = sizeof(T);
|
||||
static const int num_elements_cpu = sizeof(T);
|
||||
static const int num_elements_gpu = sizeof(T);
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uchar> {
|
||||
static const DataType data_type = TYPE_UCHAR;
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static const int num_elements_cpu = 1;
|
||||
static const int num_elements_gpu = 1;
|
||||
static_assert(sizeof(uchar) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uchar2> {
|
||||
static const DataType data_type = TYPE_UCHAR;
|
||||
static const size_t num_elements_cpu = 2;
|
||||
static const size_t num_elements_gpu = 2;
|
||||
static const int num_elements_cpu = 2;
|
||||
static const int num_elements_gpu = 2;
|
||||
static_assert(sizeof(uchar2) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uchar3> {
|
||||
static const DataType data_type = TYPE_UCHAR;
|
||||
static const size_t num_elements_cpu = 3;
|
||||
static const size_t num_elements_gpu = 3;
|
||||
static const int num_elements_cpu = 3;
|
||||
static const int num_elements_gpu = 3;
|
||||
static_assert(sizeof(uchar3) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uchar4> {
|
||||
static const DataType data_type = TYPE_UCHAR;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 4;
|
||||
static_assert(sizeof(uchar4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint> {
|
||||
static const DataType data_type = TYPE_UINT;
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static const int num_elements_cpu = 1;
|
||||
static const int num_elements_gpu = 1;
|
||||
static_assert(sizeof(uint) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint2> {
|
||||
static const DataType data_type = TYPE_UINT;
|
||||
static const size_t num_elements_cpu = 2;
|
||||
static const size_t num_elements_gpu = 2;
|
||||
static const int num_elements_cpu = 2;
|
||||
static const int num_elements_gpu = 2;
|
||||
static_assert(sizeof(uint2) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint3> {
|
||||
static const DataType data_type = TYPE_UINT;
|
||||
static const size_t num_elements_cpu = 3;
|
||||
static const size_t num_elements_gpu = 3;
|
||||
static const int num_elements_cpu = 3;
|
||||
static const int num_elements_gpu = 3;
|
||||
static_assert(sizeof(uint3) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint4> {
|
||||
static const DataType data_type = TYPE_UINT;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 4;
|
||||
static_assert(sizeof(uint4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<int> {
|
||||
static const DataType data_type = TYPE_INT;
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static const int num_elements_cpu = 1;
|
||||
static const int num_elements_gpu = 1;
|
||||
static_assert(sizeof(int) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<int2> {
|
||||
static const DataType data_type = TYPE_INT;
|
||||
static const size_t num_elements_cpu = 2;
|
||||
static const size_t num_elements_gpu = 2;
|
||||
static const int num_elements_cpu = 2;
|
||||
static const int num_elements_gpu = 2;
|
||||
static_assert(sizeof(int2) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<int3> {
|
||||
static const DataType data_type = TYPE_INT;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 3;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 3;
|
||||
static_assert(sizeof(int3) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<int4> {
|
||||
static const DataType data_type = TYPE_INT;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 4;
|
||||
static_assert(sizeof(int4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<float> {
|
||||
static const DataType data_type = TYPE_FLOAT;
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static const int num_elements_cpu = 1;
|
||||
static const int num_elements_gpu = 1;
|
||||
static_assert(sizeof(float) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<float2> {
|
||||
static const DataType data_type = TYPE_FLOAT;
|
||||
static const size_t num_elements_cpu = 2;
|
||||
static const size_t num_elements_gpu = 2;
|
||||
static const int num_elements_cpu = 2;
|
||||
static const int num_elements_gpu = 2;
|
||||
static_assert(sizeof(float2) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<float3> {
|
||||
static const DataType data_type = TYPE_FLOAT;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 3;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 3;
|
||||
static_assert(sizeof(float3) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<float4> {
|
||||
static const DataType data_type = TYPE_FLOAT;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 4;
|
||||
static_assert(sizeof(float4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<half> {
|
||||
static const DataType data_type = TYPE_HALF;
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static const int num_elements_cpu = 1;
|
||||
static const int num_elements_gpu = 1;
|
||||
static_assert(sizeof(half) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<ushort4> {
|
||||
static const DataType data_type = TYPE_UINT16;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 4;
|
||||
static_assert(sizeof(ushort4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint16_t> {
|
||||
static const DataType data_type = TYPE_UINT16;
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static const int num_elements_cpu = 1;
|
||||
static const int num_elements_gpu = 1;
|
||||
static_assert(sizeof(uint16_t) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<half4> {
|
||||
static const DataType data_type = TYPE_HALF;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 4;
|
||||
static_assert(sizeof(half4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint64_t> {
|
||||
static const DataType data_type = TYPE_UINT64;
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static const int num_elements_cpu = 1;
|
||||
static const int num_elements_gpu = 1;
|
||||
static_assert(sizeof(uint64_t) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
@@ -277,7 +277,6 @@ class device_memory {
|
||||
protected:
|
||||
friend class CUDADevice;
|
||||
friend class OptiXDevice;
|
||||
friend class HIPDevice;
|
||||
|
||||
/* Only create through subclasses. */
|
||||
device_memory(Device *device, const char *name, MemoryType type);
|
||||
@@ -297,7 +296,7 @@ class device_memory {
|
||||
void device_alloc();
|
||||
void device_free();
|
||||
void device_copy_to();
|
||||
void device_copy_from(size_t y, size_t w, size_t h, size_t elem);
|
||||
void device_copy_from(int y, int w, int h, int elem);
|
||||
void device_zero();
|
||||
|
||||
bool device_is_cpu();
|
||||
@@ -566,7 +565,7 @@ template<typename T> class device_vector : public device_memory {
|
||||
device_copy_from(0, data_width, (data_height == 0) ? 1 : data_height, sizeof(T));
|
||||
}
|
||||
|
||||
void copy_from_device(size_t y, size_t w, size_t h)
|
||||
void copy_from_device(int y, int w, int h)
|
||||
{
|
||||
device_copy_from(y, w, h, sizeof(T));
|
||||
}
|
||||
@@ -602,7 +601,7 @@ template<typename T> class device_vector : public device_memory {
|
||||
|
||||
class device_sub_ptr {
|
||||
public:
|
||||
device_sub_ptr(device_memory &mem, size_t offset, size_t size);
|
||||
device_sub_ptr(device_memory &mem, int offset, int size);
|
||||
~device_sub_ptr();
|
||||
|
||||
device_ptr operator*() const
|
||||
|
@@ -57,9 +57,8 @@ void DeviceQueue::debug_init_execution()
|
||||
{
|
||||
if (VLOG_IS_ON(3)) {
|
||||
last_sync_time_ = time_dt();
|
||||
last_kernels_enqueued_ = 0;
|
||||
}
|
||||
|
||||
last_kernels_enqueued_ = 0;
|
||||
}
|
||||
|
||||
void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size)
|
||||
@@ -67,9 +66,8 @@ void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size)
|
||||
if (VLOG_IS_ON(3)) {
|
||||
VLOG(4) << "GPU queue launch " << device_kernel_as_string(kernel) << ", work_size "
|
||||
<< work_size;
|
||||
last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel);
|
||||
}
|
||||
|
||||
last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel);
|
||||
}
|
||||
|
||||
void DeviceQueue::debug_synchronize()
|
||||
@@ -82,14 +80,8 @@ void DeviceQueue::debug_synchronize()
|
||||
stats_kernel_time_[last_kernels_enqueued_] += elapsed_time;
|
||||
|
||||
last_sync_time_ = new_time;
|
||||
last_kernels_enqueued_ = 0;
|
||||
}
|
||||
|
||||
last_kernels_enqueued_ = 0;
|
||||
}
|
||||
|
||||
string DeviceQueue::debug_active_kernels()
|
||||
{
|
||||
return device_kernel_mask_as_string(last_kernels_enqueued_);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -21,7 +21,6 @@
|
||||
#include "device/device_graphics_interop.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_unique_ptr.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
@@ -102,7 +101,6 @@ class DeviceQueue {
|
||||
void debug_init_execution();
|
||||
void debug_enqueue(DeviceKernel kernel, const int work_size);
|
||||
void debug_synchronize();
|
||||
string debug_active_kernels();
|
||||
|
||||
/* Combination of kernels enqueued together sync last synchronize. */
|
||||
DeviceKernelMask last_kernels_enqueued_;
|
||||
|
@@ -48,7 +48,7 @@ class DummyDevice : public Device {
|
||||
{
|
||||
}
|
||||
|
||||
virtual void mem_copy_from(device_memory &, size_t, size_t, size_t, size_t) override
|
||||
virtual void mem_copy_from(device_memory &, int, int, int, int) override
|
||||
{
|
||||
}
|
||||
|
||||
|
@@ -1,276 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "device/hip/device.h"
|
||||
|
||||
#include "util/util_logging.h"
|
||||
|
||||
#ifdef WITH_HIP
|
||||
# include "device/device.h"
|
||||
# include "device/hip/device_impl.h"
|
||||
|
||||
# include "util/util_string.h"
|
||||
# include "util/util_windows.h"
|
||||
#endif /* WITH_HIP */
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
bool device_hip_init()
|
||||
{
|
||||
#if !defined(WITH_HIP)
|
||||
return false;
|
||||
#elif defined(WITH_HIP_DYNLOAD)
|
||||
static bool initialized = false;
|
||||
static bool result = false;
|
||||
|
||||
if (initialized)
|
||||
return result;
|
||||
|
||||
initialized = true;
|
||||
int hipew_result = hipewInit(HIPEW_INIT_HIP);
|
||||
if (hipew_result == HIPEW_SUCCESS) {
|
||||
VLOG(1) << "HIPEW initialization succeeded";
|
||||
if (HIPDevice::have_precompiled_kernels()) {
|
||||
VLOG(1) << "Found precompiled kernels";
|
||||
result = true;
|
||||
}
|
||||
else if (hipewCompilerPath() != NULL) {
|
||||
VLOG(1) << "Found HIPCC " << hipewCompilerPath();
|
||||
result = true;
|
||||
}
|
||||
else {
|
||||
VLOG(1) << "Neither precompiled kernels nor HIPCC was found,"
|
||||
<< " unable to use HIP";
|
||||
}
|
||||
}
|
||||
else {
|
||||
VLOG(1) << "HIPEW initialization failed: "
|
||||
<< ((hipew_result == HIPEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
|
||||
"Error opening the library");
|
||||
}
|
||||
|
||||
return result;
|
||||
#else /* WITH_HIP_DYNLOAD */
|
||||
return true;
|
||||
#endif /* WITH_HIP_DYNLOAD */
|
||||
}
|
||||
|
||||
Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
{
|
||||
#ifdef WITH_HIP
|
||||
return new HIPDevice(info, stats, profiler);
|
||||
#else
|
||||
(void)info;
|
||||
(void)stats;
|
||||
(void)profiler;
|
||||
|
||||
LOG(FATAL) << "Request to create HIP device without compiled-in support. Should never happen.";
|
||||
|
||||
return nullptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef WITH_HIP
|
||||
static hipError_t device_hip_safe_init()
|
||||
{
|
||||
# ifdef _WIN32
|
||||
__try {
|
||||
return hipInit(0);
|
||||
}
|
||||
__except (EXCEPTION_EXECUTE_HANDLER) {
|
||||
/* Ignore crashes inside the HIP driver and hope we can
|
||||
* survive even with corrupted HIP installs. */
|
||||
fprintf(stderr, "Cycles HIP: driver crashed, continuing without HIP.\n");
|
||||
}
|
||||
|
||||
return hipErrorNoDevice;
|
||||
# else
|
||||
return hipInit(0);
|
||||
# endif
|
||||
}
|
||||
#endif /* WITH_HIP */
|
||||
|
||||
void device_hip_info(vector<DeviceInfo> &devices)
|
||||
{
|
||||
#ifdef WITH_HIP
|
||||
hipError_t result = device_hip_safe_init();
|
||||
if (result != hipSuccess) {
|
||||
if (result != hipErrorNoDevice)
|
||||
fprintf(stderr, "HIP hipInit: %s\n", hipewErrorString(result));
|
||||
return;
|
||||
}
|
||||
|
||||
int count = 0;
|
||||
result = hipGetDeviceCount(&count);
|
||||
if (result != hipSuccess) {
|
||||
fprintf(stderr, "HIP hipGetDeviceCount: %s\n", hipewErrorString(result));
|
||||
return;
|
||||
}
|
||||
|
||||
vector<DeviceInfo> display_devices;
|
||||
|
||||
for (int num = 0; num < count; num++) {
|
||||
char name[256];
|
||||
|
||||
result = hipDeviceGetName(name, 256, num);
|
||||
if (result != hipSuccess) {
|
||||
fprintf(stderr, "HIP :hipDeviceGetName: %s\n", hipewErrorString(result));
|
||||
continue;
|
||||
}
|
||||
|
||||
int major;
|
||||
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, num);
|
||||
// TODO : (Arya) What is the last major version we are supporting?
|
||||
|
||||
DeviceInfo info;
|
||||
|
||||
info.type = DEVICE_HIP;
|
||||
info.description = string(name);
|
||||
info.num = num;
|
||||
|
||||
info.has_half_images = (major >= 3);
|
||||
info.has_nanovdb = true;
|
||||
info.denoisers = 0;
|
||||
|
||||
info.has_gpu_queue = true;
|
||||
/* Check if the device has P2P access to any other device in the system. */
|
||||
for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
|
||||
if (num != peer_num) {
|
||||
int can_access = 0;
|
||||
hipDeviceCanAccessPeer(&can_access, num, peer_num);
|
||||
info.has_peer_memory = (can_access != 0);
|
||||
}
|
||||
}
|
||||
|
||||
int pci_location[3] = {0, 0, 0};
|
||||
hipDeviceGetAttribute(&pci_location[0], hipDeviceAttributePciDomainID, num);
|
||||
hipDeviceGetAttribute(&pci_location[1], hipDeviceAttributePciBusId, num);
|
||||
hipDeviceGetAttribute(&pci_location[2], hipDeviceAttributePciDeviceId, num);
|
||||
info.id = string_printf("HIP_%s_%04x:%02x:%02x",
|
||||
name,
|
||||
(unsigned int)pci_location[0],
|
||||
(unsigned int)pci_location[1],
|
||||
(unsigned int)pci_location[2]);
|
||||
|
||||
/* If device has a kernel timeout and no compute preemption, we assume
|
||||
* it is connected to a display and will freeze the display while doing
|
||||
* computations. */
|
||||
int timeout_attr = 0, preempt_attr = 0;
|
||||
hipDeviceGetAttribute(&timeout_attr, hipDeviceAttributeKernelExecTimeout, num);
|
||||
|
||||
if (timeout_attr && !preempt_attr) {
|
||||
VLOG(1) << "Device is recognized as display.";
|
||||
info.description += " (Display)";
|
||||
info.display_device = true;
|
||||
display_devices.push_back(info);
|
||||
}
|
||||
else {
|
||||
VLOG(1) << "Device has compute preemption or is not used for display.";
|
||||
devices.push_back(info);
|
||||
}
|
||||
VLOG(1) << "Added device \"" << name << "\" with id \"" << info.id << "\".";
|
||||
}
|
||||
|
||||
if (!display_devices.empty())
|
||||
devices.insert(devices.end(), display_devices.begin(), display_devices.end());
|
||||
#else /* WITH_HIP */
|
||||
(void)devices;
|
||||
#endif /* WITH_HIP */
|
||||
}
|
||||
|
||||
string device_hip_capabilities()
|
||||
{
|
||||
#ifdef WITH_HIP
|
||||
hipError_t result = device_hip_safe_init();
|
||||
if (result != hipSuccess) {
|
||||
if (result != hipErrorNoDevice) {
|
||||
return string("Error initializing HIP: ") + hipewErrorString(result);
|
||||
}
|
||||
return "No HIP device found\n";
|
||||
}
|
||||
|
||||
int count;
|
||||
result = hipGetDeviceCount(&count);
|
||||
if (result != hipSuccess) {
|
||||
return string("Error getting devices: ") + hipewErrorString(result);
|
||||
}
|
||||
|
||||
string capabilities = "";
|
||||
for (int num = 0; num < count; num++) {
|
||||
char name[256];
|
||||
if (hipDeviceGetName(name, 256, num) != hipSuccess) {
|
||||
continue;
|
||||
}
|
||||
capabilities += string("\t") + name + "\n";
|
||||
int value;
|
||||
# define GET_ATTR(attr) \
|
||||
{ \
|
||||
if (hipDeviceGetAttribute(&value, hipDeviceAttribute##attr, num) == hipSuccess) { \
|
||||
capabilities += string_printf("\t\thipDeviceAttribute" #attr "\t\t\t%d\n", value); \
|
||||
} \
|
||||
} \
|
||||
(void)0
|
||||
/* TODO(sergey): Strip all attributes which are not useful for us
|
||||
* or does not depend on the driver.
|
||||
*/
|
||||
GET_ATTR(MaxThreadsPerBlock);
|
||||
GET_ATTR(MaxBlockDimX);
|
||||
GET_ATTR(MaxBlockDimY);
|
||||
GET_ATTR(MaxBlockDimZ);
|
||||
GET_ATTR(MaxGridDimX);
|
||||
GET_ATTR(MaxGridDimY);
|
||||
GET_ATTR(MaxGridDimZ);
|
||||
GET_ATTR(MaxSharedMemoryPerBlock);
|
||||
GET_ATTR(TotalConstantMemory);
|
||||
GET_ATTR(WarpSize);
|
||||
GET_ATTR(MaxPitch);
|
||||
GET_ATTR(MaxRegistersPerBlock);
|
||||
GET_ATTR(ClockRate);
|
||||
GET_ATTR(TextureAlignment);
|
||||
GET_ATTR(MultiprocessorCount);
|
||||
GET_ATTR(KernelExecTimeout);
|
||||
GET_ATTR(Integrated);
|
||||
GET_ATTR(CanMapHostMemory);
|
||||
GET_ATTR(ComputeMode);
|
||||
GET_ATTR(MaxTexture1DWidth);
|
||||
GET_ATTR(MaxTexture2DWidth);
|
||||
GET_ATTR(MaxTexture2DHeight);
|
||||
GET_ATTR(MaxTexture3DWidth);
|
||||
GET_ATTR(MaxTexture3DHeight);
|
||||
GET_ATTR(MaxTexture3DDepth);
|
||||
GET_ATTR(ConcurrentKernels);
|
||||
GET_ATTR(EccEnabled);
|
||||
GET_ATTR(MemoryClockRate);
|
||||
GET_ATTR(MemoryBusWidth);
|
||||
GET_ATTR(L2CacheSize);
|
||||
GET_ATTR(MaxThreadsPerMultiProcessor);
|
||||
GET_ATTR(ComputeCapabilityMajor);
|
||||
GET_ATTR(ComputeCapabilityMinor);
|
||||
GET_ATTR(MaxSharedMemoryPerMultiprocessor);
|
||||
GET_ATTR(ManagedMemory);
|
||||
GET_ATTR(IsMultiGpuBoard);
|
||||
# undef GET_ATTR
|
||||
capabilities += "\n";
|
||||
}
|
||||
|
||||
return capabilities;
|
||||
|
||||
#else /* WITH_HIP */
|
||||
return "";
|
||||
#endif /* WITH_HIP */
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -1,37 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class Device;
|
||||
class DeviceInfo;
|
||||
class Profiler;
|
||||
class Stats;
|
||||
|
||||
bool device_hip_init();
|
||||
|
||||
Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||
|
||||
void device_hip_info(vector<DeviceInfo> &devices);
|
||||
|
||||
string device_hip_capabilities();
|
||||
|
||||
CCL_NAMESPACE_END
|
File diff suppressed because it is too large
Load Diff
@@ -1,153 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_HIP
|
||||
|
||||
# include "device/device.h"
|
||||
# include "device/hip/kernel.h"
|
||||
# include "device/hip/queue.h"
|
||||
# include "device/hip/util.h"
|
||||
|
||||
# include "util/util_map.h"
|
||||
|
||||
# ifdef WITH_HIP_DYNLOAD
|
||||
# include "hipew.h"
|
||||
# else
|
||||
# include "util/util_opengl.h"
|
||||
# endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class DeviceQueue;
|
||||
|
||||
class HIPDevice : public Device {
|
||||
|
||||
friend class HIPContextScope;
|
||||
|
||||
public:
|
||||
hipDevice_t hipDevice;
|
||||
hipCtx_t hipContext;
|
||||
hipModule_t hipModule;
|
||||
size_t device_texture_headroom;
|
||||
size_t device_working_headroom;
|
||||
bool move_texture_to_host;
|
||||
size_t map_host_used;
|
||||
size_t map_host_limit;
|
||||
int can_map_host;
|
||||
int pitch_alignment;
|
||||
int hipDevId;
|
||||
int hipDevArchitecture;
|
||||
bool first_error;
|
||||
|
||||
struct HIPMem {
|
||||
HIPMem() : texobject(0), array(0), use_mapped_host(false)
|
||||
{
|
||||
}
|
||||
|
||||
hipTextureObject_t texobject;
|
||||
hArray array;
|
||||
|
||||
/* If true, a mapped host memory in shared_pointer is being used. */
|
||||
bool use_mapped_host;
|
||||
};
|
||||
typedef map<device_memory *, HIPMem> HIPMemMap;
|
||||
HIPMemMap hip_mem_map;
|
||||
thread_mutex hip_mem_map_mutex;
|
||||
|
||||
/* Bindless Textures */
|
||||
device_vector<TextureInfo> texture_info;
|
||||
bool need_texture_info;
|
||||
|
||||
HIPDeviceKernels kernels;
|
||||
|
||||
static bool have_precompiled_kernels();
|
||||
|
||||
virtual bool show_samples() const override;
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
|
||||
void set_error(const string &error) override;
|
||||
|
||||
HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||
|
||||
virtual ~HIPDevice();
|
||||
|
||||
bool support_device(const uint /*kernel_features*/);
|
||||
|
||||
bool check_peer_access(Device *peer_device) override;
|
||||
|
||||
bool use_adaptive_compilation();
|
||||
|
||||
virtual string compile_kernel_get_common_cflags(const uint kernel_features);
|
||||
|
||||
string compile_kernel(const uint kernel_features,
|
||||
const char *name,
|
||||
const char *base = "hip",
|
||||
bool force_ptx = false);
|
||||
|
||||
virtual bool load_kernels(const uint kernel_features) override;
|
||||
void reserve_local_memory(const uint kernel_features);
|
||||
|
||||
void init_host_memory();
|
||||
|
||||
void load_texture_info();
|
||||
|
||||
void move_textures_to_host(size_t size, bool for_texture);
|
||||
|
||||
HIPMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
|
||||
|
||||
void generic_copy_to(device_memory &mem);
|
||||
|
||||
void generic_free(device_memory &mem);
|
||||
|
||||
void mem_alloc(device_memory &mem) override;
|
||||
|
||||
void mem_copy_to(device_memory &mem) override;
|
||||
|
||||
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
|
||||
|
||||
void mem_zero(device_memory &mem) override;
|
||||
|
||||
void mem_free(device_memory &mem) override;
|
||||
|
||||
device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
|
||||
|
||||
virtual void const_copy_to(const char *name, void *host, size_t size) override;
|
||||
|
||||
void global_alloc(device_memory &mem);
|
||||
|
||||
void global_free(device_memory &mem);
|
||||
|
||||
void tex_alloc(device_texture &mem);
|
||||
|
||||
void tex_free(device_texture &mem);
|
||||
|
||||
/* Graphics resources interoperability. */
|
||||
virtual bool should_use_graphics_interop() override;
|
||||
|
||||
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
|
||||
|
||||
int get_num_multiprocessors();
|
||||
int get_max_num_threads_per_multiprocessor();
|
||||
|
||||
protected:
|
||||
bool get_device_attribute(hipDeviceAttribute_t attribute, int *value);
|
||||
int get_device_default_attribute(hipDeviceAttribute_t attribute, int default_value);
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
@@ -1,105 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_HIP
|
||||
|
||||
# include "device/hip/graphics_interop.h"
|
||||
|
||||
# include "device/hip/device_impl.h"
|
||||
# include "device/hip/util.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
HIPDeviceGraphicsInterop::HIPDeviceGraphicsInterop(HIPDeviceQueue *queue)
|
||||
: queue_(queue), device_(static_cast<HIPDevice *>(queue->device))
|
||||
{
|
||||
}
|
||||
|
||||
HIPDeviceGraphicsInterop::~HIPDeviceGraphicsInterop()
|
||||
{
|
||||
HIPContextScope scope(device_);
|
||||
|
||||
if (hip_graphics_resource_) {
|
||||
hip_device_assert(device_, hipGraphicsUnregisterResource(hip_graphics_resource_));
|
||||
}
|
||||
}
|
||||
|
||||
void HIPDeviceGraphicsInterop::set_display_interop(
|
||||
const DisplayDriver::GraphicsInterop &display_interop)
|
||||
{
|
||||
const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
|
||||
display_interop.buffer_height;
|
||||
|
||||
need_clear_ = display_interop.need_clear;
|
||||
|
||||
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
|
||||
return;
|
||||
}
|
||||
|
||||
HIPContextScope scope(device_);
|
||||
|
||||
if (hip_graphics_resource_) {
|
||||
hip_device_assert(device_, hipGraphicsUnregisterResource(hip_graphics_resource_));
|
||||
}
|
||||
|
||||
const hipError_t result = hipGraphicsGLRegisterBuffer(
|
||||
&hip_graphics_resource_, display_interop.opengl_pbo_id, hipGraphicsRegisterFlagsNone);
|
||||
if (result != hipSuccess) {
|
||||
LOG(ERROR) << "Error registering OpenGL buffer: " << hipewErrorString(result);
|
||||
}
|
||||
|
||||
opengl_pbo_id_ = display_interop.opengl_pbo_id;
|
||||
buffer_area_ = new_buffer_area;
|
||||
}
|
||||
|
||||
device_ptr HIPDeviceGraphicsInterop::map()
|
||||
{
|
||||
if (!hip_graphics_resource_) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
HIPContextScope scope(device_);
|
||||
|
||||
hipDeviceptr_t hip_buffer;
|
||||
size_t bytes;
|
||||
|
||||
hip_device_assert(device_,
|
||||
hipGraphicsMapResources(1, &hip_graphics_resource_, queue_->stream()));
|
||||
hip_device_assert(
|
||||
device_, hipGraphicsResourceGetMappedPointer(&hip_buffer, &bytes, hip_graphics_resource_));
|
||||
|
||||
if (need_clear_) {
|
||||
hip_device_assert(
|
||||
device_,
|
||||
hipMemsetD8Async(static_cast<hipDeviceptr_t>(hip_buffer), 0, bytes, queue_->stream()));
|
||||
|
||||
need_clear_ = false;
|
||||
}
|
||||
|
||||
return static_cast<device_ptr>(hip_buffer);
|
||||
}
|
||||
|
||||
void HIPDeviceGraphicsInterop::unmap()
|
||||
{
|
||||
HIPContextScope scope(device_);
|
||||
|
||||
hip_device_assert(device_,
|
||||
hipGraphicsUnmapResources(1, &hip_graphics_resource_, queue_->stream()));
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
@@ -1,64 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_HIP
|
||||
|
||||
# include "device/device_graphics_interop.h"
|
||||
|
||||
# ifdef WITH_HIP_DYNLOAD
|
||||
# include "hipew.h"
|
||||
# endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class HIPDevice;
|
||||
class HIPDeviceQueue;
|
||||
|
||||
class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
|
||||
public:
|
||||
explicit HIPDeviceGraphicsInterop(HIPDeviceQueue *queue);
|
||||
|
||||
HIPDeviceGraphicsInterop(const HIPDeviceGraphicsInterop &other) = delete;
|
||||
HIPDeviceGraphicsInterop(HIPDeviceGraphicsInterop &&other) noexcept = delete;
|
||||
|
||||
~HIPDeviceGraphicsInterop();
|
||||
|
||||
HIPDeviceGraphicsInterop &operator=(const HIPDeviceGraphicsInterop &other) = delete;
|
||||
HIPDeviceGraphicsInterop &operator=(HIPDeviceGraphicsInterop &&other) = delete;
|
||||
|
||||
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
|
||||
|
||||
virtual device_ptr map() override;
|
||||
virtual void unmap() override;
|
||||
|
||||
protected:
|
||||
HIPDeviceQueue *queue_ = nullptr;
|
||||
HIPDevice *device_ = nullptr;
|
||||
|
||||
/* OpenGL PBO which is currently registered as the destination for the CUDA buffer. */
|
||||
uint opengl_pbo_id_ = 0;
|
||||
/* Buffer area in pixels of the corresponding PBO. */
|
||||
int64_t buffer_area_ = 0;
|
||||
|
||||
/* The destination was requested to be cleared. */
|
||||
bool need_clear_ = false;
|
||||
|
||||
hipGraphicsResource hip_graphics_resource_ = nullptr;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_HIP
|
||||
|
||||
# include "device/hip/kernel.h"
|
||||
# include "device/hip/device_impl.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
void HIPDeviceKernels::load(HIPDevice *device)
|
||||
{
|
||||
hipModule_t hipModule = device->hipModule;
|
||||
|
||||
for (int i = 0; i < (int)DEVICE_KERNEL_NUM; i++) {
|
||||
HIPDeviceKernel &kernel = kernels_[i];
|
||||
|
||||
/* No mega-kernel used for GPU. */
|
||||
if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const std::string function_name = std::string("kernel_gpu_") +
|
||||
device_kernel_as_string((DeviceKernel)i);
|
||||
hip_device_assert(device,
|
||||
hipModuleGetFunction(&kernel.function, hipModule, function_name.c_str()));
|
||||
|
||||
if (kernel.function) {
|
||||
hip_device_assert(device, hipFuncSetCacheConfig(kernel.function, hipFuncCachePreferL1));
|
||||
|
||||
hip_device_assert(
|
||||
device,
|
||||
hipModuleOccupancyMaxPotentialBlockSize(
|
||||
&kernel.min_blocks, &kernel.num_threads_per_block, kernel.function, 0, 0));
|
||||
}
|
||||
else {
|
||||
LOG(ERROR) << "Unable to load kernel " << function_name;
|
||||
}
|
||||
}
|
||||
|
||||
loaded = true;
|
||||
}
|
||||
|
||||
const HIPDeviceKernel &HIPDeviceKernels::get(DeviceKernel kernel) const
|
||||
{
|
||||
return kernels_[(int)kernel];
|
||||
}
|
||||
|
||||
bool HIPDeviceKernels::available(DeviceKernel kernel) const
|
||||
{
|
||||
return kernels_[(int)kernel].function != nullptr;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_HIP*/
|
@@ -1,54 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_HIP
|
||||
|
||||
# include "device/device_kernel.h"
|
||||
|
||||
# ifdef WITH_HIP_DYNLOAD
|
||||
# include "hipew.h"
|
||||
# endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class HIPDevice;
|
||||
|
||||
/* HIP kernel and associate occupancy information. */
|
||||
class HIPDeviceKernel {
|
||||
public:
|
||||
hipFunction_t function = nullptr;
|
||||
|
||||
int num_threads_per_block = 0;
|
||||
int min_blocks = 0;
|
||||
};
|
||||
|
||||
/* Cache of HIP kernels for each DeviceKernel. */
|
||||
class HIPDeviceKernels {
|
||||
public:
|
||||
void load(HIPDevice *device);
|
||||
const HIPDeviceKernel &get(DeviceKernel kernel) const;
|
||||
bool available(DeviceKernel kernel) const;
|
||||
|
||||
protected:
|
||||
HIPDeviceKernel kernels_[DEVICE_KERNEL_NUM];
|
||||
bool loaded = false;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_HIP */
|
@@ -1,209 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_HIP
|
||||
|
||||
# include "device/hip/queue.h"
|
||||
|
||||
# include "device/hip/device_impl.h"
|
||||
# include "device/hip/graphics_interop.h"
|
||||
# include "device/hip/kernel.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* HIPDeviceQueue */
|
||||
|
||||
HIPDeviceQueue::HIPDeviceQueue(HIPDevice *device)
|
||||
: DeviceQueue(device), hip_device_(device), hip_stream_(nullptr)
|
||||
{
|
||||
const HIPContextScope scope(hip_device_);
|
||||
hip_device_assert(hip_device_, hipStreamCreateWithFlags(&hip_stream_, hipStreamNonBlocking));
|
||||
}
|
||||
|
||||
HIPDeviceQueue::~HIPDeviceQueue()
|
||||
{
|
||||
const HIPContextScope scope(hip_device_);
|
||||
hipStreamDestroy(hip_stream_);
|
||||
}
|
||||
|
||||
int HIPDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const
|
||||
{
|
||||
/* TODO: compute automatically. */
|
||||
/* TODO: must have at least num_threads_per_block. */
|
||||
return 14416128;
|
||||
}
|
||||
|
||||
int HIPDeviceQueue::num_concurrent_busy_states() const
|
||||
{
|
||||
const int max_num_threads = hip_device_->get_num_multiprocessors() *
|
||||
hip_device_->get_max_num_threads_per_multiprocessor();
|
||||
|
||||
if (max_num_threads == 0) {
|
||||
return 65536;
|
||||
}
|
||||
|
||||
return 4 * max_num_threads;
|
||||
}
|
||||
|
||||
void HIPDeviceQueue::init_execution()
|
||||
{
|
||||
/* Synchronize all textures and memory copies before executing task. */
|
||||
HIPContextScope scope(hip_device_);
|
||||
hip_device_->load_texture_info();
|
||||
hip_device_assert(hip_device_, hipDeviceSynchronize());
|
||||
|
||||
debug_init_execution();
|
||||
}
|
||||
|
||||
bool HIPDeviceQueue::kernel_available(DeviceKernel kernel) const
|
||||
{
|
||||
return hip_device_->kernels.available(kernel);
|
||||
}
|
||||
|
||||
bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
|
||||
{
|
||||
if (hip_device_->have_error()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
debug_enqueue(kernel, work_size);
|
||||
|
||||
const HIPContextScope scope(hip_device_);
|
||||
const HIPDeviceKernel &hip_kernel = hip_device_->kernels.get(kernel);
|
||||
|
||||
/* Compute kernel launch parameters. */
|
||||
const int num_threads_per_block = hip_kernel.num_threads_per_block;
|
||||
const int num_blocks = divide_up(work_size, num_threads_per_block);
|
||||
|
||||
int shared_mem_bytes = 0;
|
||||
|
||||
switch (kernel) {
|
||||
case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY:
|
||||
case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY:
|
||||
case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY:
|
||||
case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY:
|
||||
case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
|
||||
case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
|
||||
/* See parall_active_index.h for why this amount of shared memory is needed. */
|
||||
shared_mem_bytes = (num_threads_per_block + 1) * sizeof(int);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* Launch kernel. */
|
||||
hip_device_assert(hip_device_,
|
||||
hipModuleLaunchKernel(hip_kernel.function,
|
||||
num_blocks,
|
||||
1,
|
||||
1,
|
||||
num_threads_per_block,
|
||||
1,
|
||||
1,
|
||||
shared_mem_bytes,
|
||||
hip_stream_,
|
||||
args,
|
||||
0));
|
||||
return !(hip_device_->have_error());
|
||||
}
|
||||
|
||||
bool HIPDeviceQueue::synchronize()
|
||||
{
|
||||
if (hip_device_->have_error()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const HIPContextScope scope(hip_device_);
|
||||
hip_device_assert(hip_device_, hipStreamSynchronize(hip_stream_));
|
||||
debug_synchronize();
|
||||
|
||||
return !(hip_device_->have_error());
|
||||
}
|
||||
|
||||
void HIPDeviceQueue::zero_to_device(device_memory &mem)
|
||||
{
|
||||
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
|
||||
|
||||
if (mem.memory_size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Allocate on demand. */
|
||||
if (mem.device_pointer == 0) {
|
||||
hip_device_->mem_alloc(mem);
|
||||
}
|
||||
|
||||
/* Zero memory on device. */
|
||||
assert(mem.device_pointer != 0);
|
||||
|
||||
const HIPContextScope scope(hip_device_);
|
||||
hip_device_assert(
|
||||
hip_device_,
|
||||
hipMemsetD8Async((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size(), hip_stream_));
|
||||
}
|
||||
|
||||
void HIPDeviceQueue::copy_to_device(device_memory &mem)
|
||||
{
|
||||
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
|
||||
|
||||
if (mem.memory_size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Allocate on demand. */
|
||||
if (mem.device_pointer == 0) {
|
||||
hip_device_->mem_alloc(mem);
|
||||
}
|
||||
|
||||
assert(mem.device_pointer != 0);
|
||||
assert(mem.host_pointer != nullptr);
|
||||
|
||||
/* Copy memory to device. */
|
||||
const HIPContextScope scope(hip_device_);
|
||||
hip_device_assert(
|
||||
hip_device_,
|
||||
hipMemcpyHtoDAsync(
|
||||
(hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size(), hip_stream_));
|
||||
}
|
||||
|
||||
void HIPDeviceQueue::copy_from_device(device_memory &mem)
|
||||
{
|
||||
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
|
||||
|
||||
if (mem.memory_size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
assert(mem.device_pointer != 0);
|
||||
assert(mem.host_pointer != nullptr);
|
||||
|
||||
/* Copy memory from device. */
|
||||
const HIPContextScope scope(hip_device_);
|
||||
hip_device_assert(
|
||||
hip_device_,
|
||||
hipMemcpyDtoHAsync(
|
||||
mem.host_pointer, (hipDeviceptr_t)mem.device_pointer, mem.memory_size(), hip_stream_));
|
||||
}
|
||||
|
||||
// TODO : (Arya) Enable this after stabilizing dev branch
|
||||
unique_ptr<DeviceGraphicsInterop> HIPDeviceQueue::graphics_interop_create()
|
||||
{
|
||||
return make_unique<HIPDeviceGraphicsInterop>(this);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_HIP */
|
@@ -1,68 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_HIP
|
||||
|
||||
# include "device/device_kernel.h"
|
||||
# include "device/device_memory.h"
|
||||
# include "device/device_queue.h"
|
||||
|
||||
# include "device/hip/util.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class HIPDevice;
|
||||
class device_memory;
|
||||
|
||||
/* Base class for HIP queues. */
|
||||
class HIPDeviceQueue : public DeviceQueue {
|
||||
public:
|
||||
HIPDeviceQueue(HIPDevice *device);
|
||||
~HIPDeviceQueue();
|
||||
|
||||
virtual int num_concurrent_states(const size_t state_size) const override;
|
||||
virtual int num_concurrent_busy_states() const override;
|
||||
|
||||
virtual void init_execution() override;
|
||||
|
||||
virtual bool kernel_available(DeviceKernel kernel) const override;
|
||||
|
||||
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
|
||||
|
||||
virtual bool synchronize() override;
|
||||
|
||||
virtual void zero_to_device(device_memory &mem) override;
|
||||
virtual void copy_to_device(device_memory &mem) override;
|
||||
virtual void copy_from_device(device_memory &mem) override;
|
||||
|
||||
virtual hipStream_t stream()
|
||||
{
|
||||
return hip_stream_;
|
||||
}
|
||||
|
||||
// TODO : (Arya) Enable this after stabilizing the dev branch
|
||||
virtual unique_ptr<DeviceGraphicsInterop> graphics_interop_create() override;
|
||||
|
||||
protected:
|
||||
HIPDevice *hip_device_;
|
||||
hipStream_t hip_stream_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_HIP */
|
@@ -1,61 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_HIP
|
||||
|
||||
# include "device/hip/util.h"
|
||||
# include "device/hip/device_impl.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
HIPContextScope::HIPContextScope(HIPDevice *device) : device(device)
|
||||
{
|
||||
hip_device_assert(device, hipCtxPushCurrent(device->hipContext));
|
||||
}
|
||||
|
||||
HIPContextScope::~HIPContextScope()
|
||||
{
|
||||
hip_device_assert(device, hipCtxPopCurrent(NULL));
|
||||
}
|
||||
|
||||
# ifndef WITH_HIP_DYNLOAD
|
||||
const char *hipewErrorString(hipError_t result)
|
||||
{
|
||||
/* We can only give error code here without major code duplication, that
|
||||
* should be enough since dynamic loading is only being disabled by folks
|
||||
* who knows what they're doing anyway.
|
||||
*
|
||||
* NOTE: Avoid call from several threads.
|
||||
*/
|
||||
static string error;
|
||||
error = string_printf("%d", result);
|
||||
return error.c_str();
|
||||
}
|
||||
|
||||
const char *hipewCompilerPath()
|
||||
{
|
||||
return CYCLES_HIP_HIPCC_EXECUTABLE;
|
||||
}
|
||||
|
||||
int hipewCompilerVersion()
|
||||
{
|
||||
return (HIP_VERSION / 100) + (HIP_VERSION % 100 / 10);
|
||||
}
|
||||
# endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_HIP */
|
@@ -1,63 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_HIP
|
||||
|
||||
# ifdef WITH_HIP_DYNLOAD
|
||||
# include "hipew.h"
|
||||
# endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class HIPDevice;
|
||||
|
||||
/* Utility to push/pop HIP context. */
|
||||
class HIPContextScope {
|
||||
public:
|
||||
HIPContextScope(HIPDevice *device);
|
||||
~HIPContextScope();
|
||||
|
||||
private:
|
||||
HIPDevice *device;
|
||||
};
|
||||
|
||||
/* Utility for checking return values of HIP function calls. */
|
||||
# define hip_device_assert(hip_device, stmt) \
|
||||
{ \
|
||||
hipError_t result = stmt; \
|
||||
if (result != hipSuccess) { \
|
||||
const char *name = hipewErrorString(result); \
|
||||
hip_device->set_error( \
|
||||
string_printf("%s in %s (%s:%d)", name, #stmt, __FILE__, __LINE__)); \
|
||||
} \
|
||||
} \
|
||||
(void)0
|
||||
|
||||
# define hip_assert(stmt) hip_device_assert(this, stmt)
|
||||
|
||||
# ifndef WITH_HIP_DYNLOAD
|
||||
/* Transparently implement some functions, so majority of the file does not need
|
||||
* to worry about difference between dynamically loaded and linked HIP at all. */
|
||||
const char *hipewErrorString(hipError_t result);
|
||||
const char *hipewCompilerPath();
|
||||
int hipewCompilerVersion();
|
||||
# endif /* WITH_HIP_DYNLOAD */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_HIP */
|
@@ -315,14 +315,14 @@ class MultiDevice : public Device {
|
||||
stats.mem_alloc(mem.device_size - existing_size);
|
||||
}
|
||||
|
||||
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override
|
||||
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override
|
||||
{
|
||||
device_ptr key = mem.device_pointer;
|
||||
size_t i = 0, sub_h = h / devices.size();
|
||||
int i = 0, sub_h = h / devices.size();
|
||||
|
||||
foreach (SubDevice &sub, devices) {
|
||||
size_t sy = y + i * sub_h;
|
||||
size_t sh = (i == (size_t)devices.size() - 1) ? h - sub_h * i : sub_h;
|
||||
int sy = y + i * sub_h;
|
||||
int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
|
||||
|
||||
SubDevice *owner_sub = find_matching_mem_device(key, sub);
|
||||
mem.device = owner_sub->device;
|
||||
|
@@ -315,11 +315,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
group_descs[PG_HITS].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
|
||||
group_descs[PG_HITS].hitgroup.moduleAH = optix_module;
|
||||
group_descs[PG_HITS].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_shadow_all_hit";
|
||||
group_descs[PG_HITV].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
|
||||
group_descs[PG_HITV].hitgroup.moduleCH = optix_module;
|
||||
group_descs[PG_HITV].hitgroup.entryFunctionNameCH = "__closesthit__kernel_optix_hit";
|
||||
group_descs[PG_HITV].hitgroup.moduleAH = optix_module;
|
||||
group_descs[PG_HITV].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_volume_test";
|
||||
|
||||
if (kernel_features & KERNEL_FEATURE_HAIR) {
|
||||
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
|
||||
@@ -402,7 +397,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
|
||||
trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
|
||||
trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
|
||||
trace_css = std::max(trace_css, stack_size[PG_HITV].cssIS + stack_size[PG_HITV].cssAH);
|
||||
trace_css = std::max(trace_css,
|
||||
stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
|
||||
trace_css = std::max(trace_css,
|
||||
@@ -427,7 +421,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
pipeline_groups.push_back(groups[PG_HITD]);
|
||||
pipeline_groups.push_back(groups[PG_HITS]);
|
||||
pipeline_groups.push_back(groups[PG_HITL]);
|
||||
pipeline_groups.push_back(groups[PG_HITV]);
|
||||
if (motion_blur) {
|
||||
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
|
||||
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
|
||||
@@ -466,7 +459,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
pipeline_groups.push_back(groups[PG_HITD]);
|
||||
pipeline_groups.push_back(groups[PG_HITS]);
|
||||
pipeline_groups.push_back(groups[PG_HITL]);
|
||||
pipeline_groups.push_back(groups[PG_HITV]);
|
||||
if (motion_blur) {
|
||||
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
|
||||
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
|
||||
@@ -768,13 +760,7 @@ void OptiXDevice::denoise_color_read(DenoiseContext &context, const DenoisePass
|
||||
destination.num_components = 3;
|
||||
destination.pixel_stride = context.buffer_params.pass_stride;
|
||||
|
||||
BufferParams buffer_params = context.buffer_params;
|
||||
buffer_params.window_x = 0;
|
||||
buffer_params.window_y = 0;
|
||||
buffer_params.window_width = buffer_params.width;
|
||||
buffer_params.window_height = buffer_params.height;
|
||||
|
||||
pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
|
||||
pass_accessor.get_render_tile_pixels(context.render_buffers, context.buffer_params, destination);
|
||||
}
|
||||
|
||||
bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const DenoisePass &pass)
|
||||
@@ -1252,7 +1238,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer;
|
||||
build_input.curveArray.indexStrideInBytes = sizeof(int);
|
||||
build_input.curveArray.flag = build_flags;
|
||||
build_input.curveArray.primitiveIndexOffset = hair->curve_segment_offset;
|
||||
build_input.curveArray.primitiveIndexOffset = hair->optix_prim_offset;
|
||||
}
|
||||
else {
|
||||
/* Disable visibility test any-hit program, since it is already checked during
|
||||
@@ -1265,7 +1251,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
|
||||
build_input.customPrimitiveArray.flags = &build_flags;
|
||||
build_input.customPrimitiveArray.numSbtRecords = 1;
|
||||
build_input.customPrimitiveArray.primitiveIndexOffset = hair->curve_segment_offset;
|
||||
build_input.customPrimitiveArray.primitiveIndexOffset = hair->optix_prim_offset;
|
||||
}
|
||||
|
||||
if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
|
||||
@@ -1334,7 +1320,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
* buffers for that purpose. OptiX does not allow this to be zero though, so just pass in
|
||||
* one and rely on that having the same meaning in this case. */
|
||||
build_input.triangleArray.numSbtRecords = 1;
|
||||
build_input.triangleArray.primitiveIndexOffset = mesh->prim_offset;
|
||||
build_input.triangleArray.primitiveIndexOffset = mesh->optix_prim_offset;
|
||||
|
||||
if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
|
||||
progress.set_error("Failed to build OptiX acceleration structure");
|
||||
@@ -1401,36 +1387,28 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
instance.transform[5] = 1.0f;
|
||||
instance.transform[10] = 1.0f;
|
||||
|
||||
/* Set user instance ID to object index. */
|
||||
instance.instanceId = ob->get_device_index();
|
||||
|
||||
/* Add some of the object visibility bits to the mask.
|
||||
* __prim_visibility contains the combined visibility bits of all instances, so is not
|
||||
* reliable if they differ between instances. But the OptiX visibility mask can only contain
|
||||
* 8 bits, so have to trade-off here and select just a few important ones.
|
||||
*/
|
||||
instance.visibilityMask = ob->visibility_for_tracing() & 0xFF;
|
||||
/* Set user instance ID to object index (but leave low bit blank). */
|
||||
instance.instanceId = ob->get_device_index() << 1;
|
||||
|
||||
/* Have to have at least one bit in the mask, or else instance would always be culled. */
|
||||
if (0 == instance.visibilityMask) {
|
||||
instance.visibilityMask = 0xFF;
|
||||
instance.visibilityMask = 1;
|
||||
|
||||
if (ob->get_geometry()->has_volume) {
|
||||
/* Volumes have a special bit set in the visibility mask so a trace can mask only volumes.
|
||||
*/
|
||||
instance.visibilityMask |= 2;
|
||||
}
|
||||
|
||||
if (ob->get_geometry()->geometry_type == Geometry::HAIR &&
|
||||
static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) {
|
||||
if (motion_blur && ob->get_geometry()->has_motion_blur()) {
|
||||
if (ob->get_geometry()->geometry_type == Geometry::HAIR) {
|
||||
/* Same applies to curves (so they can be skipped in local trace calls). */
|
||||
instance.visibilityMask |= 4;
|
||||
|
||||
if (motion_blur && ob->get_geometry()->has_motion_blur() &&
|
||||
static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) {
|
||||
/* Select between motion blur and non-motion blur built-in intersection module. */
|
||||
instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
|
||||
* since it needs to filter out end-caps there).
|
||||
* It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit
|
||||
* programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
|
||||
*/
|
||||
instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT;
|
||||
}
|
||||
|
||||
/* Insert motion traversable if object has motion. */
|
||||
if (motion_blur && ob->use_motion()) {
|
||||
@@ -1496,7 +1474,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
delete[] reinterpret_cast<uint8_t *>(&motion_transform);
|
||||
|
||||
/* Disable instance transform if object uses motion transform already. */
|
||||
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
|
||||
instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
|
||||
|
||||
/* Get traversable handle to motion transform. */
|
||||
optixConvertPointerToTraversableHandle(context,
|
||||
@@ -1513,7 +1491,10 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
}
|
||||
else {
|
||||
/* Disable instance transform if geometry already has it applied to vertex data. */
|
||||
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
|
||||
instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
|
||||
/* Non-instanced objects read ID from 'prim_object', so distinguish
|
||||
* them from instanced objects with the low bit set. */
|
||||
instance.instanceId |= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -40,7 +40,6 @@ enum {
|
||||
PG_HITD, /* Default hit group. */
|
||||
PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
|
||||
PG_HITL, /* __BVH_LOCAL__ hit group (only used for triangles). */
|
||||
PG_HITV, /* __VOLUME__ hit group. */
|
||||
PG_HITD_MOTION,
|
||||
PG_HITS_MOTION,
|
||||
PG_CALL_SVM_AO,
|
||||
@@ -52,7 +51,7 @@ enum {
|
||||
static const int MISS_PROGRAM_GROUP_OFFSET = PG_MISS;
|
||||
static const int NUM_MIS_PROGRAM_GROUPS = 1;
|
||||
static const int HIT_PROGAM_GROUP_OFFSET = PG_HITD;
|
||||
static const int NUM_HIT_PROGRAM_GROUPS = 6;
|
||||
static const int NUM_HIT_PROGRAM_GROUPS = 5;
|
||||
static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
|
||||
static const int NUM_CALLABLE_PROGRAM_GROUPS = 3;
|
||||
|
||||
|
@@ -27,8 +27,6 @@ set(SRC
|
||||
pass_accessor.cpp
|
||||
pass_accessor_cpu.cpp
|
||||
pass_accessor_gpu.cpp
|
||||
path_trace_display.cpp
|
||||
path_trace_tile.cpp
|
||||
path_trace_work.cpp
|
||||
path_trace_work_cpu.cpp
|
||||
path_trace_work_gpu.cpp
|
||||
@@ -49,8 +47,6 @@ set(SRC_HEADERS
|
||||
pass_accessor.h
|
||||
pass_accessor_cpu.h
|
||||
pass_accessor_gpu.h
|
||||
path_trace_display.h
|
||||
path_trace_tile.h
|
||||
path_trace_work.h
|
||||
path_trace_work_cpu.h
|
||||
path_trace_work_gpu.h
|
||||
|
@@ -289,13 +289,7 @@ class OIDNDenoiseContext {
|
||||
* pixels. */
|
||||
const PassAccessorCPU pass_accessor(pass_access_info, 1.0f, num_samples_);
|
||||
|
||||
BufferParams buffer_params = buffer_params_;
|
||||
buffer_params.window_x = 0;
|
||||
buffer_params.window_y = 0;
|
||||
buffer_params.window_width = buffer_params.width;
|
||||
buffer_params.window_height = buffer_params.height;
|
||||
|
||||
pass_accessor.get_render_tile_pixels(render_buffers_, buffer_params, destination);
|
||||
pass_accessor.get_render_tile_pixels(render_buffers_, buffer_params_, destination);
|
||||
}
|
||||
|
||||
/* Read pass pixels using PassAccessor into a temporary buffer which is owned by the pass.. */
|
||||
|
@@ -149,6 +149,9 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers,
|
||||
/* Denoised passes store their final pixels, no need in special calculation. */
|
||||
get_pass_float(render_buffers, buffer_params, destination);
|
||||
}
|
||||
else if (type == PASS_RENDER_TIME) {
|
||||
/* TODO(sergey): Needs implementation. */
|
||||
}
|
||||
else if (type == PASS_DEPTH) {
|
||||
get_pass_depth(render_buffers, buffer_params, destination);
|
||||
}
|
||||
|
@@ -99,22 +99,17 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
|
||||
{
|
||||
DCHECK_EQ(destination.stride, 0) << "Custom stride for float destination is not implemented.";
|
||||
|
||||
const int64_t pass_stride = buffer_params.pass_stride;
|
||||
const int64_t buffer_row_stride = buffer_params.stride * buffer_params.pass_stride;
|
||||
|
||||
const float *window_data = render_buffers->buffer.data() + buffer_params.window_x * pass_stride +
|
||||
buffer_params.window_y * buffer_row_stride;
|
||||
|
||||
const float *buffer_data = render_buffers->buffer.data();
|
||||
const int pixel_stride = destination.pixel_stride ? destination.pixel_stride :
|
||||
destination.num_components;
|
||||
|
||||
tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) {
|
||||
const float *buffer = window_data + y * buffer_row_stride;
|
||||
float *pixel = destination.pixels +
|
||||
(y * buffer_params.width + destination.offset) * pixel_stride;
|
||||
tbb::parallel_for(0, buffer_params.height, [&](int64_t y) {
|
||||
int64_t pixel_index = y * buffer_params.width;
|
||||
for (int64_t x = 0; x < buffer_params.width; ++x, ++pixel_index) {
|
||||
const int64_t input_pixel_offset = pixel_index * buffer_params.pass_stride;
|
||||
const float *buffer = buffer_data + input_pixel_offset;
|
||||
float *pixel = destination.pixels + (pixel_index + destination.offset) * pixel_stride;
|
||||
|
||||
for (int64_t x = 0; x < buffer_params.window_width;
|
||||
++x, buffer += pass_stride, pixel += pixel_stride) {
|
||||
processor(kfilm_convert, buffer, pixel);
|
||||
}
|
||||
});
|
||||
@@ -128,28 +123,26 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
|
||||
const Destination &destination,
|
||||
const Processor &processor) const
|
||||
{
|
||||
const int64_t pass_stride = buffer_params.pass_stride;
|
||||
const int64_t buffer_row_stride = buffer_params.stride * buffer_params.pass_stride;
|
||||
|
||||
const float *window_data = render_buffers->buffer.data() + buffer_params.window_x * pass_stride +
|
||||
buffer_params.window_y * buffer_row_stride;
|
||||
const float *buffer_data = render_buffers->buffer.data();
|
||||
|
||||
half4 *dst_start = destination.pixels_half_rgba + destination.offset;
|
||||
const int destination_stride = destination.stride != 0 ? destination.stride :
|
||||
buffer_params.width;
|
||||
|
||||
tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) {
|
||||
const float *buffer = window_data + y * buffer_row_stride;
|
||||
half4 *pixel = dst_start + y * destination_stride;
|
||||
for (int64_t x = 0; x < buffer_params.window_width; ++x, buffer += pass_stride, ++pixel) {
|
||||
tbb::parallel_for(0, buffer_params.height, [&](int64_t y) {
|
||||
int64_t pixel_index = y * buffer_params.width;
|
||||
half4 *dst_row_start = dst_start + y * destination_stride;
|
||||
for (int64_t x = 0; x < buffer_params.width; ++x, ++pixel_index) {
|
||||
const int64_t input_pixel_offset = pixel_index * buffer_params.pass_stride;
|
||||
const float *buffer = buffer_data + input_pixel_offset;
|
||||
|
||||
float pixel_rgba[4];
|
||||
processor(kfilm_convert, buffer, pixel_rgba);
|
||||
float pixel[4];
|
||||
processor(kfilm_convert, buffer, pixel);
|
||||
|
||||
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba);
|
||||
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel);
|
||||
|
||||
float4_store_half(&pixel->x,
|
||||
make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3]));
|
||||
half4 *pixel_half_rgba = dst_row_start + x;
|
||||
float4_store_half(&pixel_half_rgba->x, make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@@ -43,13 +43,10 @@ void PassAccessorGPU::run_film_convert_kernels(DeviceKernel kernel,
|
||||
KernelFilmConvert kfilm_convert;
|
||||
init_kernel_film_convert(&kfilm_convert, buffer_params, destination);
|
||||
|
||||
const int work_size = buffer_params.window_width * buffer_params.window_height;
|
||||
const int work_size = buffer_params.width * buffer_params.height;
|
||||
|
||||
const int destination_stride = destination.stride != 0 ? destination.stride :
|
||||
buffer_params.window_width;
|
||||
|
||||
const int offset = buffer_params.window_x * buffer_params.pass_stride +
|
||||
buffer_params.window_y * buffer_params.stride * buffer_params.pass_stride;
|
||||
buffer_params.width;
|
||||
|
||||
if (destination.d_pixels) {
|
||||
DCHECK_EQ(destination.stride, 0) << "Custom stride for float destination is not implemented.";
|
||||
@@ -58,8 +55,8 @@ void PassAccessorGPU::run_film_convert_kernels(DeviceKernel kernel,
|
||||
const_cast<device_ptr *>(&destination.d_pixels),
|
||||
const_cast<device_ptr *>(&render_buffers->buffer.device_pointer),
|
||||
const_cast<int *>(&work_size),
|
||||
const_cast<int *>(&buffer_params.window_width),
|
||||
const_cast<int *>(&offset),
|
||||
const_cast<int *>(&buffer_params.width),
|
||||
const_cast<int *>(&buffer_params.offset),
|
||||
const_cast<int *>(&buffer_params.stride),
|
||||
const_cast<int *>(&destination.offset),
|
||||
const_cast<int *>(&destination_stride)};
|
||||
@@ -73,8 +70,8 @@ void PassAccessorGPU::run_film_convert_kernels(DeviceKernel kernel,
|
||||
const_cast<device_ptr *>(&destination.d_pixels_half_rgba),
|
||||
const_cast<device_ptr *>(&render_buffers->buffer.device_pointer),
|
||||
const_cast<int *>(&work_size),
|
||||
const_cast<int *>(&buffer_params.window_width),
|
||||
const_cast<int *>(&offset),
|
||||
const_cast<int *>(&buffer_params.width),
|
||||
const_cast<int *>(&buffer_params.offset),
|
||||
const_cast<int *>(&buffer_params.stride),
|
||||
const_cast<int *>(&destination.offset),
|
||||
const_cast<int *>(&destination_stride)};
|
||||
|
@@ -19,9 +19,8 @@
|
||||
#include "device/cpu/device.h"
|
||||
#include "device/device.h"
|
||||
#include "integrator/pass_accessor.h"
|
||||
#include "integrator/path_trace_display.h"
|
||||
#include "integrator/path_trace_tile.h"
|
||||
#include "integrator/render_scheduler.h"
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/pass.h"
|
||||
#include "render/scene.h"
|
||||
#include "render/tile.h"
|
||||
@@ -68,11 +67,11 @@ PathTrace::PathTrace(Device *device,
|
||||
PathTrace::~PathTrace()
|
||||
{
|
||||
/* Destroy any GPU resource which was used for graphics interop.
|
||||
* Need to have access to the PathTraceDisplay as it is the only source of drawing context which
|
||||
* is used for interop. */
|
||||
if (display_) {
|
||||
* Need to have access to the GPUDisplay as it is the only source of drawing context which is
|
||||
* used for interop. */
|
||||
if (gpu_display_) {
|
||||
for (auto &&path_trace_work : path_trace_works_) {
|
||||
path_trace_work->destroy_gpu_resources(display_.get());
|
||||
path_trace_work->destroy_gpu_resources(gpu_display_.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -95,7 +94,7 @@ bool PathTrace::ready_to_reset()
|
||||
{
|
||||
/* The logic here is optimized for the best feedback in the viewport, which implies having a GPU
|
||||
* display. Of there is no such display, the logic here will break. */
|
||||
DCHECK(display_);
|
||||
DCHECK(gpu_display_);
|
||||
|
||||
/* The logic here tries to provide behavior which feels the most interactive feel to artists.
|
||||
* General idea is to be able to reset as quickly as possible, while still providing interactive
|
||||
@@ -127,8 +126,8 @@ void PathTrace::reset(const BufferParams &full_params, const BufferParams &big_t
|
||||
/* NOTE: GPU display checks for buffer modification and avoids unnecessary re-allocation.
|
||||
* It is requires to inform about reset whenever it happens, so that the redraw state tracking is
|
||||
* properly updated. */
|
||||
if (display_) {
|
||||
display_->reset(full_params);
|
||||
if (gpu_display_) {
|
||||
gpu_display_->reset(full_params);
|
||||
}
|
||||
|
||||
render_state_.has_denoised_result = false;
|
||||
@@ -234,53 +233,42 @@ template<typename Callback>
|
||||
static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>> &path_trace_works,
|
||||
const vector<WorkBalanceInfo> &work_balance_infos,
|
||||
const BufferParams &buffer_params,
|
||||
const int overscan,
|
||||
const Callback &callback)
|
||||
{
|
||||
const int num_works = path_trace_works.size();
|
||||
const int window_height = buffer_params.window_height;
|
||||
const int height = buffer_params.height;
|
||||
|
||||
int current_y = 0;
|
||||
for (int i = 0; i < num_works; ++i) {
|
||||
const double weight = work_balance_infos[i].weight;
|
||||
const int slice_window_full_y = buffer_params.full_y + buffer_params.window_y + current_y;
|
||||
const int slice_window_height = max(lround(window_height * weight), 1);
|
||||
const int slice_height = max(lround(height * weight), 1);
|
||||
|
||||
/* Disallow negative values to deal with situations when there are more compute devices than
|
||||
* scan-lines. */
|
||||
const int remaining_window_height = max(0, window_height - current_y);
|
||||
|
||||
BufferParams slice_params = buffer_params;
|
||||
|
||||
slice_params.full_y = max(slice_window_full_y - overscan, buffer_params.full_y);
|
||||
slice_params.window_y = slice_window_full_y - slice_params.full_y;
|
||||
* scanlines. */
|
||||
const int remaining_height = max(0, height - current_y);
|
||||
|
||||
BufferParams slide_params = buffer_params;
|
||||
slide_params.full_y = buffer_params.full_y + current_y;
|
||||
if (i < num_works - 1) {
|
||||
slice_params.window_height = min(slice_window_height, remaining_window_height);
|
||||
slide_params.height = min(slice_height, remaining_height);
|
||||
}
|
||||
else {
|
||||
slice_params.window_height = remaining_window_height;
|
||||
slide_params.height = remaining_height;
|
||||
}
|
||||
|
||||
slice_params.height = slice_params.window_y + slice_params.window_height + overscan;
|
||||
slice_params.height = min(slice_params.height,
|
||||
buffer_params.height + buffer_params.full_y - slice_params.full_y);
|
||||
slide_params.update_offset_stride();
|
||||
|
||||
slice_params.update_offset_stride();
|
||||
callback(path_trace_works[i].get(), slide_params);
|
||||
|
||||
callback(path_trace_works[i].get(), slice_params);
|
||||
|
||||
current_y += slice_params.window_height;
|
||||
current_y += slide_params.height;
|
||||
}
|
||||
}
|
||||
|
||||
void PathTrace::update_allocated_work_buffer_params()
|
||||
{
|
||||
const int overscan = tile_manager_.get_tile_overscan();
|
||||
foreach_sliced_buffer_params(path_trace_works_,
|
||||
work_balance_infos_,
|
||||
big_tile_params_,
|
||||
overscan,
|
||||
[](PathTraceWork *path_trace_work, const BufferParams ¶ms) {
|
||||
RenderBuffers *buffers = path_trace_work->get_render_buffers();
|
||||
buffers->reset(params);
|
||||
@@ -293,12 +281,6 @@ static BufferParams scale_buffer_params(const BufferParams ¶ms, int resoluti
|
||||
|
||||
scaled_params.width = max(1, params.width / resolution_divider);
|
||||
scaled_params.height = max(1, params.height / resolution_divider);
|
||||
|
||||
scaled_params.window_x = params.window_x / resolution_divider;
|
||||
scaled_params.window_y = params.window_y / resolution_divider;
|
||||
scaled_params.window_width = params.window_width / resolution_divider;
|
||||
scaled_params.window_height = params.window_height / resolution_divider;
|
||||
|
||||
scaled_params.full_x = params.full_x / resolution_divider;
|
||||
scaled_params.full_y = params.full_y / resolution_divider;
|
||||
scaled_params.full_width = params.full_width / resolution_divider;
|
||||
@@ -317,12 +299,9 @@ void PathTrace::update_effective_work_buffer_params(const RenderWork &render_wor
|
||||
const BufferParams scaled_big_tile_params = scale_buffer_params(big_tile_params_,
|
||||
resolution_divider);
|
||||
|
||||
const int overscan = tile_manager_.get_tile_overscan();
|
||||
|
||||
foreach_sliced_buffer_params(path_trace_works_,
|
||||
work_balance_infos_,
|
||||
scaled_big_tile_params,
|
||||
overscan,
|
||||
[&](PathTraceWork *path_trace_work, const BufferParams params) {
|
||||
path_trace_work->set_effective_buffer_params(
|
||||
scaled_full_params, scaled_big_tile_params, params);
|
||||
@@ -556,35 +535,25 @@ void PathTrace::denoise(const RenderWork &render_work)
|
||||
render_scheduler_.report_denoise_time(render_work, time_dt() - start_time);
|
||||
}
|
||||
|
||||
void PathTrace::set_output_driver(unique_ptr<OutputDriver> driver)
|
||||
void PathTrace::set_gpu_display(unique_ptr<GPUDisplay> gpu_display)
|
||||
{
|
||||
output_driver_ = move(driver);
|
||||
gpu_display_ = move(gpu_display);
|
||||
}
|
||||
|
||||
void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
|
||||
void PathTrace::clear_gpu_display()
|
||||
{
|
||||
if (driver) {
|
||||
display_ = make_unique<PathTraceDisplay>(move(driver));
|
||||
}
|
||||
else {
|
||||
display_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void PathTrace::clear_display()
|
||||
{
|
||||
if (display_) {
|
||||
display_->clear();
|
||||
if (gpu_display_) {
|
||||
gpu_display_->clear();
|
||||
}
|
||||
}
|
||||
|
||||
void PathTrace::draw()
|
||||
{
|
||||
if (!display_) {
|
||||
if (!gpu_display_) {
|
||||
return;
|
||||
}
|
||||
|
||||
did_draw_after_reset_ |= display_->draw();
|
||||
did_draw_after_reset_ |= gpu_display_->draw();
|
||||
}
|
||||
|
||||
void PathTrace::update_display(const RenderWork &render_work)
|
||||
@@ -593,32 +562,31 @@ void PathTrace::update_display(const RenderWork &render_work)
|
||||
return;
|
||||
}
|
||||
|
||||
if (!display_ && !output_driver_) {
|
||||
if (!gpu_display_ && !tile_buffer_update_cb) {
|
||||
VLOG(3) << "Ignore display update.";
|
||||
return;
|
||||
}
|
||||
|
||||
if (full_params_.width == 0 || full_params_.height == 0) {
|
||||
VLOG(3) << "Skipping PathTraceDisplay update due to 0 size of the render buffer.";
|
||||
VLOG(3) << "Skipping GPUDisplay update due to 0 size of the render buffer.";
|
||||
return;
|
||||
}
|
||||
|
||||
const double start_time = time_dt();
|
||||
|
||||
if (output_driver_) {
|
||||
if (tile_buffer_update_cb) {
|
||||
VLOG(3) << "Invoke buffer update callback.";
|
||||
|
||||
PathTraceTile tile(*this);
|
||||
output_driver_->update_render_tile(tile);
|
||||
tile_buffer_update_cb();
|
||||
}
|
||||
|
||||
if (display_) {
|
||||
if (gpu_display_) {
|
||||
VLOG(3) << "Perform copy to GPUDisplay work.";
|
||||
|
||||
const int resolution_divider = render_work.resolution_divider;
|
||||
const int texture_width = max(1, full_params_.width / resolution_divider);
|
||||
const int texture_height = max(1, full_params_.height / resolution_divider);
|
||||
if (!display_->update_begin(texture_width, texture_height)) {
|
||||
if (!gpu_display_->update_begin(texture_width, texture_height)) {
|
||||
LOG(ERROR) << "Error beginning GPUDisplay update.";
|
||||
return;
|
||||
}
|
||||
@@ -632,10 +600,10 @@ void PathTrace::update_display(const RenderWork &render_work)
|
||||
* all works in parallel. */
|
||||
const int num_samples = get_num_samples_in_buffer();
|
||||
for (auto &&path_trace_work : path_trace_works_) {
|
||||
path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples);
|
||||
path_trace_work->copy_to_gpu_display(gpu_display_.get(), pass_mode, num_samples);
|
||||
}
|
||||
|
||||
display_->update_end();
|
||||
gpu_display_->update_end();
|
||||
}
|
||||
|
||||
render_scheduler_.report_display_update_time(render_work, time_dt() - start_time);
|
||||
@@ -785,26 +753,20 @@ bool PathTrace::is_cancel_requested()
|
||||
|
||||
void PathTrace::tile_buffer_write()
|
||||
{
|
||||
if (!output_driver_) {
|
||||
if (!tile_buffer_write_cb) {
|
||||
return;
|
||||
}
|
||||
|
||||
PathTraceTile tile(*this);
|
||||
output_driver_->write_render_tile(tile);
|
||||
tile_buffer_write_cb();
|
||||
}
|
||||
|
||||
void PathTrace::tile_buffer_read()
|
||||
{
|
||||
if (!device_scene_->data.bake.use) {
|
||||
if (!tile_buffer_read_cb) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!output_driver_) {
|
||||
return;
|
||||
}
|
||||
|
||||
PathTraceTile tile(*this);
|
||||
if (output_driver_->read_render_tile(tile)) {
|
||||
if (tile_buffer_read_cb()) {
|
||||
tbb::parallel_for_each(path_trace_works_, [](unique_ptr<PathTraceWork> &path_trace_work) {
|
||||
path_trace_work->copy_render_buffers_to_device();
|
||||
});
|
||||
@@ -839,7 +801,7 @@ void PathTrace::tile_buffer_write_to_disk()
|
||||
}
|
||||
|
||||
if (!tile_manager_.write_tile(*buffers)) {
|
||||
device_->set_error("Error writing tile to file");
|
||||
LOG(ERROR) << "Error writing tile to file.";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -932,14 +894,7 @@ void PathTrace::process_full_buffer_from_disk(string_view filename)
|
||||
|
||||
DenoiseParams denoise_params;
|
||||
if (!tile_manager_.read_full_buffer_from_disk(filename, &full_frame_buffers, &denoise_params)) {
|
||||
const string error_message = "Error reading tiles from file";
|
||||
if (progress_) {
|
||||
progress_->set_error(error_message);
|
||||
progress_->set_cancel(error_message);
|
||||
}
|
||||
else {
|
||||
LOG(ERROR) << error_message;
|
||||
}
|
||||
LOG(ERROR) << "Error reading tiles from file.";
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1025,12 +980,12 @@ bool PathTrace::set_render_tile_pixels(PassAccessor &pass_accessor,
|
||||
int2 PathTrace::get_render_tile_size() const
|
||||
{
|
||||
if (full_frame_state_.render_buffers) {
|
||||
return make_int2(full_frame_state_.render_buffers->params.window_width,
|
||||
full_frame_state_.render_buffers->params.window_height);
|
||||
return make_int2(full_frame_state_.render_buffers->params.width,
|
||||
full_frame_state_.render_buffers->params.height);
|
||||
}
|
||||
|
||||
const Tile &tile = tile_manager_.get_current_tile();
|
||||
return make_int2(tile.window_width, tile.window_height);
|
||||
return make_int2(tile.width, tile.height);
|
||||
}
|
||||
|
||||
int2 PathTrace::get_render_tile_offset() const
|
||||
@@ -1040,12 +995,7 @@ int2 PathTrace::get_render_tile_offset() const
|
||||
}
|
||||
|
||||
const Tile &tile = tile_manager_.get_current_tile();
|
||||
return make_int2(tile.x + tile.window_x, tile.y + tile.window_y);
|
||||
}
|
||||
|
||||
int2 PathTrace::get_render_size() const
|
||||
{
|
||||
return tile_manager_.get_size();
|
||||
return make_int2(tile.x, tile.y);
|
||||
}
|
||||
|
||||
const BufferParams &PathTrace::get_render_tile_params() const
|
||||
@@ -1078,8 +1028,6 @@ static const char *device_type_for_description(const DeviceType type)
|
||||
return "CUDA";
|
||||
case DEVICE_OPTIX:
|
||||
return "OptiX";
|
||||
case DEVICE_HIP:
|
||||
return "HIP";
|
||||
case DEVICE_DUMMY:
|
||||
return "Dummy";
|
||||
case DEVICE_MULTI:
|
||||
|
@@ -31,14 +31,12 @@ CCL_NAMESPACE_BEGIN
|
||||
class AdaptiveSampling;
|
||||
class Device;
|
||||
class DeviceScene;
|
||||
class DisplayDriver;
|
||||
class Film;
|
||||
class RenderBuffers;
|
||||
class RenderScheduler;
|
||||
class RenderWork;
|
||||
class PathTraceDisplay;
|
||||
class OutputDriver;
|
||||
class Progress;
|
||||
class GPUDisplay;
|
||||
class TileManager;
|
||||
|
||||
/* PathTrace class takes care of kernel graph and scheduling on a (multi)device. It takes care of
|
||||
@@ -100,16 +98,13 @@ class PathTrace {
|
||||
* Use this to configure the adaptive sampler before rendering any samples. */
|
||||
void set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling);
|
||||
|
||||
/* Sets output driver for render buffer output. */
|
||||
void set_output_driver(unique_ptr<OutputDriver> driver);
|
||||
/* Set GPU display which takes care of drawing the render result. */
|
||||
void set_gpu_display(unique_ptr<GPUDisplay> gpu_display);
|
||||
|
||||
/* Set display driver for interactive render buffer display. */
|
||||
void set_display_driver(unique_ptr<DisplayDriver> driver);
|
||||
/* Clear the GPU display by filling it in with all zeroes. */
|
||||
void clear_gpu_display();
|
||||
|
||||
/* Clear the display buffer by filling it in with all zeroes. */
|
||||
void clear_display();
|
||||
|
||||
/* Perform drawing of the current state of the DisplayDriver. */
|
||||
/* Perform drawing of the current state of the GPUDisplay. */
|
||||
void draw();
|
||||
|
||||
/* Cancel rendering process as soon as possible, without waiting for full tile to be sampled.
|
||||
@@ -162,7 +157,6 @@ class PathTrace {
|
||||
* instead. */
|
||||
int2 get_render_tile_size() const;
|
||||
int2 get_render_tile_offset() const;
|
||||
int2 get_render_size() const;
|
||||
|
||||
/* Get buffer parameters of the current tile.
|
||||
*
|
||||
@@ -174,6 +168,18 @@ class PathTrace {
|
||||
* times, and so on. */
|
||||
string full_report() const;
|
||||
|
||||
/* Callback which communicates an updates state of the render buffer of the current big tile.
|
||||
* Is called during path tracing to communicate work-in-progress state of the final buffer. */
|
||||
function<void(void)> tile_buffer_update_cb;
|
||||
|
||||
/* Callback which communicates final rendered buffer. Is called after path-tracing is done. */
|
||||
function<void(void)> tile_buffer_write_cb;
|
||||
|
||||
/* Callback which initializes rendered buffer. Is called before path-tracing starts.
|
||||
*
|
||||
* This is used for baking. */
|
||||
function<bool(void)> tile_buffer_read_cb;
|
||||
|
||||
/* Callback which is called to report current rendering progress.
|
||||
*
|
||||
* It is supposed to be cheaper than buffer update/write, hence can be called more often.
|
||||
@@ -246,11 +252,7 @@ class PathTrace {
|
||||
RenderScheduler &render_scheduler_;
|
||||
TileManager &tile_manager_;
|
||||
|
||||
/* Display driver for interactive render buffer display. */
|
||||
unique_ptr<PathTraceDisplay> display_;
|
||||
|
||||
/* Output driver to write render buffer to. */
|
||||
unique_ptr<OutputDriver> output_driver_;
|
||||
unique_ptr<GPUDisplay> gpu_display_;
|
||||
|
||||
/* Per-compute device descriptors of work which is responsible for path tracing on its configured
|
||||
* device. */
|
||||
@@ -284,7 +286,7 @@ class PathTrace {
|
||||
/* Parameters of the big tile with the current resolution divider applied. */
|
||||
BufferParams effective_big_tile_params;
|
||||
|
||||
/* Denoiser was run and there are denoised versions of the passes in the render buffers. */
|
||||
/* Denosier was run and there are denoised versions of the passes in the render buffers. */
|
||||
bool has_denoised_result = false;
|
||||
|
||||
/* Current tile has been written (to either disk or callback.
|
||||
|
@@ -1,107 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "integrator/path_trace_tile.h"
|
||||
#include "integrator/pass_accessor_cpu.h"
|
||||
#include "integrator/path_trace.h"
|
||||
|
||||
#include "render/buffers.h"
|
||||
#include "render/film.h"
|
||||
#include "render/pass.h"
|
||||
#include "render/scene.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
PathTraceTile::PathTraceTile(PathTrace &path_trace)
|
||||
: OutputDriver::Tile(path_trace.get_render_tile_offset(),
|
||||
path_trace.get_render_tile_size(),
|
||||
path_trace.get_render_size(),
|
||||
path_trace.get_render_tile_params().layer,
|
||||
path_trace.get_render_tile_params().view),
|
||||
path_trace_(path_trace),
|
||||
copied_from_device_(false)
|
||||
{
|
||||
}
|
||||
|
||||
bool PathTraceTile::get_pass_pixels(const string_view pass_name,
|
||||
const int num_channels,
|
||||
float *pixels) const
|
||||
{
|
||||
/* NOTE: The code relies on a fact that session is fully update and no scene/buffer modification
|
||||
* is happening while this function runs. */
|
||||
|
||||
if (!copied_from_device_) {
|
||||
/* Copy from device on demand. */
|
||||
path_trace_.copy_render_tile_from_device();
|
||||
const_cast<PathTraceTile *>(this)->copied_from_device_ = true;
|
||||
}
|
||||
|
||||
const BufferParams &buffer_params = path_trace_.get_render_tile_params();
|
||||
|
||||
const BufferPass *pass = buffer_params.find_pass(pass_name);
|
||||
if (pass == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool has_denoised_result = path_trace_.has_denoised_result();
|
||||
if (pass->mode == PassMode::DENOISED && !has_denoised_result) {
|
||||
pass = buffer_params.find_pass(pass->type);
|
||||
if (pass == nullptr) {
|
||||
/* Happens when denoised result pass is requested but is never written by the kernel. */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
pass = buffer_params.get_actual_display_pass(pass);
|
||||
|
||||
const float exposure = buffer_params.exposure;
|
||||
const int num_samples = path_trace_.get_num_render_tile_samples();
|
||||
|
||||
PassAccessor::PassAccessInfo pass_access_info(*pass);
|
||||
pass_access_info.use_approximate_shadow_catcher = buffer_params.use_approximate_shadow_catcher;
|
||||
pass_access_info.use_approximate_shadow_catcher_background =
|
||||
pass_access_info.use_approximate_shadow_catcher && !buffer_params.use_transparent_background;
|
||||
|
||||
const PassAccessorCPU pass_accessor(pass_access_info, exposure, num_samples);
|
||||
const PassAccessor::Destination destination(pixels, num_channels);
|
||||
|
||||
return path_trace_.get_render_tile_pixels(pass_accessor, destination);
|
||||
}
|
||||
|
||||
bool PathTraceTile::set_pass_pixels(const string_view pass_name,
|
||||
const int num_channels,
|
||||
const float *pixels) const
|
||||
{
|
||||
/* NOTE: The code relies on a fact that session is fully update and no scene/buffer modification
|
||||
* is happening while this function runs. */
|
||||
|
||||
const BufferParams &buffer_params = path_trace_.get_render_tile_params();
|
||||
const BufferPass *pass = buffer_params.find_pass(pass_name);
|
||||
if (!pass) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const float exposure = buffer_params.exposure;
|
||||
const int num_samples = 1;
|
||||
|
||||
const PassAccessor::PassAccessInfo pass_access_info(*pass);
|
||||
PassAccessorCPU pass_accessor(pass_access_info, exposure, num_samples);
|
||||
PassAccessor::Source source(pixels, num_channels);
|
||||
|
||||
return path_trace_.set_render_tile_pixels(pass_accessor, source);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -1,43 +0,0 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "render/output_driver.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* PathTraceTile
|
||||
*
|
||||
* Implementation of OutputDriver::Tile interface for path tracer. */
|
||||
|
||||
class PathTrace;
|
||||
|
||||
class PathTraceTile : public OutputDriver::Tile {
|
||||
public:
|
||||
PathTraceTile(PathTrace &path_trace);
|
||||
|
||||
bool get_pass_pixels(const string_view pass_name, const int num_channels, float *pixels) const;
|
||||
bool set_pass_pixels(const string_view pass_name,
|
||||
const int num_channels,
|
||||
const float *pixels) const;
|
||||
|
||||
private:
|
||||
PathTrace &path_trace_;
|
||||
bool copied_from_device_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -16,12 +16,12 @@
|
||||
|
||||
#include "device/device.h"
|
||||
|
||||
#include "integrator/path_trace_display.h"
|
||||
#include "integrator/path_trace_work.h"
|
||||
#include "integrator/path_trace_work_cpu.h"
|
||||
#include "integrator/path_trace_work_gpu.h"
|
||||
#include "render/buffers.h"
|
||||
#include "render/film.h"
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/scene.h"
|
||||
|
||||
#include "kernel/kernel_types.h"
|
||||
@@ -134,8 +134,7 @@ void PathTraceWork::copy_from_denoised_render_buffers(const RenderBuffers *rende
|
||||
bool PathTraceWork::get_render_tile_pixels(const PassAccessor &pass_accessor,
|
||||
const PassAccessor::Destination &destination)
|
||||
{
|
||||
const int offset_y = (effective_buffer_params_.full_y + effective_buffer_params_.window_y) -
|
||||
(effective_big_tile_params_.full_y + effective_big_tile_params_.window_y);
|
||||
const int offset_y = effective_buffer_params_.full_y - effective_big_tile_params_.full_y;
|
||||
const int width = effective_buffer_params_.width;
|
||||
|
||||
PassAccessor::Destination slice_destination = destination;
|
||||
@@ -186,16 +185,14 @@ PassAccessor::PassAccessInfo PathTraceWork::get_display_pass_access_info(PassMod
|
||||
return pass_access_info;
|
||||
}
|
||||
|
||||
PassAccessor::Destination PathTraceWork::get_display_destination_template(
|
||||
const PathTraceDisplay *display) const
|
||||
PassAccessor::Destination PathTraceWork::get_gpu_display_destination_template(
|
||||
const GPUDisplay *gpu_display) const
|
||||
{
|
||||
PassAccessor::Destination destination(film_->get_display_pass());
|
||||
|
||||
const int2 display_texture_size = display->get_texture_size();
|
||||
const int texture_x = effective_buffer_params_.full_x - effective_full_params_.full_x +
|
||||
effective_buffer_params_.window_x;
|
||||
const int texture_y = effective_buffer_params_.full_y - effective_full_params_.full_y +
|
||||
effective_buffer_params_.window_y;
|
||||
const int2 display_texture_size = gpu_display->get_texture_size();
|
||||
const int texture_x = effective_buffer_params_.full_x - effective_full_params_.full_x;
|
||||
const int texture_y = effective_buffer_params_.full_y - effective_full_params_.full_y;
|
||||
|
||||
destination.offset = texture_y * display_texture_size.x + texture_x;
|
||||
destination.stride = display_texture_size.x;
|
||||
|
@@ -28,7 +28,7 @@ class BufferParams;
|
||||
class Device;
|
||||
class DeviceScene;
|
||||
class Film;
|
||||
class PathTraceDisplay;
|
||||
class GPUDisplay;
|
||||
class RenderBuffers;
|
||||
|
||||
class PathTraceWork {
|
||||
@@ -83,9 +83,11 @@ class PathTraceWork {
|
||||
* noisy pass mode will be passed here when it is known that the buffer does not have denoised
|
||||
* passes yet (because denoiser did not run). If the denoised pass is requested and denoiser is
|
||||
* not used then this function will fall-back to the noisy pass instead. */
|
||||
virtual void copy_to_display(PathTraceDisplay *display, PassMode pass_mode, int num_samples) = 0;
|
||||
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) = 0;
|
||||
|
||||
virtual void destroy_gpu_resources(PathTraceDisplay *display) = 0;
|
||||
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) = 0;
|
||||
|
||||
/* Copy data from/to given render buffers.
|
||||
* Will copy pixels from a corresponding place (from multi-device point of view) of the render
|
||||
@@ -102,7 +104,7 @@ class PathTraceWork {
|
||||
* - Copies work's render buffer to its device. */
|
||||
void copy_from_render_buffers(const RenderBuffers *render_buffers);
|
||||
|
||||
/* Special version of the `copy_from_render_buffers()` which only copies denoised passes from the
|
||||
/* Special version of the `copy_from_render_buffers()` which only copies denosied passes from the
|
||||
* given render buffers, leaving rest of the passes.
|
||||
*
|
||||
* Same notes about device copying applies to this call as well. */
|
||||
@@ -160,8 +162,8 @@ class PathTraceWork {
|
||||
|
||||
/* Get destination which offset and stride are configured so that writing to it will write to a
|
||||
* proper location of GPU display texture, taking current tile and device slice into account. */
|
||||
PassAccessor::Destination get_display_destination_template(
|
||||
const PathTraceDisplay *display) const;
|
||||
PassAccessor::Destination get_gpu_display_destination_template(
|
||||
const GPUDisplay *gpu_display) const;
|
||||
|
||||
/* Device which will be used for path tracing.
|
||||
* Note that it is an actual render device (and never is a multi-device). */
|
||||
|
@@ -19,12 +19,10 @@
|
||||
#include "device/cpu/kernel.h"
|
||||
#include "device/device.h"
|
||||
|
||||
#include "kernel/kernel_path_state.h"
|
||||
|
||||
#include "integrator/pass_accessor_cpu.h"
|
||||
#include "integrator/path_trace_display.h"
|
||||
|
||||
#include "render/buffers.h"
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/scene.h"
|
||||
|
||||
#include "util/util_atomic.h"
|
||||
@@ -118,17 +116,13 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
|
||||
const KernelWorkTile &work_tile,
|
||||
const int samples_num)
|
||||
{
|
||||
const bool has_shadow_catcher = device_scene_->data.integrator.has_shadow_catcher;
|
||||
const bool has_bake = device_scene_->data.bake.use;
|
||||
|
||||
IntegratorStateCPU integrator_states[2];
|
||||
IntegratorStateCPU integrator_states[2] = {};
|
||||
|
||||
IntegratorStateCPU *state = &integrator_states[0];
|
||||
IntegratorStateCPU *shadow_catcher_state = nullptr;
|
||||
|
||||
if (device_scene_->data.integrator.has_shadow_catcher) {
|
||||
shadow_catcher_state = &integrator_states[1];
|
||||
path_state_init_queues(kernel_globals, shadow_catcher_state);
|
||||
}
|
||||
IntegratorStateCPU *shadow_catcher_state = &integrator_states[1];
|
||||
|
||||
KernelWorkTile sample_work_tile = work_tile;
|
||||
float *render_buffer = buffers_->buffer.data();
|
||||
@@ -153,7 +147,7 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
|
||||
|
||||
kernels_.integrator_megakernel(kernel_globals, state, render_buffer);
|
||||
|
||||
if (shadow_catcher_state) {
|
||||
if (has_shadow_catcher) {
|
||||
kernels_.integrator_megakernel(kernel_globals, shadow_catcher_state, render_buffer);
|
||||
}
|
||||
|
||||
@@ -161,14 +155,14 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
|
||||
}
|
||||
}
|
||||
|
||||
void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
void PathTraceWorkCPU::copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
{
|
||||
half4 *rgba_half = display->map_texture_buffer();
|
||||
half4 *rgba_half = gpu_display->map_texture_buffer();
|
||||
if (!rgba_half) {
|
||||
/* TODO(sergey): Look into using copy_to_display() if mapping failed. Might be needed for
|
||||
* some implementations of PathTraceDisplay which can not map memory? */
|
||||
/* TODO(sergey): Look into using copy_to_gpu_display() if mapping failed. Might be needed for
|
||||
* some implementations of GPUDisplay which can not map memory? */
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -178,7 +172,7 @@ void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
|
||||
|
||||
const PassAccessorCPU pass_accessor(pass_access_info, kfilm.exposure, num_samples);
|
||||
|
||||
PassAccessor::Destination destination = get_display_destination_template(display);
|
||||
PassAccessor::Destination destination = get_gpu_display_destination_template(gpu_display);
|
||||
destination.pixels_half_rgba = rgba_half;
|
||||
|
||||
tbb::task_arena local_arena = local_tbb_arena_create(device_);
|
||||
@@ -186,10 +180,10 @@ void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
|
||||
pass_accessor.get_render_tile_pixels(buffers_.get(), effective_buffer_params_, destination);
|
||||
});
|
||||
|
||||
display->unmap_texture_buffer();
|
||||
gpu_display->unmap_texture_buffer();
|
||||
}
|
||||
|
||||
void PathTraceWorkCPU::destroy_gpu_resources(PathTraceDisplay * /*display*/)
|
||||
void PathTraceWorkCPU::destroy_gpu_resources(GPUDisplay * /*gpu_display*/)
|
||||
{
|
||||
}
|
||||
|
||||
|
@@ -50,10 +50,10 @@ class PathTraceWorkCPU : public PathTraceWork {
|
||||
int start_sample,
|
||||
int samples_num) override;
|
||||
|
||||
virtual void copy_to_display(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) override;
|
||||
virtual void destroy_gpu_resources(PathTraceDisplay *display) override;
|
||||
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) override;
|
||||
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) override;
|
||||
|
||||
virtual bool copy_render_buffers_from_device() override;
|
||||
virtual bool copy_render_buffers_to_device() override;
|
||||
|
@@ -15,15 +15,14 @@
|
||||
*/
|
||||
|
||||
#include "integrator/path_trace_work_gpu.h"
|
||||
#include "integrator/path_trace_display.h"
|
||||
|
||||
#include "device/device.h"
|
||||
|
||||
#include "integrator/pass_accessor_gpu.h"
|
||||
#include "render/buffers.h"
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/scene.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_tbb.h"
|
||||
#include "util/util_time.h"
|
||||
|
||||
@@ -31,38 +30,6 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
static size_t estimate_single_state_size()
|
||||
{
|
||||
size_t state_size = 0;
|
||||
|
||||
#define KERNEL_STRUCT_BEGIN(name) for (int array_index = 0;; array_index++) {
|
||||
#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) state_size += sizeof(type);
|
||||
#define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) state_size += sizeof(type);
|
||||
#define KERNEL_STRUCT_END(name) \
|
||||
break; \
|
||||
}
|
||||
#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
|
||||
if (array_index >= gpu_array_size - 1) { \
|
||||
break; \
|
||||
} \
|
||||
}
|
||||
/* TODO(sergey): Look into better estimation for fields which depend on scene features. Maybe
|
||||
* maximum state calculation should happen as `alloc_work_memory()`, so that we can react to an
|
||||
* updated scene state here.
|
||||
* For until then use common value. Currently this size is only used for logging, but is weak to
|
||||
* rely on this. */
|
||||
#define KERNEL_STRUCT_VOLUME_STACK_SIZE 4
|
||||
#include "kernel/integrator/integrator_state_template.h"
|
||||
#undef KERNEL_STRUCT_BEGIN
|
||||
#undef KERNEL_STRUCT_MEMBER
|
||||
#undef KERNEL_STRUCT_ARRAY_MEMBER
|
||||
#undef KERNEL_STRUCT_END
|
||||
#undef KERNEL_STRUCT_END_ARRAY
|
||||
#undef KERNEL_STRUCT_VOLUME_STACK_SIZE
|
||||
|
||||
return state_size;
|
||||
}
|
||||
|
||||
PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
|
||||
Film *film,
|
||||
DeviceScene *device_scene,
|
||||
@@ -79,8 +46,8 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
|
||||
queued_paths_(device, "queued_paths", MEM_READ_WRITE),
|
||||
num_queued_paths_(device, "num_queued_paths", MEM_READ_WRITE),
|
||||
work_tiles_(device, "work_tiles", MEM_READ_WRITE),
|
||||
display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
|
||||
max_num_paths_(queue_->num_concurrent_states(estimate_single_state_size())),
|
||||
gpu_display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
|
||||
max_num_paths_(queue_->num_concurrent_states(sizeof(IntegratorStateCPU))),
|
||||
min_num_active_paths_(queue_->num_concurrent_busy_states()),
|
||||
max_active_path_index_(0)
|
||||
{
|
||||
@@ -128,28 +95,17 @@ void PathTraceWorkGPU::alloc_integrator_soa()
|
||||
#define KERNEL_STRUCT_END(name) \
|
||||
break; \
|
||||
}
|
||||
#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
|
||||
if (array_index >= gpu_array_size - 1) { \
|
||||
#define KERNEL_STRUCT_END_ARRAY(name, array_size) \
|
||||
if (array_index == array_size - 1) { \
|
||||
break; \
|
||||
} \
|
||||
}
|
||||
#define KERNEL_STRUCT_VOLUME_STACK_SIZE (device_scene_->data.volume_stack_size)
|
||||
#include "kernel/integrator/integrator_state_template.h"
|
||||
#undef KERNEL_STRUCT_BEGIN
|
||||
#undef KERNEL_STRUCT_MEMBER
|
||||
#undef KERNEL_STRUCT_ARRAY_MEMBER
|
||||
#undef KERNEL_STRUCT_END
|
||||
#undef KERNEL_STRUCT_END_ARRAY
|
||||
#undef KERNEL_STRUCT_VOLUME_STACK_SIZE
|
||||
|
||||
if (VLOG_IS_ON(3)) {
|
||||
size_t total_soa_size = 0;
|
||||
for (auto &&soa_memory : integrator_state_soa_) {
|
||||
total_soa_size += soa_memory->memory_size();
|
||||
}
|
||||
|
||||
VLOG(3) << "GPU SoA state size: " << string_human_readable_size(total_soa_size);
|
||||
}
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::alloc_integrator_queue()
|
||||
@@ -696,7 +652,7 @@ int PathTraceWorkGPU::get_num_active_paths()
|
||||
bool PathTraceWorkGPU::should_use_graphics_interop()
|
||||
{
|
||||
/* There are few aspects with the graphics interop when using multiple devices caused by the fact
|
||||
* that the PathTraceDisplay has a single texture:
|
||||
* that the GPUDisplay has a single texture:
|
||||
*
|
||||
* CUDA will return `CUDA_ERROR_NOT_SUPPORTED` from `cuGraphicsGLRegisterBuffer()` when
|
||||
* attempting to register OpenGL PBO which has been mapped. Which makes sense, because
|
||||
@@ -722,9 +678,9 @@ bool PathTraceWorkGPU::should_use_graphics_interop()
|
||||
return interop_use_;
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
void PathTraceWorkGPU::copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
{
|
||||
if (device_->have_error()) {
|
||||
/* Don't attempt to update GPU display if the device has errors: the error state will make
|
||||
@@ -738,7 +694,7 @@ void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
|
||||
}
|
||||
|
||||
if (should_use_graphics_interop()) {
|
||||
if (copy_to_display_interop(display, pass_mode, num_samples)) {
|
||||
if (copy_to_gpu_display_interop(gpu_display, pass_mode, num_samples)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -747,64 +703,65 @@ void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
|
||||
interop_use_ = false;
|
||||
}
|
||||
|
||||
copy_to_display_naive(display, pass_mode, num_samples);
|
||||
copy_to_gpu_display_naive(gpu_display, pass_mode, num_samples);
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::copy_to_display_naive(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
void PathTraceWorkGPU::copy_to_gpu_display_naive(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
{
|
||||
const int full_x = effective_buffer_params_.full_x;
|
||||
const int full_y = effective_buffer_params_.full_y;
|
||||
const int width = effective_buffer_params_.window_width;
|
||||
const int height = effective_buffer_params_.window_height;
|
||||
const int final_width = buffers_->params.window_width;
|
||||
const int final_height = buffers_->params.window_height;
|
||||
const int width = effective_buffer_params_.width;
|
||||
const int height = effective_buffer_params_.height;
|
||||
const int final_width = buffers_->params.width;
|
||||
const int final_height = buffers_->params.height;
|
||||
|
||||
const int texture_x = full_x - effective_full_params_.full_x + effective_buffer_params_.window_x;
|
||||
const int texture_y = full_y - effective_full_params_.full_y + effective_buffer_params_.window_y;
|
||||
const int texture_x = full_x - effective_full_params_.full_x;
|
||||
const int texture_y = full_y - effective_full_params_.full_y;
|
||||
|
||||
/* Re-allocate display memory if needed, and make sure the device pointer is allocated.
|
||||
*
|
||||
* NOTE: allocation happens to the final resolution so that no re-allocation happens on every
|
||||
* change of the resolution divider. However, if the display becomes smaller, shrink the
|
||||
* allocated memory as well. */
|
||||
if (display_rgba_half_.data_width != final_width ||
|
||||
display_rgba_half_.data_height != final_height) {
|
||||
display_rgba_half_.alloc(final_width, final_height);
|
||||
if (gpu_display_rgba_half_.data_width != final_width ||
|
||||
gpu_display_rgba_half_.data_height != final_height) {
|
||||
gpu_display_rgba_half_.alloc(final_width, final_height);
|
||||
/* TODO(sergey): There should be a way to make sure device-side memory is allocated without
|
||||
* transferring zeroes to the device. */
|
||||
queue_->zero_to_device(display_rgba_half_);
|
||||
queue_->zero_to_device(gpu_display_rgba_half_);
|
||||
}
|
||||
|
||||
PassAccessor::Destination destination(film_->get_display_pass());
|
||||
destination.d_pixels_half_rgba = display_rgba_half_.device_pointer;
|
||||
destination.d_pixels_half_rgba = gpu_display_rgba_half_.device_pointer;
|
||||
|
||||
get_render_tile_film_pixels(destination, pass_mode, num_samples);
|
||||
|
||||
queue_->copy_from_device(display_rgba_half_);
|
||||
queue_->synchronize();
|
||||
gpu_display_rgba_half_.copy_from_device();
|
||||
|
||||
display->copy_pixels_to_texture(display_rgba_half_.data(), texture_x, texture_y, width, height);
|
||||
gpu_display->copy_pixels_to_texture(
|
||||
gpu_display_rgba_half_.data(), texture_x, texture_y, width, height);
|
||||
}
|
||||
|
||||
bool PathTraceWorkGPU::copy_to_display_interop(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
bool PathTraceWorkGPU::copy_to_gpu_display_interop(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
{
|
||||
if (!device_graphics_interop_) {
|
||||
device_graphics_interop_ = queue_->graphics_interop_create();
|
||||
}
|
||||
|
||||
const DisplayDriver::GraphicsInterop graphics_interop_dst = display->graphics_interop_get();
|
||||
device_graphics_interop_->set_display_interop(graphics_interop_dst);
|
||||
const DeviceGraphicsInteropDestination graphics_interop_dst =
|
||||
gpu_display->graphics_interop_get();
|
||||
device_graphics_interop_->set_destination(graphics_interop_dst);
|
||||
|
||||
const device_ptr d_rgba_half = device_graphics_interop_->map();
|
||||
if (!d_rgba_half) {
|
||||
return false;
|
||||
}
|
||||
|
||||
PassAccessor::Destination destination = get_display_destination_template(display);
|
||||
PassAccessor::Destination destination = get_gpu_display_destination_template(gpu_display);
|
||||
destination.d_pixels_half_rgba = d_rgba_half;
|
||||
|
||||
get_render_tile_film_pixels(destination, pass_mode, num_samples);
|
||||
@@ -814,14 +771,14 @@ bool PathTraceWorkGPU::copy_to_display_interop(PathTraceDisplay *display,
|
||||
return true;
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::destroy_gpu_resources(PathTraceDisplay *display)
|
||||
void PathTraceWorkGPU::destroy_gpu_resources(GPUDisplay *gpu_display)
|
||||
{
|
||||
if (!device_graphics_interop_) {
|
||||
return;
|
||||
}
|
||||
display->graphics_interop_activate();
|
||||
gpu_display->graphics_interop_activate();
|
||||
device_graphics_interop_ = nullptr;
|
||||
display->graphics_interop_deactivate();
|
||||
gpu_display->graphics_interop_deactivate();
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::get_render_tile_film_pixels(const PassAccessor::Destination &destination,
|
||||
|
@@ -48,10 +48,10 @@ class PathTraceWorkGPU : public PathTraceWork {
|
||||
int start_sample,
|
||||
int samples_num) override;
|
||||
|
||||
virtual void copy_to_display(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) override;
|
||||
virtual void destroy_gpu_resources(PathTraceDisplay *display) override;
|
||||
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) override;
|
||||
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) override;
|
||||
|
||||
virtual bool copy_render_buffers_from_device() override;
|
||||
virtual bool copy_render_buffers_to_device() override;
|
||||
@@ -88,16 +88,16 @@ class PathTraceWorkGPU : public PathTraceWork {
|
||||
|
||||
int get_num_active_paths();
|
||||
|
||||
/* Check whether graphics interop can be used for the PathTraceDisplay update. */
|
||||
/* Check whether graphics interop can be used for the GPUDisplay update. */
|
||||
bool should_use_graphics_interop();
|
||||
|
||||
/* Naive implementation of the `copy_to_display()` which performs film conversion on the
|
||||
* device, then copies pixels to the host and pushes them to the `display`. */
|
||||
void copy_to_display_naive(PathTraceDisplay *display, PassMode pass_mode, int num_samples);
|
||||
/* Naive implementation of the `copy_to_gpu_display()` which performs film conversion on the
|
||||
* device, then copies pixels to the host and pushes them to the `gpu_display`. */
|
||||
void copy_to_gpu_display_naive(GPUDisplay *gpu_display, PassMode pass_mode, int num_samples);
|
||||
|
||||
/* Implementation of `copy_to_display()` which uses driver's OpenGL/GPU interoperability
|
||||
/* Implementation of `copy_to_gpu_display()` which uses driver's OpenGL/GPU interoperability
|
||||
* functionality, avoiding copy of pixels to the host. */
|
||||
bool copy_to_display_interop(PathTraceDisplay *display, PassMode pass_mode, int num_samples);
|
||||
bool copy_to_gpu_display_interop(GPUDisplay *gpu_display, PassMode pass_mode, int num_samples);
|
||||
|
||||
/* Synchronously run film conversion kernel and store display result in the given destination. */
|
||||
void get_render_tile_film_pixels(const PassAccessor::Destination &destination,
|
||||
@@ -139,9 +139,9 @@ class PathTraceWorkGPU : public PathTraceWork {
|
||||
/* Temporary buffer for passing work tiles to kernel. */
|
||||
device_vector<KernelWorkTile> work_tiles_;
|
||||
|
||||
/* Temporary buffer used by the copy_to_display() whenever graphics interoperability is not
|
||||
/* Temporary buffer used by the copy_to_gpu_display() whenever graphics interoperability is not
|
||||
* available. Is allocated on-demand. */
|
||||
device_vector<half4> display_rgba_half_;
|
||||
device_vector<half4> gpu_display_rgba_half_;
|
||||
|
||||
unique_ptr<DeviceGraphicsInterop> device_graphics_interop_;
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user