Compare commits
232 Commits
tmp-eevee-
...
temp-sculp
Author | SHA1 | Date | |
---|---|---|---|
a82f633ebb | |||
5a184f2ff0 | |||
4a75d4b581 | |||
13a5f5c926 | |||
ce2b6a5b44 | |||
![]() |
2c22895ee6 | ||
4fc62cf0f1 | |||
ed40f76fe2 | |||
8f56c7835e | |||
3e8c82fb68 | |||
b352d9986e | |||
8be7d35212 | |||
6136c39b56 | |||
760665e88b | |||
70897a4fea | |||
70d05ccd01 | |||
c1fd0fa079 | |||
6495e36779 | |||
66c7106672 | |||
8d8960001c | |||
95da6fddcb | |||
5fba2a156b | |||
c58e60af70 | |||
0f3a8ff8f3 | |||
3aff175c27 | |||
a6861475b6 | |||
02576a8b8e | |||
f5031dc7f4 | |||
fa55c85176 | |||
df37a60c62 | |||
f7e4c3cb69 | |||
909daaf629 | |||
28e9e40a8d | |||
c5b0eba85b | |||
52a31a8ca9 | |||
d3f14bda16 | |||
3abceef389 | |||
ed334e642f | |||
ef04a4262e | |||
6aa1b5d031 | |||
5c994d7846 | |||
53b057aa09 | |||
e817cff009 | |||
41ddd3d732 | |||
f5552d759c | |||
f01bf82480 | |||
8d9d16fb53 | |||
349350b304 | |||
cb5318b651 | |||
bd6b0bac88 | |||
3002670332 | |||
f086cf3cea | |||
2609ca2b8e | |||
![]() |
7e8153b07d | ||
622cad7073 | |||
44daeaae7d | |||
db8b5a2316 | |||
dbca0cc9d5 | |||
e4f77c1a6c | |||
![]() |
e27c89c7c7 | ||
af5706c960 | |||
a99022e22d | |||
d5af895419 | |||
![]() |
8703db393b | ||
![]() |
f152159101 | ||
d3500c482f | |||
3a1583972a | |||
6dcfb6df9c | |||
b0b9e746fa | |||
8be3fcab7e | |||
d20f992322 | |||
a38d99e0b2 | |||
75db4c082b | |||
![]() |
2d994de77c | ||
961d99d3a4 | |||
430cc9d7bf | |||
a3551ed878 | |||
288b13b252 | |||
85b2bce037 | |||
c7b601c79e | |||
81f8d74f6d | |||
9c14039a8f | |||
39e0bbfa55 | |||
8fe4f3b756 | |||
![]() |
a86f657692 | ||
8adebaeb7c | |||
773a36d2f8 | |||
cc623ee7b0 | |||
deaddbdcff | |||
f2538c7173 | |||
4bd3b02984 | |||
7beb487e9a | |||
9ad3a85f8b | |||
![]() |
654e1e901b | ||
![]() |
46c9f7702a | ||
![]() |
be0912a402 | ||
![]() |
0a3df611e7 | ||
![]() |
6d297c35c8 | ||
329eeacc66 | |||
2627635ff3 | |||
d6b6050e5b | |||
731c3efd97 | |||
9f5c17f4af | |||
4fcc9f5e7e | |||
7de1a4d1d8 | |||
d3949a4fdb | |||
b642dc7bc7 | |||
501352ef05 | |||
3dd3a0f02e | |||
3ca4ec3857 | |||
62af8ae57b | |||
bddb691dfc | |||
a489b73a38 | |||
a238533550 | |||
4faa5e30a5 | |||
2157115fa6 | |||
8931df291a | |||
d15324f1c3 | |||
90b633967b | |||
e9b573e11d | |||
ce56101897 | |||
733a764b07 | |||
999ba46d11 | |||
1fa19af508 | |||
997ff01503 | |||
88f2350c34 | |||
33a1472d4e | |||
7a6e2d1e39 | |||
f8afdc971f | |||
9a8c4aefb3 | |||
af4048f2b8 | |||
eecd4b69f2 | |||
12c5ae7e06 | |||
852a9ffa3d | |||
893565268d | |||
![]() |
00113d6888 | ||
9826204157 | |||
295dc61334 | |||
fc4c8a3647 | |||
2e4ed11dd3 | |||
f46b932c59 | |||
d67cf5d288 | |||
0a16677741 | |||
f37cbba8f1 | |||
e2449adfcd | |||
63e630b703 | |||
c2499366f2 | |||
b30e20d0d9 | |||
![]() |
51976a9e4c | ||
![]() |
6749cec22f | ||
![]() |
f5877da993 | ||
42dd5cc430 | |||
e16f473523 | |||
a97e2fd4e6 | |||
b0e48cb936 | |||
dc14353791 | |||
03218f7b45 | |||
472762f0dc | |||
6868d7b74a | |||
a3c4e28df9 | |||
![]() |
444ce7c3b7 | ||
![]() |
24044c64ba | ||
![]() |
9dd0899298 | ||
0783789a95 | |||
7e1898cfa7 | |||
a67f703ff3 | |||
a504c9a69e | |||
e86612d442 | |||
63d4f862ce | |||
8a19fcc500 | |||
ed4e04a1f6 | |||
96e93cc70c | |||
85dc681fc3 | |||
e442a17587 | |||
f4e4a0b5b3 | |||
dd669dd703 | |||
adf4dd8355 | |||
![]() |
9b36b846f0 | ||
![]() |
9d74da3352 | ||
a72f590d51 | |||
![]() |
6b590a86a6 | ||
48527a5c2d | |||
![]() |
94e761e263 | ||
85cec1deff | |||
165daf0bc6 | |||
aed5a62db4 | |||
0c1b536c57 | |||
903c25f097 | |||
6e2fc39eb7 | |||
f2f1bbdea3 | |||
96dfc8be8e | |||
4d57da0dd0 | |||
9eb885d029 | |||
e2376eceb5 | |||
f9947b50c6 | |||
812de962a3 | |||
8bea9693df | |||
035e040ee2 | |||
da33e57e39 | |||
6ee2f9dc15 | |||
453a0c430b | |||
94a35be532 | |||
4efef2fccd | |||
a80e4f4bc9 | |||
8e40e6400a | |||
9f1560a9a4 | |||
dce36e3e80 | |||
be9bac1f0d | |||
0f06080bc3 | |||
b5d4fbb9cc | |||
431d527679 | |||
![]() |
d5dff81cf6 | ||
![]() |
c619c86662 | ||
![]() |
dba0a2ad9a | ||
![]() |
2a90fc497b | ||
![]() |
36673016e9 | ||
30e9c5b7f5 | |||
6e82f6bf2f | |||
f6f05740be | |||
a873f7299a | |||
9f03d4bc2c | |||
dc2ac546cd | |||
a3f23a321e | |||
ddecc6166b | |||
9155608875 | |||
415ed999ab | |||
cfab4f432c | |||
4ac1f59a81 | |||
1ed9df3763 | |||
61e591b2a5 | |||
c29795452c | |||
9980fd0b8e |
3
.github/pull_request_template.md
vendored
3
.github/pull_request_template.md
vendored
@@ -1,5 +1,4 @@
|
||||
This repository is only used as a mirror of git.blender.org. Blender development happens on
|
||||
https://developer.blender.org.
|
||||
This repository is only used as a mirror. Blender development happens on projects.blender.org.
|
||||
|
||||
To get started with contributing code, please see:
|
||||
https://wiki.blender.org/wiki/Process/Contributing_Code
|
||||
|
3
.github/stale.yml
vendored
3
.github/stale.yml
vendored
@@ -15,8 +15,7 @@ staleLabel: stale
|
||||
# Comment to post when closing a stale Issue or Pull Request.
|
||||
closeComment: >
|
||||
This issue has been automatically closed, because this repository is only
|
||||
used as a mirror of git.blender.org. Blender development happens on
|
||||
developer.blender.org.
|
||||
used as a mirror. Blender development happens on projects.blender.org.
|
||||
|
||||
To get started contributing code, please read:
|
||||
https://wiki.blender.org/wiki/Process/Contributing_Code
|
||||
|
12
.gitmodules
vendored
12
.gitmodules
vendored
@@ -1,20 +1,16 @@
|
||||
[submodule "release/scripts/addons"]
|
||||
path = release/scripts/addons
|
||||
url = ../blender-addons.git
|
||||
branch = master
|
||||
ignore = all
|
||||
branch = main
|
||||
[submodule "release/scripts/addons_contrib"]
|
||||
path = release/scripts/addons_contrib
|
||||
url = ../blender-addons-contrib.git
|
||||
branch = master
|
||||
ignore = all
|
||||
branch = main
|
||||
[submodule "release/datafiles/locale"]
|
||||
path = release/datafiles/locale
|
||||
url = ../blender-translations.git
|
||||
branch = master
|
||||
ignore = all
|
||||
branch = main
|
||||
[submodule "source/tools"]
|
||||
path = source/tools
|
||||
url = ../blender-dev-tools.git
|
||||
branch = master
|
||||
ignore = all
|
||||
branch = main
|
||||
|
@@ -24,7 +24,7 @@ Development
|
||||
-----------
|
||||
|
||||
- [Build Instructions](https://wiki.blender.org/wiki/Building_Blender)
|
||||
- [Code Review & Bug Tracker](https://developer.blender.org)
|
||||
- [Code Review & Bug Tracker](https://projects.blender.org)
|
||||
- [Developer Forum](https://devtalk.blender.org)
|
||||
- [Developer Documentation](https://wiki.blender.org)
|
||||
|
||||
|
@@ -40,7 +40,8 @@ ExternalProject_Add(external_igc_llvm
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0004-OpenCL-support-cl_ext_float_atomics.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0005-OpenCL-Add-cl_khr_integer_dot_product.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0003-Add-missing-include-limit-in-benchmark.patch
|
||||
)
|
||||
add_dependencies(
|
||||
external_igc_llvm
|
||||
@@ -55,9 +56,6 @@ ExternalProject_Add(external_igc_spirv_translator
|
||||
CONFIGURE_COMMAND echo .
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0001-update-SPIR-V-headers-for-SPV_INTEL_split_barrier.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0002-Add-support-for-split-barriers-extension-SPV_INTEL_s.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0003-Support-cl_bf16_conversions.patch
|
||||
)
|
||||
add_dependencies(
|
||||
external_igc_spirv_translator
|
||||
|
@@ -88,6 +88,19 @@ else()
|
||||
export LDFLAGS=${PYTHON_LDFLAGS} &&
|
||||
export PKG_CONFIG_PATH=${LIBDIR}/ffi/lib/pkgconfig)
|
||||
|
||||
# NOTE: untested on APPLE so far.
|
||||
if(NOT APPLE)
|
||||
set(PYTHON_CONFIGURE_EXTRA_ARGS
|
||||
${PYTHON_CONFIGURE_EXTRA_ARGS}
|
||||
# Used on most release Linux builds (Fedora for e.g.),
|
||||
# increases build times noticeably with the benefit of a modest speedup at runtime.
|
||||
--enable-optimizations
|
||||
# While LTO is OK when building on the same system, it's incompatible across GCC versions,
|
||||
# making it impractical for developers to build against, so keep it disabled.
|
||||
# `--with-lto`
|
||||
)
|
||||
endif()
|
||||
|
||||
ExternalProject_Add(external_python
|
||||
URL file://${PACKAGE_DIR}/${PYTHON_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
|
@@ -668,9 +668,9 @@ set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
|
||||
# compiler, the versions used are taken from the following location
|
||||
# https://github.com/intel/intel-graphics-compiler/releases
|
||||
|
||||
set(IGC_VERSION 1.0.12149.1)
|
||||
set(IGC_VERSION 1.0.13064.7)
|
||||
set(IGC_URI https://github.com/intel/intel-graphics-compiler/archive/refs/tags/igc-${IGC_VERSION}.tar.gz)
|
||||
set(IGC_HASH 44f67f24e3bc5130f9f062533abf8154782a9d0a992bc19b498639a8521ae836)
|
||||
set(IGC_HASH a929abd4cca2b293961ec0437ee4b3b2147bd3b2c8a3c423af78c0c359b2e5ae)
|
||||
set(IGC_HASH_TYPE SHA256)
|
||||
set(IGC_FILE igc-${IGC_VERSION}.tar.gz)
|
||||
|
||||
@@ -690,15 +690,15 @@ set(IGC_LLVM_FILE ${IGC_LLVM_VERSION}.tar.gz)
|
||||
#
|
||||
# WARNING WARNING WARNING
|
||||
|
||||
set(IGC_OPENCL_CLANG_VERSION 363a5262d8c7cff3fb28f3bdb5d85c8d7e91c1bb)
|
||||
set(IGC_OPENCL_CLANG_VERSION ee31812ea8b89d08c2918f045d11a19bd33525c5)
|
||||
set(IGC_OPENCL_CLANG_URI https://github.com/intel/opencl-clang/archive/${IGC_OPENCL_CLANG_VERSION}.tar.gz)
|
||||
set(IGC_OPENCL_CLANG_HASH aa8cf72bb239722ce8ce44f79413c6887ecc8ca18477dd520aa5c4809756da9a)
|
||||
set(IGC_OPENCL_CLANG_HASH 1db6735bbcfaa31e8a9ba39f121d6bafa806ea8919e9f56782d6aaa67771ddda)
|
||||
set(IGC_OPENCL_CLANG_HASH_TYPE SHA256)
|
||||
set(IGC_OPENCL_CLANG_FILE opencl-clang-${IGC_OPENCL_CLANG_VERSION}.tar.gz)
|
||||
|
||||
set(IGC_VCINTRINSICS_VERSION v0.5.0)
|
||||
set(IGC_VCINTRINSICS_VERSION v0.11.0)
|
||||
set(IGC_VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/refs/tags/${IGC_VCINTRINSICS_VERSION}.tar.gz)
|
||||
set(IGC_VCINTRINSICS_HASH 70bb47c5e32173cf61514941e83ae7c7eb4485e6d2fca60cfa1f50d4f42c41f2)
|
||||
set(IGC_VCINTRINSICS_HASH e5acd5626ce7fa6d41ce154c50ac805eda734ee66af94ef28e680ac2ad81bb9f)
|
||||
set(IGC_VCINTRINSICS_HASH_TYPE SHA256)
|
||||
set(IGC_VCINTRINSICS_FILE vc-intrinsics-${IGC_VCINTRINSICS_VERSION}.tar.gz)
|
||||
|
||||
@@ -714,9 +714,9 @@ set(IGC_SPIRV_TOOLS_HASH 6e19900e948944243024aedd0a201baf3854b377b9cc7a386553bc1
|
||||
set(IGC_SPIRV_TOOLS_HASH_TYPE SHA256)
|
||||
set(IGC_SPIRV_TOOLS_FILE SPIR-V-Tools-${IGC_SPIRV_TOOLS_VERSION}.tar.gz)
|
||||
|
||||
set(IGC_SPIRV_TRANSLATOR_VERSION a31ffaeef77e23d500b3ea3d35e0c42ff5648ad9)
|
||||
set(IGC_SPIRV_TRANSLATOR_VERSION d739c01d65ec00dee64dedd40deed805216a7193)
|
||||
set(IGC_SPIRV_TRANSLATOR_URI https://github.com/KhronosGroup/SPIRV-LLVM-Translator/archive/${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
|
||||
set(IGC_SPIRV_TRANSLATOR_HASH 9e26c96a45341b8f8af521bacea20e752623346340addd02af95d669f6e89252)
|
||||
set(IGC_SPIRV_TRANSLATOR_HASH ddc0cc9ccbe59dadeaf291012d59de142b2e9f2b124dbb634644d39daddaa13e)
|
||||
set(IGC_SPIRV_TRANSLATOR_HASH_TYPE SHA256)
|
||||
set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
|
||||
|
||||
@@ -724,15 +724,15 @@ set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.
|
||||
### Intel Graphics Compiler DEPS END ###
|
||||
########################################
|
||||
|
||||
set(GMMLIB_VERSION intel-gmmlib-22.1.8)
|
||||
set(GMMLIB_VERSION intel-gmmlib-22.3.0)
|
||||
set(GMMLIB_URI https://github.com/intel/gmmlib/archive/refs/tags/${GMMLIB_VERSION}.tar.gz)
|
||||
set(GMMLIB_HASH bf23e9a3742b4fb98c7666c9e9b29f3219e4b2fb4d831aaf4eed71f5e2d17368)
|
||||
set(GMMLIB_HASH c1f33e1519edfc527127baeb0436b783430dfd256c643130169a3a71dc86aff9)
|
||||
set(GMMLIB_HASH_TYPE SHA256)
|
||||
set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)
|
||||
|
||||
set(OCLOC_VERSION 22.38.24278)
|
||||
set(OCLOC_VERSION 22.49.25018.21)
|
||||
set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
|
||||
set(OCLOC_HASH db0c542fccd651e6404b15a74d46027f1ce0eda8dc9e25a40cbb6c0faef257ee)
|
||||
set(OCLOC_HASH 92362dae08b503a34e5d3820ed284198c452bcd5e7504d90eb69887b20492c06)
|
||||
set(OCLOC_HASH_TYPE SHA256)
|
||||
set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)
|
||||
|
||||
|
@@ -1,7 +1,7 @@
|
||||
diff -Naur external_igc_opencl_clang.orig/CMakeLists.txt external_igc_opencl_clang/CMakeLists.txt
|
||||
--- external_igc_opencl_clang.orig/CMakeLists.txt 2022-03-16 05:51:10 -0600
|
||||
+++ external_igc_opencl_clang/CMakeLists.txt 2022-05-23 10:40:09 -0600
|
||||
@@ -126,22 +126,24 @@
|
||||
@@ -147,22 +147,24 @@
|
||||
)
|
||||
endif()
|
||||
|
||||
|
@@ -23,19 +23,19 @@ if(EXISTS ${SOURCE_DIR}/.git)
|
||||
|
||||
if(MY_WC_BRANCH STREQUAL "HEAD")
|
||||
# Detached HEAD, check whether commit hash is reachable
|
||||
# in the master branch
|
||||
# in the main branch
|
||||
execute_process(COMMAND git rev-parse --short=12 HEAD
|
||||
WORKING_DIRECTORY ${SOURCE_DIR}
|
||||
OUTPUT_VARIABLE MY_WC_HASH
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
execute_process(COMMAND git branch --list master blender-v* --contains ${MY_WC_HASH}
|
||||
execute_process(COMMAND git branch --list main blender-v* --contains ${MY_WC_HASH}
|
||||
WORKING_DIRECTORY ${SOURCE_DIR}
|
||||
OUTPUT_VARIABLE _git_contains_check
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
if(NOT _git_contains_check STREQUAL "")
|
||||
set(MY_WC_BRANCH "master")
|
||||
set(MY_WC_BRANCH "main")
|
||||
else()
|
||||
execute_process(COMMAND git show-ref --tags -d
|
||||
WORKING_DIRECTORY ${SOURCE_DIR}
|
||||
@@ -48,7 +48,7 @@ if(EXISTS ${SOURCE_DIR}/.git)
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
if(_git_tag_hashes MATCHES "${_git_head_hash}")
|
||||
set(MY_WC_BRANCH "master")
|
||||
set(MY_WC_BRANCH "main")
|
||||
else()
|
||||
execute_process(COMMAND git branch --contains ${MY_WC_HASH}
|
||||
WORKING_DIRECTORY ${SOURCE_DIR}
|
||||
|
@@ -11,11 +11,11 @@
|
||||
mkdir ~/blender-git
|
||||
cd ~/blender-git
|
||||
|
||||
git clone http://git.blender.org/blender.git
|
||||
git clone https://projects.blender.org/blender/blender.git
|
||||
cd blender
|
||||
git submodule update --init --recursive
|
||||
git submodule foreach git checkout master
|
||||
git submodule foreach git pull --rebase origin master
|
||||
git submodule foreach git checkout main
|
||||
git submodule foreach git pull --rebase origin main
|
||||
|
||||
# create build dir
|
||||
mkdir ~/blender-git/build-cmake
|
||||
@@ -35,7 +35,7 @@ ln -s ~/blender-git/build-cmake/bin/blender ~/blender-git/blender/blender.bin
|
||||
echo ""
|
||||
echo "* Useful Commands *"
|
||||
echo " Run Blender: ~/blender-git/blender/blender.bin"
|
||||
echo " Update Blender: git pull --rebase; git submodule foreach git pull --rebase origin master"
|
||||
echo " Update Blender: git pull --rebase; git submodule foreach git pull --rebase origin main"
|
||||
echo " Reconfigure Blender: cd ~/blender-git/build-cmake ; cmake ."
|
||||
echo " Build Blender: cd ~/blender-git/build-cmake ; make"
|
||||
echo ""
|
||||
|
@@ -5,16 +5,16 @@
|
||||
update-code:
|
||||
git:
|
||||
submodules:
|
||||
- branch: master
|
||||
- branch: main
|
||||
commit_id: HEAD
|
||||
path: release/scripts/addons
|
||||
- branch: master
|
||||
- branch: main
|
||||
commit_id: HEAD
|
||||
path: release/scripts/addons_contrib
|
||||
- branch: master
|
||||
- branch: main
|
||||
commit_id: HEAD
|
||||
path: release/datafiles/locale
|
||||
- branch: master
|
||||
- branch: main
|
||||
commit_id: HEAD
|
||||
path: source/tools
|
||||
svn:
|
||||
|
@@ -58,7 +58,7 @@ Each Blender release supports one Python version, and the package is only compat
|
||||
## Source Code
|
||||
|
||||
* [Releases](https://download.blender.org/source/)
|
||||
* Repository: [git.blender.org/blender.git](https://git.blender.org/gitweb/gitweb.cgi/blender.git)
|
||||
* Repository: [projects.blender.org/blender/blender.git](https://projects.blender.org/blender/blender)
|
||||
|
||||
## Credits
|
||||
|
||||
|
@@ -170,7 +170,7 @@ def git_update_skip(args: argparse.Namespace, check_remote_exists: bool = True)
|
||||
return "rebase or merge in progress, complete it first"
|
||||
|
||||
# Abort if uncommitted changes.
|
||||
changes = check_output([args.git_command, 'status', '--porcelain', '--untracked-files=no'])
|
||||
changes = check_output([args.git_command, 'status', '--porcelain', '--untracked-files=no', '--ignore-submodules'])
|
||||
if len(changes) != 0:
|
||||
return "you have unstaged changes"
|
||||
|
||||
@@ -202,8 +202,8 @@ def submodules_update(
|
||||
sys.exit(1)
|
||||
|
||||
# Update submodules to appropriate given branch,
|
||||
# falling back to master if none is given and/or found in a sub-repository.
|
||||
branch_fallback = "master"
|
||||
# falling back to main if none is given and/or found in a sub-repository.
|
||||
branch_fallback = "main"
|
||||
if not branch:
|
||||
branch = branch_fallback
|
||||
|
||||
|
@@ -3,9 +3,9 @@ if NOT exist "%BLENDER_DIR%\source\tools\.git" (
|
||||
if not "%GIT%" == "" (
|
||||
"%GIT%" submodule update --init --recursive --progress
|
||||
if errorlevel 1 goto FAIL
|
||||
"%GIT%" submodule foreach git checkout master
|
||||
"%GIT%" submodule foreach git checkout main
|
||||
if errorlevel 1 goto FAIL
|
||||
"%GIT%" submodule foreach git pull --rebase origin master
|
||||
"%GIT%" submodule foreach git pull --rebase origin main
|
||||
if errorlevel 1 goto FAIL
|
||||
goto EOF
|
||||
) else (
|
||||
|
@@ -37,7 +37,7 @@ def draw_callback_px(self, context):
|
||||
# BLF drawing routine
|
||||
font_id = font_info["font_id"]
|
||||
blf.position(font_id, 2, 80, 0)
|
||||
blf.size(font_id, 50, 72)
|
||||
blf.size(font_id, 50)
|
||||
blf.draw(font_id, "Hello World")
|
||||
|
||||
|
||||
|
@@ -1816,9 +1816,9 @@ def pyrna2sphinx(basepath):
|
||||
|
||||
# operators
|
||||
def write_ops():
|
||||
API_BASEURL = "https://developer.blender.org/diffusion/B/browse/master/release/scripts"
|
||||
API_BASEURL_ADDON = "https://developer.blender.org/diffusion/BA"
|
||||
API_BASEURL_ADDON_CONTRIB = "https://developer.blender.org/diffusion/BAC"
|
||||
API_BASEURL = "https://projects.blender.org/blender/blender/src/branch/main/release/scripts"
|
||||
API_BASEURL_ADDON = "https://projects.blender.org/blender/blender-addons"
|
||||
API_BASEURL_ADDON_CONTRIB = "https://projects.blender.org/blender/blender-addons-contrib"
|
||||
|
||||
op_modules = {}
|
||||
op = None
|
||||
|
@@ -156,7 +156,7 @@ var Popover = function() {
|
||||
},
|
||||
getNamed : function(v) {
|
||||
$.each(all_versions, function(ix, title) {
|
||||
if (ix === "master" || ix === "latest") {
|
||||
if (ix === "master" || ix === "main" || ix === "latest") {
|
||||
var m = title.match(/\d\.\d[\w\d\.]*/)[0];
|
||||
if (parseFloat(m) == v) {
|
||||
v = ix;
|
||||
|
2
extern/hipew/README.blender
vendored
2
extern/hipew/README.blender
vendored
@@ -1,5 +1,5 @@
|
||||
Project: Blender
|
||||
URL: https://git.blender.org/blender.git
|
||||
URL: https://projects.blender.org/blender/blender.git
|
||||
License: Apache 2.0
|
||||
Upstream version: N/A
|
||||
Local modifications: None
|
||||
|
@@ -12,6 +12,7 @@ from bpy.props import (
|
||||
PointerProperty,
|
||||
StringProperty,
|
||||
)
|
||||
from bpy.app.translations import pgettext_iface as iface_
|
||||
|
||||
from math import pi
|
||||
|
||||
@@ -1664,30 +1665,48 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
col.label(text="No compatible GPUs found for Cycles", icon='INFO')
|
||||
|
||||
if device_type == 'CUDA':
|
||||
col.label(text="Requires NVIDIA GPU with compute capability 3.0", icon='BLANK1')
|
||||
compute_capability = "3.0"
|
||||
col.label(text=iface_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
|
||||
icon='BLANK1', translate=False)
|
||||
elif device_type == 'OPTIX':
|
||||
col.label(text="Requires NVIDIA GPU with compute capability 5.0", icon='BLANK1')
|
||||
col.label(text="and NVIDIA driver version 470 or newer", icon='BLANK1')
|
||||
compute_capability = "5.0"
|
||||
driver_version = "470"
|
||||
col.label(text=iface_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
|
||||
icon='BLANK1', translate=False)
|
||||
col.label(text="and NVIDIA driver version %s or newer" % driver_version,
|
||||
icon='BLANK1', translate=False)
|
||||
elif device_type == 'HIP':
|
||||
import sys
|
||||
if sys.platform[:3] == "win":
|
||||
driver_version = "21.Q4"
|
||||
col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
|
||||
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
|
||||
col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
|
||||
icon='BLANK1', translate=False)
|
||||
elif sys.platform.startswith("linux"):
|
||||
driver_version = "22.10"
|
||||
col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
|
||||
col.label(text="and AMD driver version 22.10 or newer", icon='BLANK1')
|
||||
col.label(text=iface_("and AMD driver version %s or newer") % driver_version, icon='BLANK1',
|
||||
translate=False)
|
||||
elif device_type == 'ONEAPI':
|
||||
import sys
|
||||
if sys.platform.startswith("win"):
|
||||
driver_version = "101.4032"
|
||||
col.label(text="Requires Intel GPU with Xe-HPG architecture", icon='BLANK1')
|
||||
col.label(text="and Windows driver version 101.4032 or newer", icon='BLANK1')
|
||||
col.label(text=iface_("and Windows driver version %s or newer") % driver_version,
|
||||
icon='BLANK1', translate=False)
|
||||
elif sys.platform.startswith("linux"):
|
||||
driver_version = "1.3.24931"
|
||||
col.label(text="Requires Intel GPU with Xe-HPG architecture and", icon='BLANK1')
|
||||
col.label(text=" - intel-level-zero-gpu version 1.3.24931 or newer", icon='BLANK1')
|
||||
col.label(text=iface_(" - intel-level-zero-gpu version %s or newer") % driver_version,
|
||||
icon='BLANK1', translate=False)
|
||||
col.label(text=" - oneAPI Level-Zero Loader", icon='BLANK1')
|
||||
elif device_type == 'METAL':
|
||||
col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')
|
||||
col.label(text="or AMD with macOS 12.3 or newer", icon='BLANK1')
|
||||
silicon_mac_version = "12.2"
|
||||
amd_mac_version = "12.3"
|
||||
col.label(text=iface_("Requires Apple Silicon with macOS %s or newer") % silicon_mac_version,
|
||||
icon='BLANK1', translate=False)
|
||||
col.label(text=iface_("or AMD with macOS %s or newer") % amd_mac_version, icon='BLANK1',
|
||||
translate=False)
|
||||
return
|
||||
|
||||
for device in devices:
|
||||
@@ -1723,12 +1742,21 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
|
||||
if compute_device_type == 'METAL':
|
||||
import platform
|
||||
# MetalRT only works on Apple Silicon at present, pending argument encoding fixes on AMD
|
||||
# Kernel specialization is only viable on Apple Silicon at present due to relative compilation speed
|
||||
if platform.machine() == 'arm64':
|
||||
import re
|
||||
is_navi_2 = False
|
||||
for device in devices:
|
||||
if re.search(r"((RX)|(Pro)|(PRO))\s+W?6\d00X", device.name):
|
||||
is_navi_2 = True
|
||||
break
|
||||
|
||||
# MetalRT only works on Apple Silicon and Navi2.
|
||||
is_arm64 = platform.machine() == 'arm64'
|
||||
if is_arm64 or is_navi_2:
|
||||
col = layout.column()
|
||||
col.use_property_split = True
|
||||
col.prop(self, "kernel_optimization_level")
|
||||
# Kernel specialization is only supported on Apple Silicon
|
||||
if is_arm64:
|
||||
col.prop(self, "kernel_optimization_level")
|
||||
col.prop(self, "use_metalrt")
|
||||
|
||||
def draw(self, context):
|
||||
|
@@ -53,8 +53,12 @@ void CUDADevice::set_error(const string &error)
|
||||
}
|
||||
|
||||
CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
: Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
|
||||
: GPUDevice(info, stats, profiler)
|
||||
{
|
||||
/* Verify that base class types can be used with specific backend types */
|
||||
static_assert(sizeof(texMemObject) == sizeof(CUtexObject));
|
||||
static_assert(sizeof(arrayMemObject) == sizeof(CUarray));
|
||||
|
||||
first_error = true;
|
||||
|
||||
cuDevId = info.num;
|
||||
@@ -65,12 +69,6 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
|
||||
need_texture_info = false;
|
||||
|
||||
device_texture_headroom = 0;
|
||||
device_working_headroom = 0;
|
||||
move_texture_to_host = false;
|
||||
map_host_limit = 0;
|
||||
map_host_used = 0;
|
||||
can_map_host = 0;
|
||||
pitch_alignment = 0;
|
||||
|
||||
/* Initialize CUDA. */
|
||||
@@ -91,8 +89,9 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
/* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
|
||||
* CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
|
||||
* so we can predict which memory to map to host. */
|
||||
cuda_assert(
|
||||
cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
|
||||
int value;
|
||||
cuda_assert(cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
|
||||
can_map_host = value != 0;
|
||||
|
||||
cuda_assert(cuDeviceGetAttribute(
|
||||
&pitch_alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
|
||||
@@ -499,311 +498,57 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
|
||||
# endif
|
||||
}
|
||||
|
||||
void CUDADevice::init_host_memory()
|
||||
{
|
||||
/* Limit amount of host mapped memory, because allocating too much can
|
||||
* cause system instability. Leave at least half or 4 GB of system
|
||||
* memory free, whichever is smaller. */
|
||||
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
|
||||
size_t system_ram = system_physical_ram();
|
||||
|
||||
if (system_ram > 0) {
|
||||
if (system_ram / 2 > default_limit) {
|
||||
map_host_limit = system_ram - default_limit;
|
||||
}
|
||||
else {
|
||||
map_host_limit = system_ram / 2;
|
||||
}
|
||||
}
|
||||
else {
|
||||
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
|
||||
map_host_limit = 0;
|
||||
}
|
||||
|
||||
/* Amount of device memory to keep is free after texture memory
|
||||
* and working memory allocations respectively. We set the working
|
||||
* memory limit headroom lower so that some space is left after all
|
||||
* texture memory allocations. */
|
||||
device_working_headroom = 32 * 1024 * 1024LL; // 32MB
|
||||
device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
|
||||
|
||||
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
|
||||
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
|
||||
}
|
||||
|
||||
void CUDADevice::load_texture_info()
|
||||
{
|
||||
if (need_texture_info) {
|
||||
/* Unset flag before copying, so this does not loop indefinitely if the copy below calls
|
||||
* into 'move_textures_to_host' (which calls 'load_texture_info' again). */
|
||||
need_texture_info = false;
|
||||
texture_info.copy_to_device();
|
||||
}
|
||||
}
|
||||
|
||||
void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
|
||||
{
|
||||
/* Break out of recursive call, which can happen when moving memory on a multi device. */
|
||||
static bool any_device_moving_textures_to_host = false;
|
||||
if (any_device_moving_textures_to_host) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Signal to reallocate textures in host memory only. */
|
||||
move_texture_to_host = true;
|
||||
|
||||
while (size > 0) {
|
||||
/* Find suitable memory allocation to move. */
|
||||
device_memory *max_mem = NULL;
|
||||
size_t max_size = 0;
|
||||
bool max_is_image = false;
|
||||
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
foreach (CUDAMemMap::value_type &pair, cuda_mem_map) {
|
||||
device_memory &mem = *pair.first;
|
||||
CUDAMem *cmem = &pair.second;
|
||||
|
||||
/* Can only move textures allocated on this device (and not those from peer devices).
|
||||
* And need to ignore memory that is already on the host. */
|
||||
if (!mem.is_resident(this) || cmem->use_mapped_host) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
|
||||
(&mem != &texture_info);
|
||||
bool is_image = is_texture && (mem.data_height > 1);
|
||||
|
||||
/* Can't move this type of memory. */
|
||||
if (!is_texture || cmem->array) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* For other textures, only move image textures. */
|
||||
if (for_texture && !is_image) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Try to move largest allocation, prefer moving images. */
|
||||
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
|
||||
max_is_image = is_image;
|
||||
max_size = mem.device_size;
|
||||
max_mem = &mem;
|
||||
}
|
||||
}
|
||||
lock.unlock();
|
||||
|
||||
/* Move to host memory. This part is mutex protected since
|
||||
* multiple CUDA devices could be moving the memory. The
|
||||
* first one will do it, and the rest will adopt the pointer. */
|
||||
if (max_mem) {
|
||||
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
|
||||
|
||||
static thread_mutex move_mutex;
|
||||
thread_scoped_lock lock(move_mutex);
|
||||
|
||||
any_device_moving_textures_to_host = true;
|
||||
|
||||
/* Potentially need to call back into multi device, so pointer mapping
|
||||
* and peer devices are updated. This is also necessary since the device
|
||||
* pointer may just be a key here, so cannot be accessed and freed directly.
|
||||
* Unfortunately it does mean that memory is reallocated on all other
|
||||
* devices as well, which is potentially dangerous when still in use (since
|
||||
* a thread rendering on another devices would only be caught in this mutex
|
||||
* if it so happens to do an allocation at the same time as well. */
|
||||
max_mem->device_copy_to();
|
||||
size = (max_size >= size) ? 0 : size - max_size;
|
||||
|
||||
any_device_moving_textures_to_host = false;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
|
||||
move_texture_to_host = false;
|
||||
|
||||
/* Update texture info array with new pointers. */
|
||||
load_texture_info();
|
||||
}
|
||||
|
||||
CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_padding)
|
||||
void CUDADevice::get_device_memory_info(size_t &total, size_t &free)
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
|
||||
CUdeviceptr device_pointer = 0;
|
||||
size_t size = mem.memory_size() + pitch_padding;
|
||||
|
||||
CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
|
||||
const char *status = "";
|
||||
|
||||
/* First try allocating in device memory, respecting headroom. We make
|
||||
* an exception for texture info. It is small and frequently accessed,
|
||||
* so treat it as working memory.
|
||||
*
|
||||
* If there is not enough room for working memory, we will try to move
|
||||
* textures to host memory, assuming the performance impact would have
|
||||
* been worse for working memory. */
|
||||
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
|
||||
bool is_image = is_texture && (mem.data_height > 1);
|
||||
|
||||
size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
|
||||
|
||||
size_t total = 0, free = 0;
|
||||
cuMemGetInfo(&free, &total);
|
||||
|
||||
/* Move textures to host memory if needed. */
|
||||
if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
|
||||
move_textures_to_host(size + headroom - free, is_texture);
|
||||
cuMemGetInfo(&free, &total);
|
||||
}
|
||||
|
||||
/* Allocate in device memory. */
|
||||
if (!move_texture_to_host && (size + headroom) < free) {
|
||||
mem_alloc_result = cuMemAlloc(&device_pointer, size);
|
||||
if (mem_alloc_result == CUDA_SUCCESS) {
|
||||
status = " in device memory";
|
||||
}
|
||||
}
|
||||
|
||||
/* Fall back to mapped host memory if needed and possible. */
|
||||
|
||||
void *shared_pointer = 0;
|
||||
|
||||
if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) {
|
||||
if (mem.shared_pointer) {
|
||||
/* Another device already allocated host memory. */
|
||||
mem_alloc_result = CUDA_SUCCESS;
|
||||
shared_pointer = mem.shared_pointer;
|
||||
}
|
||||
else if (map_host_used + size < map_host_limit) {
|
||||
/* Allocate host memory ourselves. */
|
||||
mem_alloc_result = cuMemHostAlloc(
|
||||
&shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
|
||||
|
||||
assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
|
||||
(mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
|
||||
}
|
||||
|
||||
if (mem_alloc_result == CUDA_SUCCESS) {
|
||||
cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0));
|
||||
map_host_used += size;
|
||||
status = " in host memory";
|
||||
}
|
||||
}
|
||||
|
||||
if (mem_alloc_result != CUDA_SUCCESS) {
|
||||
if (mem.type == MEM_DEVICE_ONLY) {
|
||||
status = " failed, out of device memory";
|
||||
set_error("System is out of GPU memory");
|
||||
}
|
||||
else {
|
||||
status = " failed, out of device and host memory";
|
||||
set_error("System is out of GPU and shared host memory");
|
||||
}
|
||||
}
|
||||
|
||||
if (mem.name) {
|
||||
VLOG_WORK << "Buffer allocate: " << mem.name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")" << status;
|
||||
}
|
||||
|
||||
mem.device_pointer = (device_ptr)device_pointer;
|
||||
mem.device_size = size;
|
||||
stats.mem_alloc(size);
|
||||
|
||||
if (!mem.device_pointer) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Insert into map of allocations. */
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
CUDAMem *cmem = &cuda_mem_map[&mem];
|
||||
if (shared_pointer != 0) {
|
||||
/* Replace host pointer with our host allocation. Only works if
|
||||
* CUDA memory layout is the same and has no pitch padding. Also
|
||||
* does not work if we move textures to host during a render,
|
||||
* since other devices might be using the memory. */
|
||||
|
||||
if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
|
||||
mem.host_pointer != shared_pointer) {
|
||||
memcpy(shared_pointer, mem.host_pointer, size);
|
||||
|
||||
/* A Call to device_memory::host_free() should be preceded by
|
||||
* a call to device_memory::device_free() for host memory
|
||||
* allocated by a device to be handled properly. Two exceptions
|
||||
* are here and a call in OptiXDevice::generic_alloc(), where
|
||||
* the current host memory can be assumed to be allocated by
|
||||
* device_memory::host_alloc(), not by a device */
|
||||
|
||||
mem.host_free();
|
||||
mem.host_pointer = shared_pointer;
|
||||
}
|
||||
mem.shared_pointer = shared_pointer;
|
||||
mem.shared_counter++;
|
||||
cmem->use_mapped_host = true;
|
||||
}
|
||||
else {
|
||||
cmem->use_mapped_host = false;
|
||||
}
|
||||
|
||||
return cmem;
|
||||
}
|
||||
|
||||
void CUDADevice::generic_copy_to(device_memory &mem)
|
||||
bool CUDADevice::alloc_device(void *&device_pointer, size_t size)
|
||||
{
|
||||
if (!mem.host_pointer || !mem.device_pointer) {
|
||||
return;
|
||||
}
|
||||
CUDAContextScope scope(this);
|
||||
|
||||
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
|
||||
* cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
|
||||
* mem.host_pointer. */
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
const CUDAContextScope scope(this);
|
||||
cuda_assert(
|
||||
cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size()));
|
||||
}
|
||||
CUresult mem_alloc_result = cuMemAlloc((CUdeviceptr *)&device_pointer, size);
|
||||
return mem_alloc_result == CUDA_SUCCESS;
|
||||
}
|
||||
|
||||
void CUDADevice::generic_free(device_memory &mem)
|
||||
void CUDADevice::free_device(void *device_pointer)
|
||||
{
|
||||
if (mem.device_pointer) {
|
||||
CUDAContextScope scope(this);
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
|
||||
const CUDAMem &cmem = cuda_mem_map[&mem];
|
||||
CUDAContextScope scope(this);
|
||||
|
||||
/* If cmem.use_mapped_host is true, reference counting is used
|
||||
* to safely free a mapped host memory. */
|
||||
cuda_assert(cuMemFree((CUdeviceptr)device_pointer));
|
||||
}
|
||||
|
||||
if (cmem.use_mapped_host) {
|
||||
assert(mem.shared_pointer);
|
||||
if (mem.shared_pointer) {
|
||||
assert(mem.shared_counter > 0);
|
||||
if (--mem.shared_counter == 0) {
|
||||
if (mem.host_pointer == mem.shared_pointer) {
|
||||
mem.host_pointer = 0;
|
||||
}
|
||||
cuMemFreeHost(mem.shared_pointer);
|
||||
mem.shared_pointer = 0;
|
||||
}
|
||||
}
|
||||
map_host_used -= mem.device_size;
|
||||
}
|
||||
else {
|
||||
/* Free device memory. */
|
||||
cuda_assert(cuMemFree(mem.device_pointer));
|
||||
}
|
||||
bool CUDADevice::alloc_host(void *&shared_pointer, size_t size)
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
CUresult mem_alloc_result = cuMemHostAlloc(
|
||||
&shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
|
||||
return mem_alloc_result == CUDA_SUCCESS;
|
||||
}
|
||||
|
||||
cuda_mem_map.erase(cuda_mem_map.find(&mem));
|
||||
}
|
||||
void CUDADevice::free_host(void *shared_pointer)
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
|
||||
cuMemFreeHost(shared_pointer);
|
||||
}
|
||||
|
||||
bool CUDADevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
|
||||
cuda_assert(cuMemHostGetDevicePointer_v2((CUdeviceptr *)&device_pointer, shared_pointer, 0));
|
||||
return true;
|
||||
}
|
||||
|
||||
void CUDADevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
|
||||
{
|
||||
const CUDAContextScope scope(this);
|
||||
|
||||
cuda_assert(cuMemcpyHtoD((CUdeviceptr)device_pointer, host_pointer, size));
|
||||
}
|
||||
|
||||
void CUDADevice::mem_alloc(device_memory &mem)
|
||||
@@ -868,8 +613,8 @@ void CUDADevice::mem_zero(device_memory &mem)
|
||||
|
||||
/* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
|
||||
* regardless of mem.host_pointer and mem.shared_pointer. */
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
const CUDAContextScope scope(this);
|
||||
cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
|
||||
}
|
||||
@@ -994,19 +739,19 @@ void CUDADevice::tex_alloc(device_texture &mem)
|
||||
return;
|
||||
}
|
||||
|
||||
CUDAMem *cmem = NULL;
|
||||
Mem *cmem = NULL;
|
||||
CUarray array_3d = NULL;
|
||||
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
|
||||
size_t dst_pitch = src_pitch;
|
||||
|
||||
if (!mem.is_resident(this)) {
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
cmem = &cuda_mem_map[&mem];
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
cmem->texobject = 0;
|
||||
|
||||
if (mem.data_depth > 1) {
|
||||
array_3d = (CUarray)mem.device_pointer;
|
||||
cmem->array = array_3d;
|
||||
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
dst_pitch = align_up(src_pitch, pitch_alignment);
|
||||
@@ -1050,10 +795,10 @@ void CUDADevice::tex_alloc(device_texture &mem)
|
||||
mem.device_size = size;
|
||||
stats.mem_alloc(size);
|
||||
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
cmem = &cuda_mem_map[&mem];
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
cmem->texobject = 0;
|
||||
cmem->array = array_3d;
|
||||
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
/* 2D texture, using pitch aligned linear memory. */
|
||||
@@ -1137,8 +882,8 @@ void CUDADevice::tex_alloc(device_texture &mem)
|
||||
texDesc.filterMode = filter_mode;
|
||||
texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
|
||||
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
cmem = &cuda_mem_map[&mem];
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
|
||||
cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
|
||||
|
||||
@@ -1153,9 +898,9 @@ void CUDADevice::tex_free(device_texture &mem)
|
||||
{
|
||||
if (mem.device_pointer) {
|
||||
CUDAContextScope scope(this);
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
|
||||
const CUDAMem &cmem = cuda_mem_map[&mem];
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
|
||||
const Mem &cmem = device_mem_map[&mem];
|
||||
|
||||
if (cmem.texobject) {
|
||||
/* Free bindless texture. */
|
||||
@@ -1164,16 +909,16 @@ void CUDADevice::tex_free(device_texture &mem)
|
||||
|
||||
if (!mem.is_resident(this)) {
|
||||
/* Do not free memory here, since it was allocated on a different device. */
|
||||
cuda_mem_map.erase(cuda_mem_map.find(&mem));
|
||||
device_mem_map.erase(device_mem_map.find(&mem));
|
||||
}
|
||||
else if (cmem.array) {
|
||||
/* Free array. */
|
||||
cuArrayDestroy(cmem.array);
|
||||
cuArrayDestroy(reinterpret_cast<CUarray>(cmem.array));
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
|
||||
cuda_mem_map.erase(cuda_mem_map.find(&mem));
|
||||
device_mem_map.erase(device_mem_map.find(&mem));
|
||||
}
|
||||
else {
|
||||
lock.unlock();
|
||||
|
@@ -21,7 +21,7 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
class DeviceQueue;
|
||||
|
||||
class CUDADevice : public Device {
|
||||
class CUDADevice : public GPUDevice {
|
||||
|
||||
friend class CUDAContextScope;
|
||||
|
||||
@@ -29,36 +29,11 @@ class CUDADevice : public Device {
|
||||
CUdevice cuDevice;
|
||||
CUcontext cuContext;
|
||||
CUmodule cuModule;
|
||||
size_t device_texture_headroom;
|
||||
size_t device_working_headroom;
|
||||
bool move_texture_to_host;
|
||||
size_t map_host_used;
|
||||
size_t map_host_limit;
|
||||
int can_map_host;
|
||||
int pitch_alignment;
|
||||
int cuDevId;
|
||||
int cuDevArchitecture;
|
||||
bool first_error;
|
||||
|
||||
struct CUDAMem {
|
||||
CUDAMem() : texobject(0), array(0), use_mapped_host(false)
|
||||
{
|
||||
}
|
||||
|
||||
CUtexObject texobject;
|
||||
CUarray array;
|
||||
|
||||
/* If true, a mapped host memory in shared_pointer is being used. */
|
||||
bool use_mapped_host;
|
||||
};
|
||||
typedef map<device_memory *, CUDAMem> CUDAMemMap;
|
||||
CUDAMemMap cuda_mem_map;
|
||||
thread_mutex cuda_mem_map_mutex;
|
||||
|
||||
/* Bindless Textures */
|
||||
device_vector<TextureInfo> texture_info;
|
||||
bool need_texture_info;
|
||||
|
||||
CUDADeviceKernels kernels;
|
||||
|
||||
static bool have_precompiled_kernels();
|
||||
@@ -88,17 +63,13 @@ class CUDADevice : public Device {
|
||||
|
||||
void reserve_local_memory(const uint kernel_features);
|
||||
|
||||
void init_host_memory();
|
||||
|
||||
void load_texture_info();
|
||||
|
||||
void move_textures_to_host(size_t size, bool for_texture);
|
||||
|
||||
CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
|
||||
|
||||
void generic_copy_to(device_memory &mem);
|
||||
|
||||
void generic_free(device_memory &mem);
|
||||
virtual void get_device_memory_info(size_t &total, size_t &free) override;
|
||||
virtual bool alloc_device(void *&device_pointer, size_t size) override;
|
||||
virtual void free_device(void *device_pointer) override;
|
||||
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
|
||||
virtual void free_host(void *shared_pointer) override;
|
||||
virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
|
||||
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;
|
||||
|
||||
void mem_alloc(device_memory &mem) override;
|
||||
|
||||
|
@@ -452,6 +452,320 @@ void *Device::get_cpu_osl_memory()
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
GPUDevice::~GPUDevice() noexcept(false)
|
||||
{
|
||||
}
|
||||
|
||||
bool GPUDevice::load_texture_info()
|
||||
{
|
||||
if (need_texture_info) {
|
||||
/* Unset flag before copying, so this does not loop indefinitely if the copy below calls
|
||||
* into 'move_textures_to_host' (which calls 'load_texture_info' again). */
|
||||
need_texture_info = false;
|
||||
texture_info.copy_to_device();
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void GPUDevice::init_host_memory(size_t preferred_texture_headroom,
|
||||
size_t preferred_working_headroom)
|
||||
{
|
||||
/* Limit amount of host mapped memory, because allocating too much can
|
||||
* cause system instability. Leave at least half or 4 GB of system
|
||||
* memory free, whichever is smaller. */
|
||||
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
|
||||
size_t system_ram = system_physical_ram();
|
||||
|
||||
if (system_ram > 0) {
|
||||
if (system_ram / 2 > default_limit) {
|
||||
map_host_limit = system_ram - default_limit;
|
||||
}
|
||||
else {
|
||||
map_host_limit = system_ram / 2;
|
||||
}
|
||||
}
|
||||
else {
|
||||
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
|
||||
map_host_limit = 0;
|
||||
}
|
||||
|
||||
/* Amount of device memory to keep free after texture memory
|
||||
* and working memory allocations respectively. We set the working
|
||||
* memory limit headroom lower than the working one so there
|
||||
* is space left for it. */
|
||||
device_working_headroom = preferred_working_headroom > 0 ? preferred_working_headroom :
|
||||
32 * 1024 * 1024LL; // 32MB
|
||||
device_texture_headroom = preferred_texture_headroom > 0 ? preferred_texture_headroom :
|
||||
128 * 1024 * 1024LL; // 128MB
|
||||
|
||||
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
|
||||
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
|
||||
}
|
||||
|
||||
void GPUDevice::move_textures_to_host(size_t size, bool for_texture)
|
||||
{
|
||||
/* Break out of recursive call, which can happen when moving memory on a multi device. */
|
||||
static bool any_device_moving_textures_to_host = false;
|
||||
if (any_device_moving_textures_to_host) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Signal to reallocate textures in host memory only. */
|
||||
move_texture_to_host = true;
|
||||
|
||||
while (size > 0) {
|
||||
/* Find suitable memory allocation to move. */
|
||||
device_memory *max_mem = NULL;
|
||||
size_t max_size = 0;
|
||||
bool max_is_image = false;
|
||||
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
foreach (MemMap::value_type &pair, device_mem_map) {
|
||||
device_memory &mem = *pair.first;
|
||||
Mem *cmem = &pair.second;
|
||||
|
||||
/* Can only move textures allocated on this device (and not those from peer devices).
|
||||
* And need to ignore memory that is already on the host. */
|
||||
if (!mem.is_resident(this) || cmem->use_mapped_host) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
|
||||
(&mem != &texture_info);
|
||||
bool is_image = is_texture && (mem.data_height > 1);
|
||||
|
||||
/* Can't move this type of memory. */
|
||||
if (!is_texture || cmem->array) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* For other textures, only move image textures. */
|
||||
if (for_texture && !is_image) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Try to move largest allocation, prefer moving images. */
|
||||
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
|
||||
max_is_image = is_image;
|
||||
max_size = mem.device_size;
|
||||
max_mem = &mem;
|
||||
}
|
||||
}
|
||||
lock.unlock();
|
||||
|
||||
/* Move to host memory. This part is mutex protected since
|
||||
* multiple backend devices could be moving the memory. The
|
||||
* first one will do it, and the rest will adopt the pointer. */
|
||||
if (max_mem) {
|
||||
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
|
||||
|
||||
static thread_mutex move_mutex;
|
||||
thread_scoped_lock lock(move_mutex);
|
||||
|
||||
any_device_moving_textures_to_host = true;
|
||||
|
||||
/* Potentially need to call back into multi device, so pointer mapping
|
||||
* and peer devices are updated. This is also necessary since the device
|
||||
* pointer may just be a key here, so cannot be accessed and freed directly.
|
||||
* Unfortunately it does mean that memory is reallocated on all other
|
||||
* devices as well, which is potentially dangerous when still in use (since
|
||||
* a thread rendering on another devices would only be caught in this mutex
|
||||
* if it so happens to do an allocation at the same time as well. */
|
||||
max_mem->device_copy_to();
|
||||
size = (max_size >= size) ? 0 : size - max_size;
|
||||
|
||||
any_device_moving_textures_to_host = false;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
|
||||
move_texture_to_host = false;
|
||||
|
||||
/* Update texture info array with new pointers. */
|
||||
load_texture_info();
|
||||
}
|
||||
|
||||
GPUDevice::Mem *GPUDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
|
||||
{
|
||||
void *device_pointer = 0;
|
||||
size_t size = mem.memory_size() + pitch_padding;
|
||||
|
||||
bool mem_alloc_result = false;
|
||||
const char *status = "";
|
||||
|
||||
/* First try allocating in device memory, respecting headroom. We make
|
||||
* an exception for texture info. It is small and frequently accessed,
|
||||
* so treat it as working memory.
|
||||
*
|
||||
* If there is not enough room for working memory, we will try to move
|
||||
* textures to host memory, assuming the performance impact would have
|
||||
* been worse for working memory. */
|
||||
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
|
||||
bool is_image = is_texture && (mem.data_height > 1);
|
||||
|
||||
size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
|
||||
|
||||
size_t total = 0, free = 0;
|
||||
get_device_memory_info(total, free);
|
||||
|
||||
/* Move textures to host memory if needed. */
|
||||
if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
|
||||
move_textures_to_host(size + headroom - free, is_texture);
|
||||
get_device_memory_info(total, free);
|
||||
}
|
||||
|
||||
/* Allocate in device memory. */
|
||||
if (!move_texture_to_host && (size + headroom) < free) {
|
||||
mem_alloc_result = alloc_device(device_pointer, size);
|
||||
if (mem_alloc_result) {
|
||||
device_mem_in_use += size;
|
||||
status = " in device memory";
|
||||
}
|
||||
}
|
||||
|
||||
/* Fall back to mapped host memory if needed and possible. */
|
||||
|
||||
void *shared_pointer = 0;
|
||||
|
||||
if (!mem_alloc_result && can_map_host && mem.type != MEM_DEVICE_ONLY) {
|
||||
if (mem.shared_pointer) {
|
||||
/* Another device already allocated host memory. */
|
||||
mem_alloc_result = true;
|
||||
shared_pointer = mem.shared_pointer;
|
||||
}
|
||||
else if (map_host_used + size < map_host_limit) {
|
||||
/* Allocate host memory ourselves. */
|
||||
mem_alloc_result = alloc_host(shared_pointer, size);
|
||||
|
||||
assert((mem_alloc_result && shared_pointer != 0) ||
|
||||
(!mem_alloc_result && shared_pointer == 0));
|
||||
}
|
||||
|
||||
if (mem_alloc_result) {
|
||||
assert(transform_host_pointer(device_pointer, shared_pointer));
|
||||
map_host_used += size;
|
||||
status = " in host memory";
|
||||
}
|
||||
}
|
||||
|
||||
if (!mem_alloc_result) {
|
||||
if (mem.type == MEM_DEVICE_ONLY) {
|
||||
status = " failed, out of device memory";
|
||||
set_error("System is out of GPU memory");
|
||||
}
|
||||
else {
|
||||
status = " failed, out of device and host memory";
|
||||
set_error("System is out of GPU and shared host memory");
|
||||
}
|
||||
}
|
||||
|
||||
if (mem.name) {
|
||||
VLOG_WORK << "Buffer allocate: " << mem.name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")" << status;
|
||||
}
|
||||
|
||||
mem.device_pointer = (device_ptr)device_pointer;
|
||||
mem.device_size = size;
|
||||
stats.mem_alloc(size);
|
||||
|
||||
if (!mem.device_pointer) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Insert into map of allocations. */
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
Mem *cmem = &device_mem_map[&mem];
|
||||
if (shared_pointer != 0) {
|
||||
/* Replace host pointer with our host allocation. Only works if
|
||||
* memory layout is the same and has no pitch padding. Also
|
||||
* does not work if we move textures to host during a render,
|
||||
* since other devices might be using the memory. */
|
||||
|
||||
if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
|
||||
mem.host_pointer != shared_pointer) {
|
||||
memcpy(shared_pointer, mem.host_pointer, size);
|
||||
|
||||
/* A Call to device_memory::host_free() should be preceded by
|
||||
* a call to device_memory::device_free() for host memory
|
||||
* allocated by a device to be handled properly. Two exceptions
|
||||
* are here and a call in OptiXDevice::generic_alloc(), where
|
||||
* the current host memory can be assumed to be allocated by
|
||||
* device_memory::host_alloc(), not by a device */
|
||||
|
||||
mem.host_free();
|
||||
mem.host_pointer = shared_pointer;
|
||||
}
|
||||
mem.shared_pointer = shared_pointer;
|
||||
mem.shared_counter++;
|
||||
cmem->use_mapped_host = true;
|
||||
}
|
||||
else {
|
||||
cmem->use_mapped_host = false;
|
||||
}
|
||||
|
||||
return cmem;
|
||||
}
|
||||
|
||||
void GPUDevice::generic_free(device_memory &mem)
|
||||
{
|
||||
if (mem.device_pointer) {
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
|
||||
const Mem &cmem = device_mem_map[&mem];
|
||||
|
||||
/* If cmem.use_mapped_host is true, reference counting is used
|
||||
* to safely free a mapped host memory. */
|
||||
|
||||
if (cmem.use_mapped_host) {
|
||||
assert(mem.shared_pointer);
|
||||
if (mem.shared_pointer) {
|
||||
assert(mem.shared_counter > 0);
|
||||
if (--mem.shared_counter == 0) {
|
||||
if (mem.host_pointer == mem.shared_pointer) {
|
||||
mem.host_pointer = 0;
|
||||
}
|
||||
free_host(mem.shared_pointer);
|
||||
mem.shared_pointer = 0;
|
||||
}
|
||||
}
|
||||
map_host_used -= mem.device_size;
|
||||
}
|
||||
else {
|
||||
/* Free device memory. */
|
||||
free_device((void *)mem.device_pointer);
|
||||
device_mem_in_use -= mem.device_size;
|
||||
}
|
||||
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
|
||||
device_mem_map.erase(device_mem_map.find(&mem));
|
||||
}
|
||||
}
|
||||
|
||||
void GPUDevice::generic_copy_to(device_memory &mem)
|
||||
{
|
||||
if (!mem.host_pointer || !mem.device_pointer) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
|
||||
* backend device allocation regardless of mem.host_pointer and mem.shared_pointer, and should
|
||||
* copy data from mem.host_pointer. */
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size());
|
||||
}
|
||||
}
|
||||
|
||||
/* DeviceInfo */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -309,6 +309,93 @@ class Device {
|
||||
static uint devices_initialized_mask;
|
||||
};
|
||||
|
||||
/* Device, which is GPU, with some common functionality for GPU backends */
|
||||
class GPUDevice : public Device {
|
||||
protected:
|
||||
GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
|
||||
: Device(info_, stats_, profiler_),
|
||||
texture_info(this, "texture_info", MEM_GLOBAL),
|
||||
need_texture_info(false),
|
||||
can_map_host(false),
|
||||
map_host_used(0),
|
||||
map_host_limit(0),
|
||||
device_texture_headroom(0),
|
||||
device_working_headroom(0),
|
||||
device_mem_map(),
|
||||
device_mem_map_mutex(),
|
||||
move_texture_to_host(false),
|
||||
device_mem_in_use(0)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
virtual ~GPUDevice() noexcept(false);
|
||||
|
||||
/* For GPUs that can use bindless textures in some way or another. */
|
||||
device_vector<TextureInfo> texture_info;
|
||||
bool need_texture_info;
|
||||
/* Returns true if the texture info was copied to the device (meaning, some more
|
||||
* re-initialization might be needed). */
|
||||
virtual bool load_texture_info();
|
||||
|
||||
protected:
|
||||
/* Memory allocation, only accessed through device_memory. */
|
||||
friend class device_memory;
|
||||
|
||||
bool can_map_host;
|
||||
size_t map_host_used;
|
||||
size_t map_host_limit;
|
||||
size_t device_texture_headroom;
|
||||
size_t device_working_headroom;
|
||||
typedef unsigned long long texMemObject;
|
||||
typedef unsigned long long arrayMemObject;
|
||||
struct Mem {
|
||||
Mem() : texobject(0), array(0), use_mapped_host(false)
|
||||
{
|
||||
}
|
||||
|
||||
texMemObject texobject;
|
||||
arrayMemObject array;
|
||||
|
||||
/* If true, a mapped host memory in shared_pointer is being used. */
|
||||
bool use_mapped_host;
|
||||
};
|
||||
typedef map<device_memory *, Mem> MemMap;
|
||||
MemMap device_mem_map;
|
||||
thread_mutex device_mem_map_mutex;
|
||||
bool move_texture_to_host;
|
||||
/* Simple counter which will try to track amount of used device memory */
|
||||
size_t device_mem_in_use;
|
||||
|
||||
virtual void init_host_memory(size_t preferred_texture_headroom = 0,
|
||||
size_t preferred_working_headroom = 0);
|
||||
virtual void move_textures_to_host(size_t size, bool for_texture);
|
||||
|
||||
/* Allocation, deallocation and copy functions, with corresponding
|
||||
* support of device/host allocations. */
|
||||
virtual GPUDevice::Mem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
|
||||
virtual void generic_free(device_memory &mem);
|
||||
virtual void generic_copy_to(device_memory &mem);
|
||||
|
||||
/* total - amount of device memory, free - amount of available device memory */
|
||||
virtual void get_device_memory_info(size_t &total, size_t &free) = 0;
|
||||
|
||||
virtual bool alloc_device(void *&device_pointer, size_t size) = 0;
|
||||
|
||||
virtual void free_device(void *device_pointer) = 0;
|
||||
|
||||
virtual bool alloc_host(void *&shared_pointer, size_t size) = 0;
|
||||
|
||||
virtual void free_host(void *shared_pointer) = 0;
|
||||
|
||||
/* This function should return device pointer corresponding to shared pointer, which
|
||||
* is host buffer, allocated in `alloc_host`. The function should `true`, if such
|
||||
* address transformation is possible and `false` otherwise. */
|
||||
virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) = 0;
|
||||
|
||||
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) = 0;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __DEVICE_H__ */
|
||||
|
@@ -53,8 +53,12 @@ void HIPDevice::set_error(const string &error)
|
||||
}
|
||||
|
||||
HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
: Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
|
||||
: GPUDevice(info, stats, profiler)
|
||||
{
|
||||
/* Verify that base class types can be used with specific backend types */
|
||||
static_assert(sizeof(texMemObject) == sizeof(hipTextureObject_t));
|
||||
static_assert(sizeof(arrayMemObject) == sizeof(hArray));
|
||||
|
||||
first_error = true;
|
||||
|
||||
hipDevId = info.num;
|
||||
@@ -65,12 +69,6 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
|
||||
need_texture_info = false;
|
||||
|
||||
device_texture_headroom = 0;
|
||||
device_working_headroom = 0;
|
||||
move_texture_to_host = false;
|
||||
map_host_limit = 0;
|
||||
map_host_used = 0;
|
||||
can_map_host = 0;
|
||||
pitch_alignment = 0;
|
||||
|
||||
/* Initialize HIP. */
|
||||
@@ -91,7 +89,9 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
/* hipDeviceMapHost for mapping host memory when out of device memory.
|
||||
* hipDeviceLmemResizeToMax for reserving local memory ahead of render,
|
||||
* so we can predict which memory to map to host. */
|
||||
hip_assert(hipDeviceGetAttribute(&can_map_host, hipDeviceAttributeCanMapHostMemory, hipDevice));
|
||||
int value;
|
||||
hip_assert(hipDeviceGetAttribute(&value, hipDeviceAttributeCanMapHostMemory, hipDevice));
|
||||
can_map_host = value != 0;
|
||||
|
||||
hip_assert(
|
||||
hipDeviceGetAttribute(&pitch_alignment, hipDeviceAttributeTexturePitchAlignment, hipDevice));
|
||||
@@ -460,305 +460,58 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
|
||||
# endif
|
||||
}
|
||||
|
||||
void HIPDevice::init_host_memory()
|
||||
{
|
||||
/* Limit amount of host mapped memory, because allocating too much can
|
||||
* cause system instability. Leave at least half or 4 GB of system
|
||||
* memory free, whichever is smaller. */
|
||||
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
|
||||
size_t system_ram = system_physical_ram();
|
||||
|
||||
if (system_ram > 0) {
|
||||
if (system_ram / 2 > default_limit) {
|
||||
map_host_limit = system_ram - default_limit;
|
||||
}
|
||||
else {
|
||||
map_host_limit = system_ram / 2;
|
||||
}
|
||||
}
|
||||
else {
|
||||
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
|
||||
map_host_limit = 0;
|
||||
}
|
||||
|
||||
/* Amount of device memory to keep is free after texture memory
|
||||
* and working memory allocations respectively. We set the working
|
||||
* memory limit headroom lower so that some space is left after all
|
||||
* texture memory allocations. */
|
||||
device_working_headroom = 32 * 1024 * 1024LL; // 32MB
|
||||
device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
|
||||
|
||||
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
|
||||
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
|
||||
}
|
||||
|
||||
void HIPDevice::load_texture_info()
|
||||
{
|
||||
if (need_texture_info) {
|
||||
/* Unset flag before copying, so this does not loop indefinitely if the copy below calls
|
||||
* into 'move_textures_to_host' (which calls 'load_texture_info' again). */
|
||||
need_texture_info = false;
|
||||
texture_info.copy_to_device();
|
||||
}
|
||||
}
|
||||
|
||||
void HIPDevice::move_textures_to_host(size_t size, bool for_texture)
|
||||
{
|
||||
/* Break out of recursive call, which can happen when moving memory on a multi device. */
|
||||
static bool any_device_moving_textures_to_host = false;
|
||||
if (any_device_moving_textures_to_host) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Signal to reallocate textures in host memory only. */
|
||||
move_texture_to_host = true;
|
||||
|
||||
while (size > 0) {
|
||||
/* Find suitable memory allocation to move. */
|
||||
device_memory *max_mem = NULL;
|
||||
size_t max_size = 0;
|
||||
bool max_is_image = false;
|
||||
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
foreach (HIPMemMap::value_type &pair, hip_mem_map) {
|
||||
device_memory &mem = *pair.first;
|
||||
HIPMem *cmem = &pair.second;
|
||||
|
||||
/* Can only move textures allocated on this device (and not those from peer devices).
|
||||
* And need to ignore memory that is already on the host. */
|
||||
if (!mem.is_resident(this) || cmem->use_mapped_host) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
|
||||
(&mem != &texture_info);
|
||||
bool is_image = is_texture && (mem.data_height > 1);
|
||||
|
||||
/* Can't move this type of memory. */
|
||||
if (!is_texture || cmem->array) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* For other textures, only move image textures. */
|
||||
if (for_texture && !is_image) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Try to move largest allocation, prefer moving images. */
|
||||
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
|
||||
max_is_image = is_image;
|
||||
max_size = mem.device_size;
|
||||
max_mem = &mem;
|
||||
}
|
||||
}
|
||||
lock.unlock();
|
||||
|
||||
/* Move to host memory. This part is mutex protected since
|
||||
* multiple HIP devices could be moving the memory. The
|
||||
* first one will do it, and the rest will adopt the pointer. */
|
||||
if (max_mem) {
|
||||
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
|
||||
|
||||
static thread_mutex move_mutex;
|
||||
thread_scoped_lock lock(move_mutex);
|
||||
|
||||
any_device_moving_textures_to_host = true;
|
||||
|
||||
/* Potentially need to call back into multi device, so pointer mapping
|
||||
* and peer devices are updated. This is also necessary since the device
|
||||
* pointer may just be a key here, so cannot be accessed and freed directly.
|
||||
* Unfortunately it does mean that memory is reallocated on all other
|
||||
* devices as well, which is potentially dangerous when still in use (since
|
||||
* a thread rendering on another devices would only be caught in this mutex
|
||||
* if it so happens to do an allocation at the same time as well. */
|
||||
max_mem->device_copy_to();
|
||||
size = (max_size >= size) ? 0 : size - max_size;
|
||||
|
||||
any_device_moving_textures_to_host = false;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
|
||||
move_texture_to_host = false;
|
||||
|
||||
/* Update texture info array with new pointers. */
|
||||
load_texture_info();
|
||||
}
|
||||
|
||||
HIPDevice::HIPMem *HIPDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
|
||||
void HIPDevice::get_device_memory_info(size_t &total, size_t &free)
|
||||
{
|
||||
HIPContextScope scope(this);
|
||||
|
||||
hipDeviceptr_t device_pointer = 0;
|
||||
size_t size = mem.memory_size() + pitch_padding;
|
||||
|
||||
hipError_t mem_alloc_result = hipErrorOutOfMemory;
|
||||
const char *status = "";
|
||||
|
||||
/* First try allocating in device memory, respecting headroom. We make
|
||||
* an exception for texture info. It is small and frequently accessed,
|
||||
* so treat it as working memory.
|
||||
*
|
||||
* If there is not enough room for working memory, we will try to move
|
||||
* textures to host memory, assuming the performance impact would have
|
||||
* been worse for working memory. */
|
||||
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
|
||||
bool is_image = is_texture && (mem.data_height > 1);
|
||||
|
||||
size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
|
||||
|
||||
size_t total = 0, free = 0;
|
||||
hipMemGetInfo(&free, &total);
|
||||
|
||||
/* Move textures to host memory if needed. */
|
||||
if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
|
||||
move_textures_to_host(size + headroom - free, is_texture);
|
||||
hipMemGetInfo(&free, &total);
|
||||
}
|
||||
|
||||
/* Allocate in device memory. */
|
||||
if (!move_texture_to_host && (size + headroom) < free) {
|
||||
mem_alloc_result = hipMalloc(&device_pointer, size);
|
||||
if (mem_alloc_result == hipSuccess) {
|
||||
status = " in device memory";
|
||||
}
|
||||
}
|
||||
|
||||
/* Fall back to mapped host memory if needed and possible. */
|
||||
|
||||
void *shared_pointer = 0;
|
||||
|
||||
if (mem_alloc_result != hipSuccess && can_map_host) {
|
||||
if (mem.shared_pointer) {
|
||||
/* Another device already allocated host memory. */
|
||||
mem_alloc_result = hipSuccess;
|
||||
shared_pointer = mem.shared_pointer;
|
||||
}
|
||||
else if (map_host_used + size < map_host_limit) {
|
||||
/* Allocate host memory ourselves. */
|
||||
mem_alloc_result = hipHostMalloc(
|
||||
&shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
|
||||
|
||||
assert((mem_alloc_result == hipSuccess && shared_pointer != 0) ||
|
||||
(mem_alloc_result != hipSuccess && shared_pointer == 0));
|
||||
}
|
||||
|
||||
if (mem_alloc_result == hipSuccess) {
|
||||
hip_assert(hipHostGetDevicePointer(&device_pointer, shared_pointer, 0));
|
||||
map_host_used += size;
|
||||
status = " in host memory";
|
||||
}
|
||||
}
|
||||
|
||||
if (mem_alloc_result != hipSuccess) {
|
||||
status = " failed, out of device and host memory";
|
||||
set_error("System is out of GPU and shared host memory");
|
||||
}
|
||||
|
||||
if (mem.name) {
|
||||
VLOG_WORK << "Buffer allocate: " << mem.name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")" << status;
|
||||
}
|
||||
|
||||
mem.device_pointer = (device_ptr)device_pointer;
|
||||
mem.device_size = size;
|
||||
stats.mem_alloc(size);
|
||||
|
||||
if (!mem.device_pointer) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Insert into map of allocations. */
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
HIPMem *cmem = &hip_mem_map[&mem];
|
||||
if (shared_pointer != 0) {
|
||||
/* Replace host pointer with our host allocation. Only works if
|
||||
* HIP memory layout is the same and has no pitch padding. Also
|
||||
* does not work if we move textures to host during a render,
|
||||
* since other devices might be using the memory. */
|
||||
|
||||
if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
|
||||
mem.host_pointer != shared_pointer) {
|
||||
memcpy(shared_pointer, mem.host_pointer, size);
|
||||
|
||||
/* A Call to device_memory::host_free() should be preceded by
|
||||
* a call to device_memory::device_free() for host memory
|
||||
* allocated by a device to be handled properly. Two exceptions
|
||||
* are here and a call in OptiXDevice::generic_alloc(), where
|
||||
* the current host memory can be assumed to be allocated by
|
||||
* device_memory::host_alloc(), not by a device */
|
||||
|
||||
mem.host_free();
|
||||
mem.host_pointer = shared_pointer;
|
||||
}
|
||||
mem.shared_pointer = shared_pointer;
|
||||
mem.shared_counter++;
|
||||
cmem->use_mapped_host = true;
|
||||
}
|
||||
else {
|
||||
cmem->use_mapped_host = false;
|
||||
}
|
||||
|
||||
return cmem;
|
||||
}
|
||||
|
||||
void HIPDevice::generic_copy_to(device_memory &mem)
|
||||
bool HIPDevice::alloc_device(void *&device_pointer, size_t size)
|
||||
{
|
||||
if (!mem.host_pointer || !mem.device_pointer) {
|
||||
return;
|
||||
}
|
||||
HIPContextScope scope(this);
|
||||
|
||||
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
|
||||
* hipMalloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
|
||||
* mem.host_pointer. */
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
const HIPContextScope scope(this);
|
||||
hip_assert(
|
||||
hipMemcpyHtoD((hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size()));
|
||||
}
|
||||
hipError_t mem_alloc_result = hipMalloc((hipDeviceptr_t *)&device_pointer, size);
|
||||
return mem_alloc_result == hipSuccess;
|
||||
}
|
||||
|
||||
void HIPDevice::generic_free(device_memory &mem)
|
||||
void HIPDevice::free_device(void *device_pointer)
|
||||
{
|
||||
if (mem.device_pointer) {
|
||||
HIPContextScope scope(this);
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
|
||||
const HIPMem &cmem = hip_mem_map[&mem];
|
||||
HIPContextScope scope(this);
|
||||
|
||||
/* If cmem.use_mapped_host is true, reference counting is used
|
||||
* to safely free a mapped host memory. */
|
||||
hip_assert(hipFree((hipDeviceptr_t)device_pointer));
|
||||
}
|
||||
|
||||
if (cmem.use_mapped_host) {
|
||||
assert(mem.shared_pointer);
|
||||
if (mem.shared_pointer) {
|
||||
assert(mem.shared_counter > 0);
|
||||
if (--mem.shared_counter == 0) {
|
||||
if (mem.host_pointer == mem.shared_pointer) {
|
||||
mem.host_pointer = 0;
|
||||
}
|
||||
hipHostFree(mem.shared_pointer);
|
||||
mem.shared_pointer = 0;
|
||||
}
|
||||
}
|
||||
map_host_used -= mem.device_size;
|
||||
}
|
||||
else {
|
||||
/* Free device memory. */
|
||||
hip_assert(hipFree(mem.device_pointer));
|
||||
}
|
||||
bool HIPDevice::alloc_host(void *&shared_pointer, size_t size)
|
||||
{
|
||||
HIPContextScope scope(this);
|
||||
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
hipError_t mem_alloc_result = hipHostMalloc(
|
||||
&shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
|
||||
|
||||
hip_mem_map.erase(hip_mem_map.find(&mem));
|
||||
}
|
||||
return mem_alloc_result == hipSuccess;
|
||||
}
|
||||
|
||||
void HIPDevice::free_host(void *shared_pointer)
|
||||
{
|
||||
HIPContextScope scope(this);
|
||||
|
||||
hipHostFree(shared_pointer);
|
||||
}
|
||||
|
||||
bool HIPDevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
|
||||
{
|
||||
HIPContextScope scope(this);
|
||||
|
||||
hip_assert(hipHostGetDevicePointer((hipDeviceptr_t *)&device_pointer, shared_pointer, 0));
|
||||
return true;
|
||||
}
|
||||
|
||||
void HIPDevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
|
||||
{
|
||||
const HIPContextScope scope(this);
|
||||
|
||||
hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer, host_pointer, size));
|
||||
}
|
||||
|
||||
void HIPDevice::mem_alloc(device_memory &mem)
|
||||
@@ -823,8 +576,8 @@ void HIPDevice::mem_zero(device_memory &mem)
|
||||
|
||||
/* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
|
||||
* regardless of mem.host_pointer and mem.shared_pointer. */
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
const HIPContextScope scope(this);
|
||||
hip_assert(hipMemsetD8((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size()));
|
||||
}
|
||||
@@ -951,19 +704,19 @@ void HIPDevice::tex_alloc(device_texture &mem)
|
||||
return;
|
||||
}
|
||||
|
||||
HIPMem *cmem = NULL;
|
||||
Mem *cmem = NULL;
|
||||
hArray array_3d = NULL;
|
||||
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
|
||||
size_t dst_pitch = src_pitch;
|
||||
|
||||
if (!mem.is_resident(this)) {
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
cmem = &hip_mem_map[&mem];
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
cmem->texobject = 0;
|
||||
|
||||
if (mem.data_depth > 1) {
|
||||
array_3d = (hArray)mem.device_pointer;
|
||||
cmem->array = array_3d;
|
||||
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
dst_pitch = align_up(src_pitch, pitch_alignment);
|
||||
@@ -1007,10 +760,10 @@ void HIPDevice::tex_alloc(device_texture &mem)
|
||||
mem.device_size = size;
|
||||
stats.mem_alloc(size);
|
||||
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
cmem = &hip_mem_map[&mem];
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
cmem->texobject = 0;
|
||||
cmem->array = array_3d;
|
||||
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
/* 2D texture, using pitch aligned linear memory. */
|
||||
@@ -1095,8 +848,8 @@ void HIPDevice::tex_alloc(device_texture &mem)
|
||||
texDesc.filterMode = filter_mode;
|
||||
texDesc.flags = HIP_TRSF_NORMALIZED_COORDINATES;
|
||||
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
cmem = &hip_mem_map[&mem];
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
|
||||
hip_assert(hipTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
|
||||
|
||||
@@ -1111,9 +864,9 @@ void HIPDevice::tex_free(device_texture &mem)
|
||||
{
|
||||
if (mem.device_pointer) {
|
||||
HIPContextScope scope(this);
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
|
||||
const HIPMem &cmem = hip_mem_map[&mem];
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
|
||||
const Mem &cmem = device_mem_map[&mem];
|
||||
|
||||
if (cmem.texobject) {
|
||||
/* Free bindless texture. */
|
||||
@@ -1122,16 +875,16 @@ void HIPDevice::tex_free(device_texture &mem)
|
||||
|
||||
if (!mem.is_resident(this)) {
|
||||
/* Do not free memory here, since it was allocated on a different device. */
|
||||
hip_mem_map.erase(hip_mem_map.find(&mem));
|
||||
device_mem_map.erase(device_mem_map.find(&mem));
|
||||
}
|
||||
else if (cmem.array) {
|
||||
/* Free array. */
|
||||
hipArrayDestroy(cmem.array);
|
||||
hipArrayDestroy(reinterpret_cast<hArray>(cmem.array));
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
|
||||
hip_mem_map.erase(hip_mem_map.find(&mem));
|
||||
device_mem_map.erase(device_mem_map.find(&mem));
|
||||
}
|
||||
else {
|
||||
lock.unlock();
|
||||
|
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
class DeviceQueue;
|
||||
|
||||
class HIPDevice : public Device {
|
||||
class HIPDevice : public GPUDevice {
|
||||
|
||||
friend class HIPContextScope;
|
||||
|
||||
@@ -26,36 +26,11 @@ class HIPDevice : public Device {
|
||||
hipDevice_t hipDevice;
|
||||
hipCtx_t hipContext;
|
||||
hipModule_t hipModule;
|
||||
size_t device_texture_headroom;
|
||||
size_t device_working_headroom;
|
||||
bool move_texture_to_host;
|
||||
size_t map_host_used;
|
||||
size_t map_host_limit;
|
||||
int can_map_host;
|
||||
int pitch_alignment;
|
||||
int hipDevId;
|
||||
int hipDevArchitecture;
|
||||
bool first_error;
|
||||
|
||||
struct HIPMem {
|
||||
HIPMem() : texobject(0), array(0), use_mapped_host(false)
|
||||
{
|
||||
}
|
||||
|
||||
hipTextureObject_t texobject;
|
||||
hArray array;
|
||||
|
||||
/* If true, a mapped host memory in shared_pointer is being used. */
|
||||
bool use_mapped_host;
|
||||
};
|
||||
typedef map<device_memory *, HIPMem> HIPMemMap;
|
||||
HIPMemMap hip_mem_map;
|
||||
thread_mutex hip_mem_map_mutex;
|
||||
|
||||
/* Bindless Textures */
|
||||
device_vector<TextureInfo> texture_info;
|
||||
bool need_texture_info;
|
||||
|
||||
HIPDeviceKernels kernels;
|
||||
|
||||
static bool have_precompiled_kernels();
|
||||
@@ -81,17 +56,13 @@ class HIPDevice : public Device {
|
||||
virtual bool load_kernels(const uint kernel_features) override;
|
||||
void reserve_local_memory(const uint kernel_features);
|
||||
|
||||
void init_host_memory();
|
||||
|
||||
void load_texture_info();
|
||||
|
||||
void move_textures_to_host(size_t size, bool for_texture);
|
||||
|
||||
HIPMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
|
||||
|
||||
void generic_copy_to(device_memory &mem);
|
||||
|
||||
void generic_free(device_memory &mem);
|
||||
virtual void get_device_memory_info(size_t &total, size_t &free) override;
|
||||
virtual bool alloc_device(void *&device_pointer, size_t size) override;
|
||||
virtual void free_device(void *device_pointer) override;
|
||||
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
|
||||
virtual void free_host(void *shared_pointer) override;
|
||||
virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
|
||||
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;
|
||||
|
||||
void mem_alloc(device_memory &mem) override;
|
||||
|
||||
|
@@ -73,6 +73,10 @@ const char *device_kernel_as_string(DeviceKernel kernel)
|
||||
return "integrator_terminated_paths_array";
|
||||
case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
|
||||
return "integrator_sorted_paths_array";
|
||||
case DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS:
|
||||
return "integrator_sort_bucket_pass";
|
||||
case DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS:
|
||||
return "integrator_sort_write_pass";
|
||||
case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
|
||||
return "integrator_compact_paths_array";
|
||||
case DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES:
|
||||
|
@@ -247,6 +247,8 @@ class device_memory {
|
||||
bool is_resident(Device *sub_device) const;
|
||||
|
||||
protected:
|
||||
friend class Device;
|
||||
friend class GPUDevice;
|
||||
friend class CUDADevice;
|
||||
friend class OptiXDevice;
|
||||
friend class HIPDevice;
|
||||
|
@@ -21,6 +21,7 @@ class BVHMetal : public BVH {
|
||||
|
||||
API_AVAILABLE(macos(11.0))
|
||||
vector<id<MTLAccelerationStructure>> blas_array;
|
||||
vector<uint32_t> blas_lookup;
|
||||
|
||||
bool motion_blur = false;
|
||||
|
||||
|
@@ -816,6 +816,11 @@ bool BVHMetal::build_TLAS(Progress &progress,
|
||||
|
||||
uint32_t instance_index = 0;
|
||||
uint32_t motion_transform_index = 0;
|
||||
|
||||
// allocate look up buffer for wost case scenario
|
||||
uint64_t count = objects.size();
|
||||
blas_lookup.resize(count);
|
||||
|
||||
for (Object *ob : objects) {
|
||||
/* Skip non-traceable objects */
|
||||
if (!ob->is_traceable())
|
||||
@@ -843,12 +848,15 @@ bool BVHMetal::build_TLAS(Progress &progress,
|
||||
/* Set user instance ID to object index */
|
||||
int object_index = ob->get_device_index();
|
||||
uint32_t user_id = uint32_t(object_index);
|
||||
int currIndex = instance_index++;
|
||||
assert(user_id < blas_lookup.size());
|
||||
blas_lookup[user_id] = accel_struct_index;
|
||||
|
||||
/* Bake into the appropriate descriptor */
|
||||
if (motion_blur) {
|
||||
MTLAccelerationStructureMotionInstanceDescriptor *instances =
|
||||
(MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
|
||||
MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++];
|
||||
MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];
|
||||
|
||||
desc.accelerationStructureIndex = accel_struct_index;
|
||||
desc.userID = user_id;
|
||||
@@ -894,7 +902,7 @@ bool BVHMetal::build_TLAS(Progress &progress,
|
||||
else {
|
||||
MTLAccelerationStructureUserIDInstanceDescriptor *instances =
|
||||
(MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
|
||||
MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++];
|
||||
MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];
|
||||
|
||||
desc.accelerationStructureIndex = accel_struct_index;
|
||||
desc.userID = user_id;
|
||||
|
@@ -55,6 +55,10 @@ void device_metal_info(vector<DeviceInfo> &devices)
|
||||
info.denoisers = DENOISER_NONE;
|
||||
info.id = id;
|
||||
|
||||
if (MetalInfo::get_device_vendor(device) == METAL_GPU_AMD) {
|
||||
info.has_light_tree = false;
|
||||
}
|
||||
|
||||
devices.push_back(info);
|
||||
device_index++;
|
||||
}
|
||||
|
@@ -74,6 +74,11 @@ class MetalDevice : public Device {
|
||||
id<MTLBuffer> texture_bindings_3d = nil;
|
||||
std::vector<id<MTLTexture>> texture_slot_map;
|
||||
|
||||
/* BLAS encoding & lookup */
|
||||
id<MTLArgumentEncoder> mtlBlasArgEncoder = nil;
|
||||
id<MTLBuffer> blas_buffer = nil;
|
||||
id<MTLBuffer> blas_lookup_buffer = nil;
|
||||
|
||||
bool use_metalrt = false;
|
||||
MetalPipelineType kernel_specialization_level = PSO_GENERIC;
|
||||
|
||||
@@ -105,6 +110,8 @@ class MetalDevice : public Device {
|
||||
|
||||
bool use_adaptive_compilation();
|
||||
|
||||
bool use_local_atomic_sort() const;
|
||||
|
||||
bool make_source_and_check_if_compile_needed(MetalPipelineType pso_type);
|
||||
|
||||
void make_source(MetalPipelineType pso_type, const uint kernel_features);
|
||||
|
@@ -192,6 +192,10 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
arg_desc_as.dataType = MTLDataTypeInstanceAccelerationStructure;
|
||||
arg_desc_as.access = MTLArgumentAccessReadOnly;
|
||||
|
||||
MTLArgumentDescriptor *arg_desc_ptrs = [[MTLArgumentDescriptor alloc] init];
|
||||
arg_desc_ptrs.dataType = MTLDataTypePointer;
|
||||
arg_desc_ptrs.access = MTLArgumentAccessReadOnly;
|
||||
|
||||
MTLArgumentDescriptor *arg_desc_ift = [[MTLArgumentDescriptor alloc] init];
|
||||
arg_desc_ift.dataType = MTLDataTypeIntersectionFunctionTable;
|
||||
arg_desc_ift.access = MTLArgumentAccessReadOnly;
|
||||
@@ -204,14 +208,28 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
[ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_shadow */
|
||||
arg_desc_ift.index = index++;
|
||||
[ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_local */
|
||||
arg_desc_ift.index = index++;
|
||||
[ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_local_prim */
|
||||
arg_desc_ptrs.index = index++;
|
||||
[ancillary_desc addObject:[arg_desc_ptrs copy]]; /* blas array */
|
||||
arg_desc_ptrs.index = index++;
|
||||
[ancillary_desc addObject:[arg_desc_ptrs copy]]; /* look up table for blas */
|
||||
|
||||
[arg_desc_ift release];
|
||||
[arg_desc_as release];
|
||||
[arg_desc_ptrs release];
|
||||
}
|
||||
}
|
||||
|
||||
mtlAncillaryArgEncoder = [mtlDevice newArgumentEncoderWithArguments:ancillary_desc];
|
||||
|
||||
// preparing the blas arg encoder
|
||||
MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init];
|
||||
arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure;
|
||||
arg_desc_blas.access = MTLArgumentAccessReadOnly;
|
||||
mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]];
|
||||
[arg_desc_blas release];
|
||||
|
||||
for (int i = 0; i < ancillary_desc.count; i++) {
|
||||
[ancillary_desc[i] release];
|
||||
}
|
||||
@@ -271,6 +289,11 @@ bool MetalDevice::use_adaptive_compilation()
|
||||
return DebugFlags().metal.adaptive_compile;
|
||||
}
|
||||
|
||||
bool MetalDevice::use_local_atomic_sort() const
|
||||
{
|
||||
return DebugFlags().metal.use_local_atomic_sort;
|
||||
}
|
||||
|
||||
void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
|
||||
{
|
||||
string global_defines;
|
||||
@@ -278,6 +301,10 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
|
||||
global_defines += "#define __KERNEL_FEATURES__ " + to_string(kernel_features) + "\n";
|
||||
}
|
||||
|
||||
if (use_local_atomic_sort()) {
|
||||
global_defines += "#define __KERNEL_LOCAL_ATOMIC_SORT__\n";
|
||||
}
|
||||
|
||||
if (use_metalrt) {
|
||||
global_defines += "#define __METALRT__\n";
|
||||
if (motion_blur) {
|
||||
@@ -1231,6 +1258,33 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
if (@available(macos 11.0, *)) {
|
||||
if (bvh->params.top_level) {
|
||||
bvhMetalRT = bvh_metal;
|
||||
|
||||
// allocate required buffers for BLAS array
|
||||
uint64_t count = bvhMetalRT->blas_array.size();
|
||||
uint64_t bufferSize = mtlBlasArgEncoder.encodedLength * count;
|
||||
blas_buffer = [mtlDevice newBufferWithLength:bufferSize options:default_storage_mode];
|
||||
stats.mem_alloc(blas_buffer.allocatedSize);
|
||||
|
||||
for (uint64_t i = 0; i < count; ++i) {
|
||||
[mtlBlasArgEncoder setArgumentBuffer:blas_buffer
|
||||
offset:i * mtlBlasArgEncoder.encodedLength];
|
||||
[mtlBlasArgEncoder setAccelerationStructure:bvhMetalRT->blas_array[i] atIndex:0];
|
||||
}
|
||||
|
||||
count = bvhMetalRT->blas_lookup.size();
|
||||
bufferSize = sizeof(uint32_t) * count;
|
||||
blas_lookup_buffer = [mtlDevice newBufferWithLength:bufferSize
|
||||
options:default_storage_mode];
|
||||
stats.mem_alloc(blas_lookup_buffer.allocatedSize);
|
||||
|
||||
memcpy([blas_lookup_buffer contents],
|
||||
bvhMetalRT -> blas_lookup.data(),
|
||||
blas_lookup_buffer.allocatedSize);
|
||||
|
||||
if (default_storage_mode == MTLResourceStorageModeManaged) {
|
||||
[blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)];
|
||||
[blas_lookup_buffer didModifyRange:NSMakeRange(0, blas_lookup_buffer.length)];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -19,6 +19,8 @@ enum {
|
||||
METALRT_FUNC_SHADOW_BOX,
|
||||
METALRT_FUNC_LOCAL_TRI,
|
||||
METALRT_FUNC_LOCAL_BOX,
|
||||
METALRT_FUNC_LOCAL_TRI_PRIM,
|
||||
METALRT_FUNC_LOCAL_BOX_PRIM,
|
||||
METALRT_FUNC_CURVE_RIBBON,
|
||||
METALRT_FUNC_CURVE_RIBBON_SHADOW,
|
||||
METALRT_FUNC_CURVE_ALL,
|
||||
@@ -28,7 +30,13 @@ enum {
|
||||
METALRT_FUNC_NUM
|
||||
};
|
||||
|
||||
enum { METALRT_TABLE_DEFAULT, METALRT_TABLE_SHADOW, METALRT_TABLE_LOCAL, METALRT_TABLE_NUM };
|
||||
enum {
|
||||
METALRT_TABLE_DEFAULT,
|
||||
METALRT_TABLE_SHADOW,
|
||||
METALRT_TABLE_LOCAL,
|
||||
METALRT_TABLE_LOCAL_PRIM,
|
||||
METALRT_TABLE_NUM
|
||||
};
|
||||
|
||||
/* Pipeline State Object types */
|
||||
enum MetalPipelineType {
|
||||
|
@@ -87,6 +87,9 @@ struct ShaderCache {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS] = {1024, 1024};
|
||||
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS] = {1024, 1024};
|
||||
}
|
||||
~ShaderCache();
|
||||
|
||||
@@ -521,6 +524,8 @@ void MetalKernelPipeline::compile()
|
||||
"__anyhit__cycles_metalrt_shadow_all_hit_box",
|
||||
"__anyhit__cycles_metalrt_local_hit_tri",
|
||||
"__anyhit__cycles_metalrt_local_hit_box",
|
||||
"__anyhit__cycles_metalrt_local_hit_tri_prim",
|
||||
"__anyhit__cycles_metalrt_local_hit_box_prim",
|
||||
"__intersection__curve_ribbon",
|
||||
"__intersection__curve_ribbon_shadow",
|
||||
"__intersection__curve_all",
|
||||
@@ -611,11 +616,17 @@ void MetalKernelPipeline::compile()
|
||||
rt_intersection_function[METALRT_FUNC_LOCAL_BOX],
|
||||
rt_intersection_function[METALRT_FUNC_LOCAL_BOX],
|
||||
nil];
|
||||
table_functions[METALRT_TABLE_LOCAL_PRIM] = [NSArray
|
||||
arrayWithObjects:rt_intersection_function[METALRT_FUNC_LOCAL_TRI_PRIM],
|
||||
rt_intersection_function[METALRT_FUNC_LOCAL_BOX_PRIM],
|
||||
rt_intersection_function[METALRT_FUNC_LOCAL_BOX_PRIM],
|
||||
nil];
|
||||
|
||||
NSMutableSet *unique_functions = [NSMutableSet
|
||||
setWithArray:table_functions[METALRT_TABLE_DEFAULT]];
|
||||
[unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]];
|
||||
[unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]];
|
||||
[unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL_PRIM]];
|
||||
|
||||
if (kernel_has_intersection(device_kernel)) {
|
||||
linked_functions = [[NSArray arrayWithArray:[unique_functions allObjects]]
|
||||
|
@@ -25,6 +25,7 @@ class MetalDeviceQueue : public DeviceQueue {
|
||||
virtual int num_concurrent_states(const size_t) const override;
|
||||
virtual int num_concurrent_busy_states(const size_t) const override;
|
||||
virtual int num_sort_partition_elements() const override;
|
||||
virtual bool supports_local_atomic_sort() const override;
|
||||
|
||||
virtual void init_execution() override;
|
||||
|
||||
|
@@ -315,6 +315,11 @@ int MetalDeviceQueue::num_sort_partition_elements() const
|
||||
return MetalInfo::optimal_sort_partition_elements(metal_device_->mtlDevice);
|
||||
}
|
||||
|
||||
bool MetalDeviceQueue::supports_local_atomic_sort() const
|
||||
{
|
||||
return metal_device_->use_local_atomic_sort();
|
||||
}
|
||||
|
||||
void MetalDeviceQueue::init_execution()
|
||||
{
|
||||
/* Synchronize all textures and memory copies before executing task. */
|
||||
@@ -477,6 +482,12 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
if (metal_device_->bvhMetalRT) {
|
||||
id<MTLAccelerationStructure> accel_struct = metal_device_->bvhMetalRT->accel_struct;
|
||||
[metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:2];
|
||||
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_buffer
|
||||
offset:0
|
||||
atIndex:7];
|
||||
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_lookup_buffer
|
||||
offset:0
|
||||
atIndex:8];
|
||||
}
|
||||
|
||||
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
|
||||
@@ -527,6 +538,10 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
if (bvhMetalRT) {
|
||||
/* Mark all Accelerations resources as used */
|
||||
[mtlComputeCommandEncoder useResource:bvhMetalRT->accel_struct usage:MTLResourceUsageRead];
|
||||
[mtlComputeCommandEncoder useResource:metal_device_->blas_buffer
|
||||
usage:MTLResourceUsageRead];
|
||||
[mtlComputeCommandEncoder useResource:metal_device_->blas_lookup_buffer
|
||||
usage:MTLResourceUsageRead];
|
||||
[mtlComputeCommandEncoder useResources:bvhMetalRT->blas_array.data()
|
||||
count:bvhMetalRT->blas_array.size()
|
||||
usage:MTLResourceUsageRead];
|
||||
@@ -553,13 +568,24 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
/* See parallel_active_index.h for why this amount of shared memory is needed.
|
||||
* Rounded up to 16 bytes for Metal */
|
||||
shared_mem_bytes = (int)round_up((num_threads_per_block + 1) * sizeof(int), 16);
|
||||
[mtlComputeCommandEncoder setThreadgroupMemoryLength:shared_mem_bytes atIndex:0];
|
||||
break;
|
||||
|
||||
case DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS:
|
||||
case DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS: {
|
||||
int key_count = metal_device_->launch_params.data.max_shaders;
|
||||
shared_mem_bytes = (int)round_up(key_count * sizeof(int), 16);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (shared_mem_bytes) {
|
||||
assert(shared_mem_bytes <= 32 * 1024);
|
||||
[mtlComputeCommandEncoder setThreadgroupMemoryLength:shared_mem_bytes atIndex:0];
|
||||
}
|
||||
|
||||
MTLSize size_threadgroups_per_dispatch = MTLSizeMake(
|
||||
divide_up(work_size, num_threads_per_block), 1, 1);
|
||||
MTLSize size_threads_per_threadgroup = MTLSizeMake(num_threads_per_block, 1, 1);
|
||||
|
@@ -64,6 +64,12 @@ MetalGPUVendor MetalInfo::get_device_vendor(id<MTLDevice> device)
|
||||
return METAL_GPU_INTEL;
|
||||
}
|
||||
else if (strstr(device_name, "AMD")) {
|
||||
/* Setting this env var hides AMD devices thus exposing any integrated Intel devices. */
|
||||
if (auto str = getenv("CYCLES_METAL_FORCE_INTEL")) {
|
||||
if (atoi(str)) {
|
||||
return METAL_GPU_UNKNOWN;
|
||||
}
|
||||
}
|
||||
return METAL_GPU_AMD;
|
||||
}
|
||||
else if (strstr(device_name, "Apple")) {
|
||||
@@ -96,6 +102,15 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
|
||||
return usable_devices;
|
||||
}
|
||||
|
||||
/* If the system has both an AMD GPU (discrete) and an Intel one (integrated), prefer the AMD
|
||||
* one. This can be overridden with CYCLES_METAL_FORCE_INTEL. */
|
||||
bool has_usable_amd_gpu = false;
|
||||
if (@available(macos 12.3, *)) {
|
||||
for (id<MTLDevice> device in MTLCopyAllDevices()) {
|
||||
has_usable_amd_gpu |= (get_device_vendor(device) == METAL_GPU_AMD);
|
||||
}
|
||||
}
|
||||
|
||||
metal_printf("Usable Metal devices:\n");
|
||||
for (id<MTLDevice> device in MTLCopyAllDevices()) {
|
||||
string device_name = get_device_name(device);
|
||||
@@ -111,8 +126,10 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
|
||||
}
|
||||
|
||||
# if defined(MAC_OS_VERSION_13_0)
|
||||
if (@available(macos 13.0, *)) {
|
||||
usable |= (vendor == METAL_GPU_INTEL);
|
||||
if (!has_usable_amd_gpu) {
|
||||
if (@available(macos 13.0, *)) {
|
||||
usable |= (vendor == METAL_GPU_INTEL);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
|
@@ -854,12 +854,14 @@ bool OptiXDevice::load_osl_kernels()
|
||||
context, group_descs, 2, &group_options, nullptr, 0, &osl_groups[i * 2]));
|
||||
}
|
||||
|
||||
OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {};
|
||||
vector<OptixStackSizes> osl_stack_size(osl_groups.size());
|
||||
|
||||
/* Update SBT with new entries. */
|
||||
sbt_data.alloc(NUM_PROGRAM_GROUPS + osl_groups.size());
|
||||
for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
|
||||
optix_assert(optixSbtRecordPackHeader(groups[i], &sbt_data[i]));
|
||||
optix_assert(optixProgramGroupGetStackSize(groups[i], &stack_size[i]));
|
||||
}
|
||||
for (size_t i = 0; i < osl_groups.size(); ++i) {
|
||||
if (osl_groups[i] != NULL) {
|
||||
@@ -907,13 +909,15 @@ bool OptiXDevice::load_osl_kernels()
|
||||
0,
|
||||
&pipelines[PIP_SHADE]));
|
||||
|
||||
const unsigned int css = std::max(stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG,
|
||||
stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG);
|
||||
unsigned int dss = 0;
|
||||
for (unsigned int i = 0; i < osl_stack_size.size(); ++i) {
|
||||
dss = std::max(dss, osl_stack_size[i].dssDC);
|
||||
}
|
||||
|
||||
optix_assert(optixPipelineSetStackSize(
|
||||
pipelines[PIP_SHADE], 0, dss, 0, pipeline_options.usesMotionBlur ? 3 : 2));
|
||||
pipelines[PIP_SHADE], 0, dss, css, pipeline_options.usesMotionBlur ? 3 : 2));
|
||||
}
|
||||
|
||||
return !have_error();
|
||||
|
@@ -112,6 +112,13 @@ class DeviceQueue {
|
||||
return 65536;
|
||||
}
|
||||
|
||||
/* Does device support local atomic sorting kernels (INTEGRATOR_SORT_BUCKET_PASS and
|
||||
* INTEGRATOR_SORT_WRITE_PASS)? */
|
||||
virtual bool supports_local_atomic_sort() const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Initialize execution of kernels on this queue.
|
||||
*
|
||||
* Will, for example, load all data required by the kernels from Device to global or path state.
|
||||
|
@@ -71,6 +71,8 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
|
||||
device, "integrator_shader_mnee_sort_counter", MEM_READ_WRITE),
|
||||
integrator_shader_sort_prefix_sum_(
|
||||
device, "integrator_shader_sort_prefix_sum", MEM_READ_WRITE),
|
||||
integrator_shader_sort_partition_key_offsets_(
|
||||
device, "integrator_shader_sort_partition_key_offsets", MEM_READ_WRITE),
|
||||
integrator_next_main_path_index_(device, "integrator_next_main_path_index", MEM_READ_WRITE),
|
||||
integrator_next_shadow_path_index_(
|
||||
device, "integrator_next_shadow_path_index", MEM_READ_WRITE),
|
||||
@@ -207,33 +209,45 @@ void PathTraceWorkGPU::alloc_integrator_sorting()
|
||||
integrator_state_gpu_.sort_partition_divisor = (int)divide_up(max_num_paths_,
|
||||
num_sort_partitions_);
|
||||
|
||||
/* Allocate arrays for shader sorting. */
|
||||
const int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
|
||||
if (integrator_shader_sort_counter_.size() < sort_buckets) {
|
||||
integrator_shader_sort_counter_.alloc(sort_buckets);
|
||||
integrator_shader_sort_counter_.zero_to_device();
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] =
|
||||
(int *)integrator_shader_sort_counter_.device_pointer;
|
||||
|
||||
integrator_shader_sort_prefix_sum_.alloc(sort_buckets);
|
||||
integrator_shader_sort_prefix_sum_.zero_to_device();
|
||||
}
|
||||
|
||||
if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
|
||||
if (integrator_shader_raytrace_sort_counter_.size() < sort_buckets) {
|
||||
integrator_shader_raytrace_sort_counter_.alloc(sort_buckets);
|
||||
integrator_shader_raytrace_sort_counter_.zero_to_device();
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
|
||||
(int *)integrator_shader_raytrace_sort_counter_.device_pointer;
|
||||
if (num_sort_partitions_ > 1 && queue_->supports_local_atomic_sort()) {
|
||||
/* Allocate array for partitioned shader sorting using local atomics. */
|
||||
const int num_offsets = (device_scene_->data.max_shaders + 1) * num_sort_partitions_;
|
||||
if (integrator_shader_sort_partition_key_offsets_.size() < num_offsets) {
|
||||
integrator_shader_sort_partition_key_offsets_.alloc(num_offsets);
|
||||
integrator_shader_sort_partition_key_offsets_.zero_to_device();
|
||||
}
|
||||
integrator_state_gpu_.sort_partition_key_offsets =
|
||||
(int *)integrator_shader_sort_partition_key_offsets_.device_pointer;
|
||||
}
|
||||
else {
|
||||
/* Allocate arrays for shader sorting. */
|
||||
const int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
|
||||
if (integrator_shader_sort_counter_.size() < sort_buckets) {
|
||||
integrator_shader_sort_counter_.alloc(sort_buckets);
|
||||
integrator_shader_sort_counter_.zero_to_device();
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] =
|
||||
(int *)integrator_shader_sort_counter_.device_pointer;
|
||||
|
||||
if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
|
||||
if (integrator_shader_mnee_sort_counter_.size() < sort_buckets) {
|
||||
integrator_shader_mnee_sort_counter_.alloc(sort_buckets);
|
||||
integrator_shader_mnee_sort_counter_.zero_to_device();
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
|
||||
(int *)integrator_shader_mnee_sort_counter_.device_pointer;
|
||||
integrator_shader_sort_prefix_sum_.alloc(sort_buckets);
|
||||
integrator_shader_sort_prefix_sum_.zero_to_device();
|
||||
}
|
||||
|
||||
if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
|
||||
if (integrator_shader_raytrace_sort_counter_.size() < sort_buckets) {
|
||||
integrator_shader_raytrace_sort_counter_.alloc(sort_buckets);
|
||||
integrator_shader_raytrace_sort_counter_.zero_to_device();
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
|
||||
(int *)integrator_shader_raytrace_sort_counter_.device_pointer;
|
||||
}
|
||||
}
|
||||
|
||||
if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
|
||||
if (integrator_shader_mnee_sort_counter_.size() < sort_buckets) {
|
||||
integrator_shader_mnee_sort_counter_.alloc(sort_buckets);
|
||||
integrator_shader_mnee_sort_counter_.zero_to_device();
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
|
||||
(int *)integrator_shader_mnee_sort_counter_.device_pointer;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -451,8 +465,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
|
||||
work_size = num_queued;
|
||||
d_path_index = queued_paths_.device_pointer;
|
||||
|
||||
compute_sorted_queued_paths(
|
||||
DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY, kernel, num_paths_limit);
|
||||
compute_sorted_queued_paths(kernel, num_paths_limit);
|
||||
}
|
||||
else if (num_queued < work_size) {
|
||||
work_size = num_queued;
|
||||
@@ -511,11 +524,26 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
|
||||
}
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
|
||||
DeviceKernel queued_kernel,
|
||||
void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel queued_kernel,
|
||||
const int num_paths_limit)
|
||||
{
|
||||
int d_queued_kernel = queued_kernel;
|
||||
|
||||
/* Launch kernel to fill the active paths arrays. */
|
||||
if (num_sort_partitions_ > 1 && queue_->supports_local_atomic_sort()) {
|
||||
const int work_size = kernel_max_active_main_path_index(queued_kernel);
|
||||
device_ptr d_queued_paths = queued_paths_.device_pointer;
|
||||
|
||||
int partition_size = (int)integrator_state_gpu_.sort_partition_divisor;
|
||||
|
||||
DeviceKernelArguments args(
|
||||
&work_size, &partition_size, &num_paths_limit, &d_queued_paths, &d_queued_kernel);
|
||||
|
||||
queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS, 1024 * num_sort_partitions_, args);
|
||||
queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS, 1024 * num_sort_partitions_, args);
|
||||
return;
|
||||
}
|
||||
|
||||
device_ptr d_counter = (device_ptr)integrator_state_gpu_.sort_key_counter[d_queued_kernel];
|
||||
device_ptr d_prefix_sum = integrator_shader_sort_prefix_sum_.device_pointer;
|
||||
assert(d_counter != 0 && d_prefix_sum != 0);
|
||||
@@ -552,7 +580,7 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
|
||||
&d_prefix_sum,
|
||||
&d_queued_kernel);
|
||||
|
||||
queue_->enqueue(kernel, work_size, args);
|
||||
queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY, work_size, args);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -70,9 +70,7 @@ class PathTraceWorkGPU : public PathTraceWork {
|
||||
void enqueue_path_iteration(DeviceKernel kernel, const int num_paths_limit = INT_MAX);
|
||||
|
||||
void compute_queued_paths(DeviceKernel kernel, DeviceKernel queued_kernel);
|
||||
void compute_sorted_queued_paths(DeviceKernel kernel,
|
||||
DeviceKernel queued_kernel,
|
||||
const int num_paths_limit);
|
||||
void compute_sorted_queued_paths(DeviceKernel queued_kernel, const int num_paths_limit);
|
||||
|
||||
void compact_main_paths(const int num_active_paths);
|
||||
void compact_shadow_paths();
|
||||
@@ -135,6 +133,7 @@ class PathTraceWorkGPU : public PathTraceWork {
|
||||
device_vector<int> integrator_shader_raytrace_sort_counter_;
|
||||
device_vector<int> integrator_shader_mnee_sort_counter_;
|
||||
device_vector<int> integrator_shader_sort_prefix_sum_;
|
||||
device_vector<int> integrator_shader_sort_partition_key_offsets_;
|
||||
/* Path split. */
|
||||
device_vector<int> integrator_next_main_path_index_;
|
||||
device_vector<int> integrator_next_shadow_path_index_;
|
||||
|
@@ -661,7 +661,8 @@ ccl_device void bsdf_blur(KernelGlobals kg, ccl_private ShaderClosure *sc, float
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline Spectrum bsdf_albedo(ccl_private const ShaderData *sd, ccl_private const ShaderClosure *sc)
|
||||
ccl_device_inline Spectrum bsdf_albedo(ccl_private const ShaderData *sd,
|
||||
ccl_private const ShaderClosure *sc)
|
||||
{
|
||||
Spectrum albedo = sc->weight;
|
||||
/* Some closures include additional components such as Fresnel terms that cause their albedo to
|
||||
|
@@ -519,14 +519,6 @@ ccl_device int bsdf_microfacet_ggx_setup(ccl_private MicrofacetBsdf *bsdf)
|
||||
return SD_BSDF | SD_BSDF_HAS_EVAL;
|
||||
}
|
||||
|
||||
/* Required to maintain OSL interface. */
|
||||
ccl_device int bsdf_microfacet_ggx_isotropic_setup(ccl_private MicrofacetBsdf *bsdf)
|
||||
{
|
||||
bsdf->alpha_y = bsdf->alpha_x;
|
||||
|
||||
return bsdf_microfacet_ggx_setup(bsdf);
|
||||
}
|
||||
|
||||
ccl_device int bsdf_microfacet_ggx_fresnel_setup(ccl_private MicrofacetBsdf *bsdf,
|
||||
ccl_private const ShaderData *sd)
|
||||
{
|
||||
@@ -613,14 +605,6 @@ ccl_device int bsdf_microfacet_beckmann_setup(ccl_private MicrofacetBsdf *bsdf)
|
||||
return SD_BSDF | SD_BSDF_HAS_EVAL;
|
||||
}
|
||||
|
||||
/* Required to maintain OSL interface. */
|
||||
ccl_device int bsdf_microfacet_beckmann_isotropic_setup(ccl_private MicrofacetBsdf *bsdf)
|
||||
{
|
||||
bsdf->alpha_y = bsdf->alpha_x;
|
||||
|
||||
return bsdf_microfacet_beckmann_setup(bsdf);
|
||||
}
|
||||
|
||||
ccl_device int bsdf_microfacet_beckmann_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
|
||||
{
|
||||
bsdf->alpha_x = saturatef(bsdf->alpha_x);
|
||||
|
@@ -90,8 +90,10 @@ ccl_device float schlick_fresnel(float u)
|
||||
}
|
||||
|
||||
/* Calculate the fresnel color, which is a blend between white and the F0 color */
|
||||
ccl_device_forceinline Spectrum
|
||||
interpolate_fresnel_color(float3 L, float3 H, float ior, Spectrum F0)
|
||||
ccl_device_forceinline Spectrum interpolate_fresnel_color(float3 L,
|
||||
float3 H,
|
||||
float ior,
|
||||
Spectrum F0)
|
||||
{
|
||||
/* Compute the real Fresnel term and remap it from real_F0..1 to F0..1.
|
||||
* The reason why we use this remapping instead of directly doing the
|
||||
|
@@ -401,6 +401,72 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
|
||||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
ccl_gpu_kernel_threads(GPU_PARALLEL_SORT_BLOCK_SIZE)
|
||||
ccl_gpu_kernel_signature(integrator_sort_bucket_pass,
|
||||
int num_states,
|
||||
int partition_size,
|
||||
int num_states_limit,
|
||||
ccl_global int *indices,
|
||||
int kernel_index)
|
||||
{
|
||||
#if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
|
||||
int max_shaders = context.launch_params_metal.data.max_shaders;
|
||||
ccl_global ushort *d_queued_kernel = (ccl_global ushort *)
|
||||
kernel_integrator_state.path.queued_kernel;
|
||||
ccl_global uint *d_shader_sort_key = (ccl_global uint *)
|
||||
kernel_integrator_state.path.shader_sort_key;
|
||||
ccl_global int *key_offsets = (ccl_global int *)
|
||||
kernel_integrator_state.sort_partition_key_offsets;
|
||||
|
||||
gpu_parallel_sort_bucket_pass(num_states,
|
||||
partition_size,
|
||||
max_shaders,
|
||||
kernel_index,
|
||||
d_queued_kernel,
|
||||
d_shader_sort_key,
|
||||
key_offsets,
|
||||
(threadgroup int *)threadgroup_array,
|
||||
metal_local_id,
|
||||
metal_local_size,
|
||||
metal_grid_id);
|
||||
#endif
|
||||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
ccl_gpu_kernel_threads(GPU_PARALLEL_SORT_BLOCK_SIZE)
|
||||
ccl_gpu_kernel_signature(integrator_sort_write_pass,
|
||||
int num_states,
|
||||
int partition_size,
|
||||
int num_states_limit,
|
||||
ccl_global int *indices,
|
||||
int kernel_index)
|
||||
{
|
||||
#if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
|
||||
int max_shaders = context.launch_params_metal.data.max_shaders;
|
||||
ccl_global ushort *d_queued_kernel = (ccl_global ushort *)
|
||||
kernel_integrator_state.path.queued_kernel;
|
||||
ccl_global uint *d_shader_sort_key = (ccl_global uint *)
|
||||
kernel_integrator_state.path.shader_sort_key;
|
||||
ccl_global int *key_offsets = (ccl_global int *)
|
||||
kernel_integrator_state.sort_partition_key_offsets;
|
||||
|
||||
gpu_parallel_sort_write_pass(num_states,
|
||||
partition_size,
|
||||
max_shaders,
|
||||
kernel_index,
|
||||
num_states_limit,
|
||||
indices,
|
||||
d_queued_kernel,
|
||||
d_shader_sort_key,
|
||||
key_offsets,
|
||||
(threadgroup int *)threadgroup_array,
|
||||
metal_local_id,
|
||||
metal_local_size,
|
||||
metal_grid_id);
|
||||
#endif
|
||||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
||||
ccl_gpu_kernel_signature(integrator_compact_paths_array,
|
||||
int num_states,
|
||||
|
@@ -178,7 +178,7 @@ __device__
|
||||
simd_lane_index, \
|
||||
simd_group_index, \
|
||||
num_simd_groups, \
|
||||
simdgroup_offset)
|
||||
(threadgroup int *)threadgroup_array)
|
||||
#elif defined(__KERNEL_ONEAPI__)
|
||||
|
||||
# define gpu_parallel_active_index_array(num_states, indices, num_indices, is_active_op) \
|
||||
|
@@ -19,6 +19,115 @@ CCL_NAMESPACE_BEGIN
|
||||
# define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 512
|
||||
#endif
|
||||
#define GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY (~0)
|
||||
#define GPU_PARALLEL_SORT_BLOCK_SIZE 1024
|
||||
|
||||
#if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
|
||||
|
||||
# define atomic_store_local(p, x) \
|
||||
atomic_store_explicit((threadgroup atomic_int *)p, x, memory_order_relaxed)
|
||||
# define atomic_load_local(p) \
|
||||
atomic_load_explicit((threadgroup atomic_int *)p, memory_order_relaxed)
|
||||
|
||||
ccl_device_inline void gpu_parallel_sort_bucket_pass(const uint num_states,
|
||||
const uint partition_size,
|
||||
const uint max_shaders,
|
||||
const uint queued_kernel,
|
||||
ccl_global ushort *d_queued_kernel,
|
||||
ccl_global uint *d_shader_sort_key,
|
||||
ccl_global int *partition_key_offsets,
|
||||
ccl_gpu_shared int *buckets,
|
||||
const ushort local_id,
|
||||
const ushort local_size,
|
||||
const ushort grid_id)
|
||||
{
|
||||
/* Zero the bucket sizes. */
|
||||
if (local_id < max_shaders) {
|
||||
atomic_store_local(&buckets[local_id], 0);
|
||||
}
|
||||
|
||||
ccl_gpu_syncthreads();
|
||||
|
||||
/* Determine bucket sizes within the partitions. */
|
||||
|
||||
const uint partition_start = partition_size * uint(grid_id);
|
||||
const uint partition_end = min(num_states, partition_start + partition_size);
|
||||
|
||||
for (int state_index = partition_start + uint(local_id); state_index < partition_end;
|
||||
state_index += uint(local_size)) {
|
||||
ushort kernel_index = d_queued_kernel[state_index];
|
||||
if (kernel_index == queued_kernel) {
|
||||
uint key = d_shader_sort_key[state_index] % max_shaders;
|
||||
atomic_fetch_and_add_uint32(&buckets[key], 1);
|
||||
}
|
||||
}
|
||||
|
||||
ccl_gpu_syncthreads();
|
||||
|
||||
/* Calculate the partition's local offsets from the prefix sum of bucket sizes. */
|
||||
|
||||
if (local_id == 0) {
|
||||
int offset = 0;
|
||||
for (int i = 0; i < max_shaders; i++) {
|
||||
partition_key_offsets[i + uint(grid_id) * (max_shaders + 1)] = offset;
|
||||
offset = offset + atomic_load_local(&buckets[i]);
|
||||
}
|
||||
|
||||
/* Store the number of active states in this partition. */
|
||||
partition_key_offsets[max_shaders + uint(grid_id) * (max_shaders + 1)] = offset;
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_inline void gpu_parallel_sort_write_pass(const uint num_states,
|
||||
const uint partition_size,
|
||||
const uint max_shaders,
|
||||
const uint queued_kernel,
|
||||
const int num_states_limit,
|
||||
ccl_global int *indices,
|
||||
ccl_global ushort *d_queued_kernel,
|
||||
ccl_global uint *d_shader_sort_key,
|
||||
ccl_global int *partition_key_offsets,
|
||||
ccl_gpu_shared int *local_offset,
|
||||
const ushort local_id,
|
||||
const ushort local_size,
|
||||
const ushort grid_id)
|
||||
{
|
||||
/* Calculate each partition's global offset from the prefix sum of the active state counts per
|
||||
* partition. */
|
||||
|
||||
if (local_id < max_shaders) {
|
||||
int partition_offset = 0;
|
||||
for (int i = 0; i < uint(grid_id); i++) {
|
||||
int partition_key_count = partition_key_offsets[max_shaders + uint(i) * (max_shaders + 1)];
|
||||
partition_offset += partition_key_count;
|
||||
}
|
||||
|
||||
ccl_global int *key_offsets = partition_key_offsets + (uint(grid_id) * (max_shaders + 1));
|
||||
atomic_store_local(&local_offset[local_id], key_offsets[local_id] + partition_offset);
|
||||
}
|
||||
|
||||
ccl_gpu_syncthreads();
|
||||
|
||||
/* Write the sorted active indices. */
|
||||
|
||||
const uint partition_start = partition_size * uint(grid_id);
|
||||
const uint partition_end = min(num_states, partition_start + partition_size);
|
||||
|
||||
ccl_global int *key_offsets = partition_key_offsets + (uint(grid_id) * max_shaders);
|
||||
|
||||
for (int state_index = partition_start + uint(local_id); state_index < partition_end;
|
||||
state_index += uint(local_size)) {
|
||||
ushort kernel_index = d_queued_kernel[state_index];
|
||||
if (kernel_index == queued_kernel) {
|
||||
uint key = d_shader_sort_key[state_index] % max_shaders;
|
||||
int index = atomic_fetch_and_add_uint32(&local_offset[key], 1);
|
||||
if (index < num_states_limit) {
|
||||
indices[index] = state_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* __KERNEL_LOCAL_ATOMIC_SORT__ */
|
||||
|
||||
template<typename GetKeyOp>
|
||||
__device__ void gpu_parallel_sorted_index_array(const uint state_index,
|
||||
|
@@ -172,17 +172,14 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
kernel_assert(!"Invalid ift_local");
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
|
||||
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
||||
metalrt_intersector_type metalrt_intersect;
|
||||
|
||||
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||
|
||||
bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
|
||||
if (triangle_only) {
|
||||
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
||||
if (is_null_intersection_function_table(metal_ancillaries->ift_local_prim)) {
|
||||
if (local_isect) {
|
||||
local_isect->num_hits = 0;
|
||||
}
|
||||
kernel_assert(!"Invalid ift_local_prim");
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
|
||||
MetalRTIntersectionLocalPayload payload;
|
||||
payload.self = ray->self;
|
||||
@@ -195,14 +192,48 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
}
|
||||
payload.result = false;
|
||||
|
||||
typename metalrt_intersector_type::result_type intersection;
|
||||
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
||||
|
||||
# if defined(__METALRT_MOTION__)
|
||||
metalrt_intersector_type metalrt_intersect;
|
||||
typename metalrt_intersector_type::result_type intersection;
|
||||
|
||||
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||
bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
|
||||
if (triangle_only) {
|
||||
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
||||
}
|
||||
|
||||
intersection = metalrt_intersect.intersect(
|
||||
r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload);
|
||||
# else
|
||||
|
||||
metalrt_blas_intersector_type metalrt_intersect;
|
||||
typename metalrt_blas_intersector_type::result_type intersection;
|
||||
|
||||
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||
bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
|
||||
if (triangle_only) {
|
||||
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
||||
}
|
||||
|
||||
// if we know we are going to get max one hit, like for random-sss-walk we can
|
||||
// optimize and accept the first hit
|
||||
if (max_hits == 1) {
|
||||
metalrt_intersect.accept_any_intersection(true);
|
||||
}
|
||||
|
||||
int blas_index = metal_ancillaries->blas_userID_to_index_lookUp[local_object];
|
||||
// transform the ray into object's local space
|
||||
Transform itfm = kernel_data_fetch(objects, local_object).itfm;
|
||||
r.origin = transform_point(&itfm, r.origin);
|
||||
r.direction = transform_direction(&itfm, r.direction);
|
||||
|
||||
intersection = metalrt_intersect.intersect(
|
||||
r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload);
|
||||
r,
|
||||
metal_ancillaries->blas_accel_structs[blas_index].blas,
|
||||
metal_ancillaries->ift_local_prim,
|
||||
payload);
|
||||
# endif
|
||||
|
||||
if (lcg_state) {
|
||||
|
@@ -105,10 +105,11 @@ struct kernel_gpu_##name \
|
||||
{ \
|
||||
PARAMS_MAKER(__VA_ARGS__)(__VA_ARGS__) \
|
||||
void run(thread MetalKernelContext& context, \
|
||||
threadgroup int *simdgroup_offset, \
|
||||
threadgroup atomic_int *threadgroup_array, \
|
||||
const uint metal_global_id, \
|
||||
const ushort metal_local_id, \
|
||||
const ushort metal_local_size, \
|
||||
const ushort metal_grid_id, \
|
||||
uint simdgroup_size, \
|
||||
uint simd_lane_index, \
|
||||
uint simd_group_index, \
|
||||
@@ -117,22 +118,24 @@ struct kernel_gpu_##name \
|
||||
kernel void cycles_metal_##name(device const kernel_gpu_##name *params_struct, \
|
||||
constant KernelParamsMetal &ccl_restrict _launch_params_metal, \
|
||||
constant MetalAncillaries *_metal_ancillaries, \
|
||||
threadgroup int *simdgroup_offset[[ threadgroup(0) ]], \
|
||||
threadgroup atomic_int *threadgroup_array[[ threadgroup(0) ]], \
|
||||
const uint metal_global_id [[thread_position_in_grid]], \
|
||||
const ushort metal_local_id [[thread_position_in_threadgroup]], \
|
||||
const ushort metal_local_size [[threads_per_threadgroup]], \
|
||||
const ushort metal_grid_id [[threadgroup_position_in_grid]], \
|
||||
uint simdgroup_size [[threads_per_simdgroup]], \
|
||||
uint simd_lane_index [[thread_index_in_simdgroup]], \
|
||||
uint simd_group_index [[simdgroup_index_in_threadgroup]], \
|
||||
uint num_simd_groups [[simdgroups_per_threadgroup]]) { \
|
||||
MetalKernelContext context(_launch_params_metal, _metal_ancillaries); \
|
||||
params_struct->run(context, simdgroup_offset, metal_global_id, metal_local_id, metal_local_size, simdgroup_size, simd_lane_index, simd_group_index, num_simd_groups); \
|
||||
params_struct->run(context, threadgroup_array, metal_global_id, metal_local_id, metal_local_size, metal_grid_id, simdgroup_size, simd_lane_index, simd_group_index, num_simd_groups); \
|
||||
} \
|
||||
void kernel_gpu_##name::run(thread MetalKernelContext& context, \
|
||||
threadgroup int *simdgroup_offset, \
|
||||
threadgroup atomic_int *threadgroup_array, \
|
||||
const uint metal_global_id, \
|
||||
const ushort metal_local_id, \
|
||||
const ushort metal_local_size, \
|
||||
const ushort metal_grid_id, \
|
||||
uint simdgroup_size, \
|
||||
uint simd_lane_index, \
|
||||
uint simd_group_index, \
|
||||
@@ -263,13 +266,25 @@ ccl_device_forceinline uchar4 make_uchar4(const uchar x,
|
||||
|
||||
# if defined(__METALRT_MOTION__)
|
||||
# define METALRT_TAGS instancing, instance_motion, primitive_motion
|
||||
# define METALRT_BLAS_TAGS , primitive_motion
|
||||
# else
|
||||
# define METALRT_TAGS instancing
|
||||
# define METALRT_BLAS_TAGS
|
||||
# endif /* __METALRT_MOTION__ */
|
||||
|
||||
typedef acceleration_structure<METALRT_TAGS> metalrt_as_type;
|
||||
typedef intersection_function_table<triangle_data, METALRT_TAGS> metalrt_ift_type;
|
||||
typedef metal::raytracing::intersector<triangle_data, METALRT_TAGS> metalrt_intersector_type;
|
||||
# if defined(__METALRT_MOTION__)
|
||||
typedef acceleration_structure<primitive_motion> metalrt_blas_as_type;
|
||||
typedef intersection_function_table<triangle_data, primitive_motion> metalrt_blas_ift_type;
|
||||
typedef metal::raytracing::intersector<triangle_data, primitive_motion>
|
||||
metalrt_blas_intersector_type;
|
||||
# else
|
||||
typedef acceleration_structure<> metalrt_blas_as_type;
|
||||
typedef intersection_function_table<triangle_data> metalrt_blas_ift_type;
|
||||
typedef metal::raytracing::intersector<triangle_data> metalrt_blas_intersector_type;
|
||||
# endif
|
||||
|
||||
#endif /* __METALRT__ */
|
||||
|
||||
@@ -282,6 +297,12 @@ struct Texture3DParamsMetal {
|
||||
texture3d<float, access::sample> tex;
|
||||
};
|
||||
|
||||
#ifdef __METALRT__
|
||||
struct MetalRTBlasWrapper {
|
||||
metalrt_blas_as_type blas;
|
||||
};
|
||||
#endif
|
||||
|
||||
struct MetalAncillaries {
|
||||
device Texture2DParamsMetal *textures_2d;
|
||||
device Texture3DParamsMetal *textures_3d;
|
||||
@@ -291,6 +312,9 @@ struct MetalAncillaries {
|
||||
metalrt_ift_type ift_default;
|
||||
metalrt_ift_type ift_shadow;
|
||||
metalrt_ift_type ift_local;
|
||||
metalrt_blas_ift_type ift_local_prim;
|
||||
constant MetalRTBlasWrapper *blas_accel_structs;
|
||||
constant int *blas_userID_to_index_lookUp;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@@ -139,6 +139,20 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
#endif
|
||||
}
|
||||
|
||||
[[intersection(triangle, triangle_data )]] TriangleIntersectionResult
|
||||
__anyhit__cycles_metalrt_local_hit_tri_prim(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
|
||||
uint primitive_id [[primitive_id]],
|
||||
float2 barycentrics [[barycentric_coord]],
|
||||
float ray_tmax [[distance]])
|
||||
{
|
||||
//instance_id, aka the user_id has been removed. If we take this function we optimized the
|
||||
//SSS for starting traversal from a primitive acceleration structure instead of the root of the global AS.
|
||||
//this means we will always be intersecting the correct object no need for the userid to check
|
||||
return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
|
||||
launch_params_metal, payload, payload.local_object, primitive_id, barycentrics, ray_tmax);
|
||||
}
|
||||
[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
|
||||
__anyhit__cycles_metalrt_local_hit_tri(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
@@ -163,6 +177,17 @@ __anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]])
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data )]] BoundingBoxIntersectionResult
|
||||
__anyhit__cycles_metalrt_local_hit_box_prim(const float ray_tmax [[max_distance]])
|
||||
{
|
||||
/* unused function */
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.distance = ray_tmax;
|
||||
result.accept = false;
|
||||
result.continue_search = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
template<uint intersection_type>
|
||||
bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
|
||||
|
@@ -372,6 +372,16 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
||||
kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_sorted_paths_array);
|
||||
break;
|
||||
}
|
||||
case DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS: {
|
||||
oneapi_call(
|
||||
kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_sort_bucket_pass);
|
||||
break;
|
||||
}
|
||||
case DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS: {
|
||||
oneapi_call(
|
||||
kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_sort_write_pass);
|
||||
break;
|
||||
}
|
||||
case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY: {
|
||||
oneapi_call(kg,
|
||||
cgh,
|
||||
|
@@ -132,6 +132,9 @@ typedef struct IntegratorStateGPU {
|
||||
/* Index of main path which will be used by a next shadow catcher split. */
|
||||
ccl_global int *next_main_path_index;
|
||||
|
||||
/* Partition/key offsets used when writing sorted active indices. */
|
||||
ccl_global int *sort_partition_key_offsets;
|
||||
|
||||
/* Divisor used to partition active indices by locality when sorting by material. */
|
||||
uint sort_partition_divisor;
|
||||
} IntegratorStateGPU;
|
||||
|
@@ -115,6 +115,13 @@ ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
|
||||
INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
|
||||
|
||||
# if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
|
||||
if (!kernel_integrator_state.sort_key_counter[next_kernel]) {
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
|
||||
}
|
||||
|
||||
@@ -130,6 +137,13 @@ ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
|
||||
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
|
||||
INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
|
||||
|
||||
# if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
|
||||
if (!kernel_integrator_state.sort_key_counter[next_kernel]) {
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
|
||||
}
|
||||
|
||||
|
@@ -209,14 +209,7 @@ ccl_device void osl_closure_microfacet_setup(KernelGlobals kg,
|
||||
if (closure->distribution == make_string("ggx", 11253504724482777663ull) ||
|
||||
closure->distribution == make_string("default", 4430693559278735917ull)) {
|
||||
if (!closure->refract) {
|
||||
if (closure->alpha_x == closure->alpha_y) {
|
||||
/* Isotropic */
|
||||
sd->flag |= bsdf_microfacet_ggx_isotropic_setup(bsdf);
|
||||
}
|
||||
else {
|
||||
/* Anisotropic */
|
||||
sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
|
||||
}
|
||||
sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
|
||||
}
|
||||
else {
|
||||
sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
|
||||
@@ -225,14 +218,7 @@ ccl_device void osl_closure_microfacet_setup(KernelGlobals kg,
|
||||
/* Beckmann */
|
||||
else {
|
||||
if (!closure->refract) {
|
||||
if (closure->alpha_x == closure->alpha_y) {
|
||||
/* Isotropic */
|
||||
sd->flag |= bsdf_microfacet_beckmann_isotropic_setup(bsdf);
|
||||
}
|
||||
else {
|
||||
/* Anisotropic */
|
||||
sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
|
||||
}
|
||||
sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
|
||||
}
|
||||
else {
|
||||
sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf);
|
||||
@@ -258,9 +244,9 @@ ccl_device void osl_closure_microfacet_ggx_setup(
|
||||
}
|
||||
|
||||
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
|
||||
bsdf->alpha_x = closure->alpha_x;
|
||||
bsdf->alpha_x = bsdf->alpha_y = closure->alpha_x;
|
||||
|
||||
sd->flag |= bsdf_microfacet_ggx_isotropic_setup(bsdf);
|
||||
sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
|
||||
}
|
||||
|
||||
ccl_device void osl_closure_microfacet_ggx_aniso_setup(
|
||||
@@ -652,9 +638,9 @@ ccl_device void osl_closure_microfacet_beckmann_setup(
|
||||
}
|
||||
|
||||
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
|
||||
bsdf->alpha_x = closure->alpha_x;
|
||||
bsdf->alpha_x = bsdf->alpha_y = closure->alpha_x;
|
||||
|
||||
sd->flag |= bsdf_microfacet_beckmann_isotropic_setup(bsdf);
|
||||
sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
|
||||
}
|
||||
|
||||
ccl_device void osl_closure_microfacet_beckmann_aniso_setup(
|
||||
|
@@ -74,7 +74,8 @@ CCL_NAMESPACE_BEGIN
|
||||
#define __VOLUME__
|
||||
|
||||
/* TODO: solve internal compiler errors and enable light tree on HIP. */
|
||||
#ifdef __KERNEL_HIP__
|
||||
/* TODO: solve internal compiler perf issue and enable light tree on Metal/AMD. */
|
||||
#if defined(__KERNEL_HIP__) || defined(__KERNEL_METAL_AMD__)
|
||||
# undef __LIGHT_TREE__
|
||||
#endif
|
||||
|
||||
@@ -1508,6 +1509,8 @@ typedef enum DeviceKernel : int {
|
||||
DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY,
|
||||
DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY,
|
||||
DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY,
|
||||
DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS,
|
||||
DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS,
|
||||
DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY,
|
||||
DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES,
|
||||
DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY,
|
||||
|
@@ -13,6 +13,7 @@
|
||||
#include "scene/light.h"
|
||||
#include "scene/mesh.h"
|
||||
#include "scene/object.h"
|
||||
#include "scene/osl.h"
|
||||
#include "scene/pointcloud.h"
|
||||
#include "scene/scene.h"
|
||||
#include "scene/shader.h"
|
||||
@@ -25,7 +26,6 @@
|
||||
|
||||
#ifdef WITH_OSL
|
||||
# include "kernel/osl/globals.h"
|
||||
# include "kernel/osl/services.h"
|
||||
#endif
|
||||
|
||||
#include "util/foreach.h"
|
||||
@@ -1717,20 +1717,7 @@ void GeometryManager::device_update_displacement_images(Device *device,
|
||||
/* If any OSL node is used for displacement, it may reference a texture. But it's
|
||||
* unknown which ones, so have to load them all. */
|
||||
if (has_osl_node) {
|
||||
set<OSLRenderServices *> services_shared;
|
||||
device->foreach_device([&services_shared](Device *sub_device) {
|
||||
OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory();
|
||||
services_shared.insert(og->services);
|
||||
});
|
||||
|
||||
for (OSLRenderServices *services : services_shared) {
|
||||
for (auto it = services->textures.begin(); it != services->textures.end(); ++it) {
|
||||
if (it->second->handle.get_manager() == image_manager) {
|
||||
const int slot = it->second->handle.svm_slot();
|
||||
bump_images.insert(slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
OSLShaderManager::osl_image_slots(device, image_manager, bump_images);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@@ -665,6 +665,27 @@ OSLNode *OSLShaderManager::osl_node(ShaderGraph *graph,
|
||||
return node;
|
||||
}
|
||||
|
||||
/* Static function, so only this file needs to be compile with RTTT. */
|
||||
void OSLShaderManager::osl_image_slots(Device *device,
|
||||
ImageManager *image_manager,
|
||||
set<int> &image_slots)
|
||||
{
|
||||
set<OSLRenderServices *> services_shared;
|
||||
device->foreach_device([&services_shared](Device *sub_device) {
|
||||
OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory();
|
||||
services_shared.insert(og->services);
|
||||
});
|
||||
|
||||
for (OSLRenderServices *services : services_shared) {
|
||||
for (auto it = services->textures.begin(); it != services->textures.end(); ++it) {
|
||||
if (it->second->handle.get_manager() == image_manager) {
|
||||
const int slot = it->second->handle.svm_slot();
|
||||
image_slots.insert(slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Graph Compiler */
|
||||
|
||||
OSLCompiler::OSLCompiler(OSLShaderManager *manager, OSL::ShadingSystem *ss, Scene *scene)
|
||||
|
@@ -92,6 +92,9 @@ class OSLShaderManager : public ShaderManager {
|
||||
const std::string &bytecode_hash = "",
|
||||
const std::string &bytecode = "");
|
||||
|
||||
/* Get image slots used by OSL services on device. */
|
||||
static void osl_image_slots(Device *device, ImageManager *image_manager, set<int> &image_slots);
|
||||
|
||||
private:
|
||||
void texture_system_init();
|
||||
void texture_system_free();
|
||||
|
@@ -73,16 +73,55 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *_s
|
||||
return new_value.float_value;
|
||||
}
|
||||
|
||||
# define atomic_fetch_and_add_uint32(p, x) \
|
||||
atomic_fetch_add_explicit((device atomic_uint *)p, x, memory_order_relaxed)
|
||||
# define atomic_fetch_and_sub_uint32(p, x) \
|
||||
atomic_fetch_sub_explicit((device atomic_uint *)p, x, memory_order_relaxed)
|
||||
# define atomic_fetch_and_inc_uint32(p) \
|
||||
atomic_fetch_add_explicit((device atomic_uint *)p, 1, memory_order_relaxed)
|
||||
# define atomic_fetch_and_dec_uint32(p) \
|
||||
atomic_fetch_sub_explicit((device atomic_uint *)p, 1, memory_order_relaxed)
|
||||
# define atomic_fetch_and_or_uint32(p, x) \
|
||||
atomic_fetch_or_explicit((device atomic_uint *)p, x, memory_order_relaxed)
|
||||
template<class T> ccl_device_inline uint32_t atomic_fetch_and_add_uint32(device T *p, int x)
|
||||
{
|
||||
return atomic_fetch_add_explicit((device atomic_uint *)p, x, memory_order_relaxed);
|
||||
}
|
||||
|
||||
template<class T> ccl_device_inline uint32_t atomic_fetch_and_sub_uint32(device T *p, int x)
|
||||
{
|
||||
return atomic_fetch_sub_explicit((device atomic_uint *)p, x, memory_order_relaxed);
|
||||
}
|
||||
|
||||
template<class T> ccl_device_inline uint32_t atomic_fetch_and_inc_uint32(device T *p)
|
||||
{
|
||||
return atomic_fetch_add_explicit((device atomic_uint *)p, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
template<class T> ccl_device_inline uint32_t atomic_fetch_and_dec_uint32(device T *p)
|
||||
{
|
||||
return atomic_fetch_sub_explicit((device atomic_uint *)p, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
template<class T> ccl_device_inline uint32_t atomic_fetch_and_or_uint32(device T *p, int x)
|
||||
{
|
||||
return atomic_fetch_or_explicit((device atomic_uint *)p, x, memory_order_relaxed);
|
||||
}
|
||||
|
||||
template<class T> ccl_device_inline uint32_t atomic_fetch_and_add_uint32(threadgroup T *p, int x)
|
||||
{
|
||||
return atomic_fetch_add_explicit((threadgroup atomic_uint *)p, x, memory_order_relaxed);
|
||||
}
|
||||
|
||||
template<class T> ccl_device_inline uint32_t atomic_fetch_and_sub_uint32(threadgroup T *p, int x)
|
||||
{
|
||||
return atomic_fetch_sub_explicit((threadgroup atomic_uint *)p, x, memory_order_relaxed);
|
||||
}
|
||||
|
||||
template<class T> ccl_device_inline uint32_t atomic_fetch_and_inc_uint32(threadgroup T *p)
|
||||
{
|
||||
return atomic_fetch_add_explicit((threadgroup atomic_uint *)p, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
template<class T> ccl_device_inline uint32_t atomic_fetch_and_dec_uint32(threadgroup T *p)
|
||||
{
|
||||
return atomic_fetch_sub_explicit((threadgroup atomic_uint *)p, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
template<class T> ccl_device_inline uint32_t atomic_fetch_and_or_uint32(threadgroup T *p, int x)
|
||||
{
|
||||
return atomic_fetch_or_explicit((threadgroup atomic_uint *)p, x, memory_order_relaxed);
|
||||
}
|
||||
|
||||
ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float *dest,
|
||||
const float old_val,
|
||||
|
@@ -69,6 +69,9 @@ void DebugFlags::Metal::reset()
|
||||
{
|
||||
if (getenv("CYCLES_METAL_ADAPTIVE_COMPILE") != NULL)
|
||||
adaptive_compile = true;
|
||||
|
||||
if (auto str = getenv("CYCLES_METAL_LOCAL_ATOMIC_SORT"))
|
||||
use_local_atomic_sort = (atoi(str) != 0);
|
||||
}
|
||||
|
||||
DebugFlags::OptiX::OptiX()
|
||||
|
@@ -97,6 +97,9 @@ class DebugFlags {
|
||||
|
||||
/* Whether adaptive feature based runtime compile is enabled or not. */
|
||||
bool adaptive_compile = false;
|
||||
|
||||
/* Whether local atomic sorting is enabled or not. */
|
||||
bool use_local_atomic_sort = true;
|
||||
};
|
||||
|
||||
/* Get instance of debug flags registry. */
|
||||
|
@@ -74,7 +74,7 @@ ccl_device float fast_sinf(float x)
|
||||
*
|
||||
* Results on: [-2pi,2pi].
|
||||
*
|
||||
* Examined 2173837240 values of sin: 0.00662760244 avg ulp diff, 2 max ulp,
|
||||
* Examined 2173837240 values of sin: 0.00662760244 avg ULP diff, 2 max ULP,
|
||||
* 1.19209e-07 max error
|
||||
*/
|
||||
int q = fast_rint(x * M_1_PI_F);
|
||||
@@ -256,11 +256,11 @@ ccl_device float fast_acosf(float x)
|
||||
/* clamp and crush denormals. */
|
||||
const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
|
||||
/* Based on http://www.pouet.net/topic.php?which=9132&page=2
|
||||
* 85% accurate (ulp 0)
|
||||
* 85% accurate (ULP 0)
|
||||
* Examined 2130706434 values of acos:
|
||||
* 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush"
|
||||
* 15.2000597 avg ULP diff, 4492 max ULP, 4.51803e-05 max error // without "denormal crush"
|
||||
* Examined 2130706434 values of acos:
|
||||
* 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush"
|
||||
* 15.2007108 avg ULP diff, 4492 max ULP, 4.51803e-05 max error // with "denormal crush"
|
||||
*/
|
||||
const float a = sqrtf(1.0f - m) *
|
||||
(1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f)));
|
||||
@@ -270,9 +270,8 @@ ccl_device float fast_acosf(float x)
|
||||
ccl_device float fast_asinf(float x)
|
||||
{
|
||||
/* Based on acosf approximation above.
|
||||
* Max error is 4.51133e-05 (ulps are higher because we are consistently off
|
||||
* by a little amount).
|
||||
*/
|
||||
* Max error is 4.51133e-05 (ULPS are higher because we are consistently off
|
||||
* by a little amount). */
|
||||
const float f = fabsf(x);
|
||||
/* Clamp and crush denormals. */
|
||||
const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
|
||||
@@ -290,9 +289,9 @@ ccl_device float fast_atanf(float x)
|
||||
const float t = s * s;
|
||||
/* http://mathforum.org/library/drmath/view/62672.html
|
||||
* Examined 4278190080 values of atan:
|
||||
* 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals)
|
||||
* 2.36864877 avg ULP diff, 302 max ULP, 6.55651e-06 max error // (with denormals)
|
||||
* Examined 4278190080 values of atan:
|
||||
* 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals)
|
||||
* 171160502 avg ULP diff, 855638016 max ULP, 6.55651e-06 max error // (crush denormals)
|
||||
*/
|
||||
float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f);
|
||||
if (a > 1.0f) {
|
||||
@@ -343,8 +342,8 @@ ccl_device float fast_log2f(float x)
|
||||
int exponent = (int)(bits >> 23) - 127;
|
||||
float f = __uint_as_float((bits & 0x007FFFFF) | 0x3f800000) - 1.0f;
|
||||
/* Examined 2130706432 values of log2 on [1.17549435e-38,3.40282347e+38]:
|
||||
* 0.0797524457 avg ulp diff, 3713596 max ulp, 7.62939e-06 max error.
|
||||
* ulp histogram:
|
||||
* 0.0797524457 avg ULP diff, 3713596 max ULP, 7.62939e-06 max error.
|
||||
* ULP histogram:
|
||||
* 0 = 97.46%
|
||||
* 1 = 2.29%
|
||||
* 2 = 0.11%
|
||||
@@ -363,7 +362,7 @@ ccl_device float fast_log2f(float x)
|
||||
ccl_device_inline float fast_logf(float x)
|
||||
{
|
||||
/* Examined 2130706432 values of logf on [1.17549435e-38,3.40282347e+38]:
|
||||
* 0.313865375 avg ulp diff, 5148137 max ulp, 7.62939e-06 max error.
|
||||
* 0.313865375 avg ULP diff, 5148137 max ULP, 7.62939e-06 max error.
|
||||
*/
|
||||
return fast_log2f(x) * M_LN2_F;
|
||||
}
|
||||
@@ -371,7 +370,7 @@ ccl_device_inline float fast_logf(float x)
|
||||
ccl_device_inline float fast_log10(float x)
|
||||
{
|
||||
/* Examined 2130706432 values of log10f on [1.17549435e-38,3.40282347e+38]:
|
||||
* 0.631237033 avg ulp diff, 4471615 max ulp, 3.8147e-06 max error.
|
||||
* 0.631237033 avg ULP diff, 4471615 max ULP, 3.8147e-06 max error.
|
||||
*/
|
||||
return fast_log2f(x) * M_LN2_F / M_LN10_F;
|
||||
}
|
||||
@@ -392,12 +391,12 @@ ccl_device float fast_exp2f(float x)
|
||||
/* Range reduction. */
|
||||
int m = (int)x;
|
||||
x -= m;
|
||||
x = 1.0f - (1.0f - x); /* Crush denormals (does not affect max ulps!). */
|
||||
x = 1.0f - (1.0f - x); /* Crush denormals (does not affect max ULPS!). */
|
||||
/* 5th degree polynomial generated with sollya
|
||||
* Examined 2247622658 values of exp2 on [-126,126]: 2.75764912 avg ulp diff,
|
||||
* 232 max ulp.
|
||||
* Examined 2247622658 values of exp2 on [-126,126]: 2.75764912 avg ULP diff,
|
||||
* 232 max ULP.
|
||||
*
|
||||
* ulp histogram:
|
||||
* ULP histogram:
|
||||
* 0 = 87.81%
|
||||
* 1 = 4.18%
|
||||
*/
|
||||
@@ -415,7 +414,7 @@ ccl_device float fast_exp2f(float x)
|
||||
ccl_device_inline float fast_expf(float x)
|
||||
{
|
||||
/* Examined 2237485550 values of exp on [-87.3300018,87.3300018]:
|
||||
* 2.6666452 avg ulp diff, 230 max ulp.
|
||||
* 2.6666452 avg ULP diff, 230 max ULP.
|
||||
*/
|
||||
return fast_exp2f(x / M_LN2_F);
|
||||
}
|
||||
@@ -454,7 +453,7 @@ ccl_device_inline float4 fast_expf4(float4 x)
|
||||
ccl_device_inline float fast_exp10(float x)
|
||||
{
|
||||
/* Examined 2217701018 values of exp10 on [-37.9290009,37.9290009]:
|
||||
* 2.71732409 avg ulp diff, 232 max ulp.
|
||||
* 2.71732409 avg ULP diff, 232 max ULP.
|
||||
*/
|
||||
return fast_exp2f(x * M_LN10_F / M_LN2_F);
|
||||
}
|
||||
@@ -475,7 +474,7 @@ ccl_device float fast_sinhf(float x)
|
||||
float a = fabsf(x);
|
||||
if (a > 1.0f) {
|
||||
/* Examined 53389559 values of sinh on [1,87.3300018]:
|
||||
* 33.6886442 avg ulp diff, 178 max ulp. */
|
||||
* 33.6886442 avg ULP diff, 178 max ULP. */
|
||||
float e = fast_expf(a);
|
||||
return copysignf(0.5f * e - 0.5f / e, x);
|
||||
}
|
||||
@@ -495,7 +494,7 @@ ccl_device float fast_sinhf(float x)
|
||||
ccl_device_inline float fast_coshf(float x)
|
||||
{
|
||||
/* Examined 2237485550 values of cosh on [-87.3300018,87.3300018]:
|
||||
* 1.78256726 avg ulp diff, 178 max ulp.
|
||||
* 1.78256726 avg ULP diff, 178 max ULP.
|
||||
*/
|
||||
float e = fast_expf(fabsf(x));
|
||||
return 0.5f * e + 0.5f / e;
|
||||
@@ -506,7 +505,7 @@ ccl_device_inline float fast_tanhf(float x)
|
||||
/* Examined 4278190080 values of tanh on [-3.40282347e+38,3.40282347e+38]:
|
||||
* 3.12924e-06 max error.
|
||||
*/
|
||||
/* NOTE: ulp error is high because of sub-optimal handling around the origin. */
|
||||
/* NOTE: ULP error is high because of sub-optimal handling around the origin. */
|
||||
float e = fast_expf(2.0f * fabsf(x));
|
||||
return copysignf(1.0f - 2.0f / (1.0f + e), x);
|
||||
}
|
||||
@@ -579,7 +578,7 @@ ccl_device_inline float fast_erfcf(float x)
|
||||
{
|
||||
/* Examined 2164260866 values of erfcf on [-4,4]: 1.90735e-06 max error.
|
||||
*
|
||||
* ulp histogram:
|
||||
* ULP histogram:
|
||||
*
|
||||
* 0 = 80.30%
|
||||
*/
|
||||
|
@@ -602,7 +602,7 @@ void GHOST_SystemSDL::processEvent(SDL_Event *sdl_event)
|
||||
/* NOTE: the `sdl_sub_evt.keysym.sym` is truncated,
|
||||
* for unicode support ghost has to be modified. */
|
||||
|
||||
/* TODO(@campbellbarton): support full unicode, SDL supports this but it needs to be
|
||||
/* TODO(@ideasman42): support full unicode, SDL supports this but it needs to be
|
||||
* explicitly enabled via #SDL_StartTextInput which GHOST would have to wrap. */
|
||||
char utf8_buf[sizeof(GHOST_TEventKeyData::utf8_buf)] = {'\0'};
|
||||
if (type == GHOST_kEventKeyDown) {
|
||||
|
@@ -82,6 +82,8 @@
|
||||
#include "CLG_log.h"
|
||||
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
# include "GHOST_TimerTask.h"
|
||||
|
||||
# include <pthread.h>
|
||||
#endif
|
||||
|
||||
@@ -239,7 +241,7 @@ enum {
|
||||
BTN_STYLUS = 0x14b,
|
||||
/** Use as right-mouse. */
|
||||
BTN_STYLUS2 = 0x14c,
|
||||
/** NOTE(@campbellbarton): Map to an additional button (not sure which hardware uses this). */
|
||||
/** NOTE(@ideasman42): Map to an additional button (not sure which hardware uses this). */
|
||||
BTN_STYLUS3 = 0x149,
|
||||
};
|
||||
|
||||
@@ -768,7 +770,12 @@ struct GWL_Seat {
|
||||
int32_t rate = 0;
|
||||
/** Time (milliseconds) after which to start repeating keys. */
|
||||
int32_t delay = 0;
|
||||
/** Timer for key repeats. */
|
||||
/**
|
||||
* Timer for key repeats.
|
||||
*
|
||||
* \note For as long as #USE_EVENT_BACKGROUND_THREAD is defined, any access to this
|
||||
* (including null checks, must lock `timer_mutex` first.
|
||||
*/
|
||||
GHOST_ITimerTask *timer = nullptr;
|
||||
} key_repeat;
|
||||
|
||||
@@ -832,6 +839,42 @@ static bool gwl_seat_key_depressed_suppress_warning(const GWL_Seat *seat)
|
||||
return suppress_warning;
|
||||
}
|
||||
|
||||
/**
|
||||
* \note Caller must lock `timer_mutex`.
|
||||
*/
|
||||
static void gwl_seat_key_repeat_timer_add(GWL_Seat *seat,
|
||||
GHOST_TimerProcPtr key_repeat_fn,
|
||||
GHOST_TUserDataPtr payload,
|
||||
const bool use_delay)
|
||||
{
|
||||
GHOST_SystemWayland *system = seat->system;
|
||||
const uint64_t time_step = 1000 / seat->key_repeat.rate;
|
||||
const uint64_t time_start = use_delay ? seat->key_repeat.delay : time_step;
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
GHOST_TimerTask *timer = new GHOST_TimerTask(
|
||||
system->getMilliSeconds() + time_start, time_step, key_repeat_fn, payload);
|
||||
seat->key_repeat.timer = timer;
|
||||
system->ghost_timer_manager()->addTimer(timer);
|
||||
#else
|
||||
seat->key_repeat.timer = system->installTimer(time_start, time_step, key_repeat_fn, payload);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* \note The caller must lock `timer_mutex`.
|
||||
*/
|
||||
static void gwl_seat_key_repeat_timer_remove(GWL_Seat *seat)
|
||||
{
|
||||
GHOST_SystemWayland *system = seat->system;
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
system->ghost_timer_manager()->removeTimer(
|
||||
static_cast<GHOST_TimerTask *>(seat->key_repeat.timer));
|
||||
#else
|
||||
system->removeTimer(seat->key_repeat.timer);
|
||||
#endif
|
||||
seat->key_repeat.timer = nullptr;
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
@@ -873,7 +916,7 @@ struct GWL_Display {
|
||||
* The main purpose of having an active seat is an alternative from always using the first
|
||||
* seat which prevents events from any other seat.
|
||||
*
|
||||
* NOTE(@campbellbarton): This could be extended and developed further extended to support
|
||||
* NOTE(@ideasman42): This could be extended and developed further extended to support
|
||||
* an active seat per window (for e.g.), basic support is sufficient for now as currently isn't
|
||||
* a widely used feature.
|
||||
*/
|
||||
@@ -906,6 +949,16 @@ struct GWL_Display {
|
||||
/** Guard against multiple threads accessing `events_pending` at once. */
|
||||
std::mutex events_pending_mutex;
|
||||
|
||||
/**
|
||||
* A separate timer queue, needed so the WAYLAND thread can lock access.
|
||||
* Using the system's #GHOST_Sysem::getTimerManager is not thread safe because
|
||||
* access to the timer outside of WAYLAND specific logic will not lock.
|
||||
*
|
||||
* Needed because #GHOST_System::dispatchEvents fires timers
|
||||
* outside of WAYLAND (without locking the `timer_mutex`).
|
||||
*/
|
||||
GHOST_TimerManager *ghost_timer_manager = nullptr;
|
||||
|
||||
#endif /* USE_EVENT_BACKGROUND_THREAD */
|
||||
};
|
||||
|
||||
@@ -959,6 +1012,13 @@ static void gwl_display_destroy(GWL_Display *display)
|
||||
gwl_display_event_thread_destroy(display);
|
||||
display->system->server_mutex->unlock();
|
||||
}
|
||||
|
||||
/* Important to remove after the seats which may have key repeat timers active. */
|
||||
if (display->ghost_timer_manager) {
|
||||
delete display->ghost_timer_manager;
|
||||
display->ghost_timer_manager = nullptr;
|
||||
}
|
||||
|
||||
#endif /* USE_EVENT_BACKGROUND_THREAD */
|
||||
|
||||
if (display->wl_display) {
|
||||
@@ -1179,7 +1239,7 @@ static void gwl_registry_entry_remove_all(GWL_Display *display)
|
||||
{
|
||||
const bool on_exit = true;
|
||||
|
||||
/* NOTE(@campbellbarton): Free by slot instead of simply looping over
|
||||
/* NOTE(@ideasman42): Free by slot instead of simply looping over
|
||||
* `display->registry_entry` so the order of freeing is always predictable.
|
||||
* Otherwise global objects would be feed in the order they are registered.
|
||||
* While this works in my tests, it could cause difficult to reproduce bugs
|
||||
@@ -1209,7 +1269,7 @@ static void gwl_registry_entry_remove_all(GWL_Display *display)
|
||||
* so there is no reason to update all other outputs that an output was removed (for e.g.).
|
||||
* Pass as -1 to update all slots.
|
||||
*
|
||||
* NOTE(@campbellbarton): Updating all other items on a single change is typically worth avoiding.
|
||||
* NOTE(@ideasman42): Updating all other items on a single change is typically worth avoiding.
|
||||
* In practice this isn't a problem as so there are so few elements in `display->registry_entry`,
|
||||
* so few use update functions and adding/removal at runtime is rarely called (plugging/unplugging)
|
||||
* hardware for e.g. So while it's possible to store dependency links to avoid unnecessary
|
||||
@@ -1258,7 +1318,7 @@ static void ghost_wl_display_report_error(struct wl_display *display)
|
||||
fprintf(stderr, "The Wayland connection experienced a fatal error: %s\n", strerror(ecode));
|
||||
}
|
||||
|
||||
/* NOTE(@campbellbarton): The application is running,
|
||||
/* NOTE(@ideasman42): The application is running,
|
||||
* however an error closes all windows and most importantly:
|
||||
* shuts down the GPU context (loosing all GPU state - shaders, bind codes etc),
|
||||
* so recovering from this effectively involves restarting.
|
||||
@@ -2910,7 +2970,7 @@ static void gesture_pinch_handle_begin(void *data,
|
||||
if (wl_surface *wl_surface_focus = seat->pointer.wl_surface_window) {
|
||||
win = ghost_wl_surface_user_data(wl_surface_focus);
|
||||
}
|
||||
/* NOTE(@campbellbarton): Blender's use of track-pad coordinates is inconsistent and needs work.
|
||||
/* NOTE(@ideasman42): Blender's use of track-pad coordinates is inconsistent and needs work.
|
||||
* This isn't specific to WAYLAND, in practice they tend to work well enough in most cases.
|
||||
* Some operators scale by the UI scale, some don't.
|
||||
* Even this window scale is not correct because it doesn't account for:
|
||||
@@ -2924,7 +2984,7 @@ static void gesture_pinch_handle_begin(void *data,
|
||||
*/
|
||||
const wl_fixed_t win_scale = win ? win->scale() : 1;
|
||||
|
||||
/* NOTE(@campbellbarton): Scale factors match Blender's operators & default preferences.
|
||||
/* NOTE(@ideasman42): Scale factors match Blender's operators & default preferences.
|
||||
* For these values to work correctly, operator logic will need to be changed not to scale input
|
||||
* by the region size (as with 3D view zoom) or preference for 3D view orbit sensitivity.
|
||||
*
|
||||
@@ -3087,7 +3147,7 @@ static const struct zwp_pointer_gesture_swipe_v1_listener gesture_swipe_listener
|
||||
/* -------------------------------------------------------------------- */
|
||||
/** \name Listener (Touch Seat), #wl_touch_listener
|
||||
*
|
||||
* NOTE(@campbellbarton): It's not clear if this interface is used by popular compositors.
|
||||
* NOTE(@ideasman42): It's not clear if this interface is used by popular compositors.
|
||||
* It looks like GNOME/KDE only support `zwp_pointer_gestures_v1_interface`.
|
||||
* If this isn't used anywhere, it could be removed.
|
||||
* \{ */
|
||||
@@ -3718,9 +3778,14 @@ static void keyboard_handle_leave(void *data,
|
||||
GWL_Seat *seat = static_cast<GWL_Seat *>(data);
|
||||
seat->keyboard.wl_surface_window = nullptr;
|
||||
|
||||
/* Losing focus must stop repeating text. */
|
||||
if (seat->key_repeat.timer) {
|
||||
keyboard_handle_key_repeat_cancel(seat);
|
||||
{
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
std::lock_guard lock_timer_guard{*seat->system->timer_mutex};
|
||||
#endif
|
||||
/* Losing focus must stop repeating text. */
|
||||
if (seat->key_repeat.timer) {
|
||||
keyboard_handle_key_repeat_cancel(seat);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_GNOME_KEYBOARD_SUPPRESS_WARNING
|
||||
@@ -3743,7 +3808,7 @@ static xkb_keysym_t xkb_state_key_get_one_sym_without_modifiers(
|
||||
/* Use an empty keyboard state to access key symbol without modifiers. */
|
||||
xkb_keysym_t sym = xkb_state_key_get_one_sym(xkb_state_empty, key);
|
||||
|
||||
/* NOTE(@campbellbarton): Only perform the number-locked lookup as a fallback
|
||||
/* NOTE(@ideasman42): Only perform the number-locked lookup as a fallback
|
||||
* when a number-pad key has been pressed. This is important as some key-maps use number lock
|
||||
* for switching other layers (in particular `de(neo_qwertz)` turns on layer-4), see: T96170.
|
||||
* Alternative solutions could be to inspect the layout however this could get involved
|
||||
@@ -3780,36 +3845,32 @@ static xkb_keysym_t xkb_state_key_get_one_sym_without_modifiers(
|
||||
return sym;
|
||||
}
|
||||
|
||||
/**
|
||||
* \note Caller must lock `timer_mutex`.
|
||||
*/
|
||||
static void keyboard_handle_key_repeat_cancel(GWL_Seat *seat)
|
||||
{
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
std::lock_guard lock_timer_guard{*seat->system->timer_mutex};
|
||||
#endif
|
||||
GHOST_ASSERT(seat->key_repeat.timer != nullptr, "Caller much check for timer");
|
||||
delete static_cast<GWL_KeyRepeatPlayload *>(seat->key_repeat.timer->getUserData());
|
||||
seat->system->removeTimer(seat->key_repeat.timer);
|
||||
seat->key_repeat.timer = nullptr;
|
||||
|
||||
gwl_seat_key_repeat_timer_remove(seat);
|
||||
}
|
||||
|
||||
/**
|
||||
* Restart the key-repeat timer.
|
||||
* \param use_delay: When false, use the interval
|
||||
* (prevents pause when the setting changes while the key is held).
|
||||
*
|
||||
* \note Caller must lock `timer_mutex`.
|
||||
*/
|
||||
static void keyboard_handle_key_repeat_reset(GWL_Seat *seat, const bool use_delay)
|
||||
{
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
std::lock_guard lock_timer_guard{*seat->system->timer_mutex};
|
||||
#endif
|
||||
GHOST_ASSERT(seat->key_repeat.timer != nullptr, "Caller much check for timer");
|
||||
GHOST_SystemWayland *system = seat->system;
|
||||
GHOST_ITimerTask *timer = seat->key_repeat.timer;
|
||||
GHOST_TimerProcPtr key_repeat_fn = timer->getTimerProc();
|
||||
GHOST_TimerProcPtr key_repeat_fn = seat->key_repeat.timer->getTimerProc();
|
||||
GHOST_TUserDataPtr payload = seat->key_repeat.timer->getUserData();
|
||||
seat->system->removeTimer(seat->key_repeat.timer);
|
||||
const uint64_t time_step = 1000 / seat->key_repeat.rate;
|
||||
const uint64_t time_start = use_delay ? seat->key_repeat.delay : time_step;
|
||||
seat->key_repeat.timer = system->installTimer(time_start, time_step, key_repeat_fn, payload);
|
||||
|
||||
gwl_seat_key_repeat_timer_remove(seat);
|
||||
gwl_seat_key_repeat_timer_add(seat, key_repeat_fn, payload, use_delay);
|
||||
}
|
||||
|
||||
static void keyboard_handle_key(void *data,
|
||||
@@ -3848,6 +3909,11 @@ static void keyboard_handle_key(void *data,
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
/* Any access to `seat->key_repeat.timer` must lock. */
|
||||
std::lock_guard lock_timer_guard{*seat->system->timer_mutex};
|
||||
#endif
|
||||
|
||||
struct GWL_KeyRepeatPlayload *key_repeat_payload = nullptr;
|
||||
|
||||
/* Delete previous timer. */
|
||||
@@ -3872,7 +3938,7 @@ static void keyboard_handle_key(void *data,
|
||||
else {
|
||||
/* Key-up from keys that were not repeating cause the repeat timer to pause.
|
||||
*
|
||||
* NOTE(@campbellbarton): This behavior isn't universal, some text input systems will
|
||||
* NOTE(@ideasman42): This behavior isn't universal, some text input systems will
|
||||
* stop the repeat entirely. Choose to pause repeat instead as this is what GTK/WIN32 do,
|
||||
* and it fits better for keyboard input that isn't related to text entry. */
|
||||
timer_action = RESET;
|
||||
@@ -3886,23 +3952,14 @@ static void keyboard_handle_key(void *data,
|
||||
break;
|
||||
}
|
||||
case RESET: {
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
std::lock_guard lock_timer_guard{*seat->system->timer_mutex};
|
||||
#endif
|
||||
/* The payload will be added again. */
|
||||
seat->system->removeTimer(seat->key_repeat.timer);
|
||||
seat->key_repeat.timer = nullptr;
|
||||
gwl_seat_key_repeat_timer_remove(seat);
|
||||
break;
|
||||
}
|
||||
case CANCEL: {
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
std::lock_guard lock_timer_guard{*seat->system->timer_mutex};
|
||||
#endif
|
||||
delete key_repeat_payload;
|
||||
key_repeat_payload = nullptr;
|
||||
|
||||
seat->system->removeTimer(seat->key_repeat.timer);
|
||||
seat->key_repeat.timer = nullptr;
|
||||
gwl_seat_key_repeat_timer_remove(seat);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -3956,8 +4013,8 @@ static void keyboard_handle_key(void *data,
|
||||
utf8_buf));
|
||||
}
|
||||
};
|
||||
seat->key_repeat.timer = seat->system->installTimer(
|
||||
seat->key_repeat.delay, 1000 / seat->key_repeat.rate, key_repeat_fn, key_repeat_payload);
|
||||
|
||||
gwl_seat_key_repeat_timer_add(seat, key_repeat_fn, key_repeat_payload, true);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3982,8 +4039,13 @@ static void keyboard_handle_modifiers(void *data,
|
||||
|
||||
/* A modifier changed so reset the timer,
|
||||
* see comment in #keyboard_handle_key regarding this behavior. */
|
||||
if (seat->key_repeat.timer) {
|
||||
keyboard_handle_key_repeat_reset(seat, true);
|
||||
{
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
std::lock_guard lock_timer_guard{*seat->system->timer_mutex};
|
||||
#endif
|
||||
if (seat->key_repeat.timer) {
|
||||
keyboard_handle_key_repeat_reset(seat, true);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_GNOME_KEYBOARD_SUPPRESS_WARNING
|
||||
@@ -4002,9 +4064,14 @@ static void keyboard_repeat_handle_info(void *data,
|
||||
seat->key_repeat.rate = rate;
|
||||
seat->key_repeat.delay = delay;
|
||||
|
||||
/* Unlikely possible this setting changes while repeating. */
|
||||
if (seat->key_repeat.timer) {
|
||||
keyboard_handle_key_repeat_reset(seat, false);
|
||||
{
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
std::lock_guard lock_timer_guard{*seat->system->timer_mutex};
|
||||
#endif
|
||||
/* Unlikely possible this setting changes while repeating. */
|
||||
if (seat->key_repeat.timer) {
|
||||
keyboard_handle_key_repeat_reset(seat, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4275,8 +4342,14 @@ static void gwl_seat_capability_keyboard_disable(GWL_Seat *seat)
|
||||
if (!seat->wl_keyboard) {
|
||||
return;
|
||||
}
|
||||
if (seat->key_repeat.timer) {
|
||||
keyboard_handle_key_repeat_cancel(seat);
|
||||
|
||||
{
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
std::lock_guard lock_timer_guard{*seat->system->timer_mutex};
|
||||
#endif
|
||||
if (seat->key_repeat.timer) {
|
||||
keyboard_handle_key_repeat_cancel(seat);
|
||||
}
|
||||
}
|
||||
wl_keyboard_destroy(seat->wl_keyboard);
|
||||
seat->wl_keyboard = nullptr;
|
||||
@@ -5411,6 +5484,8 @@ GHOST_SystemWayland::GHOST_SystemWayland(bool background)
|
||||
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
gwl_display_event_thread_create(display_);
|
||||
|
||||
display_->ghost_timer_manager = new GHOST_TimerManager();
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -5491,10 +5566,16 @@ bool GHOST_SystemWayland::processEvents(bool waitForEvent)
|
||||
#endif /* USE_EVENT_BACKGROUND_THREAD */
|
||||
|
||||
{
|
||||
const uint64_t now = getMilliSeconds();
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
std::lock_guard lock_timer_guard{*display_->system->timer_mutex};
|
||||
{
|
||||
std::lock_guard lock_timer_guard{*display_->system->timer_mutex};
|
||||
if (ghost_timer_manager()->fireTimers(now)) {
|
||||
any_processed = true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (getTimerManager()->fireTimers(getMilliSeconds())) {
|
||||
if (getTimerManager()->fireTimers(now)) {
|
||||
any_processed = true;
|
||||
}
|
||||
}
|
||||
@@ -6717,6 +6798,13 @@ struct wl_shm *GHOST_SystemWayland::wl_shm() const
|
||||
return display_->wl_shm;
|
||||
}
|
||||
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
GHOST_TimerManager *GHOST_SystemWayland::ghost_timer_manager()
|
||||
{
|
||||
return display_->ghost_timer_manager;
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \} */
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
@@ -6949,7 +7037,7 @@ bool GHOST_SystemWayland::window_cursor_grab_set(const GHOST_TGrabCursorMode mod
|
||||
UNPACK2(xy_next));
|
||||
wl_surface_commit(wl_surface);
|
||||
|
||||
/* NOTE(@campbellbarton): The new cursor position is a hint,
|
||||
/* NOTE(@ideasman42): The new cursor position is a hint,
|
||||
* it's possible the hint is ignored. It doesn't seem like there is a good way to
|
||||
* know if the hint will be used or not, at least not immediately. */
|
||||
xy_motion[0] = xy_next[0];
|
||||
@@ -6992,7 +7080,7 @@ bool GHOST_SystemWayland::window_cursor_grab_set(const GHOST_TGrabCursorMode mod
|
||||
if (mode != GHOST_kGrabDisable) {
|
||||
if (grab_state_next.use_lock) {
|
||||
if (!grab_state_prev.use_lock) {
|
||||
/* TODO(@campbellbarton): As WAYLAND does not support warping the pointer it may not be
|
||||
/* TODO(@ideasman42): As WAYLAND does not support warping the pointer it may not be
|
||||
* possible to support #GHOST_kGrabWrap by pragmatically settings it's coordinates.
|
||||
* An alternative could be to draw the cursor in software (and hide the real cursor),
|
||||
* or just accept a locked cursor on WAYLAND. */
|
||||
|
@@ -165,6 +165,16 @@ class GHOST_SystemWayland : public GHOST_System {
|
||||
|
||||
bool cursor_grab_use_software_display_get(const GHOST_TGrabCursorMode mode);
|
||||
|
||||
#ifdef USE_EVENT_BACKGROUND_THREAD
|
||||
/**
|
||||
* Return a separate WAYLAND local timer manager to #GHOST_System::getTimerManager
|
||||
* Manipulation & access must lock with #GHOST_WaylandSystem::server_mutex.
|
||||
*
|
||||
* See #GWL_Display::ghost_timer_manager doc-string for details on why this is needed.
|
||||
*/
|
||||
GHOST_TimerManager *ghost_timer_manager();
|
||||
#endif
|
||||
|
||||
/* WAYLAND direct-data access. */
|
||||
|
||||
struct wl_display *wl_display();
|
||||
@@ -233,7 +243,14 @@ class GHOST_SystemWayland : public GHOST_System {
|
||||
* from running at the same time. */
|
||||
std::mutex *server_mutex = nullptr;
|
||||
|
||||
/** Threads must lock this before manipulating timers. */
|
||||
/**
|
||||
* Threads must lock this before manipulating #GWL_Display::ghost_timer_manager.
|
||||
*
|
||||
* \note Using a separate lock to `server_mutex` is necessary because the
|
||||
* server lock is already held when calling `ghost_wl_display_event_pump`.
|
||||
* If manipulating the timer used the `server_mutex`, event pump can indirectly
|
||||
* handle key up/down events which would lock `server_mutex` causing a dead-lock.
|
||||
*/
|
||||
std::mutex *timer_mutex = nullptr;
|
||||
|
||||
std::thread::id main_thread_id;
|
||||
|
@@ -1080,7 +1080,7 @@ GHOST_EventCursor *GHOST_SystemWin32::processCursorEvent(GHOST_WindowWin32 *wind
|
||||
if (window->getCursorGrabMode() == GHOST_kGrabHide) {
|
||||
window->getClientBounds(bounds);
|
||||
|
||||
/* WARNING(@campbellbarton): The current warping logic fails to warp on every event,
|
||||
/* WARNING(@ideasman42): The current warping logic fails to warp on every event,
|
||||
* so the box needs to small enough not to let the cursor escape the window but large
|
||||
* enough that the cursor isn't being warped every time.
|
||||
* If this was not the case it would be less trouble to simply warp the cursor to the
|
||||
@@ -1179,7 +1179,7 @@ GHOST_EventKey *GHOST_SystemWin32::processKeyEvent(GHOST_WindowWin32 *window, RA
|
||||
GHOST_TKey key = system->hardKey(raw, &key_down);
|
||||
GHOST_EventKey *event;
|
||||
|
||||
/* NOTE(@campbellbarton): key repeat in WIN32 also applies to modifier-keys.
|
||||
/* NOTE(@ideasman42): key repeat in WIN32 also applies to modifier-keys.
|
||||
* Check for this case and filter out modifier-repeat.
|
||||
* Typically keyboard events are *not* filtered as part of GHOST's event handling.
|
||||
* As other GHOST back-ends don't have the behavior, it's simplest not to send them through.
|
||||
|
@@ -282,7 +282,7 @@ class GHOST_SystemWin32 : public GHOST_System {
|
||||
GHOST_TSuccess exit();
|
||||
|
||||
/**
|
||||
* Converts raw WIN32 key codes from the wndproc to GHOST keys.
|
||||
* Converts raw WIN32 key codes from the `wndproc` to GHOST keys.
|
||||
* \param vKey: The virtual key from #hardKey.
|
||||
* \param ScanCode: The ScanCode of pressed key (similar to PS/2 Set 1).
|
||||
* \param extend: Flag if key is not primly (left or right).
|
||||
@@ -291,7 +291,7 @@ class GHOST_SystemWin32 : public GHOST_System {
|
||||
GHOST_TKey convertKey(short vKey, short ScanCode, short extend) const;
|
||||
|
||||
/**
|
||||
* Catches raw WIN32 key codes from WM_INPUT in the wndproc.
|
||||
* Catches raw WIN32 key codes from WM_INPUT in the `wndproc`.
|
||||
* \param raw: RawInput structure with detailed info about the key event.
|
||||
* \param r_key_down: Set true when the key is pressed, otherwise false.
|
||||
* \return The GHOST key (GHOST_kKeyUnknown if no match).
|
||||
@@ -319,8 +319,8 @@ class GHOST_SystemWin32 : public GHOST_System {
|
||||
* Creates tablet events from pointer events.
|
||||
* \param type: The type of pointer event.
|
||||
* \param window: The window receiving the event (the active window).
|
||||
* \param wParam: The wParam from the wndproc.
|
||||
* \param lParam: The lParam from the wndproc.
|
||||
* \param wParam: The wParam from the `wndproc`.
|
||||
* \param lParam: The lParam from the `wndproc`.
|
||||
* \param eventhandled: True if the method handled the event.
|
||||
*/
|
||||
static void processPointerEvent(
|
||||
@@ -337,8 +337,8 @@ class GHOST_SystemWin32 : public GHOST_System {
|
||||
/**
|
||||
* Handles a mouse wheel event.
|
||||
* \param window: The window receiving the event (the active window).
|
||||
* \param wParam: The wParam from the wndproc.
|
||||
* \param lParam: The lParam from the wndproc.
|
||||
* \param wParam: The wParam from the `wndproc`.
|
||||
* \param lParam: The lParam from the `wndproc`.
|
||||
*/
|
||||
static void processWheelEvent(GHOST_WindowWin32 *window, WPARAM wParam, LPARAM lParam);
|
||||
|
||||
|
@@ -278,7 +278,7 @@ uint8_t GHOST_SystemX11::getNumDisplays() const
|
||||
void GHOST_SystemX11::getMainDisplayDimensions(uint32_t &width, uint32_t &height) const
|
||||
{
|
||||
if (m_display) {
|
||||
/* NOTE(@campbellbarton): for this to work as documented,
|
||||
/* NOTE(@ideasman42): for this to work as documented,
|
||||
* we would need to use Xinerama check r54370 for code that did this,
|
||||
* we've since removed since its not worth the extra dependency. */
|
||||
getAllDisplayDimensions(width, height);
|
||||
@@ -927,7 +927,7 @@ void GHOST_SystemX11::processEvent(XEvent *xe)
|
||||
if (window->getCursorGrabMode() == GHOST_kGrabHide) {
|
||||
window->getClientBounds(bounds);
|
||||
|
||||
/* TODO(@campbellbarton): warp the cursor to `window->getCursorGrabInitPos`,
|
||||
/* TODO(@ideasman42): warp the cursor to `window->getCursorGrabInitPos`,
|
||||
* on every motion event, see: D16557 (alternative fix for T102346). */
|
||||
const int32_t subregion_div = 4; /* One quarter of the region. */
|
||||
const int32_t size[2] = {bounds.getWidth(), bounds.getHeight()};
|
||||
@@ -2015,8 +2015,8 @@ void GHOST_SystemX11::getClipboard_xcout(
|
||||
return;
|
||||
}
|
||||
|
||||
/* if it's not incr, and not format == 8, then there's
|
||||
* nothing in the selection (that xclip understands, anyway) */
|
||||
/* If it's not INCR, and not `format == 8`, then there's
|
||||
* nothing in the selection (that `xclip` understands, anyway). */
|
||||
|
||||
if (pty_format != 8) {
|
||||
*context = XCLIB_XCOUT_NONE;
|
||||
|
@@ -669,7 +669,7 @@ static void xdg_surface_handle_configure(void *data,
|
||||
GHOST_SystemWayland *system = win->ghost_system;
|
||||
const bool is_main_thread = system->main_thread_id == std::this_thread::get_id();
|
||||
if (!is_main_thread) {
|
||||
/* NOTE(@campbellbarton): this only gets one redraw,
|
||||
/* NOTE(@ideasman42): this only gets one redraw,
|
||||
* I could not find a case where this causes problems. */
|
||||
gwl_window_pending_actions_tag(win, PENDING_FRAME_CONFIGURE);
|
||||
}
|
||||
@@ -774,7 +774,7 @@ GHOST_WindowWayland::GHOST_WindowWayland(GHOST_SystemWayland *system,
|
||||
window_->ghost_window = this;
|
||||
window_->ghost_system = system;
|
||||
|
||||
/* NOTE(@campbellbarton): The scale set here to avoid flickering on startup.
|
||||
/* NOTE(@ideasman42): The scale set here to avoid flickering on startup.
|
||||
* When all monitors use the same scale (which is quite common) there aren't any problems.
|
||||
*
|
||||
* When monitors have different scales there may still be a visible window resize on startup.
|
||||
@@ -1078,7 +1078,7 @@ GHOST_WindowWayland::~GHOST_WindowWayland()
|
||||
|
||||
wl_surface_destroy(window_->wl_surface);
|
||||
|
||||
/* NOTE(@campbellbarton): Flushing will often run the appropriate handlers event
|
||||
/* NOTE(@ideasman42): Flushing will often run the appropriate handlers event
|
||||
* (#wl_surface_listener.leave in particular) to avoid attempted access to the freed surfaces.
|
||||
* This is not fool-proof though, hence the call to #window_surface_unref, see: T99078. */
|
||||
wl_display_flush(system_->wl_display());
|
||||
|
@@ -8,9 +8,9 @@ else
|
||||
exit 1
|
||||
fi
|
||||
|
||||
BRANCH="master"
|
||||
BRANCH="main"
|
||||
|
||||
# repo="git://git.blender.org/libmv.git"
|
||||
# repo="https://projects.blender.org/blender/libmv.git"
|
||||
repo="/home/sergey/Developer/libmv"
|
||||
tmp=`mktemp -d`
|
||||
|
||||
|
@@ -40,11 +40,11 @@ inline float fast_acosf(float x)
|
||||
/* clamp and crush denormals. */
|
||||
const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
|
||||
/* Based on http://www.pouet.net/topic.php?which=9132&page=2
|
||||
* 85% accurate (ulp 0)
|
||||
* 85% accurate (ULP 0)
|
||||
* Examined 2130706434 values of acos:
|
||||
* 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush"
|
||||
* 15.2000597 avg ULP diff, 4492 max ULP, 4.51803e-05 max error // without "denormal crush"
|
||||
* Examined 2130706434 values of acos:
|
||||
* 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush"
|
||||
* 15.2007108 avg ULP diff, 4492 max ULP, 4.51803e-05 max error // with "denormal crush"
|
||||
*/
|
||||
const float a = sqrtf(1.0f - m) *
|
||||
(1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f)));
|
||||
|
@@ -89,7 +89,7 @@ class EvalOutputAPI::EvalOutput {
|
||||
// The following interfaces are dependant on the actual evaluator type (CPU, OpenGL, etc.) which
|
||||
// have slightly different APIs to access patch arrays, as well as different types for their
|
||||
// data structure. They need to be overridden in the specific instances of the EvalOutput derived
|
||||
// classes if needed, while the interfaces above are overriden through VolatileEvalOutput.
|
||||
// classes if needed, while the interfaces above are overridden through VolatileEvalOutput.
|
||||
|
||||
virtual void fillPatchArraysBuffer(OpenSubdiv_Buffer * /*patch_arrays_buffer*/)
|
||||
{
|
||||
|
@@ -1,41 +1,18 @@
|
||||
This folder contains several scripts to smoothen the Blender LTS releases.
|
||||
This folder contains a script to generate release notes and download URLs
|
||||
for Blender LTS releases.
|
||||
|
||||
create_download_urls.py
|
||||
=======================
|
||||
Ensure required Python modules are installed before running:
|
||||
|
||||
This python script is used to generate the download urls which we can
|
||||
copy-paste directly into the CMS of www.blender.org.
|
||||
pip3 install -r ./requirements.txt
|
||||
|
||||
Usage: create_download_urls.py --version 2.83.7
|
||||
Then run for example:
|
||||
|
||||
Arguments:
|
||||
--version VERSION Version string in the form of {major}.{minor}.{build}
|
||||
(eg 2.83.7)
|
||||
./create_release_notes.py --version 3.3.2 --format=html
|
||||
|
||||
The resulting html will be printed to the console.
|
||||
Available arguments:
|
||||
|
||||
create_release_notes.py
|
||||
=======================
|
||||
|
||||
This python script is used to generate the release notes which we can
|
||||
copy-paste directly into the CMS of www.blender.org and stores.
|
||||
|
||||
Usage: ./create_release_notes.py --task=T77348 --version=2.83.7
|
||||
|
||||
Arguments:
|
||||
--version VERSION Version string in the form of {major}.{minor}.{build}
|
||||
(e.g. 2.83.7)
|
||||
--task TASK Phabricator ticket that is contains the release notes
|
||||
information (e.g. T77348)
|
||||
--format FORMAT Format the result in `text`, `steam`, `wiki` or `html`
|
||||
|
||||
Requirements
|
||||
============
|
||||
|
||||
* Python 3.8 or later
|
||||
* Python phabricator client version 0.7.0
|
||||
https://pypi.org/project/phabricator/
|
||||
|
||||
For convenience the python modules can be installed using pip
|
||||
|
||||
pip3 install -r ./requirements.txt
|
||||
--version VERSION Version string in the form of {major}.{minor}.{build}
|
||||
(e.g. 3.3.2)
|
||||
--issue ISSUE Gitea issue that is contains the release notes
|
||||
information (e.g. #77348)
|
||||
--format FORMAT Format the result in `text`, `steam`, `wiki` or `html`
|
||||
|
@@ -1,169 +1,46 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import phabricator
|
||||
|
||||
import lts_issue
|
||||
import lts_download
|
||||
|
||||
DESCRIPTION = ("This python script is used to generate the release notes "
|
||||
"which we can copy-paste directly into the CMS of "
|
||||
DESCRIPTION = ("This python script is used to generate the release notes and "
|
||||
"download URLs which we can copy-paste directly into the CMS of "
|
||||
"www.blender.org and stores.")
|
||||
USAGE = "./create_release_notes.py --task=T77348 --version=2.83.7"
|
||||
|
||||
# Parse arguments
|
||||
parser = argparse.ArgumentParser(description=DESCRIPTION)
|
||||
parser.add_argument(
|
||||
"--version",
|
||||
required=True,
|
||||
help="Version string in the form of {major}.{minor}.{patch} (e.g. 3.3.2)")
|
||||
parser.add_argument(
|
||||
"--issue",
|
||||
help="Task that is contains the release notes information (e.g. #77348)")
|
||||
parser.add_argument(
|
||||
"--format",
|
||||
help="Format the result in `text`, `steam`, `wiki` or `html`",
|
||||
default="text")
|
||||
args = parser.parse_args()
|
||||
|
||||
class ReleaseLogLine:
|
||||
"""
|
||||
Class containing the information of a single line of the release log
|
||||
# Determine issue number
|
||||
version = args.version
|
||||
issue = args.issue
|
||||
if not issue:
|
||||
if version.startswith("2.83."):
|
||||
issue = "#77348"
|
||||
elif version.startswith("2.93."):
|
||||
issue = "#88449"
|
||||
elif version.startswith("3.3."):
|
||||
issue = "#100749"
|
||||
else:
|
||||
raise ValueError("Specify --issue or update script to include issue number for this version")
|
||||
|
||||
Instance attributes:
|
||||
# Print
|
||||
if args.format == "html":
|
||||
lts_download.print_urls(version=version)
|
||||
print("")
|
||||
|
||||
* line: (str) the original line used to create this log line
|
||||
* task_id: (int or None) the extracted task id associated with this log
|
||||
line. Can be None if the log line isn't associated with a task.
|
||||
* commit_id: (str or None) the extracted commit id associated with this log
|
||||
line. Only filled when no `task_id` could be found.
|
||||
* ref: (str) `task_id` or `commit_id` of this line, including `T` for tasks
|
||||
or `D` for diffs.
|
||||
* title: (str) title of this log line. When constructed this attribute is
|
||||
an empty string. The called needs to retrieve the title from the
|
||||
backend.
|
||||
* url: (str) url of the ticket task or commit.
|
||||
"""
|
||||
|
||||
def __init__(self, line: str):
|
||||
self.line = line
|
||||
items = line.split("|")
|
||||
self.task_id = None
|
||||
self.commit_id = None
|
||||
try:
|
||||
task_id = int(items[1].strip()[1:])
|
||||
self.task_id = task_id
|
||||
self.ref = f"T{self.task_id}"
|
||||
except ValueError:
|
||||
# no task
|
||||
commit_string = items[3].strip()
|
||||
commits = commit_string.split(",")
|
||||
commit_id = commits[0]
|
||||
commit_id = commit_id.replace("{", "").replace("}", "")
|
||||
if not commit_id.startswith("rB"):
|
||||
commit_id = f"rB{commit_id}"
|
||||
self.commit_id = commit_id
|
||||
|
||||
self.ref = f"{self.commit_id}"
|
||||
|
||||
self.title = ""
|
||||
self.url = f"https://developer.blender.org/{self.ref}"
|
||||
|
||||
def __format_as_html(self) -> str:
|
||||
return f" <li>{self.title} [<a href=\"{self.url}\">{self.ref}</a>]</li>"
|
||||
|
||||
def __format_as_text(self) -> str:
|
||||
return f"* {self.title} [{self.ref}]"
|
||||
|
||||
def __format_as_steam(self) -> str:
|
||||
return f"* {self.title} ([url={self.url}]{self.ref}[/url])"
|
||||
|
||||
def __format_as_wiki(self) -> str:
|
||||
if self.task_id:
|
||||
return f"* {self.title} [{{{{BugReport|{self.task_id}}}}}]"
|
||||
else:
|
||||
return f"* {self.title} [{{{{GitCommit|{self.commit_id[2:]}}}}}]"
|
||||
|
||||
def format(self, format: str) -> str:
|
||||
"""
|
||||
Format this line
|
||||
|
||||
:attr format: the desired format. Possible values are 'text', 'steam' or 'html'
|
||||
:type string:
|
||||
"""
|
||||
if format == 'html':
|
||||
return self.__format_as_html()
|
||||
elif format == 'steam':
|
||||
return self.__format_as_steam()
|
||||
elif format == 'wiki':
|
||||
return self.__format_as_wiki()
|
||||
else:
|
||||
return self.__format_as_text()
|
||||
|
||||
|
||||
def format_title(title: str) -> str:
|
||||
title = title.strip()
|
||||
if not title.endswith("."):
|
||||
title = title + "."
|
||||
return title
|
||||
|
||||
|
||||
def extract_release_notes(version: str, task_id: int):
|
||||
"""
|
||||
Extract all release notes logs
|
||||
|
||||
# Process
|
||||
|
||||
1. Retrieval of description of the given `task_id`.
|
||||
2. Find rows for the given `version` and convert to `ReleaseLogLine`.
|
||||
3. based on the associated task or commit retrieves the title of the log
|
||||
line.
|
||||
"""
|
||||
phab = phabricator.Phabricator()
|
||||
phab.update_interfaces()
|
||||
task = phab.maniphest.info(task_id=task_id)
|
||||
description = task["description"]
|
||||
lines = description.split("\n")
|
||||
start_index = lines.index(f"## Blender {version} ##")
|
||||
lines = lines[start_index + 1:]
|
||||
for line in lines:
|
||||
if not line.strip():
|
||||
continue
|
||||
if line.startswith("| **Report**"):
|
||||
continue
|
||||
if line.startswith("## Blender"):
|
||||
break
|
||||
|
||||
log_line = ReleaseLogLine(line)
|
||||
if log_line.task_id:
|
||||
issue_task = phab.maniphest.info(task_id=log_line.task_id)
|
||||
log_line.title = format_title(issue_task.title)
|
||||
yield log_line
|
||||
elif log_line.commit_id:
|
||||
commits = phab.diffusion.commit.search(constraints={"identifiers": [log_line.commit_id]})
|
||||
commit = commits.data[0]
|
||||
commit_message = commit['fields']['message']
|
||||
commit_title = commit_message.split("\n")[0]
|
||||
log_line.title = format_title(commit_title)
|
||||
yield log_line
|
||||
|
||||
|
||||
def print_release_notes(version: str, format: str, task_id: int):
|
||||
"""
|
||||
Generate and print the release notes to the console.
|
||||
"""
|
||||
if format == 'html':
|
||||
print("<ul>")
|
||||
if format == 'steam':
|
||||
print("[ul]")
|
||||
for log_item in extract_release_notes(version=version, task_id=task_id):
|
||||
print(log_item.format(format=format))
|
||||
if format == 'html':
|
||||
print("</ul>")
|
||||
if format == 'steam':
|
||||
print("[/ul]")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description=DESCRIPTION, usage=USAGE)
|
||||
parser.add_argument(
|
||||
"--version",
|
||||
required=True,
|
||||
help="Version string in the form of {major}.{minor}.{build} (e.g. 2.83.7)")
|
||||
parser.add_argument(
|
||||
"--task",
|
||||
required=True,
|
||||
help="Phabricator ticket that is contains the release notes information (e.g. T77348)")
|
||||
parser.add_argument(
|
||||
"--format",
|
||||
help="Format the result in `text`, `steam`, `wiki` or `html`",
|
||||
default="text")
|
||||
args = parser.parse_args()
|
||||
|
||||
print_release_notes(version=args.version, format=args.format, task_id=int(args.task[1:]))
|
||||
lts_issue.print_notes(version=version, format=args.format, issue=issue)
|
||||
|
20
release/lts/create_download_urls.py → release/lts/lts_download.py
Executable file → Normal file
20
release/lts/create_download_urls.py → release/lts/lts_download.py
Executable file → Normal file
@@ -1,14 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
import argparse
|
||||
import datetime
|
||||
|
||||
|
||||
DESCRIPTION = ("This python script is used to generate the download urls "
|
||||
"which we can copy-paste directly into the CMS of "
|
||||
"www.blender.org")
|
||||
USAGE = "create_download_urls --version=2.83.7"
|
||||
# Used date format: "September 30, 2020"
|
||||
DATE_FORMAT = "%B %d, %Y"
|
||||
|
||||
@@ -62,19 +57,8 @@ def generate_html(version: Version) -> str:
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def print_download_urls(version: Version):
|
||||
def print_urls(version: str):
|
||||
"""
|
||||
Generate the download urls and print them to the console.
|
||||
"""
|
||||
print(generate_html(version))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description=DESCRIPTION, usage=USAGE)
|
||||
parser.add_argument("--version",
|
||||
required=True,
|
||||
help=("Version string in the form of {major}.{minor}."
|
||||
"{build} (eg 2.83.7)"))
|
||||
args = parser.parse_args()
|
||||
|
||||
print_download_urls(version=Version(args.version))
|
||||
print(generate_html(Version(version)))
|
169
release/lts/lts_issue.py
Normal file
169
release/lts/lts_issue.py
Normal file
@@ -0,0 +1,169 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
class ReleaseLogLine:
|
||||
"""
|
||||
Class containing the information of a single line of the release log
|
||||
|
||||
Instance attributes:
|
||||
|
||||
* line: (str) the original line used to create this log line
|
||||
* issue_id: (int or None) the extracted issue id associated with this log
|
||||
line. Can be None if the log line isn't associated with a issue.
|
||||
* commit_id: (str or None) the extracted commit id associated with this log
|
||||
line. Only filled when no `issue_id` could be found.
|
||||
* ref: (str) `issue_id` or `commit_id` of this line, including `T` for issues
|
||||
or `D` for diffs.
|
||||
* title: (str) title of this log line. When constructed this attribute is
|
||||
an empty string. The called needs to retrieve the title from the
|
||||
backend.
|
||||
* url: (str) url of the ticket issue or commit.
|
||||
"""
|
||||
|
||||
def __init__(self, line: str):
|
||||
self.line = line
|
||||
items = line.split("|")
|
||||
self.issue_id = None
|
||||
self.issue_repo = None
|
||||
self.commit_id = None
|
||||
self.commit_repo = None
|
||||
base_url = "https://projects.blender.org"
|
||||
try:
|
||||
issue_tokens = items[1].strip().split("#")
|
||||
if len(issue_tokens[0]) > 0:
|
||||
self.issue_repo = issue_tokens[0]
|
||||
self.issue_id = issue_tokens[1]
|
||||
else:
|
||||
self.issue_repo = "blender/blender"
|
||||
self.issue_id = issue_tokens[1]
|
||||
|
||||
self.ref = f"#{self.issue_id}"
|
||||
self.url = f"{base_url}/{self.issue_repo}/issues/{self.issue_id}"
|
||||
except IndexError:
|
||||
# no issue
|
||||
commit_string = items[3].strip()
|
||||
commit_string = commit_string.split(",")[0]
|
||||
commit_string = commit_string.split("]")[0]
|
||||
commit_string = commit_string.replace("[", "")
|
||||
|
||||
commit_tokens = commit_string.split("@")
|
||||
if len(commit_tokens) > 1:
|
||||
self.commit_repo = commit_tokens[0]
|
||||
self.commit_id = commit_tokens[1]
|
||||
else:
|
||||
self.commit_repo = "blender/blender"
|
||||
self.commit_id = commit_tokens[0]
|
||||
|
||||
self.ref = f"{self.commit_id}"
|
||||
self.url = f"{base_url}/{self.commit_repo}/commit/{self.commit_id}"
|
||||
|
||||
self.title = ""
|
||||
|
||||
def __format_as_html(self) -> str:
|
||||
return f" <li>{self.title} [<a href=\"{self.url}\">{self.ref}</a>]</li>"
|
||||
|
||||
def __format_as_text(self) -> str:
|
||||
return f"* {self.title} [{self.ref}]"
|
||||
|
||||
def __format_as_steam(self) -> str:
|
||||
return f"* {self.title} ([url={self.url}]{self.ref}[/url])"
|
||||
|
||||
def __format_as_wiki(self) -> str:
|
||||
if self.issue_id:
|
||||
return f"* {self.title} [{{{{BugReport|{self.issue_id}}}}}]"
|
||||
else:
|
||||
return f"* {self.title} [{{{{GitCommit|{self.commit_id[2:]}}}}}]"
|
||||
|
||||
def format(self, format: str) -> str:
|
||||
"""
|
||||
Format this line
|
||||
|
||||
:attr format: the desired format. Possible values are 'text', 'steam' or 'html'
|
||||
:type string:
|
||||
"""
|
||||
if format == 'html':
|
||||
return self.__format_as_html()
|
||||
elif format == 'steam':
|
||||
return self.__format_as_steam()
|
||||
elif format == 'wiki':
|
||||
return self.__format_as_wiki()
|
||||
else:
|
||||
return self.__format_as_text()
|
||||
|
||||
|
||||
def format_title(title: str) -> str:
|
||||
title = title.strip()
|
||||
if not title.endswith("."):
|
||||
title = title + "."
|
||||
return title
|
||||
|
||||
|
||||
def extract_release_notes(version: str, issue: str):
|
||||
"""
|
||||
Extract all release notes logs
|
||||
|
||||
# Process
|
||||
|
||||
1. Retrieval of description of the given `issue_id`.
|
||||
2. Find rows for the given `version` and convert to `ReleaseLogLine`.
|
||||
3. based on the associated issue or commit retrieves the title of the log
|
||||
line.
|
||||
"""
|
||||
base_url = "https://projects.blender.org/api/v1/repos"
|
||||
issues_url = base_url + "/blender/blender/issues/"
|
||||
headers = {'accept': 'application/json'}
|
||||
|
||||
response = requests.get(issues_url + issue[1:], headers=headers)
|
||||
description = response.json()["body"]
|
||||
|
||||
lines = description.split("\n")
|
||||
start_index = lines.index(f"## Blender {version}")
|
||||
lines = lines[start_index + 1:]
|
||||
for line in lines:
|
||||
if not line.strip():
|
||||
continue
|
||||
if line.startswith("| **Report**"):
|
||||
continue
|
||||
if line.startswith("## Blender"):
|
||||
break
|
||||
if line.find("| -- |") != -1:
|
||||
continue
|
||||
|
||||
log_line = ReleaseLogLine(line)
|
||||
if log_line.issue_id:
|
||||
issue_url = f"{base_url}/{log_line.issue_repo}/issues/{log_line.issue_id}"
|
||||
response = requests.get(issue_url, headers=headers)
|
||||
if response.status_code != 200:
|
||||
raise ValueError("Issue not found: " + str(log_line.issue_id))
|
||||
|
||||
log_line.title = format_title(response.json()["title"])
|
||||
yield log_line
|
||||
elif log_line.commit_id:
|
||||
commit_url = f"{base_url}/{log_line.commit_repo}/git/commits/{log_line.commit_id}"
|
||||
response = requests.get(commit_url, headers=headers)
|
||||
if response.status_code != 200:
|
||||
raise ValueError("Commit not found: " + log_line.commit_id)
|
||||
|
||||
commit_message = response.json()['commit']['message']
|
||||
commit_title = commit_message.split("\n")[0]
|
||||
log_line.title = format_title(commit_title)
|
||||
yield log_line
|
||||
|
||||
|
||||
def print_notes(version: str, format: str, issue: str):
|
||||
"""
|
||||
Generate and print the release notes to the console.
|
||||
"""
|
||||
if format == 'html':
|
||||
print("<ul>")
|
||||
if format == 'steam':
|
||||
print("[ul]")
|
||||
for log_item in extract_release_notes(version=version, issue=issue):
|
||||
print(log_item.format(format=format))
|
||||
if format == 'html':
|
||||
print("</ul>")
|
||||
if format == 'steam':
|
||||
print("[/ul]")
|
@@ -1 +1 @@
|
||||
phabricator==0.7.0
|
||||
requests
|
||||
|
Submodule release/scripts/addons updated: b3f0ffc587...d887a4ea6b
@@ -1002,11 +1002,11 @@ def unregister_tool(tool_cls):
|
||||
|
||||
# we start with the built-in default mapping
|
||||
def _blender_default_map():
|
||||
import rna_manual_reference as ref_mod
|
||||
ret = (ref_mod.url_manual_prefix, ref_mod.url_manual_mapping)
|
||||
# avoid storing in memory
|
||||
del _sys.modules["rna_manual_reference"]
|
||||
return ret
|
||||
# NOTE(@ideasman42): Avoid importing this as there is no need to keep the lookup table in memory.
|
||||
# As this runs when the user accesses the "Online Manual", the overhead loading the file is acceptable.
|
||||
# In my tests it's under 1/100th of a second loading from a `pyc`.
|
||||
ref_mod = execfile(_os.path.join(_script_base_dir, "modules", "rna_manual_reference.py"))
|
||||
return (ref_mod.url_manual_prefix, ref_mod.url_manual_mapping)
|
||||
|
||||
|
||||
# hooks for doc lookups
|
||||
|
@@ -1299,6 +1299,8 @@ def km_uv_editor(params):
|
||||
{"properties": [("data_path", 'tool_settings.snap_uv_element')]}),
|
||||
("wm.context_toggle", {"type": 'ACCENT_GRAVE', "value": 'PRESS', "ctrl": True},
|
||||
{"properties": [("data_path", 'space_data.show_gizmo')]}),
|
||||
("wm.context_toggle", {"type": 'Z', "value": 'PRESS', "alt": True, "shift": True},
|
||||
{"properties": [("data_path", "space_data.overlay.show_overlays")]}),
|
||||
*_template_items_context_menu("IMAGE_MT_uvs_context_menu", params.context_menu_event),
|
||||
])
|
||||
|
||||
@@ -1968,6 +1970,8 @@ def km_image(params):
|
||||
("image.clear_render_border", {"type": 'B', "value": 'PRESS', "ctrl": True, "alt": True}, None),
|
||||
("wm.context_toggle", {"type": 'ACCENT_GRAVE', "value": 'PRESS', "ctrl": True},
|
||||
{"properties": [("data_path", 'space_data.show_gizmo')]}),
|
||||
("wm.context_toggle", {"type": 'Z', "value": 'PRESS', "alt": True, "shift": True},
|
||||
{"properties": [("data_path", "space_data.overlay.show_overlays")]}),
|
||||
*_template_items_context_menu("IMAGE_MT_mask_context_menu", params.context_menu_event),
|
||||
])
|
||||
|
||||
@@ -2914,6 +2918,8 @@ def km_sequencer(params):
|
||||
{"properties": [("side", 'RIGHT')]}),
|
||||
("wm.context_toggle", {"type": 'TAB', "value": 'PRESS', "shift": True},
|
||||
{"properties": [("data_path", 'tool_settings.use_snap_sequencer')]}),
|
||||
("wm.context_toggle", {"type": 'Z', "value": 'PRESS', "alt": True, "shift": True},
|
||||
{"properties": [("data_path", "space_data.show_overlays")]}),
|
||||
*_template_items_context_menu("SEQUENCER_MT_context_menu", params.context_menu_event),
|
||||
])
|
||||
|
||||
@@ -6338,6 +6344,8 @@ def km_node_link_modal_map(_params):
|
||||
return keymap
|
||||
|
||||
# Fallback for gizmos that don't have custom a custom key-map.
|
||||
|
||||
|
||||
def km_generic_gizmo(_params):
|
||||
keymap = (
|
||||
"Generic Gizmo",
|
||||
|
@@ -338,6 +338,7 @@ class NODE_MT_geometry_node_GEO_MESH_READ(Menu):
|
||||
node_add_menu.add_node_type(layout, "GeometryNodeInputMeshEdgeAngle")
|
||||
node_add_menu.add_node_type(layout, "GeometryNodeInputMeshEdgeNeighbors")
|
||||
node_add_menu.add_node_type(layout, "GeometryNodeInputMeshEdgeVertices")
|
||||
node_add_menu.add_node_type(layout, "GeometryNodeEdgesToFaceGroups")
|
||||
node_add_menu.add_node_type(layout, "GeometryNodeInputMeshFaceArea")
|
||||
node_add_menu.add_node_type(layout, "GeometryNodeInputMeshFaceNeighbors")
|
||||
node_add_menu.add_node_type(layout, "GeometryNodeMeshFaceSetBoundaries")
|
||||
|
@@ -469,6 +469,11 @@ class RENDER_PT_eevee_next_shadows(RenderButtonsPanel, Panel):
|
||||
def poll(cls, context):
|
||||
return (context.engine in cls.COMPAT_ENGINES)
|
||||
|
||||
def draw_header(self, context):
|
||||
scene = context.scene
|
||||
props = scene.eevee
|
||||
self.layout.prop(props, "use_shadows", text="")
|
||||
|
||||
def draw(self, context):
|
||||
layout = self.layout
|
||||
layout.use_property_split = True
|
||||
|
@@ -1996,7 +1996,7 @@ class SEQUENCER_PT_adjust_sound(SequencerButtonsPanel, Panel):
|
||||
|
||||
split = col.split(factor=0.4)
|
||||
split.alignment = 'RIGHT'
|
||||
split.label(text="Pan", heading_ctxt=i18n_contexts.id_sound)
|
||||
split.label(text="Pan", text_ctxt=i18n_contexts.id_sound)
|
||||
split.prop(strip, "pan", text="")
|
||||
split.enabled = pan_enabled
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -2267,7 +2267,7 @@ class ExperimentalPanel:
|
||||
bl_region_type = 'WINDOW'
|
||||
bl_context = "experimental"
|
||||
|
||||
url_prefix = "https://developer.blender.org/"
|
||||
url_prefix = "https://projects.blender.org/"
|
||||
|
||||
@classmethod
|
||||
def poll(cls, _context):
|
||||
@@ -2308,8 +2308,8 @@ class USERPREF_PT_experimental_virtual_reality(ExperimentalPanel, Panel):
|
||||
def draw(self, context):
|
||||
self._draw_items(
|
||||
context, (
|
||||
({"property": "use_virtual_reality_scene_inspection"}, "T71347"),
|
||||
({"property": "use_virtual_reality_immersive_drawing"}, "T71348"),
|
||||
({"property": "use_virtual_reality_scene_inspection"}, ("blender/blender/issues/71347", "#71347")),
|
||||
({"property": "use_virtual_reality_immersive_drawing"}, ("blender/blender/issues/71348", "#71348")),
|
||||
),
|
||||
)
|
||||
"""
|
||||
@@ -2319,13 +2319,18 @@ class USERPREF_PT_experimental_new_features(ExperimentalPanel, Panel):
|
||||
bl_label = "New Features"
|
||||
|
||||
def draw(self, context):
|
||||
self._draw_items(
|
||||
context, (
|
||||
({"property": "use_sculpt_tools_tilt"}, "T82877"),
|
||||
({"property": "use_extended_asset_browser"}, ("project/view/130/", "Project Page")),
|
||||
({"property": "use_override_templates"}, ("T73318", "Milestone 4")),
|
||||
),
|
||||
)
|
||||
self._draw_items(context,
|
||||
(({"property": "use_sculpt_tools_tilt"},
|
||||
("blender/blender/issues/82877",
|
||||
"#82877")),
|
||||
({"property": "use_extended_asset_browser"},
|
||||
("blender/blender/projects/10",
|
||||
"Pipeline, Assets & IO Project Page")),
|
||||
({"property": "use_override_templates"},
|
||||
("blender/blender/issues/73318",
|
||||
"Milestone 4")),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class USERPREF_PT_experimental_prototypes(ExperimentalPanel, Panel):
|
||||
@@ -2334,12 +2339,12 @@ class USERPREF_PT_experimental_prototypes(ExperimentalPanel, Panel):
|
||||
def draw(self, context):
|
||||
self._draw_items(
|
||||
context, (
|
||||
({"property": "use_new_curves_tools"}, "T68981"),
|
||||
({"property": "use_new_point_cloud_type"}, "T75717"),
|
||||
({"property": "use_sculpt_texture_paint"}, "T96225"),
|
||||
({"property": "use_full_frame_compositor"}, "T88150"),
|
||||
({"property": "enable_eevee_next"}, "T93220"),
|
||||
({"property": "enable_workbench_next"}, "T101619"),
|
||||
({"property": "use_new_curves_tools"}, ("blender/blender/issues/68981", "#68981")),
|
||||
({"property": "use_new_point_cloud_type"}, ("blender/blender/issues/75717", "#75717")),
|
||||
({"property": "use_sculpt_texture_paint"}, ("blender/blender/issues/96225", "#96225")),
|
||||
({"property": "use_full_frame_compositor"}, ("blender/blender/issues/88150", "#88150")),
|
||||
({"property": "enable_eevee_next"}, ("blender/blender/issues/93220", "#93220")),
|
||||
({"property": "enable_workbench_next"}, ("blender/blender/issues/101619", "#101619")),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -2352,7 +2357,7 @@ class USERPREF_PT_experimental_tweaks(ExperimentalPanel, Panel):
|
||||
def draw(self, context):
|
||||
self._draw_items(
|
||||
context, (
|
||||
({"property": "use_select_nearest_on_first_click"}, "T96752"),
|
||||
({"property": "use_select_nearest_on_first_click"}, ("blender/blender/issues/96752", "#96752")),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -2371,8 +2376,8 @@ class USERPREF_PT_experimental_debugging(ExperimentalPanel, Panel):
|
||||
def draw(self, context):
|
||||
self._draw_items(
|
||||
context, (
|
||||
({"property": "use_undo_legacy"}, "T60695"),
|
||||
({"property": "override_auto_resync"}, "T83811"),
|
||||
({"property": "use_undo_legacy"}, ("blender/blender/issues/60695", "#60695")),
|
||||
({"property": "override_auto_resync"}, ("blender/blender/issues/83811", "#83811")),
|
||||
({"property": "use_cycles_debug"}, None),
|
||||
({"property": "show_asset_debug_info"}, None),
|
||||
({"property": "use_asset_indexing"}, None),
|
||||
|
@@ -96,8 +96,8 @@ Chat <a href="https://blender.chat/channel/today">
|
||||
<p class="p5">
|
||||
<span class="s3">Development <a href="https://www.blender.org/get-involved/developers/">
|
||||
<span class="s4">www.blender.org/get-involved/developers/</span></a><br>
|
||||
GIT and Bug Tracker <a href="https://developer.blender.org/">
|
||||
<span class="s4">developer.blender.org</span></a><br>
|
||||
GIT and Bug Tracker <a href="https://projects.blender.org/">
|
||||
<span class="s4">projects.blender.org</span></a><br>
|
||||
Chat <a href="https://blender.chat/channel/blender-coders">
|
||||
<span class="s4">#blender-coders</span></a> on blender.chat</span>
|
||||
</p>
|
||||
|
@@ -3,7 +3,7 @@ echo Starting blender with GPU debugging options, log files will be created
|
||||
echo in your temp folder, windows explorer will open after you close blender
|
||||
echo to help you find them.
|
||||
echo.
|
||||
echo If you report a bug on https://developer.blender.org you can attach these files
|
||||
echo If you report a bug on https://projects.blender.org you can attach these files
|
||||
echo by dragging them into the text area of your bug report, please include both
|
||||
echo blender_debug_output.txt and blender_system_info.txt in your report.
|
||||
echo.
|
||||
|
@@ -3,7 +3,7 @@ echo Starting blender with GPU debugging and glitch workaround options, log file
|
||||
echo will be created in your temp folder, windows explorer will open after you
|
||||
echo close blender to help you find them.
|
||||
echo.
|
||||
echo If you report a bug on https://developer.blender.org you can attach these files
|
||||
echo If you report a bug on https://projects.blender.org you can attach these files
|
||||
echo by dragging them into the text area of your bug report, please include both
|
||||
echo blender_debug_output.txt and blender_system_info.txt in your report.
|
||||
echo.
|
||||
|
@@ -3,7 +3,7 @@ echo Starting blender with debug logging options, log files will be created
|
||||
echo in your temp folder, windows explorer will open after you close blender
|
||||
echo to help you find them.
|
||||
echo.
|
||||
echo If you report a bug on https://developer.blender.org you can attach these files
|
||||
echo If you report a bug on https://projects.blender.org you can attach these files
|
||||
echo by dragging them into the text area of your bug report, please include both
|
||||
echo blender_debug_output.txt and blender_system_info.txt in your report.
|
||||
echo.
|
||||
|
@@ -3,7 +3,7 @@ echo Starting blender with factory settings, log files will be created
|
||||
echo in your temp folder, windows explorer will open after you close blender
|
||||
echo to help you find them.
|
||||
echo.
|
||||
echo If you report a bug on https://developer.blender.org you can attach these files
|
||||
echo If you report a bug on https://projects.blender.org you can attach these files
|
||||
echo by dragging them into the text area of your bug report, please include both
|
||||
echo blender_debug_output.txt and blender_system_info.txt in your report.
|
||||
echo.
|
||||
|
@@ -40,7 +40,7 @@ typedef int32_t ft_pix;
|
||||
/* Macros copied from `include/freetype/internal/ftobjs.h`. */
|
||||
|
||||
/**
|
||||
* FIXME(@campbellbarton): Follow rounding from Blender 3.1x and older.
|
||||
* FIXME(@ideasman42): Follow rounding from Blender 3.1x and older.
|
||||
* This is what users will expect and changing this creates wider spaced text.
|
||||
* Use this macro to communicate that rounding should be used, using floor is to avoid
|
||||
* user visible changes, which can be reviewed and handled separately.
|
||||
|
@@ -6,8 +6,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "BLI_float3x3.hh"
|
||||
#include "BLI_math_vector_types.hh"
|
||||
#include "BLI_math_matrix.hh"
|
||||
#include "BLI_span.hh"
|
||||
|
||||
struct Depsgraph;
|
||||
@@ -38,7 +37,7 @@ struct GeometryDeformation {
|
||||
return translation;
|
||||
}
|
||||
const float3x3 &deform_mat = this->deform_mats[position_i];
|
||||
return deform_mat.inverted() * translation;
|
||||
return math::transform_point(math::invert(deform_mat), translation);
|
||||
}
|
||||
};
|
||||
|
||||
|
@@ -2,31 +2,25 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "BKE_curves.h"
|
||||
|
||||
/** \file
|
||||
* \ingroup bke
|
||||
* \brief Low-level operations for curves.
|
||||
*/
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include "BLI_bounds_types.hh"
|
||||
#include "BLI_cache_mutex.hh"
|
||||
#include "BLI_float3x3.hh"
|
||||
#include "BLI_float4x4.hh"
|
||||
#include "BLI_generic_virtual_array.hh"
|
||||
#include "BLI_index_mask.hh"
|
||||
#include "BLI_math_matrix_types.hh"
|
||||
#include "BLI_math_vector_types.hh"
|
||||
#include "BLI_offset_indices.hh"
|
||||
#include "BLI_shared_cache.hh"
|
||||
#include "BLI_span.hh"
|
||||
#include "BLI_task.hh"
|
||||
#include "BLI_vector.hh"
|
||||
#include "BLI_virtual_array.hh"
|
||||
|
||||
#include "BKE_attribute.hh"
|
||||
#include "BKE_attribute_math.hh"
|
||||
#include "BKE_curves.h"
|
||||
|
||||
namespace blender::bke {
|
||||
|
||||
|
@@ -2,7 +2,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "BLI_float4x4.hh"
|
||||
#include "BLI_math_matrix_types.hh"
|
||||
|
||||
#include "BKE_geometry_set.hh"
|
||||
|
||||
|
@@ -19,7 +19,7 @@
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include "BLI_float4x4.hh"
|
||||
#include "BLI_math_matrix_types.hh"
|
||||
#include "BLI_vector.hh"
|
||||
#include "BLI_vector_set.hh"
|
||||
|
||||
|
@@ -497,6 +497,7 @@ void BKE_lib_id_expand_local(struct Main *bmain, struct ID *id, int flags);
|
||||
*
|
||||
* Only for local IDs (linked ones already have a unique ID in their library).
|
||||
*
|
||||
* \param name: The new name of the given ID, if NULL the current given ID name is used instead.
|
||||
* \param do_linked_data: if true, also ensure a unique name in case the given \a id is linked
|
||||
* (otherwise, just ensure that it is properly sorted).
|
||||
*
|
||||
|
@@ -142,6 +142,9 @@ enum {
|
||||
/** Also process internal ID pointers like `ID.newid` or `ID.orig_id`.
|
||||
* WARNING: Dangerous, use with caution. */
|
||||
IDWALK_DO_INTERNAL_RUNTIME_POINTERS = (1 << 9),
|
||||
/** Also process the ID.lib pointer. It is an option because this pointer can usually be fully
|
||||
ignored. */
|
||||
IDWALK_DO_LIBRARY_POINTER = (1 << 10),
|
||||
};
|
||||
|
||||
typedef struct LibraryForeachIDData LibraryForeachIDData;
|
||||
|
@@ -222,8 +222,12 @@ void BKE_id_remapper_clear(struct IDRemapper *id_remapper);
|
||||
bool BKE_id_remapper_is_empty(const struct IDRemapper *id_remapper);
|
||||
/** Free the given ID Remapper. */
|
||||
void BKE_id_remapper_free(struct IDRemapper *id_remapper);
|
||||
/** Add a new remapping. */
|
||||
/** Add a new remapping. Does not replace an existing mapping for `old_id`, if any. */
|
||||
void BKE_id_remapper_add(struct IDRemapper *id_remapper, struct ID *old_id, struct ID *new_id);
|
||||
/** Add a new remapping, replacing a potential already existing mapping of `old_id`. */
|
||||
void BKE_id_remapper_add_overwrite(struct IDRemapper *id_remapper,
|
||||
struct ID *old_id,
|
||||
struct ID *new_id);
|
||||
|
||||
/**
|
||||
* Apply a remapping.
|
||||
|
@@ -425,7 +425,7 @@ int set_listbasepointers(struct Main *main, struct ListBase *lb[]);
|
||||
/**
|
||||
* The size of thumbnails (optionally) stored in the `.blend` files header.
|
||||
*
|
||||
* NOTE(@campbellbarton): This is kept small as it's stored uncompressed in the `.blend` file,
|
||||
* NOTE(@ideasman42): This is kept small as it's stored uncompressed in the `.blend` file,
|
||||
* where a larger size would increase the size of every `.blend` file unreasonably.
|
||||
* If we wanted to increase the size, we'd want to use compression (JPEG or similar).
|
||||
*/
|
||||
|
@@ -29,6 +29,15 @@ struct UniqueName_Map;
|
||||
struct UniqueName_Map *BKE_main_namemap_create(void) ATTR_WARN_UNUSED_RESULT;
|
||||
void BKE_main_namemap_destroy(struct UniqueName_Map **r_name_map) ATTR_NONNULL();
|
||||
|
||||
/**
|
||||
* Destroy all name_maps in given bmain:
|
||||
* - In bmain itself for local IDs.
|
||||
* - In the split bmains in the list is any (for linked IDs in some cases, e.g. if called during
|
||||
* readfile code).
|
||||
* - In all of the libraries IDs (for linked IDs).
|
||||
*/
|
||||
void BKE_main_namemap_clear(struct Main *bmain) ATTR_NONNULL();
|
||||
|
||||
/**
|
||||
* Ensures the given name is unique within the given ID type.
|
||||
*
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user