1
1

Compare commits

..

14 Commits

1329 changed files with 12143 additions and 25811 deletions

8
.arcconfig Normal file
View File

@@ -0,0 +1,8 @@
{
"project_id" : "Blender",
"conduit_uri" : "https://developer.blender.org/",
"phabricator.uri" : "https://developer.blender.org/",
"git.default-relative-commit" : "origin/master",
"arc.land.update.default" : "rebase",
"arc.land.onto.default" : "master"
}

View File

@@ -236,8 +236,6 @@ ForEachMacros:
- LOOP_UNSELECTED_POINTS
- LOOP_VISIBLE_KEYS
- LOOP_VISIBLE_POINTS
- LIGHT_FOREACH_BEGIN_DIRECTIONAL
- LIGHT_FOREACH_BEGIN_LOCAL
- LISTBASE_CIRCULAR_BACKWARD_BEGIN
- LISTBASE_CIRCULAR_FORWARD_BEGIN
- LISTBASE_FOREACH

View File

@@ -1,4 +1,5 @@
This repository is only used as a mirror. Blender development happens on projects.blender.org.
This repository is only used as a mirror of git.blender.org. Blender development happens on
https://developer.blender.org.
To get started with contributing code, please see:
https://wiki.blender.org/wiki/Process/Contributing_Code

3
.github/stale.yml vendored
View File

@@ -15,7 +15,8 @@ staleLabel: stale
# Comment to post when closing a stale Issue or Pull Request.
closeComment: >
This issue has been automatically closed, because this repository is only
used as a mirror. Blender development happens on projects.blender.org.
used as a mirror of git.blender.org. Blender development happens on
developer.blender.org.
To get started contributing code, please read:
https://wiki.blender.org/wiki/Process/Contributing_Code

8
.gitmodules vendored
View File

@@ -1,20 +1,20 @@
[submodule "release/scripts/addons"]
path = release/scripts/addons
url = ../blender-addons.git
branch = main
branch = master
ignore = all
[submodule "release/scripts/addons_contrib"]
path = release/scripts/addons_contrib
url = ../blender-addons-contrib.git
branch = main
branch = master
ignore = all
[submodule "release/datafiles/locale"]
path = release/datafiles/locale
url = ../blender-translations.git
branch = main
branch = master
ignore = all
[submodule "source/tools"]
path = source/tools
url = ../blender-dev-tools.git
branch = main
branch = master
ignore = all

View File

@@ -524,7 +524,7 @@ endif()
if(NOT APPLE)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
endif()
@@ -625,10 +625,8 @@ mark_as_advanced(
# Vulkan
option(WITH_VULKAN_BACKEND "Enable Vulkan as graphics backend (only for development)" OFF)
option(WITH_VULKAN_GUARDEDALLOC "Use guardedalloc for host allocations done inside Vulkan (development option)" OFF)
mark_as_advanced(
WITH_VULKAN_BACKEND
WITH_VULKAN_GUARDEDALLOC
)
# Metal

View File

@@ -299,11 +299,7 @@ else
ifneq ("$(wildcard $(DEPS_BUILD_DIR)/build.ninja)","")
DEPS_BUILD_COMMAND:=ninja
else
ifeq ($(OS), Darwin)
DEPS_BUILD_COMMAND:=make -s
else
DEPS_BUILD_COMMAND:="$(BLENDER_DIR)/build_files/build_environment/linux/make_deps_wrapper.sh" -s
endif
DEPS_BUILD_COMMAND:=make -s
endif
endif
@@ -402,7 +398,7 @@ endif
deps: .FORCE
@echo
@echo Configuring dependencies in \"$(DEPS_BUILD_DIR)\", install to \"$(DEPS_INSTALL_DIR)\"
@echo Configuring dependencies in \"$(DEPS_BUILD_DIR)\"
@cmake -H"$(DEPS_SOURCE_DIR)" \
-B"$(DEPS_BUILD_DIR)" \

View File

@@ -24,7 +24,7 @@ Development
-----------
- [Build Instructions](https://wiki.blender.org/wiki/Building_Blender)
- [Code Review & Bug Tracker](https://projects.blender.org)
- [Code Review & Bug Tracker](https://developer.blender.org)
- [Developer Forum](https://devtalk.blender.org)
- [Developer Documentation](https://wiki.blender.org)

View File

@@ -10,7 +10,7 @@ ExternalProject_Add(external_epoxy
URL_HASH ${EPOXY_HASH_TYPE}=${EPOXY_HASH}
PREFIX ${BUILD_DIR}/epoxy
PATCH_COMMAND ${PATCH_CMD} -p 1 -N -d ${BUILD_DIR}/epoxy/src/external_epoxy/ < ${PATCH_DIR}/epoxy.diff
CONFIGURE_COMMAND ${CONFIGURE_ENV} && ${MESON} setup --prefix ${LIBDIR}/epoxy --default-library ${EPOXY_LIB_TYPE} --libdir lib ${BUILD_DIR}/epoxy/src/external_epoxy-build ${BUILD_DIR}/epoxy/src/external_epoxy -Dtests=false ${MESON_BUILD_TYPE}
CONFIGURE_COMMAND ${CONFIGURE_ENV} && ${MESON} setup --prefix ${LIBDIR}/epoxy --default-library ${EPOXY_LIB_TYPE} --libdir lib ${BUILD_DIR}/epoxy/src/external_epoxy-build ${BUILD_DIR}/epoxy/src/external_epoxy -Dtests=false
BUILD_COMMAND ninja
INSTALL_COMMAND ninja install
)

View File

@@ -9,7 +9,7 @@ ExternalProject_Add(external_fribidi
URL_HASH ${FRIBIDI_HASH_TYPE}=${FRIBIDI_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/fribidi
CONFIGURE_COMMAND ${MESON} setup --prefix ${LIBDIR}/fribidi ${MESON_BUILD_TYPE} -Ddocs=false --default-library static --libdir lib ${BUILD_DIR}/fribidi/src/external_fribidi-build ${BUILD_DIR}/fribidi/src/external_fribidi
CONFIGURE_COMMAND ${MESON} setup --prefix ${LIBDIR}/fribidi -Ddocs=false --default-library static --libdir lib ${BUILD_DIR}/fribidi/src/external_fribidi-build ${BUILD_DIR}/fribidi/src/external_fribidi
BUILD_COMMAND ninja
INSTALL_COMMAND ninja install
INSTALL_DIR ${LIBDIR}/fribidi

View File

@@ -22,7 +22,7 @@ elseif(UNIX AND NOT APPLE)
)
endif()
# Boolean crashes with Arm assembly, see #103423.
# Boolean crashes with Arm assembly, see T103423.
if(BLENDER_PLATFORM_ARM)
set(GMP_OPTIONS
${GMP_OPTIONS}

View File

@@ -21,7 +21,6 @@ set(HARFBUZZ_EXTRA_OPTIONS
# Only used for command line utilities,
# disable as this would add an addition & unnecessary build-dependency.
-Dcairo=disabled
${MESON_BUILD_TYPE}
)
ExternalProject_Add(external_harfbuzz
@@ -60,10 +59,3 @@ if(BUILD_MODE STREQUAL Release AND WIN32)
DEPENDEES install
)
endif()
if(BUILD_MODE STREQUAL Debug AND WIN32)
ExternalProject_Add_Step(external_harfbuzz after_install
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/harfbuzz/lib/libharfbuzz.a ${HARVEST_TARGET}/harfbuzz/lib/libharfbuzz_d.lib
DEPENDEES install
)
endif()

View File

@@ -40,8 +40,7 @@ ExternalProject_Add(external_igc_llvm
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0004-OpenCL-support-cl_ext_float_atomics.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0005-OpenCL-Add-cl_khr_integer_dot_product.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0003-Add-missing-include-limit-in-benchmark.patch
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch
)
add_dependencies(
external_igc_llvm
@@ -56,6 +55,9 @@ ExternalProject_Add(external_igc_spirv_translator
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0001-update-SPIR-V-headers-for-SPV_INTEL_split_barrier.patch &&
${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0002-Add-support-for-split-barriers-extension-SPV_INTEL_s.patch &&
${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0003-Support-cl_bf16_conversions.patch
)
add_dependencies(
external_igc_spirv_translator

View File

@@ -15,7 +15,7 @@ llvm-config = '${LIBDIR}/llvm/bin/llvm-config'"
)
set(MESA_EXTRA_FLAGS
${MESON_BUILD_TYPE}
-Dbuildtype=release
-Dc_args=${MESA_CFLAGS}
-Dcpp_args=${MESA_CXXFLAGS}
-Dc_link_args=${MESA_LDFLAGS}

View File

@@ -16,10 +16,8 @@ message("BuildMode = ${BUILD_MODE}")
if(BUILD_MODE STREQUAL "Debug")
set(LIBDIR ${CMAKE_CURRENT_BINARY_DIR}/Debug)
set(MESON_BUILD_TYPE -Dbuildtype=debug)
else()
set(LIBDIR ${CMAKE_CURRENT_BINARY_DIR}/Release)
set(MESON_BUILD_TYPE -Dbuildtype=release)
endif()
set(DOWNLOAD_DIR "${CMAKE_CURRENT_BINARY_DIR}/downloads" CACHE STRING "Path for downloaded files")

View File

@@ -88,19 +88,6 @@ else()
export LDFLAGS=${PYTHON_LDFLAGS} &&
export PKG_CONFIG_PATH=${LIBDIR}/ffi/lib/pkgconfig)
# NOTE: untested on APPLE so far.
if(NOT APPLE)
set(PYTHON_CONFIGURE_EXTRA_ARGS
${PYTHON_CONFIGURE_EXTRA_ARGS}
# Used on most release Linux builds (Fedora for e.g.),
# increases build times noticeably with the benefit of a modest speedup at runtime.
--enable-optimizations
# While LTO is OK when building on the same system, it's incompatible across GCC versions,
# making it impractical for developers to build against, so keep it disabled.
# `--with-lto`
)
endif()
ExternalProject_Add(external_python
URL file://${PACKAGE_DIR}/${PYTHON_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}

View File

@@ -668,9 +668,9 @@ set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
# compiler, the versions used are taken from the following location
# https://github.com/intel/intel-graphics-compiler/releases
set(IGC_VERSION 1.0.13064.7)
set(IGC_VERSION 1.0.12149.1)
set(IGC_URI https://github.com/intel/intel-graphics-compiler/archive/refs/tags/igc-${IGC_VERSION}.tar.gz)
set(IGC_HASH a929abd4cca2b293961ec0437ee4b3b2147bd3b2c8a3c423af78c0c359b2e5ae)
set(IGC_HASH 44f67f24e3bc5130f9f062533abf8154782a9d0a992bc19b498639a8521ae836)
set(IGC_HASH_TYPE SHA256)
set(IGC_FILE igc-${IGC_VERSION}.tar.gz)
@@ -690,15 +690,15 @@ set(IGC_LLVM_FILE ${IGC_LLVM_VERSION}.tar.gz)
#
# WARNING WARNING WARNING
set(IGC_OPENCL_CLANG_VERSION ee31812ea8b89d08c2918f045d11a19bd33525c5)
set(IGC_OPENCL_CLANG_VERSION 363a5262d8c7cff3fb28f3bdb5d85c8d7e91c1bb)
set(IGC_OPENCL_CLANG_URI https://github.com/intel/opencl-clang/archive/${IGC_OPENCL_CLANG_VERSION}.tar.gz)
set(IGC_OPENCL_CLANG_HASH 1db6735bbcfaa31e8a9ba39f121d6bafa806ea8919e9f56782d6aaa67771ddda)
set(IGC_OPENCL_CLANG_HASH aa8cf72bb239722ce8ce44f79413c6887ecc8ca18477dd520aa5c4809756da9a)
set(IGC_OPENCL_CLANG_HASH_TYPE SHA256)
set(IGC_OPENCL_CLANG_FILE opencl-clang-${IGC_OPENCL_CLANG_VERSION}.tar.gz)
set(IGC_VCINTRINSICS_VERSION v0.11.0)
set(IGC_VCINTRINSICS_VERSION v0.5.0)
set(IGC_VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/refs/tags/${IGC_VCINTRINSICS_VERSION}.tar.gz)
set(IGC_VCINTRINSICS_HASH e5acd5626ce7fa6d41ce154c50ac805eda734ee66af94ef28e680ac2ad81bb9f)
set(IGC_VCINTRINSICS_HASH 70bb47c5e32173cf61514941e83ae7c7eb4485e6d2fca60cfa1f50d4f42c41f2)
set(IGC_VCINTRINSICS_HASH_TYPE SHA256)
set(IGC_VCINTRINSICS_FILE vc-intrinsics-${IGC_VCINTRINSICS_VERSION}.tar.gz)
@@ -714,9 +714,9 @@ set(IGC_SPIRV_TOOLS_HASH 6e19900e948944243024aedd0a201baf3854b377b9cc7a386553bc1
set(IGC_SPIRV_TOOLS_HASH_TYPE SHA256)
set(IGC_SPIRV_TOOLS_FILE SPIR-V-Tools-${IGC_SPIRV_TOOLS_VERSION}.tar.gz)
set(IGC_SPIRV_TRANSLATOR_VERSION d739c01d65ec00dee64dedd40deed805216a7193)
set(IGC_SPIRV_TRANSLATOR_VERSION a31ffaeef77e23d500b3ea3d35e0c42ff5648ad9)
set(IGC_SPIRV_TRANSLATOR_URI https://github.com/KhronosGroup/SPIRV-LLVM-Translator/archive/${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
set(IGC_SPIRV_TRANSLATOR_HASH ddc0cc9ccbe59dadeaf291012d59de142b2e9f2b124dbb634644d39daddaa13e)
set(IGC_SPIRV_TRANSLATOR_HASH 9e26c96a45341b8f8af521bacea20e752623346340addd02af95d669f6e89252)
set(IGC_SPIRV_TRANSLATOR_HASH_TYPE SHA256)
set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
@@ -724,15 +724,15 @@ set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.
### Intel Graphics Compiler DEPS END ###
########################################
set(GMMLIB_VERSION intel-gmmlib-22.3.0)
set(GMMLIB_VERSION intel-gmmlib-22.1.8)
set(GMMLIB_URI https://github.com/intel/gmmlib/archive/refs/tags/${GMMLIB_VERSION}.tar.gz)
set(GMMLIB_HASH c1f33e1519edfc527127baeb0436b783430dfd256c643130169a3a71dc86aff9)
set(GMMLIB_HASH bf23e9a3742b4fb98c7666c9e9b29f3219e4b2fb4d831aaf4eed71f5e2d17368)
set(GMMLIB_HASH_TYPE SHA256)
set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)
set(OCLOC_VERSION 22.49.25018.21)
set(OCLOC_VERSION 22.38.24278)
set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
set(OCLOC_HASH 92362dae08b503a34e5d3820ed284198c452bcd5e7504d90eb69887b20492c06)
set(OCLOC_HASH db0c542fccd651e6404b15a74d46027f1ce0eda8dc9e25a40cbb6c0faef257ee)
set(OCLOC_HASH_TYPE SHA256)
set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)

View File

@@ -13,7 +13,7 @@ ExternalProject_Add(external_wayland
# NOTE: `-lm` is needed for `libxml2` which is a static library that uses `libm.so`,
# without this, math symbols such as `floor` aren't found.
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH=${LIBDIR}/expat/lib/pkgconfig:${LIBDIR}/xml2/lib/pkgconfig:${LIBDIR}/ffi/lib/pkgconfig:$PKG_CONFIG_PATH
${MESON} --prefix ${LIBDIR}/wayland ${MESON_BUILD_TYPE} -Ddocumentation=false -Dtests=false -D "c_link_args=-L${LIBDIR}/ffi/lib -lm" . ../external_wayland
${MESON} --prefix ${LIBDIR}/wayland -Ddocumentation=false -Dtests=false -D "c_link_args=-L${LIBDIR}/ffi/lib -lm" . ../external_wayland
BUILD_COMMAND ninja
INSTALL_COMMAND ninja install
)

View File

@@ -7,7 +7,7 @@ ExternalProject_Add(external_wayland_protocols
PREFIX ${BUILD_DIR}/wayland-protocols
# Use `-E` so the `PKG_CONFIG_PATH` can be defined to link against our own WAYLAND.
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH=${LIBDIR}/wayland/lib64/pkgconfig:$PKG_CONFIG_PATH
${MESON} --prefix ${LIBDIR}/wayland-protocols ${MESON_BUILD_TYPE} . ../external_wayland_protocols -Dtests=false
${MESON} --prefix ${LIBDIR}/wayland-protocols . ../external_wayland_protocols -Dtests=false
BUILD_COMMAND ninja
INSTALL_COMMAND ninja install
)

View File

@@ -1,74 +0,0 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: GPL-2.0-or-later
# This script ensures:
# - One dependency is built at a time.
# - That dependency uses all available cores.
#
# Without this, simply calling `make -j$(nproc)` from the `${CMAKE_BUILD_DIR}/deps/`
# directory will build many projects at once.
#
# This is undesirable for the following reasons:
#
# - The output from projects is mixed together,
# making it difficult to track down the cause of a build failure.
#
# - Larger dependencies such as LLVM can bottleneck the build process,
# making it necessary to cancel the build and manually run build commands in each directory.
#
# - Building many projects at once means canceling (Control-C) can lead to the build being in an undefined state.
# It's possible canceling happens as a patch is being applied or files are being copied.
# (steps that aren't part of the compilation process where it's typically safe to cancel).
if [[ -z "$MY_MAKE_CALL_LEVEL" ]]; then
export MY_MAKE_CALL_LEVEL=0
export MY_MAKEFLAGS=$MAKEFLAGS
# Extract the jobs argument (`-jN`, `-j N`, `--jobs=N`).
add_next=0
for i in "$@"; do
case $i in
-j*)
export MY_JOBS_ARG=$i
if [ "$MY_JOBS_ARG" = "-j" ]; then
add_next=1
fi
;;
--jobs=*)
shift # past argument=value
MY_JOBS_ARG=$i
;;
*)
if (( add_next == 1 )); then
MY_JOBS_ARG="$MY_JOBS_ARG $i"
add_next=0
fi
;;
esac
done
unset i add_next
if [[ -z "$MY_JOBS_ARG" ]]; then
MY_JOBS_ARG="-j$(nproc)"
fi
export MY_JOBS_ARG
# Support user defined `MAKEFLAGS`.
export MAKEFLAGS="$MY_MAKEFLAGS -j1"
else
export MY_MAKE_CALL_LEVEL=$(( MY_MAKE_CALL_LEVEL + 1 ))
if (( MY_MAKE_CALL_LEVEL == 1 )); then
# Important to set jobs to 1, otherwise user defined jobs argument is used.
export MAKEFLAGS="$MY_MAKEFLAGS -j1"
elif (( MY_MAKE_CALL_LEVEL == 2 )); then
# This is the level used by each sub-project.
export MAKEFLAGS="$MY_MAKEFLAGS $MY_JOBS_ARG"
fi
# Else leave `MY_MAKEFLAGS` flags as-is, avoids setting a high number of jobs on recursive
# calls (which may easily run out of memory). Let the job-server handle the rest.
fi
# Useful for troubleshooting the wrapper.
# echo "Call level: $MY_MAKE_CALL_LEVEL, args=$@".
# Call actual make but ensure recursive calls run via this script.
exec make MAKE="$0" "$@"

View File

@@ -1,7 +1,7 @@
diff -Naur external_igc_opencl_clang.orig/CMakeLists.txt external_igc_opencl_clang/CMakeLists.txt
--- external_igc_opencl_clang.orig/CMakeLists.txt 2022-03-16 05:51:10 -0600
+++ external_igc_opencl_clang/CMakeLists.txt 2022-05-23 10:40:09 -0600
@@ -147,22 +147,24 @@
@@ -126,22 +126,24 @@
)
endif()

View File

@@ -23,19 +23,19 @@ if(EXISTS ${SOURCE_DIR}/.git)
if(MY_WC_BRANCH STREQUAL "HEAD")
# Detached HEAD, check whether commit hash is reachable
# in the main branch
# in the master branch
execute_process(COMMAND git rev-parse --short=12 HEAD
WORKING_DIRECTORY ${SOURCE_DIR}
OUTPUT_VARIABLE MY_WC_HASH
OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND git branch --list main blender-v* --contains ${MY_WC_HASH}
execute_process(COMMAND git branch --list master blender-v* --contains ${MY_WC_HASH}
WORKING_DIRECTORY ${SOURCE_DIR}
OUTPUT_VARIABLE _git_contains_check
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT _git_contains_check STREQUAL "")
set(MY_WC_BRANCH "main")
set(MY_WC_BRANCH "master")
else()
execute_process(COMMAND git show-ref --tags -d
WORKING_DIRECTORY ${SOURCE_DIR}
@@ -48,7 +48,7 @@ if(EXISTS ${SOURCE_DIR}/.git)
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(_git_tag_hashes MATCHES "${_git_head_hash}")
set(MY_WC_BRANCH "main")
set(MY_WC_BRANCH "master")
else()
execute_process(COMMAND git branch --contains ${MY_WC_HASH}
WORKING_DIRECTORY ${SOURCE_DIR}

View File

@@ -85,7 +85,7 @@ if(NOT APPLE)
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_ONEAPI_BINARIES ON CACHE BOOL "" FORCE)
endif()

View File

@@ -11,11 +11,11 @@
mkdir ~/blender-git
cd ~/blender-git
git clone https://projects.blender.org/blender/blender.git
git clone http://git.blender.org/blender.git
cd blender
git submodule update --init --recursive
git submodule foreach git checkout main
git submodule foreach git pull --rebase origin main
git submodule foreach git checkout master
git submodule foreach git pull --rebase origin master
# create build dir
mkdir ~/blender-git/build-cmake
@@ -35,7 +35,7 @@ ln -s ~/blender-git/build-cmake/bin/blender ~/blender-git/blender/blender.bin
echo ""
echo "* Useful Commands *"
echo " Run Blender: ~/blender-git/blender/blender.bin"
echo " Update Blender: git pull --rebase; git submodule foreach git pull --rebase origin main"
echo " Update Blender: git pull --rebase; git submodule foreach git pull --rebase origin master"
echo " Reconfigure Blender: cd ~/blender-git/build-cmake ; cmake ."
echo " Build Blender: cd ~/blender-git/build-cmake ; make"
echo ""

View File

@@ -544,7 +544,7 @@ endfunction()
function(setup_platform_linker_libs
target
)
# jemalloc must be early in the list, to be before pthread (see #57998).
# jemalloc must be early in the list, to be before pthread (see T57998)
if(WITH_MEM_JEMALLOC)
target_link_libraries(${target} ${JEMALLOC_LIBRARIES})
endif()

View File

@@ -440,7 +440,7 @@ string(APPEND PLATFORM_LINKFLAGS " -stdlib=libc++")
# Make stack size more similar to Embree, required for Embree.
string(APPEND PLATFORM_LINKFLAGS_EXECUTABLE " -Wl,-stack_size,0x100000")
# Suppress ranlib "has no symbols" warnings (workaround for #48250).
# Suppress ranlib "has no symbols" warnings (workaround for T48250)
set(CMAKE_C_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
# llvm-ranlib doesn't support this flag. Xcode's libtool does.

View File

@@ -121,7 +121,7 @@ if(WITH_WINDOWS_BUNDLE_CRT)
include(InstallRequiredSystemLibraries)
# ucrtbase(d).dll cannot be in the manifest, due to the way windows 10 handles
# redirects for this dll, for details see #88813.
# redirects for this dll, for details see T88813.
foreach(lib ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS})
string(FIND ${lib} "ucrtbase" pos)
if(NOT pos EQUAL -1)
@@ -295,7 +295,7 @@ unset(MATERIALX_LIB_FOLDER_EXISTS)
if(NOT MSVC_CLANG AND # Available with MSVC 15.7+ but not for CLANG.
NOT WITH_WINDOWS_SCCACHE AND # And not when sccache is enabled
NOT VS_CLANG_TIDY) # Clang-tidy does not like these options
add_compile_options(/experimental:external /external:I "${LIBDIR}" /external:W0)
add_compile_options(/experimental:external /external:templates- /external:I "${LIBDIR}" /external:W0)
endif()
# Add each of our libraries to our cmake_prefix_path so find_package() could work

View File

@@ -5,16 +5,16 @@
update-code:
git:
submodules:
- branch: main
- branch: master
commit_id: HEAD
path: release/scripts/addons
- branch: main
- branch: master
commit_id: HEAD
path: release/scripts/addons_contrib
- branch: main
- branch: master
commit_id: HEAD
path: release/datafiles/locale
- branch: main
- branch: master
commit_id: HEAD
path: source/tools
svn:

View File

@@ -58,7 +58,7 @@ Each Blender release supports one Python version, and the package is only compat
## Source Code
* [Releases](https://download.blender.org/source/)
* Repository: [projects.blender.org/blender/blender.git](https://projects.blender.org/blender/blender)
* Repository: [git.blender.org/blender.git](https://git.blender.org/gitweb/gitweb.cgi/blender.git)
## Credits

View File

@@ -42,7 +42,6 @@ def parse_arguments() -> argparse.Namespace:
parser.add_argument("--svn-branch", default=None)
parser.add_argument("--git-command", default="git")
parser.add_argument("--use-linux-libraries", action="store_true")
parser.add_argument("--architecture", type=str, choices=("x86_64", "amd64", "arm64",))
return parser.parse_args()
@@ -52,17 +51,6 @@ def get_blender_git_root() -> str:
# Setup for precompiled libraries and tests from svn.
def get_effective_architecture(args: argparse.Namespace):
if args.architecture:
return args.architecture
# Check platform.version to detect arm64 with x86_64 python binary.
if "ARM64" in platform.version():
return "arm64"
return platform.machine().lower()
def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None:
svn_non_interactive = [args.svn_command, '--non-interactive']
@@ -70,11 +58,11 @@ def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None
svn_url = make_utils.svn_libraries_base_url(release_version, args.svn_branch)
# Checkout precompiled libraries
architecture = get_effective_architecture(args)
if sys.platform == 'darwin':
if architecture == 'arm64':
# Check platform.version to detect arm64 with x86_64 python binary.
if platform.machine() == 'arm64' or ('ARM64' in platform.version()):
lib_platform = "darwin_arm64"
elif architecture == 'x86_64':
elif platform.machine() == 'x86_64':
lib_platform = "darwin"
else:
lib_platform = None
@@ -182,7 +170,7 @@ def git_update_skip(args: argparse.Namespace, check_remote_exists: bool = True)
return "rebase or merge in progress, complete it first"
# Abort if uncommitted changes.
changes = check_output([args.git_command, 'status', '--porcelain', '--untracked-files=no', '--ignore-submodules'])
changes = check_output([args.git_command, 'status', '--porcelain', '--untracked-files=no'])
if len(changes) != 0:
return "you have unstaged changes"
@@ -214,8 +202,8 @@ def submodules_update(
sys.exit(1)
# Update submodules to appropriate given branch,
# falling back to main if none is given and/or found in a sub-repository.
branch_fallback = "main"
# falling back to master if none is given and/or found in a sub-repository.
branch_fallback = "master"
if not branch:
branch = branch_fallback
@@ -268,15 +256,14 @@ if __name__ == "__main__":
blender_skip_msg = ""
submodules_skip_msg = ""
blender_version = make_utils. parse_blender_version()
if blender_version.cycle != 'alpha':
major = blender_version.version // 100
minor = blender_version.version % 100
branch = f"blender-v{major}.{minor}-release"
release_version = f"{major}.{minor}"
else:
branch = 'main'
release_version = None
# Test if we are building a specific release version.
branch = make_utils.git_branch(args.git_command)
if branch == 'HEAD':
sys.stderr.write('Blender git repository is in detached HEAD state, must be in a branch\n')
sys.exit(1)
tag = make_utils.git_tag(args.git_command)
release_version = make_utils.git_branch_release_version(branch, tag)
if not args.no_libraries:
svn_update(args, release_version)

View File

@@ -3,9 +3,9 @@ if NOT exist "%BLENDER_DIR%\source\tools\.git" (
if not "%GIT%" == "" (
"%GIT%" submodule update --init --recursive --progress
if errorlevel 1 goto FAIL
"%GIT%" submodule foreach git checkout main
"%GIT%" submodule foreach git checkout master
if errorlevel 1 goto FAIL
"%GIT%" submodule foreach git pull --rebase origin main
"%GIT%" submodule foreach git pull --rebase origin master
if errorlevel 1 goto FAIL
goto EOF
) else (

View File

@@ -38,7 +38,7 @@ PROJECT_NAME = Blender
# could be handy for archiving the generated documentation or if some version
# control system is used.
PROJECT_NUMBER = V3.6
PROJECT_NUMBER = V3.5
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a

View File

@@ -37,7 +37,7 @@ def draw_callback_px(self, context):
# BLF drawing routine
font_id = font_info["font_id"]
blf.position(font_id, 2, 80, 0)
blf.size(font_id, 50)
blf.size(font_id, 50, 72)
blf.draw(font_id, "Hello World")

View File

@@ -476,7 +476,7 @@ MODULE_GROUPING = {
# -------------------------------BLENDER----------------------------------------
# Converting bytes to strings, due to #30154.
# converting bytes to strings, due to T30154
BLENDER_REVISION = str(bpy.app.build_hash, 'utf_8')
BLENDER_REVISION_TIMESTAMP = bpy.app.build_commit_timestamp
@@ -487,7 +487,7 @@ BLENDER_VERSION_DOTS = "%d.%d" % (bpy.app.version[0], bpy.app.version[1])
if BLENDER_REVISION != "Unknown":
# SHA1 Git hash
BLENDER_VERSION_HASH = BLENDER_REVISION
BLENDER_VERSION_HASH_HTML_LINK = "<a href=https://projects.blender.org/blender/blender/commit/%s>%s</a>" % (
BLENDER_VERSION_HASH_HTML_LINK = "<a href=https://developer.blender.org/rB%s>%s</a>" % (
BLENDER_VERSION_HASH, BLENDER_VERSION_HASH,
)
BLENDER_VERSION_DATE = time.strftime("%d/%m/%Y", time.localtime(BLENDER_REVISION_TIMESTAMP))
@@ -647,7 +647,7 @@ def undocumented_message(module_name, type_name, identifier):
module_name, type_name, identifier,
)
return "Undocumented, consider `contributing <https://developer.blender.org/>`__."
return "Undocumented, consider `contributing <https://developer.blender.org/T51061>`__."
def range_str(val):
@@ -1816,9 +1816,9 @@ def pyrna2sphinx(basepath):
# operators
def write_ops():
API_BASEURL = "https://projects.blender.org/blender/blender/src/branch/main/release/scripts"
API_BASEURL_ADDON = "https://projects.blender.org/blender/blender-addons"
API_BASEURL_ADDON_CONTRIB = "https://projects.blender.org/blender/blender-addons-contrib"
API_BASEURL = "https://developer.blender.org/diffusion/B/browse/master/release/scripts"
API_BASEURL_ADDON = "https://developer.blender.org/diffusion/BA"
API_BASEURL_ADDON_CONTRIB = "https://developer.blender.org/diffusion/BAC"
op_modules = {}
op = None
@@ -2200,7 +2200,7 @@ def write_rst_enum_items(basepath, key, key_no_prefix, enum_items):
Write a single page for a static enum in RST.
This helps avoiding very large lists being in-lined in many places which is an issue
especially with icons in ``bpy.types.UILayout``. See #87008.
especially with icons in ``bpy.types.UILayout``. See T87008.
"""
filepath = os.path.join(basepath, "%s.rst" % key_no_prefix)
with open(filepath, "w", encoding="utf-8") as fh:

View File

@@ -156,7 +156,7 @@ var Popover = function() {
},
getNamed : function(v) {
$.each(all_versions, function(ix, title) {
if (ix === "master" || ix === "main" || ix === "latest") {
if (ix === "master" || ix === "latest") {
var m = title.match(/\d\.\d[\w\d\.]*/)[0];
if (parseFloat(m) == v) {
v = ix;

View File

@@ -1,5 +1,5 @@
Project: Blender
URL: https://projects.blender.org/blender/blender.git
URL: https://git.blender.org/blender.git
License: Apache 2.0
Upstream version: N/A
Local modifications: None

View File

@@ -12,7 +12,6 @@ from bpy.props import (
PointerProperty,
StringProperty,
)
from bpy.app.translations import pgettext_iface as iface_
from math import pi
@@ -1665,51 +1664,30 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.label(text="No compatible GPUs found for Cycles", icon='INFO')
if device_type == 'CUDA':
compute_capability = "3.0"
col.label(text=iface_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
icon='BLANK1', translate=False)
col.label(text="Requires NVIDIA GPU with compute capability 3.0", icon='BLANK1')
elif device_type == 'OPTIX':
compute_capability = "5.0"
driver_version = "470"
col.label(text=iface_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
icon='BLANK1', translate=False)
col.label(text="and NVIDIA driver version %s or newer" % driver_version,
icon='BLANK1', translate=False)
col.label(text="Requires NVIDIA GPU with compute capability 5.0", icon='BLANK1')
col.label(text="and NVIDIA driver version 470 or newer", icon='BLANK1')
elif device_type == 'HIP':
if True:
col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
else:
import sys
if sys.platform[:3] == "win":
driver_version = "21.Q4"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
icon='BLANK1', translate=False)
elif sys.platform.startswith("linux"):
driver_version = "22.10"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD driver version %s or newer") % driver_version, icon='BLANK1',
translate=False)
import sys
if sys.platform[:3] == "win":
col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
elif sys.platform.startswith("linux"):
col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
col.label(text="and AMD driver version 22.10 or newer", icon='BLANK1')
elif device_type == 'ONEAPI':
import sys
if sys.platform.startswith("win"):
driver_version = "101.4032"
col.label(text="Requires Intel GPU with Xe-HPG architecture", icon='BLANK1')
col.label(text=iface_("and Windows driver version %s or newer") % driver_version,
icon='BLANK1', translate=False)
col.label(text="and Windows driver version 101.4032 or newer", icon='BLANK1')
elif sys.platform.startswith("linux"):
driver_version = "1.3.24931"
col.label(text="Requires Intel GPU with Xe-HPG architecture and", icon='BLANK1')
col.label(text=iface_(" - intel-level-zero-gpu version %s or newer") % driver_version,
icon='BLANK1', translate=False)
col.label(text=" - intel-level-zero-gpu version 1.3.24931 or newer", icon='BLANK1')
col.label(text=" - oneAPI Level-Zero Loader", icon='BLANK1')
elif device_type == 'METAL':
silicon_mac_version = "12.2"
amd_mac_version = "12.3"
col.label(text=iface_("Requires Apple Silicon with macOS %s or newer") % silicon_mac_version,
icon='BLANK1', translate=False)
col.label(text=iface_("or AMD with macOS %s or newer") % amd_mac_version, icon='BLANK1',
translate=False)
col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')
col.label(text="or AMD with macOS 12.3 or newer", icon='BLANK1')
return
for device in devices:
@@ -1744,21 +1722,19 @@ class CyclesPreferences(bpy.types.AddonPreferences):
row.prop(self, "peer_memory")
if compute_device_type == 'METAL':
import platform
import re
is_navi_2 = False
import platform, re
isNavi2 = False
for device in devices:
if re.search(r"((RX)|(Pro)|(PRO))\s+W?6\d00X", device.name):
is_navi_2 = True
break
obj = re.search("((RX)|(Pro)|(PRO))\s+W?6\d00X",device.name)
if obj:
isNavi2 = True
# MetalRT only works on Apple Silicon and Navi2.
is_arm64 = platform.machine() == 'arm64'
if is_arm64 or is_navi_2:
# MetalRT only works on Apple Silicon and Navi2
if platform.machine() == 'arm64' or isNavi2:
col = layout.column()
col.use_property_split = True
# Kernel specialization is only supported on Apple Silicon
if is_arm64:
if platform.machine() == 'arm64':
col.prop(self, "kernel_optimization_level")
col.prop(self, "use_metalrt")

View File

@@ -20,7 +20,7 @@ class CyclesPresetPanel(PresetPanel, Panel):
@staticmethod
def post_cb(context):
# Modify an arbitrary built-in scene property to force a depsgraph
# update, because add-on properties don't. (see #62325)
# update, because add-on properties don't. (see T62325)
render = context.scene.render
render.filter_size = render.filter_size

View File

@@ -105,12 +105,11 @@ GPUShader *BlenderFallbackDisplayShader::bind(int width, int height)
/* Bind shader now to enable uniform assignment. */
GPU_shader_bind(shader_program_);
int slot = 0;
GPU_shader_uniform_int_ex(shader_program_, image_texture_location_, 1, 1, &slot);
GPU_shader_uniform_int(shader_program_, image_texture_location_, 0);
float size[2];
size[0] = width;
size[1] = height;
GPU_shader_uniform_float_ex(shader_program_, fullscreen_location_, 2, 1, size);
GPU_shader_uniform_vector(shader_program_, fullscreen_location_, 2, 1, size);
return shader_program_;
}

View File

@@ -20,7 +20,7 @@ BlenderImageLoader::BlenderImageLoader(BL::Image b_image,
: b_image(b_image),
frame(frame),
tile_number(tile_number),
/* Don't free cache for preview render to avoid race condition from #93560, to be fixed
/* Don't free cache for preview render to avoid race condition from T93560, to be fixed
* properly later as we are close to release. */
free_cache(!is_preview_render && !b_image.has_data())
{
@@ -72,7 +72,7 @@ bool BlenderImageLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaDat
metadata.colorspace = u_colorspace_raw;
}
else {
/* In some cases (e.g. #94135), the colorspace setting in Blender gets updated as part of the
/* In some cases (e.g. T94135), the colorspace setting in Blender gets updated as part of the
* metadata queries in this function, so update the colorspace setting here. */
PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
metadata.colorspace = get_enum_identifier(colorspace_ptr, "name");

View File

@@ -24,7 +24,7 @@ void BlenderSync::sync_light(BL::Object &b_parent,
Light *light = light_map.find(key);
/* Check if the transform was modified, in case a linked collection is moved we do not get a
* specific depsgraph update (#88515). This also mimics the behavior for Objects. */
* specific depsgraph update (T88515). This also mimics the behavior for Objects. */
const bool tfm_updated = (light && light->get_tfm() != tfm);
/* Update if either object or light data changed. */

View File

@@ -94,7 +94,7 @@ void python_thread_state_restore(void **python_thread_state)
*python_thread_state = NULL;
}
static const char *PyC_UnicodeAsBytes(PyObject *py_str, PyObject **coerce)
static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce)
{
const char *result = PyUnicode_AsUTF8(py_str);
if (result) {
@@ -131,8 +131,8 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)
}
PyObject *path_coerce = nullptr, *user_path_coerce = nullptr;
path_init(PyC_UnicodeAsBytes(path, &path_coerce),
PyC_UnicodeAsBytes(user_path, &user_path_coerce));
path_init(PyC_UnicodeAsByte(path, &path_coerce),
PyC_UnicodeAsByte(user_path, &user_path_coerce));
Py_XDECREF(path_coerce);
Py_XDECREF(user_path_coerce);

View File

@@ -404,7 +404,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
* point we know that we've got everything to render current view layer.
*/
/* At the moment we only free if we are not doing multi-view
* (or if we are rendering the last view). See #58142/D4239 for discussion.
* (or if we are rendering the last view). See T58142/D4239 for discussion.
*/
if (view_index == num_views - 1) {
free_blender_memory_if_possible();

View File

@@ -766,7 +766,7 @@ void BlenderSync::free_data_after_sync(BL::Depsgraph &b_depsgraph)
(BlenderSession::headless || is_interface_locked) &&
/* Baking re-uses the depsgraph multiple times, clearing crashes
* reading un-evaluated mesh data which isn't aligned with the
* geometry we're baking, see #71012. */
* geometry we're baking, see T71012. */
!scene->bake_manager->get_baking() &&
/* Persistent data must main caches for performance and correctness. */
!is_persistent_data;

View File

@@ -42,15 +42,12 @@ endif()
###########################################################################
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
set(WITH_CYCLES_HIP_BINARIES OFF)
message(STATUS "HIP temporarily disabled due to compiler bugs")
find_package(HIP)
set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
# find_package(HIP)
# set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
# if(HIP_FOUND)
# message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
# endif()
if(HIP_FOUND)
message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
endif()
endif()
if(NOT WITH_HIP_DYNLOAD)

View File

@@ -53,12 +53,8 @@ void CUDADevice::set_error(const string &error)
}
CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: GPUDevice(info, stats, profiler)
: Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
{
/* Verify that base class types can be used with specific backend types */
static_assert(sizeof(texMemObject) == sizeof(CUtexObject));
static_assert(sizeof(arrayMemObject) == sizeof(CUarray));
first_error = true;
cuDevId = info.num;
@@ -69,6 +65,12 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
need_texture_info = false;
device_texture_headroom = 0;
device_working_headroom = 0;
move_texture_to_host = false;
map_host_limit = 0;
map_host_used = 0;
can_map_host = 0;
pitch_alignment = 0;
/* Initialize CUDA. */
@@ -89,9 +91,8 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
/* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
* CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
* so we can predict which memory to map to host. */
int value;
cuda_assert(cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
can_map_host = value != 0;
cuda_assert(
cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
cuda_assert(cuDeviceGetAttribute(
&pitch_alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
@@ -498,57 +499,311 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
# endif
}
void CUDADevice::get_device_memory_info(size_t &total, size_t &free)
void CUDADevice::init_host_memory()
{
/* Limit amount of host mapped memory, because allocating too much can
* cause system instability. Leave at least half or 4 GB of system
* memory free, whichever is smaller. */
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
size_t system_ram = system_physical_ram();
if (system_ram > 0) {
if (system_ram / 2 > default_limit) {
map_host_limit = system_ram - default_limit;
}
else {
map_host_limit = system_ram / 2;
}
}
else {
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
map_host_limit = 0;
}
/* Amount of device memory to keep is free after texture memory
* and working memory allocations respectively. We set the working
* memory limit headroom lower so that some space is left after all
* texture memory allocations. */
device_working_headroom = 32 * 1024 * 1024LL; // 32MB
device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
}
void CUDADevice::load_texture_info()
{
if (need_texture_info) {
/* Unset flag before copying, so this does not loop indefinitely if the copy below calls
* into 'move_textures_to_host' (which calls 'load_texture_info' again). */
need_texture_info = false;
texture_info.copy_to_device();
}
}
void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
{
/* Break out of recursive call, which can happen when moving memory on a multi device. */
static bool any_device_moving_textures_to_host = false;
if (any_device_moving_textures_to_host) {
return;
}
/* Signal to reallocate textures in host memory only. */
move_texture_to_host = true;
while (size > 0) {
/* Find suitable memory allocation to move. */
device_memory *max_mem = NULL;
size_t max_size = 0;
bool max_is_image = false;
thread_scoped_lock lock(cuda_mem_map_mutex);
foreach (CUDAMemMap::value_type &pair, cuda_mem_map) {
device_memory &mem = *pair.first;
CUDAMem *cmem = &pair.second;
/* Can only move textures allocated on this device (and not those from peer devices).
* And need to ignore memory that is already on the host. */
if (!mem.is_resident(this) || cmem->use_mapped_host) {
continue;
}
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
(&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
/* Can't move this type of memory. */
if (!is_texture || cmem->array) {
continue;
}
/* For other textures, only move image textures. */
if (for_texture && !is_image) {
continue;
}
/* Try to move largest allocation, prefer moving images. */
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
max_is_image = is_image;
max_size = mem.device_size;
max_mem = &mem;
}
}
lock.unlock();
/* Move to host memory. This part is mutex protected since
* multiple CUDA devices could be moving the memory. The
* first one will do it, and the rest will adopt the pointer. */
if (max_mem) {
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
static thread_mutex move_mutex;
thread_scoped_lock lock(move_mutex);
any_device_moving_textures_to_host = true;
/* Potentially need to call back into multi device, so pointer mapping
* and peer devices are updated. This is also necessary since the device
* pointer may just be a key here, so cannot be accessed and freed directly.
* Unfortunately it does mean that memory is reallocated on all other
* devices as well, which is potentially dangerous when still in use (since
* a thread rendering on another devices would only be caught in this mutex
* if it so happens to do an allocation at the same time as well. */
max_mem->device_copy_to();
size = (max_size >= size) ? 0 : size - max_size;
any_device_moving_textures_to_host = false;
}
else {
break;
}
}
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
move_texture_to_host = false;
/* Update texture info array with new pointers. */
load_texture_info();
}
CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_padding)
{
CUDAContextScope scope(this);
CUdeviceptr device_pointer = 0;
size_t size = mem.memory_size() + pitch_padding;
CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
const char *status = "";
/* First try allocating in device memory, respecting headroom. We make
* an exception for texture info. It is small and frequently accessed,
* so treat it as working memory.
*
* If there is not enough room for working memory, we will try to move
* textures to host memory, assuming the performance impact would have
* been worse for working memory. */
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
size_t total = 0, free = 0;
cuMemGetInfo(&free, &total);
/* Move textures to host memory if needed. */
if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
move_textures_to_host(size + headroom - free, is_texture);
cuMemGetInfo(&free, &total);
}
/* Allocate in device memory. */
if (!move_texture_to_host && (size + headroom) < free) {
mem_alloc_result = cuMemAlloc(&device_pointer, size);
if (mem_alloc_result == CUDA_SUCCESS) {
status = " in device memory";
}
}
/* Fall back to mapped host memory if needed and possible. */
void *shared_pointer = 0;
if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) {
if (mem.shared_pointer) {
/* Another device already allocated host memory. */
mem_alloc_result = CUDA_SUCCESS;
shared_pointer = mem.shared_pointer;
}
else if (map_host_used + size < map_host_limit) {
/* Allocate host memory ourselves. */
mem_alloc_result = cuMemHostAlloc(
&shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
(mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
}
if (mem_alloc_result == CUDA_SUCCESS) {
cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0));
map_host_used += size;
status = " in host memory";
}
}
if (mem_alloc_result != CUDA_SUCCESS) {
if (mem.type == MEM_DEVICE_ONLY) {
status = " failed, out of device memory";
set_error("System is out of GPU memory");
}
else {
status = " failed, out of device and host memory";
set_error("System is out of GPU and shared host memory");
}
}
if (mem.name) {
VLOG_WORK << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")" << status;
}
mem.device_pointer = (device_ptr)device_pointer;
mem.device_size = size;
stats.mem_alloc(size);
if (!mem.device_pointer) {
return NULL;
}
/* Insert into map of allocations. */
thread_scoped_lock lock(cuda_mem_map_mutex);
CUDAMem *cmem = &cuda_mem_map[&mem];
if (shared_pointer != 0) {
/* Replace host pointer with our host allocation. Only works if
* CUDA memory layout is the same and has no pitch padding. Also
* does not work if we move textures to host during a render,
* since other devices might be using the memory. */
if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
mem.host_pointer != shared_pointer) {
memcpy(shared_pointer, mem.host_pointer, size);
/* A Call to device_memory::host_free() should be preceded by
* a call to device_memory::device_free() for host memory
* allocated by a device to be handled properly. Two exceptions
* are here and a call in OptiXDevice::generic_alloc(), where
* the current host memory can be assumed to be allocated by
* device_memory::host_alloc(), not by a device */
mem.host_free();
mem.host_pointer = shared_pointer;
}
mem.shared_pointer = shared_pointer;
mem.shared_counter++;
cmem->use_mapped_host = true;
}
else {
cmem->use_mapped_host = false;
}
return cmem;
}
bool CUDADevice::alloc_device(void *&device_pointer, size_t size)
void CUDADevice::generic_copy_to(device_memory &mem)
{
CUDAContextScope scope(this);
if (!mem.host_pointer || !mem.device_pointer) {
return;
}
CUresult mem_alloc_result = cuMemAlloc((CUdeviceptr *)&device_pointer, size);
return mem_alloc_result == CUDA_SUCCESS;
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
* cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
* mem.host_pointer. */
thread_scoped_lock lock(cuda_mem_map_mutex);
if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const CUDAContextScope scope(this);
cuda_assert(
cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size()));
}
}
void CUDADevice::free_device(void *device_pointer)
void CUDADevice::generic_free(device_memory &mem)
{
CUDAContextScope scope(this);
if (mem.device_pointer) {
CUDAContextScope scope(this);
thread_scoped_lock lock(cuda_mem_map_mutex);
DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
const CUDAMem &cmem = cuda_mem_map[&mem];
cuda_assert(cuMemFree((CUdeviceptr)device_pointer));
}
/* If cmem.use_mapped_host is true, reference counting is used
* to safely free a mapped host memory. */
bool CUDADevice::alloc_host(void *&shared_pointer, size_t size)
{
CUDAContextScope scope(this);
if (cmem.use_mapped_host) {
assert(mem.shared_pointer);
if (mem.shared_pointer) {
assert(mem.shared_counter > 0);
if (--mem.shared_counter == 0) {
if (mem.host_pointer == mem.shared_pointer) {
mem.host_pointer = 0;
}
cuMemFreeHost(mem.shared_pointer);
mem.shared_pointer = 0;
}
}
map_host_used -= mem.device_size;
}
else {
/* Free device memory. */
cuda_assert(cuMemFree(mem.device_pointer));
}
CUresult mem_alloc_result = cuMemHostAlloc(
&shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
return mem_alloc_result == CUDA_SUCCESS;
}
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
void CUDADevice::free_host(void *shared_pointer)
{
CUDAContextScope scope(this);
cuMemFreeHost(shared_pointer);
}
bool CUDADevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
{
CUDAContextScope scope(this);
cuda_assert(cuMemHostGetDevicePointer_v2((CUdeviceptr *)&device_pointer, shared_pointer, 0));
return true;
}
void CUDADevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
{
const CUDAContextScope scope(this);
cuda_assert(cuMemcpyHtoD((CUdeviceptr)device_pointer, host_pointer, size));
cuda_mem_map.erase(cuda_mem_map.find(&mem));
}
}
void CUDADevice::mem_alloc(device_memory &mem)
@@ -613,8 +868,8 @@ void CUDADevice::mem_zero(device_memory &mem)
/* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
* regardless of mem.host_pointer and mem.shared_pointer. */
thread_scoped_lock lock(device_mem_map_mutex);
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
thread_scoped_lock lock(cuda_mem_map_mutex);
if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const CUDAContextScope scope(this);
cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
}
@@ -739,19 +994,19 @@ void CUDADevice::tex_alloc(device_texture &mem)
return;
}
Mem *cmem = NULL;
CUDAMem *cmem = NULL;
CUarray array_3d = NULL;
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
size_t dst_pitch = src_pitch;
if (!mem.is_resident(this)) {
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
thread_scoped_lock lock(cuda_mem_map_mutex);
cmem = &cuda_mem_map[&mem];
cmem->texobject = 0;
if (mem.data_depth > 1) {
array_3d = (CUarray)mem.device_pointer;
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
cmem->array = array_3d;
}
else if (mem.data_height > 0) {
dst_pitch = align_up(src_pitch, pitch_alignment);
@@ -795,10 +1050,10 @@ void CUDADevice::tex_alloc(device_texture &mem)
mem.device_size = size;
stats.mem_alloc(size);
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
thread_scoped_lock lock(cuda_mem_map_mutex);
cmem = &cuda_mem_map[&mem];
cmem->texobject = 0;
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
cmem->array = array_3d;
}
else if (mem.data_height > 0) {
/* 2D texture, using pitch aligned linear memory. */
@@ -882,8 +1137,8 @@ void CUDADevice::tex_alloc(device_texture &mem)
texDesc.filterMode = filter_mode;
texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
thread_scoped_lock lock(cuda_mem_map_mutex);
cmem = &cuda_mem_map[&mem];
cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
@@ -898,9 +1153,9 @@ void CUDADevice::tex_free(device_texture &mem)
{
if (mem.device_pointer) {
CUDAContextScope scope(this);
thread_scoped_lock lock(device_mem_map_mutex);
DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
const Mem &cmem = device_mem_map[&mem];
thread_scoped_lock lock(cuda_mem_map_mutex);
DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
const CUDAMem &cmem = cuda_mem_map[&mem];
if (cmem.texobject) {
/* Free bindless texture. */
@@ -909,16 +1164,16 @@ void CUDADevice::tex_free(device_texture &mem)
if (!mem.is_resident(this)) {
/* Do not free memory here, since it was allocated on a different device. */
device_mem_map.erase(device_mem_map.find(&mem));
cuda_mem_map.erase(cuda_mem_map.find(&mem));
}
else if (cmem.array) {
/* Free array. */
cuArrayDestroy(reinterpret_cast<CUarray>(cmem.array));
cuArrayDestroy(cmem.array);
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
device_mem_map.erase(device_mem_map.find(&mem));
cuda_mem_map.erase(cuda_mem_map.find(&mem));
}
else {
lock.unlock();

View File

@@ -21,7 +21,7 @@ CCL_NAMESPACE_BEGIN
class DeviceQueue;
class CUDADevice : public GPUDevice {
class CUDADevice : public Device {
friend class CUDAContextScope;
@@ -29,11 +29,36 @@ class CUDADevice : public GPUDevice {
CUdevice cuDevice;
CUcontext cuContext;
CUmodule cuModule;
size_t device_texture_headroom;
size_t device_working_headroom;
bool move_texture_to_host;
size_t map_host_used;
size_t map_host_limit;
int can_map_host;
int pitch_alignment;
int cuDevId;
int cuDevArchitecture;
bool first_error;
struct CUDAMem {
CUDAMem() : texobject(0), array(0), use_mapped_host(false)
{
}
CUtexObject texobject;
CUarray array;
/* If true, a mapped host memory in shared_pointer is being used. */
bool use_mapped_host;
};
typedef map<device_memory *, CUDAMem> CUDAMemMap;
CUDAMemMap cuda_mem_map;
thread_mutex cuda_mem_map_mutex;
/* Bindless Textures */
device_vector<TextureInfo> texture_info;
bool need_texture_info;
CUDADeviceKernels kernels;
static bool have_precompiled_kernels();
@@ -63,13 +88,17 @@ class CUDADevice : public GPUDevice {
void reserve_local_memory(const uint kernel_features);
virtual void get_device_memory_info(size_t &total, size_t &free) override;
virtual bool alloc_device(void *&device_pointer, size_t size) override;
virtual void free_device(void *device_pointer) override;
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
virtual void free_host(void *shared_pointer) override;
virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;
void init_host_memory();
void load_texture_info();
void move_textures_to_host(size_t size, bool for_texture);
CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
void generic_copy_to(device_memory &mem);
void generic_free(device_memory &mem);
void mem_alloc(device_memory &mem) override;

View File

@@ -452,320 +452,6 @@ void *Device::get_cpu_osl_memory()
return nullptr;
}
GPUDevice::~GPUDevice() noexcept(false)
{
}
bool GPUDevice::load_texture_info()
{
if (need_texture_info) {
/* Unset flag before copying, so this does not loop indefinitely if the copy below calls
* into 'move_textures_to_host' (which calls 'load_texture_info' again). */
need_texture_info = false;
texture_info.copy_to_device();
return true;
}
else {
return false;
}
}
void GPUDevice::init_host_memory(size_t preferred_texture_headroom,
size_t preferred_working_headroom)
{
/* Limit amount of host mapped memory, because allocating too much can
* cause system instability. Leave at least half or 4 GB of system
* memory free, whichever is smaller. */
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
size_t system_ram = system_physical_ram();
if (system_ram > 0) {
if (system_ram / 2 > default_limit) {
map_host_limit = system_ram - default_limit;
}
else {
map_host_limit = system_ram / 2;
}
}
else {
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
map_host_limit = 0;
}
/* Amount of device memory to keep free after texture memory
* and working memory allocations respectively. We set the working
* memory limit headroom lower than the working one so there
* is space left for it. */
device_working_headroom = preferred_working_headroom > 0 ? preferred_working_headroom :
32 * 1024 * 1024LL; // 32MB
device_texture_headroom = preferred_texture_headroom > 0 ? preferred_texture_headroom :
128 * 1024 * 1024LL; // 128MB
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
}
void GPUDevice::move_textures_to_host(size_t size, bool for_texture)
{
/* Break out of recursive call, which can happen when moving memory on a multi device. */
static bool any_device_moving_textures_to_host = false;
if (any_device_moving_textures_to_host) {
return;
}
/* Signal to reallocate textures in host memory only. */
move_texture_to_host = true;
while (size > 0) {
/* Find suitable memory allocation to move. */
device_memory *max_mem = NULL;
size_t max_size = 0;
bool max_is_image = false;
thread_scoped_lock lock(device_mem_map_mutex);
foreach (MemMap::value_type &pair, device_mem_map) {
device_memory &mem = *pair.first;
Mem *cmem = &pair.second;
/* Can only move textures allocated on this device (and not those from peer devices).
* And need to ignore memory that is already on the host. */
if (!mem.is_resident(this) || cmem->use_mapped_host) {
continue;
}
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
(&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
/* Can't move this type of memory. */
if (!is_texture || cmem->array) {
continue;
}
/* For other textures, only move image textures. */
if (for_texture && !is_image) {
continue;
}
/* Try to move largest allocation, prefer moving images. */
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
max_is_image = is_image;
max_size = mem.device_size;
max_mem = &mem;
}
}
lock.unlock();
/* Move to host memory. This part is mutex protected since
* multiple backend devices could be moving the memory. The
* first one will do it, and the rest will adopt the pointer. */
if (max_mem) {
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
static thread_mutex move_mutex;
thread_scoped_lock lock(move_mutex);
any_device_moving_textures_to_host = true;
/* Potentially need to call back into multi device, so pointer mapping
* and peer devices are updated. This is also necessary since the device
* pointer may just be a key here, so cannot be accessed and freed directly.
* Unfortunately it does mean that memory is reallocated on all other
* devices as well, which is potentially dangerous when still in use (since
* a thread rendering on another devices would only be caught in this mutex
* if it so happens to do an allocation at the same time as well. */
max_mem->device_copy_to();
size = (max_size >= size) ? 0 : size - max_size;
any_device_moving_textures_to_host = false;
}
else {
break;
}
}
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
move_texture_to_host = false;
/* Update texture info array with new pointers. */
load_texture_info();
}
GPUDevice::Mem *GPUDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
{
void *device_pointer = 0;
size_t size = mem.memory_size() + pitch_padding;
bool mem_alloc_result = false;
const char *status = "";
/* First try allocating in device memory, respecting headroom. We make
* an exception for texture info. It is small and frequently accessed,
* so treat it as working memory.
*
* If there is not enough room for working memory, we will try to move
* textures to host memory, assuming the performance impact would have
* been worse for working memory. */
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
size_t total = 0, free = 0;
get_device_memory_info(total, free);
/* Move textures to host memory if needed. */
if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
move_textures_to_host(size + headroom - free, is_texture);
get_device_memory_info(total, free);
}
/* Allocate in device memory. */
if (!move_texture_to_host && (size + headroom) < free) {
mem_alloc_result = alloc_device(device_pointer, size);
if (mem_alloc_result) {
device_mem_in_use += size;
status = " in device memory";
}
}
/* Fall back to mapped host memory if needed and possible. */
void *shared_pointer = 0;
if (!mem_alloc_result && can_map_host && mem.type != MEM_DEVICE_ONLY) {
if (mem.shared_pointer) {
/* Another device already allocated host memory. */
mem_alloc_result = true;
shared_pointer = mem.shared_pointer;
}
else if (map_host_used + size < map_host_limit) {
/* Allocate host memory ourselves. */
mem_alloc_result = alloc_host(shared_pointer, size);
assert((mem_alloc_result && shared_pointer != 0) ||
(!mem_alloc_result && shared_pointer == 0));
}
if (mem_alloc_result) {
assert(transform_host_pointer(device_pointer, shared_pointer));
map_host_used += size;
status = " in host memory";
}
}
if (!mem_alloc_result) {
if (mem.type == MEM_DEVICE_ONLY) {
status = " failed, out of device memory";
set_error("System is out of GPU memory");
}
else {
status = " failed, out of device and host memory";
set_error("System is out of GPU and shared host memory");
}
}
if (mem.name) {
VLOG_WORK << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")" << status;
}
mem.device_pointer = (device_ptr)device_pointer;
mem.device_size = size;
stats.mem_alloc(size);
if (!mem.device_pointer) {
return NULL;
}
/* Insert into map of allocations. */
thread_scoped_lock lock(device_mem_map_mutex);
Mem *cmem = &device_mem_map[&mem];
if (shared_pointer != 0) {
/* Replace host pointer with our host allocation. Only works if
* memory layout is the same and has no pitch padding. Also
* does not work if we move textures to host during a render,
* since other devices might be using the memory. */
if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
mem.host_pointer != shared_pointer) {
memcpy(shared_pointer, mem.host_pointer, size);
/* A Call to device_memory::host_free() should be preceded by
* a call to device_memory::device_free() for host memory
* allocated by a device to be handled properly. Two exceptions
* are here and a call in OptiXDevice::generic_alloc(), where
* the current host memory can be assumed to be allocated by
* device_memory::host_alloc(), not by a device */
mem.host_free();
mem.host_pointer = shared_pointer;
}
mem.shared_pointer = shared_pointer;
mem.shared_counter++;
cmem->use_mapped_host = true;
}
else {
cmem->use_mapped_host = false;
}
return cmem;
}
void GPUDevice::generic_free(device_memory &mem)
{
if (mem.device_pointer) {
thread_scoped_lock lock(device_mem_map_mutex);
DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
const Mem &cmem = device_mem_map[&mem];
/* If cmem.use_mapped_host is true, reference counting is used
* to safely free a mapped host memory. */
if (cmem.use_mapped_host) {
assert(mem.shared_pointer);
if (mem.shared_pointer) {
assert(mem.shared_counter > 0);
if (--mem.shared_counter == 0) {
if (mem.host_pointer == mem.shared_pointer) {
mem.host_pointer = 0;
}
free_host(mem.shared_pointer);
mem.shared_pointer = 0;
}
}
map_host_used -= mem.device_size;
}
else {
/* Free device memory. */
free_device((void *)mem.device_pointer);
device_mem_in_use -= mem.device_size;
}
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
device_mem_map.erase(device_mem_map.find(&mem));
}
}
void GPUDevice::generic_copy_to(device_memory &mem)
{
if (!mem.host_pointer || !mem.device_pointer) {
return;
}
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
* backend device allocation regardless of mem.host_pointer and mem.shared_pointer, and should
* copy data from mem.host_pointer. */
thread_scoped_lock lock(device_mem_map_mutex);
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size());
}
}
/* DeviceInfo */
CCL_NAMESPACE_END

View File

@@ -309,93 +309,6 @@ class Device {
static uint devices_initialized_mask;
};
/* Device, which is GPU, with some common functionality for GPU backends */
class GPUDevice : public Device {
protected:
GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
: Device(info_, stats_, profiler_),
texture_info(this, "texture_info", MEM_GLOBAL),
need_texture_info(false),
can_map_host(false),
map_host_used(0),
map_host_limit(0),
device_texture_headroom(0),
device_working_headroom(0),
device_mem_map(),
device_mem_map_mutex(),
move_texture_to_host(false),
device_mem_in_use(0)
{
}
public:
virtual ~GPUDevice() noexcept(false);
/* For GPUs that can use bindless textures in some way or another. */
device_vector<TextureInfo> texture_info;
bool need_texture_info;
/* Returns true if the texture info was copied to the device (meaning, some more
* re-initialization might be needed). */
virtual bool load_texture_info();
protected:
/* Memory allocation, only accessed through device_memory. */
friend class device_memory;
bool can_map_host;
size_t map_host_used;
size_t map_host_limit;
size_t device_texture_headroom;
size_t device_working_headroom;
typedef unsigned long long texMemObject;
typedef unsigned long long arrayMemObject;
struct Mem {
Mem() : texobject(0), array(0), use_mapped_host(false)
{
}
texMemObject texobject;
arrayMemObject array;
/* If true, a mapped host memory in shared_pointer is being used. */
bool use_mapped_host;
};
typedef map<device_memory *, Mem> MemMap;
MemMap device_mem_map;
thread_mutex device_mem_map_mutex;
bool move_texture_to_host;
/* Simple counter which will try to track amount of used device memory */
size_t device_mem_in_use;
virtual void init_host_memory(size_t preferred_texture_headroom = 0,
size_t preferred_working_headroom = 0);
virtual void move_textures_to_host(size_t size, bool for_texture);
/* Allocation, deallocation and copy functions, with corresponding
* support of device/host allocations. */
virtual GPUDevice::Mem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
virtual void generic_free(device_memory &mem);
virtual void generic_copy_to(device_memory &mem);
/* total - amount of device memory, free - amount of available device memory */
virtual void get_device_memory_info(size_t &total, size_t &free) = 0;
virtual bool alloc_device(void *&device_pointer, size_t size) = 0;
virtual void free_device(void *device_pointer) = 0;
virtual bool alloc_host(void *&shared_pointer, size_t size) = 0;
virtual void free_host(void *shared_pointer) = 0;
/* This function should return device pointer corresponding to shared pointer, which
* is host buffer, allocated in `alloc_host`. The function should `true`, if such
* address transformation is possible and `false` otherwise. */
virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) = 0;
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) = 0;
};
CCL_NAMESPACE_END
#endif /* __DEVICE_H__ */

View File

@@ -53,12 +53,8 @@ void HIPDevice::set_error(const string &error)
}
HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: GPUDevice(info, stats, profiler)
: Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
{
/* Verify that base class types can be used with specific backend types */
static_assert(sizeof(texMemObject) == sizeof(hipTextureObject_t));
static_assert(sizeof(arrayMemObject) == sizeof(hArray));
first_error = true;
hipDevId = info.num;
@@ -69,6 +65,12 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
need_texture_info = false;
device_texture_headroom = 0;
device_working_headroom = 0;
move_texture_to_host = false;
map_host_limit = 0;
map_host_used = 0;
can_map_host = 0;
pitch_alignment = 0;
/* Initialize HIP. */
@@ -89,9 +91,7 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
/* hipDeviceMapHost for mapping host memory when out of device memory.
* hipDeviceLmemResizeToMax for reserving local memory ahead of render,
* so we can predict which memory to map to host. */
int value;
hip_assert(hipDeviceGetAttribute(&value, hipDeviceAttributeCanMapHostMemory, hipDevice));
can_map_host = value != 0;
hip_assert(hipDeviceGetAttribute(&can_map_host, hipDeviceAttributeCanMapHostMemory, hipDevice));
hip_assert(
hipDeviceGetAttribute(&pitch_alignment, hipDeviceAttributeTexturePitchAlignment, hipDevice));
@@ -460,58 +460,305 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
# endif
}
void HIPDevice::get_device_memory_info(size_t &total, size_t &free)
void HIPDevice::init_host_memory()
{
/* Limit amount of host mapped memory, because allocating too much can
* cause system instability. Leave at least half or 4 GB of system
* memory free, whichever is smaller. */
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
size_t system_ram = system_physical_ram();
if (system_ram > 0) {
if (system_ram / 2 > default_limit) {
map_host_limit = system_ram - default_limit;
}
else {
map_host_limit = system_ram / 2;
}
}
else {
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
map_host_limit = 0;
}
/* Amount of device memory to keep is free after texture memory
* and working memory allocations respectively. We set the working
* memory limit headroom lower so that some space is left after all
* texture memory allocations. */
device_working_headroom = 32 * 1024 * 1024LL; // 32MB
device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
}
void HIPDevice::load_texture_info()
{
if (need_texture_info) {
/* Unset flag before copying, so this does not loop indefinitely if the copy below calls
* into 'move_textures_to_host' (which calls 'load_texture_info' again). */
need_texture_info = false;
texture_info.copy_to_device();
}
}
void HIPDevice::move_textures_to_host(size_t size, bool for_texture)
{
/* Break out of recursive call, which can happen when moving memory on a multi device. */
static bool any_device_moving_textures_to_host = false;
if (any_device_moving_textures_to_host) {
return;
}
/* Signal to reallocate textures in host memory only. */
move_texture_to_host = true;
while (size > 0) {
/* Find suitable memory allocation to move. */
device_memory *max_mem = NULL;
size_t max_size = 0;
bool max_is_image = false;
thread_scoped_lock lock(hip_mem_map_mutex);
foreach (HIPMemMap::value_type &pair, hip_mem_map) {
device_memory &mem = *pair.first;
HIPMem *cmem = &pair.second;
/* Can only move textures allocated on this device (and not those from peer devices).
* And need to ignore memory that is already on the host. */
if (!mem.is_resident(this) || cmem->use_mapped_host) {
continue;
}
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
(&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
/* Can't move this type of memory. */
if (!is_texture || cmem->array) {
continue;
}
/* For other textures, only move image textures. */
if (for_texture && !is_image) {
continue;
}
/* Try to move largest allocation, prefer moving images. */
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
max_is_image = is_image;
max_size = mem.device_size;
max_mem = &mem;
}
}
lock.unlock();
/* Move to host memory. This part is mutex protected since
* multiple HIP devices could be moving the memory. The
* first one will do it, and the rest will adopt the pointer. */
if (max_mem) {
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
static thread_mutex move_mutex;
thread_scoped_lock lock(move_mutex);
any_device_moving_textures_to_host = true;
/* Potentially need to call back into multi device, so pointer mapping
* and peer devices are updated. This is also necessary since the device
* pointer may just be a key here, so cannot be accessed and freed directly.
* Unfortunately it does mean that memory is reallocated on all other
* devices as well, which is potentially dangerous when still in use (since
* a thread rendering on another devices would only be caught in this mutex
* if it so happens to do an allocation at the same time as well. */
max_mem->device_copy_to();
size = (max_size >= size) ? 0 : size - max_size;
any_device_moving_textures_to_host = false;
}
else {
break;
}
}
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
move_texture_to_host = false;
/* Update texture info array with new pointers. */
load_texture_info();
}
HIPDevice::HIPMem *HIPDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
{
HIPContextScope scope(this);
hipDeviceptr_t device_pointer = 0;
size_t size = mem.memory_size() + pitch_padding;
hipError_t mem_alloc_result = hipErrorOutOfMemory;
const char *status = "";
/* First try allocating in device memory, respecting headroom. We make
* an exception for texture info. It is small and frequently accessed,
* so treat it as working memory.
*
* If there is not enough room for working memory, we will try to move
* textures to host memory, assuming the performance impact would have
* been worse for working memory. */
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
size_t total = 0, free = 0;
hipMemGetInfo(&free, &total);
/* Move textures to host memory if needed. */
if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
move_textures_to_host(size + headroom - free, is_texture);
hipMemGetInfo(&free, &total);
}
/* Allocate in device memory. */
if (!move_texture_to_host && (size + headroom) < free) {
mem_alloc_result = hipMalloc(&device_pointer, size);
if (mem_alloc_result == hipSuccess) {
status = " in device memory";
}
}
/* Fall back to mapped host memory if needed and possible. */
void *shared_pointer = 0;
if (mem_alloc_result != hipSuccess && can_map_host) {
if (mem.shared_pointer) {
/* Another device already allocated host memory. */
mem_alloc_result = hipSuccess;
shared_pointer = mem.shared_pointer;
}
else if (map_host_used + size < map_host_limit) {
/* Allocate host memory ourselves. */
mem_alloc_result = hipHostMalloc(
&shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
assert((mem_alloc_result == hipSuccess && shared_pointer != 0) ||
(mem_alloc_result != hipSuccess && shared_pointer == 0));
}
if (mem_alloc_result == hipSuccess) {
hip_assert(hipHostGetDevicePointer(&device_pointer, shared_pointer, 0));
map_host_used += size;
status = " in host memory";
}
}
if (mem_alloc_result != hipSuccess) {
status = " failed, out of device and host memory";
set_error("System is out of GPU and shared host memory");
}
if (mem.name) {
VLOG_WORK << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")" << status;
}
mem.device_pointer = (device_ptr)device_pointer;
mem.device_size = size;
stats.mem_alloc(size);
if (!mem.device_pointer) {
return NULL;
}
/* Insert into map of allocations. */
thread_scoped_lock lock(hip_mem_map_mutex);
HIPMem *cmem = &hip_mem_map[&mem];
if (shared_pointer != 0) {
/* Replace host pointer with our host allocation. Only works if
* HIP memory layout is the same and has no pitch padding. Also
* does not work if we move textures to host during a render,
* since other devices might be using the memory. */
if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
mem.host_pointer != shared_pointer) {
memcpy(shared_pointer, mem.host_pointer, size);
/* A Call to device_memory::host_free() should be preceded by
* a call to device_memory::device_free() for host memory
* allocated by a device to be handled properly. Two exceptions
* are here and a call in OptiXDevice::generic_alloc(), where
* the current host memory can be assumed to be allocated by
* device_memory::host_alloc(), not by a device */
mem.host_free();
mem.host_pointer = shared_pointer;
}
mem.shared_pointer = shared_pointer;
mem.shared_counter++;
cmem->use_mapped_host = true;
}
else {
cmem->use_mapped_host = false;
}
return cmem;
}
bool HIPDevice::alloc_device(void *&device_pointer, size_t size)
void HIPDevice::generic_copy_to(device_memory &mem)
{
HIPContextScope scope(this);
if (!mem.host_pointer || !mem.device_pointer) {
return;
}
hipError_t mem_alloc_result = hipMalloc((hipDeviceptr_t *)&device_pointer, size);
return mem_alloc_result == hipSuccess;
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
* hipMalloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
* mem.host_pointer. */
thread_scoped_lock lock(hip_mem_map_mutex);
if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const HIPContextScope scope(this);
hip_assert(
hipMemcpyHtoD((hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size()));
}
}
void HIPDevice::free_device(void *device_pointer)
void HIPDevice::generic_free(device_memory &mem)
{
HIPContextScope scope(this);
if (mem.device_pointer) {
HIPContextScope scope(this);
thread_scoped_lock lock(hip_mem_map_mutex);
DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
const HIPMem &cmem = hip_mem_map[&mem];
hip_assert(hipFree((hipDeviceptr_t)device_pointer));
}
/* If cmem.use_mapped_host is true, reference counting is used
* to safely free a mapped host memory. */
bool HIPDevice::alloc_host(void *&shared_pointer, size_t size)
{
HIPContextScope scope(this);
if (cmem.use_mapped_host) {
assert(mem.shared_pointer);
if (mem.shared_pointer) {
assert(mem.shared_counter > 0);
if (--mem.shared_counter == 0) {
if (mem.host_pointer == mem.shared_pointer) {
mem.host_pointer = 0;
}
hipHostFree(mem.shared_pointer);
mem.shared_pointer = 0;
}
}
map_host_used -= mem.device_size;
}
else {
/* Free device memory. */
hip_assert(hipFree(mem.device_pointer));
}
hipError_t mem_alloc_result = hipHostMalloc(
&shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
return mem_alloc_result == hipSuccess;
}
void HIPDevice::free_host(void *shared_pointer)
{
HIPContextScope scope(this);
hipHostFree(shared_pointer);
}
bool HIPDevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
{
HIPContextScope scope(this);
hip_assert(hipHostGetDevicePointer((hipDeviceptr_t *)&device_pointer, shared_pointer, 0));
return true;
}
void HIPDevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
{
const HIPContextScope scope(this);
hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer, host_pointer, size));
hip_mem_map.erase(hip_mem_map.find(&mem));
}
}
void HIPDevice::mem_alloc(device_memory &mem)
@@ -576,8 +823,8 @@ void HIPDevice::mem_zero(device_memory &mem)
/* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
* regardless of mem.host_pointer and mem.shared_pointer. */
thread_scoped_lock lock(device_mem_map_mutex);
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
thread_scoped_lock lock(hip_mem_map_mutex);
if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const HIPContextScope scope(this);
hip_assert(hipMemsetD8((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size()));
}
@@ -704,19 +951,19 @@ void HIPDevice::tex_alloc(device_texture &mem)
return;
}
Mem *cmem = NULL;
HIPMem *cmem = NULL;
hArray array_3d = NULL;
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
size_t dst_pitch = src_pitch;
if (!mem.is_resident(this)) {
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
thread_scoped_lock lock(hip_mem_map_mutex);
cmem = &hip_mem_map[&mem];
cmem->texobject = 0;
if (mem.data_depth > 1) {
array_3d = (hArray)mem.device_pointer;
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
cmem->array = array_3d;
}
else if (mem.data_height > 0) {
dst_pitch = align_up(src_pitch, pitch_alignment);
@@ -760,10 +1007,10 @@ void HIPDevice::tex_alloc(device_texture &mem)
mem.device_size = size;
stats.mem_alloc(size);
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
thread_scoped_lock lock(hip_mem_map_mutex);
cmem = &hip_mem_map[&mem];
cmem->texobject = 0;
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
cmem->array = array_3d;
}
else if (mem.data_height > 0) {
/* 2D texture, using pitch aligned linear memory. */
@@ -848,8 +1095,8 @@ void HIPDevice::tex_alloc(device_texture &mem)
texDesc.filterMode = filter_mode;
texDesc.flags = HIP_TRSF_NORMALIZED_COORDINATES;
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
thread_scoped_lock lock(hip_mem_map_mutex);
cmem = &hip_mem_map[&mem];
hip_assert(hipTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
@@ -864,9 +1111,9 @@ void HIPDevice::tex_free(device_texture &mem)
{
if (mem.device_pointer) {
HIPContextScope scope(this);
thread_scoped_lock lock(device_mem_map_mutex);
DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
const Mem &cmem = device_mem_map[&mem];
thread_scoped_lock lock(hip_mem_map_mutex);
DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
const HIPMem &cmem = hip_mem_map[&mem];
if (cmem.texobject) {
/* Free bindless texture. */
@@ -875,16 +1122,16 @@ void HIPDevice::tex_free(device_texture &mem)
if (!mem.is_resident(this)) {
/* Do not free memory here, since it was allocated on a different device. */
device_mem_map.erase(device_mem_map.find(&mem));
hip_mem_map.erase(hip_mem_map.find(&mem));
}
else if (cmem.array) {
/* Free array. */
hipArrayDestroy(reinterpret_cast<hArray>(cmem.array));
hipArrayDestroy(cmem.array);
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
device_mem_map.erase(device_mem_map.find(&mem));
hip_mem_map.erase(hip_mem_map.find(&mem));
}
else {
lock.unlock();
@@ -906,7 +1153,7 @@ bool HIPDevice::should_use_graphics_interop()
* possible, but from the empiric measurements it can be considerably slower than using naive
* pixels copy. */
/* Disable graphics interop for now, because of driver bug in 21.40. See #92972 */
/* Disable graphics interop for now, because of driver bug in 21.40. See T92972 */
# if 0
HIPContextScope scope(this);

View File

@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
class DeviceQueue;
class HIPDevice : public GPUDevice {
class HIPDevice : public Device {
friend class HIPContextScope;
@@ -26,11 +26,36 @@ class HIPDevice : public GPUDevice {
hipDevice_t hipDevice;
hipCtx_t hipContext;
hipModule_t hipModule;
size_t device_texture_headroom;
size_t device_working_headroom;
bool move_texture_to_host;
size_t map_host_used;
size_t map_host_limit;
int can_map_host;
int pitch_alignment;
int hipDevId;
int hipDevArchitecture;
bool first_error;
struct HIPMem {
HIPMem() : texobject(0), array(0), use_mapped_host(false)
{
}
hipTextureObject_t texobject;
hArray array;
/* If true, a mapped host memory in shared_pointer is being used. */
bool use_mapped_host;
};
typedef map<device_memory *, HIPMem> HIPMemMap;
HIPMemMap hip_mem_map;
thread_mutex hip_mem_map_mutex;
/* Bindless Textures */
device_vector<TextureInfo> texture_info;
bool need_texture_info;
HIPDeviceKernels kernels;
static bool have_precompiled_kernels();
@@ -56,13 +81,17 @@ class HIPDevice : public GPUDevice {
virtual bool load_kernels(const uint kernel_features) override;
void reserve_local_memory(const uint kernel_features);
virtual void get_device_memory_info(size_t &total, size_t &free) override;
virtual bool alloc_device(void *&device_pointer, size_t size) override;
virtual void free_device(void *device_pointer) override;
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
virtual void free_host(void *shared_pointer) override;
virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;
void init_host_memory();
void load_texture_info();
void move_textures_to_host(size_t size, bool for_texture);
HIPMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
void generic_copy_to(device_memory &mem);
void generic_free(device_memory &mem);
void mem_alloc(device_memory &mem) override;

View File

@@ -51,7 +51,7 @@ static inline bool hipSupportsDevice(const int hipDevId)
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
return (major >= 9);
return (major >= 10);
}
CCL_NAMESPACE_END

View File

@@ -247,8 +247,6 @@ class device_memory {
bool is_resident(Device *sub_device) const;
protected:
friend class Device;
friend class GPUDevice;
friend class CUDADevice;
friend class OptiXDevice;
friend class HIPDevice;

View File

@@ -21,7 +21,6 @@ class BVHMetal : public BVH {
API_AVAILABLE(macos(11.0))
vector<id<MTLAccelerationStructure>> blas_array;
vector<uint32_t> blas_lookup;
bool motion_blur = false;

View File

@@ -816,11 +816,6 @@ bool BVHMetal::build_TLAS(Progress &progress,
uint32_t instance_index = 0;
uint32_t motion_transform_index = 0;
// allocate look up buffer for wost case scenario
uint64_t count = objects.size();
blas_lookup.resize(count);
for (Object *ob : objects) {
/* Skip non-traceable objects */
if (!ob->is_traceable())
@@ -848,15 +843,12 @@ bool BVHMetal::build_TLAS(Progress &progress,
/* Set user instance ID to object index */
int object_index = ob->get_device_index();
uint32_t user_id = uint32_t(object_index);
int currIndex = instance_index++;
assert(user_id < blas_lookup.size());
blas_lookup[user_id] = accel_struct_index;
/* Bake into the appropriate descriptor */
if (motion_blur) {
MTLAccelerationStructureMotionInstanceDescriptor *instances =
(MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];
MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++];
desc.accelerationStructureIndex = accel_struct_index;
desc.userID = user_id;
@@ -902,7 +894,7 @@ bool BVHMetal::build_TLAS(Progress &progress,
else {
MTLAccelerationStructureUserIDInstanceDescriptor *instances =
(MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];
MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++];
desc.accelerationStructureIndex = accel_struct_index;
desc.userID = user_id;

View File

@@ -74,11 +74,6 @@ class MetalDevice : public Device {
id<MTLBuffer> texture_bindings_3d = nil;
std::vector<id<MTLTexture>> texture_slot_map;
/* BLAS encoding & lookup */
id<MTLArgumentEncoder> mtlBlasArgEncoder = nil;
id<MTLBuffer> blas_buffer = nil;
id<MTLBuffer> blas_lookup_buffer = nil;
bool use_metalrt = false;
MetalPipelineType kernel_specialization_level = PSO_GENERIC;

View File

@@ -105,7 +105,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
}
case METAL_GPU_AMD: {
max_threads_per_threadgroup = 128;
use_metalrt = info.use_metalrt;
break;
}
case METAL_GPU_APPLE: {
@@ -193,10 +192,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
arg_desc_as.dataType = MTLDataTypeInstanceAccelerationStructure;
arg_desc_as.access = MTLArgumentAccessReadOnly;
MTLArgumentDescriptor *arg_desc_ptrs = [[MTLArgumentDescriptor alloc] init];
arg_desc_ptrs.dataType = MTLDataTypePointer;
arg_desc_ptrs.access = MTLArgumentAccessReadOnly;
MTLArgumentDescriptor *arg_desc_ift = [[MTLArgumentDescriptor alloc] init];
arg_desc_ift.dataType = MTLDataTypeIntersectionFunctionTable;
arg_desc_ift.access = MTLArgumentAccessReadOnly;
@@ -209,28 +204,14 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
[ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_shadow */
arg_desc_ift.index = index++;
[ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_local */
arg_desc_ift.index = index++;
[ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_local_prim */
arg_desc_ptrs.index = index++;
[ancillary_desc addObject:[arg_desc_ptrs copy]]; /* blas array */
arg_desc_ptrs.index = index++;
[ancillary_desc addObject:[arg_desc_ptrs copy]]; /* look up table for blas */
[arg_desc_ift release];
[arg_desc_as release];
[arg_desc_ptrs release];
}
}
mtlAncillaryArgEncoder = [mtlDevice newArgumentEncoderWithArguments:ancillary_desc];
// preparing the blas arg encoder
MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init];
arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure;
arg_desc_blas.access = MTLArgumentAccessReadOnly;
mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]];
[arg_desc_blas release];
for (int i = 0; i < ancillary_desc.count; i++) {
[ancillary_desc[i] release];
}
@@ -586,7 +567,7 @@ void MetalDevice::erase_allocation(device_memory &mem)
if (it != metal_mem_map.end()) {
MetalMem *mmem = it->second.get();
/* blank out reference to MetalMem* in the launch params (fixes crash #94736) */
/* blank out reference to MetalMem* in the launch params (fixes crash T94736) */
if (mmem->pointer_index >= 0) {
device_ptr *pointers = (device_ptr *)&launch_params;
pointers[mmem->pointer_index] = 0;
@@ -1259,33 +1240,6 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
if (@available(macos 11.0, *)) {
if (bvh->params.top_level) {
bvhMetalRT = bvh_metal;
// allocate required buffers for BLAS array
uint64_t count = bvhMetalRT->blas_array.size();
uint64_t bufferSize = mtlBlasArgEncoder.encodedLength * count;
blas_buffer = [mtlDevice newBufferWithLength:bufferSize options:default_storage_mode];
stats.mem_alloc(blas_buffer.allocatedSize);
for (uint64_t i = 0; i < count; ++i) {
[mtlBlasArgEncoder setArgumentBuffer:blas_buffer
offset:i * mtlBlasArgEncoder.encodedLength];
[mtlBlasArgEncoder setAccelerationStructure:bvhMetalRT->blas_array[i] atIndex:0];
}
count = bvhMetalRT->blas_lookup.size();
bufferSize = sizeof(uint32_t) * count;
blas_lookup_buffer = [mtlDevice newBufferWithLength:bufferSize
options:default_storage_mode];
stats.mem_alloc(blas_lookup_buffer.allocatedSize);
memcpy([blas_lookup_buffer contents],
bvhMetalRT -> blas_lookup.data(),
blas_lookup_buffer.allocatedSize);
if (default_storage_mode == MTLResourceStorageModeManaged) {
[blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)];
[blas_lookup_buffer didModifyRange:NSMakeRange(0, blas_lookup_buffer.length)];
}
}
}
}

View File

@@ -19,8 +19,6 @@ enum {
METALRT_FUNC_SHADOW_BOX,
METALRT_FUNC_LOCAL_TRI,
METALRT_FUNC_LOCAL_BOX,
METALRT_FUNC_LOCAL_TRI_PRIM,
METALRT_FUNC_LOCAL_BOX_PRIM,
METALRT_FUNC_CURVE_RIBBON,
METALRT_FUNC_CURVE_RIBBON_SHADOW,
METALRT_FUNC_CURVE_ALL,
@@ -30,13 +28,7 @@ enum {
METALRT_FUNC_NUM
};
enum {
METALRT_TABLE_DEFAULT,
METALRT_TABLE_SHADOW,
METALRT_TABLE_LOCAL,
METALRT_TABLE_LOCAL_PRIM,
METALRT_TABLE_NUM
};
enum { METALRT_TABLE_DEFAULT, METALRT_TABLE_SHADOW, METALRT_TABLE_LOCAL, METALRT_TABLE_NUM };
/* Pipeline State Object types */
enum MetalPipelineType {

View File

@@ -524,8 +524,6 @@ void MetalKernelPipeline::compile()
"__anyhit__cycles_metalrt_shadow_all_hit_box",
"__anyhit__cycles_metalrt_local_hit_tri",
"__anyhit__cycles_metalrt_local_hit_box",
"__anyhit__cycles_metalrt_local_hit_tri_prim",
"__anyhit__cycles_metalrt_local_hit_box_prim",
"__intersection__curve_ribbon",
"__intersection__curve_ribbon_shadow",
"__intersection__curve_all",
@@ -616,17 +614,11 @@ void MetalKernelPipeline::compile()
rt_intersection_function[METALRT_FUNC_LOCAL_BOX],
rt_intersection_function[METALRT_FUNC_LOCAL_BOX],
nil];
table_functions[METALRT_TABLE_LOCAL_PRIM] = [NSArray
arrayWithObjects:rt_intersection_function[METALRT_FUNC_LOCAL_TRI_PRIM],
rt_intersection_function[METALRT_FUNC_LOCAL_BOX_PRIM],
rt_intersection_function[METALRT_FUNC_LOCAL_BOX_PRIM],
nil];
NSMutableSet *unique_functions = [NSMutableSet
setWithArray:table_functions[METALRT_TABLE_DEFAULT]];
[unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]];
[unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]];
[unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL_PRIM]];
if (kernel_has_intersection(device_kernel)) {
linked_functions = [[NSArray arrayWithArray:[unique_functions allObjects]]

View File

@@ -482,12 +482,6 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
if (metal_device_->bvhMetalRT) {
id<MTLAccelerationStructure> accel_struct = metal_device_->bvhMetalRT->accel_struct;
[metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:2];
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_buffer
offset:0
atIndex:7];
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_lookup_buffer
offset:0
atIndex:8];
}
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
@@ -538,10 +532,6 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
if (bvhMetalRT) {
/* Mark all Accelerations resources as used */
[mtlComputeCommandEncoder useResource:bvhMetalRT->accel_struct usage:MTLResourceUsageRead];
[mtlComputeCommandEncoder useResource:metal_device_->blas_buffer
usage:MTLResourceUsageRead];
[mtlComputeCommandEncoder useResource:metal_device_->blas_lookup_buffer
usage:MTLResourceUsageRead];
[mtlComputeCommandEncoder useResources:bvhMetalRT->blas_array.data()
count:bvhMetalRT->blas_array.size()
usage:MTLResourceUsageRead];

View File

@@ -103,7 +103,7 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
}
/* If the system has both an AMD GPU (discrete) and an Intel one (integrated), prefer the AMD
* one. This can be overridden with CYCLES_METAL_FORCE_INTEL. */
* one. This can be overriden with CYCLES_METAL_FORCE_INTEL. */
bool has_usable_amd_gpu = false;
if (@available(macos 12.3, *)) {
for (id<MTLDevice> device in MTLCopyAllDevices()) {

View File

@@ -854,14 +854,12 @@ bool OptiXDevice::load_osl_kernels()
context, group_descs, 2, &group_options, nullptr, 0, &osl_groups[i * 2]));
}
OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {};
vector<OptixStackSizes> osl_stack_size(osl_groups.size());
/* Update SBT with new entries. */
sbt_data.alloc(NUM_PROGRAM_GROUPS + osl_groups.size());
for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
optix_assert(optixSbtRecordPackHeader(groups[i], &sbt_data[i]));
optix_assert(optixProgramGroupGetStackSize(groups[i], &stack_size[i]));
}
for (size_t i = 0; i < osl_groups.size(); ++i) {
if (osl_groups[i] != NULL) {
@@ -909,15 +907,13 @@ bool OptiXDevice::load_osl_kernels()
0,
&pipelines[PIP_SHADE]));
const unsigned int css = std::max(stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG,
stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG);
unsigned int dss = 0;
for (unsigned int i = 0; i < osl_stack_size.size(); ++i) {
dss = std::max(dss, osl_stack_size[i].dssDC);
}
optix_assert(optixPipelineSetStackSize(
pipelines[PIP_SHADE], 0, dss, css, pipeline_options.usesMotionBlur ? 3 : 2));
pipelines[PIP_SHADE], 0, dss, 0, pipeline_options.usesMotionBlur ? 3 : 2));
}
return !have_error();

View File

@@ -886,7 +886,7 @@ int RenderScheduler::get_num_samples_during_navigation(int resolution_divider) c
{
/* Special trick for fast navigation: schedule multiple samples during fast navigation
* (which will prefer to use lower resolution to keep up with refresh rate). This gives more
* usable visual feedback for artists. */
* usable visual feedback for artists. There are a couple of tricks though. */
if (is_denoise_active_during_update()) {
/* When denoising is used during navigation prefer using a higher resolution with less samples
@@ -896,12 +896,25 @@ int RenderScheduler::get_num_samples_during_navigation(int resolution_divider) c
return 1;
}
/* Schedule samples equal to the resolution divider up to a maximum of 4.
* The idea is to have enough information on the screen by increasing the sample count as the
* resolution is decreased. */
/* NOTE: Changing this formula will change the formula in
* `RenderScheduler::calculate_resolution_divider_for_time()`. */
return min(max(1, resolution_divider / pixel_size_), 4);
if (resolution_divider <= pixel_size_) {
/* When resolution divider is at or below pixel size, schedule one sample. This doesn't effect
* the sample count at this resolution division, but instead assists in the calculation of
* the resolution divider. */
return 1;
}
if (resolution_divider == pixel_size_ * 2) {
/* When resolution divider is the previous step to the final resolution, schedule two samples.
* This is so that rendering on lower resolution does not exceed time that it takes to render
* first sample at the full resolution. */
return 2;
}
/* Always render 4 samples, even if scene is configured for less.
* The idea here is to have enough information on the screen. Resolution divider of 2 allows us
* to have 4 time extra samples, so overall worst case timing is the same as the final resolution
* at one sample. */
return 4;
}
bool RenderScheduler::work_need_adaptive_filter() const
@@ -1087,10 +1100,9 @@ void RenderScheduler::update_start_resolution_divider()
/* TODO(sergey): Need to add hysteresis to avoid resolution divider bouncing around when actual
* render time is somewhere on a boundary between two resolutions. */
/* Don't let resolution drop below the desired one. It's better to be slow than provide an
* unreadable viewport render. */
start_resolution_divider_ = min(resolution_divider_for_update,
default_start_resolution_divider_);
/* Never increase resolution to higher than the pixel size (which is possible if the scene is
* simple and compute device is fast). */
start_resolution_divider_ = max(resolution_divider_for_update, pixel_size_);
VLOG_WORK << "Calculated resolution divider is " << start_resolution_divider_;
}
@@ -1175,24 +1187,24 @@ void RenderScheduler::check_time_limit_reached()
int RenderScheduler::calculate_resolution_divider_for_time(double desired_time, double actual_time)
{
const double ratio_between_times = actual_time / desired_time;
/* TODO(sergey): There should a non-iterative analytical formula here. */
/* We can pass `ratio_between_times` to `get_num_samples_during_navigation()` to get our
* navigation samples because the equation for calculating the resolution divider is as follows:
* `actual_time / desired_time = sqr(resolution_divider) / sample_count`.
* While `resolution_divider` is less than or equal to 4, `resolution_divider = sample_count`
* (This relationship is determined in `get_num_samples_during_navigation()`). With some
* substitution we end up with `actual_time / desired_time = resolution_divider` while the
* resolution divider is less than or equal to 4. Once the resolution divider increases above 4,
* the relationship of `actual_time / desired_time = resolution_divider` is no longer true,
* however the sample count retrieved from `get_num_samples_during_navigation()` is still
* accurate if we continue using this assumption. It should be noted that the interaction between
* `pixel_size`, sample count, and resolution divider are automatically accounted for and that's
* why `pixel_size` isn't included in any of the equations. */
const int navigation_samples = get_num_samples_during_navigation(
ceil_to_int(ratio_between_times));
int resolution_divider = 1;
return ceil_to_int(sqrt(navigation_samples * ratio_between_times));
/* This algorithm iterates through resolution dividers until a divider is found that achieves
* the desired render time. A limit of default_start_resolution_divider_ is put in place as the
* maximum resolution divider to avoid an unreadable viewport due to a low resolution.
* pre_resolution_division_samples and post_resolution_division_samples are used in this
* calculation to better predict the performance impact of changing resolution divisions as
* the sample count can also change between resolution divisions. */
while (actual_time > desired_time && resolution_divider < default_start_resolution_divider_) {
int pre_resolution_division_samples = get_num_samples_during_navigation(resolution_divider);
resolution_divider = resolution_divider * 2;
int post_resolution_division_samples = get_num_samples_during_navigation(resolution_divider);
actual_time /= 4.0 * pre_resolution_division_samples / post_resolution_division_samples;
}
return resolution_divider;
}
int calculate_resolution_divider_for_resolution(int width, int height, int resolution)

View File

@@ -412,12 +412,11 @@ if(WITH_CYCLES_CUDA_BINARIES)
# warn for other versions
if((CUDA_VERSION STREQUAL "101") OR
(CUDA_VERSION STREQUAL "102") OR
(CUDA_VERSION_MAJOR STREQUAL "11") OR
(CUDA_VERSION_MAJOR STREQUAL "12"))
(CUDA_VERSION_MAJOR STREQUAL "11"))
else()
message(WARNING
"CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
"build may succeed but only CUDA 12, 11, 10.2 and 10.1 have been tested")
"build may succeed but only CUDA 11, 10.2 and 10.1 have been tested")
endif()
# build for each arch
@@ -515,16 +514,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
else()
message(STATUS "CUDA binaries for ${arch} require CUDA 10 or earlier, skipped.")
endif()
elseif(${arch} MATCHES ".*_3.")
if(DEFINED CUDA11_NVCC_EXECUTABLE)
set(cuda_nvcc_executable ${CUDA11_NVCC_EXECUTABLE})
set(cuda_toolkit_root_dir ${CUDA11_TOOLKIT_ROOT_DIR})
elseif("${CUDA_VERSION}" LESS 120) # Support for sm_35, sm_37 was removed in CUDA 12
set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
else()
message(STATUS "CUDA binaries for ${arch} require CUDA 11 or earlier, skipped.")
endif()
elseif(${arch} MATCHES ".*_7." AND "${CUDA_VERSION}" LESS 100)
message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.")
elseif(${arch} MATCHES ".*_8.")

View File

@@ -686,7 +686,7 @@ ccl_device_inline Spectrum bsdf_albedo(ccl_private const ShaderData *sd,
albedo *= ((ccl_private const PrincipledSheenBsdf *)sc)->avg_value;
break;
case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
albedo *= bsdf_principled_hair_albedo(sd, sc);
albedo *= bsdf_principled_hair_albedo(sc);
break;
default:
break;

View File

@@ -478,18 +478,10 @@ ccl_device_inline float bsdf_principled_hair_albedo_roughness_scale(
return (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x + 5.969f;
}
ccl_device Spectrum bsdf_principled_hair_albedo(ccl_private const ShaderData *sd,
ccl_private const ShaderClosure *sc)
ccl_device Spectrum bsdf_principled_hair_albedo(ccl_private const ShaderClosure *sc)
{
ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)sc;
const float cos_theta_o = cos_from_sin(dot(sd->wi, safe_normalize(sd->dPdu)));
const float cos_gamma_o = cos_from_sin(bsdf->extra->geom.w);
const float f = fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta);
const float roughness_scale = bsdf_principled_hair_albedo_roughness_scale(bsdf->v);
/* TODO(lukas): Adding the Fresnel term here as a workaround until the proper refactor. */
return exp(-sqrt(bsdf->sigma) * roughness_scale) + make_spectrum(f);
return exp(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v));
}
ccl_device_inline Spectrum

View File

@@ -10,7 +10,7 @@
#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
# define KERNEL_STUB
#else
/* SSE optimization disabled for now on 32 bit, see bug #36316. */
/* SSE optimization disabled for now on 32 bit, see bug T36316. */
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE__
# define __KERNEL_SSE2__

View File

@@ -10,7 +10,7 @@
#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
# define KERNEL_STUB
#else
/* SSE optimization disabled for now on 32 bit, see bug #36316. */
/* SSE optimization disabled for now on 32 bit, see bug T36316. */
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE2__
# endif

View File

@@ -10,7 +10,7 @@
#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
# define KERNEL_STUB
#else
/* SSE optimization disabled for now on 32 bit, see bug #36316. */
/* SSE optimization disabled for now on 32 bit, see bug T36316. */
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE2__
# define __KERNEL_SSE3__

View File

@@ -431,7 +431,6 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_SORT_BLOCK_SIZE)
metal_grid_id);
#endif
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_PARALLEL_SORT_BLOCK_SIZE)
ccl_gpu_kernel_signature(integrator_sort_write_pass,
@@ -645,7 +644,7 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
const int y,
const half4 half_pixel)
{
/* Work around HIP issue with half float display, see #92972. */
/* Work around HIP issue with half float display, see T92972. */
#ifdef __KERNEL_HIP__
ccl_global half *out = ((ccl_global half *)rgba) + (rgba_offset + y * rgba_stride + x) * 4;
out[0] = half_pixel.x;

View File

@@ -172,15 +172,18 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
kernel_assert(!"Invalid ift_local");
return false;
}
if (is_null_intersection_function_table(metal_ancillaries->ift_local_prim)) {
if (local_isect) {
local_isect->num_hits = 0;
}
kernel_assert(!"Invalid ift_local_prim");
return false;
}
# endif
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
metalrt_intersector_type metalrt_intersect;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
if (triangle_only) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}
MetalRTIntersectionLocalPayload payload;
payload.self = ray->self;
payload.local_object = local_object;
@@ -192,48 +195,14 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
}
payload.result = false;
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
# if defined(__METALRT_MOTION__)
metalrt_intersector_type metalrt_intersect;
typename metalrt_intersector_type::result_type intersection;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
if (triangle_only) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}
# if defined(__METALRT_MOTION__)
intersection = metalrt_intersect.intersect(
r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload);
# else
metalrt_blas_intersector_type metalrt_intersect;
typename metalrt_blas_intersector_type::result_type intersection;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
if (triangle_only) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}
// if we know we are going to get max one hit, like for random-sss-walk we can
// optimize and accept the first hit
if (max_hits == 1) {
metalrt_intersect.accept_any_intersection(true);
}
int blas_index = metal_ancillaries->blas_userID_to_index_lookUp[local_object];
// transform the ray into object's local space
Transform itfm = kernel_data_fetch(objects, local_object).itfm;
r.origin = transform_point(&itfm, r.origin);
r.direction = transform_direction(&itfm, r.direction);
intersection = metalrt_intersect.intersect(
r,
metal_ancillaries->blas_accel_structs[blas_index].blas,
metal_ancillaries->ift_local_prim,
payload);
r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload);
# endif
if (lcg_state) {

View File

@@ -266,25 +266,13 @@ ccl_device_forceinline uchar4 make_uchar4(const uchar x,
# if defined(__METALRT_MOTION__)
# define METALRT_TAGS instancing, instance_motion, primitive_motion
# define METALRT_BLAS_TAGS , primitive_motion
# else
# define METALRT_TAGS instancing
# define METALRT_BLAS_TAGS
# endif /* __METALRT_MOTION__ */
typedef acceleration_structure<METALRT_TAGS> metalrt_as_type;
typedef intersection_function_table<triangle_data, METALRT_TAGS> metalrt_ift_type;
typedef metal::raytracing::intersector<triangle_data, METALRT_TAGS> metalrt_intersector_type;
# if defined(__METALRT_MOTION__)
typedef acceleration_structure<primitive_motion> metalrt_blas_as_type;
typedef intersection_function_table<triangle_data, primitive_motion> metalrt_blas_ift_type;
typedef metal::raytracing::intersector<triangle_data, primitive_motion>
metalrt_blas_intersector_type;
# else
typedef acceleration_structure<> metalrt_blas_as_type;
typedef intersection_function_table<triangle_data> metalrt_blas_ift_type;
typedef metal::raytracing::intersector<triangle_data> metalrt_blas_intersector_type;
# endif
#endif /* __METALRT__ */
@@ -297,12 +285,6 @@ struct Texture3DParamsMetal {
texture3d<float, access::sample> tex;
};
#ifdef __METALRT__
struct MetalRTBlasWrapper {
metalrt_blas_as_type blas;
};
#endif
struct MetalAncillaries {
device Texture2DParamsMetal *textures_2d;
device Texture3DParamsMetal *textures_3d;
@@ -312,9 +294,6 @@ struct MetalAncillaries {
metalrt_ift_type ift_default;
metalrt_ift_type ift_shadow;
metalrt_ift_type ift_local;
metalrt_blas_ift_type ift_local_prim;
constant MetalRTBlasWrapper *blas_accel_structs;
constant int *blas_userID_to_index_lookUp;
#endif
};

View File

@@ -139,20 +139,6 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
#endif
}
[[intersection(triangle, triangle_data )]] TriangleIntersectionResult
__anyhit__cycles_metalrt_local_hit_tri_prim(
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
uint primitive_id [[primitive_id]],
float2 barycentrics [[barycentric_coord]],
float ray_tmax [[distance]])
{
//instance_id, aka the user_id has been removed. If we take this function we optimized the
//SSS for starting traversal from a primitive acceleration structure instead of the root of the global AS.
//this means we will always be intersecting the correct object no need for the userid to check
return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
launch_params_metal, payload, payload.local_object, primitive_id, barycentrics, ray_tmax);
}
[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
__anyhit__cycles_metalrt_local_hit_tri(
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
@@ -177,17 +163,6 @@ __anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]])
return result;
}
[[intersection(bounding_box, triangle_data )]] BoundingBoxIntersectionResult
__anyhit__cycles_metalrt_local_hit_box_prim(const float ray_tmax [[max_distance]])
{
/* unused function */
BoundingBoxIntersectionResult result;
result.distance = ray_tmax;
result.accept = false;
result.continue_search = false;
return result;
}
template<uint intersection_type>
bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,

View File

@@ -372,16 +372,6 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_sorted_paths_array);
break;
}
case DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS: {
oneapi_call(
kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_sort_bucket_pass);
break;
}
case DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS: {
oneapi_call(
kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_sort_write_pass);
break;
}
case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY: {
oneapi_call(kg,
cgh,

View File

@@ -13,7 +13,6 @@
#include "scene/light.h"
#include "scene/mesh.h"
#include "scene/object.h"
#include "scene/osl.h"
#include "scene/pointcloud.h"
#include "scene/scene.h"
#include "scene/shader.h"
@@ -26,6 +25,7 @@
#ifdef WITH_OSL
# include "kernel/osl/globals.h"
# include "kernel/osl/services.h"
#endif
#include "util/foreach.h"
@@ -1717,7 +1717,20 @@ void GeometryManager::device_update_displacement_images(Device *device,
/* If any OSL node is used for displacement, it may reference a texture. But it's
* unknown which ones, so have to load them all. */
if (has_osl_node) {
OSLShaderManager::osl_image_slots(device, image_manager, bump_images);
set<OSLRenderServices *> services_shared;
device->foreach_device([&services_shared](Device *sub_device) {
OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory();
services_shared.insert(og->services);
});
for (OSLRenderServices *services : services_shared) {
for (auto it = services->textures.begin(); it != services->textures.end(); ++it) {
if (it->second->handle.get_manager() == image_manager) {
const int slot = it->second->handle.svm_slot();
bump_images.insert(slot);
}
}
}
}
#endif

View File

@@ -394,7 +394,7 @@ bool OSLShaderManager::osl_compile(const string &inputfile, const string &output
/* Compile.
*
* Mutex protected because the OSL compiler does not appear to be thread safe, see #92503. */
* Mutex protected because the OSL compiler does not appear to be thread safe, see T92503. */
static thread_mutex osl_compiler_mutex;
thread_scoped_lock lock(osl_compiler_mutex);
@@ -665,27 +665,6 @@ OSLNode *OSLShaderManager::osl_node(ShaderGraph *graph,
return node;
}
/* Static function, so only this file needs to be compile with RTTT. */
void OSLShaderManager::osl_image_slots(Device *device,
ImageManager *image_manager,
set<int> &image_slots)
{
set<OSLRenderServices *> services_shared;
device->foreach_device([&services_shared](Device *sub_device) {
OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory();
services_shared.insert(og->services);
});
for (OSLRenderServices *services : services_shared) {
for (auto it = services->textures.begin(); it != services->textures.end(); ++it) {
if (it->second->handle.get_manager() == image_manager) {
const int slot = it->second->handle.svm_slot();
image_slots.insert(slot);
}
}
}
}
/* Graph Compiler */
OSLCompiler::OSLCompiler(OSLShaderManager *manager, OSL::ShadingSystem *ss, Scene *scene)

View File

@@ -92,9 +92,6 @@ class OSLShaderManager : public ShaderManager {
const std::string &bytecode_hash = "",
const std::string &bytecode = "");
/* Get image slots used by OSL services on device. */
static void osl_image_slots(Device *device, ImageManager *image_manager, set<int> &image_slots);
private:
void texture_system_init();
void texture_system_free();

View File

@@ -573,7 +573,7 @@ void ShaderManager::device_update_common(Device * /*device*/,
kfilm->is_rec709 = is_rec709;
}
void ShaderManager::device_free_common(Device * /*device*/, DeviceScene *dscene, Scene * /*scene*/)
void ShaderManager::device_free_common(Device *, DeviceScene *dscene, Scene *scene)
{
dscene->shaders.free();
}

View File

@@ -520,7 +520,7 @@ bool TileManager::write_tile(const RenderBuffers &tile_buffers)
/* If there is an overscan used for the tile copy pixels into single continuous block of memory
* without any "gaps".
* This is a workaround for bug in OIIO (https://github.com/OpenImageIO/oiio/pull/3176).
* Our task reference: #93008. */
* Our task reference: T93008. */
if (tile_params.window_x || tile_params.window_y ||
tile_params.window_width != tile_params.width ||
tile_params.window_height != tile_params.height) {

View File

@@ -74,7 +74,7 @@ ccl_device float fast_sinf(float x)
*
* Results on: [-2pi,2pi].
*
* Examined 2173837240 values of sin: 0.00662760244 avg ULP diff, 2 max ULP,
* Examined 2173837240 values of sin: 0.00662760244 avg ulp diff, 2 max ulp,
* 1.19209e-07 max error
*/
int q = fast_rint(x * M_1_PI_F);
@@ -256,11 +256,11 @@ ccl_device float fast_acosf(float x)
/* clamp and crush denormals. */
const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
/* Based on http://www.pouet.net/topic.php?which=9132&page=2
* 85% accurate (ULP 0)
* 85% accurate (ulp 0)
* Examined 2130706434 values of acos:
* 15.2000597 avg ULP diff, 4492 max ULP, 4.51803e-05 max error // without "denormal crush"
* 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush"
* Examined 2130706434 values of acos:
* 15.2007108 avg ULP diff, 4492 max ULP, 4.51803e-05 max error // with "denormal crush"
* 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush"
*/
const float a = sqrtf(1.0f - m) *
(1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f)));
@@ -270,8 +270,9 @@ ccl_device float fast_acosf(float x)
ccl_device float fast_asinf(float x)
{
/* Based on acosf approximation above.
* Max error is 4.51133e-05 (ULPS are higher because we are consistently off
* by a little amount). */
* Max error is 4.51133e-05 (ulps are higher because we are consistently off
* by a little amount).
*/
const float f = fabsf(x);
/* Clamp and crush denormals. */
const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
@@ -289,9 +290,9 @@ ccl_device float fast_atanf(float x)
const float t = s * s;
/* http://mathforum.org/library/drmath/view/62672.html
* Examined 4278190080 values of atan:
* 2.36864877 avg ULP diff, 302 max ULP, 6.55651e-06 max error // (with denormals)
* 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals)
* Examined 4278190080 values of atan:
* 171160502 avg ULP diff, 855638016 max ULP, 6.55651e-06 max error // (crush denormals)
* 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals)
*/
float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f);
if (a > 1.0f) {
@@ -342,8 +343,8 @@ ccl_device float fast_log2f(float x)
int exponent = (int)(bits >> 23) - 127;
float f = __uint_as_float((bits & 0x007FFFFF) | 0x3f800000) - 1.0f;
/* Examined 2130706432 values of log2 on [1.17549435e-38,3.40282347e+38]:
* 0.0797524457 avg ULP diff, 3713596 max ULP, 7.62939e-06 max error.
* ULP histogram:
* 0.0797524457 avg ulp diff, 3713596 max ulp, 7.62939e-06 max error.
* ulp histogram:
* 0 = 97.46%
* 1 = 2.29%
* 2 = 0.11%
@@ -362,7 +363,7 @@ ccl_device float fast_log2f(float x)
ccl_device_inline float fast_logf(float x)
{
/* Examined 2130706432 values of logf on [1.17549435e-38,3.40282347e+38]:
* 0.313865375 avg ULP diff, 5148137 max ULP, 7.62939e-06 max error.
* 0.313865375 avg ulp diff, 5148137 max ulp, 7.62939e-06 max error.
*/
return fast_log2f(x) * M_LN2_F;
}
@@ -370,7 +371,7 @@ ccl_device_inline float fast_logf(float x)
ccl_device_inline float fast_log10(float x)
{
/* Examined 2130706432 values of log10f on [1.17549435e-38,3.40282347e+38]:
* 0.631237033 avg ULP diff, 4471615 max ULP, 3.8147e-06 max error.
* 0.631237033 avg ulp diff, 4471615 max ulp, 3.8147e-06 max error.
*/
return fast_log2f(x) * M_LN2_F / M_LN10_F;
}
@@ -391,12 +392,12 @@ ccl_device float fast_exp2f(float x)
/* Range reduction. */
int m = (int)x;
x -= m;
x = 1.0f - (1.0f - x); /* Crush denormals (does not affect max ULPS!). */
x = 1.0f - (1.0f - x); /* Crush denormals (does not affect max ulps!). */
/* 5th degree polynomial generated with sollya
* Examined 2247622658 values of exp2 on [-126,126]: 2.75764912 avg ULP diff,
* 232 max ULP.
* Examined 2247622658 values of exp2 on [-126,126]: 2.75764912 avg ulp diff,
* 232 max ulp.
*
* ULP histogram:
* ulp histogram:
* 0 = 87.81%
* 1 = 4.18%
*/
@@ -414,14 +415,14 @@ ccl_device float fast_exp2f(float x)
ccl_device_inline float fast_expf(float x)
{
/* Examined 2237485550 values of exp on [-87.3300018,87.3300018]:
* 2.6666452 avg ULP diff, 230 max ULP.
* 2.6666452 avg ulp diff, 230 max ulp.
*/
return fast_exp2f(x / M_LN2_F);
}
#if !defined(__KERNEL_GPU__) && !defined(_MSC_VER)
/* MSVC seems to have a code-gen bug here in at least SSE41/AVX, see
* #78047 and #78869 for details. Just disable for now, it only makes
* T78047 and T78869 for details. Just disable for now, it only makes
* a small difference in denoising performance. */
ccl_device float4 fast_exp2f4(float4 x)
{
@@ -453,7 +454,7 @@ ccl_device_inline float4 fast_expf4(float4 x)
ccl_device_inline float fast_exp10(float x)
{
/* Examined 2217701018 values of exp10 on [-37.9290009,37.9290009]:
* 2.71732409 avg ULP diff, 232 max ULP.
* 2.71732409 avg ulp diff, 232 max ulp.
*/
return fast_exp2f(x * M_LN10_F / M_LN2_F);
}
@@ -474,7 +475,7 @@ ccl_device float fast_sinhf(float x)
float a = fabsf(x);
if (a > 1.0f) {
/* Examined 53389559 values of sinh on [1,87.3300018]:
* 33.6886442 avg ULP diff, 178 max ULP. */
* 33.6886442 avg ulp diff, 178 max ulp. */
float e = fast_expf(a);
return copysignf(0.5f * e - 0.5f / e, x);
}
@@ -494,7 +495,7 @@ ccl_device float fast_sinhf(float x)
ccl_device_inline float fast_coshf(float x)
{
/* Examined 2237485550 values of cosh on [-87.3300018,87.3300018]:
* 1.78256726 avg ULP diff, 178 max ULP.
* 1.78256726 avg ulp diff, 178 max ulp.
*/
float e = fast_expf(fabsf(x));
return 0.5f * e + 0.5f / e;
@@ -505,7 +506,7 @@ ccl_device_inline float fast_tanhf(float x)
/* Examined 4278190080 values of tanh on [-3.40282347e+38,3.40282347e+38]:
* 3.12924e-06 max error.
*/
/* NOTE: ULP error is high because of sub-optimal handling around the origin. */
/* NOTE: ulp error is high because of sub-optimal handling around the origin. */
float e = fast_expf(2.0f * fabsf(x));
return copysignf(1.0f - 2.0f / (1.0f + e), x);
}
@@ -578,7 +579,7 @@ ccl_device_inline float fast_erfcf(float x)
{
/* Examined 2164260866 values of erfcf on [-4,4]: 1.90735e-06 max error.
*
* ULP histogram:
* ulp histogram:
*
* 0 = 80.30%
*/

View File

@@ -1201,6 +1201,7 @@ void GHOST_GetVulkanHandles(GHOST_ContextHandle context,
void *r_instance,
void *r_physical_device,
void *r_device,
void *r_compute_command_buffer,
uint32_t *r_graphic_queue_family);
/**

View File

@@ -40,7 +40,7 @@ class GHOST_IContext {
virtual unsigned int getDefaultFramebuffer() = 0;
virtual GHOST_TSuccess getVulkanHandles(void *, void *, void *, uint32_t *) = 0;
virtual GHOST_TSuccess getVulkanHandles(void *, void *, void *, void *, uint32_t *) = 0;
/**
* Gets the Vulkan framebuffer related resource handles associated with the Vulkan context.

View File

@@ -1203,10 +1203,12 @@ void GHOST_GetVulkanHandles(GHOST_ContextHandle contexthandle,
void *r_instance,
void *r_physical_device,
void *r_device,
void *r_compute_command_buffer,
uint32_t *r_graphic_queue_family)
{
GHOST_IContext *context = (GHOST_IContext *)contexthandle;
context->getVulkanHandles(r_instance, r_physical_device, r_device, r_graphic_queue_family);
context->getVulkanHandles(
r_instance, r_physical_device, r_device, r_compute_command_buffer, r_graphic_queue_family);
}
void GHOST_GetVulkanBackbuffer(GHOST_WindowHandle windowhandle,

View File

@@ -142,6 +142,7 @@ class GHOST_Context : public GHOST_IContext {
virtual GHOST_TSuccess getVulkanHandles(void * /*r_instance*/,
void * /*r_physical_device*/,
void * /*r_device*/,
void * /*r_compute_command_buffer*/,
uint32_t * /*r_graphic_queue_family*/) override
{
return GHOST_kFailure;

View File

@@ -516,7 +516,7 @@ GHOST_TSuccess GHOST_ContextCGL::releaseNativeHandles()
/* OpenGL on Metal
*
* Use Metal layer to avoid Viewport lagging on macOS, see #60043. */
* Use Metal layer to avoid Viewport lagging on macOS, see T60043. */
static const MTLPixelFormat METAL_FRAMEBUFFERPIXEL_FORMAT = MTLPixelFormatBGRA8Unorm;
static const OSType METAL_CORE_VIDEO_PIXEL_FORMAT = kCVPixelFormatType_32BGRA;

View File

@@ -141,7 +141,7 @@ GHOST_TSuccess GHOST_ContextGLX::initializeDrawingContext()
/* -------------------------------------------------------------------- */
#else
/* Important to initialize only GLXEW (_not_ GLEW),
* since this breaks w/ Mesa's `swrast`, see: #46431. */
* since this breaks w/ Mesa's `swrast`, see: T46431. */
glxewInit();
#endif /* USE_GLXEW_INIT_WORKAROUND */

View File

@@ -192,6 +192,9 @@ GHOST_TSuccess GHOST_ContextVK::destroySwapchain()
if (m_render_pass != VK_NULL_HANDLE) {
vkDestroyRenderPass(m_device, m_render_pass, NULL);
}
if (m_compute_command_buffer != VK_NULL_HANDLE) {
vkFreeCommandBuffers(m_device, m_command_pool, 1, &m_compute_command_buffer);
}
for (auto command_buffer : m_command_buffers) {
vkFreeCommandBuffers(m_device, m_command_pool, 1, &command_buffer);
}
@@ -311,11 +314,13 @@ GHOST_TSuccess GHOST_ContextVK::getVulkanBackbuffer(void *image,
GHOST_TSuccess GHOST_ContextVK::getVulkanHandles(void *r_instance,
void *r_physical_device,
void *r_device,
void *r_compute_command_buffer,
uint32_t *r_graphic_queue_family)
{
*((VkInstance *)r_instance) = m_instance;
*((VkPhysicalDevice *)r_physical_device) = m_physical_device;
*((VkDevice *)r_device) = m_device;
*((VkCommandBuffer *)r_compute_command_buffer) = m_compute_command_buffer;
*r_graphic_queue_family = m_queue_family_graphic;
return GHOST_kSuccess;
@@ -619,16 +624,34 @@ static GHOST_TSuccess selectPresentMode(VkPhysicalDevice device,
return GHOST_kFailure;
}
GHOST_TSuccess GHOST_ContextVK::createCommandBuffers()
GHOST_TSuccess GHOST_ContextVK::createCommandPools()
{
m_command_buffers.resize(m_swapchain_image_views.size());
VkCommandPoolCreateInfo poolInfo = {};
poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
poolInfo.queueFamilyIndex = m_queue_family_graphic;
VK_CHECK(vkCreateCommandPool(m_device, &poolInfo, NULL, &m_command_pool));
return GHOST_kSuccess;
}
GHOST_TSuccess GHOST_ContextVK::createComputeCommandBuffer()
{
assert(m_command_pool != VK_NULL_HANDLE);
VkCommandBufferAllocateInfo alloc_info = {};
alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
alloc_info.commandPool = m_command_pool;
alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
alloc_info.commandBufferCount = 1;
VK_CHECK(vkAllocateCommandBuffers(m_device, &alloc_info, &m_compute_command_buffer));
return GHOST_kSuccess;
}
GHOST_TSuccess GHOST_ContextVK::createGraphicsCommandBuffers()
{
assert(m_command_pool != VK_NULL_HANDLE);
m_command_buffers.resize(m_swapchain_image_views.size());
VkCommandBufferAllocateInfo alloc_info = {};
alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
@@ -637,7 +660,6 @@ GHOST_TSuccess GHOST_ContextVK::createCommandBuffers()
alloc_info.commandBufferCount = static_cast<uint32_t>(m_command_buffers.size());
VK_CHECK(vkAllocateCommandBuffers(m_device, &alloc_info, m_command_buffers.data()));
return GHOST_kSuccess;
}
@@ -776,7 +798,7 @@ GHOST_TSuccess GHOST_ContextVK::createSwapchain()
VK_CHECK(vkCreateFence(m_device, &fence_info, NULL, &m_in_flight_fences[i]));
}
createCommandBuffers();
createGraphicsCommandBuffers();
return GHOST_kSuccess;
}
@@ -841,6 +863,13 @@ GHOST_TSuccess GHOST_ContextVK::initializeDrawingContext()
extensions_device.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
}
extensions_device.push_back("VK_KHR_dedicated_allocation");
extensions_device.push_back("VK_KHR_get_memory_requirements2");
/* Enable MoltenVK required instance extensions.*/
#ifdef VK_MVK_MOLTENVK_EXTENSION_NAME
requireExtension(
extensions_available, extensions_enabled, "VK_KHR_get_physical_device_properties2");
#endif
VkApplicationInfo app_info = {};
app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
@@ -903,6 +932,15 @@ GHOST_TSuccess GHOST_ContextVK::initializeDrawingContext()
return GHOST_kFailure;
}
#ifdef VK_MVK_MOLTENVK_EXTENSION_NAME
/* According to the Vulkan specs, when `VK_KHR_portability_subset` is available it should be
* enabled. See
* https://vulkan.lunarg.com/doc/view/1.2.198.1/mac/1.2-extensions/vkspec.html#VUID-VkDeviceCreateInfo-pProperties-04451*/
if (device_extensions_support(m_physical_device, {VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME})) {
extensions_device.push_back(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME);
}
#endif
vector<VkDeviceQueueCreateInfo> queue_create_infos;
{
@@ -960,6 +998,9 @@ GHOST_TSuccess GHOST_ContextVK::initializeDrawingContext()
VK_CHECK(vkCreateDevice(m_physical_device, &device_create_info, NULL, &m_device));
createCommandPools();
createComputeCommandBuffer();
vkGetDeviceQueue(m_device, m_queue_family_graphic, 0, &m_graphic_queue);
if (use_window_surface) {

View File

@@ -113,6 +113,7 @@ class GHOST_ContextVK : public GHOST_Context {
GHOST_TSuccess getVulkanHandles(void *r_instance,
void *r_physical_device,
void *r_device,
void *r_compute_command_buffer,
uint32_t *r_graphic_queue_family);
/**
* Gets the Vulkan framebuffer related resource handles associated with the Vulkan context.
@@ -182,6 +183,7 @@ class GHOST_ContextVK : public GHOST_Context {
std::vector<VkImage> m_swapchain_images;
std::vector<VkImageView> m_swapchain_image_views;
std::vector<VkFramebuffer> m_swapchain_framebuffers;
VkCommandBuffer m_compute_command_buffer;
std::vector<VkCommandBuffer> m_command_buffers;
VkRenderPass m_render_pass;
VkExtent2D m_render_extent;
@@ -200,6 +202,8 @@ class GHOST_ContextVK : public GHOST_Context {
GHOST_TSuccess pickPhysicalDevice(std::vector<const char *> required_exts);
GHOST_TSuccess createSwapchain();
GHOST_TSuccess destroySwapchain();
GHOST_TSuccess createCommandBuffers();
GHOST_TSuccess createCommandPools();
GHOST_TSuccess createGraphicsCommandBuffers();
GHOST_TSuccess createComputeCommandBuffer();
GHOST_TSuccess recordCommandBuffers();
};

View File

@@ -302,7 +302,7 @@ bool GHOST_NDOFManager::setDevice(ushort vendor_id, ushort product_id)
switch (product_id) {
case 0xC62E: /* Plugged in. */
case 0xC62F: /* Wireless. */
case 0xC658: /* Wireless (3DConnexion Universal Wireless Receiver in WIN32), see #82412. */
case 0xC658: /* Wireless (3DConnexion Universal Wireless Receiver in WIN32), see T82412. */
{
device_type_ = NDOF_SpaceMouseWireless;
hid_map_button_num_ = 2;
@@ -341,7 +341,7 @@ bool GHOST_NDOFManager::setDevice(ushort vendor_id, ushort product_id)
hid_map_button_mask_ = int(~(UINT_MAX << hid_map_button_num_));
}
CLOG_INFO(LOG, 2, "%d buttons -> hex:%X", hid_map_button_num_, uint(hid_map_button_mask_));
CLOG_INFO(LOG, 2, "%d buttons -> hex:%X", hid_map_button_num_, (uint)hid_map_button_mask_);
return device_type_ != NDOF_UnknownDevice;
}
@@ -445,14 +445,14 @@ void GHOST_NDOFManager::updateButton(int button_number, bool press, uint64_t tim
2,
"button=%d, press=%d (out of range %d, ignoring!)",
button_number,
int(press),
(int)press,
hid_map_button_num_);
return;
}
const NDOF_ButtonT button = hid_map_[button_number];
if (button == NDOF_BUTTON_NONE) {
CLOG_INFO(
LOG, 2, "button=%d, press=%d (mapped to none, ignoring!)", button_number, int(press));
LOG, 2, "button=%d, press=%d (mapped to none, ignoring!)", button_number, (int)press);
return;
}
@@ -460,7 +460,7 @@ void GHOST_NDOFManager::updateButton(int button_number, bool press, uint64_t tim
2,
"button=%d, press=%d, name=%s",
button_number,
int(press),
(int)press,
ndof_button_names[button]);
GHOST_IWindow *window = system_.getWindowManager()->getActiveWindow();

View File

@@ -602,7 +602,7 @@ void GHOST_SystemSDL::processEvent(SDL_Event *sdl_event)
/* NOTE: the `sdl_sub_evt.keysym.sym` is truncated,
* for unicode support ghost has to be modified. */
/* TODO(@ideasman42): support full unicode, SDL supports this but it needs to be
/* TODO(@campbellbarton): support full unicode, SDL supports this but it needs to be
* explicitly enabled via #SDL_StartTextInput which GHOST would have to wrap. */
char utf8_buf[sizeof(GHOST_TEventKeyData::utf8_buf)] = {'\0'};
if (type == GHOST_kEventKeyDown) {

View File

@@ -141,7 +141,7 @@ constexpr size_t events_pending_default_size = 4096 / sizeof(void *);
* \{ */
/**
* GNOME (mutter 42.2 had a bug with confine not respecting scale - Hi-DPI), See: #98793.
* GNOME (mutter 42.2 had a bug with confine not respecting scale - Hi-DPI), See: T98793.
* Even though this has been fixed, at time of writing it's not yet in a release.
* Workaround the problem by implementing confine with a software cursor.
* While this isn't ideal, it's not adding a lot of overhead as software
@@ -176,7 +176,7 @@ static bool use_gnome_confine_hack = false;
/**
* KDE (plasma 5.26.1) has a bug where the cursor surface needs to be committed
* (via `wl_surface_commit`) when it was hidden and is being set to visible again, see: #102048.
* (via `wl_surface_commit`) when it was hidden and is being set to visible again, see: T102048.
* See: https://bugs.kde.org/show_bug.cgi?id=461001
*/
#define USE_KDE_TABLET_HIDDEN_CURSOR_HACK
@@ -197,8 +197,8 @@ static bool use_gnome_confine_hack = false;
* \{ */
/**
* Fix short-cut part of keyboard reading code not properly handling some keys, see: #102194.
* \note This is similar to X11 workaround by the same name, see: #47228.
* Fix short-cut part of keyboard reading code not properly handling some keys, see: T102194.
* \note This is similar to X11 workaround by the same name, see: T47228.
*/
#define USE_NON_LATIN_KB_WORKAROUND
@@ -241,7 +241,7 @@ enum {
BTN_STYLUS = 0x14b,
/** Use as right-mouse. */
BTN_STYLUS2 = 0x14c,
/** NOTE(@ideasman42): Map to an additional button (not sure which hardware uses this). */
/** NOTE(@campbellbarton): Map to an additional button (not sure which hardware uses this). */
BTN_STYLUS3 = 0x149,
};
@@ -916,7 +916,7 @@ struct GWL_Display {
* The main purpose of having an active seat is an alternative from always using the first
* seat which prevents events from any other seat.
*
* NOTE(@ideasman42): This could be extended and developed further extended to support
* NOTE(@campbellbarton): This could be extended and developed further extended to support
* an active seat per window (for e.g.), basic support is sufficient for now as currently isn't
* a widely used feature.
*/
@@ -957,7 +957,7 @@ struct GWL_Display {
* Needed because #GHOST_System::dispatchEvents fires timers
* outside of WAYLAND (without locking the `timer_mutex`).
*/
GHOST_TimerManager *ghost_timer_manager = nullptr;
GHOST_TimerManager *ghost_timer_manager;
#endif /* USE_EVENT_BACKGROUND_THREAD */
};
@@ -975,6 +975,9 @@ static void gwl_display_destroy(GWL_Display *display)
ghost_wl_display_lock_without_input(display->wl_display, display->system->server_mutex);
display->events_pthread_is_active = false;
}
delete display->ghost_timer_manager;
display->ghost_timer_manager = nullptr;
#endif
/* For typical WAYLAND use this will always be set.
@@ -1012,13 +1015,6 @@ static void gwl_display_destroy(GWL_Display *display)
gwl_display_event_thread_destroy(display);
display->system->server_mutex->unlock();
}
/* Important to remove after the seats which may have key repeat timers active. */
if (display->ghost_timer_manager) {
delete display->ghost_timer_manager;
display->ghost_timer_manager = nullptr;
}
#endif /* USE_EVENT_BACKGROUND_THREAD */
if (display->wl_display) {
@@ -1239,7 +1235,7 @@ static void gwl_registry_entry_remove_all(GWL_Display *display)
{
const bool on_exit = true;
/* NOTE(@ideasman42): Free by slot instead of simply looping over
/* NOTE(@campbellbarton): Free by slot instead of simply looping over
* `display->registry_entry` so the order of freeing is always predictable.
* Otherwise global objects would be feed in the order they are registered.
* While this works in my tests, it could cause difficult to reproduce bugs
@@ -1269,7 +1265,7 @@ static void gwl_registry_entry_remove_all(GWL_Display *display)
* so there is no reason to update all other outputs that an output was removed (for e.g.).
* Pass as -1 to update all slots.
*
* NOTE(@ideasman42): Updating all other items on a single change is typically worth avoiding.
* NOTE(@campbellbarton): Updating all other items on a single change is typically worth avoiding.
* In practice this isn't a problem as so there are so few elements in `display->registry_entry`,
* so few use update functions and adding/removal at runtime is rarely called (plugging/unplugging)
* hardware for e.g. So while it's possible to store dependency links to avoid unnecessary
@@ -1318,7 +1314,7 @@ static void ghost_wl_display_report_error(struct wl_display *display)
fprintf(stderr, "The Wayland connection experienced a fatal error: %s\n", strerror(ecode));
}
/* NOTE(@ideasman42): The application is running,
/* NOTE(@campbellbarton): The application is running,
* however an error closes all windows and most importantly:
* shuts down the GPU context (loosing all GPU state - shaders, bind codes etc),
* so recovering from this effectively involves restarting.
@@ -1328,7 +1324,7 @@ static void ghost_wl_display_report_error(struct wl_display *display)
* So in practice re-connecting to the display server isn't an option.
*
* Exit since leaving the process open will simply flood the output and do nothing.
* Although as the process is in a valid state, auto-save for e.g. is possible, see: #100855. */
* Although as the process is in a valid state, auto-save for e.g. is possible, see: T100855. */
::exit(-1);
}
@@ -1442,7 +1438,7 @@ static GHOST_TKey xkb_map_gkey(const xkb_keysym_t sym)
/* Additional keys for non US layouts. */
/* Uses the same physical key as #XKB_KEY_KP_Decimal for QWERTZ layout, see: #102287. */
/* Uses the same physical key as #XKB_KEY_KP_Decimal for QWERTZ layout, see: T102287. */
GXMAP(gkey, XKB_KEY_KP_Separator, GHOST_kKeyNumpadPeriod);
default:
@@ -2970,7 +2966,7 @@ static void gesture_pinch_handle_begin(void *data,
if (wl_surface *wl_surface_focus = seat->pointer.wl_surface_window) {
win = ghost_wl_surface_user_data(wl_surface_focus);
}
/* NOTE(@ideasman42): Blender's use of track-pad coordinates is inconsistent and needs work.
/* NOTE(@campbellbarton): Blender's use of track-pad coordinates is inconsistent and needs work.
* This isn't specific to WAYLAND, in practice they tend to work well enough in most cases.
* Some operators scale by the UI scale, some don't.
* Even this window scale is not correct because it doesn't account for:
@@ -2984,7 +2980,7 @@ static void gesture_pinch_handle_begin(void *data,
*/
const wl_fixed_t win_scale = win ? win->scale() : 1;
/* NOTE(@ideasman42): Scale factors match Blender's operators & default preferences.
/* NOTE(@campbellbarton): Scale factors match Blender's operators & default preferences.
* For these values to work correctly, operator logic will need to be changed not to scale input
* by the region size (as with 3D view zoom) or preference for 3D view orbit sensitivity.
*
@@ -3147,7 +3143,7 @@ static const struct zwp_pointer_gesture_swipe_v1_listener gesture_swipe_listener
/* -------------------------------------------------------------------- */
/** \name Listener (Touch Seat), #wl_touch_listener
*
* NOTE(@ideasman42): It's not clear if this interface is used by popular compositors.
* NOTE(@campbellbarton): It's not clear if this interface is used by popular compositors.
* It looks like GNOME/KDE only support `zwp_pointer_gestures_v1_interface`.
* If this isn't used anywhere, it could be removed.
* \{ */
@@ -3808,9 +3804,9 @@ static xkb_keysym_t xkb_state_key_get_one_sym_without_modifiers(
/* Use an empty keyboard state to access key symbol without modifiers. */
xkb_keysym_t sym = xkb_state_key_get_one_sym(xkb_state_empty, key);
/* NOTE(@ideasman42): Only perform the number-locked lookup as a fallback
/* NOTE(@campbellbarton): Only perform the number-locked lookup as a fallback
* when a number-pad key has been pressed. This is important as some key-maps use number lock
* for switching other layers (in particular `de(neo_qwertz)` turns on layer-4), see: #96170.
* for switching other layers (in particular `de(neo_qwertz)` turns on layer-4), see: T96170.
* Alternative solutions could be to inspect the layout however this could get involved
* and turning on the number-lock is only needed for a limited set of keys. */
@@ -3938,7 +3934,7 @@ static void keyboard_handle_key(void *data,
else {
/* Key-up from keys that were not repeating cause the repeat timer to pause.
*
* NOTE(@ideasman42): This behavior isn't universal, some text input systems will
* NOTE(@campbellbarton): This behavior isn't universal, some text input systems will
* stop the repeat entirely. Choose to pause repeat instead as this is what GTK/WIN32 do,
* and it fits better for keyboard input that isn't related to text entry. */
timer_action = RESET;
@@ -4467,7 +4463,7 @@ static void xdg_output_handle_logical_size(void *data,
#ifdef USE_GNOME_CONFINE_HACK
/* Use a bug in GNOME to check GNOME is in use. If the bug is fixed this won't cause an issue
* as #98793 has been fixed up-stream too, but not in a release at time of writing. */
* as T98793 has been fixed up-stream too, but not in a release at time of writing. */
use_gnome_confine_hack = true;
#endif
@@ -7037,7 +7033,7 @@ bool GHOST_SystemWayland::window_cursor_grab_set(const GHOST_TGrabCursorMode mod
UNPACK2(xy_next));
wl_surface_commit(wl_surface);
/* NOTE(@ideasman42): The new cursor position is a hint,
/* NOTE(@campbellbarton): The new cursor position is a hint,
* it's possible the hint is ignored. It doesn't seem like there is a good way to
* know if the hint will be used or not, at least not immediately. */
xy_motion[0] = xy_next[0];
@@ -7080,7 +7076,7 @@ bool GHOST_SystemWayland::window_cursor_grab_set(const GHOST_TGrabCursorMode mod
if (mode != GHOST_kGrabDisable) {
if (grab_state_next.use_lock) {
if (!grab_state_prev.use_lock) {
/* TODO(@ideasman42): As WAYLAND does not support warping the pointer it may not be
/* TODO(@campbellbarton): As WAYLAND does not support warping the pointer it may not be
* possible to support #GHOST_kGrabWrap by pragmatically settings it's coordinates.
* An alternative could be to draw the cursor in software (and hide the real cursor),
* or just accept a locked cursor on WAYLAND. */

View File

@@ -208,7 +208,7 @@ class GHOST_SystemWayland : public GHOST_System {
* Clear all references to this output.
*
* \note The compositor should have already called the `wl_surface_listener.leave` callback,
* however some compositors may not (see #103586).
* however some compositors may not (see T103586).
* So remove references to the output before it's destroyed to avoid crashing.
*
* \return true when any references were removed.

View File

@@ -424,9 +424,10 @@ bool GHOST_SystemWin32::processEvents(bool waitForEvent)
processTrackpad();
/* `PeekMessage` above is allowed to dispatch messages to the `wndproc` without us
/* PeekMessage above is allowed to dispatch messages to the wndproc without us
* noticing, so we need to check the event manager here to see if there are
* events waiting in the queue. */
* events waiting in the queue.
*/
hasEventHandled |= this->m_eventManager->getNumEvents() > 0;
} while (waitForEvent && !hasEventHandled);
@@ -565,8 +566,8 @@ GHOST_TKey GHOST_SystemWin32::hardKey(RAWINPUT const &raw, bool *r_key_down)
/**
* \note this function can be extended to include other exotic cases as they arise.
*
* This function was added in response to bug #25715.
* This is going to be a long list #42426.
* This function was added in response to bug T25715.
* This is going to be a long list T42426.
*/
GHOST_TKey GHOST_SystemWin32::processSpecialKey(short vKey, short scanCode) const
{
@@ -1079,11 +1080,11 @@ GHOST_EventCursor *GHOST_SystemWin32::processCursorEvent(GHOST_WindowWin32 *wind
if (window->getCursorGrabMode() == GHOST_kGrabHide) {
window->getClientBounds(bounds);
/* WARNING(@ideasman42): The current warping logic fails to warp on every event,
/* WARNING(@campbellbarton): The current warping logic fails to warp on every event,
* so the box needs to small enough not to let the cursor escape the window but large
* enough that the cursor isn't being warped every time.
* If this was not the case it would be less trouble to simply warp the cursor to the
* center of the screen on every motion, see: D16558 (alternative fix for #102346). */
* center of the screen on every motion, see: D16558 (alternative fix for T102346). */
const int32_t subregion_div = 4; /* One quarter of the region. */
const int32_t size[2] = {bounds.getWidth(), bounds.getHeight()};
const int32_t center[2] = {(bounds.m_l + bounds.m_r) / 2, (bounds.m_t + bounds.m_b) / 2};
@@ -1178,7 +1179,7 @@ GHOST_EventKey *GHOST_SystemWin32::processKeyEvent(GHOST_WindowWin32 *window, RA
GHOST_TKey key = system->hardKey(raw, &key_down);
GHOST_EventKey *event;
/* NOTE(@ideasman42): key repeat in WIN32 also applies to modifier-keys.
/* NOTE(@campbellbarton): key repeat in WIN32 also applies to modifier-keys.
* Check for this case and filter out modifier-repeat.
* Typically keyboard events are *not* filtered as part of GHOST's event handling.
* As other GHOST back-ends don't have the behavior, it's simplest not to send them through.
@@ -1209,7 +1210,7 @@ GHOST_EventKey *GHOST_SystemWin32::processKeyEvent(GHOST_WindowWin32 *window, RA
const bool ctrl_pressed = has_state && state[VK_CONTROL] & 0x80;
const bool alt_pressed = has_state && state[VK_MENU] & 0x80;
/* We can be here with !key_down if processing dead keys (diacritics). See #103119. */
/* We can be here with !key_down if processing dead keys (diacritics). See T103119. */
/* No text with control key pressed (Alt can be used to insert special characters though!). */
if (ctrl_pressed && !alt_pressed) {

View File

@@ -282,7 +282,7 @@ class GHOST_SystemWin32 : public GHOST_System {
GHOST_TSuccess exit();
/**
* Converts raw WIN32 key codes from the `wndproc` to GHOST keys.
* Converts raw WIN32 key codes from the wndproc to GHOST keys.
* \param vKey: The virtual key from #hardKey.
* \param ScanCode: The ScanCode of pressed key (similar to PS/2 Set 1).
* \param extend: Flag if key is not primly (left or right).
@@ -291,7 +291,7 @@ class GHOST_SystemWin32 : public GHOST_System {
GHOST_TKey convertKey(short vKey, short ScanCode, short extend) const;
/**
* Catches raw WIN32 key codes from WM_INPUT in the `wndproc`.
* Catches raw WIN32 key codes from WM_INPUT in the wndproc.
* \param raw: RawInput structure with detailed info about the key event.
* \param r_key_down: Set true when the key is pressed, otherwise false.
* \return The GHOST key (GHOST_kKeyUnknown if no match).
@@ -319,8 +319,8 @@ class GHOST_SystemWin32 : public GHOST_System {
* Creates tablet events from pointer events.
* \param type: The type of pointer event.
* \param window: The window receiving the event (the active window).
* \param wParam: The wParam from the `wndproc`.
* \param lParam: The lParam from the `wndproc`.
* \param wParam: The wParam from the wndproc.
* \param lParam: The lParam from the wndproc.
* \param eventhandled: True if the method handled the event.
*/
static void processPointerEvent(
@@ -337,8 +337,8 @@ class GHOST_SystemWin32 : public GHOST_System {
/**
* Handles a mouse wheel event.
* \param window: The window receiving the event (the active window).
* \param wParam: The wParam from the `wndproc`.
* \param lParam: The lParam from the `wndproc`.
* \param wParam: The wParam from the wndproc.
* \param lParam: The lParam from the wndproc.
*/
static void processWheelEvent(GHOST_WindowWin32 *window, WPARAM wParam, LPARAM lParam);

View File

@@ -46,7 +46,7 @@
#ifdef WITH_X11_XFIXES
# include <X11/extensions/Xfixes.h>
/* Workaround for XWayland grab glitch: #53004. */
/* Workaround for XWayland grab glitch: T53004. */
# define WITH_XWAYLAND_HACK
#endif
@@ -71,11 +71,11 @@
# define USE_XINPUT_HOTPLUG
#endif
/* see #34039 Fix Alt key glitch on Unity desktop */
/* see T34039 Fix Alt key glitch on Unity desktop */
#define USE_UNITY_WORKAROUND
/* Fix 'shortcut' part of keyboard reading code only ever using first defined key-map
* instead of active one. See #47228 and D1746 */
* instead of active one. See T47228 and D1746 */
#define USE_NON_LATIN_KB_WORKAROUND
static uchar bit_is_on(const uchar *ptr, int bit)
@@ -278,7 +278,7 @@ uint8_t GHOST_SystemX11::getNumDisplays() const
void GHOST_SystemX11::getMainDisplayDimensions(uint32_t &width, uint32_t &height) const
{
if (m_display) {
/* NOTE(@ideasman42): for this to work as documented,
/* NOTE(@campbellbarton): for this to work as documented,
* we would need to use Xinerama check r54370 for code that did this,
* we've since removed since its not worth the extra dependency. */
getAllDisplayDimensions(width, height);
@@ -927,8 +927,8 @@ void GHOST_SystemX11::processEvent(XEvent *xe)
if (window->getCursorGrabMode() == GHOST_kGrabHide) {
window->getClientBounds(bounds);
/* TODO(@ideasman42): warp the cursor to `window->getCursorGrabInitPos`,
* on every motion event, see: D16557 (alternative fix for #102346). */
/* TODO(@campbellbarton): warp the cursor to `window->getCursorGrabInitPos`,
* on every motion event, see: D16557 (alternative fix for T102346). */
const int32_t subregion_div = 4; /* One quarter of the region. */
const int32_t size[2] = {bounds.getWidth(), bounds.getHeight()};
const int32_t center[2] = {
@@ -964,7 +964,7 @@ void GHOST_SystemX11::processEvent(XEvent *xe)
if (x_new != xme.x_root || y_new != xme.y_root) {
/* Use time of last event to avoid wrapping several times on the 'same' actual wrap.
* Note that we need to deal with X and Y separately as those might wrap at the same time
* but still in two different events (corner case, see #74918).
* but still in two different events (corner case, see T74918).
* We also have to add a few extra milliseconds of 'padding', as sometimes we get two
* close events that will generate extra wrap on the same axis within those few
* milliseconds. */
@@ -1028,7 +1028,7 @@ void GHOST_SystemX11::processEvent(XEvent *xe)
/* XXX: Code below is kinda awfully convoluted... Issues are:
* - In keyboards like Latin ones, numbers need a 'Shift' to be accessed but key_sym
* is unmodified (or anyone swapping the keys with `xmodmap`).
* - #XLookupKeysym seems to always use first defined key-map (see #47228), which generates
* - #XLookupKeysym seems to always use first defined key-map (see T47228), which generates
* key-codes unusable by ghost_key_from_keysym for non-Latin-compatible key-maps.
*
* To address this, we:
@@ -1715,7 +1715,7 @@ GHOST_TSuccess GHOST_SystemX11::setCursorPosition(int32_t x, int32_t y)
#if defined(WITH_X11_XINPUT) && defined(USE_X11_XINPUT_WARP)
if ((m_xinput_version.present) && (m_xinput_version.major_version >= 2)) {
/* Needed to account for XInput "Coordinate Transformation Matrix", see #48901 */
/* Needed to account for XInput "Coordinate Transformation Matrix", see T48901 */
int device_id;
if (XIGetClientPointer(m_display, None, &device_id) != False) {
XIWarpPointer(m_display, device_id, None, None, 0, 0, 0, 0, relx, rely);
@@ -2015,8 +2015,8 @@ void GHOST_SystemX11::getClipboard_xcout(
return;
}
/* If it's not INCR, and not `format == 8`, then there's
* nothing in the selection (that `xclip` understands, anyway). */
/* if it's not incr, and not format == 8, then there's
* nothing in the selection (that xclip understands, anyway) */
if (pty_format != 8) {
*context = XCLIB_XCOUT_NONE;

View File

@@ -20,8 +20,8 @@
/* Disable XINPUT warp, currently not implemented by Xorg for multi-head display.
* (see comment in XSERVER `Xi/xiwarppointer.c` -> `FIXME: panoramix stuff is missing` ~ v1.13.4)
* If this is supported we can add back XINPUT for warping (fixing #48901).
* For now disable (see #50383). */
* If this is supported we can add back XINPUT for warping (fixing T48901).
* For now disable (see T50383). */
// # define USE_X11_XINPUT_WARP
#endif

View File

@@ -306,23 +306,14 @@ static void gwl_window_frame_update_from_pending(GWL_Window *win);
#ifdef USE_EVENT_BACKGROUND_THREAD
enum eGWL_PendingWindowActions {
/**
* The state of the window frame has changed, apply the state from #GWL_Window::frame_pending.
*/
PENDING_WINDOW_FRAME_CONFIGURE = 0,
/** The EGL buffer must be resized to match #GWL_WindowFrame::size. */
PENDING_EGL_WINDOW_RESIZE,
PENDING_FRAME_CONFIGURE = 0,
PENDING_EGL_RESIZE,
# ifdef GHOST_OPENGL_ALPHA
/** Draw an opaque region behind the window. */
PENDING_OPAQUE_SET,
# endif
/**
* The DPI for a monitor has changed or the monitors (outputs)
* this window is visible on may have changed. Recalculate the windows scale.
*/
PENDING_OUTPUT_SCALE_UPDATE,
PENDING_SCALE_UPDATE,
};
# define PENDING_NUM (PENDING_OUTPUT_SCALE_UPDATE + 1)
# define PENDING_NUM (PENDING_SCALE_UPDATE + 1)
static void gwl_window_pending_actions_tag(GWL_Window *win, enum eGWL_PendingWindowActions type)
{
@@ -332,10 +323,10 @@ static void gwl_window_pending_actions_tag(GWL_Window *win, enum eGWL_PendingWin
static void gwl_window_pending_actions_handle(GWL_Window *win)
{
if (win->pending_actions[PENDING_WINDOW_FRAME_CONFIGURE].exchange(false)) {
if (win->pending_actions[PENDING_FRAME_CONFIGURE].exchange(false)) {
gwl_window_frame_update_from_pending(win);
}
if (win->pending_actions[PENDING_EGL_WINDOW_RESIZE].exchange(false)) {
if (win->pending_actions[PENDING_EGL_RESIZE].exchange(false)) {
wl_egl_window_resize(win->egl_window, UNPACK2(win->frame.size), 0, 0);
}
# ifdef GHOST_OPENGL_ALPHA
@@ -343,7 +334,7 @@ static void gwl_window_pending_actions_handle(GWL_Window *win)
win->ghost_window->setOpaque();
}
# endif
if (win->pending_actions[PENDING_OUTPUT_SCALE_UPDATE].exchange(false)) {
if (win->pending_actions[PENDING_SCALE_UPDATE].exchange(false)) {
win->ghost_window->outputs_changed_update_scale();
}
}
@@ -351,10 +342,9 @@ static void gwl_window_pending_actions_handle(GWL_Window *win)
#endif /* USE_EVENT_BACKGROUND_THREAD */
/**
* Update the window's #GWL_WindowFrame.
* The caller must handle locking & run from the main thread.
* Update the window's #GWL_WindowFrame
*/
static void gwl_window_frame_update_from_pending_no_lock(GWL_Window *win)
static void gwl_window_frame_update_from_pending_lockfree(GWL_Window *win)
{
#ifdef USE_EVENT_BACKGROUND_THREAD
GHOST_ASSERT(win->ghost_system->main_thread_id == std::this_thread::get_id(),
@@ -391,7 +381,7 @@ static void gwl_window_frame_update_from_pending(GWL_Window *win)
#ifdef USE_EVENT_BACKGROUND_THREAD
std::lock_guard lock_frame_guard{win->frame_pending_mutex};
#endif
gwl_window_frame_update_from_pending_no_lock(win);
gwl_window_frame_update_from_pending_lockfree(win);
}
/** \} */
@@ -586,12 +576,12 @@ static void frame_handle_configure(struct libdecor_frame *frame,
GHOST_SystemWayland *system = win->ghost_system;
const bool is_main_thread = system->main_thread_id == std::this_thread::get_id();
if (!is_main_thread) {
gwl_window_pending_actions_tag(win, PENDING_WINDOW_FRAME_CONFIGURE);
gwl_window_pending_actions_tag(win, PENDING_FRAME_CONFIGURE);
}
else
# endif
{
gwl_window_frame_update_from_pending_no_lock(win);
gwl_window_frame_update_from_pending_lockfree(win);
}
}
}
@@ -679,9 +669,9 @@ static void xdg_surface_handle_configure(void *data,
GHOST_SystemWayland *system = win->ghost_system;
const bool is_main_thread = system->main_thread_id == std::this_thread::get_id();
if (!is_main_thread) {
/* NOTE(@ideasman42): this only gets one redraw,
/* NOTE(@campbellbarton): this only gets one redraw,
* I could not find a case where this causes problems. */
gwl_window_pending_actions_tag(win, PENDING_WINDOW_FRAME_CONFIGURE);
gwl_window_pending_actions_tag(win, PENDING_FRAME_CONFIGURE);
}
else
#endif
@@ -784,7 +774,7 @@ GHOST_WindowWayland::GHOST_WindowWayland(GHOST_SystemWayland *system,
window_->ghost_window = this;
window_->ghost_system = system;
/* NOTE(@ideasman42): The scale set here to avoid flickering on startup.
/* NOTE(@campbellbarton): The scale set here to avoid flickering on startup.
* When all monitors use the same scale (which is quite common) there aren't any problems.
*
* When monitors have different scales there may still be a visible window resize on startup.
@@ -822,12 +812,12 @@ GHOST_WindowWayland::GHOST_WindowWayland(GHOST_SystemWayland *system,
* when the `window_->scale` changed. */
const int32_t size_min[2] = {320, 240};
/* This value is expected to match the base name of the `.desktop` file. see #101805.
/* This value is expected to match the base name of the `.desktop` file. see T101805.
*
* NOTE: the XDG desktop-entry-spec defines that this should follow the "reverse DNS" convention.
* For e.g. `org.blender.Blender` - however the `.desktop` file distributed with Blender is
* simply called `blender.desktop`, so the it's important to follow that name.
* Other distributions such as SNAP & FLATPAK may need to change this value #101779.
* Other distributions such as SNAP & FLATPAK may need to change this value T101779.
* Currently there isn't a way to configure this, we may want to support that. */
const char *xdg_app_id = (
#ifdef WITH_GHOST_WAYLAND_APP_ID
@@ -1088,9 +1078,9 @@ GHOST_WindowWayland::~GHOST_WindowWayland()
wl_surface_destroy(window_->wl_surface);
/* NOTE(@ideasman42): Flushing will often run the appropriate handlers event
/* NOTE(@campbellbarton): Flushing will often run the appropriate handlers event
* (#wl_surface_listener.leave in particular) to avoid attempted access to the freed surfaces.
* This is not fool-proof though, hence the call to #window_surface_unref, see: #99078. */
* This is not fool-proof though, hence the call to #window_surface_unref, see: T99078. */
wl_display_flush(system_->wl_display());
delete window_;
@@ -1383,7 +1373,7 @@ bool GHOST_WindowWayland::outputs_changed_update_scale()
{
#ifdef USE_EVENT_BACKGROUND_THREAD
if (system_->main_thread_id != std::this_thread::get_id()) {
gwl_window_pending_actions_tag(window_, PENDING_OUTPUT_SCALE_UPDATE);
gwl_window_pending_actions_tag(window_, PENDING_SCALE_UPDATE);
return false;
}
#endif

View File

@@ -15,7 +15,7 @@
#include <wayland-util.h> /* For #wl_fixed_t */
/**
* Define to workaround for a bug/limitation in WAYLAND, see: #100855 & upstream report:
* Define to workaround for a bug/limitation in WAYLAND, see: T100855 & upstream report:
* https://gitlab.freedesktop.org/wayland/wayland/-/issues/159
*
* Consume events from WAYLAND in a thread, this is needed because overflowing the event queue

View File

@@ -93,7 +93,7 @@ GHOST_WindowWin32::GHOST_WindowWin32(GHOST_SystemWin32 *system,
}
RECT win_rect = {left, top, long(left + width), long(top + height)};
adjustWindowRectForDesktop(&win_rect, style, extended_style);
adjustWindowRectForClosestMonitor(&win_rect, style, extended_style);
wchar_t *title_16 = alloc_utf16_from_8((char *)title, 0);
m_hWnd = ::CreateWindowExW(extended_style, /* window extended style */
@@ -154,7 +154,7 @@ GHOST_WindowWin32::GHOST_WindowWin32(GHOST_SystemWin32 *system,
}
if (parentwindow) {
/* Release any parent capture to allow immediate interaction (#90110). */
/* Release any parent capture to allow immediate interaction (T90110). */
::ReleaseCapture();
parentwindow->lostMouseCapture();
}
@@ -298,52 +298,24 @@ GHOST_WindowWin32::~GHOST_WindowWin32()
m_directManipulationHelper = NULL;
}
void GHOST_WindowWin32::adjustWindowRectForDesktop(LPRECT win_rect, DWORD dwStyle, DWORD dwExStyle)
void GHOST_WindowWin32::adjustWindowRectForClosestMonitor(LPRECT win_rect,
DWORD dwStyle,
DWORD dwExStyle)
{
/* Windows can span multiple monitors, but must be usable. The desktop can have a larger
* surface than all monitors combined, for example when two monitors are aligned diagonally.
* Therefore we ensure that all the window's corners are within some monitor's Work area. */
POINT pt;
HMONITOR hmonitor;
/* Get Details of the closest monitor. */
HMONITOR hmonitor = MonitorFromRect(win_rect, MONITOR_DEFAULTTONEAREST);
MONITORINFOEX monitor;
monitor.cbSize = sizeof(MONITORINFOEX);
monitor.dwFlags = 0;
/* Note that with MonitorFromPoint using MONITOR_DEFAULTTONEAREST, it will return
* the exact monitor if there is one at the location or the nearest monitor if not. */
/* Top-left. */
pt.x = win_rect->left;
pt.y = win_rect->top;
hmonitor = MonitorFromPoint(pt, MONITOR_DEFAULTTONEAREST);
GetMonitorInfo(hmonitor, &monitor);
win_rect->top = max(win_rect->top, monitor.rcWork.top);
win_rect->left = max(win_rect->left, monitor.rcWork.left);
/* Top-right. */
pt.x = win_rect->right;
pt.y = win_rect->top;
hmonitor = MonitorFromPoint(pt, MONITOR_DEFAULTTONEAREST);
GetMonitorInfo(hmonitor, &monitor);
win_rect->top = max(win_rect->top, monitor.rcWork.top);
win_rect->right = min(win_rect->right, monitor.rcWork.right);
/* Bottom-left. */
pt.x = win_rect->left;
pt.y = win_rect->bottom;
hmonitor = MonitorFromPoint(pt, MONITOR_DEFAULTTONEAREST);
GetMonitorInfo(hmonitor, &monitor);
win_rect->bottom = min(win_rect->bottom, monitor.rcWork.bottom);
win_rect->left = max(win_rect->left, monitor.rcWork.left);
/* Bottom-right. */
pt.x = win_rect->right;
pt.y = win_rect->bottom;
hmonitor = MonitorFromPoint(pt, MONITOR_DEFAULTTONEAREST);
GetMonitorInfo(hmonitor, &monitor);
win_rect->bottom = min(win_rect->bottom, monitor.rcWork.bottom);
win_rect->right = min(win_rect->right, monitor.rcWork.right);
/* Constrain requested size and position to fit within this monitor. */
LONG width = min(monitor.rcWork.right - monitor.rcWork.left, win_rect->right - win_rect->left);
LONG height = min(monitor.rcWork.bottom - monitor.rcWork.top, win_rect->bottom - win_rect->top);
win_rect->left = min(max(monitor.rcWork.left, win_rect->left), monitor.rcWork.right - width);
win_rect->right = win_rect->left + width;
win_rect->top = min(max(monitor.rcWork.top, win_rect->top), monitor.rcWork.bottom - height);
win_rect->bottom = win_rect->top + height;
/* With Windows 10 and newer we can adjust for chrome that differs with DPI and scale. */
GHOST_WIN32_AdjustWindowRectExForDpi fpAdjustWindowRectExForDpi = nullptr;
@@ -362,6 +334,9 @@ void GHOST_WindowWin32::adjustWindowRectForDesktop(LPRECT win_rect, DWORD dwStyl
else {
AdjustWindowRectEx(win_rect, dwStyle & ~WS_OVERLAPPED, FALSE, dwExStyle);
}
/* But never allow a top position that can hide part of the title bar. */
win_rect->top = max(monitor.rcWork.top, win_rect->top);
}
bool GHOST_WindowWin32::getValid() const

View File

@@ -87,12 +87,12 @@ class GHOST_WindowWin32 : public GHOST_Window {
~GHOST_WindowWin32();
/**
* Adjusts a requested window rect to fit and position within the desktop.
* Adjusts a requested window rect to fit and position correctly in monitor.
* \param win_rect: pointer to rectangle that will be modified.
* \param dwStyle: The Window Style of the window whose required size is to be calculated.
* \param dwExStyle: The Extended Window Style of the window.
*/
void adjustWindowRectForDesktop(LPRECT win_rect, DWORD dwStyle, DWORD dwExStyle);
void adjustWindowRectForClosestMonitor(LPRECT win_rect, DWORD dwStyle, DWORD dwExStyle);
/**
* Returns indication as to whether the window is valid.

Some files were not shown because too many files have changed in this diff Show More