Compare commits

..

11 Commits

Author SHA1 Message Date
ca820c3ebd Merge branch 'master' into tmp-volume-matrix-api-update 2023-02-06 21:21:01 +01:00
ab223d1f8e Try to fix MSVC 2023-02-06 14:00:47 +01:00
b8866eeeb3 Fix namespace ambiguity confusing MSVC 2023-02-06 12:25:20 +01:00
616ff437ea Merge branch 'master' 2023-02-06 12:20:00 +01:00
b2c869f6a9 Fix copy paste error 2023-02-02 14:33:56 +01:00
54ece291d3 Remove old headers 2023-02-01 19:54:18 +01:00
b6db27fe24 Fix regression 2023-02-01 19:47:58 +01:00
fbfa83caba Fix UB 2023-02-01 18:23:26 +01:00
32c173be45 Fix alignment error causing crash 2023-02-01 18:23:12 +01:00
77c550cc43 Fix compilation of usd writer 2023-02-01 17:25:41 +01:00
6b7d893107 BLI: Use BLI_math_matrix_type.hh instead of BLI_math_float4x4.hh
Straightforward port. I took the opportunity to remove some C vector
functions (ex: copy_v2_v2()).
2023-02-01 16:46:43 +01:00
2908 changed files with 42094 additions and 2158786 deletions

8
.arcconfig Normal file
View File

@@ -0,0 +1,8 @@
{
"project_id" : "Blender",
"conduit_uri" : "https://developer.blender.org/",
"phabricator.uri" : "https://developer.blender.org/",
"git.default-relative-commit" : "origin/master",
"arc.land.update.default" : "rebase",
"arc.land.onto.default" : "master"
}

View File

@@ -236,8 +236,6 @@ ForEachMacros:
- LOOP_UNSELECTED_POINTS
- LOOP_VISIBLE_KEYS
- LOOP_VISIBLE_POINTS
- LIGHT_FOREACH_BEGIN_DIRECTIONAL
- LIGHT_FOREACH_BEGIN_LOCAL
- LISTBASE_CIRCULAR_BACKWARD_BEGIN
- LISTBASE_CIRCULAR_FORWARD_BEGIN
- LISTBASE_FOREACH

View File

@@ -2,4 +2,4 @@ ${CommitTitle}
${CommitBody}
Pull Request: https://projects.blender.org/blender/blender/pulls/${PullRequestIndex}
Pull Request #${PullRequestIndex}

View File

@@ -1,3 +1,3 @@
${PullRequestTitle}
Pull Request: https://projects.blender.org/blender/blender/pulls/${PullRequestIndex}
Pull Request #${PullRequestIndex}

View File

@@ -1,9 +1,9 @@
name: Bug Report
about: File a bug report
labels:
- "Type/Report"
- "Status/Needs Triage"
- "Priority/Normal"
- "type::Report"
- "status::Needs Triage"
- "priority::Normal"
body:
- type: markdown
attributes:

View File

@@ -1,7 +1,7 @@
name: Design
about: Create a design task (for developers only)
labels:
- "Type/Design"
- "type::Design"
body:
- type: textarea
id: body

View File

@@ -1,7 +1,7 @@
name: To Do
about: Create a to do task (for developers only)
labels:
- "Type/To Do"
- "type::To Do"
body:
- type: textarea
id: body

View File

@@ -1,4 +1,5 @@
This repository is only used as a mirror. Blender development happens on projects.blender.org.
This repository is only used as a mirror of git.blender.org. Blender development happens on
https://developer.blender.org.
To get started with contributing code, please see:
https://wiki.blender.org/wiki/Process/Contributing_Code

3
.github/stale.yml vendored
View File

@@ -15,7 +15,8 @@ staleLabel: stale
# Comment to post when closing a stale Issue or Pull Request.
closeComment: >
This issue has been automatically closed, because this repository is only
used as a mirror. Blender development happens on projects.blender.org.
used as a mirror of git.blender.org. Blender development happens on
developer.blender.org.
To get started contributing code, please read:
https://wiki.blender.org/wiki/Process/Contributing_Code

27
.gitignore vendored
View File

@@ -39,7 +39,7 @@ Desktop.ini
/doc/python_api/rst/bmesh.ops.rst
# in-source lib downloads
/build_files/build_environment/downloads/
/build_files/build_environment/downloads
# in-source buildbot signing configuration
/build_files/buildbot/codesign/config_server.py
@@ -48,27 +48,4 @@ Desktop.ini
waveletNoiseTile.bin
# testing environment
/Testing/
# Translations.
/locale/user-config.py
# External repositories.
/scripts/addons/
/scripts/addons_contrib/
# Ignore old submodules directories.
# Eventually need to get rid of those, but for the first time of transition
# avoid incidents when the folders exist after bisect and developers staging
# them by accident.
/release/scripts/addons/
/release/datafiles/locale/
/release/scripts/addons_contrib/
/source/tools/
# Build files for VS and VS Code.
/build/
/out/
CMakeSettings.json
CMakePresets.json
CMakeUserPresets.json
/Testing

20
.gitmodules vendored Normal file
View File

@@ -0,0 +1,20 @@
[submodule "release/scripts/addons"]
path = release/scripts/addons
url = ../blender-addons.git
branch = master
ignore = all
[submodule "release/scripts/addons_contrib"]
path = release/scripts/addons_contrib
url = ../blender-addons-contrib.git
branch = master
ignore = all
[submodule "release/datafiles/locale"]
path = release/datafiles/locale
url = ../blender-translations.git
branch = master
ignore = all
[submodule "source/tools"]
path = source/tools
url = ../blender-dev-tools.git
branch = master
ignore = all

View File

@@ -331,6 +331,7 @@ option(WITH_MOD_REMESH "Enable Remesh Modifier" ON)
option(WITH_MOD_OCEANSIM "Enable Ocean Modifier" ON)
# Image format support
option(WITH_OPENIMAGEIO "Enable OpenImageIO Support (http://www.openimageio.org)" ON)
option(WITH_IMAGE_OPENEXR "Enable OpenEXR Support (http://www.openexr.com)" ON)
option(WITH_IMAGE_OPENJPEG "Enable OpenJpeg Support (http://www.openjpeg.org)" ON)
option(WITH_IMAGE_TIFF "Enable LibTIFF Support" ON)
@@ -357,7 +358,6 @@ option(WITH_MATERIALX "Enable MaterialX Support" OFF)
# Disable opencollada when we don't have precompiled libs
option(WITH_OPENCOLLADA "Enable OpenCollada Support (http://www.opencollada.org)" ON)
option(WITH_IO_WAVEFRONT_OBJ "Enable Wavefront-OBJ 3D file format support (*.obj)" ON)
option(WITH_IO_PLY "Enable PLY 3D file format support (*.ply)" ON)
option(WITH_IO_STL "Enable STL 3D file format support (*.stl)" ON)
option(WITH_IO_GPENCIL "Enable grease-pencil file format IO (*.svg, *.pdf)" ON)
@@ -524,7 +524,7 @@ endif()
if(NOT APPLE)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
endif()
@@ -617,20 +617,16 @@ endif()
option(WITH_OPENGL "When off limits visibility of the opengl headers to just bf_gpu and gawain (temporary option for development purposes)" ON)
option(WITH_GPU_BUILDTIME_SHADER_BUILDER "Shader builder is a developer option enabling linting on GLSL during compilation" OFF)
option(WITH_RENDERDOC "Use Renderdoc API to capture frames" OFF)
mark_as_advanced(
WITH_OPENGL
WITH_GPU_BUILDTIME_SHADER_BUILDER
WITH_RENDERDOC
)
# Vulkan
option(WITH_VULKAN_BACKEND "Enable Vulkan as graphics backend (only for development)" OFF)
option(WITH_VULKAN_GUARDEDALLOC "Use guardedalloc for host allocations done inside Vulkan (development option)" OFF)
mark_as_advanced(
WITH_VULKAN_BACKEND
WITH_VULKAN_GUARDEDALLOC
)
# Metal
@@ -894,6 +890,8 @@ set_and_warn_dependency(WITH_IMAGE_TIFF WITH_HARU OFF)
# auto enable openimageio for cycles
if(WITH_CYCLES)
set(WITH_OPENIMAGEIO ON)
# auto enable llvm for cycles_osl
if(WITH_CYCLES_OSL)
set(WITH_LLVM ON CACHE BOOL "" FORCE)
@@ -954,6 +952,21 @@ endif()
# -----------------------------------------------------------------------------
# Check if Sub-modules are Cloned
if(WITH_INTERNATIONAL)
file(GLOB RESULT "${CMAKE_SOURCE_DIR}/release/datafiles/locale")
list(LENGTH RESULT DIR_LEN)
if(DIR_LEN EQUAL 0)
message(
WARNING
"Translation path '${CMAKE_SOURCE_DIR}/release/datafiles/locale' is missing, "
"This is a 'git submodule', which are known not to work with bridges to other version "
"control systems."
)
set(TRANSLATIONS_FOUND OFF)
set_and_warn_library_found("Translations" TRANSLATIONS_FOUND WITH_INTERNATIONAL)
endif()
endif()
if(WITH_PYTHON)
# While we have this as an '#error' in 'bpy_capi_utils.h',
# upgrading Python tends to cause confusion for users who build.
@@ -969,14 +982,14 @@ if(WITH_PYTHON)
)
endif()
file(GLOB RESULT "${CMAKE_SOURCE_DIR}/scripts/addons")
file(GLOB RESULT "${CMAKE_SOURCE_DIR}/release/scripts/addons")
list(LENGTH RESULT DIR_LEN)
if(DIR_LEN EQUAL 0)
message(
WARNING
"Addons path '${CMAKE_SOURCE_DIR}/scripts/addons' is missing. "
"This is an external repository which needs to be checked out. Use `make update` to do so. "
"* CONTINUING WITHOUT ADDONS *"
"Addons path '${CMAKE_SOURCE_DIR}/release/scripts/addons' is missing, "
"This is a 'git submodule', which are known not to work with bridges to other version "
"control systems: * CONTINUING WITHOUT ADDONS *"
)
endif()
endif()
@@ -1085,6 +1098,13 @@ if(NOT WITH_FFTW3 AND WITH_MOD_OCEANSIM)
endif()
if(WITH_CYCLES)
if(NOT WITH_OPENIMAGEIO)
message(
FATAL_ERROR
"Cycles requires WITH_OPENIMAGEIO, the library may not have been found. "
"Configure OIIO or disable WITH_CYCLES"
)
endif()
if(WITH_CYCLES_OSL)
if(NOT WITH_LLVM)
message(
@@ -1557,9 +1577,6 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
# add_check_c_compiler_flag(C_WARNINGS C_WARN_UNUSED_MACROS -Wunused-macros)
# add_check_cxx_compiler_flag(CXX_WARNINGS CXX_WARN_UNUSED_MACROS -Wunused-macros)
add_check_c_compiler_flag(C_WARNINGS C_WARN_ERROR_UNGUARDED_AVAILABILITY_NEW -Werror=unguarded-availability-new)
add_check_c_compiler_flag(CXX_WARNINGS CXX_WARN_ERROR_UNGUARDED_AVAILABILITY_NEW -Werror=unguarded-availability-new)
# ---------------------
# Suppress Strict Flags
@@ -1612,7 +1629,6 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "MSVC")
# warning level:
"/W3"
"/w34062" # switch statement contains 'default' but no 'case' labels
"/w34100" # 'identifier' : unreferenced formal parameter
"/w34115" # 'type' : named type definition in parentheses
"/w34189" # local variable is initialized but not referenced
# see https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/c5038?view=vs-2017
@@ -1939,6 +1955,7 @@ if(FIRST_RUN)
info_cfg_option(WITH_IMAGE_OPENEXR)
info_cfg_option(WITH_IMAGE_OPENJPEG)
info_cfg_option(WITH_IMAGE_TIFF)
info_cfg_option(WITH_OPENIMAGEIO)
info_cfg_text("Audio:")
info_cfg_option(WITH_CODEC_AVI)

View File

@@ -69,7 +69,7 @@ Static Source Code Checking
* check_cmake: Runs our own cmake file checker which detects errors in the cmake file list definitions.
* check_pep8: Checks all Python script are pep8 which are tagged to use the stricter formatting.
* check_mypy: Checks all Python scripts using mypy,
see: tools/check_source/check_mypy_config.py scripts which are included.
see: source/tools/check_source/check_mypy_config.py scripts which are included.
Documentation Checking
@@ -85,7 +85,7 @@ Spell Checkers
* check_spelling_osl: Check for spelling errors (OSL only).
* check_spelling_py: Check for spelling errors (Python only).
Note: an additional word-list is maintained at: 'tools/check_source/check_spelling_c_config.py'
Note: an additional word-list is maintained at: 'source/tools/check_source/check_spelling_c_config.py'
Note: that spell checkers can take a 'CHECK_SPELLING_CACHE' filepath argument,
so re-running does not need to re-check unchanged files.
@@ -299,11 +299,7 @@ else
ifneq ("$(wildcard $(DEPS_BUILD_DIR)/build.ninja)","")
DEPS_BUILD_COMMAND:=ninja
else
ifeq ($(OS), Darwin)
DEPS_BUILD_COMMAND:=make -s
else
DEPS_BUILD_COMMAND:="$(BLENDER_DIR)/build_files/build_environment/linux/make_deps_wrapper.sh" -s
endif
DEPS_BUILD_COMMAND:=make -s
endif
endif
@@ -402,7 +398,7 @@ endif
deps: .FORCE
@echo
@echo Configuring dependencies in \"$(DEPS_BUILD_DIR)\", install to \"$(DEPS_INSTALL_DIR)\"
@echo Configuring dependencies in \"$(DEPS_BUILD_DIR)\"
@cmake -H"$(DEPS_SOURCE_DIR)" \
-B"$(DEPS_BUILD_DIR)" \
@@ -490,22 +486,22 @@ check_smatch: .FORCE
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_smatch.py"
check_mypy: .FORCE
@$(PYTHON) "$(BLENDER_DIR)/tools/check_source/check_mypy.py"
@$(PYTHON) "$(BLENDER_DIR)/source/tools/check_source/check_mypy.py"
check_wiki_file_structure: .FORCE
@PYTHONIOENCODING=utf_8 $(PYTHON) \
"$(BLENDER_DIR)/tools/check_wiki/check_wiki_file_structure.py"
"$(BLENDER_DIR)/source/tools/check_wiki/check_wiki_file_structure.py"
check_spelling_py: .FORCE
@cd "$(BUILD_DIR)" ; \
PYTHONIOENCODING=utf_8 $(PYTHON) \
"$(BLENDER_DIR)/tools/check_source/check_spelling.py" \
"$(BLENDER_DIR)/scripts"
"$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
"$(BLENDER_DIR)/release/scripts"
check_spelling_c: .FORCE
@cd "$(BUILD_DIR)" ; \
PYTHONIOENCODING=utf_8 $(PYTHON) \
"$(BLENDER_DIR)/tools/check_source/check_spelling.py" \
"$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
--cache-file=$(CHECK_SPELLING_CACHE) \
"$(BLENDER_DIR)/source" \
"$(BLENDER_DIR)/intern/cycles" \
@@ -515,21 +511,21 @@ check_spelling_c: .FORCE
check_spelling_osl: .FORCE
@cd "$(BUILD_DIR)" ; \
PYTHONIOENCODING=utf_8 $(PYTHON) \
"$(BLENDER_DIR)/tools/check_source/check_spelling.py" \
"$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
--cache-file=$(CHECK_SPELLING_CACHE) \
"$(BLENDER_DIR)/intern/cycles/kernel/shaders"
check_descriptions: .FORCE
@$(BLENDER_BIN) --background -noaudio --factory-startup --python \
"$(BLENDER_DIR)/tools/check_source/check_descriptions.py"
"$(BLENDER_DIR)/source/tools/check_source/check_descriptions.py"
check_deprecated: .FORCE
@PYTHONIOENCODING=utf_8 $(PYTHON) \
tools/check_source/check_deprecated.py
source/tools/check_source/check_deprecated.py
check_licenses: .FORCE
@PYTHONIOENCODING=utf_8 $(PYTHON) \
"$(BLENDER_DIR)/tools/check_source/check_licenses.py" \
"$(BLENDER_DIR)/source/tools/check_source/check_licenses.py" \
"--show-headers=$(SHOW_HEADERS)"
check_pep8: .FORCE
@@ -538,7 +534,7 @@ check_pep8: .FORCE
check_cmake: .FORCE
@PYTHONIOENCODING=utf_8 $(PYTHON) \
tools/check_source/check_cmake_consistency.py
source/tools/check_source/check_cmake_consistency.py
# -----------------------------------------------------------------------------
@@ -576,8 +572,8 @@ update_code: .FORCE
@$(PYTHON) ./build_files/utils/make_update.py --no-libraries
format: .FORCE
@PATH="${LIBDIR}/llvm/bin/:$(PATH)" $(PYTHON) tools/utils_maintenance/clang_format_paths.py $(PATHS)
@$(PYTHON) tools/utils_maintenance/autopep8_format_paths.py --autopep8-command="$(AUTOPEP8)" $(PATHS)
@PATH="${LIBDIR}/llvm/bin/:$(PATH)" $(PYTHON) source/tools/utils_maintenance/clang_format_paths.py $(PATHS)
@$(PYTHON) source/tools/utils_maintenance/autopep8_format_paths.py --autopep8-command="$(AUTOPEP8)" $(PATHS)
# -----------------------------------------------------------------------------

View File

@@ -24,7 +24,7 @@ Development
-----------
- [Build Instructions](https://wiki.blender.org/wiki/Building_Blender)
- [Code Review & Bug Tracker](https://projects.blender.org)
- [Code Review & Bug Tracker](https://developer.blender.org)
- [Developer Forum](https://devtalk.blender.org)
- [Developer Documentation](https://wiki.blender.org)

View File

@@ -78,7 +78,12 @@ include(cmake/tbb.cmake)
include(cmake/python.cmake)
include(cmake/llvm.cmake)
include(cmake/osl.cmake)
include(cmake/numpy.cmake)
option(USE_PIP_NUMPY "Install NumPy using pip wheel instead of building from source" OFF)
if(APPLE AND ("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "x86_64"))
set(USE_PIP_NUMPY ON)
else()
include(cmake/numpy.cmake)
endif()
include(cmake/python_site_packages.cmake)
include(cmake/package_python.cmake)
include(cmake/openimageio.cmake)

View File

@@ -10,7 +10,7 @@ ExternalProject_Add(external_epoxy
URL_HASH ${EPOXY_HASH_TYPE}=${EPOXY_HASH}
PREFIX ${BUILD_DIR}/epoxy
PATCH_COMMAND ${PATCH_CMD} -p 1 -N -d ${BUILD_DIR}/epoxy/src/external_epoxy/ < ${PATCH_DIR}/epoxy.diff
CONFIGURE_COMMAND ${CONFIGURE_ENV} && ${MESON} setup --prefix ${LIBDIR}/epoxy --default-library ${EPOXY_LIB_TYPE} --libdir lib ${BUILD_DIR}/epoxy/src/external_epoxy-build ${BUILD_DIR}/epoxy/src/external_epoxy -Dtests=false ${MESON_BUILD_TYPE}
CONFIGURE_COMMAND ${CONFIGURE_ENV} && ${MESON} setup --prefix ${LIBDIR}/epoxy --default-library ${EPOXY_LIB_TYPE} --libdir lib ${BUILD_DIR}/epoxy/src/external_epoxy-build ${BUILD_DIR}/epoxy/src/external_epoxy -Dtests=false
BUILD_COMMAND ninja
INSTALL_COMMAND ninja install
)

View File

@@ -9,7 +9,7 @@ ExternalProject_Add(external_fribidi
URL_HASH ${FRIBIDI_HASH_TYPE}=${FRIBIDI_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/fribidi
CONFIGURE_COMMAND ${MESON} setup --prefix ${LIBDIR}/fribidi ${MESON_BUILD_TYPE} -Ddocs=false --default-library static --libdir lib ${BUILD_DIR}/fribidi/src/external_fribidi-build ${BUILD_DIR}/fribidi/src/external_fribidi
CONFIGURE_COMMAND ${MESON} setup --prefix ${LIBDIR}/fribidi -Ddocs=false --default-library static --libdir lib ${BUILD_DIR}/fribidi/src/external_fribidi-build ${BUILD_DIR}/fribidi/src/external_fribidi
BUILD_COMMAND ninja
INSTALL_COMMAND ninja install
INSTALL_DIR ${LIBDIR}/fribidi

View File

@@ -22,7 +22,7 @@ elseif(UNIX AND NOT APPLE)
)
endif()
# Boolean crashes with Arm assembly, see #103423.
# Boolean crashes with Arm assembly, see T103423.
if(BLENDER_PLATFORM_ARM)
set(GMP_OPTIONS
${GMP_OPTIONS}

View File

@@ -21,7 +21,6 @@ set(HARFBUZZ_EXTRA_OPTIONS
# Only used for command line utilities,
# disable as this would add an additional & unnecessary build-dependency.
-Dcairo=disabled
${MESON_BUILD_TYPE}
)
ExternalProject_Add(external_harfbuzz
@@ -60,10 +59,3 @@ if(BUILD_MODE STREQUAL Release AND WIN32)
DEPENDEES install
)
endif()
if(BUILD_MODE STREQUAL Debug AND WIN32)
ExternalProject_Add_Step(external_harfbuzz after_install
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/harfbuzz/lib/libharfbuzz.a ${HARVEST_TARGET}/harfbuzz/lib/libharfbuzz_d.lib
DEPENDEES install
)
endif()

View File

@@ -40,8 +40,7 @@ ExternalProject_Add(external_igc_llvm
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0004-OpenCL-support-cl_ext_float_atomics.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0005-OpenCL-Add-cl_khr_integer_dot_product.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0003-Add-missing-include-limit-in-benchmark.patch
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch
)
add_dependencies(
external_igc_llvm
@@ -56,6 +55,9 @@ ExternalProject_Add(external_igc_spirv_translator
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0001-update-SPIR-V-headers-for-SPV_INTEL_split_barrier.patch &&
${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0002-Add-support-for-split-barriers-extension-SPV_INTEL_s.patch &&
${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0003-Support-cl_bf16_conversions.patch
)
add_dependencies(
external_igc_spirv_translator

View File

@@ -15,7 +15,7 @@ llvm-config = '${LIBDIR}/llvm/bin/llvm-config'"
)
set(MESA_EXTRA_FLAGS
${MESON_BUILD_TYPE}
-Dbuildtype=release
-Dc_args=${MESA_CFLAGS}
-Dcpp_args=${MESA_CXXFLAGS}
-Dc_link_args=${MESA_LDFLAGS}

View File

@@ -44,21 +44,13 @@ set(OPENVDB_EXTRA_ARGS
# -DLLVM_DIR=${LIBDIR}/llvm/lib/cmake/llvm
)
set(OPENVDB_PATCH ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb.diff)
if(APPLE)
set(OPENVDB_PATCH
${OPENVDB_PATCH} &&
${PATCH_CMD} -p 0 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb_metal.diff
)
endif()
ExternalProject_Add(openvdb
URL file://${PACKAGE_DIR}/${OPENVDB_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
URL_HASH ${OPENVDB_HASH_TYPE}=${OPENVDB_HASH}
CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
PREFIX ${BUILD_DIR}/openvdb
PATCH_COMMAND ${OPENVDB_PATCH}
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb.diff
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/openvdb ${DEFAULT_CMAKE_FLAGS} ${OPENVDB_EXTRA_ARGS}
INSTALL_DIR ${LIBDIR}/openvdb
)

View File

@@ -16,10 +16,8 @@ message("BuildMode = ${BUILD_MODE}")
if(BUILD_MODE STREQUAL "Debug")
set(LIBDIR ${CMAKE_CURRENT_BINARY_DIR}/Debug)
set(MESON_BUILD_TYPE -Dbuildtype=debug)
else()
set(LIBDIR ${CMAKE_CURRENT_BINARY_DIR}/Release)
set(MESON_BUILD_TYPE -Dbuildtype=release)
endif()
set(DOWNLOAD_DIR "${CMAKE_CURRENT_BINARY_DIR}/downloads" CACHE STRING "Path for downloaded files")

View File

@@ -88,19 +88,6 @@ else()
export LDFLAGS=${PYTHON_LDFLAGS} &&
export PKG_CONFIG_PATH=${LIBDIR}/ffi/lib/pkgconfig)
# NOTE: untested on APPLE so far.
if(NOT APPLE)
set(PYTHON_CONFIGURE_EXTRA_ARGS
${PYTHON_CONFIGURE_EXTRA_ARGS}
# Used on most release Linux builds (Fedora for e.g.),
# increases build times noticeably with the benefit of a modest speedup at runtime.
--enable-optimizations
# While LTO is OK when building on the same system, it's incompatible across GCC versions,
# making it impractical for developers to build against, so keep it disabled.
# `--with-lto`
)
endif()
ExternalProject_Add(external_python
URL file://${PACKAGE_DIR}/${PYTHON_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}

View File

@@ -38,6 +38,15 @@ ExternalProject_Add(external_python_site_packages
--no-binary :all:
)
if(USE_PIP_NUMPY)
# Use only wheel (and not build from source) to stop NumPy from linking against buggy
# Accelerate framework backend on macOS. Official wheels are built with OpenBLAS.
ExternalProject_Add_Step(external_python_site_packages after_install
COMMAND ${PYTHON_BINARY} -m pip install --no-cache-dir numpy==${NUMPY_VERSION} --only-binary :all:
DEPENDEES install
)
endif()
add_dependencies(
external_python_site_packages
external_python

View File

@@ -165,9 +165,9 @@ set(OPENMP_URI https://github.com/llvm/llvm-project/releases/download/llvmorg-${
set(OPENMP_HASH_TYPE MD5)
set(OPENMP_FILE openmp-${OPENMP_VERSION}.src.tar.xz)
set(OPENIMAGEIO_VERSION v2.4.9.0)
set(OPENIMAGEIO_VERSION v2.4.6.0)
set(OPENIMAGEIO_URI https://github.com/OpenImageIO/oiio/archive/refs/tags/${OPENIMAGEIO_VERSION}.tar.gz)
set(OPENIMAGEIO_HASH 7da92a7d6029921a8599a977ff1efa2a)
set(OPENIMAGEIO_HASH c7acc1b9a8fda04ef48f7de1feda4dae)
set(OPENIMAGEIO_HASH_TYPE MD5)
set(OPENIMAGEIO_FILE OpenImageIO-${OPENIMAGEIO_VERSION}.tar.gz)
@@ -668,9 +668,9 @@ set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
# compiler, the versions used are taken from the following location
# https://github.com/intel/intel-graphics-compiler/releases
set(IGC_VERSION 1.0.13064.7)
set(IGC_VERSION 1.0.12149.1)
set(IGC_URI https://github.com/intel/intel-graphics-compiler/archive/refs/tags/igc-${IGC_VERSION}.tar.gz)
set(IGC_HASH a929abd4cca2b293961ec0437ee4b3b2147bd3b2c8a3c423af78c0c359b2e5ae)
set(IGC_HASH 44f67f24e3bc5130f9f062533abf8154782a9d0a992bc19b498639a8521ae836)
set(IGC_HASH_TYPE SHA256)
set(IGC_FILE igc-${IGC_VERSION}.tar.gz)
@@ -690,15 +690,15 @@ set(IGC_LLVM_FILE ${IGC_LLVM_VERSION}.tar.gz)
#
# WARNING WARNING WARNING
set(IGC_OPENCL_CLANG_VERSION ee31812ea8b89d08c2918f045d11a19bd33525c5)
set(IGC_OPENCL_CLANG_VERSION 363a5262d8c7cff3fb28f3bdb5d85c8d7e91c1bb)
set(IGC_OPENCL_CLANG_URI https://github.com/intel/opencl-clang/archive/${IGC_OPENCL_CLANG_VERSION}.tar.gz)
set(IGC_OPENCL_CLANG_HASH 1db6735bbcfaa31e8a9ba39f121d6bafa806ea8919e9f56782d6aaa67771ddda)
set(IGC_OPENCL_CLANG_HASH aa8cf72bb239722ce8ce44f79413c6887ecc8ca18477dd520aa5c4809756da9a)
set(IGC_OPENCL_CLANG_HASH_TYPE SHA256)
set(IGC_OPENCL_CLANG_FILE opencl-clang-${IGC_OPENCL_CLANG_VERSION}.tar.gz)
set(IGC_VCINTRINSICS_VERSION v0.11.0)
set(IGC_VCINTRINSICS_VERSION v0.5.0)
set(IGC_VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/refs/tags/${IGC_VCINTRINSICS_VERSION}.tar.gz)
set(IGC_VCINTRINSICS_HASH e5acd5626ce7fa6d41ce154c50ac805eda734ee66af94ef28e680ac2ad81bb9f)
set(IGC_VCINTRINSICS_HASH 70bb47c5e32173cf61514941e83ae7c7eb4485e6d2fca60cfa1f50d4f42c41f2)
set(IGC_VCINTRINSICS_HASH_TYPE SHA256)
set(IGC_VCINTRINSICS_FILE vc-intrinsics-${IGC_VCINTRINSICS_VERSION}.tar.gz)
@@ -714,9 +714,9 @@ set(IGC_SPIRV_TOOLS_HASH 6e19900e948944243024aedd0a201baf3854b377b9cc7a386553bc1
set(IGC_SPIRV_TOOLS_HASH_TYPE SHA256)
set(IGC_SPIRV_TOOLS_FILE SPIR-V-Tools-${IGC_SPIRV_TOOLS_VERSION}.tar.gz)
set(IGC_SPIRV_TRANSLATOR_VERSION d739c01d65ec00dee64dedd40deed805216a7193)
set(IGC_SPIRV_TRANSLATOR_VERSION a31ffaeef77e23d500b3ea3d35e0c42ff5648ad9)
set(IGC_SPIRV_TRANSLATOR_URI https://github.com/KhronosGroup/SPIRV-LLVM-Translator/archive/${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
set(IGC_SPIRV_TRANSLATOR_HASH ddc0cc9ccbe59dadeaf291012d59de142b2e9f2b124dbb634644d39daddaa13e)
set(IGC_SPIRV_TRANSLATOR_HASH 9e26c96a45341b8f8af521bacea20e752623346340addd02af95d669f6e89252)
set(IGC_SPIRV_TRANSLATOR_HASH_TYPE SHA256)
set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
@@ -724,15 +724,15 @@ set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.
### Intel Graphics Compiler DEPS END ###
########################################
set(GMMLIB_VERSION intel-gmmlib-22.3.0)
set(GMMLIB_VERSION intel-gmmlib-22.1.8)
set(GMMLIB_URI https://github.com/intel/gmmlib/archive/refs/tags/${GMMLIB_VERSION}.tar.gz)
set(GMMLIB_HASH c1f33e1519edfc527127baeb0436b783430dfd256c643130169a3a71dc86aff9)
set(GMMLIB_HASH bf23e9a3742b4fb98c7666c9e9b29f3219e4b2fb4d831aaf4eed71f5e2d17368)
set(GMMLIB_HASH_TYPE SHA256)
set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)
set(OCLOC_VERSION 22.49.25018.21)
set(OCLOC_VERSION 22.38.24278)
set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
set(OCLOC_HASH 92362dae08b503a34e5d3820ed284198c452bcd5e7504d90eb69887b20492c06)
set(OCLOC_HASH db0c542fccd651e6404b15a74d46027f1ce0eda8dc9e25a40cbb6c0faef257ee)
set(OCLOC_HASH_TYPE SHA256)
set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)

View File

@@ -13,7 +13,7 @@ ExternalProject_Add(external_wayland
# NOTE: `-lm` is needed for `libxml2` which is a static library that uses `libm.so`,
# without this, math symbols such as `floor` aren't found.
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH=${LIBDIR}/expat/lib/pkgconfig:${LIBDIR}/xml2/lib/pkgconfig:${LIBDIR}/ffi/lib/pkgconfig:$PKG_CONFIG_PATH
${MESON} --prefix ${LIBDIR}/wayland ${MESON_BUILD_TYPE} -Ddocumentation=false -Dtests=false -D "c_link_args=-L${LIBDIR}/ffi/lib -lm" . ../external_wayland
${MESON} --prefix ${LIBDIR}/wayland -Ddocumentation=false -Dtests=false -D "c_link_args=-L${LIBDIR}/ffi/lib -lm" . ../external_wayland
BUILD_COMMAND ninja
INSTALL_COMMAND ninja install
)

View File

@@ -7,7 +7,7 @@ ExternalProject_Add(external_wayland_protocols
PREFIX ${BUILD_DIR}/wayland-protocols
# Use `-E` so the `PKG_CONFIG_PATH` can be defined to link against our own WAYLAND.
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH=${LIBDIR}/wayland/lib64/pkgconfig:$PKG_CONFIG_PATH
${MESON} --prefix ${LIBDIR}/wayland-protocols ${MESON_BUILD_TYPE} . ../external_wayland_protocols -Dtests=false
${MESON} --prefix ${LIBDIR}/wayland-protocols . ../external_wayland_protocols -Dtests=false
BUILD_COMMAND ninja
INSTALL_COMMAND ninja install
)

View File

@@ -17,13 +17,11 @@ ExternalProject_Add(external_xvidcore
INSTALL_DIR ${LIBDIR}/xvidcore
)
if(WIN32)
ExternalProject_Add_Step(external_xvidcore after_install
COMMAND ${CMAKE_COMMAND} -E rename ${LIBDIR}/xvidcore/lib/xvidcore.a ${LIBDIR}/xvidcore/lib/libxvidcore.a || true
COMMAND ${CMAKE_COMMAND} -E remove ${LIBDIR}/xvidcore/lib/xvidcore.dll.a
DEPENDEES install
)
endif()
ExternalProject_Add_Step(external_xvidcore after_install
COMMAND ${CMAKE_COMMAND} -E rename ${LIBDIR}/xvidcore/lib/xvidcore.a ${LIBDIR}/xvidcore/lib/libxvidcore.a || true
COMMAND ${CMAKE_COMMAND} -E remove ${LIBDIR}/xvidcore/lib/xvidcore.dll.a
DEPENDEES install
)
if(MSVC)
set_target_properties(external_xvidcore PROPERTIES FOLDER Mingw)

View File

@@ -517,7 +517,7 @@ OPENEXR_FORCE_REBUILD=false
OPENEXR_SKIP=false
_with_built_openexr=false
OIIO_VERSION="2.4.9.0"
OIIO_VERSION="2.4.6.0"
OIIO_VERSION_SHORT="2.4"
OIIO_VERSION_MIN="2.2.0"
OIIO_VERSION_MEX="2.5.0"
@@ -6615,9 +6615,11 @@ print_info() {
fi
if [ -d $INST/oiio ]; then
_1="-D OPENIMAGEIO_ROOT_DIR=$INST/oiio"
_1="-D WITH_OPENIMAGEIO=ON"
_2="-D OPENIMAGEIO_ROOT_DIR=$INST/oiio"
PRINT " $_1"
_buildargs="$_buildargs $_1"
PRINT " $_2"
_buildargs="$_buildargs $_1 $_2"
fi
if [ "$OSL_SKIP" = false ]; then

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: GPL-2.0-or-later
# This script is part of the official build environment, see wiki page for details.
# https://wiki.blender.org/wiki/Building_Blender/Other/Rocky8ReleaseEnvironment
# This script is part of the official build environment, see WIKI page for details.
# https://wiki.blender.org/wiki/Building_Blender/Other/CentOS7ReleaseEnvironment
set -e
@@ -59,7 +59,7 @@ PACKAGES_FOR_LIBS=(
automake
libtool
# Used to set rpath on shared libraries
# TODO: why is this needed?
patchelf
# Builds generated by meson use Ninja for the actual build.

View File

@@ -1,74 +0,0 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: GPL-2.0-or-later

# Wrapper around `make` used when building the dependencies.
#
# This script ensures:
# - One dependency is built at a time.
# - That dependency uses all available cores.
#
# Without this, simply calling `make -j$(nproc)` from the `${CMAKE_BUILD_DIR}/deps/`
# directory will build many projects at once.
#
# This is undesirable for the following reasons:
#
# - The output from projects is mixed together,
#   making it difficult to track down the cause of a build failure.
#
# - Larger dependencies such as LLVM can bottleneck the build process,
#   making it necessary to cancel the build and manually run build commands in each directory.
#
# - Building many projects at once means canceling (Control-C) can lead to the build being in an undefined state.
#   It's possible canceling happens as a patch is being applied or files are being copied.
#   (steps that aren't part of the compilation process where it's typically safe to cancel).
#
# How it works: the script passes itself as `MAKE`, so every recursive make call
# re-enters this script. `MY_MAKE_CALL_LEVEL` tracks the recursion depth and is
# used to select the jobs setting written to `MAKEFLAGS` at each level.

if [[ -z "$MY_MAKE_CALL_LEVEL" ]]; then
  # Top-level invocation (the variable is only set by this script).
  export MY_MAKE_CALL_LEVEL=0
  # Remember the user's flags so every nested level can start from them.
  export MY_MAKEFLAGS=$MAKEFLAGS

  # Extract the jobs argument (`-jN`, `-j N`, `--jobs=N`).
  #
  # NOTE: do not `shift` inside this loop. The loop iterates over an already
  # expanded copy of "$@", so `shift` would not advance the loop, it would only
  # drop the *first* positional parameter and the final `exec make ... "$@"`
  # would silently lose an unrelated argument.
  add_next=0
  for i in "$@"; do
    if (( add_next == 1 )); then
      # The previous argument was a bare `-j`: only a number may be its value.
      # Anything else is an ordinary argument and is handled by the `case` below.
      add_next=0
      if [[ "$i" =~ ^[0-9]+$ ]]; then
        MY_JOBS_ARG="$MY_JOBS_ARG $i"
        continue
      fi
    fi
    case $i in
      -j*)
        MY_JOBS_ARG=$i
        if [ "$MY_JOBS_ARG" = "-j" ]; then
          # The job count (if any) is the next argument.
          add_next=1
        fi
        ;;
      --jobs=*)
        MY_JOBS_ARG=$i
        ;;
    esac
  done
  unset i add_next

  # No jobs argument given: default to one job per core.
  if [[ -z "$MY_JOBS_ARG" ]]; then
    MY_JOBS_ARG="-j$(nproc)"
  fi
  export MY_JOBS_ARG

  # Support user defined `MAKEFLAGS`.
  export MAKEFLAGS="$MY_MAKEFLAGS -j1"
else
  export MY_MAKE_CALL_LEVEL=$(( MY_MAKE_CALL_LEVEL + 1 ))
  if (( MY_MAKE_CALL_LEVEL == 1 )); then
    # Important to set jobs to 1, otherwise user defined jobs argument is used.
    export MAKEFLAGS="$MY_MAKEFLAGS -j1"
  elif (( MY_MAKE_CALL_LEVEL == 2 )); then
    # This is the level used by each sub-project.
    export MAKEFLAGS="$MY_MAKEFLAGS $MY_JOBS_ARG"
  fi
  # Else leave `MAKEFLAGS` as-is, avoids setting a high number of jobs on recursive
  # calls (which may easily run out of memory). Let the job-server handle the rest.
fi

# Useful for troubleshooting the wrapper.
# echo "Call level: $MY_MAKE_CALL_LEVEL, args=$@".

# Call actual make but ensure recursive calls run via this script.
exec make MAKE="$0" "$@"

View File

@@ -1,7 +1,7 @@
diff -Naur external_igc_opencl_clang.orig/CMakeLists.txt external_igc_opencl_clang/CMakeLists.txt
--- external_igc_opencl_clang.orig/CMakeLists.txt 2022-03-16 05:51:10 -0600
+++ external_igc_opencl_clang/CMakeLists.txt 2022-05-23 10:40:09 -0600
@@ -147,22 +147,24 @@
@@ -126,22 +126,24 @@
)
endif()

File diff suppressed because it is too large Load Diff

View File

@@ -80,7 +80,6 @@ set(_CLANG_FIND_COMPONENTS
clangAST
clangLex
clangBasic
clangSupport
)
set(_CLANG_LIBRARIES)
@@ -95,9 +94,7 @@ foreach(COMPONENT ${_CLANG_FIND_COMPONENTS})
PATH_SUFFIXES
lib64 lib
)
if(CLANG_${UPPERCOMPONENT}_LIBRARY)
list(APPEND _CLANG_LIBRARIES "${CLANG_${UPPERCOMPONENT}_LIBRARY}")
endif()
list(APPEND _CLANG_LIBRARIES "${CLANG_${UPPERCOMPONENT}_LIBRARY}")
endforeach()

View File

@@ -23,19 +23,19 @@ if(EXISTS ${SOURCE_DIR}/.git)
if(MY_WC_BRANCH STREQUAL "HEAD")
# Detached HEAD, check whether commit hash is reachable
# in the main branch
# in the master branch
execute_process(COMMAND git rev-parse --short=12 HEAD
WORKING_DIRECTORY ${SOURCE_DIR}
OUTPUT_VARIABLE MY_WC_HASH
OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND git branch --list main blender-v* --contains ${MY_WC_HASH}
execute_process(COMMAND git branch --list master blender-v* --contains ${MY_WC_HASH}
WORKING_DIRECTORY ${SOURCE_DIR}
OUTPUT_VARIABLE _git_contains_check
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT _git_contains_check STREQUAL "")
set(MY_WC_BRANCH "main")
set(MY_WC_BRANCH "master")
else()
execute_process(COMMAND git show-ref --tags -d
WORKING_DIRECTORY ${SOURCE_DIR}
@@ -48,7 +48,7 @@ if(EXISTS ${SOURCE_DIR}/.git)
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(_git_tag_hashes MATCHES "${_git_head_hash}")
set(MY_WC_BRANCH "main")
set(MY_WC_BRANCH "master")
else()
execute_process(COMMAND git branch --contains ${MY_WC_HASH}
WORKING_DIRECTORY ${SOURCE_DIR}

View File

@@ -36,7 +36,6 @@ set(WITH_IMAGE_WEBP OFF CACHE BOOL "" FORCE)
set(WITH_INPUT_IME OFF CACHE BOOL "" FORCE)
set(WITH_INPUT_NDOF OFF CACHE BOOL "" FORCE)
set(WITH_INTERNATIONAL OFF CACHE BOOL "" FORCE)
set(WITH_IO_PLY OFF CACHE BOOL "" FORCE)
set(WITH_IO_STL OFF CACHE BOOL "" FORCE)
set(WITH_IO_WAVEFRONT_OBJ OFF CACHE BOOL "" FORCE)
set(WITH_IO_GPENCIL OFF CACHE BOOL "" FORCE)
@@ -53,6 +52,7 @@ set(WITH_OPENAL OFF CACHE BOOL "" FORCE)
set(WITH_OPENCOLLADA OFF CACHE BOOL "" FORCE)
set(WITH_OPENCOLORIO OFF CACHE BOOL "" FORCE)
set(WITH_OPENIMAGEDENOISE OFF CACHE BOOL "" FORCE)
set(WITH_OPENIMAGEIO OFF CACHE BOOL "" FORCE)
set(WITH_OPENMP OFF CACHE BOOL "" FORCE)
set(WITH_OPENSUBDIV OFF CACHE BOOL "" FORCE)
set(WITH_OPENVDB OFF CACHE BOOL "" FORCE)

View File

@@ -85,7 +85,7 @@ if(NOT APPLE)
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_ONEAPI_BINARIES ON CACHE BOOL "" FORCE)
endif()

View File

@@ -11,11 +11,11 @@
mkdir ~/blender-git
cd ~/blender-git
git clone https://projects.blender.org/blender/blender.git
git clone http://git.blender.org/blender.git
cd blender
git submodule update --init --recursive
git submodule foreach git checkout main
git submodule foreach git pull --rebase origin main
git submodule foreach git checkout master
git submodule foreach git pull --rebase origin master
# create build dir
mkdir ~/blender-git/build-cmake
@@ -35,7 +35,7 @@ ln -s ~/blender-git/build-cmake/bin/blender ~/blender-git/blender/blender.bin
echo ""
echo "* Useful Commands *"
echo " Run Blender: ~/blender-git/blender/blender.bin"
echo " Update Blender: git pull --rebase; git submodule foreach git pull --rebase origin main"
echo " Update Blender: git pull --rebase; git submodule foreach git pull --rebase origin master"
echo " Reconfigure Blender: cd ~/blender-git/build-cmake ; cmake ."
echo " Build Blender: cd ~/blender-git/build-cmake ; make"
echo ""

View File

@@ -544,7 +544,7 @@ endfunction()
function(setup_platform_linker_libs
target
)
# jemalloc must be early in the list, to be before pthread (see #57998).
# jemalloc must be early in the list, to be before pthread (see T57998)
if(WITH_MEM_JEMALLOC)
target_link_libraries(${target} ${JEMALLOC_LIBRARIES})
endif()
@@ -702,7 +702,6 @@ macro(remove_strict_flags)
endif()
if(MSVC)
remove_cc_flag(/w34100) # Restore warn C4100 (unreferenced formal parameter) back to w4
remove_cc_flag(/w34189) # Restore warn C4189 (unused variable) back to w4
endif()
@@ -722,7 +721,7 @@ macro(remove_extra_strict_flags)
endif()
if(MSVC)
remove_cc_flag(/w34100) # Restore warn C4100 (unreferenced formal parameter) back to w4
# TODO
endif()
endmacro()
@@ -1091,7 +1090,7 @@ function(msgfmt_simple
add_custom_command(
OUTPUT ${_file_to}
COMMAND ${CMAKE_COMMAND} -E make_directory ${_file_to_path}
COMMAND ${CMAKE_COMMAND} -E env ${PLATFORM_ENV_BUILD} "$<TARGET_FILE:msgfmt>" ${_file_from} ${_file_to}
COMMAND "$<TARGET_FILE:msgfmt>" ${_file_from} ${_file_to}
DEPENDS msgfmt ${_file_from})
set_source_files_properties(${_file_to} PROPERTIES GENERATED TRUE)

View File

@@ -270,7 +270,19 @@ if(WITH_PUGIXML)
find_package(PugiXML REQUIRED)
endif()
find_package(OpenImageIO REQUIRED)
if(WITH_OPENIMAGEIO)
find_package(OpenImageIO)
list(APPEND OPENIMAGEIO_LIBRARIES
${PNG_LIBRARIES}
${JPEG_LIBRARIES}
${TIFF_LIBRARY}
${OPENEXR_LIBRARIES}
${OPENJPEG_LIBRARIES}
${ZLIB_LIBRARIES}
)
set(OPENIMAGEIO_DEFINITIONS "-DOIIO_STATIC_BUILD")
set(OPENIMAGEIO_IDIFF "${LIBDIR}/openimageio/bin/idiff")
endif()
add_bundled_libraries(openimageio/lib)
if(WITH_OPENCOLORIO)
@@ -428,7 +440,7 @@ string(APPEND PLATFORM_LINKFLAGS " -stdlib=libc++")
# Make stack size more similar to Embree, required for Embree.
string(APPEND PLATFORM_LINKFLAGS_EXECUTABLE " -Wl,-stack_size,0x100000")
# Suppress ranlib "has no symbols" warnings (workaround for #48250).
# Suppress ranlib "has no symbols" warnings (workaround for T48250)
set(CMAKE_C_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
# llvm-ranlib doesn't support this flag. Xcode's libtool does.

View File

@@ -438,7 +438,32 @@ if(WITH_IMAGE_WEBP)
set_and_warn_library_found("WebP" WEBP_FOUND WITH_IMAGE_WEBP)
endif()
find_package_wrapper(OpenImageIO REQUIRED)
if(WITH_OPENIMAGEIO)
find_package_wrapper(OpenImageIO)
set(OPENIMAGEIO_LIBRARIES
${OPENIMAGEIO_LIBRARIES}
${PNG_LIBRARIES}
${JPEG_LIBRARIES}
${ZLIB_LIBRARIES}
)
set(OPENIMAGEIO_DEFINITIONS "")
if(WITH_BOOST)
list(APPEND OPENIMAGEIO_LIBRARIES "${BOOST_LIBRARIES}")
endif()
if(WITH_IMAGE_TIFF)
list(APPEND OPENIMAGEIO_LIBRARIES "${TIFF_LIBRARY}")
endif()
if(WITH_IMAGE_OPENEXR)
list(APPEND OPENIMAGEIO_LIBRARIES "${OPENEXR_LIBRARIES}")
endif()
if(WITH_IMAGE_WEBP)
list(APPEND OPENIMAGEIO_LIBRARIES "${WEBP_LIBRARIES}")
endif()
set_and_warn_library_found("OPENIMAGEIO" OPENIMAGEIO_FOUND WITH_OPENIMAGEIO)
endif()
add_bundled_libraries(openimageio/lib)
if(WITH_OPENCOLORIO)

View File

@@ -121,7 +121,7 @@ if(WITH_WINDOWS_BUNDLE_CRT)
include(InstallRequiredSystemLibraries)
# ucrtbase(d).dll cannot be in the manifest, due to the way windows 10 handles
# redirects for this dll, for details see #88813.
# redirects for this dll, for details see T88813.
foreach(lib ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS})
string(FIND ${lib} "ucrtbase" pos)
if(NOT pos EQUAL -1)
@@ -178,8 +178,8 @@ if(NOT MSVC_CLANG)
endif()
if(WITH_WINDOWS_SCCACHE AND CMAKE_VS_MSBUILD_COMMAND)
message(WARNING "Disabling sccache, sccache is not supported with msbuild")
set(WITH_WINDOWS_SCCACHE OFF)
message(WARNING "Disabling sccache, sccache is not supported with msbuild")
set(WITH_WINDOWS_SCCACHE OFF)
endif()
# Debug Symbol format
@@ -295,7 +295,7 @@ unset(MATERIALX_LIB_FOLDER_EXISTS)
if(NOT MSVC_CLANG AND # Available with MSVC 15.7+ but not for CLANG.
NOT WITH_WINDOWS_SCCACHE AND # And not when sccache is enabled
NOT VS_CLANG_TIDY) # Clang-tidy does not like these options
add_compile_options(/experimental:external /external:I "${LIBDIR}" /external:W0)
add_compile_options(/experimental:external /external:templates- /external:I "${LIBDIR}" /external:W0)
endif()
# Add each of our libraries to our cmake_prefix_path so find_package() could work
@@ -522,28 +522,6 @@ if(WITH_PYTHON)
set(PYTHON_LIBRARIES debug "${PYTHON_LIBRARY_DEBUG}" optimized "${PYTHON_LIBRARY}" )
endif()
if(NOT WITH_WINDOWS_FIND_MODULES)
# even if boost is off, we still need to install the dlls when we use our lib folder since
# some of the other dependencies may need them. For this to work, BOOST_VERSION,
# BOOST_POSTFIX, and BOOST_DEBUG_POSTFIX need to be set.
set(BOOST ${LIBDIR}/boost)
set(BOOST_INCLUDE_DIR ${BOOST}/include)
set(BOOST_LIBPATH ${BOOST}/lib)
set(BOOST_VERSION_HEADER ${BOOST_INCLUDE_DIR}/boost/version.hpp)
if(EXISTS ${BOOST_VERSION_HEADER})
file(STRINGS "${BOOST_VERSION_HEADER}" BOOST_LIB_VERSION REGEX "#define BOOST_LIB_VERSION ")
if(BOOST_LIB_VERSION MATCHES "#define BOOST_LIB_VERSION \"([0-9_]+)\"")
set(BOOST_VERSION "${CMAKE_MATCH_1}")
endif()
endif()
if(NOT BOOST_VERSION)
message(FATAL_ERROR "Unable to determine Boost version")
endif()
set(BOOST_POSTFIX "vc142-mt-x64-${BOOST_VERSION}")
set(BOOST_DEBUG_POSTFIX "vc142-mt-gyd-x64-${BOOST_VERSION}")
set(BOOST_PREFIX "")
endif()
if(WITH_BOOST)
if(WITH_CYCLES AND WITH_CYCLES_OSL)
set(boost_extra_libs wave)
@@ -559,6 +537,22 @@ if(WITH_BOOST)
endif()
if(NOT Boost_FOUND)
warn_hardcoded_paths(BOOST)
set(BOOST ${LIBDIR}/boost)
set(BOOST_INCLUDE_DIR ${BOOST}/include)
set(BOOST_LIBPATH ${BOOST}/lib)
set(BOOST_VERSION_HEADER ${BOOST_INCLUDE_DIR}/boost/version.hpp)
if(EXISTS ${BOOST_VERSION_HEADER})
file(STRINGS "${BOOST_VERSION_HEADER}" BOOST_LIB_VERSION REGEX "#define BOOST_LIB_VERSION ")
if(BOOST_LIB_VERSION MATCHES "#define BOOST_LIB_VERSION \"([0-9_]+)\"")
set(BOOST_VERSION "${CMAKE_MATCH_1}")
endif()
endif()
if(NOT BOOST_VERSION)
message(FATAL_ERROR "Unable to determine Boost version")
endif()
set(BOOST_POSTFIX "vc142-mt-x64-${BOOST_VERSION}")
set(BOOST_DEBUG_POSTFIX "vc142-mt-gyd-x64-${BOOST_VERSION}")
set(BOOST_PREFIX "")
# This file is new in 3.4; if it does not exist, assume we are building against 3.3 libs.
set(BOOST_34_TRIGGER_FILE ${BOOST_LIBPATH}/${BOOST_PREFIX}boost_python310-${BOOST_DEBUG_POSTFIX}.lib)
if(NOT EXISTS ${BOOST_34_TRIGGER_FILE})
@@ -608,18 +602,25 @@ if(WITH_BOOST)
set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB")
endif()
windows_find_package(OpenImageIO)
if(NOT OpenImageIO_FOUND)
set(OPENIMAGEIO ${LIBDIR}/OpenImageIO)
set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib)
set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO}/include)
set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR})
set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib)
set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib)
set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
if(WITH_OPENIMAGEIO)
windows_find_package(OpenImageIO)
if(NOT OpenImageIO_FOUND)
set(OPENIMAGEIO ${LIBDIR}/OpenImageIO)
set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib)
set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO}/include)
set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR})
set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib)
set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib)
set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
endif()
set(OPENIMAGEIO_DEFINITIONS "-DUSE_TBB=0")
set(OPENIMAGEIO_IDIFF "${OPENIMAGEIO}/bin/idiff.exe")
# If the .dll does not exist, assume it is a static OIIO
if(NOT EXISTS ${OPENIMAGEIO}/bin/OpenImageIO.dll)
add_definitions(-DOIIO_STATIC_DEFINE)
endif()
add_definitions(-DOIIO_NO_SSE=1)
endif()
add_definitions(-DOIIO_NO_SSE=1)
if(WITH_LLVM)
set(LLVM_ROOT_DIR ${LIBDIR}/llvm CACHE PATH "Path to the LLVM installation")
@@ -900,11 +901,11 @@ endif()
if(WINDOWS_PYTHON_DEBUG)
# Include the system scripts in the blender_python_system_scripts project.
file(GLOB_RECURSE inFiles "${CMAKE_SOURCE_DIR}/scripts/*.*" )
file(GLOB_RECURSE inFiles "${CMAKE_SOURCE_DIR}/release/scripts/*.*" )
add_custom_target(blender_python_system_scripts SOURCES ${inFiles})
foreach(_source IN ITEMS ${inFiles})
get_filename_component(_source_path "${_source}" PATH)
string(REPLACE "${CMAKE_SOURCE_DIR}/scripts/" "" _source_path "${_source_path}")
string(REPLACE "${CMAKE_SOURCE_DIR}/release/scripts/" "" _source_path "${_source_path}")
string(REPLACE "/" "\\" _group_path "${_source_path}")
source_group("${_group_path}" FILES "${_source}")
endforeach()
@@ -939,7 +940,7 @@ if(WINDOWS_PYTHON_DEBUG)
file(WRITE ${USER_PROPS_FILE} "<?xml version=\"1.0\" encoding=\"utf-8\"?>
<Project DefaultTargets=\"Build\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">
<PropertyGroup>
<LocalDebuggerCommandArguments>-con --env-system-scripts \"${CMAKE_SOURCE_DIR}/scripts\" </LocalDebuggerCommandArguments>
<LocalDebuggerCommandArguments>-con --env-system-scripts \"${CMAKE_SOURCE_DIR}/release/scripts\" </LocalDebuggerCommandArguments>
</PropertyGroup>
</Project>")
endif()
@@ -993,23 +994,6 @@ if(WITH_VULKAN_BACKEND)
endif()
endif()
if(WITH_VULKAN_BACKEND)
if(EXISTS ${LIBDIR}/shaderc)
set(SHADERC_FOUND On)
set(SHADERC_ROOT_DIR ${LIBDIR}/shaderc)
set(SHADERC_INCLUDE_DIR ${SHADERC_ROOT_DIR}/include)
set(SHADERC_INCLUDE_DIRS ${SHADERC_INCLUDE_DIR})
set(SHADERC_LIBRARY
DEBUG ${SHADERC_ROOT_DIR}/lib/shaderc_shared_d.lib
OPTIMIZED ${SHADERC_ROOT_DIR}/lib/shaderc_shared.lib
)
set(SHADERC_LIBRARIES ${SHADERC_LIBRARY})
else()
message(WARNING "Shaderc was not found, disabling WITH_VULKAN_BACKEND")
set(WITH_VULKAN_BACKEND OFF)
endif()
endif()
if(WITH_CYCLES AND WITH_CYCLES_PATH_GUIDING)
find_package(openpgl QUIET)
if(openpgl_FOUND)
@@ -1056,7 +1040,7 @@ endif()
# Environment variables to run precompiled executables that needed libraries.
list(JOIN PLATFORM_BUNDLED_LIBRARY_DIRS ";" _library_paths)
set(PLATFORM_ENV_BUILD_DIRS "${LIBDIR}/tbb/bin\;${LIBDIR}/OpenImageIO/bin\;${LIBDIR}/boost/lib\;${LIBDIR}/openexr/bin\;${LIBDIR}/imath/bin\;${PATH}")
set(PLATFORM_ENV_BUILD_DIRS "${LIBDIR}/OpenImageIO/bin\;${LIBDIR}/boost/lib\;${LIBDIR}/openexr/bin\;${LIBDIR}/imath/bin\;${PATH}")
set(PLATFORM_ENV_BUILD "PATH=${PLATFORM_ENV_BUILD_DIRS}")
# Install needs the additional folders from PLATFORM_ENV_BUILD_DIRS as well, as tools like idiff and abcls use the release mode dlls
set(PLATFORM_ENV_INSTALL "PATH=${CMAKE_INSTALL_PREFIX_WITH_CONFIG}/blender.shared/\;${PLATFORM_ENV_BUILD_DIRS}\;$ENV{PATH}")

View File

@@ -142,7 +142,7 @@ def cmake_advanced_info() -> Union[Tuple[List[str], List[Tuple[str, str]]], Tupl
make_exe = cmake_cache_var("CMAKE_MAKE_PROGRAM")
if make_exe is None:
print("Make command not found: CMAKE_MAKE_PROGRAM")
print("Make command not found in: %r not found" % project_path)
return None, None
make_exe_basename = os.path.basename(make_exe)

View File

@@ -1,3 +1,53 @@
#
# Used by Buildbot build pipeline make_update.py script only for now
# We intended to update the make_update.py in the branches to use this file eventually
#
update-code:
git:
submodules:
- branch: master
commit_id: HEAD
path: release/scripts/addons
- branch: master
commit_id: HEAD
path: release/scripts/addons_contrib
- branch: master
commit_id: HEAD
path: release/datafiles/locale
- branch: master
commit_id: HEAD
path: source/tools
svn:
libraries:
darwin-arm64:
branch: trunk
commit_id: HEAD
path: lib/darwin_arm64
darwin-x86_64:
branch: trunk
commit_id: HEAD
path: lib/darwin
linux-x86_64:
branch: trunk
commit_id: HEAD
path: lib/linux_x86_64_glibc_228
windows-amd64:
branch: trunk
commit_id: HEAD
path: lib/win64_vc15
tests:
branch: trunk
commit_id: HEAD
path: lib/tests
benchmarks:
branch: trunk
commit_id: HEAD
path: lib/benchmarks
assets:
branch: trunk
commit_id: HEAD
path: lib/assets
#
# Buildbot only configs
#

View File

@@ -58,7 +58,7 @@ Each Blender release supports one Python version, and the package is only compat
## Source Code
* [Releases](https://download.blender.org/source/)
* Repository: [projects.blender.org/blender/blender.git](https://projects.blender.org/blender/blender)
* Repository: [git.blender.org/blender.git](https://git.blender.org/gitweb/gitweb.cgi/blender.git)
## Credits

View File

@@ -135,7 +135,7 @@ def submodules_to_manifest(
submodule = line.split()[1]
# Don't use native slashes as GIT for MS-Windows outputs forward slashes.
if skip_addon_contrib and submodule == "scripts/addons_contrib":
if skip_addon_contrib and submodule == "release/scripts/addons_contrib":
continue
for path in git_ls_files(blender_srcdir / submodule):

View File

@@ -16,28 +16,14 @@ import shutil
import sys
import make_utils
from pathlib import Path
from make_utils import call, check_output
from urllib.parse import urljoin
from typing import (
List,
Iterable,
Optional,
)
class Submodule:
    """Plain record of a submodule's `path`, `branch` and `branch_fallback` strings."""

    path: str
    branch: str
    branch_fallback: str

    def __init__(self, path: str, branch: str, branch_fallback: str) -> None:
        # Store the three values unchanged, in declaration order.
        self.path, self.branch, self.branch_fallback = path, branch, branch_fallback
def print_stage(text: str) -> None:
print("")
print(text)
@@ -56,7 +42,6 @@ def parse_arguments() -> argparse.Namespace:
parser.add_argument("--svn-branch", default=None)
parser.add_argument("--git-command", default="git")
parser.add_argument("--use-linux-libraries", action="store_true")
parser.add_argument("--architecture", type=str, choices=("x86_64", "amd64", "arm64",))
return parser.parse_args()
@@ -66,19 +51,6 @@ def get_blender_git_root() -> str:
# Setup for precompiled libraries and tests from svn.
def get_effective_architecture(args: argparse.Namespace) -> str:
    """
    Return the architecture name to use.

    An explicit `args.architecture` always wins and is returned unchanged. Otherwise the value is
    detected from the `platform` module: "arm64" when `platform.version()` mentions "ARM64",
    else the lower-cased `platform.machine()`.
    """
    requested = args.architecture
    if not requested:
        # Check platform.version to detect arm64 with x86_64 python binary.
        reports_arm64 = "ARM64" in platform.version()
        return "arm64" if reports_arm64 else platform.machine().lower()

    assert isinstance(requested, str)
    return requested
def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None:
svn_non_interactive = [args.svn_command, '--non-interactive']
@@ -86,11 +58,11 @@ def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None
svn_url = make_utils.svn_libraries_base_url(release_version, args.svn_branch)
# Checkout precompiled libraries
architecture = get_effective_architecture(args)
if sys.platform == 'darwin':
if architecture == 'arm64':
# Check platform.version to detect arm64 with x86_64 python binary.
if platform.machine() == 'arm64' or ('ARM64' in platform.version()):
lib_platform = "darwin_arm64"
elif architecture == 'x86_64':
elif platform.machine() == 'x86_64':
lib_platform = "darwin"
else:
lib_platform = None
@@ -198,7 +170,7 @@ def git_update_skip(args: argparse.Namespace, check_remote_exists: bool = True)
return "rebase or merge in progress, complete it first"
# Abort if uncommitted changes.
changes = check_output([args.git_command, 'status', '--porcelain', '--untracked-files=no', '--ignore-submodules'])
changes = check_output([args.git_command, 'status', '--porcelain', '--untracked-files=no'])
if len(changes) != 0:
return "you have unstaged changes"
@@ -212,296 +184,97 @@ def git_update_skip(args: argparse.Namespace, check_remote_exists: bool = True)
return ""
def use_upstream_workflow(args: argparse.Namespace) -> bool:
    """Return whether `make_utils.git_remote_exist` reports a remote named "upstream"."""
    has_upstream_remote = make_utils.git_remote_exist(args.git_command, "upstream")
    return has_upstream_remote
def work_tree_update_upstream_workflow(args: argparse.Namespace, use_fetch: bool = True) -> str:
    """
    Fast-forward the current branch to the branch of the same name on the "upstream" remote.

    When `use_fetch` is true, "upstream" is fetched first.

    Returns a status string:
    - An empty string when the fast-forward merge command succeeded.
    - "no_branch" when `upstream/<current branch>` does not exist.
    - A newline-terminated "Unable to fast forward" message when the merge command
      returned a non-zero exit code.
    """
    git = args.git_command
    current_branch = make_utils.git_branch(git)

    if use_fetch:
        call((git, "fetch", "upstream"))

    upstream_ref = f"upstream/{current_branch}"
    if not make_utils.git_branch_exists(git, upstream_ref):
        return "no_branch"

    # Only a fast-forward is attempted, a failure is reported to the caller instead of exiting.
    merge_failed = call((git, "merge", "--ff-only", upstream_ref), exit_on_error=False) != 0
    return "Unable to fast forward\n" if merge_failed else ""
def work_tree_update(args: argparse.Namespace, use_fetch: bool = True) -> str:
    """
    Update the Git working tree in the current directory.

    When an "upstream" remote is configured (fork-style organization) the upstream workflow is
    tried first and its status string is returned, unless it reports that the branch does not
    exist in upstream. In every other case a plain `pull --rebase` is run and an empty string
    is returned.
    """
    if use_upstream_workflow(args):
        upstream_status = work_tree_update_upstream_workflow(args, use_fetch)
    else:
        upstream_status = "no_branch"

    if upstream_status != "no_branch":
        return upstream_status

    # Either there is no upstream remote, or upstream is configured but the local branch is
    # not in the upstream: update the branch from the remote it tracks (the fork).
    call([args.git_command, "pull", "--rebase"])
    return ""
# Update blender repository.
def blender_update(args: argparse.Namespace) -> str:
def blender_update(args: argparse.Namespace) -> None:
print_stage("Updating Blender Git Repository")
return work_tree_update(args)
call([args.git_command, "pull", "--rebase"])
def resolve_external_url(blender_url: str, repo_name: str) -> str:
    """
    Return the URL of `repo_name` as a sibling of the repository at `blender_url`.

    For example `https://host/owner/blender` and `blender-addons` give
    `https://host/owner/blender-addons`.
    """
    # The trailing slash makes `urljoin` treat the last component of the base as a directory,
    # so that "../" steps out of the repository itself rather than out of its owner.
    base_as_directory = f"{blender_url}/"
    sibling_reference = f"../{repo_name}"
    return urljoin(base_as_directory, sibling_reference)
# Update submodules.
def submodules_update(
args: argparse.Namespace,
release_version: Optional[str],
branch: Optional[str],
) -> str:
print_stage("Updating Submodules")
if make_utils.command_missing(args.git_command):
sys.stderr.write("git not found, can't update code\n")
sys.exit(1)
def external_script_copy_old_submodule_over(args: argparse.Namespace, directory_name: str) -> None:
    """
    Turn the old `release/scripts/<directory_name>` submodule into a checkout at
    `scripts/<directory_name>`.

    The working tree is moved, its `.git` file is replaced by a copy of the repository stored
    under the main repository's `.git/modules`, and the `core.worktree` setting is removed from
    the copied configuration.
    """
    repo_root = Path(get_blender_git_root())
    target_dir = repo_root / "scripts" / directory_name
    target_dot_git = target_dir / ".git"

    legacy_relative_dir = Path("release") / "scripts" / directory_name
    print(f"Moving {legacy_relative_dir} to scripts/{directory_name} ...")
    shutil.move(repo_root / legacy_relative_dir, target_dir)

    # Remove old ".git" which is a file with path to a submodule bare repo inside of main
    # repo .git/modules directory.
    target_dot_git.unlink()

    bare_repo_relative_dir = Path(".git") / "modules" / legacy_relative_dir
    print(f"Copying {bare_repo_relative_dir} to scripts/{directory_name}/.git ...")
    shutil.copytree(repo_root / bare_repo_relative_dir, target_dot_git)

    copied_config = target_dot_git / "config"
    call((args.git_command, "config", "--file", str(copied_config), "--unset", "core.worktree"))
def external_script_initialize_if_needed(args: argparse.Namespace,
                                         repo_name: str,
                                         directory_name: str) -> None:
    """
    Create the checkout of an external repository at `scripts/<directory_name>` when missing.

    Does nothing when the directory already exists. An old `release/scripts/<directory_name>`
    submodule is converted in place when present, otherwise `repo_name` is cloned.
    """
    repo_root = Path(get_blender_git_root())
    target_dir = repo_root / "scripts" / directory_name
    if target_dir.exists():
        return

    print(f"Initializing scripts/{directory_name} ...")

    # Re-use the old submodule checkout when there is one and the main `.git` is a directory.
    legacy_dot_git = repo_root / "release" / "scripts" / directory_name / ".git"
    if legacy_dot_git.exists() and (repo_root / ".git").is_dir():
        external_script_copy_old_submodule_over(args, directory_name)
        return

    git = args.git_command
    if use_upstream_workflow(args):
        remote_name = "upstream"
    else:
        remote_name = "origin"

    clone_url = resolve_external_url(make_utils.git_get_remote_url(git, remote_name), repo_name)

    # When running `make update` from a freshly cloned fork check whether the fork of the
    # submodule is available. If not, switch to the submodule relative to the main blender
    # repository.
    if remote_name == "origin" and not make_utils.git_is_remote_repository(git, clone_url):
        clone_url = resolve_external_url("https://projects.blender.org/blender/blender", repo_name)

    call((git, "clone", "--origin", remote_name, clone_url, str(target_dir)))
def external_script_add_origin_if_needed(args: argparse.Namespace,
                                         repo_name: str,
                                         directory_name: str) -> None:
    """
    Add a remote called "origin" to `scripts/<directory_name>` when a fork of the external
    repository is available.

    Only acts when the main repository has an "upstream" remote, and the external checkout has
    an "upstream" remote but no "origin" remote yet. The working directory is restored before
    returning.
    """
    if not use_upstream_workflow(args):
        return

    git = args.git_command
    original_cwd = os.getcwd()
    checkout_dir = Path(get_blender_git_root()) / "scripts" / directory_name

    # Resolved while still in the original working directory, before entering the checkout.
    fork_url = resolve_external_url(make_utils.git_get_remote_url(git, "origin"), repo_name)

    try:
        os.chdir(checkout_dir)

        # Nothing to do when "origin" is already there, or when there is no "upstream" to rename.
        if make_utils.git_remote_exist(git, "origin"):
            return
        if not make_utils.git_remote_exist(git, "upstream"):
            return

        # Nothing to do either when the fork does not exist.
        if not make_utils.git_is_remote_repository(git, fork_url):
            return

        print(f"Adding origin remote to {directory_name} pointing to fork ...")

        # Non-obvious tricks to introduce the new remote called "origin" to the existing
        # submodule configuration.
        #
        # This is all within the context of creating a fork of a submodule after `make update`
        # has been run and possibly local branches tracking upstream were added.
        #
        # The idea is as follows:
        #
        # - Rename remote "upstream" to "origin", which takes care of changing the names of
        #   remotes the local branches are tracking.
        #
        # - Change the URL of "origin", which so far was still pointing to upstream.
        #
        # - Re-introduce the "upstream" remote, with the same URL as it had prior to rename.
        previous_upstream_url = make_utils.git_get_remote_url(git, "upstream")
        call((git, "remote", "rename", "upstream", "origin"))
        make_utils.git_set_config(git, "remote.origin.url", fork_url)
        call((git, "remote", "add", "upstream", previous_upstream_url))
    finally:
        os.chdir(original_cwd)
def external_scripts_update(args: argparse.Namespace,
repo_name: str,
directory_name: str,
branch: Optional[str]) -> str:
"""Update a single external checkout with the given name in the scripts folder"""
external_script_initialize_if_needed(args, repo_name, directory_name)
external_script_add_origin_if_needed(args, repo_name, directory_name)
print(f"Updating scripts/{directory_name} ...")
cwd = os.getcwd()
blender_git_root = Path(get_blender_git_root())
scripts_dir = blender_git_root / "scripts"
external_dir = scripts_dir / directory_name
# Update externals to appropriate given branch, falling back to main if none is given and/or
# found in a sub-repository.
branch_fallback = "main"
# Update submodules to appropriate given branch,
# falling back to master if none is given and/or found in a sub-repository.
branch_fallback = "master"
if not branch:
branch = branch_fallback
submodules = [
("release/scripts/addons", branch, branch_fallback),
("release/scripts/addons_contrib", branch, branch_fallback),
("release/datafiles/locale", branch, branch_fallback),
("source/tools", branch, branch_fallback),
]
# Initialize submodules only if needed.
for submodule_path, submodule_branch, submodule_branch_fallback in submodules:
if not os.path.exists(os.path.join(submodule_path, ".git")):
call([args.git_command, "submodule", "update", "--init", "--recursive"])
break
# Checkout appropriate branch and pull changes.
skip_msg = ""
try:
os.chdir(external_dir)
msg = git_update_skip(args, check_remote_exists=False)
if msg:
skip_msg += directory_name + " skipped: " + msg + "\n"
else:
# Find a matching branch that exists.
for remote in ("origin", "upstream"):
if make_utils.git_remote_exist(args.git_command, remote):
call([args.git_command, "fetch", remote])
submodule_branch = branch
if make_utils.git_branch_exists(args.git_command, submodule_branch):
pass
elif make_utils.git_branch_exists(args.git_command, branch_fallback):
submodule_branch = branch_fallback
for submodule_path, submodule_branch, submodule_branch_fallback in submodules:
cwd = os.getcwd()
try:
os.chdir(submodule_path)
msg = git_update_skip(args, check_remote_exists=False)
if msg:
skip_msg += submodule_path + " skipped: " + msg + "\n"
else:
# Skip.
submodule_branch = ""
# Find a matching branch that exists.
call([args.git_command, "fetch", "origin"])
if make_utils.git_branch_exists(args.git_command, submodule_branch):
pass
elif make_utils.git_branch_exists(args.git_command, submodule_branch_fallback):
submodule_branch = submodule_branch_fallback
else:
# Skip.
submodule_branch = ""
# Switch to branch and pull.
if submodule_branch:
if make_utils.git_branch(args.git_command) != submodule_branch:
# If the local branch exists just check out to it.
# If there is no local branch but only remote specify an explicit remote.
# Without this explicit specification Git attempts to set-up tracking
# automatically and fails when the branch is available in multiple remotes.
if make_utils.git_local_branch_exists(args.git_command, submodule_branch):
# Switch to branch and pull.
if submodule_branch:
if make_utils.git_branch(args.git_command) != submodule_branch:
call([args.git_command, "checkout", submodule_branch])
elif make_utils.git_remote_exist(args.git_command, "origin"):
call([args.git_command, "checkout", "-t", f"origin/{submodule_branch}"])
elif make_utils.git_remote_exist(args.git_command, "upstream"):
call([args.git_command, "checkout", "-t", f"upstream/{submodule_branch}"])
# Don't use extra fetch since all remotes of interest have been already fetched
# some lines above.
skip_msg += work_tree_update(args, use_fetch=False)
finally:
os.chdir(cwd)
call([args.git_command, "pull", "--rebase", "origin", submodule_branch])
finally:
os.chdir(cwd)
return skip_msg
def scripts_submodules_update(args: argparse.Namespace, branch: Optional[str]) -> str:
    """
    Update the working trees of `addons` and `addons_contrib` within the scripts/ directory.

    Returns the concatenated messages returned by the individual updates.
    """
    # (repository name, directory name inside scripts/), updated in this order.
    external_checkouts = (
        ("blender-addons", "addons"),
        ("blender-addons-contrib", "addons_contrib"),
    )
    return "".join(
        external_scripts_update(args, repo_name, directory_name, branch)
        for repo_name, directory_name in external_checkouts
    )
def submodules_update(args: argparse.Namespace, branch: Optional[str]) -> str:
    """
    Update submodules or other externally tracked source trees.

    Returns the concatenated messages returned by the individual update steps.
    """
    # One entry per group of externally tracked trees.
    collected_messages = [
        scripts_submodules_update(args, branch),
    ]
    return "".join(collected_messages)
if __name__ == "__main__":
args = parse_arguments()
blender_skip_msg = ""
submodules_skip_msg = ""
blender_version = make_utils. parse_blender_version()
if blender_version.cycle != 'alpha':
major = blender_version.version // 100
minor = blender_version.version % 100
branch = f"blender-v{major}.{minor}-release"
release_version: Optional[str] = f"{major}.{minor}"
else:
branch = 'main'
release_version = None
# Test if we are building a specific release version.
branch = make_utils.git_branch(args.git_command)
if branch == 'HEAD':
sys.stderr.write('Blender git repository is in detached HEAD state, must be in a branch\n')
sys.exit(1)
tag = make_utils.git_tag(args.git_command)
release_version = make_utils.git_branch_release_version(branch, tag)
if not args.no_libraries:
svn_update(args, release_version)
if not args.no_blender:
blender_skip_msg = git_update_skip(args)
if not blender_skip_msg:
blender_skip_msg = blender_update(args)
if blender_skip_msg:
blender_skip_msg = "Blender repository skipped: " + blender_skip_msg + "\n"
else:
blender_update(args)
if not args.no_submodules:
submodules_skip_msg = submodules_update(args, branch)
submodules_skip_msg = submodules_update(args, release_version, branch)
# Report any skipped repositories at the end, so it's not as easy to miss.
skip_msg = blender_skip_msg + submodules_skip_msg

View File

@@ -9,9 +9,7 @@ import re
import shutil
import subprocess
import sys
import os
from pathlib import Path
from urllib.parse import urljoin
from typing import (
Sequence,
@@ -21,7 +19,7 @@ from typing import (
def call(cmd: Sequence[str], exit_on_error: bool = True, silent: bool = False) -> int:
if not silent:
print(" ".join([str(x) for x in cmd]))
print(" ".join(cmd))
# Flush to ensure correct order output on Windows.
sys.stdout.flush()
@@ -54,57 +52,13 @@ def check_output(cmd: Sequence[str], exit_on_error: bool = True) -> str:
return output.strip()
def git_local_branch_exists(git_command: str, branch: str) -> bool:
    """Return whether `git rev-parse --verify <branch>` succeeds in the current directory."""
    # Run quietly and without exiting on failure: a non-zero exit code is the "no" answer.
    verify_command = [git_command, "rev-parse", "--verify", branch]
    exit_code = call(verify_command, exit_on_error=False, silent=True)
    return exit_code == 0
def git_branch_exists(git_command: str, branch: str) -> bool:
return (
git_local_branch_exists(git_command, branch) or
call([git_command, "rev-parse", "--verify", "remotes/upstream/" + branch], exit_on_error=False, silent=True) == 0 or
call([git_command, "rev-parse", "--verify", branch], exit_on_error=False, silent=True) == 0 or
call([git_command, "rev-parse", "--verify", "remotes/origin/" + branch], exit_on_error=False, silent=True) == 0
)
def git_get_remote_url(git_command: str, remote_name: str) -> str:
return check_output((git_command, "ls-remote", "--get-url", remote_name))
def git_remote_exist(git_command: str, remote_name: str) -> bool:
"""Check whether there is a remote with the given name"""
# `git ls-remote --get-url upstream` will print an URL if there is such remote configured, and
# otherwise will print "upstream".
remote_url = check_output((git_command, "ls-remote", "--get-url", remote_name))
return remote_url != remote_name
def git_get_resolved_submodule_url(git_command: str, blender_url: str, submodule_path: str) -> str:
git_root = check_output([git_command, "rev-parse", "--show-toplevel"])
dot_gitmodules = os.path.join(git_root, ".gitmodules")
submodule_key_prefix = f"submodule.{submodule_path}"
submodule_key_url = f"{submodule_key_prefix}.url"
gitmodule_url = git_get_config(
git_command, submodule_key_url, file=dot_gitmodules)
# A bit of a trickery to construct final URL.
# Only works for the relative submodule URLs.
#
# Note that unless the LHS URL ends up with a slash urljoin treats the last component as a
# file.
assert gitmodule_url.startswith('..')
return urljoin(blender_url + "/", gitmodule_url)
def git_is_remote_repository(git_command: str, repo: str) -> bool:
"""Returns true if the given repository is a valid/clonable git repo"""
exit_code = call((git_command, "ls-remote", repo, "HEAD"), exit_on_error=False, silent=True)
return exit_code == 0
def git_branch(git_command: str) -> str:
# Get current branch name.
try:
@@ -116,20 +70,6 @@ def git_branch(git_command: str) -> str:
return branch.strip().decode('utf8')
def git_get_config(git_command: str, key: str, file: Optional[str] = None) -> str:
if file:
return check_output([git_command, "config", "--file", file, "--get", key])
return check_output([git_command, "config", "--get", key])
def git_set_config(git_command: str, key: str, value: str, file: Optional[str] = None) -> str:
if file:
return check_output([git_command, "config", "--file", file, key, value])
return check_output([git_command, "config", key, value])
def git_tag(git_command: str) -> Optional[str]:
# Get current tag name.
try:

View File

@@ -0,0 +1,20 @@
if NOT exist "%BLENDER_DIR%\source\tools\.git" (
echo Checking out sub-modules
if not "%GIT%" == "" (
"%GIT%" submodule update --init --recursive --progress
if errorlevel 1 goto FAIL
"%GIT%" submodule foreach git checkout master
if errorlevel 1 goto FAIL
"%GIT%" submodule foreach git pull --rebase origin master
if errorlevel 1 goto FAIL
goto EOF
) else (
echo Blender submodules not found, and git not found in path to retrieve them.
goto FAIL
)
)
goto EOF
:FAIL
exit /b 1
:EOF

View File

@@ -14,7 +14,7 @@ if NOT EXIST %PYTHON% (
exit /b 1
)
set FORMAT_PATHS=%BLENDER_DIR%\tools\utils_maintenance\clang_format_paths.py
set FORMAT_PATHS=%BLENDER_DIR%\source\tools\utils_maintenance\clang_format_paths.py
REM The formatting script expects clang-format to be in the current PATH.
set PATH=%CF_PATH%;%PATH%

View File

@@ -4,9 +4,9 @@ if "%GIT%" == "" (
)
cd "%BLENDER_DIR%"
for /f "delims=" %%i in ('"%GIT%" rev-parse HEAD') do echo Branch_hash=%%i
cd "%BLENDER_DIR%/locale"
cd "%BLENDER_DIR%/release/datafiles/locale"
for /f "delims=" %%i in ('"%GIT%" rev-parse HEAD') do echo Locale_hash=%%i
cd "%BLENDER_DIR%/scripts/addons"
cd "%BLENDER_DIR%/release/scripts/addons"
for /f "delims=" %%i in ('"%GIT%" rev-parse HEAD') do echo Addons_Hash=%%i
cd "%BLENDER_DIR%"
:EOF

View File

@@ -231,7 +231,7 @@ class FileBlockHeader:
self.SDNAIndex = 0
self.Count = 0
self.FileOffset = handle.tell()
# self.Code += ' ' * (4 - len(self.Code))
#self.Code += ' ' * (4 - len(self.Code))
log.debug("found blend-file-block-fileheader {0} {1}".format(self.Code, self.FileOffset))
def skip(self, handle):

View File

@@ -38,7 +38,7 @@ PROJECT_NAME = Blender
# could be handy for archiving the generated documentation or if some version
# control system is used.
PROJECT_NUMBER = V3.6
PROJECT_NUMBER = V3.5
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a

View File

@@ -37,7 +37,7 @@ def draw_callback_px(self, context):
# BLF drawing routine
font_id = font_info["font_id"]
blf.position(font_id, 2, 80, 0)
blf.size(font_id, 50)
blf.size(font_id, 50, 72)
blf.draw(font_id, "Hello World")

View File

@@ -31,7 +31,7 @@ For an overview of BMesh data types and how they reference each other see:
Example Script
--------------
.. literalinclude:: __/__/__/scripts/templates_py/bmesh_simple.py
.. literalinclude:: __/__/__/release/scripts/templates_py/bmesh_simple.py
Standalone Module

View File

@@ -288,7 +288,7 @@ In Python, this is done by defining a class, which is a subclass of an existing
Example Operator
----------------
.. literalinclude:: __/__/__/scripts/templates_py/operator_simple.py
.. literalinclude:: __/__/__/release/scripts/templates_py/operator_simple.py
Once this script runs, ``SimpleOperator`` is registered with Blender
and can be called from Operator Search or added to the toolbar.
@@ -320,7 +320,7 @@ Example Panel
Panels are registered as a class, like an operator.
Notice the extra ``bl_`` variables used to set the context they display in.
.. literalinclude:: __/__/__/scripts/templates_py/ui_panel_simple.py
.. literalinclude:: __/__/__/release/scripts/templates_py/ui_panel_simple.py
To run the script:

View File

@@ -367,13 +367,13 @@ except ImportError:
# Note that ".." is replaced by "__" in the RST files,
# to avoid having to match Blender's source tree.
EXTRA_SOURCE_FILES = (
"../../../scripts/templates_py/bmesh_simple.py",
"../../../scripts/templates_py/gizmo_operator.py",
"../../../scripts/templates_py/gizmo_operator_target.py",
"../../../scripts/templates_py/gizmo_simple.py",
"../../../scripts/templates_py/operator_simple.py",
"../../../scripts/templates_py/ui_panel_simple.py",
"../../../scripts/templates_py/ui_previews_custom_icon.py",
"../../../release/scripts/templates_py/bmesh_simple.py",
"../../../release/scripts/templates_py/gizmo_operator.py",
"../../../release/scripts/templates_py/gizmo_operator_target.py",
"../../../release/scripts/templates_py/gizmo_simple.py",
"../../../release/scripts/templates_py/operator_simple.py",
"../../../release/scripts/templates_py/ui_panel_simple.py",
"../../../release/scripts/templates_py/ui_previews_custom_icon.py",
"../examples/bmesh.ops.1.py",
"../examples/bpy.app.translations.py",
)
@@ -476,7 +476,7 @@ MODULE_GROUPING = {
# -------------------------------BLENDER----------------------------------------
# Converting bytes to strings, due to #30154.
# converting bytes to strings, due to T30154
BLENDER_REVISION = str(bpy.app.build_hash, 'utf_8')
BLENDER_REVISION_TIMESTAMP = bpy.app.build_commit_timestamp
@@ -487,7 +487,7 @@ BLENDER_VERSION_DOTS = "%d.%d" % (bpy.app.version[0], bpy.app.version[1])
if BLENDER_REVISION != "Unknown":
# SHA1 Git hash
BLENDER_VERSION_HASH = BLENDER_REVISION
BLENDER_VERSION_HASH_HTML_LINK = "<a href=https://projects.blender.org/blender/blender/commit/%s>%s</a>" % (
BLENDER_VERSION_HASH_HTML_LINK = "<a href=https://developer.blender.org/rB%s>%s</a>" % (
BLENDER_VERSION_HASH, BLENDER_VERSION_HASH,
)
BLENDER_VERSION_DATE = time.strftime("%d/%m/%Y", time.localtime(BLENDER_REVISION_TIMESTAMP))
@@ -647,7 +647,7 @@ def undocumented_message(module_name, type_name, identifier):
module_name, type_name, identifier,
)
return "Undocumented, consider `contributing <https://developer.blender.org/>`__."
return "Undocumented, consider `contributing <https://developer.blender.org/T51061>`__."
def range_str(val):
@@ -1816,9 +1816,9 @@ def pyrna2sphinx(basepath):
# operators
def write_ops():
API_BASEURL = "https://projects.blender.org/blender/blender/src/branch/main/scripts"
API_BASEURL_ADDON = "https://projects.blender.org/blender/blender-addons"
API_BASEURL_ADDON_CONTRIB = "https://projects.blender.org/blender/blender-addons-contrib"
API_BASEURL = "https://developer.blender.org/diffusion/B/browse/master/release/scripts"
API_BASEURL_ADDON = "https://developer.blender.org/diffusion/BA"
API_BASEURL_ADDON_CONTRIB = "https://developer.blender.org/diffusion/BAC"
op_modules = {}
op = None
@@ -1865,7 +1865,7 @@ def pyrna2sphinx(basepath):
else:
url_base = API_BASEURL
fw(" :file:`%s\\:%d <%s/%s#L%d>`_\n\n" %
fw(" :file: `%s\\:%d <%s/%s$%d>`_\n\n" %
(location[0], location[1], url_base, location[0], location[1]))
file.close()
@@ -2200,7 +2200,7 @@ def write_rst_enum_items(basepath, key, key_no_prefix, enum_items):
Write a single page for a static enum in RST.
This helps avoiding very large lists being in-lined in many places which is an issue
especially with icons in ``bpy.types.UILayout``. See #87008.
especially with icons in ``bpy.types.UILayout``. See T87008.
"""
filepath = os.path.join(basepath, "%s.rst" % key_no_prefix)
with open(filepath, "w", encoding="utf-8") as fh:

View File

@@ -156,7 +156,7 @@ var Popover = function() {
},
getNamed : function(v) {
$.each(all_versions, function(ix, title) {
if (ix === "master" || ix === "main" || ix === "latest") {
if (ix === "master" || ix === "latest") {
var m = title.match(/\d\.\d[\w\d\.]*/)[0];
if (parseFloat(m) == v) {
v = ix;

View File

@@ -1,5 +1,5 @@
Project: Blender
URL: https://projects.blender.org/blender/blender.git
URL: https://git.blender.org/blender.git
License: Apache 2.0
Upstream version: N/A
Local modifications: None

View File

@@ -1,5 +0,0 @@
Project: Renderdoc APP
URL: https://github.com/baldurk/renderdoc/
License: MIT
Upstream version: d47e79ae079783935b8857d6a1730440eafb0b38
Local modifications: None

View File

@@ -1,723 +0,0 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2019-2023 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#pragma once
//////////////////////////////////////////////////////////////////////////////////////////////////
//
// Documentation for the API is available at https://renderdoc.org/docs/in_application_api.html
//
#if !defined(RENDERDOC_NO_STDINT)
#include <stdint.h>
#endif
#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER)
#define RENDERDOC_CC __cdecl
#elif defined(__linux__)
#define RENDERDOC_CC
#elif defined(__APPLE__)
#define RENDERDOC_CC
#else
#error "Unknown platform"
#endif
#ifdef __cplusplus
extern "C" {
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////
// Constants not used directly in below API
// This is a GUID/magic value used for when applications pass a path where shader debug
// information can be found to match up with a stripped shader.
// the define can be used like so: const GUID RENDERDOC_ShaderDebugMagicValue =
// RENDERDOC_ShaderDebugMagicValue_value
#define RENDERDOC_ShaderDebugMagicValue_struct \
{ \
0xeab25520, 0x6670, 0x4865, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \
}
// as an alternative when you want a byte array (assuming x86 endianness):
#define RENDERDOC_ShaderDebugMagicValue_bytearray \
{ \
0x20, 0x55, 0xb2, 0xea, 0x70, 0x66, 0x65, 0x48, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \
}
// truncated version when only a uint64_t is available (e.g. Vulkan tags):
#define RENDERDOC_ShaderDebugMagicValue_truncated 0x48656670eab25520ULL
//////////////////////////////////////////////////////////////////////////////////////////////////
// RenderDoc capture options
//
typedef enum RENDERDOC_CaptureOption {
// Allow the application to enable vsync
//
// Default - enabled
//
// 1 - The application can enable or disable vsync at will
// 0 - vsync is force disabled
eRENDERDOC_Option_AllowVSync = 0,
// Allow the application to enable fullscreen
//
// Default - enabled
//
// 1 - The application can enable or disable fullscreen at will
// 0 - fullscreen is force disabled
eRENDERDOC_Option_AllowFullscreen = 1,
// Record API debugging events and messages
//
// Default - disabled
//
// 1 - Enable built-in API debugging features and records the results into
// the capture, which is matched up with events on replay
// 0 - no API debugging is forcibly enabled
eRENDERDOC_Option_APIValidation = 2,
eRENDERDOC_Option_DebugDeviceMode = 2, // deprecated name of this enum
// Capture CPU callstacks for API events
//
// Default - disabled
//
// 1 - Enables capturing of callstacks
// 0 - no callstacks are captured
eRENDERDOC_Option_CaptureCallstacks = 3,
// When capturing CPU callstacks, only capture them from actions.
// This option does nothing without the above option being enabled
//
// Default - disabled
//
// 1 - Only captures callstacks for actions.
// Ignored if CaptureCallstacks is disabled
// 0 - Callstacks, if enabled, are captured for every event.
eRENDERDOC_Option_CaptureCallstacksOnlyDraws = 4,
eRENDERDOC_Option_CaptureCallstacksOnlyActions = 4,
// Specify a delay in seconds to wait for a debugger to attach, after
// creating or injecting into a process, before continuing to allow it to run.
//
// 0 indicates no delay, and the process will run immediately after injection
//
// Default - 0 seconds
//
eRENDERDOC_Option_DelayForDebugger = 5,
// Verify buffer access. This includes checking the memory returned by a Map() call to
// detect any out-of-bounds modification, as well as initialising buffers with undefined contents
// to a marker value to catch use of uninitialised memory.
//
// NOTE: This option is only valid for OpenGL and D3D11. Explicit APIs such as D3D12 and Vulkan do
// not do the same kind of interception & checking and undefined contents are really undefined.
//
// Default - disabled
//
// 1 - Verify buffer access
// 0 - No verification is performed, and overwriting bounds may cause crashes or corruption in
// RenderDoc.
eRENDERDOC_Option_VerifyBufferAccess = 6,
// The old name for eRENDERDOC_Option_VerifyBufferAccess was eRENDERDOC_Option_VerifyMapWrites.
// This option now controls the filling of uninitialised buffers with 0xdddddddd which was
// previously always enabled
eRENDERDOC_Option_VerifyMapWrites = eRENDERDOC_Option_VerifyBufferAccess,
// Hooks any system API calls that create child processes, and injects
// RenderDoc into them recursively with the same options.
//
// Default - disabled
//
// 1 - Hooks into spawned child processes
// 0 - Child processes are not hooked by RenderDoc
eRENDERDOC_Option_HookIntoChildren = 7,
// By default RenderDoc only includes resources in the final capture necessary
// for that frame, this allows you to override that behaviour.
//
// Default - disabled
//
// 1 - all live resources at the time of capture are included in the capture
// and available for inspection
// 0 - only the resources referenced by the captured frame are included
eRENDERDOC_Option_RefAllResources = 8,
// **NOTE**: As of RenderDoc v1.1 this option has been deprecated. Setting or
// getting it will be ignored, to allow compatibility with older versions.
// In v1.1 the option acts as if it's always enabled.
//
// By default RenderDoc skips saving initial states for resources where the
// previous contents don't appear to be used, assuming that writes before
// reads indicate previous contents aren't used.
//
// Default - disabled
//
// 1 - initial contents at the start of each captured frame are saved, even if
// they are later overwritten or cleared before being used.
// 0 - unless a read is detected, initial contents will not be saved and will
// appear as black or empty data.
eRENDERDOC_Option_SaveAllInitials = 9,
// In APIs that allow for the recording of command lists to be replayed later,
// RenderDoc may choose to not capture command lists before a frame capture is
// triggered, to reduce overheads. This means any command lists recorded once
// and replayed many times will not be available and may cause a failure to
// capture.
//
// NOTE: This is only true for APIs where multithreading is difficult or
// discouraged. Newer APIs like Vulkan and D3D12 will ignore this option
// and always capture all command lists since the API is heavily oriented
// around it and the overheads have been reduced by API design.
//
// 1 - All command lists are captured from the start of the application
// 0 - Command lists are only captured if their recording begins during
// the period when a frame capture is in progress.
eRENDERDOC_Option_CaptureAllCmdLists = 10,
// Mute API debugging output when the API validation mode option is enabled
//
// Default - enabled
//
// 1 - Mute any API debug messages from being displayed or passed through
// 0 - API debugging is displayed as normal
eRENDERDOC_Option_DebugOutputMute = 11,
// Option to allow vendor extensions to be used even when they may be
// incompatible with RenderDoc and cause corrupted replays or crashes.
//
// Default - inactive
//
// No values are documented, this option should only be used when absolutely
// necessary as directed by a RenderDoc developer.
eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12,
} RENDERDOC_CaptureOption;
// Sets an option that controls how RenderDoc behaves on capture.
//
// Returns 1 if the option and value are valid
// Returns 0 if either is invalid and the option is unchanged
typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionU32)(RENDERDOC_CaptureOption opt, uint32_t val);
typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionF32)(RENDERDOC_CaptureOption opt, float val);
// Gets the current value of an option as a uint32_t
//
// If the option is invalid, 0xffffffff is returned
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_CaptureOption opt);
// Gets the current value of an option as a float
//
// If the option is invalid, -FLT_MAX is returned
typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt);
// Keys that can be passed (as an array) to the SetFocusToggleKeys and SetCaptureKeys
// entry points declared below. Digits and upper-case letters use their ASCII codes;
// every other key is numbered sequentially starting from eRENDERDOC_Key_NonPrintable.
typedef enum RENDERDOC_InputButton {
// '0' - '9' matches ASCII values
eRENDERDOC_Key_0 = 0x30,
eRENDERDOC_Key_1 = 0x31,
eRENDERDOC_Key_2 = 0x32,
eRENDERDOC_Key_3 = 0x33,
eRENDERDOC_Key_4 = 0x34,
eRENDERDOC_Key_5 = 0x35,
eRENDERDOC_Key_6 = 0x36,
eRENDERDOC_Key_7 = 0x37,
eRENDERDOC_Key_8 = 0x38,
eRENDERDOC_Key_9 = 0x39,
// 'A' - 'Z' matches ASCII values
eRENDERDOC_Key_A = 0x41,
eRENDERDOC_Key_B = 0x42,
eRENDERDOC_Key_C = 0x43,
eRENDERDOC_Key_D = 0x44,
eRENDERDOC_Key_E = 0x45,
eRENDERDOC_Key_F = 0x46,
eRENDERDOC_Key_G = 0x47,
eRENDERDOC_Key_H = 0x48,
eRENDERDOC_Key_I = 0x49,
eRENDERDOC_Key_J = 0x4A,
eRENDERDOC_Key_K = 0x4B,
eRENDERDOC_Key_L = 0x4C,
eRENDERDOC_Key_M = 0x4D,
eRENDERDOC_Key_N = 0x4E,
eRENDERDOC_Key_O = 0x4F,
eRENDERDOC_Key_P = 0x50,
eRENDERDOC_Key_Q = 0x51,
eRENDERDOC_Key_R = 0x52,
eRENDERDOC_Key_S = 0x53,
eRENDERDOC_Key_T = 0x54,
eRENDERDOC_Key_U = 0x55,
eRENDERDOC_Key_V = 0x56,
eRENDERDOC_Key_W = 0x57,
eRENDERDOC_Key_X = 0x58,
eRENDERDOC_Key_Y = 0x59,
eRENDERDOC_Key_Z = 0x5A,
// leave the rest of the ASCII range free
// in case we want to use it later
eRENDERDOC_Key_NonPrintable = 0x100,
// The enumerators below have no explicit value, so each one is the previous value plus one
// (Divide = 0x101, Multiply = 0x102, ...). Their order is therefore part of the ABI.
eRENDERDOC_Key_Divide,
eRENDERDOC_Key_Multiply,
eRENDERDOC_Key_Subtract,
eRENDERDOC_Key_Plus,
eRENDERDOC_Key_F1,
eRENDERDOC_Key_F2,
eRENDERDOC_Key_F3,
eRENDERDOC_Key_F4,
eRENDERDOC_Key_F5,
eRENDERDOC_Key_F6,
eRENDERDOC_Key_F7,
eRENDERDOC_Key_F8,
eRENDERDOC_Key_F9,
eRENDERDOC_Key_F10,
eRENDERDOC_Key_F11,
eRENDERDOC_Key_F12,
eRENDERDOC_Key_Home,
eRENDERDOC_Key_End,
eRENDERDOC_Key_Insert,
eRENDERDOC_Key_Delete,
eRENDERDOC_Key_PageUp,
eRENDERDOC_Key_PageDn,
eRENDERDOC_Key_Backspace,
eRENDERDOC_Key_Tab,
eRENDERDOC_Key_PrtScrn,
eRENDERDOC_Key_Pause,
// NOTE(review): presumably an end-of-range sentinel rather than a real key - confirm.
eRENDERDOC_Key_Max,
} RENDERDOC_InputButton;
// Sets which key or keys can be used to toggle focus between multiple windows
//
// If keys is NULL or num is 0, toggle keys will be disabled
typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton *keys, int num);
// Sets which key or keys can be used to capture the next frame
//
// If keys is NULL or num is 0, capture keys will be disabled
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num);
// Bit flags describing the in-application overlay. The flags are combined into the
// uint32_t mask returned by GetOverlayBits and modified (and-mask, then or-mask arguments)
// by MaskOverlayBits, both declared below.
typedef enum RENDERDOC_OverlayBits {
// This single bit controls whether the overlay is enabled or disabled globally
eRENDERDOC_Overlay_Enabled = 0x1,
// Show the average framerate over several seconds as well as min/max
eRENDERDOC_Overlay_FrameRate = 0x2,
// Show the current frame number
eRENDERDOC_Overlay_FrameNumber = 0x4,
// Show a list of recent captures, and how many captures have been made
eRENDERDOC_Overlay_CaptureList = 0x8,
// Default values for the overlay mask
eRENDERDOC_Overlay_Default = (eRENDERDOC_Overlay_Enabled | eRENDERDOC_Overlay_FrameRate |
eRENDERDOC_Overlay_FrameNumber | eRENDERDOC_Overlay_CaptureList),
// Enable all bits
eRENDERDOC_Overlay_All = ~0U,
// Disable all bits
eRENDERDOC_Overlay_None = 0,
} RENDERDOC_OverlayBits;
// returns the overlay bits that have been set
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetOverlayBits)();
// sets the overlay bits with an and & or mask
typedef void(RENDERDOC_CC *pRENDERDOC_MaskOverlayBits)(uint32_t And, uint32_t Or);
// this function will attempt to remove RenderDoc's hooks in the application.
//
// Note: that this can only work correctly if done immediately after
// the module is loaded, before any API work happens. RenderDoc will remove its
// injected hooks and shut down. Behaviour is undefined if this is called
// after any API functions have been called, and there is still no guarantee of
// success.
typedef void(RENDERDOC_CC *pRENDERDOC_RemoveHooks)();
// DEPRECATED: compatibility for code compiled against pre-1.4.1 headers.
typedef pRENDERDOC_RemoveHooks pRENDERDOC_Shutdown;
// This function will unload RenderDoc's crash handler.
//
// If you use your own crash handler and don't want RenderDoc's handler to
// intercede, you can call this function to unload it and any unhandled
// exceptions will pass to the next handler.
typedef void(RENDERDOC_CC *pRENDERDOC_UnloadCrashHandler)();
// Sets the capture file path template
//
// pathtemplate is a UTF-8 string that gives a template for how captures will be named
// and where they will be saved.
//
// Any extension is stripped off the path, and captures are saved in the directory
// specified, and named with the filename and the frame number appended. If the
// directory does not exist it will be created, including any parent directories.
//
// If pathtemplate is NULL, the template will remain unchanged
//
// Example:
//
// SetCaptureFilePathTemplate("my_captures/example");
//
// Capture #1 -> my_captures/example_frame123.rdc
// Capture #2 -> my_captures/example_frame456.rdc
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFilePathTemplate)(const char *pathtemplate);
// returns the current capture path template, see SetCaptureFilePathTemplate above, as a UTF-8 string
typedef const char *(RENDERDOC_CC *pRENDERDOC_GetCaptureFilePathTemplate)();
// DEPRECATED: compatibility for code compiled against pre-1.1.2 headers.
typedef pRENDERDOC_SetCaptureFilePathTemplate pRENDERDOC_SetLogFilePathTemplate;
typedef pRENDERDOC_GetCaptureFilePathTemplate pRENDERDOC_GetLogFilePathTemplate;
// returns the number of captures that have been made
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetNumCaptures)();
// This function returns the details of a capture, by index. New captures are added
// to the end of the list.
//
// filename will be filled with the absolute path to the capture file, as a UTF-8 string
// pathlength will be written with the length in bytes of the filename string
// timestamp will be written with the time of the capture, in seconds since the Unix epoch
//
// Any of the parameters can be NULL and they'll be skipped.
//
// The function will return 1 if the capture index is valid, or 0 if the index is invalid
// If the index is invalid, the values will be unchanged
//
// Note: when captures are deleted in the UI they will remain in this list, so the
// capture path may not exist anymore.
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCapture)(uint32_t idx, char *filename,
uint32_t *pathlength, uint64_t *timestamp);
// Sets the comments associated with a capture file. These comments are displayed in the
// UI program when opening.
//
// filePath should be a path to the capture file to add comments to. If set to NULL or ""
// the most recently created capture file will be used instead.
// comments should be a NULL-terminated UTF-8 string to add as comments.
//
// Any existing comments will be overwritten.
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFileComments)(const char *filePath,
const char *comments);
// returns 1 if the RenderDoc UI is connected to this application, 0 otherwise
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsTargetControlConnected)();
// DEPRECATED: compatibility for code compiled against pre-1.1.1 headers.
// This was renamed to IsTargetControlConnected in API 1.1.1, the old typedef is kept here for
// backwards compatibility with old code, it is castable either way since it's ABI compatible
// as the same function pointer type.
typedef pRENDERDOC_IsTargetControlConnected pRENDERDOC_IsRemoteAccessConnected;
// This function will launch the Replay UI associated with the RenderDoc library injected
// into the running application.
//
// if connectTargetControl is 1, the Replay UI will be launched with a command line parameter
// to connect to this application
// cmdline is the rest of the command line, as a UTF-8 string. E.g. a capture to open
// if cmdline is NULL, the command line will be empty.
//
// returns the PID of the replay UI if successful, 0 if not successful.
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTargetControl,
const char *cmdline);
// RenderDoc can return a higher version than requested if it's backwards compatible,
// this function returns the actual version returned. If a parameter is NULL, it will be
// ignored and the others will be filled out.
typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch);
// Requests that the replay UI show itself (if hidden or not the current top window). This can be
// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle
// showing the UI after making a capture.
//
// This will return 1 if the request was successfully passed on, though it's not guaranteed that
// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current
// target control connection to make such a request, or if there was another error
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)();
//////////////////////////////////////////////////////////////////////////
// Capturing functions
//
// A device pointer is a pointer to the API's root handle.
//
// This would be an ID3D11Device, HGLRC/GLXContext, ID3D12Device, etc
typedef void *RENDERDOC_DevicePointer;
// A window handle is the OS's native window handle
//
// This would be an HWND, GLXDrawable, etc
typedef void *RENDERDOC_WindowHandle;
// A helper macro for Vulkan, where the device handle cannot be used directly.
//
// Passing the VkInstance to this macro will return the RENDERDOC_DevicePointer to use.
//
// Specifically, the value needed is the dispatch table pointer, which sits as the first
// pointer-sized object in the memory pointed to by the VkInstance. Thus we cast to a void** and
// indirect once.
#define RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(inst) (*((void **)(inst)))
// This sets the RenderDoc in-app overlay in the API/window pair as 'active' and it will
// respond to keypresses. Neither parameter can be NULL
typedef void(RENDERDOC_CC *pRENDERDOC_SetActiveWindow)(RENDERDOC_DevicePointer device,
RENDERDOC_WindowHandle wndHandle);
// capture the next frame on whichever window and API is currently considered active
typedef void(RENDERDOC_CC *pRENDERDOC_TriggerCapture)();
// capture the next N frames on whichever window and API is currently considered active
typedef void(RENDERDOC_CC *pRENDERDOC_TriggerMultiFrameCapture)(uint32_t numFrames);
// When choosing either a device pointer or a window handle to capture, you can pass NULL.
// Passing NULL specifies a 'wildcard' match against anything. This allows you to specify
// any API rendering to a specific window, or a specific API instance rendering to any window,
// or in the simplest case of one window and one API, you can just pass NULL for both.
//
// In either case, if there are two or more possible matching (device,window) pairs it
// is undefined which one will be captured.
//
// Note: for headless rendering you can pass NULL for the window handle and either specify
// a device pointer or leave it NULL as above.
// Immediately starts capturing API calls on the specified device pointer and window handle.
//
// If there is no matching thing to capture (e.g. no supported API has been initialised),
// this will do nothing.
//
// The results are undefined (including crashes) if two captures are started overlapping,
// even on separate devices and/or windows.
typedef void(RENDERDOC_CC *pRENDERDOC_StartFrameCapture)(RENDERDOC_DevicePointer device,
RENDERDOC_WindowHandle wndHandle);
// Returns whether or not a frame capture is currently ongoing anywhere.
//
// This will return 1 if a capture is ongoing, and 0 if there is no capture running
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsFrameCapturing)();
// Ends capturing immediately.
//
// This will return 1 if the capture succeeded, and 0 if there was an error capturing.
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePointer device,
RENDERDOC_WindowHandle wndHandle);
// Ends capturing immediately and discard any data stored without saving to disk.
//
// This will return 1 if the capture was discarded, and 0 if there was an error or no capture
// was in progress
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device,
RENDERDOC_WindowHandle wndHandle);
// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom
// title to the capture produced which will be displayed in the UI.
//
// If multiple captures are ongoing, this title will be applied to the first capture to end after
// this call. The second capture to end will have no title, unless this function is called again.
//
// Calling this function has no effect if no capture is currently running
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title);
//////////////////////////////////////////////////////////////////////////////////////////////////
// RenderDoc API versions
//
// RenderDoc uses semantic versioning (http://semver.org/).
//
// MAJOR version is incremented when incompatible API changes happen.
// MINOR version is incremented when functionality is added in a backwards-compatible manner.
// PATCH version is incremented when backwards-compatible bug fixes happen.
//
// Note that this means the API returned can be higher than the one you might have requested.
// e.g. if you are running against a newer RenderDoc that supports 1.0.1, it will be returned
// instead of 1.0.0. You can check this with the GetAPIVersion entry point
typedef enum RENDERDOC_Version {
// Each enumerator's value packs the version as MAJOR * 10000 + MINOR * 100 + PATCH,
// e.g. 1.4.2 -> 10402. The trailing comments show the same digits grouped as "M mm pp".
eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00
eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01
eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02
eRENDERDOC_API_Version_1_1_0 = 10100, // RENDERDOC_API_1_1_0 = 1 01 00
eRENDERDOC_API_Version_1_1_1 = 10101, // RENDERDOC_API_1_1_1 = 1 01 01
eRENDERDOC_API_Version_1_1_2 = 10102, // RENDERDOC_API_1_1_2 = 1 01 02
eRENDERDOC_API_Version_1_2_0 = 10200, // RENDERDOC_API_1_2_0 = 1 02 00
eRENDERDOC_API_Version_1_3_0 = 10300, // RENDERDOC_API_1_3_0 = 1 03 00
eRENDERDOC_API_Version_1_4_0 = 10400, // RENDERDOC_API_1_4_0 = 1 04 00
eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01
eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02
eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00
eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00
} RENDERDOC_Version;
// API version changelog:
//
// 1.0.0 - initial release
// 1.0.1 - Bugfix: IsFrameCapturing() was returning false for captures that were triggered
// by keypress or TriggerCapture, instead of Start/EndFrameCapture.
// 1.0.2 - Refactor: Renamed eRENDERDOC_Option_DebugDeviceMode to eRENDERDOC_Option_APIValidation
// 1.1.0 - Add feature: TriggerMultiFrameCapture(). Backwards compatible with 1.0.x since the new
// function pointer is added to the end of the struct, the original layout is identical
// 1.1.1 - Refactor: Renamed remote access to target control (to better disambiguate from remote
// replay/remote server concept in replay UI)
// 1.1.2 - Refactor: Renamed "log file" in function names to just capture, to clarify that these
// are captures and not debug logging files. This is the first API version in the v1.0
// branch.
// 1.2.0 - Added feature: SetCaptureFileComments() to add comments to a capture file that will be
// displayed in the UI program on load.
// 1.3.0 - Added feature: New capture option eRENDERDOC_Option_AllowUnsupportedVendorExtensions
// which allows users to opt-in to allowing unsupported vendor extensions to function.
// Should be used at the user's own risk.
// Refactor: Renamed eRENDERDOC_Option_VerifyMapWrites to
// eRENDERDOC_Option_VerifyBufferAccess, which now also controls initialisation to
// 0xdddddddd of uninitialised buffer contents.
// 1.4.0 - Added feature: DiscardFrameCapture() to discard a frame capture in progress and stop
// capturing without saving anything to disk.
// 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening
// 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option.
// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected
// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a
// capture made with StartFrameCapture() or EndFrameCapture()
typedef struct RENDERDOC_API_1_6_0
{
// Table of function pointers making up the in-application API, one member per entry point.
// Entry points introduced in later versions are appended at the end (see the "new function in"
// markers below), so the member order must not be changed.
//
// Where an entry point was renamed, an anonymous union exposes both the deprecated and the
// current name; both members share the same storage and therefore refer to the same pointer.
pRENDERDOC_GetAPIVersion GetAPIVersion;
pRENDERDOC_SetCaptureOptionU32 SetCaptureOptionU32;
pRENDERDOC_SetCaptureOptionF32 SetCaptureOptionF32;
pRENDERDOC_GetCaptureOptionU32 GetCaptureOptionU32;
pRENDERDOC_GetCaptureOptionF32 GetCaptureOptionF32;
pRENDERDOC_SetFocusToggleKeys SetFocusToggleKeys;
pRENDERDOC_SetCaptureKeys SetCaptureKeys;
pRENDERDOC_GetOverlayBits GetOverlayBits;
pRENDERDOC_MaskOverlayBits MaskOverlayBits;
// Shutdown was renamed to RemoveHooks in 1.4.1.
// This union allows old code to continue compiling without changes
union
{
pRENDERDOC_Shutdown Shutdown;
pRENDERDOC_RemoveHooks RemoveHooks;
};
pRENDERDOC_UnloadCrashHandler UnloadCrashHandler;
// Get/SetLogFilePathTemplate were renamed to Get/SetCaptureFilePathTemplate in 1.1.2.
// These unions allow old code to continue compiling without changes
union
{
// deprecated name
pRENDERDOC_SetLogFilePathTemplate SetLogFilePathTemplate;
// current name
pRENDERDOC_SetCaptureFilePathTemplate SetCaptureFilePathTemplate;
};
union
{
// deprecated name
pRENDERDOC_GetLogFilePathTemplate GetLogFilePathTemplate;
// current name
pRENDERDOC_GetCaptureFilePathTemplate GetCaptureFilePathTemplate;
};
pRENDERDOC_GetNumCaptures GetNumCaptures;
pRENDERDOC_GetCapture GetCapture;
pRENDERDOC_TriggerCapture TriggerCapture;
// IsRemoteAccessConnected was renamed to IsTargetControlConnected in 1.1.1.
// This union allows old code to continue compiling without changes
union
{
// deprecated name
pRENDERDOC_IsRemoteAccessConnected IsRemoteAccessConnected;
// current name
pRENDERDOC_IsTargetControlConnected IsTargetControlConnected;
};
pRENDERDOC_LaunchReplayUI LaunchReplayUI;
pRENDERDOC_SetActiveWindow SetActiveWindow;
pRENDERDOC_StartFrameCapture StartFrameCapture;
pRENDERDOC_IsFrameCapturing IsFrameCapturing;
pRENDERDOC_EndFrameCapture EndFrameCapture;
// new function in 1.1.0
pRENDERDOC_TriggerMultiFrameCapture TriggerMultiFrameCapture;
// new function in 1.2.0
pRENDERDOC_SetCaptureFileComments SetCaptureFileComments;
// new function in 1.4.0
pRENDERDOC_DiscardFrameCapture DiscardFrameCapture;
// new function in 1.5.0
pRENDERDOC_ShowReplayUI ShowReplayUI;
// new function in 1.6.0
pRENDERDOC_SetCaptureTitle SetCaptureTitle;
} RENDERDOC_API_1_6_0;
// Every earlier API version name is an alias of the 1.6.0 struct, so code written against an
// older version name keeps compiling against this header.
// NOTE(review): presumably only the entry points that existed in the requested version are
// guaranteed to be valid when an older version is requested - confirm before calling newer ones.
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_0;
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_1;
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_2;
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_0;
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_1;
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_2;
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_2_0;
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_3_0;
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_0;
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_1;
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_2;
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_5_0;
//////////////////////////////////////////////////////////////////////////////////////////////////
// RenderDoc API entry point
//
// This entry point can be obtained via GetProcAddress/dlsym if RenderDoc is available.
//
// The symbol name is the typedef name without the leading 'p' - "RENDERDOC_GetAPI"
//
// This function is not thread safe, and should not be called on multiple threads at once.
// Ideally, call this once as early as possible in your application's startup, before doing
// any API work, since some configuration functionality etc has to be done also before
// initialising any APIs.
//
// Parameters:
// version is a single value from the RENDERDOC_Version above.
//
// outAPIPointers will be filled out with a pointer to the corresponding struct of function
// pointers.
//
// Returns:
// 1 - if the outAPIPointers has been filled with a pointer to the API struct requested
// 0 - if the requested version is not supported or the arguments are invalid.
//
typedef int(RENDERDOC_CC *pRENDERDOC_GetAPI)(RENDERDOC_Version version, void **outAPIPointers);
#ifdef __cplusplus
} // extern "C"
#endif

View File

@@ -7,7 +7,7 @@ set(INC
)
set(INC_SYS
${X11_X11_INCLUDE_PATH}
)
set(SRC

View File

@@ -1,5 +1,6 @@
Project: TinyGLTF
URL: https://github.com/syoyo/tinygltf
License: MIT
Upstream version: 2.8.3, 84a83d39f55d
Local modifications: None
Upstream version: 2.5.0, 19a41d20ec0
Local modifications:
* Silence "enum value not handled in switch" warnings due to JSON dependency.

BIN
extern/tinygltf/patches/TinyGLTF.diff vendored Normal file

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -67,10 +67,6 @@ if(UNIX AND NOT APPLE)
add_subdirectory(libc_compat)
endif()
if (WITH_RENDERDOC)
add_subdirectory(renderdoc_dynload)
endif()
if(UNIX AND NOT APPLE)
# Important this comes after "ghost" as it uses includes defined by GHOST's CMake.
if(WITH_GHOST_WAYLAND AND WITH_GHOST_WAYLAND_DYNLOAD)

View File

@@ -41,7 +41,7 @@ static const char *FRAGMENT_SHADER =
"void main()\n"
"{\n"
" vec4 rgba = texture(image_texture, texCoord_interp);\n"
/* Hard-coded Rec.709 gamma, should use OpenColorIO eventually. */
/* Harcoded Rec.709 gamma, should use OpenColorIO eventually. */
" fragColor = pow(rgba, vec4(0.45, 0.45, 0.45, 1.0));\n"
"}\n\0";

View File

@@ -12,7 +12,6 @@ from bpy.props import (
PointerProperty,
StringProperty,
)
from bpy.app.translations import pgettext_iface as iface_
from math import pi
@@ -1665,51 +1664,30 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.label(text="No compatible GPUs found for Cycles", icon='INFO')
if device_type == 'CUDA':
compute_capability = "3.0"
col.label(text=iface_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
icon='BLANK1', translate=False)
col.label(text="Requires NVIDIA GPU with compute capability 3.0", icon='BLANK1')
elif device_type == 'OPTIX':
compute_capability = "5.0"
driver_version = "470"
col.label(text=iface_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
icon='BLANK1', translate=False)
col.label(text=iface_("and NVIDIA driver version %s or newer") % driver_version,
icon='BLANK1', translate=False)
col.label(text="Requires NVIDIA GPU with compute capability 5.0", icon='BLANK1')
col.label(text="and NVIDIA driver version 470 or newer", icon='BLANK1')
elif device_type == 'HIP':
if True:
col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
else:
import sys
if sys.platform[:3] == "win":
driver_version = "21.Q4"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
icon='BLANK1', translate=False)
elif sys.platform.startswith("linux"):
driver_version = "22.10"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD driver version %s or newer") % driver_version, icon='BLANK1',
translate=False)
import sys
if sys.platform[:3] == "win":
col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
elif sys.platform.startswith("linux"):
col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
col.label(text="and AMD driver version 22.10 or newer", icon='BLANK1')
elif device_type == 'ONEAPI':
import sys
if sys.platform.startswith("win"):
driver_version = "101.4032"
col.label(text="Requires Intel GPU with Xe-HPG architecture", icon='BLANK1')
col.label(text=iface_("and Windows driver version %s or newer") % driver_version,
icon='BLANK1', translate=False)
col.label(text="and Windows driver version 101.4032 or newer", icon='BLANK1')
elif sys.platform.startswith("linux"):
driver_version = "1.3.24931"
col.label(text="Requires Intel GPU with Xe-HPG architecture and", icon='BLANK1')
col.label(text=iface_(" - intel-level-zero-gpu version %s or newer") % driver_version,
icon='BLANK1', translate=False)
col.label(text=" - intel-level-zero-gpu version 1.3.24931 or newer", icon='BLANK1')
col.label(text=" - oneAPI Level-Zero Loader", icon='BLANK1')
elif device_type == 'METAL':
silicon_mac_version = "12.2"
amd_mac_version = "12.3"
col.label(text=iface_("Requires Apple Silicon with macOS %s or newer") % silicon_mac_version,
icon='BLANK1', translate=False)
col.label(text=iface_("or AMD with macOS %s or newer") % amd_mac_version, icon='BLANK1',
translate=False)
col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')
col.label(text="or AMD with macOS 12.3 or newer", icon='BLANK1')
return
for device in devices:
@@ -1719,8 +1697,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
.replace('(TM)', unicodedata.lookup('TRADE MARK SIGN'))
.replace('(tm)', unicodedata.lookup('TRADE MARK SIGN'))
.replace('(R)', unicodedata.lookup('REGISTERED SIGN'))
.replace('(C)', unicodedata.lookup('COPYRIGHT SIGN')),
translate=False
.replace('(C)', unicodedata.lookup('COPYRIGHT SIGN'))
)
def draw_impl(self, layout, context):
@@ -1745,21 +1722,19 @@ class CyclesPreferences(bpy.types.AddonPreferences):
row.prop(self, "peer_memory")
if compute_device_type == 'METAL':
import platform
import re
is_navi_2 = False
import platform, re
isNavi2 = False
for device in devices:
if re.search(r"((RX)|(Pro)|(PRO))\s+W?6\d00X", device.name):
is_navi_2 = True
break
obj = re.search("((RX)|(Pro)|(PRO))\s+W?6\d00X",device.name)
if obj:
isNavi2 = True
# MetalRT only works on Apple Silicon and Navi2.
is_arm64 = platform.machine() == 'arm64'
if is_arm64 or is_navi_2:
# MetalRT only works on Apple Silicon and Navi2
if platform.machine() == 'arm64' or isNavi2:
col = layout.column()
col.use_property_split = True
# Kernel specialization is only supported on Apple Silicon
if is_arm64:
if platform.machine() == 'arm64':
col.prop(self, "kernel_optimization_level")
col.prop(self, "use_metalrt")

View File

@@ -20,7 +20,7 @@ class CyclesPresetPanel(PresetPanel, Panel):
@staticmethod
def post_cb(context):
# Modify an arbitrary built-in scene property to force a depsgraph
# update, because add-on properties don't. (see #62325)
# update, because add-on properties don't. (see T62325)
render = context.scene.render
render.filter_size = render.filter_size

View File

@@ -54,10 +54,44 @@ int BlenderDisplayShader::get_tex_coord_attrib_location()
/* --------------------------------------------------------------------
* BlenderFallbackDisplayShader.
*/
/* TODO move shaders to standalone .glsl file. */
static const char *FALLBACK_VERTEX_SHADER =
"uniform vec2 fullscreen;\n"
"in vec2 texCoord;\n"
"in vec2 pos;\n"
"out vec2 texCoord_interp;\n"
"\n"
"vec2 normalize_coordinates()\n"
"{\n"
" return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
"}\n"
"\n"
"void main()\n"
"{\n"
" gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
" texCoord_interp = texCoord;\n"
"}\n\0";
static const char *FALLBACK_FRAGMENT_SHADER =
"uniform sampler2D image_texture;\n"
"in vec2 texCoord_interp;\n"
"out vec4 fragColor;\n"
"\n"
"void main()\n"
"{\n"
" fragColor = texture(image_texture, texCoord_interp);\n"
"}\n\0";
static GPUShader *compile_fallback_shader(void)
{
/* NOTE: Compilation errors are logged to console. */
GPUShader *shader = GPU_shader_create_from_info_name("gpu_shader_cycles_display_fallback");
GPUShader *shader = GPU_shader_create(FALLBACK_VERTEX_SHADER,
FALLBACK_FRAGMENT_SHADER,
nullptr,
nullptr,
nullptr,
"FallbackCyclesBlitShader");
return shader;
}
@@ -71,12 +105,11 @@ GPUShader *BlenderFallbackDisplayShader::bind(int width, int height)
/* Bind shader now to enable uniform assignment. */
GPU_shader_bind(shader_program_);
int slot = 0;
GPU_shader_uniform_int_ex(shader_program_, image_texture_location_, 1, 1, &slot);
GPU_shader_uniform_int(shader_program_, image_texture_location_, 0);
float size[2];
size[0] = width;
size[1] = height;
GPU_shader_uniform_float_ex(shader_program_, fullscreen_location_, 2, 1, size);
GPU_shader_uniform_vector(shader_program_, fullscreen_location_, 2, 1, size);
return shader_program_;
}
@@ -216,13 +249,8 @@ class DisplayGPUTexture {
height = texture_height;
/* Texture must have a minimum size of 1x1. */
gpu_texture = GPU_texture_create_2d("CyclesBlitTexture",
max(width, 1),
max(height, 1),
1,
GPU_RGBA16F,
GPU_TEXTURE_USAGE_GENERAL,
nullptr);
gpu_texture = GPU_texture_create_2d(
"CyclesBlitTexture", max(width, 1), max(height, 1), 1, GPU_RGBA16F, nullptr);
if (!gpu_texture) {
LOG(ERROR) << "Error creating texture.";
@@ -705,14 +733,14 @@ static void draw_tile(const float2 &zoom,
const float zoomed_height = draw_tile.params.size.y * zoom.y;
if (texture.width != draw_tile.params.size.x || texture.height != draw_tile.params.size.y) {
/* Resolution divider is different from 1, force nearest interpolation. */
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_DEFAULT, 0);
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_DEFAULT, 0, false);
}
else if (zoomed_width - draw_tile.params.size.x > 0.5f ||
zoomed_height - draw_tile.params.size.y > 0.5f) {
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_DEFAULT, 0);
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_DEFAULT, 0, false);
}
else {
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_FILTER, 0);
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_FILTER, 0, false);
}
/* Draw at the parameters for which the texture has been updated for. This allows to always draw

View File

@@ -20,7 +20,7 @@ BlenderImageLoader::BlenderImageLoader(BL::Image b_image,
: b_image(b_image),
frame(frame),
tile_number(tile_number),
/* Don't free cache for preview render to avoid race condition from #93560, to be fixed
/* Don't free cache for preview render to avoid race condition from T93560, to be fixed
* properly later as we are close to release. */
free_cache(!is_preview_render && !b_image.has_data())
{
@@ -72,7 +72,7 @@ bool BlenderImageLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaDat
metadata.colorspace = u_colorspace_raw;
}
else {
/* In some cases (e.g. #94135), the colorspace setting in Blender gets updated as part of the
/* In some cases (e.g. T94135), the colorspace setting in Blender gets updated as part of the
* metadata queries in this function, so update the colorspace setting here. */
PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
metadata.colorspace = get_enum_identifier(colorspace_ptr, "name");

View File

@@ -24,7 +24,7 @@ void BlenderSync::sync_light(BL::Object &b_parent,
Light *light = light_map.find(key);
/* Check if the transform was modified, in case a linked collection is moved we do not get a
* specific depsgraph update (#88515). This also mimics the behavior for Objects. */
* specific depsgraph update (T88515). This also mimics the behavior for Objects. */
const bool tfm_updated = (light && light->get_tfm() != tfm);
/* Update if either object or light data changed. */

View File

@@ -818,23 +818,6 @@ static void attr_create_pointiness(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, b
}
}
static std::optional<BL::IntAttribute> find_corner_vert_attribute(BL::Mesh b_mesh)
{
for (BL::Attribute &b_attribute : b_mesh.attributes) {
if (b_attribute.domain() != BL::Attribute::domain_CORNER) {
continue;
}
if (b_attribute.data_type() != BL::Attribute::data_type_INT) {
continue;
}
if (b_attribute.name() != ".corner_vert") {
continue;
}
return BL::IntAttribute{b_attribute};
}
return std::nullopt;
}
/* The Random Per Island attribute is a random float associated with each
* connected component (island) of the mesh. The attribute is computed by
* first classifying the vertices into different sets using a Disjoint Set
@@ -881,11 +864,11 @@ static void attr_create_random_per_island(Scene *scene,
else {
if (polys_num != 0) {
const MPoly *polys = static_cast<const MPoly *>(b_mesh.polygons[0].ptr.data);
BL::IntAttribute corner_verts = *find_corner_vert_attribute(b_mesh);
const MLoop *loops = static_cast<const MLoop *>(b_mesh.loops[0].ptr.data);
for (int i = 0; i < polys_num; i++) {
const MPoly &b_poly = polys[i];
const int vert = corner_verts.data[b_poly.loopstart].value();
data[i] = hash_uint_to_float(vertices_sets.find(vert));
const MLoop &b_loop = loops[b_poly.loopstart];
data[i] = hash_uint_to_float(vertices_sets.find(b_loop.v));
}
}
}
@@ -910,23 +893,6 @@ static std::optional<BL::IntAttribute> find_material_index_attribute(BL::Mesh b_
return std::nullopt;
}
static std::optional<BL::BoolAttribute> find_sharp_face_attribute(BL::Mesh b_mesh)
{
for (BL::Attribute &b_attribute : b_mesh.attributes) {
if (b_attribute.domain() != BL::Attribute::domain_FACE) {
continue;
}
if (b_attribute.data_type() != BL::Attribute::data_type_BOOLEAN) {
continue;
}
if (b_attribute.name() != "sharp_face") {
continue;
}
return BL::BoolAttribute{b_attribute};
}
return std::nullopt;
}
static void create_mesh(Scene *scene,
Mesh *mesh,
BL::Mesh &b_mesh,
@@ -1017,22 +983,16 @@ static void create_mesh(Scene *scene,
return 0;
};
std::optional<BL::BoolAttribute> sharp_faces = find_sharp_face_attribute(b_mesh);
auto get_face_sharp = [&](const int poly_index) -> bool {
if (sharp_faces) {
return sharp_faces->data[poly_index].value();
}
return false;
};
/* create faces */
const MPoly *polys = static_cast<const MPoly *>(b_mesh.polygons[0].ptr.data);
if (!subdivision) {
for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) {
const int poly_index = t.polygon_index();
const MPoly &b_poly = polys[poly_index];
int3 vi = get_int3(t.vertices());
int shader = get_material_index(poly_index);
bool smooth = !get_face_sharp(poly_index) || use_loop_normals;
bool smooth = (b_poly.flag & ME_SMOOTH) || use_loop_normals;
if (use_loop_normals) {
BL::Array<float, 9> loop_normals = t.split_normals();
@@ -1052,19 +1012,19 @@ static void create_mesh(Scene *scene,
else {
vector<int> vi;
const MPoly *polys = static_cast<const MPoly *>(b_mesh.polygons[0].ptr.data);
std::optional<BL::IntAttribute> corner_verts = find_corner_vert_attribute(b_mesh);
const MLoop *loops = static_cast<const MLoop *>(b_mesh.loops[0].ptr.data);
for (int i = 0; i < numfaces; i++) {
const MPoly &b_poly = polys[i];
int n = b_poly.totloop;
int shader = get_material_index(i);
bool smooth = !get_face_sharp(i) || use_loop_normals;
bool smooth = (b_poly.flag & ME_SMOOTH) || use_loop_normals;
vi.resize(n);
for (int i = 0; i < n; i++) {
/* NOTE: Autosmooth is already taken care about. */
vi[i] = corner_verts->data[b_poly.loopstart + i].value();
vi[i] = loops[b_poly.loopstart + i].v;
}
/* create subd faces */

View File

@@ -222,10 +222,7 @@ static void export_pointcloud_motion(PointCloud *pointcloud,
/* Export motion points. */
const int num_points = pointcloud->num_points();
// Point cloud attributes are stored as float4 with the radius
// in the w element. This is explicit now as float3 is no longer
// interchangeable with float4 as it is packed now.
float4 *mP = attr_mP->data_float4() + motion_step * num_points;
float3 *mP = attr_mP->data_float3() + motion_step * num_points;
bool have_motion = false;
const array<float3> &pointcloud_points = pointcloud->get_points();
@@ -234,9 +231,11 @@ static void export_pointcloud_motion(PointCloud *pointcloud,
std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);
for (int i = 0; i < std::min(num_points, b_points_num); i++) {
const float3 P = get_float3(b_attr_position.data[i].vector());
const float3 co = get_float3(b_attr_position.data[i].vector());
const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.01f;
mP[i] = make_float4(P.x, P.y, P.z, radius);
float3 P = co;
P.w = radius;
mP[i] = P;
have_motion = have_motion || (P != pointcloud_points[i]);
}

View File

@@ -94,7 +94,7 @@ void python_thread_state_restore(void **python_thread_state)
*python_thread_state = NULL;
}
static const char *PyC_UnicodeAsBytes(PyObject *py_str, PyObject **coerce)
static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce)
{
const char *result = PyUnicode_AsUTF8(py_str);
if (result) {
@@ -131,8 +131,8 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)
}
PyObject *path_coerce = nullptr, *user_path_coerce = nullptr;
path_init(PyC_UnicodeAsBytes(path, &path_coerce),
PyC_UnicodeAsBytes(user_path, &user_path_coerce));
path_init(PyC_UnicodeAsByte(path, &path_coerce),
PyC_UnicodeAsByte(user_path, &user_path_coerce));
Py_XDECREF(path_coerce);
Py_XDECREF(user_path_coerce);

View File

@@ -404,7 +404,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
* point we know that we've got everything to render current view layer.
*/
/* At the moment we only free if we are not doing multi-view
* (or if we are rendering the last view). See #58142/D4239 for discussion.
* (or if we are rendering the last view). See T58142/D4239 for discussion.
*/
if (view_index == num_views - 1) {
free_blender_memory_if_possible();

View File

@@ -349,7 +349,8 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
bool use_light_tree = get_boolean(cscene, "use_light_tree");
integrator->set_use_light_tree(use_light_tree);
integrator->set_light_sampling_threshold(get_float(cscene, "light_sampling_threshold"));
integrator->set_light_sampling_threshold(
(use_light_tree) ? 0.0f : get_float(cscene, "light_sampling_threshold"));
if (integrator->use_light_tree_is_modified()) {
scene->light_manager->tag_update(scene, LightManager::UPDATE_ALL);
@@ -765,7 +766,7 @@ void BlenderSync::free_data_after_sync(BL::Depsgraph &b_depsgraph)
(BlenderSession::headless || is_interface_locked) &&
/* Baking re-uses the depsgraph multiple times, clearing crashes
* reading un-evaluated mesh data which isn't aligned with the
* geometry we're baking, see #71012. */
* geometry we're baking, see T71012. */
!scene->bake_manager->get_baking() &&
/* Persistent data must main caches for performance and correctness. */
!is_persistent_data;

View File

@@ -180,9 +180,9 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair
curve.bounds_grow(k, &hair->get_curve_keys()[0], curve_radius, bounds);
const size_t num_keys = hair->get_curve_keys().size();
const size_t num_steps = hair->get_motion_steps();
const float4 *key_steps = curve_attr_mP->data_float4();
const float3 *key_steps = curve_attr_mP->data_float3();
for (size_t step = 0; step < num_steps - 1; step++) {
curve.bounds_grow(k, key_steps + step * num_keys, bounds);
curve.bounds_grow(k, key_steps + step * num_keys, curve_radius, bounds);
}
if (bounds.valid()) {
int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
@@ -200,7 +200,7 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
const size_t num_steps = hair->get_motion_steps();
const float3 *curve_keys = &hair->get_curve_keys()[0];
const float4 *key_steps = curve_attr_mP->data_float4();
const float3 *key_steps = curve_attr_mP->data_float3();
const size_t num_keys = hair->get_curve_keys().size();
/* Calculate bounding box of the previous time step.
* Will be reused later to avoid duplicated work on

View File

@@ -254,15 +254,20 @@ void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
rtcSetGeometryBuildQuality(geom_id, build_quality);
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
const int *triangles = mesh->get_triangles().data();
rtcSetSharedGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_INDEX,
0,
RTC_FORMAT_UINT3,
triangles,
0,
sizeof(int) * 3,
num_triangles);
unsigned *rtc_indices = (unsigned *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(int) * 3, num_triangles);
assert(rtc_indices);
if (!rtc_indices) {
VLOG_WARNING << "Embree could not create new geometry buffer for mesh " << mesh->name.c_str()
<< ".\n";
return;
}
for (size_t j = 0; j < num_triangles; ++j) {
Mesh::Triangle t = mesh->get_triangle(j);
rtc_indices[j * 3] = t.v[0];
rtc_indices[j * 3 + 1] = t.v[1];
rtc_indices[j * 3 + 2] = t.v[2];
}
set_tri_vertex_buffer(geom_id, mesh, false);
@@ -304,46 +309,28 @@ void BVHEmbree::set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, con
verts = &attr_mP->data_float3()[t_ * num_verts];
}
float *rtc_verts = (update) ?
(float *)rtcGetGeometryBufferData(geom_id, RTC_BUFFER_TYPE_VERTEX, t) :
(float *)rtcSetNewGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_VERTEX,
t,
RTC_FORMAT_FLOAT3,
sizeof(float) * 3,
num_verts + 1);
assert(rtc_verts);
if (rtc_verts) {
for (size_t j = 0; j < num_verts; ++j) {
rtc_verts[0] = verts[j].x;
rtc_verts[1] = verts[j].y;
rtc_verts[2] = verts[j].z;
rtc_verts += 3;
}
}
if (update) {
rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
}
else {
rtcSetSharedGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_VERTEX,
t,
RTC_FORMAT_FLOAT3,
verts,
0,
sizeof(float3),
num_verts + 1);
}
}
}
/**
* Packs the hair motion curve data control variables (CVs) into float4s as [x y z radius]
*/
template<typename T>
void pack_motion_verts(size_t num_curves,
const Hair *hair,
const T *verts,
const float *curve_radius,
float4 *rtc_verts)
{
for (size_t j = 0; j < num_curves; ++j) {
Hair::Curve c = hair->get_curve(j);
int fk = c.first_key;
int k = 1;
for (; k < c.num_keys + 1; ++k, ++fk) {
rtc_verts[k].x = verts[fk].x;
rtc_verts[k].y = verts[fk].y;
rtc_verts[k].z = verts[fk].z;
rtc_verts[k].w = curve_radius[fk];
}
/* Duplicate Embree's Catmull-Rom spline CVs at the start and end of each curve. */
rtc_verts[0] = rtc_verts[1];
rtc_verts[k] = rtc_verts[k - 1];
rtc_verts += c.num_keys + 2;
}
}
@@ -373,10 +360,15 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c
const int t_mid = (num_motion_steps - 1) / 2;
const float *curve_radius = &hair->get_curve_radius()[0];
for (int t = 0; t < num_motion_steps; ++t) {
// As float4 and float3 are no longer interchangeable the 2 types need to be
// handled separately. Attributes are float4s where the radius is stored in w and
// the middle motion vector is from the mesh points which are stored float3s with
// the radius stored in another array.
const float3 *verts;
if (t == t_mid || attr_mP == NULL) {
verts = &hair->get_curve_keys()[0];
}
else {
int t_ = (t > t_mid) ? (t - 1) : t;
verts = &attr_mP->data_float3()[t_ * num_keys];
}
float4 *rtc_verts = (update) ? (float4 *)rtcGetGeometryBufferData(
geom_id, RTC_BUFFER_TYPE_VERTEX, t) :
(float4 *)rtcSetNewGeometryBuffer(geom_id,
@@ -389,14 +381,18 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c
assert(rtc_verts);
if (rtc_verts) {
const size_t num_curves = hair->num_curves();
if (t == t_mid || attr_mP == NULL) {
const float3 *verts = &hair->get_curve_keys()[0];
pack_motion_verts<float3>(num_curves, hair, verts, curve_radius, rtc_verts);
}
else {
int t_ = (t > t_mid) ? (t - 1) : t;
const float4 *verts = &attr_mP->data_float4()[t_ * num_keys];
pack_motion_verts<float4>(num_curves, hair, verts, curve_radius, rtc_verts);
for (size_t j = 0; j < num_curves; ++j) {
Hair::Curve c = hair->get_curve(j);
int fk = c.first_key;
int k = 1;
for (; k < c.num_keys + 1; ++k, ++fk) {
rtc_verts[k] = float3_to_float4(verts[fk]);
rtc_verts[k].w = curve_radius[fk];
}
/* Duplicate Embree's Catmull-Rom spline CVs at the start and end of each curve. */
rtc_verts[0] = rtc_verts[1];
rtc_verts[k] = rtc_verts[k - 1];
rtc_verts += c.num_keys + 2;
}
}
@@ -406,20 +402,6 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c
}
}
/**
* Pack the motion points into a float4 as [x y z radius]
*/
template<typename T>
void pack_motion_points(size_t num_points, const T *verts, const float *radius, float4 *rtc_verts)
{
for (size_t j = 0; j < num_points; ++j) {
rtc_verts[j].x = verts[j].x;
rtc_verts[j].y = verts[j].y;
rtc_verts[j].z = verts[j].z;
rtc_verts[j].w = radius[j];
}
}
void BVHEmbree::set_point_vertex_buffer(RTCGeometry geom_id,
const PointCloud *pointcloud,
const bool update)
@@ -439,10 +421,15 @@ void BVHEmbree::set_point_vertex_buffer(RTCGeometry geom_id,
const int t_mid = (num_motion_steps - 1) / 2;
const float *radius = pointcloud->get_radius().data();
for (int t = 0; t < num_motion_steps; ++t) {
// As float4 and float3 are no longer interchangeable the 2 types need to be
// handled separately. Attributes are float4s where the radius is stored in w and
// the middle motion vector is from the mesh points which are stored float3s with
// the radius stored in another array.
const float3 *verts;
if (t == t_mid || attr_mP == NULL) {
verts = pointcloud->get_points().data();
}
else {
int t_ = (t > t_mid) ? (t - 1) : t;
verts = &attr_mP->data_float3()[t_ * num_points];
}
float4 *rtc_verts = (update) ? (float4 *)rtcGetGeometryBufferData(
geom_id, RTC_BUFFER_TYPE_VERTEX, t) :
(float4 *)rtcSetNewGeometryBuffer(geom_id,
@@ -454,14 +441,9 @@ void BVHEmbree::set_point_vertex_buffer(RTCGeometry geom_id,
assert(rtc_verts);
if (rtc_verts) {
if (t == t_mid || attr_mP == NULL) {
const float3 *verts = pointcloud->get_points().data();
pack_motion_points<float3>(num_points, verts, radius, rtc_verts);
}
else {
int t_ = (t > t_mid) ? (t - 1) : t;
const float4 *verts = &attr_mP->data_float4()[t_ * num_points];
pack_motion_points<float4>(num_points, verts, radius, rtc_verts);
for (size_t j = 0; j < num_points; ++j) {
rtc_verts[j] = float3_to_float4(verts[j]);
rtc_verts[j].w = radius[j];
}
}

View File

@@ -35,7 +35,7 @@ struct BVHReferenceCompare {
/* Compare two references.
*
* Returns value is similar to return value of `strcmp()`.
* Returns value is similar to return value of strcmp().
*/
__forceinline int compare(const BVHReference &ra, const BVHReference &rb) const
{

View File

@@ -42,15 +42,12 @@ endif()
###########################################################################
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
set(WITH_CYCLES_HIP_BINARIES OFF)
message(STATUS "HIP temporarily disabled due to compiler bugs")
find_package(HIP)
set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
# find_package(HIP)
# set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
# if(HIP_FOUND)
# message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
# endif()
if(HIP_FOUND)
message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
endif()
endif()
if(NOT WITH_HIP_DYNLOAD)

View File

@@ -53,12 +53,8 @@ void CUDADevice::set_error(const string &error)
}
CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: GPUDevice(info, stats, profiler)
: Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
{
/* Verify that base class types can be used with specific backend types */
static_assert(sizeof(texMemObject) == sizeof(CUtexObject));
static_assert(sizeof(arrayMemObject) == sizeof(CUarray));
first_error = true;
cuDevId = info.num;
@@ -69,6 +65,12 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
need_texture_info = false;
device_texture_headroom = 0;
device_working_headroom = 0;
move_texture_to_host = false;
map_host_limit = 0;
map_host_used = 0;
can_map_host = 0;
pitch_alignment = 0;
/* Initialize CUDA. */
@@ -89,9 +91,8 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
/* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
* CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
* so we can predict which memory to map to host. */
int value;
cuda_assert(cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
can_map_host = value != 0;
cuda_assert(
cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
cuda_assert(cuDeviceGetAttribute(
&pitch_alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
@@ -498,56 +499,311 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
# endif
}
void CUDADevice::get_device_memory_info(size_t &total, size_t &free)
void CUDADevice::init_host_memory()
{
/* Limit amount of host mapped memory, because allocating too much can
* cause system instability. Leave at least half or 4 GB of system
* memory free, whichever is smaller. */
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
size_t system_ram = system_physical_ram();
if (system_ram > 0) {
if (system_ram / 2 > default_limit) {
map_host_limit = system_ram - default_limit;
}
else {
map_host_limit = system_ram / 2;
}
}
else {
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
map_host_limit = 0;
}
/* Amount of device memory to keep is free after texture memory
* and working memory allocations respectively. We set the working
* memory limit headroom lower so that some space is left after all
* texture memory allocations. */
device_working_headroom = 32 * 1024 * 1024LL; // 32MB
device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
}
void CUDADevice::load_texture_info()
{
  /* Nothing to upload unless the texture info array was flagged as out of date. */
  if (!need_texture_info) {
    return;
  }

  /* Clear the flag before copying: the copy may call into 'move_textures_to_host', which
   * calls 'load_texture_info' again, and a still-set flag would make that loop forever. */
  need_texture_info = false;
  texture_info.copy_to_device();
}
/* Move allocations out of device memory until about `size` bytes have been released.
 *
 * Each iteration scans `cuda_mem_map` for the largest movable allocation (image textures
 * are preferred), then calls `device_copy_to()` on it while `move_texture_to_host` is set.
 * When `for_texture` is true only image textures (`data_height > 1`) are candidates.
 * The loop ends when `size` reaches zero or no candidate is left; afterwards the texture
 * info array is reloaded. */
void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
{
/* Break out of recursive call, which can happen when moving memory on a multi device.
 * NOTE(review): this static flag is read here without holding `move_mutex` (it is only
 * written while that mutex is held below) -- confirm this is safe with several devices. */
static bool any_device_moving_textures_to_host = false;
if (any_device_moving_textures_to_host) {
return;
}
/* Signal to reallocate textures in host memory only. */
move_texture_to_host = true;
while (size > 0) {
/* Find suitable memory allocation to move. */
device_memory *max_mem = NULL;
size_t max_size = 0;
bool max_is_image = false;
/* The map is only locked while searching; the lock is released before the move. */
thread_scoped_lock lock(cuda_mem_map_mutex);
foreach (CUDAMemMap::value_type &pair, cuda_mem_map) {
device_memory &mem = *pair.first;
CUDAMem *cmem = &pair.second;
/* Can only move textures allocated on this device (and not those from peer devices).
 * And need to ignore memory that is already on the host. */
if (!mem.is_resident(this) || cmem->use_mapped_host) {
continue;
}
/* The texture info array itself is never treated as a texture here. */
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
(&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
/* Can't move this type of memory. */
if (!is_texture || cmem->array) {
continue;
}
/* For other textures, only move image textures. */
if (for_texture && !is_image) {
continue;
}
/* Try to move largest allocation, prefer moving images. */
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
max_is_image = is_image;
max_size = mem.device_size;
max_mem = &mem;
}
}
lock.unlock();
/* Move to host memory. This part is mutex protected since
 * multiple CUDA devices could be moving the memory. The
 * first one will do it, and the rest will adopt the pointer. */
if (max_mem) {
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
static thread_mutex move_mutex;
thread_scoped_lock lock(move_mutex);
any_device_moving_textures_to_host = true;
/* Potentially need to call back into multi device, so pointer mapping
 * and peer devices are updated. This is also necessary since the device
 * pointer may just be a key here, so cannot be accessed and freed directly.
 * Unfortunately it does mean that memory is reallocated on all other
 * devices as well, which is potentially dangerous when still in use (since
 * a thread rendering on another device would only be caught in this mutex
 * if it so happens to do an allocation at the same time as well). */
max_mem->device_copy_to();
/* Subtract what was moved, clamping at zero to avoid unsigned wrap-around. */
size = (max_size >= size) ? 0 : size - max_size;
any_device_moving_textures_to_host = false;
}
else {
/* Nothing movable is left, give up even if `size` was not reached. */
break;
}
}
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
move_texture_to_host = false;
/* Update texture info array with new pointers. */
load_texture_info();
}
/* Allocate storage for `mem` and register it in `cuda_mem_map`.
 *
 * Device memory (`cuMemAlloc`) is tried first while keeping a headroom of free device
 * memory. If that does not succeed, `can_map_host` is set and `mem.type` is not
 * `MEM_DEVICE_ONLY`, mapped host memory is used instead: either the buffer another device
 * already stored in `mem.shared_pointer`, or a new `cuMemHostAlloc` allocation.
 *
 * \param mem: Memory descriptor. Its `device_pointer` and `device_size` are always
 * written; `host_pointer`, `shared_pointer` and `shared_counter` may be updated when
 * mapped host memory is used.
 * \param pitch_padding: Extra bytes added to `mem.memory_size()` for the allocation.
 * \return The map entry for `mem`, or NULL when no memory could be obtained (in which
 * case `set_error()` has been called). */
CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_padding)
{
CUDAContextScope scope(this);
CUdeviceptr device_pointer = 0;
size_t size = mem.memory_size() + pitch_padding;
/* Starts out as a failure code so the fallback paths below run unless an allocation succeeds. */
CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
const char *status = "";
/* First try allocating in device memory, respecting headroom. We make
 * an exception for texture info. It is small and frequently accessed,
 * so treat it as working memory.
 *
 * If there is not enough room for working memory, we will try to move
 * textures to host memory, assuming the performance impact would have
 * been worse for working memory. */
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
size_t total = 0, free = 0;
/* NOTE(review): the CUresult of cuMemGetInfo is not checked; on failure `free` stays 0. */
cuMemGetInfo(&free, &total);
/* Move textures to host memory if needed. */
if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
move_textures_to_host(size + headroom - free, is_texture);
cuMemGetInfo(&free, &total);
}
/* Allocate in device memory. */
if (!move_texture_to_host && (size + headroom) < free) {
mem_alloc_result = cuMemAlloc(&device_pointer, size);
if (mem_alloc_result == CUDA_SUCCESS) {
status = " in device memory";
}
}
/* Fall back to mapped host memory if needed and possible. */
void *shared_pointer = 0;
if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) {
if (mem.shared_pointer) {
/* Another device already allocated host memory. */
mem_alloc_result = CUDA_SUCCESS;
shared_pointer = mem.shared_pointer;
}
else if (map_host_used + size < map_host_limit) {
/* Allocate host memory ourselves. */
mem_alloc_result = cuMemHostAlloc(
&shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
(mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
}
if (mem_alloc_result == CUDA_SUCCESS) {
/* Obtain the device-side address of the mapped host buffer. */
cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0));
map_host_used += size;
status = " in host memory";
}
}
if (mem_alloc_result != CUDA_SUCCESS) {
if (mem.type == MEM_DEVICE_ONLY) {
status = " failed, out of device memory";
set_error("System is out of GPU memory");
}
else {
status = " failed, out of device and host memory";
set_error("System is out of GPU and shared host memory");
}
}
if (mem.name) {
VLOG_WORK << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")" << status;
}
/* NOTE(review): `device_size` and the allocation statistics are updated before the
 * failure check below, so they are also updated when nothing was allocated -- confirm
 * this is intended. */
mem.device_pointer = (device_ptr)device_pointer;
mem.device_size = size;
stats.mem_alloc(size);
if (!mem.device_pointer) {
return NULL;
}
/* Insert into map of allocations. */
thread_scoped_lock lock(cuda_mem_map_mutex);
CUDAMem *cmem = &cuda_mem_map[&mem];
if (shared_pointer != 0) {
/* Replace host pointer with our host allocation. Only works if
 * CUDA memory layout is the same and has no pitch padding. Also
 * does not work if we move textures to host during a render,
 * since other devices might be using the memory. */
if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
mem.host_pointer != shared_pointer) {
memcpy(shared_pointer, mem.host_pointer, size);
/* A Call to device_memory::host_free() should be preceded by
 * a call to device_memory::device_free() for host memory
 * allocated by a device to be handled properly. Two exceptions
 * are here and a call in OptiXDevice::generic_alloc(), where
 * the current host memory can be assumed to be allocated by
 * device_memory::host_alloc(), not by a device */
mem.host_free();
mem.host_pointer = shared_pointer;
}
/* Record the shared buffer and count this device as one of its users. */
mem.shared_pointer = shared_pointer;
mem.shared_counter++;
cmem->use_mapped_host = true;
}
else {
cmem->use_mapped_host = false;
}
return cmem;
}
bool CUDADevice::alloc_device(void *&device_pointer, size_t size)
void CUDADevice::generic_copy_to(device_memory &mem)
{
CUDAContextScope scope(this);
if (!mem.host_pointer || !mem.device_pointer) {
return;
}
CUresult mem_alloc_result = cuMemAlloc((CUdeviceptr *)&device_pointer, size);
return mem_alloc_result == CUDA_SUCCESS;
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
* cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
* mem.host_pointer. */
thread_scoped_lock lock(cuda_mem_map_mutex);
if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const CUDAContextScope scope(this);
cuda_assert(
cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size()));
}
}
void CUDADevice::free_device(void *device_pointer)
void CUDADevice::generic_free(device_memory &mem)
{
CUDAContextScope scope(this);
if (mem.device_pointer) {
CUDAContextScope scope(this);
thread_scoped_lock lock(cuda_mem_map_mutex);
DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
const CUDAMem &cmem = cuda_mem_map[&mem];
cuda_assert(cuMemFree((CUdeviceptr)device_pointer));
}
/* If cmem.use_mapped_host is true, reference counting is used
* to safely free a mapped host memory. */
bool CUDADevice::alloc_host(void *&shared_pointer, size_t size)
{
CUDAContextScope scope(this);
if (cmem.use_mapped_host) {
assert(mem.shared_pointer);
if (mem.shared_pointer) {
assert(mem.shared_counter > 0);
if (--mem.shared_counter == 0) {
if (mem.host_pointer == mem.shared_pointer) {
mem.host_pointer = 0;
}
cuMemFreeHost(mem.shared_pointer);
mem.shared_pointer = 0;
}
}
map_host_used -= mem.device_size;
}
else {
/* Free device memory. */
cuda_assert(cuMemFree(mem.device_pointer));
}
CUresult mem_alloc_result = cuMemHostAlloc(
&shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
return mem_alloc_result == CUDA_SUCCESS;
}
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
void CUDADevice::free_host(void *shared_pointer)
{
CUDAContextScope scope(this);
cuMemFreeHost(shared_pointer);
}
void CUDADevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
{
CUDAContextScope scope(this);
cuda_assert(cuMemHostGetDevicePointer_v2((CUdeviceptr *)&device_pointer, shared_pointer, 0));
}
void CUDADevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
{
const CUDAContextScope scope(this);
cuda_assert(cuMemcpyHtoD((CUdeviceptr)device_pointer, host_pointer, size));
cuda_mem_map.erase(cuda_mem_map.find(&mem));
}
}
void CUDADevice::mem_alloc(device_memory &mem)
@@ -612,8 +868,8 @@ void CUDADevice::mem_zero(device_memory &mem)
/* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
* regardless of mem.host_pointer and mem.shared_pointer. */
thread_scoped_lock lock(device_mem_map_mutex);
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
thread_scoped_lock lock(cuda_mem_map_mutex);
if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const CUDAContextScope scope(this);
cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
}
@@ -738,19 +994,19 @@ void CUDADevice::tex_alloc(device_texture &mem)
return;
}
Mem *cmem = NULL;
CUDAMem *cmem = NULL;
CUarray array_3d = NULL;
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
size_t dst_pitch = src_pitch;
if (!mem.is_resident(this)) {
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
thread_scoped_lock lock(cuda_mem_map_mutex);
cmem = &cuda_mem_map[&mem];
cmem->texobject = 0;
if (mem.data_depth > 1) {
array_3d = (CUarray)mem.device_pointer;
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
cmem->array = array_3d;
}
else if (mem.data_height > 0) {
dst_pitch = align_up(src_pitch, pitch_alignment);
@@ -794,10 +1050,10 @@ void CUDADevice::tex_alloc(device_texture &mem)
mem.device_size = size;
stats.mem_alloc(size);
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
thread_scoped_lock lock(cuda_mem_map_mutex);
cmem = &cuda_mem_map[&mem];
cmem->texobject = 0;
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
cmem->array = array_3d;
}
else if (mem.data_height > 0) {
/* 2D texture, using pitch aligned linear memory. */
@@ -881,8 +1137,8 @@ void CUDADevice::tex_alloc(device_texture &mem)
texDesc.filterMode = filter_mode;
texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
thread_scoped_lock lock(cuda_mem_map_mutex);
cmem = &cuda_mem_map[&mem];
cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
@@ -897,9 +1153,9 @@ void CUDADevice::tex_free(device_texture &mem)
{
if (mem.device_pointer) {
CUDAContextScope scope(this);
thread_scoped_lock lock(device_mem_map_mutex);
DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
const Mem &cmem = device_mem_map[&mem];
thread_scoped_lock lock(cuda_mem_map_mutex);
DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
const CUDAMem &cmem = cuda_mem_map[&mem];
if (cmem.texobject) {
/* Free bindless texture. */
@@ -908,16 +1164,16 @@ void CUDADevice::tex_free(device_texture &mem)
if (!mem.is_resident(this)) {
/* Do not free memory here, since it was allocated on a different device. */
device_mem_map.erase(device_mem_map.find(&mem));
cuda_mem_map.erase(cuda_mem_map.find(&mem));
}
else if (cmem.array) {
/* Free array. */
cuArrayDestroy(reinterpret_cast<CUarray>(cmem.array));
cuArrayDestroy(cmem.array);
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
device_mem_map.erase(device_mem_map.find(&mem));
cuda_mem_map.erase(cuda_mem_map.find(&mem));
}
else {
lock.unlock();

View File

@@ -21,7 +21,7 @@ CCL_NAMESPACE_BEGIN
class DeviceQueue;
class CUDADevice : public GPUDevice {
class CUDADevice : public Device {
friend class CUDAContextScope;
@@ -29,11 +29,36 @@ class CUDADevice : public GPUDevice {
CUdevice cuDevice;
CUcontext cuContext;
CUmodule cuModule;
size_t device_texture_headroom;
size_t device_working_headroom;
bool move_texture_to_host;
size_t map_host_used;
size_t map_host_limit;
int can_map_host;
int pitch_alignment;
int cuDevId;
int cuDevArchitecture;
bool first_error;
/* Per-allocation bookkeeping stored as the mapped value of the allocation map. */
struct CUDAMem {
  /* Texture object handle for this allocation; zero until one is assigned. */
  CUtexObject texobject;
  /* Array handle for this allocation; zero until one is assigned. */
  CUarray array;
  /* If true, a mapped host memory in shared_pointer is being used. */
  bool use_mapped_host;

  /* Start with no texture object, no array, and device (not mapped host) memory. */
  CUDAMem() : texobject(0), array(0), use_mapped_host(false)
  {
  }
};
typedef map<device_memory *, CUDAMem> CUDAMemMap;
CUDAMemMap cuda_mem_map;
thread_mutex cuda_mem_map_mutex;
/* Bindless Textures */
device_vector<TextureInfo> texture_info;
bool need_texture_info;
CUDADeviceKernels kernels;
static bool have_precompiled_kernels();
@@ -63,13 +88,17 @@ class CUDADevice : public GPUDevice {
void reserve_local_memory(const uint kernel_features);
virtual void get_device_memory_info(size_t &total, size_t &free) override;
virtual bool alloc_device(void *&device_pointer, size_t size) override;
virtual void free_device(void *device_pointer) override;
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
virtual void free_host(void *shared_pointer) override;
virtual void transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;
void init_host_memory();
void load_texture_info();
void move_textures_to_host(size_t size, bool for_texture);
CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
void generic_copy_to(device_memory &mem);
void generic_free(device_memory &mem);
void mem_alloc(device_memory &mem) override;

View File

@@ -452,320 +452,6 @@ void *Device::get_cpu_osl_memory()
return nullptr;
}
/* Out-of-line definition of the destructor; the body itself performs no work. */
GPUDevice::~GPUDevice() noexcept(false)
{
}
/* Upload the texture info array when it has been flagged as out of date.
 * Returns true if a copy to the device was issued, false if nothing had to be done. */
bool GPUDevice::load_texture_info()
{
  if (!need_texture_info) {
    return false;
  }

  /* Clear the flag before copying: the copy may call into 'move_textures_to_host', which
   * calls 'load_texture_info' again, and a still-set flag would make that loop forever. */
  need_texture_info = false;
  texture_info.copy_to_device();
  return true;
}
void GPUDevice::init_host_memory(size_t preferred_texture_headroom,
size_t preferred_working_headroom)
{
/* Limit amount of host mapped memory, because allocating too much can
* cause system instability. Leave at least half or 4 GB of system
* memory free, whichever is smaller. */
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
size_t system_ram = system_physical_ram();
if (system_ram > 0) {
if (system_ram / 2 > default_limit) {
map_host_limit = system_ram - default_limit;
}
else {
map_host_limit = system_ram / 2;
}
}
else {
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
map_host_limit = 0;
}
/* Amount of device memory to keep free after texture memory
* and working memory allocations respectively. We set the working
* memory limit headroom lower than the working one so there
* is space left for it. */
device_working_headroom = preferred_working_headroom > 0 ? preferred_working_headroom :
32 * 1024 * 1024LL; // 32MB
device_texture_headroom = preferred_texture_headroom > 0 ? preferred_texture_headroom :
128 * 1024 * 1024LL; // 128MB
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
}
/* Move allocations out of device memory until about `size` bytes have been released.
 *
 * Each iteration scans `device_mem_map` for the largest movable allocation (image textures
 * are preferred), then calls `device_copy_to()` on it while `move_texture_to_host` is set.
 * When `for_texture` is true only image textures (`data_height > 1`) are candidates.
 * The loop ends when `size` reaches zero or no candidate is left; afterwards the texture
 * info array is reloaded. */
void GPUDevice::move_textures_to_host(size_t size, bool for_texture)
{
/* Break out of recursive call, which can happen when moving memory on a multi device.
 * NOTE(review): this static flag is read here without holding `move_mutex` (it is only
 * written while that mutex is held below) -- confirm this is safe with several devices. */
static bool any_device_moving_textures_to_host = false;
if (any_device_moving_textures_to_host) {
return;
}
/* Signal to reallocate textures in host memory only. */
move_texture_to_host = true;
while (size > 0) {
/* Find suitable memory allocation to move. */
device_memory *max_mem = NULL;
size_t max_size = 0;
bool max_is_image = false;
/* The map is only locked while searching; the lock is released before the move. */
thread_scoped_lock lock(device_mem_map_mutex);
foreach (MemMap::value_type &pair, device_mem_map) {
device_memory &mem = *pair.first;
Mem *cmem = &pair.second;
/* Can only move textures allocated on this device (and not those from peer devices).
 * And need to ignore memory that is already on the host. */
if (!mem.is_resident(this) || cmem->use_mapped_host) {
continue;
}
/* The texture info array itself is never treated as a texture here. */
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
(&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
/* Can't move this type of memory. */
if (!is_texture || cmem->array) {
continue;
}
/* For other textures, only move image textures. */
if (for_texture && !is_image) {
continue;
}
/* Try to move largest allocation, prefer moving images. */
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
max_is_image = is_image;
max_size = mem.device_size;
max_mem = &mem;
}
}
lock.unlock();
/* Move to host memory. This part is mutex protected since
 * multiple backend devices could be moving the memory. The
 * first one will do it, and the rest will adopt the pointer. */
if (max_mem) {
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
static thread_mutex move_mutex;
thread_scoped_lock lock(move_mutex);
any_device_moving_textures_to_host = true;
/* Potentially need to call back into multi device, so pointer mapping
 * and peer devices are updated. This is also necessary since the device
 * pointer may just be a key here, so cannot be accessed and freed directly.
 * Unfortunately it does mean that memory is reallocated on all other
 * devices as well, which is potentially dangerous when still in use (since
 * a thread rendering on another device would only be caught in this mutex
 * if it so happens to do an allocation at the same time as well). */
max_mem->device_copy_to();
/* Subtract what was moved, clamping at zero to avoid unsigned wrap-around. */
size = (max_size >= size) ? 0 : size - max_size;
any_device_moving_textures_to_host = false;
}
else {
/* Nothing movable is left, give up even if `size` was not reached. */
break;
}
}
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
move_texture_to_host = false;
/* Update texture info array with new pointers. */
load_texture_info();
}
/* Allocate storage for `mem` and register it in `device_mem_map`.
 *
 * Device memory (`alloc_device`) is tried first while keeping a headroom of free device
 * memory. If that does not succeed, `can_map_host` is set and `mem.type` is not
 * `MEM_DEVICE_ONLY`, mapped host memory is used instead: either the buffer another device
 * already stored in `mem.shared_pointer`, or a new `alloc_host` allocation.
 *
 * \param mem: Memory descriptor. Its `device_pointer` and `device_size` are always
 * written; `host_pointer`, `shared_pointer` and `shared_counter` may be updated when
 * mapped host memory is used.
 * \param pitch_padding: Extra bytes added to `mem.memory_size()` for the allocation.
 * \return The map entry for `mem`, or NULL when no memory could be obtained (in which
 * case `set_error()` has been called). */
GPUDevice::Mem *GPUDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
{
void *device_pointer = 0;
size_t size = mem.memory_size() + pitch_padding;
/* Stays false unless one of the allocation paths below succeeds. */
bool mem_alloc_result = false;
const char *status = "";
/* First try allocating in device memory, respecting headroom. We make
 * an exception for texture info. It is small and frequently accessed,
 * so treat it as working memory.
 *
 * If there is not enough room for working memory, we will try to move
 * textures to host memory, assuming the performance impact would have
 * been worse for working memory. */
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
size_t total = 0, free = 0;
get_device_memory_info(total, free);
/* Move textures to host memory if needed. */
if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
move_textures_to_host(size + headroom - free, is_texture);
get_device_memory_info(total, free);
}
/* Allocate in device memory. */
if (!move_texture_to_host && (size + headroom) < free) {
mem_alloc_result = alloc_device(device_pointer, size);
if (mem_alloc_result) {
device_mem_in_use += size;
status = " in device memory";
}
}
/* Fall back to mapped host memory if needed and possible. */
void *shared_pointer = 0;
if (!mem_alloc_result && can_map_host && mem.type != MEM_DEVICE_ONLY) {
if (mem.shared_pointer) {
/* Another device already allocated host memory. */
mem_alloc_result = true;
shared_pointer = mem.shared_pointer;
}
else if (map_host_used + size < map_host_limit) {
/* Allocate host memory ourselves. */
mem_alloc_result = alloc_host(shared_pointer, size);
assert((mem_alloc_result && shared_pointer != 0) ||
(!mem_alloc_result && shared_pointer == 0));
}
if (mem_alloc_result) {
/* Obtain the device-side address of the mapped host buffer. */
transform_host_pointer(device_pointer, shared_pointer);
map_host_used += size;
status = " in host memory";
}
}
if (!mem_alloc_result) {
if (mem.type == MEM_DEVICE_ONLY) {
status = " failed, out of device memory";
set_error("System is out of GPU memory");
}
else {
status = " failed, out of device and host memory";
set_error("System is out of GPU and shared host memory");
}
}
if (mem.name) {
VLOG_WORK << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")" << status;
}
/* NOTE(review): `device_size` and the allocation statistics are updated before the
 * failure check below, so they are also updated when nothing was allocated -- confirm
 * this is intended. */
mem.device_pointer = (device_ptr)device_pointer;
mem.device_size = size;
stats.mem_alloc(size);
if (!mem.device_pointer) {
return NULL;
}
/* Insert into map of allocations. */
thread_scoped_lock lock(device_mem_map_mutex);
Mem *cmem = &device_mem_map[&mem];
if (shared_pointer != 0) {
/* Replace host pointer with our host allocation. Only works if
 * memory layout is the same and has no pitch padding. Also
 * does not work if we move textures to host during a render,
 * since other devices might be using the memory. */
if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
mem.host_pointer != shared_pointer) {
memcpy(shared_pointer, mem.host_pointer, size);
/* A Call to device_memory::host_free() should be preceded by
 * a call to device_memory::device_free() for host memory
 * allocated by a device to be handled properly. Two exceptions
 * are here and a call in OptiXDevice::generic_alloc(), where
 * the current host memory can be assumed to be allocated by
 * device_memory::host_alloc(), not by a device */
mem.host_free();
mem.host_pointer = shared_pointer;
}
/* Record the shared buffer and count this device as one of its users. */
mem.shared_pointer = shared_pointer;
mem.shared_counter++;
cmem->use_mapped_host = true;
}
else {
cmem->use_mapped_host = false;
}
return cmem;
}
/* Release the storage behind `mem` and drop its entry from `device_mem_map`.
 * Does nothing when `mem` has no device pointer. */
void GPUDevice::generic_free(device_memory &mem)
{
  if (!mem.device_pointer) {
    return;
  }

  thread_scoped_lock lock(device_mem_map_mutex);
  DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
  const Mem &entry = device_mem_map[&mem];

  if (!entry.use_mapped_host) {
    /* Plain device allocation: hand it back to the backend. */
    free_device((void *)mem.device_pointer);
    device_mem_in_use -= mem.device_size;
  }
  else {
    /* Mapped host memory is reference counted through `shared_counter`, and is only
     * released once the counter drops to zero. */
    assert(mem.shared_pointer);
    if (mem.shared_pointer) {
      assert(mem.shared_counter > 0);
      if (--mem.shared_counter == 0) {
        /* The host pointer aliases the buffer about to be freed, so clear it too. */
        if (mem.host_pointer == mem.shared_pointer) {
          mem.host_pointer = 0;
        }
        free_host(mem.shared_pointer);
        mem.shared_pointer = 0;
      }
    }
    map_host_used -= mem.device_size;
  }

  stats.mem_free(mem.device_size);
  mem.device_pointer = 0;
  mem.device_size = 0;

  device_mem_map.erase(device_mem_map.find(&mem));
}
/* Copy the contents of `mem.host_pointer` to `mem.device_pointer`, unless the device is
 * already reading the host buffer directly. Does nothing if either pointer is unset. */
void GPUDevice::generic_copy_to(device_memory &mem)
{
  if (!(mem.host_pointer && mem.device_pointer)) {
    return;
  }

  thread_scoped_lock lock(device_mem_map_mutex);

  /* When this device does not use mapped host memory, it only uses memory from the
   * backend device allocation regardless of mem.host_pointer and mem.shared_pointer,
   * so the data has to be copied over from mem.host_pointer. The copy is skipped only
   * when mapped host memory is in use and the host pointer is that same shared buffer. */
  const bool uses_mapped_host = device_mem_map[&mem].use_mapped_host;
  const bool host_is_shared_buffer = (mem.host_pointer == mem.shared_pointer);
  if (uses_mapped_host && host_is_shared_buffer) {
    return;
  }

  copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size());
}
/* DeviceInfo */
CCL_NAMESPACE_END

View File

@@ -81,7 +81,7 @@ class DeviceInfo {
bool has_gpu_queue; /* Device supports GPU queue. */
bool use_metalrt; /* Use MetalRT to accelerate ray queries (Metal only). */
KernelOptimizationLevel kernel_optimization_level; /* Optimization level applied to path tracing
* kernels (Metal only). */
kernels (Metal only). */
DenoiserTypeMask denoisers; /* Supported denoiser types. */
int cpu_threads;
vector<DeviceInfo> multi_devices;
@@ -182,7 +182,7 @@ class Device {
{
}
/* Report status and return true if device is ready for rendering. */
/* Return true if device is ready for rendering, or report status if not. */
virtual bool is_ready(string & /*status*/) const
{
return true;
@@ -309,93 +309,6 @@ class Device {
static uint devices_initialized_mask;
};
/* Base class for GPU devices, holding functionality shared between GPU back-ends:
 * texture info storage, the table of allocations, and generic allocation, free and copy
 * routines built on top of a small set of pure virtual backend hooks. */
class GPUDevice : public Device {
protected:
/* Only constructible by derived classes. All counters and limits start at zero and
 * all flags start as false. */
GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
: Device(info_, stats_, profiler_),
texture_info(this, "texture_info", MEM_GLOBAL),
need_texture_info(false),
can_map_host(false),
map_host_used(0),
map_host_limit(0),
device_texture_headroom(0),
device_working_headroom(0),
device_mem_map(),
device_mem_map_mutex(),
move_texture_to_host(false),
device_mem_in_use(0)
{
}
public:
virtual ~GPUDevice() noexcept(false);
/* For GPUs that can use bindless textures in some way or another. */
device_vector<TextureInfo> texture_info;
/* Set when `texture_info` still has to be copied to the device. */
bool need_texture_info;
/* Returns true if the texture info was copied to the device (meaning, some more
 * re-initialization might be needed). */
virtual bool load_texture_info();
protected:
/* Memory allocation, only accessed through device_memory. */
friend class device_memory;
/* Whether allocations may fall back to mapped host memory. */
bool can_map_host;
/* Bytes of mapped host memory currently accounted for, and the limit for it. */
size_t map_host_used;
size_t map_host_limit;
/* Device memory to keep free after texture and working memory allocations. */
size_t device_texture_headroom;
size_t device_working_headroom;
/* Integer types used to store backend texture object and array handles. */
typedef unsigned long long texMemObject;
typedef unsigned long long arrayMemObject;
/* Per-allocation bookkeeping, stored as the mapped value of `device_mem_map`. */
struct Mem {
Mem() : texobject(0), array(0), use_mapped_host(false)
{
}
texMemObject texobject;
arrayMemObject array;
/* If true, a mapped host memory in shared_pointer is being used. */
bool use_mapped_host;
};
/* Table of allocations keyed by their device_memory, and the mutex declared for it. */
typedef map<device_memory *, Mem> MemMap;
MemMap device_mem_map;
thread_mutex device_mem_map_mutex;
/* While true, allocations are not placed in device memory. */
bool move_texture_to_host;
/* Simple counter which will try to track amount of used device memory */
size_t device_mem_in_use;
/* Sets `map_host_limit` and both headrooms; a preferred headroom of 0 means "use the
 * built-in default". */
virtual void init_host_memory(size_t preferred_texture_headroom = 0,
size_t preferred_working_headroom = 0);
virtual void move_textures_to_host(size_t size, bool for_texture);
/* Allocation, deallocation and copy functions, with corresponding
 * support of device/host allocations. */
virtual GPUDevice::Mem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
virtual void generic_free(device_memory &mem);
virtual void generic_copy_to(device_memory &mem);
/* total - amount of device memory, free - amount of available device memory */
virtual void get_device_memory_info(size_t &total, size_t &free) = 0;
/* Backend hooks for device and host allocations. The `alloc_*` functions return whether
 * the allocation succeeded and store the result in their first argument. */
virtual bool alloc_device(void *&device_pointer, size_t size) = 0;
virtual void free_device(void *device_pointer) = 0;
virtual bool alloc_host(void *&shared_pointer, size_t size) = 0;
virtual void free_host(void *shared_pointer) = 0;
/* This function should store in `device_pointer` the device pointer corresponding to
 * `shared_pointer`, which is a host buffer allocated in `alloc_host`.
 * NOTE(review): the previous wording said the function should return `true` if such an
 * address transformation is possible and `false` otherwise, but the declared return
 * type is void -- confirm how a failed transformation is meant to be reported. */
virtual void transform_host_pointer(void *&device_pointer, void *&shared_pointer) = 0;
/* Copies `size` bytes from `host_pointer` to `device_pointer`. */
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) = 0;
};
CCL_NAMESPACE_END
#endif /* __DEVICE_H__ */

View File

@@ -53,12 +53,8 @@ void HIPDevice::set_error(const string &error)
}
HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: GPUDevice(info, stats, profiler)
: Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
{
/* Verify that base class types can be used with specific backend types */
static_assert(sizeof(texMemObject) == sizeof(hipTextureObject_t));
static_assert(sizeof(arrayMemObject) == sizeof(hArray));
first_error = true;
hipDevId = info.num;
@@ -69,6 +65,12 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
need_texture_info = false;
device_texture_headroom = 0;
device_working_headroom = 0;
move_texture_to_host = false;
map_host_limit = 0;
map_host_used = 0;
can_map_host = 0;
pitch_alignment = 0;
/* Initialize HIP. */
@@ -89,9 +91,7 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
/* hipDeviceMapHost for mapping host memory when out of device memory.
* hipDeviceLmemResizeToMax for reserving local memory ahead of render,
* so we can predict which memory to map to host. */
int value;
hip_assert(hipDeviceGetAttribute(&value, hipDeviceAttributeCanMapHostMemory, hipDevice));
can_map_host = value != 0;
hip_assert(hipDeviceGetAttribute(&can_map_host, hipDeviceAttributeCanMapHostMemory, hipDevice));
hip_assert(
hipDeviceGetAttribute(&pitch_alignment, hipDeviceAttributeTexturePitchAlignment, hipDevice));
@@ -460,57 +460,305 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
# endif
}
void HIPDevice::get_device_memory_info(size_t &total, size_t &free)
void HIPDevice::init_host_memory()
{
/* Limit amount of host mapped memory, because allocating too much can
* cause system instability. Leave at least half or 4 GB of system
* memory free, whichever is smaller. */
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
size_t system_ram = system_physical_ram();
if (system_ram > 0) {
if (system_ram / 2 > default_limit) {
map_host_limit = system_ram - default_limit;
}
else {
map_host_limit = system_ram / 2;
}
}
else {
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
map_host_limit = 0;
}
/* Amount of device memory to keep is free after texture memory
* and working memory allocations respectively. We set the working
* memory limit headroom lower so that some space is left after all
* texture memory allocations. */
device_working_headroom = 32 * 1024 * 1024LL; // 32MB
device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
}
void HIPDevice::load_texture_info()
{
  /* Nothing to upload unless the texture info was flagged as needing it. */
  if (!need_texture_info) {
    return;
  }

  /* Clear the flag ahead of the copy: the copy may end up in
   * 'move_textures_to_host', which calls 'load_texture_info' again, and
   * that would otherwise loop indefinitely. */
  need_texture_info = false;
  texture_info.copy_to_device();
}
/* Try to free up device memory by moving texture allocations to host memory.
*
* Repeatedly picks the best candidate from `hip_mem_map` and re-copies it via
* `device_copy_to()` while `move_texture_to_host` is set, until `size` bytes worth of
* allocations were handled or no candidate is left. Afterwards the texture info array is
* reloaded through `load_texture_info()`.
*
* size: number of bytes that should be moved off the device.
* for_texture: when true, only image textures (`data_height > 1`) are candidates. */
void HIPDevice::move_textures_to_host(size_t size, bool for_texture)
{
/* Break out of recursive call, which can happen when moving memory on a multi device. */
/* NOTE(review): this is a function-local static, so it is shared by every HIPDevice
* instance, and it is read here without holding `move_mutex`. */
static bool any_device_moving_textures_to_host = false;
if (any_device_moving_textures_to_host) {
return;
}
/* Signal to reallocate textures in host memory only. */
move_texture_to_host = true;
while (size > 0) {
/* Find suitable memory allocation to move. */
device_memory *max_mem = NULL;
size_t max_size = 0;
bool max_is_image = false;
/* The allocation map is only locked while searching for a candidate. */
thread_scoped_lock lock(hip_mem_map_mutex);
foreach (HIPMemMap::value_type &pair, hip_mem_map) {
device_memory &mem = *pair.first;
HIPMem *cmem = &pair.second;
/* Can only move textures allocated on this device (and not those from peer devices).
* And need to ignore memory that is already on the host. */
if (!mem.is_resident(this) || cmem->use_mapped_host) {
continue;
}
/* The texture info array itself is never treated as a texture here. */
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
(&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
/* Can't move this type of memory: non-textures and array backed allocations. */
if (!is_texture || cmem->array) {
continue;
}
/* For other textures, only move image textures. */
if (for_texture && !is_image) {
continue;
}
/* Try to move largest allocation, prefer moving images. */
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
max_is_image = is_image;
max_size = mem.device_size;
max_mem = &mem;
}
}
lock.unlock();
/* Move to host memory. This part is mutex protected since
* multiple HIP devices could be moving the memory. The
* first one will do it, and the rest will adopt the pointer. */
if (max_mem) {
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
static thread_mutex move_mutex;
/* This `lock` shadows the (already released) map lock declared above. */
thread_scoped_lock lock(move_mutex);
any_device_moving_textures_to_host = true;
/* Potentially need to call back into multi device, so pointer mapping
* and peer devices are updated. This is also necessary since the device
* pointer may just be a key here, so cannot be accessed and freed directly.
* Unfortunately it does mean that memory is reallocated on all other
* devices as well, which is potentially dangerous when still in use (since
* a thread rendering on another device would only be caught in this mutex
* if it so happens to do an allocation at the same time as well). */
max_mem->device_copy_to();
/* Reduce the remaining amount by the moved size, clamping at zero. */
size = (max_size >= size) ? 0 : size - max_size;
any_device_moving_textures_to_host = false;
}
else {
/* No movable allocation left, give up even if `size` was not reached. */
break;
}
}
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
move_texture_to_host = false;
/* Update texture info array with new pointers. */
load_texture_info();
}
/* Allocate storage for `mem`, preferring device memory and falling back to mapped host memory.
*
* mem: the allocation to back; its `device_pointer`, `device_size` and, for host backed
*      allocations, `shared_pointer`, `shared_counter` and possibly `host_pointer` are updated.
* pitch_padding: extra bytes added on top of `mem.memory_size()`.
*
* Returns the `HIPMem` entry stored in `hip_mem_map` for `mem`, or NULL when no device pointer
* could be obtained (in which case `set_error()` has been called). */
HIPDevice::HIPMem *HIPDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
{
HIPContextScope scope(this);
hipDeviceptr_t device_pointer = 0;
size_t size = mem.memory_size() + pitch_padding;
/* Start out as "failed", so the host fallback below runs if no device allocation is made. */
hipError_t mem_alloc_result = hipErrorOutOfMemory;
const char *status = "";
/* First try allocating in device memory, respecting headroom. We make
* an exception for texture info. It is small and frequently accessed,
* so treat it as working memory.
*
* If there is not enough room for working memory, we will try to move
* textures to host memory, assuming the performance impact would have
* been worse for working memory. */
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
size_t total = 0, free = 0;
/* NOTE(review): the result of hipMemGetInfo is not checked; on failure `free` stays 0. */
hipMemGetInfo(&free, &total);
/* Move textures to host memory if needed. */
if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
/* Request exactly the amount by which this allocation plus headroom exceeds free memory. */
move_textures_to_host(size + headroom - free, is_texture);
hipMemGetInfo(&free, &total);
}
/* Allocate in device memory. */
if (!move_texture_to_host && (size + headroom) < free) {
mem_alloc_result = hipMalloc(&device_pointer, size);
if (mem_alloc_result == hipSuccess) {
status = " in device memory";
}
}
/* Fall back to mapped host memory if needed and possible. */
void *shared_pointer = 0;
if (mem_alloc_result != hipSuccess && can_map_host) {
if (mem.shared_pointer) {
/* Another device already allocated host memory. */
mem_alloc_result = hipSuccess;
shared_pointer = mem.shared_pointer;
}
else if (map_host_used + size < map_host_limit) {
/* Allocate host memory ourselves. */
mem_alloc_result = hipHostMalloc(
&shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
assert((mem_alloc_result == hipSuccess && shared_pointer != 0) ||
(mem_alloc_result != hipSuccess && shared_pointer == 0));
}
if (mem_alloc_result == hipSuccess) {
hip_assert(hipHostGetDevicePointer(&device_pointer, shared_pointer, 0));
/* Counted for adopted shared pointers as well as for our own host allocations. */
map_host_used += size;
status = " in host memory";
}
}
if (mem_alloc_result != hipSuccess) {
status = " failed, out of device and host memory";
set_error("System is out of GPU and shared host memory");
}
if (mem.name) {
VLOG_WORK << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")" << status;
}
mem.device_pointer = (device_ptr)device_pointer;
mem.device_size = size;
/* NOTE(review): the statistics are updated before the failure check below, so a failed
* allocation is recorded too. */
stats.mem_alloc(size);
if (!mem.device_pointer) {
return NULL;
}
/* Insert into map of allocations. */
thread_scoped_lock lock(hip_mem_map_mutex);
HIPMem *cmem = &hip_mem_map[&mem];
if (shared_pointer != 0) {
/* Replace host pointer with our host allocation. Only works if
* HIP memory layout is the same and has no pitch padding. Also
* does not work if we move textures to host during a render,
* since other devices might be using the memory. */
if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
mem.host_pointer != shared_pointer) {
memcpy(shared_pointer, mem.host_pointer, size);
/* A call to device_memory::host_free() should be preceded by
* a call to device_memory::device_free() for host memory
* allocated by a device to be handled properly. Two exceptions
* are here and a call in OptiXDevice::generic_alloc(), where
* the current host memory can be assumed to be allocated by
* device_memory::host_alloc(), not by a device. */
mem.host_free();
mem.host_pointer = shared_pointer;
}
/* One more user of the shared host allocation. */
mem.shared_pointer = shared_pointer;
mem.shared_counter++;
cmem->use_mapped_host = true;
}
else {
cmem->use_mapped_host = false;
}
return cmem;
}
bool HIPDevice::alloc_device(void *&device_pointer, size_t size)
void HIPDevice::generic_copy_to(device_memory &mem)
{
HIPContextScope scope(this);
if (!mem.host_pointer || !mem.device_pointer) {
return;
}
hipError_t mem_alloc_result = hipMalloc((hipDeviceptr_t *)&device_pointer, size);
return mem_alloc_result == hipSuccess;
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
* hipMalloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
* mem.host_pointer. */
thread_scoped_lock lock(hip_mem_map_mutex);
if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const HIPContextScope scope(this);
hip_assert(
hipMemcpyHtoD((hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size()));
}
}
void HIPDevice::free_device(void *device_pointer)
void HIPDevice::generic_free(device_memory &mem)
{
HIPContextScope scope(this);
if (mem.device_pointer) {
HIPContextScope scope(this);
thread_scoped_lock lock(hip_mem_map_mutex);
DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
const HIPMem &cmem = hip_mem_map[&mem];
hip_assert(hipFree((hipDeviceptr_t)device_pointer));
}
/* If cmem.use_mapped_host is true, reference counting is used
* to safely free a mapped host memory. */
bool HIPDevice::alloc_host(void *&shared_pointer, size_t size)
{
HIPContextScope scope(this);
if (cmem.use_mapped_host) {
assert(mem.shared_pointer);
if (mem.shared_pointer) {
assert(mem.shared_counter > 0);
if (--mem.shared_counter == 0) {
if (mem.host_pointer == mem.shared_pointer) {
mem.host_pointer = 0;
}
hipHostFree(mem.shared_pointer);
mem.shared_pointer = 0;
}
}
map_host_used -= mem.device_size;
}
else {
/* Free device memory. */
hip_assert(hipFree(mem.device_pointer));
}
hipError_t mem_alloc_result = hipHostMalloc(
&shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
return mem_alloc_result == hipSuccess;
}
void HIPDevice::free_host(void *shared_pointer)
{
HIPContextScope scope(this);
hipHostFree(shared_pointer);
}
void HIPDevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
{
HIPContextScope scope(this);
hip_assert(hipHostGetDevicePointer((hipDeviceptr_t *)&device_pointer, shared_pointer, 0));
}
void HIPDevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
{
const HIPContextScope scope(this);
hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer, host_pointer, size));
hip_mem_map.erase(hip_mem_map.find(&mem));
}
}
void HIPDevice::mem_alloc(device_memory &mem)
@@ -575,8 +823,8 @@ void HIPDevice::mem_zero(device_memory &mem)
/* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
* regardless of mem.host_pointer and mem.shared_pointer. */
thread_scoped_lock lock(device_mem_map_mutex);
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
thread_scoped_lock lock(hip_mem_map_mutex);
if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const HIPContextScope scope(this);
hip_assert(hipMemsetD8((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size()));
}
@@ -703,19 +951,19 @@ void HIPDevice::tex_alloc(device_texture &mem)
return;
}
Mem *cmem = NULL;
HIPMem *cmem = NULL;
hArray array_3d = NULL;
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
size_t dst_pitch = src_pitch;
if (!mem.is_resident(this)) {
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
thread_scoped_lock lock(hip_mem_map_mutex);
cmem = &hip_mem_map[&mem];
cmem->texobject = 0;
if (mem.data_depth > 1) {
array_3d = (hArray)mem.device_pointer;
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
cmem->array = array_3d;
}
else if (mem.data_height > 0) {
dst_pitch = align_up(src_pitch, pitch_alignment);
@@ -759,10 +1007,10 @@ void HIPDevice::tex_alloc(device_texture &mem)
mem.device_size = size;
stats.mem_alloc(size);
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
thread_scoped_lock lock(hip_mem_map_mutex);
cmem = &hip_mem_map[&mem];
cmem->texobject = 0;
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
cmem->array = array_3d;
}
else if (mem.data_height > 0) {
/* 2D texture, using pitch aligned linear memory. */
@@ -847,8 +1095,8 @@ void HIPDevice::tex_alloc(device_texture &mem)
texDesc.filterMode = filter_mode;
texDesc.flags = HIP_TRSF_NORMALIZED_COORDINATES;
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
thread_scoped_lock lock(hip_mem_map_mutex);
cmem = &hip_mem_map[&mem];
hip_assert(hipTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
@@ -863,9 +1111,9 @@ void HIPDevice::tex_free(device_texture &mem)
{
if (mem.device_pointer) {
HIPContextScope scope(this);
thread_scoped_lock lock(device_mem_map_mutex);
DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
const Mem &cmem = device_mem_map[&mem];
thread_scoped_lock lock(hip_mem_map_mutex);
DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
const HIPMem &cmem = hip_mem_map[&mem];
if (cmem.texobject) {
/* Free bindless texture. */
@@ -874,16 +1122,16 @@ void HIPDevice::tex_free(device_texture &mem)
if (!mem.is_resident(this)) {
/* Do not free memory here, since it was allocated on a different device. */
device_mem_map.erase(device_mem_map.find(&mem));
hip_mem_map.erase(hip_mem_map.find(&mem));
}
else if (cmem.array) {
/* Free array. */
hipArrayDestroy(reinterpret_cast<hArray>(cmem.array));
hipArrayDestroy(cmem.array);
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
device_mem_map.erase(device_mem_map.find(&mem));
hip_mem_map.erase(hip_mem_map.find(&mem));
}
else {
lock.unlock();
@@ -905,7 +1153,7 @@ bool HIPDevice::should_use_graphics_interop()
* possible, but from the empiric measurements it can be considerably slower than using naive
* pixels copy. */
/* Disable graphics interop for now, because of driver bug in 21.40. See #92972 */
/* Disable graphics interop for now, because of driver bug in 21.40. See T92972 */
# if 0
HIPContextScope scope(this);

View File

@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
class DeviceQueue;
class HIPDevice : public GPUDevice {
class HIPDevice : public Device {
friend class HIPContextScope;
@@ -26,11 +26,36 @@ class HIPDevice : public GPUDevice {
hipDevice_t hipDevice;
hipCtx_t hipContext;
hipModule_t hipModule;
size_t device_texture_headroom;
size_t device_working_headroom;
bool move_texture_to_host;
size_t map_host_used;
size_t map_host_limit;
int can_map_host;
int pitch_alignment;
int hipDevId;
int hipDevArchitecture;
bool first_error;
/* Bookkeeping record kept for every device_memory tracked in `hip_mem_map`. */
struct HIPMem {
  /* Texture object handle, 0 when none has been created. */
  hipTextureObject_t texobject = 0;
  /* Array handle, 0 when the allocation is not array backed. */
  hArray array = 0;
  /* If true, a mapped host memory in shared_pointer is being used. */
  bool use_mapped_host = false;
  HIPMem()
  {
  }
};
typedef map<device_memory *, HIPMem> HIPMemMap;
HIPMemMap hip_mem_map;
thread_mutex hip_mem_map_mutex;
/* Bindless Textures */
device_vector<TextureInfo> texture_info;
bool need_texture_info;
HIPDeviceKernels kernels;
static bool have_precompiled_kernels();
@@ -56,13 +81,17 @@ class HIPDevice : public GPUDevice {
virtual bool load_kernels(const uint kernel_features) override;
void reserve_local_memory(const uint kernel_features);
virtual void get_device_memory_info(size_t &total, size_t &free) override;
virtual bool alloc_device(void *&device_pointer, size_t size) override;
virtual void free_device(void *device_pointer) override;
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
virtual void free_host(void *shared_pointer) override;
virtual void transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;
void init_host_memory();
void load_texture_info();
void move_textures_to_host(size_t size, bool for_texture);
HIPMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
void generic_copy_to(device_memory &mem);
void generic_free(device_memory &mem);
void mem_alloc(device_memory &mem) override;

View File

@@ -51,7 +51,7 @@ static inline bool hipSupportsDevice(const int hipDevId)
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
return (major >= 9);
return (major >= 10);
}
CCL_NAMESPACE_END

View File

@@ -108,10 +108,9 @@ template<> struct device_type_traits<uint2> {
};
template<> struct device_type_traits<uint3> {
/* uint3 has different size depending on the device, can't use it for interchanging
* memory between CPU and GPU.
*
* Leave body empty to trigger a compile error if used. */
static const DataType data_type = TYPE_UINT;
static const size_t num_elements = 3;
static_assert(sizeof(uint3) == num_elements * datatype_size(data_type));
};
template<> struct device_type_traits<uint4> {
@@ -133,10 +132,9 @@ template<> struct device_type_traits<int2> {
};
template<> struct device_type_traits<int3> {
/* int3 has different size depending on the device, can't use it for interchanging
* memory between CPU and GPU.
*
* Leave body empty to trigger a compile error if used. */
static const DataType data_type = TYPE_INT;
static const size_t num_elements = 4;
static_assert(sizeof(int3) == num_elements * datatype_size(data_type));
};
template<> struct device_type_traits<int4> {
@@ -249,8 +247,6 @@ class device_memory {
bool is_resident(Device *sub_device) const;
protected:
friend class Device;
friend class GPUDevice;
friend class CUDADevice;
friend class OptiXDevice;
friend class HIPDevice;

View File

@@ -55,8 +55,9 @@ void device_metal_info(vector<DeviceInfo> &devices)
info.denoisers = DENOISER_NONE;
info.id = id;
info.has_nanovdb = MetalInfo::get_device_vendor(device) == METAL_GPU_APPLE;
info.has_light_tree = MetalInfo::get_device_vendor(device) != METAL_GPU_AMD;
if (MetalInfo::get_device_vendor(device) == METAL_GPU_AMD) {
info.has_light_tree = false;
}
devices.push_back(info);
device_index++;

View File

@@ -29,8 +29,7 @@ class MetalDevice : public Device {
id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
nil; /* encoder used for fetching device pointers from MTLBuffers */
string source[PSO_NUM];
string kernels_md5[PSO_NUM];
string global_defines_md5[PSO_NUM];
string source_md5[PSO_NUM];
bool capture_enabled = false;
@@ -68,12 +67,9 @@ class MetalDevice : public Device {
std::recursive_mutex metal_mem_map_mutex;
/* Bindless Textures */
bool is_texture(const TextureInfo &tex);
device_vector<TextureInfo> texture_info;
bool need_texture_info;
id<MTLArgumentEncoder> mtlTextureArgEncoder = nil;
id<MTLArgumentEncoder> mtlBufferArgEncoder = nil;
id<MTLBuffer> buffer_bindings_1d = nil;
id<MTLBuffer> texture_bindings_2d = nil;
id<MTLBuffer> texture_bindings_3d = nil;
std::vector<id<MTLTexture>> texture_slot_map;
@@ -116,10 +112,6 @@ class MetalDevice : public Device {
bool use_local_atomic_sort() const;
string preprocess_source(MetalPipelineType pso_type,
const uint kernel_features,
string *source = nullptr);
bool make_source_and_check_if_compile_needed(MetalPipelineType pso_type);
void make_source(MetalPipelineType pso_type, const uint kernel_features);

View File

@@ -91,6 +91,11 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
}
}
texture_bindings_2d = [mtlDevice newBufferWithLength:4096 options:default_storage_mode];
texture_bindings_3d = [mtlDevice newBufferWithLength:4096 options:default_storage_mode];
stats.mem_alloc(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
switch (device_vendor) {
default:
break;
@@ -100,7 +105,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
}
case METAL_GPU_AMD: {
max_threads_per_threadgroup = 128;
use_metalrt = info.use_metalrt;
break;
}
case METAL_GPU_APPLE: {
@@ -151,16 +155,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
arg_desc_texture.dataType = MTLDataTypeTexture;
arg_desc_texture.access = MTLArgumentAccessReadOnly;
mtlTextureArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_texture ]];
MTLArgumentDescriptor *arg_desc_buffer = [[MTLArgumentDescriptor alloc] init];
arg_desc_buffer.dataType = MTLDataTypePointer;
arg_desc_buffer.access = MTLArgumentAccessReadOnly;
mtlBufferArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_buffer ]];
buffer_bindings_1d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
texture_bindings_2d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
texture_bindings_3d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
texture_bindings_3d.allocatedSize);
/* command queue for non-tracing work on the GPU */
mtlGeneralCommandQueue = [mtlDevice newCommandQueue];
@@ -185,8 +179,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
arg_desc_tex.dataType = MTLDataTypePointer;
arg_desc_tex.access = MTLArgumentAccessReadOnly;
arg_desc_tex.index = index++;
[ancillary_desc addObject:[arg_desc_tex copy]]; /* metal_buf_1d */
arg_desc_tex.index = index++;
[ancillary_desc addObject:[arg_desc_tex copy]]; /* metal_tex_2d */
arg_desc_tex.index = index++;
@@ -232,15 +224,11 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
mtlAncillaryArgEncoder = [mtlDevice newArgumentEncoderWithArguments:ancillary_desc];
// preparing the blas arg encoder
if (@available(macos 11.0, *)) {
if (use_metalrt) {
MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init];
arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure;
arg_desc_blas.access = MTLArgumentAccessReadOnly;
mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]];
[arg_desc_blas release];
}
}
MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init];
arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure;
arg_desc_blas.access = MTLArgumentAccessReadOnly;
mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]];
[arg_desc_blas release];
for (int i = 0; i < ancillary_desc.count; i++) {
[ancillary_desc[i] release];
@@ -260,26 +248,22 @@ MetalDevice::~MetalDevice()
* existing_devices_mutex). */
thread_scoped_lock lock(existing_devices_mutex);
int num_resources = texture_info.size();
for (int res = 0; res < num_resources; res++) {
if (is_texture(texture_info[res])) {
[texture_slot_map[res] release];
texture_slot_map[res] = nil;
for (auto &tex : texture_slot_map) {
if (tex) {
[tex release];
tex = nil;
}
}
flush_delayed_free_list();
if (texture_bindings_2d) {
stats.mem_free(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
texture_bindings_3d.allocatedSize);
[buffer_bindings_1d release];
stats.mem_free(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
[texture_bindings_2d release];
[texture_bindings_3d release];
}
[mtlTextureArgEncoder release];
[mtlBufferKernelParamsEncoder release];
[mtlBufferArgEncoder release];
[mtlASArgEncoder release];
[mtlAncillaryArgEncoder release];
[mtlGeneralCommandQueue release];
@@ -310,9 +294,7 @@ bool MetalDevice::use_local_atomic_sort() const
return DebugFlags().metal.use_local_atomic_sort;
}
string MetalDevice::preprocess_source(MetalPipelineType pso_type,
const uint kernel_features,
string *source)
void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
{
string global_defines;
if (use_adaptive_compilation()) {
@@ -345,9 +327,6 @@ string MetalDevice::preprocess_source(MetalPipelineType pso_type,
break;
case METAL_GPU_APPLE:
global_defines += "#define __KERNEL_METAL_APPLE__\n";
# ifdef WITH_NANOVDB
global_defines += "#define WITH_NANOVDB\n";
# endif
break;
}
@@ -355,61 +334,6 @@ string MetalDevice::preprocess_source(MetalPipelineType pso_type,
NSOperatingSystemVersion macos_ver = [processInfo operatingSystemVersion];
global_defines += "#define __KERNEL_METAL_MACOS__ " + to_string(macos_ver.majorVersion) + "\n";
/* Replace specific KernelData "dot" dereferences with a Metal function_constant identifier of
* the same character length. Build a string of all active constant values which is then hashed
* in order to identify the PSO.
*/
if (pso_type != PSO_GENERIC) {
if (source) {
const double starttime = time_dt();
# define KERNEL_STRUCT_BEGIN(name, parent) \
string_replace_same_length(*source, "kernel_data." #parent ".", "kernel_data_" #parent "_");
bool next_member_is_specialized = true;
# define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE next_member_is_specialized = false;
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
if (!next_member_is_specialized) { \
string_replace( \
*source, "kernel_data_" #parent "_" #name, "kernel_data." #parent ".__unused_" #name); \
next_member_is_specialized = true; \
}
# include "kernel/data_template.h"
# undef KERNEL_STRUCT_MEMBER
# undef KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
# undef KERNEL_STRUCT_BEGIN
metal_printf("KernelData patching took %.1f ms\n", (time_dt() - starttime) * 1000.0);
}
/* Opt in to all of available specializations. This can be made more granular for the
* PSO_SPECIALIZED_INTERSECT case in order to minimize the number of specialization requests,
* but the overhead should be negligible as these are very quick to (re)build and aren't
* serialized to disk via MTLBinaryArchives.
*/
global_defines += "#define __KERNEL_USE_DATA_CONSTANTS__\n";
}
# if 0
metal_printf("================\n%s================\n",
global_defines.c_str());
# endif
if (source) {
*source = global_defines + *source;
}
MD5Hash md5;
md5.append(global_defines);
return md5.get_hex();
}
void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
{
string &source = this->source[pso_type];
source = "\n#include \"kernel/device/metal/kernel.metal\"\n";
source = path_source_replace_includes(source, path_get("source"));
@@ -418,7 +342,62 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
* With Metal function constants we can generate a single variant of the kernel source which can
* be repeatedly respecialized.
*/
global_defines_md5[pso_type] = preprocess_source(pso_type, kernel_features, &source);
string baked_constants;
/* Replace specific KernelData "dot" dereferences with a Metal function_constant identifier of
* the same character length. Build a string of all active constant values which is then hashed
* in order to identify the PSO.
*/
if (pso_type != PSO_GENERIC) {
const double starttime = time_dt();
# define KERNEL_STRUCT_BEGIN(name, parent) \
string_replace_same_length(source, "kernel_data." #parent ".", "kernel_data_" #parent "_");
bool next_member_is_specialized = true;
# define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE next_member_is_specialized = false;
/* Add constants to md5 so that 'get_best_pipeline' is able to return a suitable match. */
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
if (next_member_is_specialized) { \
baked_constants += string(#parent "." #name "=") + \
to_string(_type(launch_params.data.parent.name)) + "\n"; \
} \
else { \
string_replace( \
source, "kernel_data_" #parent "_" #name, "kernel_data." #parent ".__unused_" #name); \
next_member_is_specialized = true; \
}
# include "kernel/data_template.h"
/* Opt in to all of available specializations. This can be made more granular for the
* PSO_SPECIALIZED_INTERSECT case in order to minimize the number of specialization requests,
* but the overhead should be negligible as these are very quick to (re)build and aren't
* serialized to disk via MTLBinaryArchives.
*/
global_defines += "#define __KERNEL_USE_DATA_CONSTANTS__\n";
metal_printf("KernelData patching took %.1f ms\n", (time_dt() - starttime) * 1000.0);
}
source = global_defines + source;
# if 0
metal_printf("================\n%s================\n\%s================\n",
global_defines.c_str(),
baked_constants.c_str());
# endif
/* Generate an MD5 from the source and include any baked constants. This is used when caching
* PSOs. */
MD5Hash md5;
md5.append(baked_constants);
md5.append(source);
if (use_metalrt) {
md5.append(std::to_string(kernel_features & METALRT_FEATURE_MASK));
}
source_md5[pso_type] = md5.get_hex();
}
bool MetalDevice::load_kernels(const uint _kernel_features)
@@ -452,49 +431,9 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
bool MetalDevice::make_source_and_check_if_compile_needed(MetalPipelineType pso_type)
{
string defines_md5 = preprocess_source(pso_type, kernel_features);
/* Rebuild the source string if the injected block of #defines has changed. */
if (global_defines_md5[pso_type] != defines_md5) {
if (this->source[pso_type].empty()) {
make_source(pso_type, kernel_features);
}
string constant_values;
if (pso_type != PSO_GENERIC) {
bool next_member_is_specialized = true;
# define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE next_member_is_specialized = false;
/* Add specialization constants to md5 so that 'get_best_pipeline' is able to return a suitable
* match. */
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
if (next_member_is_specialized) { \
constant_values += string(#parent "." #name "=") + \
to_string(_type(launch_params.data.parent.name)) + "\n"; \
} \
else { \
next_member_is_specialized = true; \
}
# include "kernel/data_template.h"
# undef KERNEL_STRUCT_MEMBER
# undef KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
# if 0
metal_printf("================\n%s================\n",
constant_values.c_str());
# endif
}
MD5Hash md5;
md5.append(constant_values);
md5.append(source[pso_type]);
if (use_metalrt) {
md5.append(string_printf("metalrt_features=%d", kernel_features & METALRT_FEATURE_MASK));
}
kernels_md5[pso_type] = md5.get_hex();
return MetalDeviceKernels::should_load_kernels(this, pso_type);
}
@@ -581,11 +520,6 @@ void MetalDevice::compile_and_load(int device_id, MetalPipelineType pso_type)
thread_scoped_lock lock(existing_devices_mutex);
if (MetalDevice *instance = get_device_by_ID(device_id, lock)) {
if (mtlLibrary) {
if (error && [error localizedDescription]) {
VLOG_WARNING << "MSL compilation messages: "
<< [[error localizedDescription] UTF8String];
}
instance->mtlLibrary[pso_type] = mtlLibrary;
starttime = time_dt();
@@ -607,11 +541,6 @@ void MetalDevice::compile_and_load(int device_id, MetalPipelineType pso_type)
}
}
/* Returns true when the texture info entry has a positive depth or height. */
bool MetalDevice::is_texture(const TextureInfo &tex)
{
  if (tex.depth > 0) {
    return true;
  }
  return tex.height > 0;
}
void MetalDevice::load_texture_info()
{
if (need_texture_info) {
@@ -623,20 +552,21 @@ void MetalDevice::load_texture_info()
for (int tex = 0; tex < num_textures; tex++) {
uint64_t offset = tex * sizeof(void *);
if (is_texture(texture_info[tex]) && texture_slot_map[tex]) {
id<MTLTexture> metal_texture = texture_slot_map[tex];
id<MTLTexture> metal_texture = texture_slot_map[tex];
if (!metal_texture) {
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
[mtlTextureArgEncoder setTexture:nil atIndex:0];
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
[mtlTextureArgEncoder setTexture:nil atIndex:0];
}
else {
MTLTextureType type = metal_texture.textureType;
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
[mtlTextureArgEncoder setTexture:type == MTLTextureType2D ? metal_texture : nil atIndex:0];
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
[mtlTextureArgEncoder setTexture:type == MTLTextureType3D ? metal_texture : nil atIndex:0];
}
else {
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
[mtlTextureArgEncoder setTexture:nil atIndex:0];
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
[mtlTextureArgEncoder setTexture:nil atIndex:0];
}
}
if (default_storage_mode == MTLResourceStorageModeManaged) {
[texture_bindings_2d didModifyRange:NSMakeRange(0, num_textures * sizeof(void *))];
@@ -655,7 +585,7 @@ void MetalDevice::erase_allocation(device_memory &mem)
if (it != metal_mem_map.end()) {
MetalMem *mmem = it->second.get();
/* blank out reference to MetalMem* in the launch params (fixes crash #94736) */
/* blank out reference to MetalMem* in the launch params (fixes crash T94736) */
if (mmem->pointer_index >= 0) {
device_ptr *pointers = (device_ptr *)&launch_params;
pointers[mmem->pointer_index] = 0;
@@ -809,6 +739,7 @@ void MetalDevice::generic_free(device_memory &mem)
mem.shared_pointer = 0;
/* Free device memory. */
delayed_free_list.push_back(mmem.mtlBuffer);
mmem.mtlBuffer = nil;
}
@@ -930,11 +861,6 @@ void MetalDevice::cancel()
bool MetalDevice::is_ready(string &status) const
{
if (!error_msg.empty()) {
/* Avoid hanging if we had an error. */
return true;
}
int num_loaded = MetalDeviceKernels::get_loaded_kernel_count(this, PSO_GENERIC);
if (num_loaded < DEVICE_KERNEL_NUM) {
status = string_printf("%d / %d render kernels loaded (may take a few minutes the first time)",
@@ -942,17 +868,6 @@ bool MetalDevice::is_ready(string &status) const
DEVICE_KERNEL_NUM);
return false;
}
if (int num_requests = MetalDeviceKernels::num_incomplete_specialization_requests()) {
status = string_printf("%d kernels to optimize", num_requests);
}
else if (kernel_specialization_level == PSO_SPECIALIZED_INTERSECT) {
status = "Using optimized intersection kernels";
}
else if (kernel_specialization_level == PSO_SPECIALIZED_SHADE) {
status = "Using optimized kernels";
}
metal_printf("MetalDevice::is_ready(...) --> true\n");
return true;
}
@@ -989,7 +904,7 @@ void MetalDevice::optimize_for_scene(Scene *scene)
}
if (specialize_in_background) {
if (MetalDeviceKernels::num_incomplete_specialization_requests() == 0) {
if (!MetalDeviceKernels::any_specialization_happening_now()) {
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
specialize_kernels_fn);
}
@@ -1059,7 +974,7 @@ void MetalDevice::global_free(device_memory &mem)
void MetalDevice::tex_alloc_as_buffer(device_texture &mem)
{
MetalDevice::MetalMem *mmem = generic_alloc(mem);
generic_alloc(mem);
generic_copy_to(mem);
/* Resize once */
@@ -1068,32 +983,27 @@ void MetalDevice::tex_alloc_as_buffer(device_texture &mem)
/* Allocate some slots in advance, to reduce amount
* of re-allocations. */
texture_info.resize(round_up(slot + 1, 128));
texture_slot_map.resize(round_up(slot + 1, 128));
}
mem.info.data = (uint64_t)mem.device_pointer;
/* Set Mapping and tag that we need to (re-)upload to device */
texture_info[slot] = mem.info;
uint64_t offset = slot * sizeof(void *);
[mtlBufferArgEncoder setArgumentBuffer:buffer_bindings_1d offset:offset];
[mtlBufferArgEncoder setBuffer:mmem->mtlBuffer offset:0 atIndex:0];
texture_info[slot].data = *(uint64_t *)((uint64_t)buffer_bindings_1d.contents + offset);
texture_slot_map[slot] = nil;
need_texture_info = true;
}
void MetalDevice::tex_alloc(device_texture &mem)
{
/* Check that dimensions fit within maximum allowable size.
* If 1D texture is allocated, use 1D buffer.
* See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
if (mem.data_height > 0) {
if (mem.data_width > 16384 || mem.data_height > 16384) {
set_error(string_printf(
"Texture exceeds maximum allowed size of 16384 x 16384 (requested: %zu x %zu)",
mem.data_width,
mem.data_height));
return;
}
if (mem.data_width > 16384 || mem.data_height > 16384) {
set_error(string_printf(
"Texture exceeds maximum allowed size of 16384 x 16384 (requested: %zu x %zu)",
mem.data_width,
mem.data_height));
return;
}
MTLStorageMode storage_mode = MTLStorageModeManaged;
if (@available(macos 10.15, *)) {
if ([mtlDevice hasUnifiedMemory] &&
@@ -1233,9 +1143,8 @@ void MetalDevice::tex_alloc(device_texture &mem)
bytesPerRow:src_pitch];
}
else {
assert(0);
/* 1D texture, using linear memory. */
tex_alloc_as_buffer(mem);
return;
}
mem.device_pointer = (device_ptr)mtlTexture;
@@ -1259,22 +1168,17 @@ void MetalDevice::tex_alloc(device_texture &mem)
ssize_t min_buffer_length = sizeof(void *) * texture_info.size();
if (!texture_bindings_2d || (texture_bindings_2d.length < min_buffer_length)) {
if (texture_bindings_2d) {
delayed_free_list.push_back(buffer_bindings_1d);
delayed_free_list.push_back(texture_bindings_2d);
delayed_free_list.push_back(texture_bindings_3d);
stats.mem_free(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
texture_bindings_3d.allocatedSize);
stats.mem_free(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
}
buffer_bindings_1d = [mtlDevice newBufferWithLength:min_buffer_length
options:default_storage_mode];
texture_bindings_2d = [mtlDevice newBufferWithLength:min_buffer_length
options:default_storage_mode];
texture_bindings_3d = [mtlDevice newBufferWithLength:min_buffer_length
options:default_storage_mode];
stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
texture_bindings_3d.allocatedSize);
stats.mem_alloc(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
}
}
@@ -1301,18 +1205,12 @@ void MetalDevice::tex_alloc(device_texture &mem)
void MetalDevice::tex_free(device_texture &mem)
{
if (mem.data_depth == 0 && mem.data_height == 0) {
generic_free(mem);
return;
}
if (metal_mem_map.count(&mem)) {
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
MetalMem &mmem = *metal_mem_map.at(&mem);
assert(texture_slot_map[mem.slot] == mmem.mtlTexture);
if (texture_slot_map[mem.slot] == mmem.mtlTexture)
texture_slot_map[mem.slot] = nil;
texture_slot_map[mem.slot] = nil;
if (mmem.mtlTexture) {
/* Free bindless texture. */

View File

@@ -63,7 +63,8 @@ enum MetalPipelineType {
};
# define METALRT_FEATURE_MASK \
(KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD)
(KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD | \
KERNEL_FEATURE_OBJECT_MOTION)
const char *kernel_type_as_string(MetalPipelineType pso_type);
@@ -75,12 +76,12 @@ struct MetalKernelPipeline {
id<MTLLibrary> mtlLibrary = nil;
MetalPipelineType pso_type;
string kernels_md5;
string source_md5;
size_t usage_count = 0;
KernelData kernel_data_;
bool use_metalrt;
uint32_t kernel_features = 0;
uint32_t metalrt_features = 0;
int threads_per_threadgroup;
@@ -103,7 +104,7 @@ struct MetalKernelPipeline {
/* Cache of Metal kernels for each DeviceKernel. */
namespace MetalDeviceKernels {
int num_incomplete_specialization_requests();
bool any_specialization_happening_now();
int get_loaded_kernel_count(MetalDevice const *device, MetalPipelineType pso_type);
bool should_load_kernels(MetalDevice const *device, MetalPipelineType pso_type);
bool load(MetalDevice *device, MetalPipelineType pso_type);

View File

@@ -161,12 +161,25 @@ ShaderCache::~ShaderCache()
running = false;
cond_var.notify_all();
metal_printf("Waiting for ShaderCache threads... (incomplete_requests = %d)\n",
int(incomplete_requests));
int num_incomplete = int(incomplete_requests);
if (num_incomplete) {
/* Shutting down the app with incomplete shader compilation requests. Give 1 second's grace for
* clean shutdown. */
metal_printf("ShaderCache busy (incomplete_requests = %d)...\n", num_incomplete);
std::this_thread::sleep_for(std::chrono::seconds(1));
num_incomplete = int(incomplete_requests);
}
if (num_incomplete && !MetalDeviceKernels::is_benchmark_warmup()) {
metal_printf("ShaderCache still busy (incomplete_requests = %d). Terminating...\n",
num_incomplete);
std::terminate();
}
metal_printf("ShaderCache idle. Shutting down.\n");
for (auto &thread : compile_threads) {
thread.join();
}
metal_printf("ShaderCache shut down.\n");
}
void ShaderCache::wait_for_all()
@@ -279,7 +292,7 @@ bool ShaderCache::should_load_kernel(DeviceKernel device_kernel,
/* check whether the kernel has already been requested / cached */
thread_scoped_lock lock(cache_mutex);
for (auto &pipeline : pipelines[device_kernel]) {
if (pipeline->kernels_md5 == device->kernels_md5[pso_type]) {
if (pipeline->source_md5 == device->source_md5[pso_type]) {
return false;
}
}
@@ -319,7 +332,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
memcpy(&pipeline->kernel_data_, &device->launch_params.data, sizeof(pipeline->kernel_data_));
pipeline->pso_type = pso_type;
pipeline->mtlDevice = mtlDevice;
pipeline->kernels_md5 = device->kernels_md5[pso_type];
pipeline->source_md5 = device->source_md5[pso_type];
pipeline->mtlLibrary = device->mtlLibrary[pso_type];
pipeline->device_kernel = device_kernel;
pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
@@ -331,7 +344,9 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
/* metalrt options */
pipeline->use_metalrt = device->use_metalrt;
pipeline->kernel_features = device->kernel_features;
pipeline->metalrt_features = device->use_metalrt ?
(device->kernel_features & METALRT_FEATURE_MASK) :
0;
{
thread_scoped_lock lock(cache_mutex);
@@ -342,36 +357,65 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const MetalDevice *device)
{
while (running) {
/* Search all loaded pipelines with matching kernels_md5 checksums. */
MetalKernelPipeline *best_match = nullptr;
/* metalrt options */
bool use_metalrt = device->use_metalrt;
bool device_metalrt_hair = use_metalrt && device->kernel_features & KERNEL_FEATURE_HAIR;
bool device_metalrt_hair_thick = use_metalrt &&
device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
bool device_metalrt_pointcloud = use_metalrt &&
device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
bool device_metalrt_motion = use_metalrt &&
device->kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
MetalKernelPipeline *best_pipeline = nullptr;
while (!best_pipeline) {
{
thread_scoped_lock lock(cache_mutex);
for (auto &candidate : pipelines[kernel]) {
if (candidate->loaded &&
candidate->kernels_md5 == device->kernels_md5[candidate->pso_type]) {
/* Replace existing match if candidate is more specialized. */
if (!best_match || candidate->pso_type > best_match->pso_type) {
best_match = candidate.get();
for (auto &pipeline : pipelines[kernel]) {
if (!pipeline->loaded) {
/* still loading - ignore */
continue;
}
bool pipeline_metalrt_hair = pipeline->metalrt_features & KERNEL_FEATURE_HAIR;
bool pipeline_metalrt_hair_thick = pipeline->metalrt_features & KERNEL_FEATURE_HAIR_THICK;
bool pipeline_metalrt_pointcloud = pipeline->metalrt_features & KERNEL_FEATURE_POINTCLOUD;
bool pipeline_metalrt_motion = use_metalrt &&
pipeline->metalrt_features & KERNEL_FEATURE_OBJECT_MOTION;
if (pipeline->use_metalrt != use_metalrt || pipeline_metalrt_hair != device_metalrt_hair ||
pipeline_metalrt_hair_thick != device_metalrt_hair_thick ||
pipeline_metalrt_pointcloud != device_metalrt_pointcloud ||
pipeline_metalrt_motion != device_metalrt_motion) {
/* wrong combination of metalrt options */
continue;
}
if (pipeline->pso_type != PSO_GENERIC) {
if (pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_INTERSECT] ||
pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_SHADE]) {
best_pipeline = pipeline.get();
}
}
else if (!best_pipeline) {
best_pipeline = pipeline.get();
}
}
}
if (best_match) {
if (best_match->usage_count == 0 && best_match->pso_type != PSO_GENERIC) {
metal_printf("Swapping in %s version of %s\n",
kernel_type_as_string(best_match->pso_type),
device_kernel_as_string(kernel));
}
best_match->usage_count += 1;
return best_match;
if (!best_pipeline) {
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
/* Spin until a matching kernel is loaded, or we're shutting down. */
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
return nullptr;
if (best_pipeline->usage_count == 0 && best_pipeline->pso_type != PSO_GENERIC) {
metal_printf("Swapping in %s version of %s\n",
kernel_type_as_string(best_pipeline->pso_type),
device_kernel_as_string(kernel));
}
best_pipeline->usage_count += 1;
return best_pipeline;
}
bool MetalKernelPipeline::should_use_binary_archive() const
@@ -384,12 +428,11 @@ bool MetalKernelPipeline::should_use_binary_archive() const
return false;
}
}
else {
/* Workaround for issues using Binary Archives on non-Apple Silicon systems. */
MetalGPUVendor gpu_vendor = MetalInfo::get_device_vendor(mtlDevice);
if (gpu_vendor != METAL_GPU_APPLE) {
return false;
}
/* Workaround for Intel GPU having issue using Binary Archives */
MetalGPUVendor gpu_vendor = MetalInfo::get_device_vendor(mtlDevice);
if (gpu_vendor == METAL_GPU_INTEL) {
return false;
}
if (pso_type == PSO_GENERIC) {
@@ -397,10 +440,8 @@ bool MetalKernelPipeline::should_use_binary_archive() const
return true;
}
if ((device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND &&
device_kernel <= DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW) ||
(device_kernel >= DEVICE_KERNEL_SHADER_EVAL_DISPLACE &&
device_kernel <= DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY)) {
if (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND &&
device_kernel <= DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW) {
/* Archive all shade kernels - they take a long time to compile. */
return true;
}
@@ -526,14 +567,18 @@ void MetalKernelPipeline::compile()
NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
NSArray *linked_functions = nil;
bool metalrt_hair = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR);
bool metalrt_hair_thick = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR_THICK);
bool metalrt_pointcloud = use_metalrt && (metalrt_features & KERNEL_FEATURE_POINTCLOUD);
if (use_metalrt) {
id<MTLFunction> curve_intersect_default = nil;
id<MTLFunction> curve_intersect_shadow = nil;
id<MTLFunction> point_intersect_default = nil;
id<MTLFunction> point_intersect_shadow = nil;
if (kernel_features & KERNEL_FEATURE_HAIR) {
if (metalrt_hair) {
/* Add curve intersection programs. */
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
if (metalrt_hair_thick) {
/* Slower programs for thick hair since that also slows down ribbons.
* Ideally this should not be needed. */
curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_ALL];
@@ -544,7 +589,7 @@ void MetalKernelPipeline::compile()
curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON_SHADOW];
}
}
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
if (metalrt_pointcloud) {
point_intersect_default = rt_intersection_function[METALRT_FUNC_POINT];
point_intersect_shadow = rt_intersection_function[METALRT_FUNC_POINT_SHADOW];
}
@@ -621,8 +666,6 @@ void MetalKernelPipeline::compile()
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
bool use_binary_archive = should_use_binary_archive();
bool loading_existing_archive = false;
bool creating_new_archive = false;
id<MTLBinaryArchive> archive = nil;
string metalbin_path;
@@ -631,11 +674,20 @@ void MetalKernelPipeline::compile()
NSProcessInfo *processInfo = [NSProcessInfo processInfo];
string osVersion = [[processInfo operatingSystemVersionString] UTF8String];
MD5Hash local_md5;
local_md5.append(kernels_md5);
local_md5.append(source_md5);
local_md5.append(osVersion);
local_md5.append((uint8_t *)&this->threads_per_threadgroup,
sizeof(this->threads_per_threadgroup));
string options;
if (use_metalrt && kernel_has_intersection(device_kernel)) {
/* incorporate any MetalRT specializations into the archive name */
options += string_printf(".hair_%d.hair_thick_%d.pointcloud_%d",
metalrt_hair ? 1 : 0,
metalrt_hair_thick ? 1 : 0,
metalrt_pointcloud ? 1 : 0);
}
/* Replace non-alphanumerical characters with underscores. */
string device_name = [mtlDevice.name UTF8String];
for (char &c : device_name) {
@@ -647,141 +699,77 @@ void MetalKernelPipeline::compile()
metalbin_name = device_name;
metalbin_name = path_join(metalbin_name, device_kernel_as_string(device_kernel));
metalbin_name = path_join(metalbin_name, kernel_type_as_string(pso_type));
metalbin_name = path_join(metalbin_name, local_md5.get_hex() + ".bin");
metalbin_name = path_join(metalbin_name, local_md5.get_hex() + options + ".bin");
metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
path_create_directories(metalbin_path);
/* Check if shader binary exists on disk, and if so, update the file timestamp for LRU purging
* to work as intended. */
loading_existing_archive = path_cache_kernel_exists_and_mark_used(metalbin_path);
creating_new_archive = !loading_existing_archive;
if (@available(macOS 11.0, *)) {
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
if (loading_existing_archive) {
/* Retrieve shader binary from disk, and update the file timestamp for LRU purging to work as
* intended. */
if (use_binary_archive && path_cache_kernel_exists_and_mark_used(metalbin_path)) {
if (@available(macOS 11.0, *)) {
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
}
NSError *error = nil;
archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:&error];
if (!archive) {
const char *err = error ? [[error localizedDescription] UTF8String] : nullptr;
metal_printf("newBinaryArchiveWithDescriptor failed: %s\n", err ? err : "nil");
}
[archiveDesc release];
if (loading_existing_archive) {
pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
[archiveDesc release];
}
}
}
bool recreate_archive = false;
/* Lambda to do the actual pipeline compilation. */
auto do_compilation = [&]() {
__block bool compilation_finished = false;
__block string error_str;
if (loading_existing_archive) {
/* Use the blocking variant of newComputePipelineStateWithDescriptor if an archive exists on
* disk. It should load almost instantaneously, and will fail gracefully when loading a
* corrupt archive (unlike the async variant). */
NSError *error = nil;
pipeline = [mtlDevice newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:pipelineOptions
reflection:nullptr
error:&error];
const char *err = error ? [[error localizedDescription] UTF8String] : nullptr;
error_str = err ? err : "nil";
}
else {
/* TODO / MetalRT workaround:
* Workaround for a crash when addComputePipelineFunctionsWithDescriptor is called *after*
* newComputePipelineStateWithDescriptor with linked functions (i.e. with MetalRT enabled).
* Ideally we would like to call newComputePipelineStateWithDescriptor (async) first so we
* can bail out if needed, but we can stop the crash by flipping the order when there are
* linked functions. However when addComputePipelineFunctionsWithDescriptor is called first
* it will block while it builds the pipeline, offering no way of bailing out. */
auto addComputePipelineFunctionsWithDescriptor = [&]() {
if (creating_new_archive && ShaderCache::running) {
NSError *error;
if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
error:&error]) {
NSString *errStr = [error localizedDescription];
metal_printf("Failed to add PSO to archive:\n%s\n",
errStr ? [errStr UTF8String] : "nil");
}
}
};
if (linked_functions) {
addComputePipelineFunctionsWithDescriptor();
}
/* Use the async variant of newComputePipelineStateWithDescriptor if no archive exists on
* disk. This allows us to respond to app shutdown. */
[mtlDevice
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:pipelineOptions
completionHandler:^(id<MTLComputePipelineState> computePipelineState,
MTLComputePipelineReflection *reflection,
NSError *error) {
pipeline = computePipelineState;
/* Retain the pipeline so we can use it safely past the completion
* handler. */
if (pipeline) {
[pipeline retain];
}
const char *err = error ?
[[error localizedDescription] UTF8String] :
nullptr;
error_str = err ? err : "nil";
compilation_finished = true;
}];
/* Immediately wait for either the compilation to finish or for app shutdown. */
while (ShaderCache::running && !compilation_finished) {
std::this_thread::sleep_for(std::chrono::milliseconds(5));
}
/* Add pipeline into the new archive (unless we did it earlier). */
if (pipeline && !linked_functions) {
addComputePipelineFunctionsWithDescriptor();
__block bool creating_new_archive = false;
if (@available(macOS 11.0, *)) {
if (use_binary_archive) {
if (!archive) {
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
archiveDesc.url = nil;
archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
creating_new_archive = true;
}
computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
}
if (!pipeline) {
metal_printf(
"newComputePipelineStateWithDescriptor failed for \"%s\"%s. "
"Error:\n%s\n",
device_kernel_as_string((DeviceKernel)device_kernel),
(archive && !recreate_archive) ? " Archive may be incomplete or corrupt - attempting "
"recreation.." :
"",
error_str.c_str());
}
};
}
double starttime = time_dt();
do_compilation();
/* Block on load to ensure we continue with a valid kernel function */
if (creating_new_archive) {
starttime = time_dt();
NSError *error;
if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
error:&error]) {
NSString *errStr = [error localizedDescription];
metal_printf("Failed to add PSO to archive:\n%s\n", errStr ? [errStr UTF8String] : "nil");
}
}
/* An archive might have a corrupt entry and fail to materialize the pipeline. This shouldn't
* happen, but if it does we recreate it. */
pipeline = [mtlDevice newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:pipelineOptions
reflection:nullptr
error:&error];
bool recreate_archive = false;
if (pipeline == nil && archive) {
NSString *errStr = [error localizedDescription];
metal_printf(
"Failed to create compute pipeline state \"%s\" from archive - attempting recreation... "
"(error: %s)\n",
device_kernel_as_string((DeviceKernel)device_kernel),
errStr ? [errStr UTF8String] : "nil");
pipeline = [mtlDevice newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:MTLPipelineOptionNone
reflection:nullptr
error:&error];
recreate_archive = true;
pipelineOptions = MTLPipelineOptionNone;
path_remove(metalbin_path);
do_compilation();
}
double duration = time_dt() - starttime;
if (pipeline == nil) {
NSString *errStr = [error localizedDescription];
error_str = string_printf("Failed to create compute pipeline state \"%s\", error: \n",
device_kernel_as_string((DeviceKernel)device_kernel));
error_str += (errStr ? [errStr UTF8String] : "nil");
metal_printf("%16s | %2d | %-55s | %7.2fs | FAILED!\n",
kernel_type_as_string(pso_type),
device_kernel,
@@ -801,8 +789,7 @@ void MetalKernelPipeline::compile()
if (creating_new_archive || recreate_archive) {
if (![archive serializeToURL:[NSURL fileURLWithPath:@(metalbin_path.c_str())]
error:&error]) {
metal_printf("Failed to save binary archive to %s, error:\n%s\n",
metalbin_path.c_str(),
metal_printf("Failed to save binary archive, error:\n%s\n",
[[error localizedDescription] UTF8String]);
}
else {
@@ -870,15 +857,16 @@ void MetalDeviceKernels::wait_for_all()
}
}
int MetalDeviceKernels::num_incomplete_specialization_requests()
bool MetalDeviceKernels::any_specialization_happening_now()
{
/* Return true if any ShaderCaches have ongoing specialization requests (typically there will be
* only 1). */
int total = 0;
for (int i = 0; i < g_shaderCacheCount; i++) {
total += g_shaderCache[i].second->incomplete_specialization_requests;
if (g_shaderCache[i].second->incomplete_specialization_requests > 0) {
return true;
}
}
return total;
return false;
}
int MetalDeviceKernels::get_loaded_kernel_count(MetalDevice const *device,

View File

@@ -278,7 +278,7 @@ int MetalDeviceQueue::num_concurrent_states(const size_t state_size) const
if (metal_device_->device_vendor == METAL_GPU_APPLE) {
result *= 4;
/* Increasing the state count doesn't notably benefit M1-family systems. */
/* Increasing the state count doesn't notably benefit M1-family systems. */
if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) != APPLE_M1) {
size_t system_ram = system_physical_ram();
size_t allocated_so_far = [metal_device_->mtlDevice currentAllocatedSize];
@@ -477,21 +477,17 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->texture_bindings_3d
offset:0
atIndex:1];
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->buffer_bindings_1d
offset:0
atIndex:2];
if (@available(macos 12.0, *)) {
if (metal_device_->use_metalrt) {
if (metal_device_->bvhMetalRT) {
id<MTLAccelerationStructure> accel_struct = metal_device_->bvhMetalRT->accel_struct;
[metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:3];
[metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:2];
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_buffer
offset:0
atIndex:8];
atIndex:7];
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_lookup_buffer
offset:0
atIndex:9];
atIndex:8];
}
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
@@ -501,13 +497,13 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
atIndex:1];
[metal_device_->mtlAncillaryArgEncoder
setIntersectionFunctionTable:metal_kernel_pso->intersection_func_table[table]
atIndex:4 + table];
atIndex:3 + table];
[mtlComputeCommandEncoder useResource:metal_kernel_pso->intersection_func_table[table]
usage:MTLResourceUsageRead];
}
else {
[metal_device_->mtlAncillaryArgEncoder setIntersectionFunctionTable:nil
atIndex:4 + table];
atIndex:3 + table];
}
}
}
@@ -878,7 +874,6 @@ void MetalDeviceQueue::prepare_resources(DeviceKernel kernel)
/* ancillaries */
[mtlComputeEncoder_ useResource:metal_device_->texture_bindings_2d usage:MTLResourceUsageRead];
[mtlComputeEncoder_ useResource:metal_device_->texture_bindings_3d usage:MTLResourceUsageRead];
[mtlComputeEncoder_ useResource:metal_device_->buffer_bindings_1d usage:MTLResourceUsageRead];
}
id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel kernel)

View File

@@ -103,7 +103,7 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
}
/* If the system has both an AMD GPU (discrete) and an Intel one (integrated), prefer the AMD
* one. This can be overridden with CYCLES_METAL_FORCE_INTEL. */
* one. This can be overriden with CYCLES_METAL_FORCE_INTEL. */
bool has_usable_amd_gpu = false;
if (@available(macos 12.3, *)) {
for (id<MTLDevice> device in MTLCopyAllDevices()) {

View File

@@ -1437,9 +1437,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
BVHOptiX *const blas = static_cast<BVHOptiX *>(ob->get_geometry()->bvh);
OptixTraversableHandle handle = blas->traversable_handle;
if (handle == 0) {
continue;
}
OptixInstance &instance = instances[num_instances++];
memset(&instance, 0, sizeof(instance));

View File

@@ -1343,7 +1343,7 @@ void PathTrace::guiding_prepare_structures()
* per update to be limited, for reproducible results and reasonable training size.
*
* Idea: we could stochastically discard samples with a probability of 1/num_samples_per_update
* we can then update only after the num_samples_per_update iterations are rendered. */
* we can then update only after the num_samples_per_update iterations are rendered. */
render_scheduler_.set_limit_samples_per_update(4);
}
else {

View File

@@ -94,7 +94,7 @@ class PathTrace {
void set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling);
/* Set the parameters for guiding.
* Use to setup the guiding structures before each rendering iteration. */
* Use to setup the guiding structures before each rendering iteration.*/
void set_guiding_params(const GuidingParams &params, const bool reset);
/* Sets output driver for render buffer output. */
@@ -119,7 +119,7 @@ class PathTrace {
*/
void cancel();
/* Copy an entire render buffer to/from the path trace. */
/* Copy an entire render buffer to/from the path trace. */
/* Copy happens via CPU side buffer: data will be copied from every device of the path trace, and
* the data will be copied to the device of the given render buffers. */
@@ -294,7 +294,7 @@ class PathTrace {
* rendering iteration. */
unique_ptr<openpgl::cpp::SampleStorage> guiding_sample_data_storage_;
/* The number of already performed training iterations for the guiding field. */
/* The number of already performed training iterations for the guiding field.*/
int guiding_update_count = 0;
#endif

Some files were not shown because too many files have changed in this diff Show More