Compare commits
11 Commits
realtime-c
...
tmp-volume
Author | SHA1 | Date | |
---|---|---|---|
ca820c3ebd | |||
ab223d1f8e | |||
b8866eeeb3 | |||
616ff437ea | |||
b2c869f6a9 | |||
54ece291d3 | |||
b6db27fe24 | |||
fbfa83caba | |||
32c173be45 | |||
77c550cc43 | |||
6b7d893107 |
8
.arcconfig
Normal file
8
.arcconfig
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"project_id" : "Blender",
|
||||
"conduit_uri" : "https://developer.blender.org/",
|
||||
"phabricator.uri" : "https://developer.blender.org/",
|
||||
"git.default-relative-commit" : "origin/master",
|
||||
"arc.land.update.default" : "rebase",
|
||||
"arc.land.onto.default" : "master"
|
||||
}
|
@@ -236,8 +236,6 @@ ForEachMacros:
|
||||
- LOOP_UNSELECTED_POINTS
|
||||
- LOOP_VISIBLE_KEYS
|
||||
- LOOP_VISIBLE_POINTS
|
||||
- LIGHT_FOREACH_BEGIN_DIRECTIONAL
|
||||
- LIGHT_FOREACH_BEGIN_LOCAL
|
||||
- LISTBASE_CIRCULAR_BACKWARD_BEGIN
|
||||
- LISTBASE_CIRCULAR_FORWARD_BEGIN
|
||||
- LISTBASE_FOREACH
|
||||
|
@@ -2,4 +2,4 @@ ${CommitTitle}
|
||||
|
||||
${CommitBody}
|
||||
|
||||
Pull Request: https://projects.blender.org/blender/blender/pulls/${PullRequestIndex}
|
||||
Pull Request #${PullRequestIndex}
|
||||
|
@@ -1,3 +1,3 @@
|
||||
${PullRequestTitle}
|
||||
|
||||
Pull Request: https://projects.blender.org/blender/blender/pulls/${PullRequestIndex}
|
||||
Pull Request #${PullRequestIndex}
|
||||
|
@@ -1,9 +1,9 @@
|
||||
name: Bug Report
|
||||
about: File a bug report
|
||||
labels:
|
||||
- "Type/Report"
|
||||
- "Status/Needs Triage"
|
||||
- "Priority/Normal"
|
||||
- "type::Report"
|
||||
- "status::Needs Triage"
|
||||
- "priority::Normal"
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
|
@@ -1,7 +1,7 @@
|
||||
name: Design
|
||||
about: Create a design task (for developers only)
|
||||
labels:
|
||||
- "Type/Design"
|
||||
- "type::Design"
|
||||
body:
|
||||
- type: textarea
|
||||
id: body
|
||||
|
@@ -1,7 +1,7 @@
|
||||
name: To Do
|
||||
about: Create a to do task (for developers only)
|
||||
labels:
|
||||
- "Type/To Do"
|
||||
- "type::To Do"
|
||||
body:
|
||||
- type: textarea
|
||||
id: body
|
||||
|
3
.github/pull_request_template.md
vendored
3
.github/pull_request_template.md
vendored
@@ -1,4 +1,5 @@
|
||||
This repository is only used as a mirror. Blender development happens on projects.blender.org.
|
||||
This repository is only used as a mirror of git.blender.org. Blender development happens on
|
||||
https://developer.blender.org.
|
||||
|
||||
To get started with contributing code, please see:
|
||||
https://wiki.blender.org/wiki/Process/Contributing_Code
|
||||
|
3
.github/stale.yml
vendored
3
.github/stale.yml
vendored
@@ -15,7 +15,8 @@ staleLabel: stale
|
||||
# Comment to post when closing a stale Issue or Pull Request.
|
||||
closeComment: >
|
||||
This issue has been automatically closed, because this repository is only
|
||||
used as a mirror. Blender development happens on projects.blender.org.
|
||||
used as a mirror of git.blender.org. Blender development happens on
|
||||
developer.blender.org.
|
||||
|
||||
To get started contributing code, please read:
|
||||
https://wiki.blender.org/wiki/Process/Contributing_Code
|
||||
|
27
.gitignore
vendored
27
.gitignore
vendored
@@ -39,7 +39,7 @@ Desktop.ini
|
||||
/doc/python_api/rst/bmesh.ops.rst
|
||||
|
||||
# in-source lib downloads
|
||||
/build_files/build_environment/downloads/
|
||||
/build_files/build_environment/downloads
|
||||
|
||||
# in-source buildbot signing configuration
|
||||
/build_files/buildbot/codesign/config_server.py
|
||||
@@ -48,27 +48,4 @@ Desktop.ini
|
||||
waveletNoiseTile.bin
|
||||
|
||||
# testing environment
|
||||
/Testing/
|
||||
|
||||
# Translations.
|
||||
/locale/user-config.py
|
||||
|
||||
# External repositories.
|
||||
/scripts/addons/
|
||||
/scripts/addons_contrib/
|
||||
|
||||
# Ignore old submodules directories.
|
||||
# Eventually need to get rid of those, but for the first time of transition
|
||||
# avoid incidents when the folders exist after bisect and developers staging
|
||||
# them by accident.
|
||||
/release/scripts/addons/
|
||||
/release/datafiles/locale/
|
||||
/release/scripts/addons_contrib/
|
||||
/source/tools/
|
||||
|
||||
# Build files for VS and VS Code.
|
||||
/build/
|
||||
/out/
|
||||
CMakeSettings.json
|
||||
CMakePresets.json
|
||||
CMakeUserPresets.json
|
||||
/Testing
|
||||
|
20
.gitmodules
vendored
Normal file
20
.gitmodules
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
[submodule "release/scripts/addons"]
|
||||
path = release/scripts/addons
|
||||
url = ../blender-addons.git
|
||||
branch = master
|
||||
ignore = all
|
||||
[submodule "release/scripts/addons_contrib"]
|
||||
path = release/scripts/addons_contrib
|
||||
url = ../blender-addons-contrib.git
|
||||
branch = master
|
||||
ignore = all
|
||||
[submodule "release/datafiles/locale"]
|
||||
path = release/datafiles/locale
|
||||
url = ../blender-translations.git
|
||||
branch = master
|
||||
ignore = all
|
||||
[submodule "source/tools"]
|
||||
path = source/tools
|
||||
url = ../blender-dev-tools.git
|
||||
branch = master
|
||||
ignore = all
|
@@ -331,6 +331,7 @@ option(WITH_MOD_REMESH "Enable Remesh Modifier" ON)
|
||||
option(WITH_MOD_OCEANSIM "Enable Ocean Modifier" ON)
|
||||
|
||||
# Image format support
|
||||
option(WITH_OPENIMAGEIO "Enable OpenImageIO Support (http://www.openimageio.org)" ON)
|
||||
option(WITH_IMAGE_OPENEXR "Enable OpenEXR Support (http://www.openexr.com)" ON)
|
||||
option(WITH_IMAGE_OPENJPEG "Enable OpenJpeg Support (http://www.openjpeg.org)" ON)
|
||||
option(WITH_IMAGE_TIFF "Enable LibTIFF Support" ON)
|
||||
@@ -357,7 +358,6 @@ option(WITH_MATERIALX "Enable MaterialX Support" OFF)
|
||||
# Disable opencollada when we don't have precompiled libs
|
||||
option(WITH_OPENCOLLADA "Enable OpenCollada Support (http://www.opencollada.org)" ON)
|
||||
option(WITH_IO_WAVEFRONT_OBJ "Enable Wavefront-OBJ 3D file format support (*.obj)" ON)
|
||||
option(WITH_IO_PLY "Enable PLY 3D file format support (*.ply)" ON)
|
||||
option(WITH_IO_STL "Enable STL 3D file format support (*.stl)" ON)
|
||||
option(WITH_IO_GPENCIL "Enable grease-pencil file format IO (*.svg, *.pdf)" ON)
|
||||
|
||||
@@ -524,7 +524,7 @@ endif()
|
||||
if(NOT APPLE)
|
||||
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
|
||||
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
|
||||
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
|
||||
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
|
||||
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
|
||||
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
|
||||
endif()
|
||||
@@ -617,20 +617,16 @@ endif()
|
||||
|
||||
option(WITH_OPENGL "When off limits visibility of the opengl headers to just bf_gpu and gawain (temporary option for development purposes)" ON)
|
||||
option(WITH_GPU_BUILDTIME_SHADER_BUILDER "Shader builder is a developer option enabling linting on GLSL during compilation" OFF)
|
||||
option(WITH_RENDERDOC "Use Renderdoc API to capture frames" OFF)
|
||||
|
||||
mark_as_advanced(
|
||||
WITH_OPENGL
|
||||
WITH_GPU_BUILDTIME_SHADER_BUILDER
|
||||
WITH_RENDERDOC
|
||||
)
|
||||
|
||||
# Vulkan
|
||||
option(WITH_VULKAN_BACKEND "Enable Vulkan as graphics backend (only for development)" OFF)
|
||||
option(WITH_VULKAN_GUARDEDALLOC "Use guardedalloc for host allocations done inside Vulkan (development option)" OFF)
|
||||
mark_as_advanced(
|
||||
WITH_VULKAN_BACKEND
|
||||
WITH_VULKAN_GUARDEDALLOC
|
||||
)
|
||||
|
||||
# Metal
|
||||
@@ -894,6 +890,8 @@ set_and_warn_dependency(WITH_IMAGE_TIFF WITH_HARU OFF)
|
||||
|
||||
# auto enable openimageio for cycles
|
||||
if(WITH_CYCLES)
|
||||
set(WITH_OPENIMAGEIO ON)
|
||||
|
||||
# auto enable llvm for cycles_osl
|
||||
if(WITH_CYCLES_OSL)
|
||||
set(WITH_LLVM ON CACHE BOOL "" FORCE)
|
||||
@@ -954,6 +952,21 @@ endif()
|
||||
# -----------------------------------------------------------------------------
|
||||
# Check if Sub-modules are Cloned
|
||||
|
||||
if(WITH_INTERNATIONAL)
|
||||
file(GLOB RESULT "${CMAKE_SOURCE_DIR}/release/datafiles/locale")
|
||||
list(LENGTH RESULT DIR_LEN)
|
||||
if(DIR_LEN EQUAL 0)
|
||||
message(
|
||||
WARNING
|
||||
"Translation path '${CMAKE_SOURCE_DIR}/release/datafiles/locale' is missing, "
|
||||
"This is a 'git submodule', which are known not to work with bridges to other version "
|
||||
"control systems."
|
||||
)
|
||||
set(TRANSLATIONS_FOUND OFF)
|
||||
set_and_warn_library_found("Translations" TRANSLATIONS_FOUND WITH_INTERNATIONAL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_PYTHON)
|
||||
# While we have this as an '#error' in 'bpy_capi_utils.h',
|
||||
# upgrading Python tends to cause confusion for users who build.
|
||||
@@ -969,14 +982,14 @@ if(WITH_PYTHON)
|
||||
)
|
||||
endif()
|
||||
|
||||
file(GLOB RESULT "${CMAKE_SOURCE_DIR}/scripts/addons")
|
||||
file(GLOB RESULT "${CMAKE_SOURCE_DIR}/release/scripts/addons")
|
||||
list(LENGTH RESULT DIR_LEN)
|
||||
if(DIR_LEN EQUAL 0)
|
||||
message(
|
||||
WARNING
|
||||
"Addons path '${CMAKE_SOURCE_DIR}/scripts/addons' is missing. "
|
||||
"This is an external repository which needs to be checked out. Use `make update` to do so. "
|
||||
"* CONTINUING WITHOUT ADDONS *"
|
||||
"Addons path '${CMAKE_SOURCE_DIR}/release/scripts/addons' is missing, "
|
||||
"This is a 'git submodule', which are known not to work with bridges to other version "
|
||||
"control systems: * CONTINUING WITHOUT ADDONS *"
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
@@ -1085,6 +1098,13 @@ if(NOT WITH_FFTW3 AND WITH_MOD_OCEANSIM)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES)
|
||||
if(NOT WITH_OPENIMAGEIO)
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"Cycles requires WITH_OPENIMAGEIO, the library may not have been found. "
|
||||
"Configure OIIO or disable WITH_CYCLES"
|
||||
)
|
||||
endif()
|
||||
if(WITH_CYCLES_OSL)
|
||||
if(NOT WITH_LLVM)
|
||||
message(
|
||||
@@ -1557,9 +1577,6 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
# add_check_c_compiler_flag(C_WARNINGS C_WARN_UNUSED_MACROS -Wunused-macros)
|
||||
# add_check_cxx_compiler_flag(CXX_WARNINGS CXX_WARN_UNUSED_MACROS -Wunused-macros)
|
||||
|
||||
add_check_c_compiler_flag(C_WARNINGS C_WARN_ERROR_UNGUARDED_AVAILABILITY_NEW -Werror=unguarded-availability-new)
|
||||
add_check_c_compiler_flag(CXX_WARNINGS CXX_WARN_ERROR_UNGUARDED_AVAILABILITY_NEW -Werror=unguarded-availability-new)
|
||||
|
||||
# ---------------------
|
||||
# Suppress Strict Flags
|
||||
|
||||
@@ -1612,7 +1629,6 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "MSVC")
|
||||
# warning level:
|
||||
"/W3"
|
||||
"/w34062" # switch statement contains 'default' but no 'case' labels
|
||||
"/w34100" # 'identifier' : unreferenced formal parameter
|
||||
"/w34115" # 'type' : named type definition in parentheses
|
||||
"/w34189" # local variable is initialized but not referenced
|
||||
# see https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/c5038?view=vs-2017
|
||||
@@ -1939,6 +1955,7 @@ if(FIRST_RUN)
|
||||
info_cfg_option(WITH_IMAGE_OPENEXR)
|
||||
info_cfg_option(WITH_IMAGE_OPENJPEG)
|
||||
info_cfg_option(WITH_IMAGE_TIFF)
|
||||
info_cfg_option(WITH_OPENIMAGEIO)
|
||||
|
||||
info_cfg_text("Audio:")
|
||||
info_cfg_option(WITH_CODEC_AVI)
|
||||
|
36
GNUmakefile
36
GNUmakefile
@@ -69,7 +69,7 @@ Static Source Code Checking
|
||||
* check_cmake: Runs our own cmake file checker which detects errors in the cmake file list definitions.
|
||||
* check_pep8: Checks all Python script are pep8 which are tagged to use the stricter formatting.
|
||||
* check_mypy: Checks all Python scripts using mypy,
|
||||
see: tools/check_source/check_mypy_config.py scripts which are included.
|
||||
see: source/tools/check_source/check_mypy_config.py scripts which are included.
|
||||
|
||||
Documentation Checking
|
||||
|
||||
@@ -85,7 +85,7 @@ Spell Checkers
|
||||
* check_spelling_osl: Check for spelling errors (OSL only).
|
||||
* check_spelling_py: Check for spelling errors (Python only).
|
||||
|
||||
Note: an additional word-list is maintained at: 'tools/check_source/check_spelling_c_config.py'
|
||||
Note: an additional word-list is maintained at: 'source/tools/check_source/check_spelling_c_config.py'
|
||||
|
||||
Note: that spell checkers can take a 'CHECK_SPELLING_CACHE' filepath argument,
|
||||
so re-running does not need to re-check unchanged files.
|
||||
@@ -299,11 +299,7 @@ else
|
||||
ifneq ("$(wildcard $(DEPS_BUILD_DIR)/build.ninja)","")
|
||||
DEPS_BUILD_COMMAND:=ninja
|
||||
else
|
||||
ifeq ($(OS), Darwin)
|
||||
DEPS_BUILD_COMMAND:=make -s
|
||||
else
|
||||
DEPS_BUILD_COMMAND:="$(BLENDER_DIR)/build_files/build_environment/linux/make_deps_wrapper.sh" -s
|
||||
endif
|
||||
DEPS_BUILD_COMMAND:=make -s
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -402,7 +398,7 @@ endif
|
||||
|
||||
deps: .FORCE
|
||||
@echo
|
||||
@echo Configuring dependencies in \"$(DEPS_BUILD_DIR)\", install to \"$(DEPS_INSTALL_DIR)\"
|
||||
@echo Configuring dependencies in \"$(DEPS_BUILD_DIR)\"
|
||||
|
||||
@cmake -H"$(DEPS_SOURCE_DIR)" \
|
||||
-B"$(DEPS_BUILD_DIR)" \
|
||||
@@ -490,22 +486,22 @@ check_smatch: .FORCE
|
||||
$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_smatch.py"
|
||||
|
||||
check_mypy: .FORCE
|
||||
@$(PYTHON) "$(BLENDER_DIR)/tools/check_source/check_mypy.py"
|
||||
@$(PYTHON) "$(BLENDER_DIR)/source/tools/check_source/check_mypy.py"
|
||||
|
||||
check_wiki_file_structure: .FORCE
|
||||
@PYTHONIOENCODING=utf_8 $(PYTHON) \
|
||||
"$(BLENDER_DIR)/tools/check_wiki/check_wiki_file_structure.py"
|
||||
"$(BLENDER_DIR)/source/tools/check_wiki/check_wiki_file_structure.py"
|
||||
|
||||
check_spelling_py: .FORCE
|
||||
@cd "$(BUILD_DIR)" ; \
|
||||
PYTHONIOENCODING=utf_8 $(PYTHON) \
|
||||
"$(BLENDER_DIR)/tools/check_source/check_spelling.py" \
|
||||
"$(BLENDER_DIR)/scripts"
|
||||
"$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
|
||||
"$(BLENDER_DIR)/release/scripts"
|
||||
|
||||
check_spelling_c: .FORCE
|
||||
@cd "$(BUILD_DIR)" ; \
|
||||
PYTHONIOENCODING=utf_8 $(PYTHON) \
|
||||
"$(BLENDER_DIR)/tools/check_source/check_spelling.py" \
|
||||
"$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
|
||||
--cache-file=$(CHECK_SPELLING_CACHE) \
|
||||
"$(BLENDER_DIR)/source" \
|
||||
"$(BLENDER_DIR)/intern/cycles" \
|
||||
@@ -515,21 +511,21 @@ check_spelling_c: .FORCE
|
||||
check_spelling_osl: .FORCE
|
||||
@cd "$(BUILD_DIR)" ; \
|
||||
PYTHONIOENCODING=utf_8 $(PYTHON) \
|
||||
"$(BLENDER_DIR)/tools/check_source/check_spelling.py" \
|
||||
"$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
|
||||
--cache-file=$(CHECK_SPELLING_CACHE) \
|
||||
"$(BLENDER_DIR)/intern/cycles/kernel/shaders"
|
||||
|
||||
check_descriptions: .FORCE
|
||||
@$(BLENDER_BIN) --background -noaudio --factory-startup --python \
|
||||
"$(BLENDER_DIR)/tools/check_source/check_descriptions.py"
|
||||
"$(BLENDER_DIR)/source/tools/check_source/check_descriptions.py"
|
||||
|
||||
check_deprecated: .FORCE
|
||||
@PYTHONIOENCODING=utf_8 $(PYTHON) \
|
||||
tools/check_source/check_deprecated.py
|
||||
source/tools/check_source/check_deprecated.py
|
||||
|
||||
check_licenses: .FORCE
|
||||
@PYTHONIOENCODING=utf_8 $(PYTHON) \
|
||||
"$(BLENDER_DIR)/tools/check_source/check_licenses.py" \
|
||||
"$(BLENDER_DIR)/source/tools/check_source/check_licenses.py" \
|
||||
"--show-headers=$(SHOW_HEADERS)"
|
||||
|
||||
check_pep8: .FORCE
|
||||
@@ -538,7 +534,7 @@ check_pep8: .FORCE
|
||||
|
||||
check_cmake: .FORCE
|
||||
@PYTHONIOENCODING=utf_8 $(PYTHON) \
|
||||
tools/check_source/check_cmake_consistency.py
|
||||
source/tools/check_source/check_cmake_consistency.py
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -576,8 +572,8 @@ update_code: .FORCE
|
||||
@$(PYTHON) ./build_files/utils/make_update.py --no-libraries
|
||||
|
||||
format: .FORCE
|
||||
@PATH="${LIBDIR}/llvm/bin/:$(PATH)" $(PYTHON) tools/utils_maintenance/clang_format_paths.py $(PATHS)
|
||||
@$(PYTHON) tools/utils_maintenance/autopep8_format_paths.py --autopep8-command="$(AUTOPEP8)" $(PATHS)
|
||||
@PATH="${LIBDIR}/llvm/bin/:$(PATH)" $(PYTHON) source/tools/utils_maintenance/clang_format_paths.py $(PATHS)
|
||||
@$(PYTHON) source/tools/utils_maintenance/autopep8_format_paths.py --autopep8-command="$(AUTOPEP8)" $(PATHS)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
@@ -24,7 +24,7 @@ Development
|
||||
-----------
|
||||
|
||||
- [Build Instructions](https://wiki.blender.org/wiki/Building_Blender)
|
||||
- [Code Review & Bug Tracker](https://projects.blender.org)
|
||||
- [Code Review & Bug Tracker](https://developer.blender.org)
|
||||
- [Developer Forum](https://devtalk.blender.org)
|
||||
- [Developer Documentation](https://wiki.blender.org)
|
||||
|
||||
|
@@ -78,7 +78,12 @@ include(cmake/tbb.cmake)
|
||||
include(cmake/python.cmake)
|
||||
include(cmake/llvm.cmake)
|
||||
include(cmake/osl.cmake)
|
||||
include(cmake/numpy.cmake)
|
||||
option(USE_PIP_NUMPY "Install NumPy using pip wheel instead of building from source" OFF)
|
||||
if(APPLE AND ("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "x86_64"))
|
||||
set(USE_PIP_NUMPY ON)
|
||||
else()
|
||||
include(cmake/numpy.cmake)
|
||||
endif()
|
||||
include(cmake/python_site_packages.cmake)
|
||||
include(cmake/package_python.cmake)
|
||||
include(cmake/openimageio.cmake)
|
||||
|
@@ -10,7 +10,7 @@ ExternalProject_Add(external_epoxy
|
||||
URL_HASH ${EPOXY_HASH_TYPE}=${EPOXY_HASH}
|
||||
PREFIX ${BUILD_DIR}/epoxy
|
||||
PATCH_COMMAND ${PATCH_CMD} -p 1 -N -d ${BUILD_DIR}/epoxy/src/external_epoxy/ < ${PATCH_DIR}/epoxy.diff
|
||||
CONFIGURE_COMMAND ${CONFIGURE_ENV} && ${MESON} setup --prefix ${LIBDIR}/epoxy --default-library ${EPOXY_LIB_TYPE} --libdir lib ${BUILD_DIR}/epoxy/src/external_epoxy-build ${BUILD_DIR}/epoxy/src/external_epoxy -Dtests=false ${MESON_BUILD_TYPE}
|
||||
CONFIGURE_COMMAND ${CONFIGURE_ENV} && ${MESON} setup --prefix ${LIBDIR}/epoxy --default-library ${EPOXY_LIB_TYPE} --libdir lib ${BUILD_DIR}/epoxy/src/external_epoxy-build ${BUILD_DIR}/epoxy/src/external_epoxy -Dtests=false
|
||||
BUILD_COMMAND ninja
|
||||
INSTALL_COMMAND ninja install
|
||||
)
|
||||
|
@@ -9,7 +9,7 @@ ExternalProject_Add(external_fribidi
|
||||
URL_HASH ${FRIBIDI_HASH_TYPE}=${FRIBIDI_HASH}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
PREFIX ${BUILD_DIR}/fribidi
|
||||
CONFIGURE_COMMAND ${MESON} setup --prefix ${LIBDIR}/fribidi ${MESON_BUILD_TYPE} -Ddocs=false --default-library static --libdir lib ${BUILD_DIR}/fribidi/src/external_fribidi-build ${BUILD_DIR}/fribidi/src/external_fribidi
|
||||
CONFIGURE_COMMAND ${MESON} setup --prefix ${LIBDIR}/fribidi -Ddocs=false --default-library static --libdir lib ${BUILD_DIR}/fribidi/src/external_fribidi-build ${BUILD_DIR}/fribidi/src/external_fribidi
|
||||
BUILD_COMMAND ninja
|
||||
INSTALL_COMMAND ninja install
|
||||
INSTALL_DIR ${LIBDIR}/fribidi
|
||||
|
@@ -22,7 +22,7 @@ elseif(UNIX AND NOT APPLE)
|
||||
)
|
||||
endif()
|
||||
|
||||
# Boolean crashes with Arm assembly, see #103423.
|
||||
# Boolean crashes with Arm assembly, see T103423.
|
||||
if(BLENDER_PLATFORM_ARM)
|
||||
set(GMP_OPTIONS
|
||||
${GMP_OPTIONS}
|
||||
|
@@ -21,7 +21,6 @@ set(HARFBUZZ_EXTRA_OPTIONS
|
||||
# Only used for command line utilities,
|
||||
# disable as this would add an additional & unnecessary build-dependency.
|
||||
-Dcairo=disabled
|
||||
${MESON_BUILD_TYPE}
|
||||
)
|
||||
|
||||
ExternalProject_Add(external_harfbuzz
|
||||
@@ -60,10 +59,3 @@ if(BUILD_MODE STREQUAL Release AND WIN32)
|
||||
DEPENDEES install
|
||||
)
|
||||
endif()
|
||||
|
||||
if(BUILD_MODE STREQUAL Debug AND WIN32)
|
||||
ExternalProject_Add_Step(external_harfbuzz after_install
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/harfbuzz/lib/libharfbuzz.a ${HARVEST_TARGET}/harfbuzz/lib/libharfbuzz_d.lib
|
||||
DEPENDEES install
|
||||
)
|
||||
endif()
|
||||
|
@@ -40,8 +40,7 @@ ExternalProject_Add(external_igc_llvm
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0004-OpenCL-support-cl_ext_float_atomics.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0005-OpenCL-Add-cl_khr_integer_dot_product.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0003-Add-missing-include-limit-in-benchmark.patch
|
||||
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch
|
||||
)
|
||||
add_dependencies(
|
||||
external_igc_llvm
|
||||
@@ -56,6 +55,9 @@ ExternalProject_Add(external_igc_spirv_translator
|
||||
CONFIGURE_COMMAND echo .
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0001-update-SPIR-V-headers-for-SPV_INTEL_split_barrier.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0002-Add-support-for-split-barriers-extension-SPV_INTEL_s.patch &&
|
||||
${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0003-Support-cl_bf16_conversions.patch
|
||||
)
|
||||
add_dependencies(
|
||||
external_igc_spirv_translator
|
||||
|
@@ -15,7 +15,7 @@ llvm-config = '${LIBDIR}/llvm/bin/llvm-config'"
|
||||
)
|
||||
|
||||
set(MESA_EXTRA_FLAGS
|
||||
${MESON_BUILD_TYPE}
|
||||
-Dbuildtype=release
|
||||
-Dc_args=${MESA_CFLAGS}
|
||||
-Dcpp_args=${MESA_CXXFLAGS}
|
||||
-Dc_link_args=${MESA_LDFLAGS}
|
||||
|
@@ -44,21 +44,13 @@ set(OPENVDB_EXTRA_ARGS
|
||||
# -DLLVM_DIR=${LIBDIR}/llvm/lib/cmake/llvm
|
||||
)
|
||||
|
||||
set(OPENVDB_PATCH ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb.diff)
|
||||
if(APPLE)
|
||||
set(OPENVDB_PATCH
|
||||
${OPENVDB_PATCH} &&
|
||||
${PATCH_CMD} -p 0 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb_metal.diff
|
||||
)
|
||||
endif()
|
||||
|
||||
ExternalProject_Add(openvdb
|
||||
URL file://${PACKAGE_DIR}/${OPENVDB_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
URL_HASH ${OPENVDB_HASH_TYPE}=${OPENVDB_HASH}
|
||||
CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
|
||||
PREFIX ${BUILD_DIR}/openvdb
|
||||
PATCH_COMMAND ${OPENVDB_PATCH}
|
||||
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb.diff
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/openvdb ${DEFAULT_CMAKE_FLAGS} ${OPENVDB_EXTRA_ARGS}
|
||||
INSTALL_DIR ${LIBDIR}/openvdb
|
||||
)
|
||||
|
@@ -16,10 +16,8 @@ message("BuildMode = ${BUILD_MODE}")
|
||||
|
||||
if(BUILD_MODE STREQUAL "Debug")
|
||||
set(LIBDIR ${CMAKE_CURRENT_BINARY_DIR}/Debug)
|
||||
set(MESON_BUILD_TYPE -Dbuildtype=debug)
|
||||
else()
|
||||
set(LIBDIR ${CMAKE_CURRENT_BINARY_DIR}/Release)
|
||||
set(MESON_BUILD_TYPE -Dbuildtype=release)
|
||||
endif()
|
||||
|
||||
set(DOWNLOAD_DIR "${CMAKE_CURRENT_BINARY_DIR}/downloads" CACHE STRING "Path for downloaded files")
|
||||
|
@@ -88,19 +88,6 @@ else()
|
||||
export LDFLAGS=${PYTHON_LDFLAGS} &&
|
||||
export PKG_CONFIG_PATH=${LIBDIR}/ffi/lib/pkgconfig)
|
||||
|
||||
# NOTE: untested on APPLE so far.
|
||||
if(NOT APPLE)
|
||||
set(PYTHON_CONFIGURE_EXTRA_ARGS
|
||||
${PYTHON_CONFIGURE_EXTRA_ARGS}
|
||||
# Used on most release Linux builds (e.g. Fedora),
|
||||
# increases build times noticeably with the benefit of a modest speedup at runtime.
|
||||
--enable-optimizations
|
||||
# While LTO is OK when building on the same system, it's incompatible across GCC versions,
|
||||
# making it impractical for developers to build against, so keep it disabled.
|
||||
# `--with-lto`
|
||||
)
|
||||
endif()
|
||||
|
||||
ExternalProject_Add(external_python
|
||||
URL file://${PACKAGE_DIR}/${PYTHON_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
|
@@ -38,6 +38,15 @@ ExternalProject_Add(external_python_site_packages
|
||||
--no-binary :all:
|
||||
)
|
||||
|
||||
if(USE_PIP_NUMPY)
|
||||
# Use only wheel (and not build from source) to stop NumPy from linking against buggy
|
||||
# Accelerate framework backend on macOS. Official wheels are built with OpenBLAS.
|
||||
ExternalProject_Add_Step(external_python_site_packages after_install
|
||||
COMMAND ${PYTHON_BINARY} -m pip install --no-cache-dir numpy==${NUMPY_VERSION} --only-binary :all:
|
||||
DEPENDEES install
|
||||
)
|
||||
endif()
|
||||
|
||||
add_dependencies(
|
||||
external_python_site_packages
|
||||
external_python
|
||||
|
@@ -165,9 +165,9 @@ set(OPENMP_URI https://github.com/llvm/llvm-project/releases/download/llvmorg-${
|
||||
set(OPENMP_HASH_TYPE MD5)
|
||||
set(OPENMP_FILE openmp-${OPENMP_VERSION}.src.tar.xz)
|
||||
|
||||
set(OPENIMAGEIO_VERSION v2.4.9.0)
|
||||
set(OPENIMAGEIO_VERSION v2.4.6.0)
|
||||
set(OPENIMAGEIO_URI https://github.com/OpenImageIO/oiio/archive/refs/tags/${OPENIMAGEIO_VERSION}.tar.gz)
|
||||
set(OPENIMAGEIO_HASH 7da92a7d6029921a8599a977ff1efa2a)
|
||||
set(OPENIMAGEIO_HASH c7acc1b9a8fda04ef48f7de1feda4dae)
|
||||
set(OPENIMAGEIO_HASH_TYPE MD5)
|
||||
set(OPENIMAGEIO_FILE OpenImageIO-${OPENIMAGEIO_VERSION}.tar.gz)
|
||||
|
||||
@@ -668,9 +668,9 @@ set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
|
||||
# compiler, the versions used are taken from the following location
|
||||
# https://github.com/intel/intel-graphics-compiler/releases
|
||||
|
||||
set(IGC_VERSION 1.0.13064.7)
|
||||
set(IGC_VERSION 1.0.12149.1)
|
||||
set(IGC_URI https://github.com/intel/intel-graphics-compiler/archive/refs/tags/igc-${IGC_VERSION}.tar.gz)
|
||||
set(IGC_HASH a929abd4cca2b293961ec0437ee4b3b2147bd3b2c8a3c423af78c0c359b2e5ae)
|
||||
set(IGC_HASH 44f67f24e3bc5130f9f062533abf8154782a9d0a992bc19b498639a8521ae836)
|
||||
set(IGC_HASH_TYPE SHA256)
|
||||
set(IGC_FILE igc-${IGC_VERSION}.tar.gz)
|
||||
|
||||
@@ -690,15 +690,15 @@ set(IGC_LLVM_FILE ${IGC_LLVM_VERSION}.tar.gz)
|
||||
#
|
||||
# WARNING WARNING WARNING
|
||||
|
||||
set(IGC_OPENCL_CLANG_VERSION ee31812ea8b89d08c2918f045d11a19bd33525c5)
|
||||
set(IGC_OPENCL_CLANG_VERSION 363a5262d8c7cff3fb28f3bdb5d85c8d7e91c1bb)
|
||||
set(IGC_OPENCL_CLANG_URI https://github.com/intel/opencl-clang/archive/${IGC_OPENCL_CLANG_VERSION}.tar.gz)
|
||||
set(IGC_OPENCL_CLANG_HASH 1db6735bbcfaa31e8a9ba39f121d6bafa806ea8919e9f56782d6aaa67771ddda)
|
||||
set(IGC_OPENCL_CLANG_HASH aa8cf72bb239722ce8ce44f79413c6887ecc8ca18477dd520aa5c4809756da9a)
|
||||
set(IGC_OPENCL_CLANG_HASH_TYPE SHA256)
|
||||
set(IGC_OPENCL_CLANG_FILE opencl-clang-${IGC_OPENCL_CLANG_VERSION}.tar.gz)
|
||||
|
||||
set(IGC_VCINTRINSICS_VERSION v0.11.0)
|
||||
set(IGC_VCINTRINSICS_VERSION v0.5.0)
|
||||
set(IGC_VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/refs/tags/${IGC_VCINTRINSICS_VERSION}.tar.gz)
|
||||
set(IGC_VCINTRINSICS_HASH e5acd5626ce7fa6d41ce154c50ac805eda734ee66af94ef28e680ac2ad81bb9f)
|
||||
set(IGC_VCINTRINSICS_HASH 70bb47c5e32173cf61514941e83ae7c7eb4485e6d2fca60cfa1f50d4f42c41f2)
|
||||
set(IGC_VCINTRINSICS_HASH_TYPE SHA256)
|
||||
set(IGC_VCINTRINSICS_FILE vc-intrinsics-${IGC_VCINTRINSICS_VERSION}.tar.gz)
|
||||
|
||||
@@ -714,9 +714,9 @@ set(IGC_SPIRV_TOOLS_HASH 6e19900e948944243024aedd0a201baf3854b377b9cc7a386553bc1
|
||||
set(IGC_SPIRV_TOOLS_HASH_TYPE SHA256)
|
||||
set(IGC_SPIRV_TOOLS_FILE SPIR-V-Tools-${IGC_SPIRV_TOOLS_VERSION}.tar.gz)
|
||||
|
||||
set(IGC_SPIRV_TRANSLATOR_VERSION d739c01d65ec00dee64dedd40deed805216a7193)
|
||||
set(IGC_SPIRV_TRANSLATOR_VERSION a31ffaeef77e23d500b3ea3d35e0c42ff5648ad9)
|
||||
set(IGC_SPIRV_TRANSLATOR_URI https://github.com/KhronosGroup/SPIRV-LLVM-Translator/archive/${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
|
||||
set(IGC_SPIRV_TRANSLATOR_HASH ddc0cc9ccbe59dadeaf291012d59de142b2e9f2b124dbb634644d39daddaa13e)
|
||||
set(IGC_SPIRV_TRANSLATOR_HASH 9e26c96a45341b8f8af521bacea20e752623346340addd02af95d669f6e89252)
|
||||
set(IGC_SPIRV_TRANSLATOR_HASH_TYPE SHA256)
|
||||
set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
|
||||
|
||||
@@ -724,15 +724,15 @@ set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.
|
||||
### Intel Graphics Compiler DEPS END ###
|
||||
########################################
|
||||
|
||||
set(GMMLIB_VERSION intel-gmmlib-22.3.0)
|
||||
set(GMMLIB_VERSION intel-gmmlib-22.1.8)
|
||||
set(GMMLIB_URI https://github.com/intel/gmmlib/archive/refs/tags/${GMMLIB_VERSION}.tar.gz)
|
||||
set(GMMLIB_HASH c1f33e1519edfc527127baeb0436b783430dfd256c643130169a3a71dc86aff9)
|
||||
set(GMMLIB_HASH bf23e9a3742b4fb98c7666c9e9b29f3219e4b2fb4d831aaf4eed71f5e2d17368)
|
||||
set(GMMLIB_HASH_TYPE SHA256)
|
||||
set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)
|
||||
|
||||
set(OCLOC_VERSION 22.49.25018.21)
|
||||
set(OCLOC_VERSION 22.38.24278)
|
||||
set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
|
||||
set(OCLOC_HASH 92362dae08b503a34e5d3820ed284198c452bcd5e7504d90eb69887b20492c06)
|
||||
set(OCLOC_HASH db0c542fccd651e6404b15a74d46027f1ce0eda8dc9e25a40cbb6c0faef257ee)
|
||||
set(OCLOC_HASH_TYPE SHA256)
|
||||
set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)
|
||||
|
||||
|
@@ -13,7 +13,7 @@ ExternalProject_Add(external_wayland
|
||||
# NOTE: `-lm` is needed for `libxml2` which is a static library that uses `libm.so`,
|
||||
# without this, math symbols such as `floor` aren't found.
|
||||
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH=${LIBDIR}/expat/lib/pkgconfig:${LIBDIR}/xml2/lib/pkgconfig:${LIBDIR}/ffi/lib/pkgconfig:$PKG_CONFIG_PATH
|
||||
${MESON} --prefix ${LIBDIR}/wayland ${MESON_BUILD_TYPE} -Ddocumentation=false -Dtests=false -D "c_link_args=-L${LIBDIR}/ffi/lib -lm" . ../external_wayland
|
||||
${MESON} --prefix ${LIBDIR}/wayland -Ddocumentation=false -Dtests=false -D "c_link_args=-L${LIBDIR}/ffi/lib -lm" . ../external_wayland
|
||||
BUILD_COMMAND ninja
|
||||
INSTALL_COMMAND ninja install
|
||||
)
|
||||
|
@@ -7,7 +7,7 @@ ExternalProject_Add(external_wayland_protocols
|
||||
PREFIX ${BUILD_DIR}/wayland-protocols
|
||||
# Use `-E` so the `PKG_CONFIG_PATH` can be defined to link against our own WAYLAND.
|
||||
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH=${LIBDIR}/wayland/lib64/pkgconfig:$PKG_CONFIG_PATH
|
||||
${MESON} --prefix ${LIBDIR}/wayland-protocols ${MESON_BUILD_TYPE} . ../external_wayland_protocols -Dtests=false
|
||||
${MESON} --prefix ${LIBDIR}/wayland-protocols . ../external_wayland_protocols -Dtests=false
|
||||
BUILD_COMMAND ninja
|
||||
INSTALL_COMMAND ninja install
|
||||
)
|
||||
|
@@ -17,13 +17,11 @@ ExternalProject_Add(external_xvidcore
|
||||
INSTALL_DIR ${LIBDIR}/xvidcore
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
ExternalProject_Add_Step(external_xvidcore after_install
|
||||
COMMAND ${CMAKE_COMMAND} -E rename ${LIBDIR}/xvidcore/lib/xvidcore.a ${LIBDIR}/xvidcore/lib/libxvidcore.a || true
|
||||
COMMAND ${CMAKE_COMMAND} -E remove ${LIBDIR}/xvidcore/lib/xvidcore.dll.a
|
||||
DEPENDEES install
|
||||
)
|
||||
endif()
|
||||
ExternalProject_Add_Step(external_xvidcore after_install
|
||||
COMMAND ${CMAKE_COMMAND} -E rename ${LIBDIR}/xvidcore/lib/xvidcore.a ${LIBDIR}/xvidcore/lib/libxvidcore.a || true
|
||||
COMMAND ${CMAKE_COMMAND} -E remove ${LIBDIR}/xvidcore/lib/xvidcore.dll.a
|
||||
DEPENDEES install
|
||||
)
|
||||
|
||||
if(MSVC)
|
||||
set_target_properties(external_xvidcore PROPERTIES FOLDER Mingw)
|
||||
|
@@ -517,7 +517,7 @@ OPENEXR_FORCE_REBUILD=false
|
||||
OPENEXR_SKIP=false
|
||||
_with_built_openexr=false
|
||||
|
||||
OIIO_VERSION="2.4.9.0"
|
||||
OIIO_VERSION="2.4.6.0"
|
||||
OIIO_VERSION_SHORT="2.4"
|
||||
OIIO_VERSION_MIN="2.2.0"
|
||||
OIIO_VERSION_MEX="2.5.0"
|
||||
@@ -6615,9 +6615,11 @@ print_info() {
|
||||
fi
|
||||
|
||||
if [ -d $INST/oiio ]; then
|
||||
_1="-D OPENIMAGEIO_ROOT_DIR=$INST/oiio"
|
||||
_1="-D WITH_OPENIMAGEIO=ON"
|
||||
_2="-D OPENIMAGEIO_ROOT_DIR=$INST/oiio"
|
||||
PRINT " $_1"
|
||||
_buildargs="$_buildargs $_1"
|
||||
PRINT " $_2"
|
||||
_buildargs="$_buildargs $_1 $_2"
|
||||
fi
|
||||
|
||||
if [ "$OSL_SKIP" = false ]; then
|
||||
|
@@ -1,8 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
# This script is part of the official build environment, see wiki page for details.
|
||||
# https://wiki.blender.org/wiki/Building_Blender/Other/Rocky8ReleaseEnvironment
|
||||
# This script is part of the official build environment, see WIKI page for details.
|
||||
# https://wiki.blender.org/wiki/Building_Blender/Other/CentOS7ReleaseEnvironment
|
||||
|
||||
set -e
|
||||
|
||||
@@ -59,7 +59,7 @@ PACKAGES_FOR_LIBS=(
|
||||
automake
|
||||
libtool
|
||||
|
||||
# Used to set rpath on shared libraries
|
||||
# TODO: why is this needed?
|
||||
patchelf
|
||||
|
||||
# Builds generated by meson use Ninja for the actual build.
|
||||
|
@@ -1,74 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
# This script ensures:
|
||||
# - One dependency is built at a time.
|
||||
# - That dependency uses all available cores.
|
||||
#
|
||||
# Without this, simply calling `make -j$(nproc)` from the `${CMAKE_BUILD_DIR}/deps/`
|
||||
# directory will build many projects at once.
|
||||
#
|
||||
# This is undesirable for the following reasons:
|
||||
#
|
||||
# - The output from projects is mixed together,
|
||||
# making it difficult to track down the cause of a build failure.
|
||||
#
|
||||
# - Larger dependencies such as LLVM can bottleneck the build process,
|
||||
# making it necessary to cancel the build and manually run build commands in each directory.
|
||||
#
|
||||
# - Building many projects at once means canceling (Control-C) can lead to the build being in an undefined state.
|
||||
# It's possible canceling happens as a patch is being applied or files are being copied.
|
||||
# (steps that aren't part of the compilation process where it's typically safe to cancel).
|
||||
|
||||
# The outermost invocation (no `MY_MAKE_CALL_LEVEL` in the environment) records the
# requested job count and forces itself to run serially. Nested invocations re-enter
# this script through `MAKE="$0"` and pick their `MAKEFLAGS` based on the nesting level.
if [[ -z "$MY_MAKE_CALL_LEVEL" ]]; then
  export MY_MAKE_CALL_LEVEL=0
  export MY_MAKEFLAGS=$MAKEFLAGS

  # Extract the jobs argument (`-jN`, `-j N`, `--jobs=N`).
  #
  # NOTE: the positional parameters must not be modified while scanning (no `shift`),
  # they are forwarded unchanged to `make` at the end of this script.
  add_next=0
  for i in "$@"; do
    case $i in
      -j*)
        export MY_JOBS_ARG=$i
        if [ "$MY_JOBS_ARG" = "-j" ]; then
          # A bare `-j` may be followed by the job count as a separate word.
          add_next=1
        else
          add_next=0
        fi
        ;;
      --jobs=*)
        MY_JOBS_ARG=$i
        add_next=0
        ;;
      *)
        if (( add_next == 1 )); then
          # Only a number is a job count, anything else (a target, another option)
          # belongs to `make` itself and must not end up in `MAKEFLAGS`.
          if [[ "$i" =~ ^[0-9]+$ ]]; then
            MY_JOBS_ARG="$MY_JOBS_ARG $i"
          fi
          add_next=0
        fi
        ;;
    esac
  done
  unset i add_next

  if [[ -z "$MY_JOBS_ARG" ]]; then
    MY_JOBS_ARG="-j$(nproc)"
  fi
  export MY_JOBS_ARG
  # Support user defined `MAKEFLAGS`.
  export MAKEFLAGS="$MY_MAKEFLAGS -j1"
else
  export MY_MAKE_CALL_LEVEL=$(( MY_MAKE_CALL_LEVEL + 1 ))
  if (( MY_MAKE_CALL_LEVEL == 1 )); then
    # Important to set jobs to 1, otherwise user defined jobs argument is used.
    export MAKEFLAGS="$MY_MAKEFLAGS -j1"
  elif (( MY_MAKE_CALL_LEVEL == 2 )); then
    # This is the level used by each sub-project.
    export MAKEFLAGS="$MY_MAKEFLAGS $MY_JOBS_ARG"
  fi
  # Else leave `MY_MAKEFLAGS` flags as-is, avoids setting a high number of jobs on recursive
  # calls (which may easily run out of memory). Let the job-server handle the rest.
fi

# Useful for troubleshooting the wrapper.
# echo "Call level: $MY_MAKE_CALL_LEVEL, args=$@".

# Call actual make but ensure recursive calls run via this script.
exec make MAKE="$0" "$@"
|
@@ -1,7 +1,7 @@
|
||||
diff -Naur external_igc_opencl_clang.orig/CMakeLists.txt external_igc_opencl_clang/CMakeLists.txt
|
||||
--- external_igc_opencl_clang.orig/CMakeLists.txt 2022-03-16 05:51:10 -0600
|
||||
+++ external_igc_opencl_clang/CMakeLists.txt 2022-05-23 10:40:09 -0600
|
||||
@@ -147,22 +147,24 @@
|
||||
@@ -126,22 +126,24 @@
|
||||
)
|
||||
endif()
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -80,7 +80,6 @@ set(_CLANG_FIND_COMPONENTS
|
||||
clangAST
|
||||
clangLex
|
||||
clangBasic
|
||||
clangSupport
|
||||
)
|
||||
|
||||
set(_CLANG_LIBRARIES)
|
||||
@@ -95,9 +94,7 @@ foreach(COMPONENT ${_CLANG_FIND_COMPONENTS})
|
||||
PATH_SUFFIXES
|
||||
lib64 lib
|
||||
)
|
||||
if(CLANG_${UPPERCOMPONENT}_LIBRARY)
|
||||
list(APPEND _CLANG_LIBRARIES "${CLANG_${UPPERCOMPONENT}_LIBRARY}")
|
||||
endif()
|
||||
list(APPEND _CLANG_LIBRARIES "${CLANG_${UPPERCOMPONENT}_LIBRARY}")
|
||||
endforeach()
|
||||
|
||||
|
||||
|
@@ -23,19 +23,19 @@ if(EXISTS ${SOURCE_DIR}/.git)
|
||||
|
||||
if(MY_WC_BRANCH STREQUAL "HEAD")
|
||||
# Detached HEAD, check whether commit hash is reachable
|
||||
# in the main branch
|
||||
# in the master branch
|
||||
execute_process(COMMAND git rev-parse --short=12 HEAD
|
||||
WORKING_DIRECTORY ${SOURCE_DIR}
|
||||
OUTPUT_VARIABLE MY_WC_HASH
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
execute_process(COMMAND git branch --list main blender-v* --contains ${MY_WC_HASH}
|
||||
execute_process(COMMAND git branch --list master blender-v* --contains ${MY_WC_HASH}
|
||||
WORKING_DIRECTORY ${SOURCE_DIR}
|
||||
OUTPUT_VARIABLE _git_contains_check
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
if(NOT _git_contains_check STREQUAL "")
|
||||
set(MY_WC_BRANCH "main")
|
||||
set(MY_WC_BRANCH "master")
|
||||
else()
|
||||
execute_process(COMMAND git show-ref --tags -d
|
||||
WORKING_DIRECTORY ${SOURCE_DIR}
|
||||
@@ -48,7 +48,7 @@ if(EXISTS ${SOURCE_DIR}/.git)
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
if(_git_tag_hashes MATCHES "${_git_head_hash}")
|
||||
set(MY_WC_BRANCH "main")
|
||||
set(MY_WC_BRANCH "master")
|
||||
else()
|
||||
execute_process(COMMAND git branch --contains ${MY_WC_HASH}
|
||||
WORKING_DIRECTORY ${SOURCE_DIR}
|
||||
|
@@ -36,7 +36,6 @@ set(WITH_IMAGE_WEBP OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_INPUT_IME OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_INPUT_NDOF OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_INTERNATIONAL OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_IO_PLY OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_IO_STL OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_IO_WAVEFRONT_OBJ OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_IO_GPENCIL OFF CACHE BOOL "" FORCE)
|
||||
@@ -53,6 +52,7 @@ set(WITH_OPENAL OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_OPENCOLLADA OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_OPENCOLORIO OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_OPENIMAGEDENOISE OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_OPENIMAGEIO OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_OPENMP OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_OPENSUBDIV OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_OPENVDB OFF CACHE BOOL "" FORCE)
|
||||
|
@@ -85,7 +85,7 @@ if(NOT APPLE)
|
||||
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_HIP_BINARIES OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_ONEAPI_BINARIES ON CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
|
@@ -11,11 +11,11 @@
|
||||
mkdir ~/blender-git
|
||||
cd ~/blender-git
|
||||
|
||||
git clone https://projects.blender.org/blender/blender.git
|
||||
git clone http://git.blender.org/blender.git
|
||||
cd blender
|
||||
git submodule update --init --recursive
|
||||
git submodule foreach git checkout main
|
||||
git submodule foreach git pull --rebase origin main
|
||||
git submodule foreach git checkout master
|
||||
git submodule foreach git pull --rebase origin master
|
||||
|
||||
# create build dir
|
||||
mkdir ~/blender-git/build-cmake
|
||||
@@ -35,7 +35,7 @@ ln -s ~/blender-git/build-cmake/bin/blender ~/blender-git/blender/blender.bin
|
||||
echo ""
|
||||
echo "* Useful Commands *"
|
||||
echo " Run Blender: ~/blender-git/blender/blender.bin"
|
||||
echo " Update Blender: git pull --rebase; git submodule foreach git pull --rebase origin main"
|
||||
echo " Update Blender: git pull --rebase; git submodule foreach git pull --rebase origin master"
|
||||
echo " Reconfigure Blender: cd ~/blender-git/build-cmake ; cmake ."
|
||||
echo " Build Blender: cd ~/blender-git/build-cmake ; make"
|
||||
echo ""
|
||||
|
@@ -544,7 +544,7 @@ endfunction()
|
||||
function(setup_platform_linker_libs
|
||||
target
|
||||
)
|
||||
# jemalloc must be early in the list, to be before pthread (see #57998).
|
||||
# jemalloc must be early in the list, to be before pthread (see T57998)
|
||||
if(WITH_MEM_JEMALLOC)
|
||||
target_link_libraries(${target} ${JEMALLOC_LIBRARIES})
|
||||
endif()
|
||||
@@ -702,7 +702,6 @@ macro(remove_strict_flags)
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
remove_cc_flag(/w34100) # Restore warn C4100 (unreferenced formal parameter) back to w4
|
||||
remove_cc_flag(/w34189) # Restore warn C4189 (unused variable) back to w4
|
||||
endif()
|
||||
|
||||
@@ -722,7 +721,7 @@ macro(remove_extra_strict_flags)
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
remove_cc_flag(/w34100) # Restore warn C4100 (unreferenced formal parameter) back to w4
|
||||
# TODO
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
@@ -1091,7 +1090,7 @@ function(msgfmt_simple
|
||||
add_custom_command(
|
||||
OUTPUT ${_file_to}
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${_file_to_path}
|
||||
COMMAND ${CMAKE_COMMAND} -E env ${PLATFORM_ENV_BUILD} "$<TARGET_FILE:msgfmt>" ${_file_from} ${_file_to}
|
||||
COMMAND "$<TARGET_FILE:msgfmt>" ${_file_from} ${_file_to}
|
||||
DEPENDS msgfmt ${_file_from})
|
||||
|
||||
set_source_files_properties(${_file_to} PROPERTIES GENERATED TRUE)
|
||||
|
@@ -270,7 +270,19 @@ if(WITH_PUGIXML)
|
||||
find_package(PugiXML REQUIRED)
|
||||
endif()
|
||||
|
||||
find_package(OpenImageIO REQUIRED)
|
||||
if(WITH_OPENIMAGEIO)
|
||||
find_package(OpenImageIO)
|
||||
list(APPEND OPENIMAGEIO_LIBRARIES
|
||||
${PNG_LIBRARIES}
|
||||
${JPEG_LIBRARIES}
|
||||
${TIFF_LIBRARY}
|
||||
${OPENEXR_LIBRARIES}
|
||||
${OPENJPEG_LIBRARIES}
|
||||
${ZLIB_LIBRARIES}
|
||||
)
|
||||
set(OPENIMAGEIO_DEFINITIONS "-DOIIO_STATIC_BUILD")
|
||||
set(OPENIMAGEIO_IDIFF "${LIBDIR}/openimageio/bin/idiff")
|
||||
endif()
|
||||
add_bundled_libraries(openimageio/lib)
|
||||
|
||||
if(WITH_OPENCOLORIO)
|
||||
@@ -428,7 +440,7 @@ string(APPEND PLATFORM_LINKFLAGS " -stdlib=libc++")
|
||||
# Make stack size more similar to Embree, required for Embree.
|
||||
string(APPEND PLATFORM_LINKFLAGS_EXECUTABLE " -Wl,-stack_size,0x100000")
|
||||
|
||||
# Suppress ranlib "has no symbols" warnings (workaround for #48250).
|
||||
# Suppress ranlib "has no symbols" warnings (workaround for T48250)
|
||||
set(CMAKE_C_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
|
||||
set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
|
||||
# llvm-ranlib doesn't support this flag. Xcode's libtool does.
|
||||
|
@@ -438,7 +438,32 @@ if(WITH_IMAGE_WEBP)
|
||||
set_and_warn_library_found("WebP" WEBP_FOUND WITH_IMAGE_WEBP)
|
||||
endif()
|
||||
|
||||
find_package_wrapper(OpenImageIO REQUIRED)
|
||||
if(WITH_OPENIMAGEIO)
|
||||
find_package_wrapper(OpenImageIO)
|
||||
set(OPENIMAGEIO_LIBRARIES
|
||||
${OPENIMAGEIO_LIBRARIES}
|
||||
${PNG_LIBRARIES}
|
||||
${JPEG_LIBRARIES}
|
||||
${ZLIB_LIBRARIES}
|
||||
)
|
||||
|
||||
set(OPENIMAGEIO_DEFINITIONS "")
|
||||
|
||||
if(WITH_BOOST)
|
||||
list(APPEND OPENIMAGEIO_LIBRARIES "${BOOST_LIBRARIES}")
|
||||
endif()
|
||||
if(WITH_IMAGE_TIFF)
|
||||
list(APPEND OPENIMAGEIO_LIBRARIES "${TIFF_LIBRARY}")
|
||||
endif()
|
||||
if(WITH_IMAGE_OPENEXR)
|
||||
list(APPEND OPENIMAGEIO_LIBRARIES "${OPENEXR_LIBRARIES}")
|
||||
endif()
|
||||
if(WITH_IMAGE_WEBP)
|
||||
list(APPEND OPENIMAGEIO_LIBRARIES "${WEBP_LIBRARIES}")
|
||||
endif()
|
||||
|
||||
set_and_warn_library_found("OPENIMAGEIO" OPENIMAGEIO_FOUND WITH_OPENIMAGEIO)
|
||||
endif()
|
||||
add_bundled_libraries(openimageio/lib)
|
||||
|
||||
if(WITH_OPENCOLORIO)
|
||||
|
@@ -121,7 +121,7 @@ if(WITH_WINDOWS_BUNDLE_CRT)
|
||||
include(InstallRequiredSystemLibraries)
|
||||
|
||||
# ucrtbase(d).dll cannot be in the manifest, due to the way windows 10 handles
|
||||
# redirects for this dll, for details see #88813.
|
||||
# redirects for this dll, for details see T88813.
|
||||
foreach(lib ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS})
|
||||
string(FIND ${lib} "ucrtbase" pos)
|
||||
if(NOT pos EQUAL -1)
|
||||
@@ -178,8 +178,8 @@ if(NOT MSVC_CLANG)
|
||||
endif()
|
||||
|
||||
if(WITH_WINDOWS_SCCACHE AND CMAKE_VS_MSBUILD_COMMAND)
|
||||
message(WARNING "Disabling sccache, sccache is not supported with msbuild")
|
||||
set(WITH_WINDOWS_SCCACHE OFF)
|
||||
message(WARNING "Disabling sccache, sccache is not supported with msbuild")
|
||||
set(WITH_WINDOWS_SCCACHE OFF)
|
||||
endif()
|
||||
|
||||
# Debug Symbol format
|
||||
@@ -295,7 +295,7 @@ unset(MATERIALX_LIB_FOLDER_EXISTS)
|
||||
if(NOT MSVC_CLANG AND # Available with MSVC 15.7+ but not for CLANG.
|
||||
NOT WITH_WINDOWS_SCCACHE AND # And not when sccache is enabled
|
||||
NOT VS_CLANG_TIDY) # Clang-tidy does not like these options
|
||||
add_compile_options(/experimental:external /external:I "${LIBDIR}" /external:W0)
|
||||
add_compile_options(/experimental:external /external:templates- /external:I "${LIBDIR}" /external:W0)
|
||||
endif()
|
||||
|
||||
# Add each of our libraries to our cmake_prefix_path so find_package() could work
|
||||
@@ -522,28 +522,6 @@ if(WITH_PYTHON)
|
||||
set(PYTHON_LIBRARIES debug "${PYTHON_LIBRARY_DEBUG}" optimized "${PYTHON_LIBRARY}" )
|
||||
endif()
|
||||
|
||||
if(NOT WITH_WINDOWS_FIND_MODULES)
|
||||
# even if boost is off, we still need to install the dlls when we use our lib folder since
|
||||
# some of the other dependencies may need them. For this to work, BOOST_VERSION,
|
||||
# BOOST_POSTFIX, and BOOST_DEBUG_POSTFIX need to be set.
|
||||
set(BOOST ${LIBDIR}/boost)
|
||||
set(BOOST_INCLUDE_DIR ${BOOST}/include)
|
||||
set(BOOST_LIBPATH ${BOOST}/lib)
|
||||
set(BOOST_VERSION_HEADER ${BOOST_INCLUDE_DIR}/boost/version.hpp)
|
||||
if(EXISTS ${BOOST_VERSION_HEADER})
|
||||
file(STRINGS "${BOOST_VERSION_HEADER}" BOOST_LIB_VERSION REGEX "#define BOOST_LIB_VERSION ")
|
||||
if(BOOST_LIB_VERSION MATCHES "#define BOOST_LIB_VERSION \"([0-9_]+)\"")
|
||||
set(BOOST_VERSION "${CMAKE_MATCH_1}")
|
||||
endif()
|
||||
endif()
|
||||
if(NOT BOOST_VERSION)
|
||||
message(FATAL_ERROR "Unable to determine Boost version")
|
||||
endif()
|
||||
set(BOOST_POSTFIX "vc142-mt-x64-${BOOST_VERSION}")
|
||||
set(BOOST_DEBUG_POSTFIX "vc142-mt-gyd-x64-${BOOST_VERSION}")
|
||||
set(BOOST_PREFIX "")
|
||||
endif()
|
||||
|
||||
if(WITH_BOOST)
|
||||
if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||
set(boost_extra_libs wave)
|
||||
@@ -559,6 +537,22 @@ if(WITH_BOOST)
|
||||
endif()
|
||||
if(NOT Boost_FOUND)
|
||||
warn_hardcoded_paths(BOOST)
|
||||
set(BOOST ${LIBDIR}/boost)
|
||||
set(BOOST_INCLUDE_DIR ${BOOST}/include)
|
||||
set(BOOST_LIBPATH ${BOOST}/lib)
|
||||
set(BOOST_VERSION_HEADER ${BOOST_INCLUDE_DIR}/boost/version.hpp)
|
||||
if(EXISTS ${BOOST_VERSION_HEADER})
|
||||
file(STRINGS "${BOOST_VERSION_HEADER}" BOOST_LIB_VERSION REGEX "#define BOOST_LIB_VERSION ")
|
||||
if(BOOST_LIB_VERSION MATCHES "#define BOOST_LIB_VERSION \"([0-9_]+)\"")
|
||||
set(BOOST_VERSION "${CMAKE_MATCH_1}")
|
||||
endif()
|
||||
endif()
|
||||
if(NOT BOOST_VERSION)
|
||||
message(FATAL_ERROR "Unable to determine Boost version")
|
||||
endif()
|
||||
set(BOOST_POSTFIX "vc142-mt-x64-${BOOST_VERSION}")
|
||||
set(BOOST_DEBUG_POSTFIX "vc142-mt-gyd-x64-${BOOST_VERSION}")
|
||||
set(BOOST_PREFIX "")
|
||||
# This file is new in 3.4; if it does not exist, assume we are building against 3.3 libs.
|
||||
set(BOOST_34_TRIGGER_FILE ${BOOST_LIBPATH}/${BOOST_PREFIX}boost_python310-${BOOST_DEBUG_POSTFIX}.lib)
|
||||
if(NOT EXISTS ${BOOST_34_TRIGGER_FILE})
|
||||
@@ -608,18 +602,25 @@ if(WITH_BOOST)
|
||||
set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB")
|
||||
endif()
|
||||
|
||||
windows_find_package(OpenImageIO)
|
||||
if(NOT OpenImageIO_FOUND)
|
||||
set(OPENIMAGEIO ${LIBDIR}/OpenImageIO)
|
||||
set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib)
|
||||
set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO}/include)
|
||||
set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR})
|
||||
set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib)
|
||||
set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib)
|
||||
set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
|
||||
if(WITH_OPENIMAGEIO)
|
||||
windows_find_package(OpenImageIO)
|
||||
if(NOT OpenImageIO_FOUND)
|
||||
set(OPENIMAGEIO ${LIBDIR}/OpenImageIO)
|
||||
set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib)
|
||||
set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO}/include)
|
||||
set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR})
|
||||
set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib)
|
||||
set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib)
|
||||
set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
|
||||
endif()
|
||||
set(OPENIMAGEIO_DEFINITIONS "-DUSE_TBB=0")
|
||||
set(OPENIMAGEIO_IDIFF "${OPENIMAGEIO}/bin/idiff.exe")
|
||||
# If the .dll does not exist, assume it is a static OIIO
|
||||
if(NOT EXISTS ${OPENIMAGEIO}/bin/OpenImageIO.dll)
|
||||
add_definitions(-DOIIO_STATIC_DEFINE)
|
||||
endif()
|
||||
add_definitions(-DOIIO_NO_SSE=1)
|
||||
endif()
|
||||
add_definitions(-DOIIO_NO_SSE=1)
|
||||
|
||||
if(WITH_LLVM)
|
||||
set(LLVM_ROOT_DIR ${LIBDIR}/llvm CACHE PATH "Path to the LLVM installation")
|
||||
@@ -900,11 +901,11 @@ endif()
|
||||
|
||||
if(WINDOWS_PYTHON_DEBUG)
|
||||
# Include the system scripts in the blender_python_system_scripts project.
|
||||
file(GLOB_RECURSE inFiles "${CMAKE_SOURCE_DIR}/scripts/*.*" )
|
||||
file(GLOB_RECURSE inFiles "${CMAKE_SOURCE_DIR}/release/scripts/*.*" )
|
||||
add_custom_target(blender_python_system_scripts SOURCES ${inFiles})
|
||||
foreach(_source IN ITEMS ${inFiles})
|
||||
get_filename_component(_source_path "${_source}" PATH)
|
||||
string(REPLACE "${CMAKE_SOURCE_DIR}/scripts/" "" _source_path "${_source_path}")
|
||||
string(REPLACE "${CMAKE_SOURCE_DIR}/release/scripts/" "" _source_path "${_source_path}")
|
||||
string(REPLACE "/" "\\" _group_path "${_source_path}")
|
||||
source_group("${_group_path}" FILES "${_source}")
|
||||
endforeach()
|
||||
@@ -939,7 +940,7 @@ if(WINDOWS_PYTHON_DEBUG)
|
||||
file(WRITE ${USER_PROPS_FILE} "<?xml version=\"1.0\" encoding=\"utf-8\"?>
|
||||
<Project DefaultTargets=\"Build\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">
|
||||
<PropertyGroup>
|
||||
<LocalDebuggerCommandArguments>-con --env-system-scripts \"${CMAKE_SOURCE_DIR}/scripts\" </LocalDebuggerCommandArguments>
|
||||
<LocalDebuggerCommandArguments>-con --env-system-scripts \"${CMAKE_SOURCE_DIR}/release/scripts\" </LocalDebuggerCommandArguments>
|
||||
</PropertyGroup>
|
||||
</Project>")
|
||||
endif()
|
||||
@@ -993,23 +994,6 @@ if(WITH_VULKAN_BACKEND)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_VULKAN_BACKEND)
|
||||
if(EXISTS ${LIBDIR}/shaderc)
|
||||
set(SHADERC_FOUND On)
|
||||
set(SHADERC_ROOT_DIR ${LIBDIR}/shaderc)
|
||||
set(SHADERC_INCLUDE_DIR ${SHADERC_ROOT_DIR}/include)
|
||||
set(SHADERC_INCLUDE_DIRS ${SHADERC_INCLUDE_DIR})
|
||||
set(SHADERC_LIBRARY
|
||||
DEBUG ${SHADERC_ROOT_DIR}/lib/shaderc_shared_d.lib
|
||||
OPTIMIZED ${SHADERC_ROOT_DIR}/lib/shaderc_shared.lib
|
||||
)
|
||||
set(SHADERC_LIBRARIES ${SHADERC_LIBRARY})
|
||||
else()
|
||||
message(WARNING "Shaderc was not found, disabling WITH_VULKAN_BACKEND")
|
||||
set(WITH_VULKAN_BACKEND OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES AND WITH_CYCLES_PATH_GUIDING)
|
||||
find_package(openpgl QUIET)
|
||||
if(openpgl_FOUND)
|
||||
@@ -1056,7 +1040,7 @@ endif()
|
||||
|
||||
# Environment variables to run precompiled executables that needed libraries.
|
||||
list(JOIN PLATFORM_BUNDLED_LIBRARY_DIRS ";" _library_paths)
|
||||
set(PLATFORM_ENV_BUILD_DIRS "${LIBDIR}/tbb/bin\;${LIBDIR}/OpenImageIO/bin\;${LIBDIR}/boost/lib\;${LIBDIR}/openexr/bin\;${LIBDIR}/imath/bin\;${PATH}")
|
||||
set(PLATFORM_ENV_BUILD_DIRS "${LIBDIR}/OpenImageIO/bin\;${LIBDIR}/boost/lib\;${LIBDIR}/openexr/bin\;${LIBDIR}/imath/bin\;${PATH}")
|
||||
set(PLATFORM_ENV_BUILD "PATH=${PLATFORM_ENV_BUILD_DIRS}")
|
||||
# Install needs the additional folders from PLATFORM_ENV_BUILD_DIRS as well, as tools like idiff and abcls use the release mode dlls
|
||||
set(PLATFORM_ENV_INSTALL "PATH=${CMAKE_INSTALL_PREFIX_WITH_CONFIG}/blender.shared/\;${PLATFORM_ENV_BUILD_DIRS}\;$ENV{PATH}")
|
||||
|
@@ -142,7 +142,7 @@ def cmake_advanced_info() -> Union[Tuple[List[str], List[Tuple[str, str]]], Tupl
|
||||
|
||||
make_exe = cmake_cache_var("CMAKE_MAKE_PROGRAM")
|
||||
if make_exe is None:
|
||||
print("Make command not found: CMAKE_MAKE_PROGRAM")
|
||||
print("Make command not found in: %r not found" % project_path)
|
||||
return None, None
|
||||
|
||||
make_exe_basename = os.path.basename(make_exe)
|
||||
|
@@ -1,3 +1,53 @@
|
||||
#
|
||||
# Used by Buildbot build pipeline make_update.py script only for now
|
||||
# We intended to update the make_update.py in the branches to use this file eventually
|
||||
#
|
||||
update-code:
|
||||
git:
|
||||
submodules:
|
||||
- branch: master
|
||||
commit_id: HEAD
|
||||
path: release/scripts/addons
|
||||
- branch: master
|
||||
commit_id: HEAD
|
||||
path: release/scripts/addons_contrib
|
||||
- branch: master
|
||||
commit_id: HEAD
|
||||
path: release/datafiles/locale
|
||||
- branch: master
|
||||
commit_id: HEAD
|
||||
path: source/tools
|
||||
svn:
|
||||
libraries:
|
||||
darwin-arm64:
|
||||
branch: trunk
|
||||
commit_id: HEAD
|
||||
path: lib/darwin_arm64
|
||||
darwin-x86_64:
|
||||
branch: trunk
|
||||
commit_id: HEAD
|
||||
path: lib/darwin
|
||||
linux-x86_64:
|
||||
branch: trunk
|
||||
commit_id: HEAD
|
||||
path: lib/linux_x86_64_glibc_228
|
||||
windows-amd64:
|
||||
branch: trunk
|
||||
commit_id: HEAD
|
||||
path: lib/win64_vc15
|
||||
tests:
|
||||
branch: trunk
|
||||
commit_id: HEAD
|
||||
path: lib/tests
|
||||
benchmarks:
|
||||
branch: trunk
|
||||
commit_id: HEAD
|
||||
path: lib/benchmarks
|
||||
assets:
|
||||
branch: trunk
|
||||
commit_id: HEAD
|
||||
path: lib/assets
|
||||
|
||||
#
|
||||
# Buildbot only configs
|
||||
#
|
||||
|
@@ -58,7 +58,7 @@ Each Blender release supports one Python version, and the package is only compat
|
||||
## Source Code
|
||||
|
||||
* [Releases](https://download.blender.org/source/)
|
||||
* Repository: [projects.blender.org/blender/blender.git](https://projects.blender.org/blender/blender)
|
||||
* Repository: [git.blender.org/blender.git](https://git.blender.org/gitweb/gitweb.cgi/blender.git)
|
||||
|
||||
## Credits
|
||||
|
||||
|
@@ -135,7 +135,7 @@ def submodules_to_manifest(
|
||||
submodule = line.split()[1]
|
||||
|
||||
# Don't use native slashes as GIT for MS-Windows outputs forward slashes.
|
||||
if skip_addon_contrib and submodule == "scripts/addons_contrib":
|
||||
if skip_addon_contrib and submodule == "release/scripts/addons_contrib":
|
||||
continue
|
||||
|
||||
for path in git_ls_files(blender_srcdir / submodule):
|
||||
|
@@ -16,28 +16,14 @@ import shutil
|
||||
import sys
|
||||
|
||||
import make_utils
|
||||
from pathlib import Path
|
||||
from make_utils import call, check_output
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from typing import (
|
||||
List,
|
||||
Iterable,
|
||||
Optional,
|
||||
)
|
||||
|
||||
|
||||
class Submodule:
    """Plain record holding the ``path``, ``branch`` and ``branch_fallback`` strings it was built with."""

    path: str
    branch: str
    branch_fallback: str

    def __init__(self, path: str, branch: str, branch_fallback: str) -> None:
        # Store the constructor arguments verbatim, no validation is performed.
        self.path, self.branch, self.branch_fallback = path, branch, branch_fallback
|
||||
|
||||
|
||||
def print_stage(text: str) -> None:
|
||||
print("")
|
||||
print(text)
|
||||
@@ -56,7 +42,6 @@ def parse_arguments() -> argparse.Namespace:
|
||||
parser.add_argument("--svn-branch", default=None)
|
||||
parser.add_argument("--git-command", default="git")
|
||||
parser.add_argument("--use-linux-libraries", action="store_true")
|
||||
parser.add_argument("--architecture", type=str, choices=("x86_64", "amd64", "arm64",))
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
@@ -66,19 +51,6 @@ def get_blender_git_root() -> str:
|
||||
# Setup for precompiled libraries and tests from svn.
|
||||
|
||||
|
||||
def get_effective_architecture(args: argparse.Namespace) -> str:
    """
    Return the architecture name to use.

    An explicitly given ``args.architecture`` always wins. Otherwise the value is detected:
    "arm64" when ``platform.version()`` mentions ARM64, else the lower-cased ``platform.machine()``.
    """
    requested = args.architecture
    if not requested:
        # Check platform.version to detect arm64 with x86_64 python binary.
        if "ARM64" in platform.version():
            return "arm64"
        return platform.machine().lower()

    assert isinstance(requested, str)
    return requested
|
||||
|
||||
|
||||
def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None:
|
||||
svn_non_interactive = [args.svn_command, '--non-interactive']
|
||||
|
||||
@@ -86,11 +58,11 @@ def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None
|
||||
svn_url = make_utils.svn_libraries_base_url(release_version, args.svn_branch)
|
||||
|
||||
# Checkout precompiled libraries
|
||||
architecture = get_effective_architecture(args)
|
||||
if sys.platform == 'darwin':
|
||||
if architecture == 'arm64':
|
||||
# Check platform.version to detect arm64 with x86_64 python binary.
|
||||
if platform.machine() == 'arm64' or ('ARM64' in platform.version()):
|
||||
lib_platform = "darwin_arm64"
|
||||
elif architecture == 'x86_64':
|
||||
elif platform.machine() == 'x86_64':
|
||||
lib_platform = "darwin"
|
||||
else:
|
||||
lib_platform = None
|
||||
@@ -198,7 +170,7 @@ def git_update_skip(args: argparse.Namespace, check_remote_exists: bool = True)
|
||||
return "rebase or merge in progress, complete it first"
|
||||
|
||||
# Abort if uncommitted changes.
|
||||
changes = check_output([args.git_command, 'status', '--porcelain', '--untracked-files=no', '--ignore-submodules'])
|
||||
changes = check_output([args.git_command, 'status', '--porcelain', '--untracked-files=no'])
|
||||
if len(changes) != 0:
|
||||
return "you have unstaged changes"
|
||||
|
||||
@@ -212,296 +184,97 @@ def git_update_skip(args: argparse.Namespace, check_remote_exists: bool = True)
|
||||
return ""
|
||||
|
||||
|
||||
def use_upstream_workflow(args: argparse.Namespace) -> bool:
    """Report whether `make_utils.git_remote_exist` finds a remote named "upstream"."""
    has_upstream_remote = make_utils.git_remote_exist(args.git_command, "upstream")
    return has_upstream_remote
|
||||
|
||||
|
||||
def work_tree_update_upstream_workflow(args: argparse.Namespace, use_fetch: bool = True) -> str:
    """
    Fast-forward the current local branch to the branch of the same name on the "upstream" remote.

    :param args: parsed command line arguments, ``args.git_command`` is the Git executable to run.
    :param use_fetch: when true, run ``git fetch upstream`` before looking for the branch.
    :return: an empty string on success, ``"no_branch"`` when ``upstream/<branch>`` does not exist,
       or an error message when the fast-forward merge fails.
    """
    git = args.git_command
    local_branch = make_utils.git_branch(git)

    if use_fetch:
        call((git, "fetch", "upstream"))

    remote_ref = f"upstream/{local_branch}"
    if not make_utils.git_branch_exists(git, remote_ref):
        return "no_branch"

    # A failing merge is reported to the caller instead of aborting the script.
    merge_failed = call((git, "merge", "--ff-only", remote_ref), exit_on_error=False) != 0
    return "Unable to fast forward\n" if merge_failed else ""
|
||||
|
||||
|
||||
def work_tree_update(args: argparse.Namespace, use_fetch: bool = True) -> str:
    """
    Update the Git working tree, choosing the strategy from the configured remotes.

    When an "upstream" remote exists the branch is fast-forwarded from it. If the branch is not
    present in upstream (or there is no upstream remote), ``git pull --rebase`` is used instead.

    :return: the message from the upstream update, or an empty string after a plain pull.
    """
    if use_upstream_workflow(args):
        upstream_result = work_tree_update_upstream_workflow(args, use_fetch)
        branch_in_upstream = upstream_result != "no_branch"
        if branch_in_upstream:
            return upstream_result

        # There is an upstream configured but the local branch is not in it,
        # fall through and update the branch from the fork.

    call([args.git_command, "pull", "--rebase"])
    return ""
|
||||
|
||||
|
||||
# Update blender repository.
|
||||
def blender_update(args: argparse.Namespace) -> str:
|
||||
def blender_update(args: argparse.Namespace) -> None:
|
||||
print_stage("Updating Blender Git Repository")
|
||||
|
||||
return work_tree_update(args)
|
||||
call([args.git_command, "pull", "--rebase"])
|
||||
|
||||
|
||||
def resolve_external_url(blender_url: str, repo_name: str) -> str:
    """Build the URL of the repository `repo_name` located next to the repository at `blender_url`."""
    # The trailing slash makes the last path component count as a directory,
    # so that ".." steps out of it.
    base_url = f"{blender_url}/"
    sibling_path = f"../{repo_name}"
    return urljoin(base_url, sibling_path)
|
||||
# Update submodules.
|
||||
def submodules_update(
|
||||
args: argparse.Namespace,
|
||||
release_version: Optional[str],
|
||||
branch: Optional[str],
|
||||
) -> str:
|
||||
print_stage("Updating Submodules")
|
||||
if make_utils.command_missing(args.git_command):
|
||||
sys.stderr.write("git not found, can't update code\n")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def external_script_copy_old_submodule_over(args: argparse.Namespace, directory_name: str) -> None:
    """
    Turn the old submodule checkout ``release/scripts/<directory_name>`` into a
    stand-alone repository at ``scripts/<directory_name>``.
    """
    repo_root = Path(get_blender_git_root())
    destination = repo_root / "scripts" / directory_name

    legacy_relative = Path("release") / "scripts" / directory_name
    print(f"Moving {legacy_relative} to scripts/{directory_name} ...")
    shutil.move(repo_root / legacy_relative, destination)

    # The moved ".git" is a file with the path to a submodule bare repo inside of the main
    # repo .git/modules directory, remove it so the real repository can take its place.
    destination_dot_git = destination / ".git"
    destination_dot_git.unlink()

    modules_relative = Path(".git") / "modules" / "release" / "scripts" / directory_name
    print(f"Copying {modules_relative} to scripts/{directory_name}/.git ...")
    shutil.copytree(repo_root / modules_relative, destination_dot_git)

    # Drop the `core.worktree` setting from the copied repository configuration.
    config_path = destination_dot_git / "config"
    call((args.git_command, "config", "--file", str(config_path), "--unset", "core.worktree"))
|
||||
|
||||
|
||||
def external_script_initialize_if_needed(args: argparse.Namespace,
                                         repo_name: str,
                                         directory_name: str) -> None:
    """
    Make sure ``scripts/<directory_name>`` exists, creating the checkout when it is missing.

    An existing old submodule under ``release/scripts`` is migrated in place,
    otherwise the repository `repo_name` is cloned.
    """
    repo_root = Path(get_blender_git_root())
    checkout_dir = repo_root / "scripts" / directory_name

    if checkout_dir.exists():
        return

    print(f"Initializing scripts/{directory_name} ...")

    # Re-use the old submodule when there is one and the main ".git" is a directory.
    legacy_dot_git = repo_root / "release" / "scripts" / directory_name / ".git"
    if legacy_dot_git.exists() and (repo_root / ".git").is_dir():
        external_script_copy_old_submodule_over(args, directory_name)
        return

    if use_upstream_workflow(args):
        remote_name = "upstream"
    else:
        remote_name = "origin"
    main_repo_url = make_utils.git_get_remote_url(args.git_command, remote_name)
    clone_url = resolve_external_url(main_repo_url, repo_name)

    # When running `make update` from a freshly cloned fork check whether the fork of the
    # submodule is available. If not, switch to the submodule relative to the main blender repository.
    if remote_name == "origin":
        if not make_utils.git_is_remote_repository(args.git_command, clone_url):
            clone_url = resolve_external_url("https://projects.blender.org/blender/blender", repo_name)

    call((args.git_command, "clone", "--origin", remote_name, clone_url, str(checkout_dir)))
|
||||
|
||||
|
||||
def external_script_add_origin_if_needed(args: argparse.Namespace,
|
||||
repo_name: str,
|
||||
directory_name: str) -> None:
|
||||
"""
|
||||
Add remote called 'origin' if there is a fork of the external repository available
|
||||
|
||||
This is only done when using Github style upstream workflow in the main repository.
|
||||
"""
|
||||
|
||||
if not use_upstream_workflow(args):
|
||||
return
|
||||
|
||||
cwd = os.getcwd()
|
||||
|
||||
blender_git_root = Path(get_blender_git_root())
|
||||
scripts_dir = blender_git_root / "scripts"
|
||||
external_dir = scripts_dir / directory_name
|
||||
|
||||
origin_blender_url = make_utils.git_get_remote_url(args.git_command, "origin")
|
||||
origin_external_url = resolve_external_url(origin_blender_url, repo_name)
|
||||
|
||||
try:
|
||||
os.chdir(external_dir)
|
||||
|
||||
if (make_utils.git_remote_exist(args.git_command, "origin") or
|
||||
not make_utils.git_remote_exist(args.git_command, "upstream")):
|
||||
return
|
||||
|
||||
if not make_utils.git_is_remote_repository(args.git_command, origin_external_url):
|
||||
return
|
||||
|
||||
print(f"Adding origin remote to {directory_name} pointing to fork ...")
|
||||
|
||||
# Non-obvious tricks to introduce the new remote called "origin" to the existing
|
||||
# submodule configuration.
|
||||
#
|
||||
# This is all within the content of creating a fork of a submodule after `make update`
|
||||
# has been run and possibly local branches tracking upstream were added.
|
||||
#
|
||||
# The idea here goes as following:
|
||||
#
|
||||
# - Rename remote "upstream" to "origin", which takes care of changing the names of
|
||||
# remotes the local branches are tracking.
|
||||
#
|
||||
# - Change the URL to the "origin", which so was was still pointing to upstream.
|
||||
#
|
||||
# - Re-introduce the "upstream" remote, with the same URL as it had prior to rename.
|
||||
|
||||
upstream_url = make_utils.git_get_remote_url(args.git_command, "upstream")
|
||||
|
||||
call((args.git_command, "remote", "rename", "upstream", "origin"))
|
||||
make_utils.git_set_config(args.git_command, f"remote.origin.url", origin_external_url)
|
||||
|
||||
call((args.git_command, "remote", "add", "upstream", upstream_url))
|
||||
finally:
|
||||
os.chdir(cwd)
|
||||
|
||||
return
|
||||
|
||||
|
||||
def external_scripts_update(args: argparse.Namespace,
|
||||
repo_name: str,
|
||||
directory_name: str,
|
||||
branch: Optional[str]) -> str:
|
||||
"""Update a single external checkout with the given name in the scripts folder"""
|
||||
|
||||
external_script_initialize_if_needed(args, repo_name, directory_name)
|
||||
external_script_add_origin_if_needed(args, repo_name, directory_name)
|
||||
|
||||
print(f"Updating scripts/{directory_name} ...")
|
||||
|
||||
cwd = os.getcwd()
|
||||
|
||||
blender_git_root = Path(get_blender_git_root())
|
||||
scripts_dir = blender_git_root / "scripts"
|
||||
external_dir = scripts_dir / directory_name
|
||||
|
||||
# Update externals to appropriate given branch, falling back to main if none is given and/or
|
||||
# found in a sub-repository.
|
||||
branch_fallback = "main"
|
||||
# Update submodules to appropriate given branch,
|
||||
# falling back to master if none is given and/or found in a sub-repository.
|
||||
branch_fallback = "master"
|
||||
if not branch:
|
||||
branch = branch_fallback
|
||||
|
||||
submodules = [
|
||||
("release/scripts/addons", branch, branch_fallback),
|
||||
("release/scripts/addons_contrib", branch, branch_fallback),
|
||||
("release/datafiles/locale", branch, branch_fallback),
|
||||
("source/tools", branch, branch_fallback),
|
||||
]
|
||||
|
||||
# Initialize submodules only if needed.
|
||||
for submodule_path, submodule_branch, submodule_branch_fallback in submodules:
|
||||
if not os.path.exists(os.path.join(submodule_path, ".git")):
|
||||
call([args.git_command, "submodule", "update", "--init", "--recursive"])
|
||||
break
|
||||
|
||||
# Checkout appropriate branch and pull changes.
|
||||
skip_msg = ""
|
||||
|
||||
try:
|
||||
os.chdir(external_dir)
|
||||
msg = git_update_skip(args, check_remote_exists=False)
|
||||
if msg:
|
||||
skip_msg += directory_name + " skipped: " + msg + "\n"
|
||||
else:
|
||||
# Find a matching branch that exists.
|
||||
for remote in ("origin", "upstream"):
|
||||
if make_utils.git_remote_exist(args.git_command, remote):
|
||||
call([args.git_command, "fetch", remote])
|
||||
|
||||
submodule_branch = branch
|
||||
|
||||
if make_utils.git_branch_exists(args.git_command, submodule_branch):
|
||||
pass
|
||||
elif make_utils.git_branch_exists(args.git_command, branch_fallback):
|
||||
submodule_branch = branch_fallback
|
||||
for submodule_path, submodule_branch, submodule_branch_fallback in submodules:
|
||||
cwd = os.getcwd()
|
||||
try:
|
||||
os.chdir(submodule_path)
|
||||
msg = git_update_skip(args, check_remote_exists=False)
|
||||
if msg:
|
||||
skip_msg += submodule_path + " skipped: " + msg + "\n"
|
||||
else:
|
||||
# Skip.
|
||||
submodule_branch = ""
|
||||
# Find a matching branch that exists.
|
||||
call([args.git_command, "fetch", "origin"])
|
||||
if make_utils.git_branch_exists(args.git_command, submodule_branch):
|
||||
pass
|
||||
elif make_utils.git_branch_exists(args.git_command, submodule_branch_fallback):
|
||||
submodule_branch = submodule_branch_fallback
|
||||
else:
|
||||
# Skip.
|
||||
submodule_branch = ""
|
||||
|
||||
# Switch to branch and pull.
|
||||
if submodule_branch:
|
||||
if make_utils.git_branch(args.git_command) != submodule_branch:
|
||||
# If the local branch exists just check out to it.
|
||||
# If there is no local branch but only remote specify an explicit remote.
|
||||
# Without this explicit specification Git attempts to set-up tracking
|
||||
# automatically and fails when the branch is available in multiple remotes.
|
||||
if make_utils.git_local_branch_exists(args.git_command, submodule_branch):
|
||||
# Switch to branch and pull.
|
||||
if submodule_branch:
|
||||
if make_utils.git_branch(args.git_command) != submodule_branch:
|
||||
call([args.git_command, "checkout", submodule_branch])
|
||||
elif make_utils.git_remote_exist(args.git_command, "origin"):
|
||||
call([args.git_command, "checkout", "-t", f"origin/{submodule_branch}"])
|
||||
elif make_utils.git_remote_exist(args.git_command, "upstream"):
|
||||
call([args.git_command, "checkout", "-t", f"upstream/{submodule_branch}"])
|
||||
# Don't use extra fetch since all remotes of interest have been already fetched
|
||||
# some lines above.
|
||||
skip_msg += work_tree_update(args, use_fetch=False)
|
||||
finally:
|
||||
os.chdir(cwd)
|
||||
call([args.git_command, "pull", "--rebase", "origin", submodule_branch])
|
||||
finally:
|
||||
os.chdir(cwd)
|
||||
|
||||
return skip_msg
|
||||
|
||||
|
||||
def scripts_submodules_update(args: argparse.Namespace, branch: Optional[str]) -> str:
|
||||
"""Update working trees of addons and addons_contrib within the scripts/ directory"""
|
||||
msg = ""
|
||||
|
||||
msg += external_scripts_update(args, "blender-addons", "addons", branch)
|
||||
msg += external_scripts_update(args, "blender-addons-contrib", "addons_contrib", branch)
|
||||
|
||||
return msg
|
||||
|
||||
|
||||
def submodules_update(args: argparse.Namespace, branch: Optional[str]) -> str:
|
||||
"""Update submodules or other externally tracked source trees"""
|
||||
msg = ""
|
||||
|
||||
msg += scripts_submodules_update(args, branch)
|
||||
|
||||
return msg
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_arguments()
|
||||
blender_skip_msg = ""
|
||||
submodules_skip_msg = ""
|
||||
|
||||
blender_version = make_utils. parse_blender_version()
|
||||
if blender_version.cycle != 'alpha':
|
||||
major = blender_version.version // 100
|
||||
minor = blender_version.version % 100
|
||||
branch = f"blender-v{major}.{minor}-release"
|
||||
release_version: Optional[str] = f"{major}.{minor}"
|
||||
else:
|
||||
branch = 'main'
|
||||
release_version = None
|
||||
# Test if we are building a specific release version.
|
||||
branch = make_utils.git_branch(args.git_command)
|
||||
if branch == 'HEAD':
|
||||
sys.stderr.write('Blender git repository is in detached HEAD state, must be in a branch\n')
|
||||
sys.exit(1)
|
||||
|
||||
tag = make_utils.git_tag(args.git_command)
|
||||
release_version = make_utils.git_branch_release_version(branch, tag)
|
||||
|
||||
if not args.no_libraries:
|
||||
svn_update(args, release_version)
|
||||
if not args.no_blender:
|
||||
blender_skip_msg = git_update_skip(args)
|
||||
if not blender_skip_msg:
|
||||
blender_skip_msg = blender_update(args)
|
||||
if blender_skip_msg:
|
||||
blender_skip_msg = "Blender repository skipped: " + blender_skip_msg + "\n"
|
||||
else:
|
||||
blender_update(args)
|
||||
if not args.no_submodules:
|
||||
submodules_skip_msg = submodules_update(args, branch)
|
||||
submodules_skip_msg = submodules_update(args, release_version, branch)
|
||||
|
||||
# Report any skipped repositories at the end, so it's not as easy to miss.
|
||||
skip_msg = blender_skip_msg + submodules_skip_msg
|
||||
|
@@ -9,9 +9,7 @@ import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from typing import (
|
||||
Sequence,
|
||||
@@ -21,7 +19,7 @@ from typing import (
|
||||
|
||||
def call(cmd: Sequence[str], exit_on_error: bool = True, silent: bool = False) -> int:
|
||||
if not silent:
|
||||
print(" ".join([str(x) for x in cmd]))
|
||||
print(" ".join(cmd))
|
||||
|
||||
# Flush to ensure correct order output on Windows.
|
||||
sys.stdout.flush()
|
||||
@@ -54,57 +52,13 @@ def check_output(cmd: Sequence[str], exit_on_error: bool = True) -> str:
|
||||
return output.strip()
|
||||
|
||||
|
||||
def git_local_branch_exists(git_command: str, branch: str) -> bool:
|
||||
return (
|
||||
call([git_command, "rev-parse", "--verify", branch], exit_on_error=False, silent=True) == 0
|
||||
)
|
||||
|
||||
|
||||
def git_branch_exists(git_command: str, branch: str) -> bool:
|
||||
return (
|
||||
git_local_branch_exists(git_command, branch) or
|
||||
call([git_command, "rev-parse", "--verify", "remotes/upstream/" + branch], exit_on_error=False, silent=True) == 0 or
|
||||
call([git_command, "rev-parse", "--verify", branch], exit_on_error=False, silent=True) == 0 or
|
||||
call([git_command, "rev-parse", "--verify", "remotes/origin/" + branch], exit_on_error=False, silent=True) == 0
|
||||
)
|
||||
|
||||
|
||||
def git_get_remote_url(git_command: str, remote_name: str) -> str:
|
||||
return check_output((git_command, "ls-remote", "--get-url", remote_name))
|
||||
|
||||
|
||||
def git_remote_exist(git_command: str, remote_name: str) -> bool:
|
||||
"""Check whether there is a remote with the given name"""
|
||||
# `git ls-remote --get-url upstream` will print an URL if there is such remote configured, and
|
||||
# otherwise will print "upstream".
|
||||
remote_url = check_output((git_command, "ls-remote", "--get-url", remote_name))
|
||||
return remote_url != remote_name
|
||||
|
||||
|
||||
def git_get_resolved_submodule_url(git_command: str, blender_url: str, submodule_path: str) -> str:
|
||||
git_root = check_output([git_command, "rev-parse", "--show-toplevel"])
|
||||
dot_gitmodules = os.path.join(git_root, ".gitmodules")
|
||||
|
||||
submodule_key_prefix = f"submodule.{submodule_path}"
|
||||
submodule_key_url = f"{submodule_key_prefix}.url"
|
||||
|
||||
gitmodule_url = git_get_config(
|
||||
git_command, submodule_key_url, file=dot_gitmodules)
|
||||
|
||||
# A bit of a trickery to construct final URL.
|
||||
# Only works for the relative submodule URLs.
|
||||
#
|
||||
# Note that unless the LHS URL ends up with a slash urljoin treats the last component as a
|
||||
# file.
|
||||
assert gitmodule_url.startswith('..')
|
||||
return urljoin(blender_url + "/", gitmodule_url)
|
||||
|
||||
|
||||
def git_is_remote_repository(git_command: str, repo: str) -> bool:
|
||||
"""Returns true if the given repository is a valid/clonable git repo"""
|
||||
exit_code = call((git_command, "ls-remote", repo, "HEAD"), exit_on_error=False, silent=True)
|
||||
return exit_code == 0
|
||||
|
||||
|
||||
def git_branch(git_command: str) -> str:
|
||||
# Get current branch name.
|
||||
try:
|
||||
@@ -116,20 +70,6 @@ def git_branch(git_command: str) -> str:
|
||||
return branch.strip().decode('utf8')
|
||||
|
||||
|
||||
def git_get_config(git_command: str, key: str, file: Optional[str] = None) -> str:
|
||||
if file:
|
||||
return check_output([git_command, "config", "--file", file, "--get", key])
|
||||
|
||||
return check_output([git_command, "config", "--get", key])
|
||||
|
||||
|
||||
def git_set_config(git_command: str, key: str, value: str, file: Optional[str] = None) -> str:
|
||||
if file:
|
||||
return check_output([git_command, "config", "--file", file, key, value])
|
||||
|
||||
return check_output([git_command, "config", key, value])
|
||||
|
||||
|
||||
def git_tag(git_command: str) -> Optional[str]:
|
||||
# Get current tag name.
|
||||
try:
|
||||
|
20
build_files/windows/check_submodules.cmd
Normal file
20
build_files/windows/check_submodules.cmd
Normal file
@@ -0,0 +1,20 @@
|
||||
if NOT exist "%BLENDER_DIR%\source\tools\.git" (
|
||||
echo Checking out sub-modules
|
||||
if not "%GIT%" == "" (
|
||||
"%GIT%" submodule update --init --recursive --progress
|
||||
if errorlevel 1 goto FAIL
|
||||
"%GIT%" submodule foreach git checkout master
|
||||
if errorlevel 1 goto FAIL
|
||||
"%GIT%" submodule foreach git pull --rebase origin master
|
||||
if errorlevel 1 goto FAIL
|
||||
goto EOF
|
||||
) else (
|
||||
echo Blender submodules not found, and git not found in path to retrieve them.
|
||||
goto FAIL
|
||||
)
|
||||
)
|
||||
goto EOF
|
||||
|
||||
:FAIL
|
||||
exit /b 1
|
||||
:EOF
|
@@ -14,7 +14,7 @@ if NOT EXIST %PYTHON% (
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
set FORMAT_PATHS=%BLENDER_DIR%\tools\utils_maintenance\clang_format_paths.py
|
||||
set FORMAT_PATHS=%BLENDER_DIR%\source\tools\utils_maintenance\clang_format_paths.py
|
||||
|
||||
REM The formatting script expects clang-format to be in the current PATH.
|
||||
set PATH=%CF_PATH%;%PATH%
|
||||
|
@@ -4,9 +4,9 @@ if "%GIT%" == "" (
|
||||
)
|
||||
cd "%BLENDER_DIR%"
|
||||
for /f "delims=" %%i in ('"%GIT%" rev-parse HEAD') do echo Branch_hash=%%i
|
||||
cd "%BLENDER_DIR%/locale"
|
||||
cd "%BLENDER_DIR%/release/datafiles/locale"
|
||||
for /f "delims=" %%i in ('"%GIT%" rev-parse HEAD') do echo Locale_hash=%%i
|
||||
cd "%BLENDER_DIR%/scripts/addons"
|
||||
cd "%BLENDER_DIR%/release/scripts/addons"
|
||||
for /f "delims=" %%i in ('"%GIT%" rev-parse HEAD') do echo Addons_Hash=%%i
|
||||
cd "%BLENDER_DIR%"
|
||||
:EOF
|
@@ -231,7 +231,7 @@ class FileBlockHeader:
|
||||
self.SDNAIndex = 0
|
||||
self.Count = 0
|
||||
self.FileOffset = handle.tell()
|
||||
# self.Code += ' ' * (4 - len(self.Code))
|
||||
#self.Code += ' ' * (4 - len(self.Code))
|
||||
log.debug("found blend-file-block-fileheader {0} {1}".format(self.Code, self.FileOffset))
|
||||
|
||||
def skip(self, handle):
|
||||
|
@@ -38,7 +38,7 @@ PROJECT_NAME = Blender
|
||||
# could be handy for archiving the generated documentation or if some version
|
||||
# control system is used.
|
||||
|
||||
PROJECT_NUMBER = V3.6
|
||||
PROJECT_NUMBER = V3.5
|
||||
|
||||
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
||||
# for a project that appears at the top of each page and should give viewer a
|
||||
|
@@ -37,7 +37,7 @@ def draw_callback_px(self, context):
|
||||
# BLF drawing routine
|
||||
font_id = font_info["font_id"]
|
||||
blf.position(font_id, 2, 80, 0)
|
||||
blf.size(font_id, 50)
|
||||
blf.size(font_id, 50, 72)
|
||||
blf.draw(font_id, "Hello World")
|
||||
|
||||
|
||||
|
@@ -31,7 +31,7 @@ For an overview of BMesh data types and how they reference each other see:
|
||||
Example Script
|
||||
--------------
|
||||
|
||||
.. literalinclude:: __/__/__/scripts/templates_py/bmesh_simple.py
|
||||
.. literalinclude:: __/__/__/release/scripts/templates_py/bmesh_simple.py
|
||||
|
||||
|
||||
Standalone Module
|
||||
|
@@ -288,7 +288,7 @@ In Python, this is done by defining a class, which is a subclass of an existing
|
||||
Example Operator
|
||||
----------------
|
||||
|
||||
.. literalinclude:: __/__/__/scripts/templates_py/operator_simple.py
|
||||
.. literalinclude:: __/__/__/release/scripts/templates_py/operator_simple.py
|
||||
|
||||
Once this script runs, ``SimpleOperator`` is registered with Blender
|
||||
and can be called from Operator Search or added to the toolbar.
|
||||
@@ -320,7 +320,7 @@ Example Panel
|
||||
Panels are registered as a class, like an operator.
|
||||
Notice the extra ``bl_`` variables used to set the context they display in.
|
||||
|
||||
.. literalinclude:: __/__/__/scripts/templates_py/ui_panel_simple.py
|
||||
.. literalinclude:: __/__/__/release/scripts/templates_py/ui_panel_simple.py
|
||||
|
||||
To run the script:
|
||||
|
||||
|
@@ -367,13 +367,13 @@ except ImportError:
|
||||
# Note that ".." is replaced by "__" in the RST files,
|
||||
# to avoid having to match Blender's source tree.
|
||||
EXTRA_SOURCE_FILES = (
|
||||
"../../../scripts/templates_py/bmesh_simple.py",
|
||||
"../../../scripts/templates_py/gizmo_operator.py",
|
||||
"../../../scripts/templates_py/gizmo_operator_target.py",
|
||||
"../../../scripts/templates_py/gizmo_simple.py",
|
||||
"../../../scripts/templates_py/operator_simple.py",
|
||||
"../../../scripts/templates_py/ui_panel_simple.py",
|
||||
"../../../scripts/templates_py/ui_previews_custom_icon.py",
|
||||
"../../../release/scripts/templates_py/bmesh_simple.py",
|
||||
"../../../release/scripts/templates_py/gizmo_operator.py",
|
||||
"../../../release/scripts/templates_py/gizmo_operator_target.py",
|
||||
"../../../release/scripts/templates_py/gizmo_simple.py",
|
||||
"../../../release/scripts/templates_py/operator_simple.py",
|
||||
"../../../release/scripts/templates_py/ui_panel_simple.py",
|
||||
"../../../release/scripts/templates_py/ui_previews_custom_icon.py",
|
||||
"../examples/bmesh.ops.1.py",
|
||||
"../examples/bpy.app.translations.py",
|
||||
)
|
||||
@@ -476,7 +476,7 @@ MODULE_GROUPING = {
|
||||
|
||||
# -------------------------------BLENDER----------------------------------------
|
||||
|
||||
# Converting bytes to strings, due to #30154.
|
||||
# converting bytes to strings, due to T30154
|
||||
BLENDER_REVISION = str(bpy.app.build_hash, 'utf_8')
|
||||
BLENDER_REVISION_TIMESTAMP = bpy.app.build_commit_timestamp
|
||||
|
||||
@@ -487,7 +487,7 @@ BLENDER_VERSION_DOTS = "%d.%d" % (bpy.app.version[0], bpy.app.version[1])
|
||||
if BLENDER_REVISION != "Unknown":
|
||||
# SHA1 Git hash
|
||||
BLENDER_VERSION_HASH = BLENDER_REVISION
|
||||
BLENDER_VERSION_HASH_HTML_LINK = "<a href=https://projects.blender.org/blender/blender/commit/%s>%s</a>" % (
|
||||
BLENDER_VERSION_HASH_HTML_LINK = "<a href=https://developer.blender.org/rB%s>%s</a>" % (
|
||||
BLENDER_VERSION_HASH, BLENDER_VERSION_HASH,
|
||||
)
|
||||
BLENDER_VERSION_DATE = time.strftime("%d/%m/%Y", time.localtime(BLENDER_REVISION_TIMESTAMP))
|
||||
@@ -647,7 +647,7 @@ def undocumented_message(module_name, type_name, identifier):
|
||||
module_name, type_name, identifier,
|
||||
)
|
||||
|
||||
return "Undocumented, consider `contributing <https://developer.blender.org/>`__."
|
||||
return "Undocumented, consider `contributing <https://developer.blender.org/T51061>`__."
|
||||
|
||||
|
||||
def range_str(val):
|
||||
@@ -1816,9 +1816,9 @@ def pyrna2sphinx(basepath):
|
||||
|
||||
# operators
|
||||
def write_ops():
|
||||
API_BASEURL = "https://projects.blender.org/blender/blender/src/branch/main/scripts"
|
||||
API_BASEURL_ADDON = "https://projects.blender.org/blender/blender-addons"
|
||||
API_BASEURL_ADDON_CONTRIB = "https://projects.blender.org/blender/blender-addons-contrib"
|
||||
API_BASEURL = "https://developer.blender.org/diffusion/B/browse/master/release/scripts"
|
||||
API_BASEURL_ADDON = "https://developer.blender.org/diffusion/BA"
|
||||
API_BASEURL_ADDON_CONTRIB = "https://developer.blender.org/diffusion/BAC"
|
||||
|
||||
op_modules = {}
|
||||
op = None
|
||||
@@ -1865,7 +1865,7 @@ def pyrna2sphinx(basepath):
|
||||
else:
|
||||
url_base = API_BASEURL
|
||||
|
||||
fw(" :file:`%s\\:%d <%s/%s#L%d>`_\n\n" %
|
||||
fw(" :file: `%s\\:%d <%s/%s$%d>`_\n\n" %
|
||||
(location[0], location[1], url_base, location[0], location[1]))
|
||||
|
||||
file.close()
|
||||
@@ -2200,7 +2200,7 @@ def write_rst_enum_items(basepath, key, key_no_prefix, enum_items):
|
||||
Write a single page for a static enum in RST.
|
||||
|
||||
This helps avoiding very large lists being in-lined in many places which is an issue
|
||||
especially with icons in ``bpy.types.UILayout``. See #87008.
|
||||
especially with icons in ``bpy.types.UILayout``. See T87008.
|
||||
"""
|
||||
filepath = os.path.join(basepath, "%s.rst" % key_no_prefix)
|
||||
with open(filepath, "w", encoding="utf-8") as fh:
|
||||
|
@@ -156,7 +156,7 @@ var Popover = function() {
|
||||
},
|
||||
getNamed : function(v) {
|
||||
$.each(all_versions, function(ix, title) {
|
||||
if (ix === "master" || ix === "main" || ix === "latest") {
|
||||
if (ix === "master" || ix === "latest") {
|
||||
var m = title.match(/\d\.\d[\w\d\.]*/)[0];
|
||||
if (parseFloat(m) == v) {
|
||||
v = ix;
|
||||
|
2
extern/hipew/README.blender
vendored
2
extern/hipew/README.blender
vendored
@@ -1,5 +1,5 @@
|
||||
Project: Blender
|
||||
URL: https://projects.blender.org/blender/blender.git
|
||||
URL: https://git.blender.org/blender.git
|
||||
License: Apache 2.0
|
||||
Upstream version: N/A
|
||||
Local modifications: None
|
||||
|
5
extern/renderdoc/README.blender
vendored
5
extern/renderdoc/README.blender
vendored
@@ -1,5 +0,0 @@
|
||||
Project: Renderdoc APP
|
||||
URL: https://github.com/baldurk/renderdoc/
|
||||
License: MIT
|
||||
Upstream version: d47e79ae079783935b8857d6a1730440eafb0b38
|
||||
Local modifications: None
|
723
extern/renderdoc/include/renderdoc_app.h
vendored
723
extern/renderdoc/include/renderdoc_app.h
vendored
@@ -1,723 +0,0 @@
|
||||
/******************************************************************************
|
||||
* The MIT License (MIT)
|
||||
*
|
||||
* Copyright (c) 2019-2023 Baldur Karlsson
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
******************************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Documentation for the API is available at https://renderdoc.org/docs/in_application_api.html
|
||||
//
|
||||
|
||||
#if !defined(RENDERDOC_NO_STDINT)
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER)
|
||||
#define RENDERDOC_CC __cdecl
|
||||
#elif defined(__linux__)
|
||||
#define RENDERDOC_CC
|
||||
#elif defined(__APPLE__)
|
||||
#define RENDERDOC_CC
|
||||
#else
|
||||
#error "Unknown platform"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Constants not used directly in below API
|
||||
|
||||
// This is a GUID/magic value used for when applications pass a path where shader debug
|
||||
// information can be found to match up with a stripped shader.
|
||||
// the define can be used like so: const GUID RENDERDOC_ShaderDebugMagicValue =
|
||||
// RENDERDOC_ShaderDebugMagicValue_value
|
||||
#define RENDERDOC_ShaderDebugMagicValue_struct \
|
||||
{ \
|
||||
0xeab25520, 0x6670, 0x4865, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \
|
||||
}
|
||||
|
||||
// as an alternative when you want a byte array (assuming x86 endianness):
|
||||
#define RENDERDOC_ShaderDebugMagicValue_bytearray \
|
||||
{ \
|
||||
0x20, 0x55, 0xb2, 0xea, 0x70, 0x66, 0x65, 0x48, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \
|
||||
}
|
||||
|
||||
// truncated version when only a uint64_t is available (e.g. Vulkan tags):
|
||||
#define RENDERDOC_ShaderDebugMagicValue_truncated 0x48656670eab25520ULL
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// RenderDoc capture options
|
||||
//
|
||||
|
||||
typedef enum RENDERDOC_CaptureOption {
|
||||
// Allow the application to enable vsync
|
||||
//
|
||||
// Default - enabled
|
||||
//
|
||||
// 1 - The application can enable or disable vsync at will
|
||||
// 0 - vsync is force disabled
|
||||
eRENDERDOC_Option_AllowVSync = 0,
|
||||
|
||||
// Allow the application to enable fullscreen
|
||||
//
|
||||
// Default - enabled
|
||||
//
|
||||
// 1 - The application can enable or disable fullscreen at will
|
||||
// 0 - fullscreen is force disabled
|
||||
eRENDERDOC_Option_AllowFullscreen = 1,
|
||||
|
||||
// Record API debugging events and messages
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Enable built-in API debugging features and records the results into
|
||||
// the capture, which is matched up with events on replay
|
||||
// 0 - no API debugging is forcibly enabled
|
||||
eRENDERDOC_Option_APIValidation = 2,
|
||||
eRENDERDOC_Option_DebugDeviceMode = 2, // deprecated name of this enum
|
||||
|
||||
// Capture CPU callstacks for API events
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Enables capturing of callstacks
|
||||
// 0 - no callstacks are captured
|
||||
eRENDERDOC_Option_CaptureCallstacks = 3,
|
||||
|
||||
// When capturing CPU callstacks, only capture them from actions.
|
||||
// This option does nothing without the above option being enabled
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Only captures callstacks for actions.
|
||||
// Ignored if CaptureCallstacks is disabled
|
||||
// 0 - Callstacks, if enabled, are captured for every event.
|
||||
eRENDERDOC_Option_CaptureCallstacksOnlyDraws = 4,
|
||||
eRENDERDOC_Option_CaptureCallstacksOnlyActions = 4,
|
||||
|
||||
// Specify a delay in seconds to wait for a debugger to attach, after
|
||||
// creating or injecting into a process, before continuing to allow it to run.
|
||||
//
|
||||
// 0 indicates no delay, and the process will run immediately after injection
|
||||
//
|
||||
// Default - 0 seconds
|
||||
//
|
||||
eRENDERDOC_Option_DelayForDebugger = 5,
|
||||
|
||||
// Verify buffer access. This includes checking the memory returned by a Map() call to
|
||||
// detect any out-of-bounds modification, as well as initialising buffers with undefined contents
|
||||
// to a marker value to catch use of uninitialised memory.
|
||||
//
|
||||
// NOTE: This option is only valid for OpenGL and D3D11. Explicit APIs such as D3D12 and Vulkan do
|
||||
// not do the same kind of interception & checking and undefined contents are really undefined.
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Verify buffer access
|
||||
// 0 - No verification is performed, and overwriting bounds may cause crashes or corruption in
|
||||
// RenderDoc.
|
||||
eRENDERDOC_Option_VerifyBufferAccess = 6,
|
||||
|
||||
// The old name for eRENDERDOC_Option_VerifyBufferAccess was eRENDERDOC_Option_VerifyMapWrites.
|
||||
// This option now controls the filling of uninitialised buffers with 0xdddddddd which was
|
||||
// previously always enabled
|
||||
eRENDERDOC_Option_VerifyMapWrites = eRENDERDOC_Option_VerifyBufferAccess,
|
||||
|
||||
// Hooks any system API calls that create child processes, and injects
|
||||
// RenderDoc into them recursively with the same options.
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Hooks into spawned child processes
|
||||
// 0 - Child processes are not hooked by RenderDoc
|
||||
eRENDERDOC_Option_HookIntoChildren = 7,
|
||||
|
||||
// By default RenderDoc only includes resources in the final capture necessary
|
||||
// for that frame, this allows you to override that behaviour.
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - all live resources at the time of capture are included in the capture
|
||||
// and available for inspection
|
||||
// 0 - only the resources referenced by the captured frame are included
|
||||
eRENDERDOC_Option_RefAllResources = 8,
|
||||
|
||||
// **NOTE**: As of RenderDoc v1.1 this option has been deprecated. Setting or
|
||||
// getting it will be ignored, to allow compatibility with older versions.
|
||||
// In v1.1 the option acts as if it's always enabled.
|
||||
//
|
||||
// By default RenderDoc skips saving initial states for resources where the
|
||||
// previous contents don't appear to be used, assuming that writes before
|
||||
// reads indicate previous contents aren't used.
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - initial contents at the start of each captured frame are saved, even if
|
||||
// they are later overwritten or cleared before being used.
|
||||
// 0 - unless a read is detected, initial contents will not be saved and will
|
||||
// appear as black or empty data.
|
||||
eRENDERDOC_Option_SaveAllInitials = 9,
|
||||
|
||||
// In APIs that allow for the recording of command lists to be replayed later,
|
||||
// RenderDoc may choose to not capture command lists before a frame capture is
|
||||
// triggered, to reduce overheads. This means any command lists recorded once
|
||||
// and replayed many times will not be available and may cause a failure to
|
||||
// capture.
|
||||
//
|
||||
// NOTE: This is only true for APIs where multithreading is difficult or
|
||||
// discouraged. Newer APIs like Vulkan and D3D12 will ignore this option
|
||||
// and always capture all command lists since the API is heavily oriented
|
||||
// around it and the overheads have been reduced by API design.
|
||||
//
|
||||
// 1 - All command lists are captured from the start of the application
|
||||
// 0 - Command lists are only captured if their recording begins during
|
||||
// the period when a frame capture is in progress.
|
||||
eRENDERDOC_Option_CaptureAllCmdLists = 10,
|
||||
|
||||
// Mute API debugging output when the API validation mode option is enabled
|
||||
//
|
||||
// Default - enabled
|
||||
//
|
||||
// 1 - Mute any API debug messages from being displayed or passed through
|
||||
// 0 - API debugging is displayed as normal
|
||||
eRENDERDOC_Option_DebugOutputMute = 11,
|
||||
|
||||
// Option to allow vendor extensions to be used even when they may be
|
||||
// incompatible with RenderDoc and cause corrupted replays or crashes.
|
||||
//
|
||||
// Default - inactive
|
||||
//
|
||||
// No values are documented, this option should only be used when absolutely
|
||||
// necessary as directed by a RenderDoc developer.
|
||||
eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12,
|
||||
|
||||
} RENDERDOC_CaptureOption;
|
||||
|
||||
// Sets an option that controls how RenderDoc behaves on capture.
|
||||
//
|
||||
// Returns 1 if the option and value are valid
|
||||
// Returns 0 if either is invalid and the option is unchanged
|
||||
typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionU32)(RENDERDOC_CaptureOption opt, uint32_t val);
|
||||
typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionF32)(RENDERDOC_CaptureOption opt, float val);
|
||||
|
||||
// Gets the current value of an option as a uint32_t
|
||||
//
|
||||
// If the option is invalid, 0xffffffff is returned
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_CaptureOption opt);
|
||||
|
||||
// Gets the current value of an option as a float
|
||||
//
|
||||
// If the option is invalid, -FLT_MAX is returned
|
||||
typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt);
|
||||
|
||||
typedef enum RENDERDOC_InputButton {
|
||||
// '0' - '9' matches ASCII values
|
||||
eRENDERDOC_Key_0 = 0x30,
|
||||
eRENDERDOC_Key_1 = 0x31,
|
||||
eRENDERDOC_Key_2 = 0x32,
|
||||
eRENDERDOC_Key_3 = 0x33,
|
||||
eRENDERDOC_Key_4 = 0x34,
|
||||
eRENDERDOC_Key_5 = 0x35,
|
||||
eRENDERDOC_Key_6 = 0x36,
|
||||
eRENDERDOC_Key_7 = 0x37,
|
||||
eRENDERDOC_Key_8 = 0x38,
|
||||
eRENDERDOC_Key_9 = 0x39,
|
||||
|
||||
// 'A' - 'Z' matches ASCII values
|
||||
eRENDERDOC_Key_A = 0x41,
|
||||
eRENDERDOC_Key_B = 0x42,
|
||||
eRENDERDOC_Key_C = 0x43,
|
||||
eRENDERDOC_Key_D = 0x44,
|
||||
eRENDERDOC_Key_E = 0x45,
|
||||
eRENDERDOC_Key_F = 0x46,
|
||||
eRENDERDOC_Key_G = 0x47,
|
||||
eRENDERDOC_Key_H = 0x48,
|
||||
eRENDERDOC_Key_I = 0x49,
|
||||
eRENDERDOC_Key_J = 0x4A,
|
||||
eRENDERDOC_Key_K = 0x4B,
|
||||
eRENDERDOC_Key_L = 0x4C,
|
||||
eRENDERDOC_Key_M = 0x4D,
|
||||
eRENDERDOC_Key_N = 0x4E,
|
||||
eRENDERDOC_Key_O = 0x4F,
|
||||
eRENDERDOC_Key_P = 0x50,
|
||||
eRENDERDOC_Key_Q = 0x51,
|
||||
eRENDERDOC_Key_R = 0x52,
|
||||
eRENDERDOC_Key_S = 0x53,
|
||||
eRENDERDOC_Key_T = 0x54,
|
||||
eRENDERDOC_Key_U = 0x55,
|
||||
eRENDERDOC_Key_V = 0x56,
|
||||
eRENDERDOC_Key_W = 0x57,
|
||||
eRENDERDOC_Key_X = 0x58,
|
||||
eRENDERDOC_Key_Y = 0x59,
|
||||
eRENDERDOC_Key_Z = 0x5A,
|
||||
|
||||
// leave the rest of the ASCII range free
|
||||
// in case we want to use it later
|
||||
eRENDERDOC_Key_NonPrintable = 0x100,
|
||||
|
||||
eRENDERDOC_Key_Divide,
|
||||
eRENDERDOC_Key_Multiply,
|
||||
eRENDERDOC_Key_Subtract,
|
||||
eRENDERDOC_Key_Plus,
|
||||
|
||||
eRENDERDOC_Key_F1,
|
||||
eRENDERDOC_Key_F2,
|
||||
eRENDERDOC_Key_F3,
|
||||
eRENDERDOC_Key_F4,
|
||||
eRENDERDOC_Key_F5,
|
||||
eRENDERDOC_Key_F6,
|
||||
eRENDERDOC_Key_F7,
|
||||
eRENDERDOC_Key_F8,
|
||||
eRENDERDOC_Key_F9,
|
||||
eRENDERDOC_Key_F10,
|
||||
eRENDERDOC_Key_F11,
|
||||
eRENDERDOC_Key_F12,
|
||||
|
||||
eRENDERDOC_Key_Home,
|
||||
eRENDERDOC_Key_End,
|
||||
eRENDERDOC_Key_Insert,
|
||||
eRENDERDOC_Key_Delete,
|
||||
eRENDERDOC_Key_PageUp,
|
||||
eRENDERDOC_Key_PageDn,
|
||||
|
||||
eRENDERDOC_Key_Backspace,
|
||||
eRENDERDOC_Key_Tab,
|
||||
eRENDERDOC_Key_PrtScrn,
|
||||
eRENDERDOC_Key_Pause,
|
||||
|
||||
eRENDERDOC_Key_Max,
|
||||
} RENDERDOC_InputButton;
|
||||
|
||||
// Sets which key or keys can be used to toggle focus between multiple windows
|
||||
//
|
||||
// If keys is NULL or num is 0, toggle keys will be disabled
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton *keys, int num);
|
||||
|
||||
// Sets which key or keys can be used to capture the next frame
|
||||
//
|
||||
// If keys is NULL or num is 0, capture keys will be disabled
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num);
|
||||
|
||||
typedef enum RENDERDOC_OverlayBits {
|
||||
// This single bit controls whether the overlay is enabled or disabled globally
|
||||
eRENDERDOC_Overlay_Enabled = 0x1,
|
||||
|
||||
// Show the average framerate over several seconds as well as min/max
|
||||
eRENDERDOC_Overlay_FrameRate = 0x2,
|
||||
|
||||
// Show the current frame number
|
||||
eRENDERDOC_Overlay_FrameNumber = 0x4,
|
||||
|
||||
// Show a list of recent captures, and how many captures have been made
|
||||
eRENDERDOC_Overlay_CaptureList = 0x8,
|
||||
|
||||
// Default values for the overlay mask
|
||||
eRENDERDOC_Overlay_Default = (eRENDERDOC_Overlay_Enabled | eRENDERDOC_Overlay_FrameRate |
|
||||
eRENDERDOC_Overlay_FrameNumber | eRENDERDOC_Overlay_CaptureList),
|
||||
|
||||
// Enable all bits
|
||||
eRENDERDOC_Overlay_All = ~0U,
|
||||
|
||||
// Disable all bits
|
||||
eRENDERDOC_Overlay_None = 0,
|
||||
} RENDERDOC_OverlayBits;
|
||||
|
||||
// returns the overlay bits that have been set
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetOverlayBits)();
|
||||
// sets the overlay bits with an and & or mask
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_MaskOverlayBits)(uint32_t And, uint32_t Or);
|
||||
|
||||
// this function will attempt to remove RenderDoc's hooks in the application.
|
||||
//
|
||||
// Note that this can only work correctly if done immediately after
|
||||
// the module is loaded, before any API work happens. RenderDoc will remove its
|
||||
// injected hooks and shut down. Behaviour is undefined if this is called
|
||||
// after any API functions have been called, and there is still no guarantee of
|
||||
// success.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_RemoveHooks)();
|
||||
|
||||
// DEPRECATED: compatibility for code compiled against pre-1.4.1 headers.
|
||||
typedef pRENDERDOC_RemoveHooks pRENDERDOC_Shutdown;
|
||||
|
||||
// This function will unload RenderDoc's crash handler.
|
||||
//
|
||||
// If you use your own crash handler and don't want RenderDoc's handler to
|
||||
// intercede, you can call this function to unload it and any unhandled
|
||||
// exceptions will pass to the next handler.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_UnloadCrashHandler)();
|
||||
|
||||
// Sets the capture file path template
|
||||
//
|
||||
// pathtemplate is a UTF-8 string that gives a template for how captures will be named
|
||||
// and where they will be saved.
|
||||
//
|
||||
// Any extension is stripped off the path, and captures are saved in the directory
|
||||
// specified, and named with the filename and the frame number appended. If the
|
||||
// directory does not exist it will be created, including any parent directories.
|
||||
//
|
||||
// If pathtemplate is NULL, the template will remain unchanged
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// SetCaptureFilePathTemplate("my_captures/example");
|
||||
//
|
||||
// Capture #1 -> my_captures/example_frame123.rdc
|
||||
// Capture #2 -> my_captures/example_frame456.rdc
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFilePathTemplate)(const char *pathtemplate);
|
||||
|
||||
// returns the current capture path template, see SetCaptureFilePathTemplate above, as a UTF-8 string
|
||||
typedef const char *(RENDERDOC_CC *pRENDERDOC_GetCaptureFilePathTemplate)();
|
||||
|
||||
// DEPRECATED: compatibility for code compiled against pre-1.1.2 headers.
|
||||
typedef pRENDERDOC_SetCaptureFilePathTemplate pRENDERDOC_SetLogFilePathTemplate;
|
||||
typedef pRENDERDOC_GetCaptureFilePathTemplate pRENDERDOC_GetLogFilePathTemplate;
|
||||
|
||||
// returns the number of captures that have been made
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetNumCaptures)();
|
||||
|
||||
// This function returns the details of a capture, by index. New captures are added
|
||||
// to the end of the list.
|
||||
//
|
||||
// filename will be filled with the absolute path to the capture file, as a UTF-8 string
|
||||
// pathlength will be written with the length in bytes of the filename string
|
||||
// timestamp will be written with the time of the capture, in seconds since the Unix epoch
|
||||
//
|
||||
// Any of the parameters can be NULL and they'll be skipped.
|
||||
//
|
||||
// The function will return 1 if the capture index is valid, or 0 if the index is invalid
|
||||
// If the index is invalid, the values will be unchanged
|
||||
//
|
||||
// Note: when captures are deleted in the UI they will remain in this list, so the
|
||||
// capture path may not exist anymore.
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCapture)(uint32_t idx, char *filename,
|
||||
uint32_t *pathlength, uint64_t *timestamp);
|
||||
|
||||
// Sets the comments associated with a capture file. These comments are displayed in the
|
||||
// UI program when opening.
|
||||
//
|
||||
// filePath should be a path to the capture file to add comments to. If set to NULL or ""
|
||||
// the most recent capture file created will be used instead.
|
||||
// comments should be a NULL-terminated UTF-8 string to add as comments.
|
||||
//
|
||||
// Any existing comments will be overwritten.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFileComments)(const char *filePath,
|
||||
const char *comments);
|
||||
|
||||
// returns 1 if the RenderDoc UI is connected to this application, 0 otherwise
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsTargetControlConnected)();
|
||||
|
||||
// DEPRECATED: compatibility for code compiled against pre-1.1.1 headers.
|
||||
// This was renamed to IsTargetControlConnected in API 1.1.1, the old typedef is kept here for
|
||||
// backwards compatibility with old code, it is castable either way since it's ABI compatible
|
||||
// as the same function pointer type.
|
||||
typedef pRENDERDOC_IsTargetControlConnected pRENDERDOC_IsRemoteAccessConnected;
|
||||
|
||||
// This function will launch the Replay UI associated with the RenderDoc library injected
|
||||
// into the running application.
|
||||
//
|
||||
// if connectTargetControl is 1, the Replay UI will be launched with a command line parameter
|
||||
// to connect to this application
|
||||
// cmdline is the rest of the command line, as a UTF-8 string. E.g. a capture to open
|
||||
// if cmdline is NULL, the command line will be empty.
|
||||
//
|
||||
// returns the PID of the replay UI if successful, 0 if not successful.
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTargetControl,
|
||||
const char *cmdline);
|
||||
|
||||
// RenderDoc can return a higher version than requested if it's backwards compatible,
|
||||
// this function returns the actual version returned. If a parameter is NULL, it will be
|
||||
// ignored and the others will be filled out.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch);
|
||||
|
||||
// Requests that the replay UI show itself (if hidden or not the current top window). This can be
|
||||
// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle
|
||||
// showing the UI after making a capture.
|
||||
//
|
||||
// This will return 1 if the request was successfully passed on, though it's not guaranteed that
|
||||
// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current
|
||||
// target control connection to make such a request, or if there was another error
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)();
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Capturing functions
|
||||
//
|
||||
|
||||
// A device pointer is a pointer to the API's root handle.
|
||||
//
|
||||
// This would be an ID3D11Device, HGLRC/GLXContext, ID3D12Device, etc
|
||||
typedef void *RENDERDOC_DevicePointer;
|
||||
|
||||
// A window handle is the OS's native window handle
|
||||
//
|
||||
// This would be an HWND, GLXDrawable, etc
|
||||
typedef void *RENDERDOC_WindowHandle;
|
||||
|
||||
// A helper macro for Vulkan, where the device handle cannot be used directly.
|
||||
//
|
||||
// Passing the VkInstance to this macro will return the RENDERDOC_DevicePointer to use.
|
||||
//
|
||||
// Specifically, the value needed is the dispatch table pointer, which sits as the first
|
||||
// pointer-sized object in the memory pointed to by the VkInstance. Thus we cast to a void** and
|
||||
// indirect once.
|
||||
#define RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(inst) (*((void **)(inst)))
|
||||
|
||||
// This sets the RenderDoc in-app overlay in the API/window pair as 'active' and it will
|
||||
// respond to keypresses. Neither parameter can be NULL
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetActiveWindow)(RENDERDOC_DevicePointer device,
|
||||
RENDERDOC_WindowHandle wndHandle);
|
||||
|
||||
// capture the next frame on whichever window and API is currently considered active
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_TriggerCapture)();
|
||||
|
||||
// capture the next N frames on whichever window and API is currently considered active
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_TriggerMultiFrameCapture)(uint32_t numFrames);
|
||||
|
||||
// When choosing either a device pointer or a window handle to capture, you can pass NULL.
|
||||
// Passing NULL specifies a 'wildcard' match against anything. This allows you to specify
|
||||
// any API rendering to a specific window, or a specific API instance rendering to any window,
|
||||
// or in the simplest case of one window and one API, you can just pass NULL for both.
|
||||
//
|
||||
// In either case, if there are two or more possible matching (device,window) pairs it
|
||||
// is undefined which one will be captured.
|
||||
//
|
||||
// Note: for headless rendering you can pass NULL for the window handle and either specify
|
||||
// a device pointer or leave it NULL as above.
|
||||
|
||||
// Immediately starts capturing API calls on the specified device pointer and window handle.
|
||||
//
|
||||
// If there is no matching thing to capture (e.g. no supported API has been initialised),
|
||||
// this will do nothing.
|
||||
//
|
||||
// The results are undefined (including crashes) if two captures are started overlapping,
|
||||
// even on separate devices and/or windows.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_StartFrameCapture)(RENDERDOC_DevicePointer device,
|
||||
RENDERDOC_WindowHandle wndHandle);
|
||||
|
||||
// Returns whether or not a frame capture is currently ongoing anywhere.
|
||||
//
|
||||
// This will return 1 if a capture is ongoing, and 0 if there is no capture running
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsFrameCapturing)();
|
||||
|
||||
// Ends capturing immediately.
|
||||
//
|
||||
// This will return 1 if the capture succeeded, and 0 if there was an error capturing.
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePointer device,
|
||||
RENDERDOC_WindowHandle wndHandle);
|
||||
|
||||
// Ends capturing immediately and discard any data stored without saving to disk.
|
||||
//
|
||||
// This will return 1 if the capture was discarded, and 0 if there was an error or no capture
|
||||
// was in progress
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device,
|
||||
RENDERDOC_WindowHandle wndHandle);
|
||||
|
||||
// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom
|
||||
// title to the capture produced which will be displayed in the UI.
|
||||
//
|
||||
// If multiple captures are ongoing, this title will be applied to the first capture to end after
|
||||
// this call. The second capture to end will have no title, unless this function is called again.
|
||||
//
|
||||
// Calling this function has no effect if no capture is currently running
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// RenderDoc API versions
|
||||
//
|
||||
|
||||
// RenderDoc uses semantic versioning (http://semver.org/).
|
||||
//
|
||||
// MAJOR version is incremented when incompatible API changes happen.
|
||||
// MINOR version is incremented when functionality is added in a backwards-compatible manner.
|
||||
// PATCH version is incremented when backwards-compatible bug fixes happen.
|
||||
//
|
||||
// Note that this means the API returned can be higher than the one you might have requested.
|
||||
// e.g. if you are running against a newer RenderDoc that supports 1.0.1, it will be returned
|
||||
// instead of 1.0.0. You can check this with the GetAPIVersion entry point
|
||||
typedef enum RENDERDOC_Version {
|
||||
eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00
|
||||
eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01
|
||||
eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02
|
||||
eRENDERDOC_API_Version_1_1_0 = 10100, // RENDERDOC_API_1_1_0 = 1 01 00
|
||||
eRENDERDOC_API_Version_1_1_1 = 10101, // RENDERDOC_API_1_1_1 = 1 01 01
|
||||
eRENDERDOC_API_Version_1_1_2 = 10102, // RENDERDOC_API_1_1_2 = 1 01 02
|
||||
eRENDERDOC_API_Version_1_2_0 = 10200, // RENDERDOC_API_1_2_0 = 1 02 00
|
||||
eRENDERDOC_API_Version_1_3_0 = 10300, // RENDERDOC_API_1_3_0 = 1 03 00
|
||||
eRENDERDOC_API_Version_1_4_0 = 10400, // RENDERDOC_API_1_4_0 = 1 04 00
|
||||
eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01
|
||||
eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02
|
||||
eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00
|
||||
eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00
|
||||
} RENDERDOC_Version;
|
||||
|
||||
// API version changelog:
|
||||
//
|
||||
// 1.0.0 - initial release
|
||||
// 1.0.1 - Bugfix: IsFrameCapturing() was returning false for captures that were triggered
|
||||
// by keypress or TriggerCapture, instead of Start/EndFrameCapture.
|
||||
// 1.0.2 - Refactor: Renamed eRENDERDOC_Option_DebugDeviceMode to eRENDERDOC_Option_APIValidation
|
||||
// 1.1.0 - Add feature: TriggerMultiFrameCapture(). Backwards compatible with 1.0.x since the new
|
||||
// function pointer is added to the end of the struct, the original layout is identical
|
||||
// 1.1.1 - Refactor: Renamed remote access to target control (to better disambiguate from remote
|
||||
// replay/remote server concept in replay UI)
|
||||
// 1.1.2 - Refactor: Renamed "log file" in function names to just capture, to clarify that these
|
||||
// are captures and not debug logging files. This is the first API version in the v1.0
|
||||
// branch.
|
||||
// 1.2.0 - Added feature: SetCaptureFileComments() to add comments to a capture file that will be
|
||||
// displayed in the UI program on load.
|
||||
// 1.3.0 - Added feature: New capture option eRENDERDOC_Option_AllowUnsupportedVendorExtensions
|
||||
// which allows users to opt-in to allowing unsupported vendor extensions to function.
|
||||
// Should be used at the user's own risk.
|
||||
// Refactor: Renamed eRENDERDOC_Option_VerifyMapWrites to
|
||||
// eRENDERDOC_Option_VerifyBufferAccess, which now also controls initialisation to
|
||||
// 0xdddddddd of uninitialised buffer contents.
|
||||
// 1.4.0 - Added feature: DiscardFrameCapture() to discard a frame capture in progress and stop
|
||||
// capturing without saving anything to disk.
|
||||
// 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening
|
||||
// 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option.
|
||||
// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected
|
||||
// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a
|
||||
// capture made with StartFrameCapture() or EndFrameCapture()
|
||||
|
||||
typedef struct RENDERDOC_API_1_6_0
|
||||
{
|
||||
pRENDERDOC_GetAPIVersion GetAPIVersion;
|
||||
|
||||
pRENDERDOC_SetCaptureOptionU32 SetCaptureOptionU32;
|
||||
pRENDERDOC_SetCaptureOptionF32 SetCaptureOptionF32;
|
||||
|
||||
pRENDERDOC_GetCaptureOptionU32 GetCaptureOptionU32;
|
||||
pRENDERDOC_GetCaptureOptionF32 GetCaptureOptionF32;
|
||||
|
||||
pRENDERDOC_SetFocusToggleKeys SetFocusToggleKeys;
|
||||
pRENDERDOC_SetCaptureKeys SetCaptureKeys;
|
||||
|
||||
pRENDERDOC_GetOverlayBits GetOverlayBits;
|
||||
pRENDERDOC_MaskOverlayBits MaskOverlayBits;
|
||||
|
||||
// Shutdown was renamed to RemoveHooks in 1.4.1.
|
||||
// These unions allow old code to continue compiling without changes
|
||||
union
|
||||
{
|
||||
pRENDERDOC_Shutdown Shutdown;
|
||||
pRENDERDOC_RemoveHooks RemoveHooks;
|
||||
};
|
||||
pRENDERDOC_UnloadCrashHandler UnloadCrashHandler;
|
||||
|
||||
// Get/SetLogFilePathTemplate was renamed to Get/SetCaptureFilePathTemplate in 1.1.2.
|
||||
// These unions allow old code to continue compiling without changes
|
||||
union
|
||||
{
|
||||
// deprecated name
|
||||
pRENDERDOC_SetLogFilePathTemplate SetLogFilePathTemplate;
|
||||
// current name
|
||||
pRENDERDOC_SetCaptureFilePathTemplate SetCaptureFilePathTemplate;
|
||||
};
|
||||
union
|
||||
{
|
||||
// deprecated name
|
||||
pRENDERDOC_GetLogFilePathTemplate GetLogFilePathTemplate;
|
||||
// current name
|
||||
pRENDERDOC_GetCaptureFilePathTemplate GetCaptureFilePathTemplate;
|
||||
};
|
||||
|
||||
pRENDERDOC_GetNumCaptures GetNumCaptures;
|
||||
pRENDERDOC_GetCapture GetCapture;
|
||||
|
||||
pRENDERDOC_TriggerCapture TriggerCapture;
|
||||
|
||||
// IsRemoteAccessConnected was renamed to IsTargetControlConnected in 1.1.1.
|
||||
// This union allows old code to continue compiling without changes
|
||||
union
|
||||
{
|
||||
// deprecated name
|
||||
pRENDERDOC_IsRemoteAccessConnected IsRemoteAccessConnected;
|
||||
// current name
|
||||
pRENDERDOC_IsTargetControlConnected IsTargetControlConnected;
|
||||
};
|
||||
pRENDERDOC_LaunchReplayUI LaunchReplayUI;
|
||||
|
||||
pRENDERDOC_SetActiveWindow SetActiveWindow;
|
||||
|
||||
pRENDERDOC_StartFrameCapture StartFrameCapture;
|
||||
pRENDERDOC_IsFrameCapturing IsFrameCapturing;
|
||||
pRENDERDOC_EndFrameCapture EndFrameCapture;
|
||||
|
||||
// new function in 1.1.0
|
||||
pRENDERDOC_TriggerMultiFrameCapture TriggerMultiFrameCapture;
|
||||
|
||||
// new function in 1.2.0
|
||||
pRENDERDOC_SetCaptureFileComments SetCaptureFileComments;
|
||||
|
||||
// new function in 1.4.0
|
||||
pRENDERDOC_DiscardFrameCapture DiscardFrameCapture;
|
||||
|
||||
// new function in 1.5.0
|
||||
pRENDERDOC_ShowReplayUI ShowReplayUI;
|
||||
|
||||
// new function in 1.6.0
|
||||
pRENDERDOC_SetCaptureTitle SetCaptureTitle;
|
||||
} RENDERDOC_API_1_6_0;
|
||||
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_0;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_1;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_2;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_0;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_1;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_2;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_2_0;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_3_0;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_0;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_1;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_2;
|
||||
typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_5_0;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// RenderDoc API entry point
|
||||
//
|
||||
// This entry point can be obtained via GetProcAddress/dlsym if RenderDoc is available.
|
||||
//
|
||||
// The name is the same as the typedef - "RENDERDOC_GetAPI"
|
||||
//
|
||||
// This function is not thread safe, and should not be called on multiple threads at once.
|
||||
// Ideally, call this once as early as possible in your application's startup, before doing
|
||||
// any API work, since some configuration functionality etc has to be done also before
|
||||
// initialising any APIs.
|
||||
//
|
||||
// Parameters:
|
||||
// version is a single value from the RENDERDOC_Version above.
|
||||
//
|
||||
// outAPIPointers will be filled out with a pointer to the corresponding struct of function
|
||||
// pointers.
|
||||
//
|
||||
// Returns:
|
||||
// 1 - if the outAPIPointers has been filled with a pointer to the API struct requested
|
||||
// 0 - if the requested version is not supported or the arguments are invalid.
|
||||
//
|
||||
typedef int(RENDERDOC_CC *pRENDERDOC_GetAPI)(RENDERDOC_Version version, void **outAPIPointers);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
2
extern/sdlew/CMakeLists.txt
vendored
2
extern/sdlew/CMakeLists.txt
vendored
@@ -7,7 +7,7 @@ set(INC
|
||||
)
|
||||
|
||||
set(INC_SYS
|
||||
${X11_X11_INCLUDE_PATH}
|
||||
|
||||
)
|
||||
|
||||
set(SRC
|
||||
|
5
extern/tinygltf/README.blender
vendored
5
extern/tinygltf/README.blender
vendored
@@ -1,5 +1,6 @@
|
||||
Project: TinyGLTF
|
||||
URL: https://github.com/syoyo/tinygltf
|
||||
License: MIT
|
||||
Upstream version: 2.8.3, 84a83d39f55d
|
||||
Local modifications: None
|
||||
Upstream version: 2.5.0, 19a41d20ec0
|
||||
Local modifications:
|
||||
* Silence "enum value not handled in switch" warnings due to JSON dependency.
|
||||
|
BIN
extern/tinygltf/patches/TinyGLTF.diff
vendored
Normal file
BIN
extern/tinygltf/patches/TinyGLTF.diff
vendored
Normal file
Binary file not shown.
2087
extern/tinygltf/tiny_gltf.h
vendored
2087
extern/tinygltf/tiny_gltf.h
vendored
File diff suppressed because it is too large
Load Diff
@@ -67,10 +67,6 @@ if(UNIX AND NOT APPLE)
|
||||
add_subdirectory(libc_compat)
|
||||
endif()
|
||||
|
||||
if (WITH_RENDERDOC)
|
||||
add_subdirectory(renderdoc_dynload)
|
||||
endif()
|
||||
|
||||
if(UNIX AND NOT APPLE)
|
||||
# Important this comes after "ghost" as it uses includes defined by GHOST's CMake.
|
||||
if(WITH_GHOST_WAYLAND AND WITH_GHOST_WAYLAND_DYNLOAD)
|
||||
|
@@ -41,7 +41,7 @@ static const char *FRAGMENT_SHADER =
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" vec4 rgba = texture(image_texture, texCoord_interp);\n"
|
||||
/* Hard-coded Rec.709 gamma, should use OpenColorIO eventually. */
|
||||
/* Harcoded Rec.709 gamma, should use OpenColorIO eventually. */
|
||||
" fragColor = pow(rgba, vec4(0.45, 0.45, 0.45, 1.0));\n"
|
||||
"}\n\0";
|
||||
|
||||
|
@@ -12,7 +12,6 @@ from bpy.props import (
|
||||
PointerProperty,
|
||||
StringProperty,
|
||||
)
|
||||
from bpy.app.translations import pgettext_iface as iface_
|
||||
|
||||
from math import pi
|
||||
|
||||
@@ -1665,51 +1664,30 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
col.label(text="No compatible GPUs found for Cycles", icon='INFO')
|
||||
|
||||
if device_type == 'CUDA':
|
||||
compute_capability = "3.0"
|
||||
col.label(text=iface_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
|
||||
icon='BLANK1', translate=False)
|
||||
col.label(text="Requires NVIDIA GPU with compute capability 3.0", icon='BLANK1')
|
||||
elif device_type == 'OPTIX':
|
||||
compute_capability = "5.0"
|
||||
driver_version = "470"
|
||||
col.label(text=iface_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
|
||||
icon='BLANK1', translate=False)
|
||||
col.label(text=iface_("and NVIDIA driver version %s or newer") % driver_version,
|
||||
icon='BLANK1', translate=False)
|
||||
col.label(text="Requires NVIDIA GPU with compute capability 5.0", icon='BLANK1')
|
||||
col.label(text="and NVIDIA driver version 470 or newer", icon='BLANK1')
|
||||
elif device_type == 'HIP':
|
||||
if True:
|
||||
col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
|
||||
else:
|
||||
import sys
|
||||
if sys.platform[:3] == "win":
|
||||
driver_version = "21.Q4"
|
||||
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
|
||||
col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
|
||||
icon='BLANK1', translate=False)
|
||||
elif sys.platform.startswith("linux"):
|
||||
driver_version = "22.10"
|
||||
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
|
||||
col.label(text=iface_("and AMD driver version %s or newer") % driver_version, icon='BLANK1',
|
||||
translate=False)
|
||||
import sys
|
||||
if sys.platform[:3] == "win":
|
||||
col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
|
||||
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
|
||||
elif sys.platform.startswith("linux"):
|
||||
col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
|
||||
col.label(text="and AMD driver version 22.10 or newer", icon='BLANK1')
|
||||
elif device_type == 'ONEAPI':
|
||||
import sys
|
||||
if sys.platform.startswith("win"):
|
||||
driver_version = "101.4032"
|
||||
col.label(text="Requires Intel GPU with Xe-HPG architecture", icon='BLANK1')
|
||||
col.label(text=iface_("and Windows driver version %s or newer") % driver_version,
|
||||
icon='BLANK1', translate=False)
|
||||
col.label(text="and Windows driver version 101.4032 or newer", icon='BLANK1')
|
||||
elif sys.platform.startswith("linux"):
|
||||
driver_version = "1.3.24931"
|
||||
col.label(text="Requires Intel GPU with Xe-HPG architecture and", icon='BLANK1')
|
||||
col.label(text=iface_(" - intel-level-zero-gpu version %s or newer") % driver_version,
|
||||
icon='BLANK1', translate=False)
|
||||
col.label(text=" - intel-level-zero-gpu version 1.3.24931 or newer", icon='BLANK1')
|
||||
col.label(text=" - oneAPI Level-Zero Loader", icon='BLANK1')
|
||||
elif device_type == 'METAL':
|
||||
silicon_mac_version = "12.2"
|
||||
amd_mac_version = "12.3"
|
||||
col.label(text=iface_("Requires Apple Silicon with macOS %s or newer") % silicon_mac_version,
|
||||
icon='BLANK1', translate=False)
|
||||
col.label(text=iface_("or AMD with macOS %s or newer") % amd_mac_version, icon='BLANK1',
|
||||
translate=False)
|
||||
col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')
|
||||
col.label(text="or AMD with macOS 12.3 or newer", icon='BLANK1')
|
||||
return
|
||||
|
||||
for device in devices:
|
||||
@@ -1719,8 +1697,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
.replace('(TM)', unicodedata.lookup('TRADE MARK SIGN'))
|
||||
.replace('(tm)', unicodedata.lookup('TRADE MARK SIGN'))
|
||||
.replace('(R)', unicodedata.lookup('REGISTERED SIGN'))
|
||||
.replace('(C)', unicodedata.lookup('COPYRIGHT SIGN')),
|
||||
translate=False
|
||||
.replace('(C)', unicodedata.lookup('COPYRIGHT SIGN'))
|
||||
)
|
||||
|
||||
def draw_impl(self, layout, context):
|
||||
@@ -1745,21 +1722,19 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
row.prop(self, "peer_memory")
|
||||
|
||||
if compute_device_type == 'METAL':
|
||||
import platform
|
||||
import re
|
||||
is_navi_2 = False
|
||||
import platform, re
|
||||
isNavi2 = False
|
||||
for device in devices:
|
||||
if re.search(r"((RX)|(Pro)|(PRO))\s+W?6\d00X", device.name):
|
||||
is_navi_2 = True
|
||||
break
|
||||
obj = re.search("((RX)|(Pro)|(PRO))\s+W?6\d00X",device.name)
|
||||
if obj:
|
||||
isNavi2 = True
|
||||
|
||||
# MetalRT only works on Apple Silicon and Navi2.
|
||||
is_arm64 = platform.machine() == 'arm64'
|
||||
if is_arm64 or is_navi_2:
|
||||
# MetalRT only works on Apple Silicon and Navi2
|
||||
if platform.machine() == 'arm64' or isNavi2:
|
||||
col = layout.column()
|
||||
col.use_property_split = True
|
||||
# Kernel specialization is only supported on Apple Silicon
|
||||
if is_arm64:
|
||||
if platform.machine() == 'arm64':
|
||||
col.prop(self, "kernel_optimization_level")
|
||||
col.prop(self, "use_metalrt")
|
||||
|
||||
|
@@ -20,7 +20,7 @@ class CyclesPresetPanel(PresetPanel, Panel):
|
||||
@staticmethod
|
||||
def post_cb(context):
|
||||
# Modify an arbitrary built-in scene property to force a depsgraph
|
||||
# update, because add-on properties don't. (see #62325)
|
||||
# update, because add-on properties don't. (see T62325)
|
||||
render = context.scene.render
|
||||
render.filter_size = render.filter_size
|
||||
|
||||
|
@@ -54,10 +54,44 @@ int BlenderDisplayShader::get_tex_coord_attrib_location()
|
||||
/* --------------------------------------------------------------------
|
||||
* BlenderFallbackDisplayShader.
|
||||
*/
|
||||
|
||||
/* TODO move shaders to standalone .glsl file. */
|
||||
static const char *FALLBACK_VERTEX_SHADER =
|
||||
"uniform vec2 fullscreen;\n"
|
||||
"in vec2 texCoord;\n"
|
||||
"in vec2 pos;\n"
|
||||
"out vec2 texCoord_interp;\n"
|
||||
"\n"
|
||||
"vec2 normalize_coordinates()\n"
|
||||
"{\n"
|
||||
" return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
|
||||
" texCoord_interp = texCoord;\n"
|
||||
"}\n\0";
|
||||
|
||||
static const char *FALLBACK_FRAGMENT_SHADER =
|
||||
"uniform sampler2D image_texture;\n"
|
||||
"in vec2 texCoord_interp;\n"
|
||||
"out vec4 fragColor;\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" fragColor = texture(image_texture, texCoord_interp);\n"
|
||||
"}\n\0";
|
||||
|
||||
static GPUShader *compile_fallback_shader(void)
|
||||
{
|
||||
/* NOTE: Compilation errors are logged to console. */
|
||||
GPUShader *shader = GPU_shader_create_from_info_name("gpu_shader_cycles_display_fallback");
|
||||
GPUShader *shader = GPU_shader_create(FALLBACK_VERTEX_SHADER,
|
||||
FALLBACK_FRAGMENT_SHADER,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
"FallbackCyclesBlitShader");
|
||||
return shader;
|
||||
}
|
||||
|
||||
@@ -71,12 +105,11 @@ GPUShader *BlenderFallbackDisplayShader::bind(int width, int height)
|
||||
|
||||
/* Bind shader now to enable uniform assignment. */
|
||||
GPU_shader_bind(shader_program_);
|
||||
int slot = 0;
|
||||
GPU_shader_uniform_int_ex(shader_program_, image_texture_location_, 1, 1, &slot);
|
||||
GPU_shader_uniform_int(shader_program_, image_texture_location_, 0);
|
||||
float size[2];
|
||||
size[0] = width;
|
||||
size[1] = height;
|
||||
GPU_shader_uniform_float_ex(shader_program_, fullscreen_location_, 2, 1, size);
|
||||
GPU_shader_uniform_vector(shader_program_, fullscreen_location_, 2, 1, size);
|
||||
return shader_program_;
|
||||
}
|
||||
|
||||
@@ -216,13 +249,8 @@ class DisplayGPUTexture {
|
||||
height = texture_height;
|
||||
|
||||
/* Texture must have a minimum size of 1x1. */
|
||||
gpu_texture = GPU_texture_create_2d("CyclesBlitTexture",
|
||||
max(width, 1),
|
||||
max(height, 1),
|
||||
1,
|
||||
GPU_RGBA16F,
|
||||
GPU_TEXTURE_USAGE_GENERAL,
|
||||
nullptr);
|
||||
gpu_texture = GPU_texture_create_2d(
|
||||
"CyclesBlitTexture", max(width, 1), max(height, 1), 1, GPU_RGBA16F, nullptr);
|
||||
|
||||
if (!gpu_texture) {
|
||||
LOG(ERROR) << "Error creating texture.";
|
||||
@@ -705,14 +733,14 @@ static void draw_tile(const float2 &zoom,
|
||||
const float zoomed_height = draw_tile.params.size.y * zoom.y;
|
||||
if (texture.width != draw_tile.params.size.x || texture.height != draw_tile.params.size.y) {
|
||||
/* Resolution divider is different from 1, force nearest interpolation. */
|
||||
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_DEFAULT, 0);
|
||||
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_DEFAULT, 0, false);
|
||||
}
|
||||
else if (zoomed_width - draw_tile.params.size.x > 0.5f ||
|
||||
zoomed_height - draw_tile.params.size.y > 0.5f) {
|
||||
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_DEFAULT, 0);
|
||||
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_DEFAULT, 0, false);
|
||||
}
|
||||
else {
|
||||
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_FILTER, 0);
|
||||
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_FILTER, 0, false);
|
||||
}
|
||||
|
||||
/* Draw at the parameters for which the texture has been updated for. This allows to always draw
|
||||
|
@@ -20,7 +20,7 @@ BlenderImageLoader::BlenderImageLoader(BL::Image b_image,
|
||||
: b_image(b_image),
|
||||
frame(frame),
|
||||
tile_number(tile_number),
|
||||
/* Don't free cache for preview render to avoid race condition from #93560, to be fixed
|
||||
/* Don't free cache for preview render to avoid race condition from T93560, to be fixed
|
||||
* properly later as we are close to release. */
|
||||
free_cache(!is_preview_render && !b_image.has_data())
|
||||
{
|
||||
@@ -72,7 +72,7 @@ bool BlenderImageLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaDat
|
||||
metadata.colorspace = u_colorspace_raw;
|
||||
}
|
||||
else {
|
||||
/* In some cases (e.g. #94135), the colorspace setting in Blender gets updated as part of the
|
||||
/* In some cases (e.g. T94135), the colorspace setting in Blender gets updated as part of the
|
||||
* metadata queries in this function, so update the colorspace setting here. */
|
||||
PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
|
||||
metadata.colorspace = get_enum_identifier(colorspace_ptr, "name");
|
||||
|
@@ -24,7 +24,7 @@ void BlenderSync::sync_light(BL::Object &b_parent,
|
||||
Light *light = light_map.find(key);
|
||||
|
||||
/* Check if the transform was modified, in case a linked collection is moved we do not get a
|
||||
* specific depsgraph update (#88515). This also mimics the behavior for Objects. */
|
||||
* specific depsgraph update (T88515). This also mimics the behavior for Objects. */
|
||||
const bool tfm_updated = (light && light->get_tfm() != tfm);
|
||||
|
||||
/* Update if either object or light data changed. */
|
||||
|
@@ -818,23 +818,6 @@ static void attr_create_pointiness(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, b
|
||||
}
|
||||
}
|
||||
|
||||
static std::optional<BL::IntAttribute> find_corner_vert_attribute(BL::Mesh b_mesh)
|
||||
{
|
||||
for (BL::Attribute &b_attribute : b_mesh.attributes) {
|
||||
if (b_attribute.domain() != BL::Attribute::domain_CORNER) {
|
||||
continue;
|
||||
}
|
||||
if (b_attribute.data_type() != BL::Attribute::data_type_INT) {
|
||||
continue;
|
||||
}
|
||||
if (b_attribute.name() != ".corner_vert") {
|
||||
continue;
|
||||
}
|
||||
return BL::IntAttribute{b_attribute};
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/* The Random Per Island attribute is a random float associated with each
|
||||
* connected component (island) of the mesh. The attribute is computed by
|
||||
* first classifying the vertices into different sets using a Disjoint Set
|
||||
@@ -881,11 +864,11 @@ static void attr_create_random_per_island(Scene *scene,
|
||||
else {
|
||||
if (polys_num != 0) {
|
||||
const MPoly *polys = static_cast<const MPoly *>(b_mesh.polygons[0].ptr.data);
|
||||
BL::IntAttribute corner_verts = *find_corner_vert_attribute(b_mesh);
|
||||
const MLoop *loops = static_cast<const MLoop *>(b_mesh.loops[0].ptr.data);
|
||||
for (int i = 0; i < polys_num; i++) {
|
||||
const MPoly &b_poly = polys[i];
|
||||
const int vert = corner_verts.data[b_poly.loopstart].value();
|
||||
data[i] = hash_uint_to_float(vertices_sets.find(vert));
|
||||
const MLoop &b_loop = loops[b_poly.loopstart];
|
||||
data[i] = hash_uint_to_float(vertices_sets.find(b_loop.v));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -910,23 +893,6 @@ static std::optional<BL::IntAttribute> find_material_index_attribute(BL::Mesh b_
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static std::optional<BL::BoolAttribute> find_sharp_face_attribute(BL::Mesh b_mesh)
|
||||
{
|
||||
for (BL::Attribute &b_attribute : b_mesh.attributes) {
|
||||
if (b_attribute.domain() != BL::Attribute::domain_FACE) {
|
||||
continue;
|
||||
}
|
||||
if (b_attribute.data_type() != BL::Attribute::data_type_BOOLEAN) {
|
||||
continue;
|
||||
}
|
||||
if (b_attribute.name() != "sharp_face") {
|
||||
continue;
|
||||
}
|
||||
return BL::BoolAttribute{b_attribute};
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static void create_mesh(Scene *scene,
|
||||
Mesh *mesh,
|
||||
BL::Mesh &b_mesh,
|
||||
@@ -1017,22 +983,16 @@ static void create_mesh(Scene *scene,
|
||||
return 0;
|
||||
};
|
||||
|
||||
std::optional<BL::BoolAttribute> sharp_faces = find_sharp_face_attribute(b_mesh);
|
||||
auto get_face_sharp = [&](const int poly_index) -> bool {
|
||||
if (sharp_faces) {
|
||||
return sharp_faces->data[poly_index].value();
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
/* create faces */
|
||||
const MPoly *polys = static_cast<const MPoly *>(b_mesh.polygons[0].ptr.data);
|
||||
if (!subdivision) {
|
||||
for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) {
|
||||
const int poly_index = t.polygon_index();
|
||||
const MPoly &b_poly = polys[poly_index];
|
||||
int3 vi = get_int3(t.vertices());
|
||||
|
||||
int shader = get_material_index(poly_index);
|
||||
bool smooth = !get_face_sharp(poly_index) || use_loop_normals;
|
||||
bool smooth = (b_poly.flag & ME_SMOOTH) || use_loop_normals;
|
||||
|
||||
if (use_loop_normals) {
|
||||
BL::Array<float, 9> loop_normals = t.split_normals();
|
||||
@@ -1052,19 +1012,19 @@ static void create_mesh(Scene *scene,
|
||||
else {
|
||||
vector<int> vi;
|
||||
|
||||
const MPoly *polys = static_cast<const MPoly *>(b_mesh.polygons[0].ptr.data);
|
||||
std::optional<BL::IntAttribute> corner_verts = find_corner_vert_attribute(b_mesh);
|
||||
const MLoop *loops = static_cast<const MLoop *>(b_mesh.loops[0].ptr.data);
|
||||
|
||||
for (int i = 0; i < numfaces; i++) {
|
||||
const MPoly &b_poly = polys[i];
|
||||
int n = b_poly.totloop;
|
||||
int shader = get_material_index(i);
|
||||
bool smooth = !get_face_sharp(i) || use_loop_normals;
|
||||
bool smooth = (b_poly.flag & ME_SMOOTH) || use_loop_normals;
|
||||
|
||||
vi.resize(n);
|
||||
for (int i = 0; i < n; i++) {
|
||||
/* NOTE: Autosmooth is already taken care of. */
|
||||
vi[i] = corner_verts->data[b_poly.loopstart + i].value();
|
||||
|
||||
vi[i] = loops[b_poly.loopstart + i].v;
|
||||
}
|
||||
|
||||
/* create subd faces */
|
||||
|
@@ -222,10 +222,7 @@ static void export_pointcloud_motion(PointCloud *pointcloud,
|
||||
|
||||
/* Export motion points. */
|
||||
const int num_points = pointcloud->num_points();
|
||||
// Point cloud attributes are stored as float4 with the radius
|
||||
// in the w element. This is explicit now as float3 is no longer
|
||||
// interchangeable with float4 as it is packed now.
|
||||
float4 *mP = attr_mP->data_float4() + motion_step * num_points;
|
||||
float3 *mP = attr_mP->data_float3() + motion_step * num_points;
|
||||
bool have_motion = false;
|
||||
const array<float3> &pointcloud_points = pointcloud->get_points();
|
||||
|
||||
@@ -234,9 +231,11 @@ static void export_pointcloud_motion(PointCloud *pointcloud,
|
||||
std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);
|
||||
|
||||
for (int i = 0; i < std::min(num_points, b_points_num); i++) {
|
||||
const float3 P = get_float3(b_attr_position.data[i].vector());
|
||||
const float3 co = get_float3(b_attr_position.data[i].vector());
|
||||
const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.01f;
|
||||
mP[i] = make_float4(P.x, P.y, P.z, radius);
|
||||
float3 P = co;
|
||||
P.w = radius;
|
||||
mP[i] = P;
|
||||
have_motion = have_motion || (P != pointcloud_points[i]);
|
||||
}
|
||||
|
||||
|
@@ -94,7 +94,7 @@ void python_thread_state_restore(void **python_thread_state)
|
||||
*python_thread_state = NULL;
|
||||
}
|
||||
|
||||
static const char *PyC_UnicodeAsBytes(PyObject *py_str, PyObject **coerce)
|
||||
static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce)
|
||||
{
|
||||
const char *result = PyUnicode_AsUTF8(py_str);
|
||||
if (result) {
|
||||
@@ -131,8 +131,8 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)
|
||||
}
|
||||
|
||||
PyObject *path_coerce = nullptr, *user_path_coerce = nullptr;
|
||||
path_init(PyC_UnicodeAsBytes(path, &path_coerce),
|
||||
PyC_UnicodeAsBytes(user_path, &user_path_coerce));
|
||||
path_init(PyC_UnicodeAsByte(path, &path_coerce),
|
||||
PyC_UnicodeAsByte(user_path, &user_path_coerce));
|
||||
Py_XDECREF(path_coerce);
|
||||
Py_XDECREF(user_path_coerce);
|
||||
|
||||
|
@@ -404,7 +404,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
|
||||
* point we know that we've got everything to render current view layer.
|
||||
*/
|
||||
/* At the moment we only free if we are not doing multi-view
|
||||
* (or if we are rendering the last view). See #58142/D4239 for discussion.
|
||||
* (or if we are rendering the last view). See T58142/D4239 for discussion.
|
||||
*/
|
||||
if (view_index == num_views - 1) {
|
||||
free_blender_memory_if_possible();
|
||||
|
@@ -349,7 +349,8 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
|
||||
|
||||
bool use_light_tree = get_boolean(cscene, "use_light_tree");
|
||||
integrator->set_use_light_tree(use_light_tree);
|
||||
integrator->set_light_sampling_threshold(get_float(cscene, "light_sampling_threshold"));
|
||||
integrator->set_light_sampling_threshold(
|
||||
(use_light_tree) ? 0.0f : get_float(cscene, "light_sampling_threshold"));
|
||||
|
||||
if (integrator->use_light_tree_is_modified()) {
|
||||
scene->light_manager->tag_update(scene, LightManager::UPDATE_ALL);
|
||||
@@ -765,7 +766,7 @@ void BlenderSync::free_data_after_sync(BL::Depsgraph &b_depsgraph)
|
||||
(BlenderSession::headless || is_interface_locked) &&
|
||||
/* Baking re-uses the depsgraph multiple times, clearing crashes
|
||||
* reading un-evaluated mesh data which isn't aligned with the
|
||||
* geometry we're baking, see #71012. */
|
||||
* geometry we're baking, see T71012. */
|
||||
!scene->bake_manager->get_baking() &&
|
||||
/* Persistent data must maintain caches for performance and correctness. */
|
||||
!is_persistent_data;
|
||||
|
@@ -180,9 +180,9 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair
|
||||
curve.bounds_grow(k, &hair->get_curve_keys()[0], curve_radius, bounds);
|
||||
const size_t num_keys = hair->get_curve_keys().size();
|
||||
const size_t num_steps = hair->get_motion_steps();
|
||||
const float4 *key_steps = curve_attr_mP->data_float4();
|
||||
const float3 *key_steps = curve_attr_mP->data_float3();
|
||||
for (size_t step = 0; step < num_steps - 1; step++) {
|
||||
curve.bounds_grow(k, key_steps + step * num_keys, bounds);
|
||||
curve.bounds_grow(k, key_steps + step * num_keys, curve_radius, bounds);
|
||||
}
|
||||
if (bounds.valid()) {
|
||||
int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
|
||||
@@ -200,7 +200,7 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair
|
||||
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
|
||||
const size_t num_steps = hair->get_motion_steps();
|
||||
const float3 *curve_keys = &hair->get_curve_keys()[0];
|
||||
const float4 *key_steps = curve_attr_mP->data_float4();
|
||||
const float3 *key_steps = curve_attr_mP->data_float3();
|
||||
const size_t num_keys = hair->get_curve_keys().size();
|
||||
/* Calculate bounding box of the previous time step.
|
||||
* Will be reused later to avoid duplicated work on
|
||||
|
@@ -254,15 +254,20 @@ void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
|
||||
rtcSetGeometryBuildQuality(geom_id, build_quality);
|
||||
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
|
||||
|
||||
const int *triangles = mesh->get_triangles().data();
|
||||
rtcSetSharedGeometryBuffer(geom_id,
|
||||
RTC_BUFFER_TYPE_INDEX,
|
||||
0,
|
||||
RTC_FORMAT_UINT3,
|
||||
triangles,
|
||||
0,
|
||||
sizeof(int) * 3,
|
||||
num_triangles);
|
||||
unsigned *rtc_indices = (unsigned *)rtcSetNewGeometryBuffer(
|
||||
geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(int) * 3, num_triangles);
|
||||
assert(rtc_indices);
|
||||
if (!rtc_indices) {
|
||||
VLOG_WARNING << "Embree could not create new geometry buffer for mesh " << mesh->name.c_str()
|
||||
<< ".\n";
|
||||
return;
|
||||
}
|
||||
for (size_t j = 0; j < num_triangles; ++j) {
|
||||
Mesh::Triangle t = mesh->get_triangle(j);
|
||||
rtc_indices[j * 3] = t.v[0];
|
||||
rtc_indices[j * 3 + 1] = t.v[1];
|
||||
rtc_indices[j * 3 + 2] = t.v[2];
|
||||
}
|
||||
|
||||
set_tri_vertex_buffer(geom_id, mesh, false);
|
||||
|
||||
@@ -304,46 +309,28 @@ void BVHEmbree::set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, con
|
||||
verts = &attr_mP->data_float3()[t_ * num_verts];
|
||||
}
|
||||
|
||||
float *rtc_verts = (update) ?
|
||||
(float *)rtcGetGeometryBufferData(geom_id, RTC_BUFFER_TYPE_VERTEX, t) :
|
||||
(float *)rtcSetNewGeometryBuffer(geom_id,
|
||||
RTC_BUFFER_TYPE_VERTEX,
|
||||
t,
|
||||
RTC_FORMAT_FLOAT3,
|
||||
sizeof(float) * 3,
|
||||
num_verts + 1);
|
||||
|
||||
assert(rtc_verts);
|
||||
if (rtc_verts) {
|
||||
for (size_t j = 0; j < num_verts; ++j) {
|
||||
rtc_verts[0] = verts[j].x;
|
||||
rtc_verts[1] = verts[j].y;
|
||||
rtc_verts[2] = verts[j].z;
|
||||
rtc_verts += 3;
|
||||
}
|
||||
}
|
||||
|
||||
if (update) {
|
||||
rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
|
||||
}
|
||||
else {
|
||||
rtcSetSharedGeometryBuffer(geom_id,
|
||||
RTC_BUFFER_TYPE_VERTEX,
|
||||
t,
|
||||
RTC_FORMAT_FLOAT3,
|
||||
verts,
|
||||
0,
|
||||
sizeof(float3),
|
||||
num_verts + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Packs the hair motion curve data control variables (CVs) into float4s as [x y z radius]
|
||||
*/
|
||||
template<typename T>
|
||||
void pack_motion_verts(size_t num_curves,
|
||||
const Hair *hair,
|
||||
const T *verts,
|
||||
const float *curve_radius,
|
||||
float4 *rtc_verts)
|
||||
{
|
||||
for (size_t j = 0; j < num_curves; ++j) {
|
||||
Hair::Curve c = hair->get_curve(j);
|
||||
int fk = c.first_key;
|
||||
int k = 1;
|
||||
for (; k < c.num_keys + 1; ++k, ++fk) {
|
||||
rtc_verts[k].x = verts[fk].x;
|
||||
rtc_verts[k].y = verts[fk].y;
|
||||
rtc_verts[k].z = verts[fk].z;
|
||||
rtc_verts[k].w = curve_radius[fk];
|
||||
}
|
||||
/* Duplicate Embree's Catmull-Rom spline CVs at the start and end of each curve. */
|
||||
rtc_verts[0] = rtc_verts[1];
|
||||
rtc_verts[k] = rtc_verts[k - 1];
|
||||
rtc_verts += c.num_keys + 2;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -373,10 +360,15 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c
|
||||
const int t_mid = (num_motion_steps - 1) / 2;
|
||||
const float *curve_radius = &hair->get_curve_radius()[0];
|
||||
for (int t = 0; t < num_motion_steps; ++t) {
|
||||
// As float4 and float3 are no longer interchangeable the 2 types need to be
|
||||
// handled separately. Attributes are float4s where the radius is stored in w and
|
||||
// the middle motion vector is from the mesh points which are stored float3s with
|
||||
// the radius stored in another array.
|
||||
const float3 *verts;
|
||||
if (t == t_mid || attr_mP == NULL) {
|
||||
verts = &hair->get_curve_keys()[0];
|
||||
}
|
||||
else {
|
||||
int t_ = (t > t_mid) ? (t - 1) : t;
|
||||
verts = &attr_mP->data_float3()[t_ * num_keys];
|
||||
}
|
||||
|
||||
float4 *rtc_verts = (update) ? (float4 *)rtcGetGeometryBufferData(
|
||||
geom_id, RTC_BUFFER_TYPE_VERTEX, t) :
|
||||
(float4 *)rtcSetNewGeometryBuffer(geom_id,
|
||||
@@ -389,14 +381,18 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c
|
||||
assert(rtc_verts);
|
||||
if (rtc_verts) {
|
||||
const size_t num_curves = hair->num_curves();
|
||||
if (t == t_mid || attr_mP == NULL) {
|
||||
const float3 *verts = &hair->get_curve_keys()[0];
|
||||
pack_motion_verts<float3>(num_curves, hair, verts, curve_radius, rtc_verts);
|
||||
}
|
||||
else {
|
||||
int t_ = (t > t_mid) ? (t - 1) : t;
|
||||
const float4 *verts = &attr_mP->data_float4()[t_ * num_keys];
|
||||
pack_motion_verts<float4>(num_curves, hair, verts, curve_radius, rtc_verts);
|
||||
for (size_t j = 0; j < num_curves; ++j) {
|
||||
Hair::Curve c = hair->get_curve(j);
|
||||
int fk = c.first_key;
|
||||
int k = 1;
|
||||
for (; k < c.num_keys + 1; ++k, ++fk) {
|
||||
rtc_verts[k] = float3_to_float4(verts[fk]);
|
||||
rtc_verts[k].w = curve_radius[fk];
|
||||
}
|
||||
/* Duplicate Embree's Catmull-Rom spline CVs at the start and end of each curve. */
|
||||
rtc_verts[0] = rtc_verts[1];
|
||||
rtc_verts[k] = rtc_verts[k - 1];
|
||||
rtc_verts += c.num_keys + 2;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -406,20 +402,6 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Pack the motion points into a float4 as [x y z radius]
|
||||
*/
|
||||
template<typename T>
|
||||
void pack_motion_points(size_t num_points, const T *verts, const float *radius, float4 *rtc_verts)
|
||||
{
|
||||
for (size_t j = 0; j < num_points; ++j) {
|
||||
rtc_verts[j].x = verts[j].x;
|
||||
rtc_verts[j].y = verts[j].y;
|
||||
rtc_verts[j].z = verts[j].z;
|
||||
rtc_verts[j].w = radius[j];
|
||||
}
|
||||
}
|
||||
|
||||
void BVHEmbree::set_point_vertex_buffer(RTCGeometry geom_id,
|
||||
const PointCloud *pointcloud,
|
||||
const bool update)
|
||||
@@ -439,10 +421,15 @@ void BVHEmbree::set_point_vertex_buffer(RTCGeometry geom_id,
|
||||
const int t_mid = (num_motion_steps - 1) / 2;
|
||||
const float *radius = pointcloud->get_radius().data();
|
||||
for (int t = 0; t < num_motion_steps; ++t) {
|
||||
// As float4 and float3 are no longer interchangeable the 2 types need to be
|
||||
// handled separately. Attributes are float4s where the radius is stored in w and
|
||||
// the middle motion vector is from the mesh points which are stored float3s with
|
||||
// the radius stored in another array.
|
||||
const float3 *verts;
|
||||
if (t == t_mid || attr_mP == NULL) {
|
||||
verts = pointcloud->get_points().data();
|
||||
}
|
||||
else {
|
||||
int t_ = (t > t_mid) ? (t - 1) : t;
|
||||
verts = &attr_mP->data_float3()[t_ * num_points];
|
||||
}
|
||||
|
||||
float4 *rtc_verts = (update) ? (float4 *)rtcGetGeometryBufferData(
|
||||
geom_id, RTC_BUFFER_TYPE_VERTEX, t) :
|
||||
(float4 *)rtcSetNewGeometryBuffer(geom_id,
|
||||
@@ -454,14 +441,9 @@ void BVHEmbree::set_point_vertex_buffer(RTCGeometry geom_id,
|
||||
|
||||
assert(rtc_verts);
|
||||
if (rtc_verts) {
|
||||
if (t == t_mid || attr_mP == NULL) {
|
||||
const float3 *verts = pointcloud->get_points().data();
|
||||
pack_motion_points<float3>(num_points, verts, radius, rtc_verts);
|
||||
}
|
||||
else {
|
||||
int t_ = (t > t_mid) ? (t - 1) : t;
|
||||
const float4 *verts = &attr_mP->data_float4()[t_ * num_points];
|
||||
pack_motion_points<float4>(num_points, verts, radius, rtc_verts);
|
||||
for (size_t j = 0; j < num_points; ++j) {
|
||||
rtc_verts[j] = float3_to_float4(verts[j]);
|
||||
rtc_verts[j].w = radius[j];
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -35,7 +35,7 @@ struct BVHReferenceCompare {
|
||||
|
||||
/* Compare two references.
|
||||
*
|
||||
* Return value is similar to return value of `strcmp()`.
|
||||
* Return value is similar to return value of strcmp().
|
||||
*/
|
||||
__forceinline int compare(const BVHReference &ra, const BVHReference &rb) const
|
||||
{
|
||||
|
@@ -42,15 +42,12 @@ endif()
|
||||
###########################################################################
|
||||
|
||||
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||
set(WITH_CYCLES_HIP_BINARIES OFF)
|
||||
message(STATUS "HIP temporarily disabled due to compiler bugs")
|
||||
find_package(HIP)
|
||||
set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
|
||||
|
||||
# find_package(HIP)
|
||||
# set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
|
||||
|
||||
# if(HIP_FOUND)
|
||||
# message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
|
||||
# endif()
|
||||
if(HIP_FOUND)
|
||||
message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT WITH_HIP_DYNLOAD)
|
||||
|
@@ -53,12 +53,8 @@ void CUDADevice::set_error(const string &error)
|
||||
}
|
||||
|
||||
CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
: GPUDevice(info, stats, profiler)
|
||||
: Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
|
||||
{
|
||||
/* Verify that base class types can be used with specific backend types */
|
||||
static_assert(sizeof(texMemObject) == sizeof(CUtexObject));
|
||||
static_assert(sizeof(arrayMemObject) == sizeof(CUarray));
|
||||
|
||||
first_error = true;
|
||||
|
||||
cuDevId = info.num;
|
||||
@@ -69,6 +65,12 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
|
||||
need_texture_info = false;
|
||||
|
||||
device_texture_headroom = 0;
|
||||
device_working_headroom = 0;
|
||||
move_texture_to_host = false;
|
||||
map_host_limit = 0;
|
||||
map_host_used = 0;
|
||||
can_map_host = 0;
|
||||
pitch_alignment = 0;
|
||||
|
||||
/* Initialize CUDA. */
|
||||
@@ -89,9 +91,8 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
/* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
|
||||
* CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
|
||||
* so we can predict which memory to map to host. */
|
||||
int value;
|
||||
cuda_assert(cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
|
||||
can_map_host = value != 0;
|
||||
cuda_assert(
|
||||
cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
|
||||
|
||||
cuda_assert(cuDeviceGetAttribute(
|
||||
&pitch_alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
|
||||
@@ -498,56 +499,311 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
|
||||
# endif
|
||||
}
|
||||
|
||||
void CUDADevice::get_device_memory_info(size_t &total, size_t &free)
|
||||
void CUDADevice::init_host_memory()
|
||||
{
|
||||
/* Limit amount of host mapped memory, because allocating too much can
|
||||
* cause system instability. Leave at least half or 4 GB of system
|
||||
* memory free, whichever is smaller. */
|
||||
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
|
||||
size_t system_ram = system_physical_ram();
|
||||
|
||||
if (system_ram > 0) {
|
||||
if (system_ram / 2 > default_limit) {
|
||||
map_host_limit = system_ram - default_limit;
|
||||
}
|
||||
else {
|
||||
map_host_limit = system_ram / 2;
|
||||
}
|
||||
}
|
||||
else {
|
||||
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
|
||||
map_host_limit = 0;
|
||||
}
|
||||
|
||||
/* Amount of device memory to keep free after texture memory
|
||||
* and working memory allocations respectively. We set the working
|
||||
* memory limit headroom lower so that some space is left after all
|
||||
* texture memory allocations. */
|
||||
device_working_headroom = 32 * 1024 * 1024LL; // 32MB
|
||||
device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
|
||||
|
||||
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
|
||||
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
|
||||
}
|
||||
|
||||
void CUDADevice::load_texture_info()
|
||||
{
|
||||
if (need_texture_info) {
|
||||
/* Unset flag before copying, so this does not loop indefinitely if the copy below calls
|
||||
* into 'move_textures_to_host' (which calls 'load_texture_info' again). */
|
||||
need_texture_info = false;
|
||||
texture_info.copy_to_device();
|
||||
}
|
||||
}
|
||||
|
||||
void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
|
||||
{
|
||||
/* Break out of recursive call, which can happen when moving memory on a multi device. */
|
||||
static bool any_device_moving_textures_to_host = false;
|
||||
if (any_device_moving_textures_to_host) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Signal to reallocate textures in host memory only. */
|
||||
move_texture_to_host = true;
|
||||
|
||||
while (size > 0) {
|
||||
/* Find suitable memory allocation to move. */
|
||||
device_memory *max_mem = NULL;
|
||||
size_t max_size = 0;
|
||||
bool max_is_image = false;
|
||||
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
foreach (CUDAMemMap::value_type &pair, cuda_mem_map) {
|
||||
device_memory &mem = *pair.first;
|
||||
CUDAMem *cmem = &pair.second;
|
||||
|
||||
/* Can only move textures allocated on this device (and not those from peer devices).
|
||||
* And need to ignore memory that is already on the host. */
|
||||
if (!mem.is_resident(this) || cmem->use_mapped_host) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
|
||||
(&mem != &texture_info);
|
||||
bool is_image = is_texture && (mem.data_height > 1);
|
||||
|
||||
/* Can't move this type of memory. */
|
||||
if (!is_texture || cmem->array) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* For other textures, only move image textures. */
|
||||
if (for_texture && !is_image) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Try to move largest allocation, prefer moving images. */
|
||||
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
|
||||
max_is_image = is_image;
|
||||
max_size = mem.device_size;
|
||||
max_mem = &mem;
|
||||
}
|
||||
}
|
||||
lock.unlock();
|
||||
|
||||
/* Move to host memory. This part is mutex protected since
|
||||
* multiple CUDA devices could be moving the memory. The
|
||||
* first one will do it, and the rest will adopt the pointer. */
|
||||
if (max_mem) {
|
||||
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
|
||||
|
||||
static thread_mutex move_mutex;
|
||||
thread_scoped_lock lock(move_mutex);
|
||||
|
||||
any_device_moving_textures_to_host = true;
|
||||
|
||||
/* Potentially need to call back into multi device, so pointer mapping
|
||||
* and peer devices are updated. This is also necessary since the device
|
||||
* pointer may just be a key here, so cannot be accessed and freed directly.
|
||||
* Unfortunately it does mean that memory is reallocated on all other
|
||||
* devices as well, which is potentially dangerous when still in use (since
|
||||
* a thread rendering on another device would only be caught in this mutex
|
||||
* if it so happens to do an allocation at the same time as well). */
|
||||
max_mem->device_copy_to();
|
||||
size = (max_size >= size) ? 0 : size - max_size;
|
||||
|
||||
any_device_moving_textures_to_host = false;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
|
||||
move_texture_to_host = false;
|
||||
|
||||
/* Update texture info array with new pointers. */
|
||||
load_texture_info();
|
||||
}
|
||||
|
||||
/* Allocate backing storage for `mem` on this CUDA device.
 *
 * Device memory is tried first while keeping a headroom free. When that does not succeed and
 * the device can map host memory, the allocation falls back to host memory: either a host
 * allocation another device already made for `mem`, or a new one from cuMemHostAlloc.
 *
 * `pitch_padding` is added to `mem.memory_size()` to obtain the allocation size.
 *
 * Returns the `cuda_mem_map` entry for `mem`, or a null pointer when no device pointer was
 * obtained. */
CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_padding)
{
  CUDAContextScope context(this);

  const size_t alloc_size = mem.memory_size() + pitch_padding;
  CUdeviceptr device_pointer = 0;
  CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
  const char *status_suffix = "";

  /* Texture and global memory count as textures, with the exception of the texture info
   * array: it is small and frequently accessed, so it is treated as working memory. */
  const bool is_texture = (&mem != &texture_info) &&
                          (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL);
  const bool is_image = is_texture && (mem.data_height > 1);

  /* Amount of device memory to leave untouched by this kind of allocation. */
  size_t headroom = device_working_headroom;
  if (is_texture) {
    headroom = device_texture_headroom;
  }

  size_t total_bytes = 0;
  size_t free_bytes = 0;
  cuMemGetInfo(&free_bytes, &total_bytes);

  /* Not enough room: try to move textures to host memory, assuming the performance impact
   * would have been worse for working memory. Then query the free amount again. */
  if (!move_texture_to_host && !is_image && (alloc_size + headroom) >= free_bytes &&
      can_map_host) {
    move_textures_to_host(alloc_size + headroom - free_bytes, is_texture);
    cuMemGetInfo(&free_bytes, &total_bytes);
  }

  /* Allocate in device memory, respecting the headroom. */
  if (!move_texture_to_host && (alloc_size + headroom) < free_bytes) {
    mem_alloc_result = cuMemAlloc(&device_pointer, alloc_size);
    if (mem_alloc_result == CUDA_SUCCESS) {
      status_suffix = " in device memory";
    }
  }

  /* Fall back to mapped host memory if needed and possible. */
  void *shared_pointer = nullptr;

  if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) {
    if (mem.shared_pointer) {
      /* Another device already allocated host memory, adopt it. */
      mem_alloc_result = CUDA_SUCCESS;
      shared_pointer = mem.shared_pointer;
    }
    else if (map_host_used + alloc_size < map_host_limit) {
      /* Allocate host memory ourselves. */
      mem_alloc_result = cuMemHostAlloc(
          &shared_pointer, alloc_size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);

      assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
             (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
    }

    if (mem_alloc_result == CUDA_SUCCESS) {
      /* Obtain the device-side address of the host allocation. */
      cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0));
      map_host_used += alloc_size;
      status_suffix = " in host memory";
    }
  }

  /* Report failure; which message is used depends on whether a host fallback was allowed
   * for this memory type. */
  if (mem_alloc_result != CUDA_SUCCESS) {
    const bool device_only = (mem.type == MEM_DEVICE_ONLY);
    if (device_only) {
      status_suffix = " failed, out of device memory";
      set_error("System is out of GPU memory");
    }
    else {
      status_suffix = " failed, out of device and host memory";
      set_error("System is out of GPU and shared host memory");
    }
  }

  if (mem.name) {
    VLOG_WORK << "Buffer allocate: " << mem.name << ", "
              << string_human_readable_number(mem.memory_size()) << " bytes. ("
              << string_human_readable_size(mem.memory_size()) << ")" << status_suffix;
  }

  /* Size and statistics are recorded regardless of the outcome. */
  mem.device_pointer = (device_ptr)device_pointer;
  mem.device_size = alloc_size;
  stats.mem_alloc(alloc_size);

  if (!mem.device_pointer) {
    return nullptr;
  }

  /* Insert into map of allocations. */
  thread_scoped_lock map_lock(cuda_mem_map_mutex);
  CUDAMem *entry = &cuda_mem_map[&mem];

  const bool uses_mapped_host = (shared_pointer != nullptr);
  if (uses_mapped_host) {
    /* Replace the host pointer with our host allocation. Only works if the CUDA memory
     * layout is the same and has no pitch padding. Also does not work if textures are being
     * moved to host during a render, since other devices might be using the memory. */
    const bool adopt_as_host_pointer = !move_texture_to_host && pitch_padding == 0 &&
                                       mem.host_pointer && mem.host_pointer != shared_pointer;
    if (adopt_as_host_pointer) {
      memcpy(shared_pointer, mem.host_pointer, alloc_size);

      /* A call to device_memory::host_free() should normally be preceded by a call to
       * device_memory::device_free(), so host memory allocated by a device is handled
       * properly. Here (and in OptiXDevice::generic_alloc()) the current host memory can be
       * assumed to come from device_memory::host_alloc(), not from a device. */
      mem.host_free();
      mem.host_pointer = shared_pointer;
    }
    mem.shared_pointer = shared_pointer;
    mem.shared_counter++;
  }
  entry->use_mapped_host = uses_mapped_host;

  return entry;
}
|
||||
|
||||
bool CUDADevice::alloc_device(void *&device_pointer, size_t size)
|
||||
void CUDADevice::generic_copy_to(device_memory &mem)
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
if (!mem.host_pointer || !mem.device_pointer) {
|
||||
return;
|
||||
}
|
||||
|
||||
CUresult mem_alloc_result = cuMemAlloc((CUdeviceptr *)&device_pointer, size);
|
||||
return mem_alloc_result == CUDA_SUCCESS;
|
||||
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
|
||||
* cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
|
||||
* mem.host_pointer. */
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
const CUDAContextScope scope(this);
|
||||
cuda_assert(
|
||||
cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size()));
|
||||
}
|
||||
}
|
||||
|
||||
void CUDADevice::free_device(void *device_pointer)
|
||||
void CUDADevice::generic_free(device_memory &mem)
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
if (mem.device_pointer) {
|
||||
CUDAContextScope scope(this);
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
|
||||
const CUDAMem &cmem = cuda_mem_map[&mem];
|
||||
|
||||
cuda_assert(cuMemFree((CUdeviceptr)device_pointer));
|
||||
}
|
||||
/* If cmem.use_mapped_host is true, reference counting is used
|
||||
* to safely free a mapped host memory. */
|
||||
|
||||
bool CUDADevice::alloc_host(void *&shared_pointer, size_t size)
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
if (cmem.use_mapped_host) {
|
||||
assert(mem.shared_pointer);
|
||||
if (mem.shared_pointer) {
|
||||
assert(mem.shared_counter > 0);
|
||||
if (--mem.shared_counter == 0) {
|
||||
if (mem.host_pointer == mem.shared_pointer) {
|
||||
mem.host_pointer = 0;
|
||||
}
|
||||
cuMemFreeHost(mem.shared_pointer);
|
||||
mem.shared_pointer = 0;
|
||||
}
|
||||
}
|
||||
map_host_used -= mem.device_size;
|
||||
}
|
||||
else {
|
||||
/* Free device memory. */
|
||||
cuda_assert(cuMemFree(mem.device_pointer));
|
||||
}
|
||||
|
||||
CUresult mem_alloc_result = cuMemHostAlloc(
|
||||
&shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
|
||||
return mem_alloc_result == CUDA_SUCCESS;
|
||||
}
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
|
||||
void CUDADevice::free_host(void *shared_pointer)
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
|
||||
cuMemFreeHost(shared_pointer);
|
||||
}
|
||||
|
||||
void CUDADevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
|
||||
cuda_assert(cuMemHostGetDevicePointer_v2((CUdeviceptr *)&device_pointer, shared_pointer, 0));
|
||||
}
|
||||
|
||||
void CUDADevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
|
||||
{
|
||||
const CUDAContextScope scope(this);
|
||||
|
||||
cuda_assert(cuMemcpyHtoD((CUdeviceptr)device_pointer, host_pointer, size));
|
||||
cuda_mem_map.erase(cuda_mem_map.find(&mem));
|
||||
}
|
||||
}
|
||||
|
||||
void CUDADevice::mem_alloc(device_memory &mem)
|
||||
@@ -612,8 +868,8 @@ void CUDADevice::mem_zero(device_memory &mem)
|
||||
|
||||
/* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
|
||||
* regardless of mem.host_pointer and mem.shared_pointer. */
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
const CUDAContextScope scope(this);
|
||||
cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
|
||||
}
|
||||
@@ -738,19 +994,19 @@ void CUDADevice::tex_alloc(device_texture &mem)
|
||||
return;
|
||||
}
|
||||
|
||||
Mem *cmem = NULL;
|
||||
CUDAMem *cmem = NULL;
|
||||
CUarray array_3d = NULL;
|
||||
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
|
||||
size_t dst_pitch = src_pitch;
|
||||
|
||||
if (!mem.is_resident(this)) {
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
cmem = &cuda_mem_map[&mem];
|
||||
cmem->texobject = 0;
|
||||
|
||||
if (mem.data_depth > 1) {
|
||||
array_3d = (CUarray)mem.device_pointer;
|
||||
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
|
||||
cmem->array = array_3d;
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
dst_pitch = align_up(src_pitch, pitch_alignment);
|
||||
@@ -794,10 +1050,10 @@ void CUDADevice::tex_alloc(device_texture &mem)
|
||||
mem.device_size = size;
|
||||
stats.mem_alloc(size);
|
||||
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
cmem = &cuda_mem_map[&mem];
|
||||
cmem->texobject = 0;
|
||||
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
|
||||
cmem->array = array_3d;
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
/* 2D texture, using pitch aligned linear memory. */
|
||||
@@ -881,8 +1137,8 @@ void CUDADevice::tex_alloc(device_texture &mem)
|
||||
texDesc.filterMode = filter_mode;
|
||||
texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
|
||||
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
cmem = &cuda_mem_map[&mem];
|
||||
|
||||
cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
|
||||
|
||||
@@ -897,9 +1153,9 @@ void CUDADevice::tex_free(device_texture &mem)
|
||||
{
|
||||
if (mem.device_pointer) {
|
||||
CUDAContextScope scope(this);
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
|
||||
const Mem &cmem = device_mem_map[&mem];
|
||||
thread_scoped_lock lock(cuda_mem_map_mutex);
|
||||
DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
|
||||
const CUDAMem &cmem = cuda_mem_map[&mem];
|
||||
|
||||
if (cmem.texobject) {
|
||||
/* Free bindless texture. */
|
||||
@@ -908,16 +1164,16 @@ void CUDADevice::tex_free(device_texture &mem)
|
||||
|
||||
if (!mem.is_resident(this)) {
|
||||
/* Do not free memory here, since it was allocated on a different device. */
|
||||
device_mem_map.erase(device_mem_map.find(&mem));
|
||||
cuda_mem_map.erase(cuda_mem_map.find(&mem));
|
||||
}
|
||||
else if (cmem.array) {
|
||||
/* Free array. */
|
||||
cuArrayDestroy(reinterpret_cast<CUarray>(cmem.array));
|
||||
cuArrayDestroy(cmem.array);
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
|
||||
device_mem_map.erase(device_mem_map.find(&mem));
|
||||
cuda_mem_map.erase(cuda_mem_map.find(&mem));
|
||||
}
|
||||
else {
|
||||
lock.unlock();
|
||||
|
@@ -21,7 +21,7 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
class DeviceQueue;
|
||||
|
||||
class CUDADevice : public GPUDevice {
|
||||
class CUDADevice : public Device {
|
||||
|
||||
friend class CUDAContextScope;
|
||||
|
||||
@@ -29,11 +29,36 @@ class CUDADevice : public GPUDevice {
|
||||
CUdevice cuDevice;
|
||||
CUcontext cuContext;
|
||||
CUmodule cuModule;
|
||||
size_t device_texture_headroom;
|
||||
size_t device_working_headroom;
|
||||
bool move_texture_to_host;
|
||||
size_t map_host_used;
|
||||
size_t map_host_limit;
|
||||
int can_map_host;
|
||||
int pitch_alignment;
|
||||
int cuDevId;
|
||||
int cuDevArchitecture;
|
||||
bool first_error;
|
||||
|
||||
struct CUDAMem {
|
||||
CUDAMem() : texobject(0), array(0), use_mapped_host(false)
|
||||
{
|
||||
}
|
||||
|
||||
CUtexObject texobject;
|
||||
CUarray array;
|
||||
|
||||
/* If true, a mapped host memory in shared_pointer is being used. */
|
||||
bool use_mapped_host;
|
||||
};
|
||||
typedef map<device_memory *, CUDAMem> CUDAMemMap;
|
||||
CUDAMemMap cuda_mem_map;
|
||||
thread_mutex cuda_mem_map_mutex;
|
||||
|
||||
/* Bindless Textures */
|
||||
device_vector<TextureInfo> texture_info;
|
||||
bool need_texture_info;
|
||||
|
||||
CUDADeviceKernels kernels;
|
||||
|
||||
static bool have_precompiled_kernels();
|
||||
@@ -63,13 +88,17 @@ class CUDADevice : public GPUDevice {
|
||||
|
||||
void reserve_local_memory(const uint kernel_features);
|
||||
|
||||
virtual void get_device_memory_info(size_t &total, size_t &free) override;
|
||||
virtual bool alloc_device(void *&device_pointer, size_t size) override;
|
||||
virtual void free_device(void *device_pointer) override;
|
||||
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
|
||||
virtual void free_host(void *shared_pointer) override;
|
||||
virtual void transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
|
||||
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;
|
||||
void init_host_memory();
|
||||
|
||||
void load_texture_info();
|
||||
|
||||
void move_textures_to_host(size_t size, bool for_texture);
|
||||
|
||||
CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
|
||||
|
||||
void generic_copy_to(device_memory &mem);
|
||||
|
||||
void generic_free(device_memory &mem);
|
||||
|
||||
void mem_alloc(device_memory &mem) override;
|
||||
|
||||
|
@@ -452,320 +452,6 @@ void *Device::get_cpu_osl_memory()
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* Out-of-line destructor; it has no work of its own. Declared `noexcept(false)` to match the
 * declaration in the class. */
GPUDevice::~GPUDevice() noexcept(false) {}
|
||||
|
||||
bool GPUDevice::load_texture_info()
|
||||
{
|
||||
if (need_texture_info) {
|
||||
/* Unset flag before copying, so this does not loop indefinitely if the copy below calls
|
||||
* into 'move_textures_to_host' (which calls 'load_texture_info' again). */
|
||||
need_texture_info = false;
|
||||
texture_info.copy_to_device();
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void GPUDevice::init_host_memory(size_t preferred_texture_headroom,
|
||||
size_t preferred_working_headroom)
|
||||
{
|
||||
/* Limit amount of host mapped memory, because allocating too much can
|
||||
* cause system instability. Leave at least half or 4 GB of system
|
||||
* memory free, whichever is smaller. */
|
||||
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
|
||||
size_t system_ram = system_physical_ram();
|
||||
|
||||
if (system_ram > 0) {
|
||||
if (system_ram / 2 > default_limit) {
|
||||
map_host_limit = system_ram - default_limit;
|
||||
}
|
||||
else {
|
||||
map_host_limit = system_ram / 2;
|
||||
}
|
||||
}
|
||||
else {
|
||||
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
|
||||
map_host_limit = 0;
|
||||
}
|
||||
|
||||
/* Amount of device memory to keep free after texture memory
|
||||
* and working memory allocations respectively. We set the working
|
||||
* memory limit headroom lower than the working one so there
|
||||
* is space left for it. */
|
||||
device_working_headroom = preferred_working_headroom > 0 ? preferred_working_headroom :
|
||||
32 * 1024 * 1024LL; // 32MB
|
||||
device_texture_headroom = preferred_texture_headroom > 0 ? preferred_texture_headroom :
|
||||
128 * 1024 * 1024LL; // 128MB
|
||||
|
||||
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
|
||||
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
|
||||
}
|
||||
|
||||
void GPUDevice::move_textures_to_host(size_t size, bool for_texture)
|
||||
{
|
||||
/* Break out of recursive call, which can happen when moving memory on a multi device. */
|
||||
static bool any_device_moving_textures_to_host = false;
|
||||
if (any_device_moving_textures_to_host) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Signal to reallocate textures in host memory only. */
|
||||
move_texture_to_host = true;
|
||||
|
||||
while (size > 0) {
|
||||
/* Find suitable memory allocation to move. */
|
||||
device_memory *max_mem = NULL;
|
||||
size_t max_size = 0;
|
||||
bool max_is_image = false;
|
||||
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
foreach (MemMap::value_type &pair, device_mem_map) {
|
||||
device_memory &mem = *pair.first;
|
||||
Mem *cmem = &pair.second;
|
||||
|
||||
/* Can only move textures allocated on this device (and not those from peer devices).
|
||||
* And need to ignore memory that is already on the host. */
|
||||
if (!mem.is_resident(this) || cmem->use_mapped_host) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
|
||||
(&mem != &texture_info);
|
||||
bool is_image = is_texture && (mem.data_height > 1);
|
||||
|
||||
/* Can't move this type of memory. */
|
||||
if (!is_texture || cmem->array) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* For other textures, only move image textures. */
|
||||
if (for_texture && !is_image) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Try to move largest allocation, prefer moving images. */
|
||||
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
|
||||
max_is_image = is_image;
|
||||
max_size = mem.device_size;
|
||||
max_mem = &mem;
|
||||
}
|
||||
}
|
||||
lock.unlock();
|
||||
|
||||
/* Move to host memory. This part is mutex protected since
|
||||
* multiple backend devices could be moving the memory. The
|
||||
* first one will do it, and the rest will adopt the pointer. */
|
||||
if (max_mem) {
|
||||
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
|
||||
|
||||
static thread_mutex move_mutex;
|
||||
thread_scoped_lock lock(move_mutex);
|
||||
|
||||
any_device_moving_textures_to_host = true;
|
||||
|
||||
/* Potentially need to call back into multi device, so pointer mapping
|
||||
* and peer devices are updated. This is also necessary since the device
|
||||
* pointer may just be a key here, so cannot be accessed and freed directly.
|
||||
* Unfortunately it does mean that memory is reallocated on all other
|
||||
* devices as well, which is potentially dangerous when still in use (since
|
||||
* a thread rendering on another devices would only be caught in this mutex
|
||||
* if it so happens to do an allocation at the same time as well. */
|
||||
max_mem->device_copy_to();
|
||||
size = (max_size >= size) ? 0 : size - max_size;
|
||||
|
||||
any_device_moving_textures_to_host = false;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
|
||||
move_texture_to_host = false;
|
||||
|
||||
/* Update texture info array with new pointers. */
|
||||
load_texture_info();
|
||||
}
|
||||
|
||||
/* Allocate backing storage for `mem` using the backend allocation primitives.
 *
 * Device memory (`alloc_device`) is tried first while keeping a headroom free. When that does
 * not succeed and host mapping is available, the allocation falls back to host memory: either
 * a host allocation another device already made for `mem`, or a new one from `alloc_host`.
 *
 * `pitch_padding` is added to `mem.memory_size()` to obtain the allocation size.
 *
 * Returns the `device_mem_map` entry for `mem`, or a null pointer when no device pointer was
 * obtained. */
GPUDevice::Mem *GPUDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
{
  const size_t alloc_size = mem.memory_size() + pitch_padding;
  void *device_pointer = nullptr;
  bool mem_alloc_result = false;
  const char *status_suffix = "";

  /* Texture and global memory count as textures, with the exception of the texture info
   * array: it is small and frequently accessed, so it is treated as working memory. */
  const bool is_texture = (&mem != &texture_info) &&
                          (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL);
  const bool is_image = is_texture && (mem.data_height > 1);

  /* Amount of device memory to leave untouched by this kind of allocation. */
  size_t headroom = device_working_headroom;
  if (is_texture) {
    headroom = device_texture_headroom;
  }

  size_t total_bytes = 0;
  size_t free_bytes = 0;
  get_device_memory_info(total_bytes, free_bytes);

  /* Not enough room: try to move textures to host memory, assuming the performance impact
   * would have been worse for working memory. Then query the free amount again. */
  if (!move_texture_to_host && !is_image && (alloc_size + headroom) >= free_bytes &&
      can_map_host) {
    move_textures_to_host(alloc_size + headroom - free_bytes, is_texture);
    get_device_memory_info(total_bytes, free_bytes);
  }

  /* Allocate in device memory, respecting the headroom. */
  if (!move_texture_to_host && (alloc_size + headroom) < free_bytes) {
    mem_alloc_result = alloc_device(device_pointer, alloc_size);
    if (mem_alloc_result) {
      device_mem_in_use += alloc_size;
      status_suffix = " in device memory";
    }
  }

  /* Fall back to mapped host memory if needed and possible. */
  void *shared_pointer = nullptr;

  if (!mem_alloc_result && can_map_host && mem.type != MEM_DEVICE_ONLY) {
    if (mem.shared_pointer) {
      /* Another device already allocated host memory, adopt it. */
      mem_alloc_result = true;
      shared_pointer = mem.shared_pointer;
    }
    else if (map_host_used + alloc_size < map_host_limit) {
      /* Allocate host memory ourselves. */
      mem_alloc_result = alloc_host(shared_pointer, alloc_size);

      assert((mem_alloc_result && shared_pointer != 0) ||
             (!mem_alloc_result && shared_pointer == 0));
    }

    if (mem_alloc_result) {
      /* Obtain the device-side address of the host allocation. */
      transform_host_pointer(device_pointer, shared_pointer);
      map_host_used += alloc_size;
      status_suffix = " in host memory";
    }
  }

  /* Report failure; which message is used depends on whether a host fallback was allowed
   * for this memory type. */
  if (!mem_alloc_result) {
    const bool device_only = (mem.type == MEM_DEVICE_ONLY);
    if (device_only) {
      status_suffix = " failed, out of device memory";
      set_error("System is out of GPU memory");
    }
    else {
      status_suffix = " failed, out of device and host memory";
      set_error("System is out of GPU and shared host memory");
    }
  }

  if (mem.name) {
    VLOG_WORK << "Buffer allocate: " << mem.name << ", "
              << string_human_readable_number(mem.memory_size()) << " bytes. ("
              << string_human_readable_size(mem.memory_size()) << ")" << status_suffix;
  }

  /* Size and statistics are recorded regardless of the outcome. */
  mem.device_pointer = (device_ptr)device_pointer;
  mem.device_size = alloc_size;
  stats.mem_alloc(alloc_size);

  if (!mem.device_pointer) {
    return nullptr;
  }

  /* Insert into map of allocations. */
  thread_scoped_lock map_lock(device_mem_map_mutex);
  Mem *entry = &device_mem_map[&mem];

  const bool uses_mapped_host = (shared_pointer != nullptr);
  if (uses_mapped_host) {
    /* Replace the host pointer with our host allocation. Only works if the memory layout is
     * the same and has no pitch padding. Also does not work if textures are being moved to
     * host during a render, since other devices might be using the memory. */
    const bool adopt_as_host_pointer = !move_texture_to_host && pitch_padding == 0 &&
                                       mem.host_pointer && mem.host_pointer != shared_pointer;
    if (adopt_as_host_pointer) {
      memcpy(shared_pointer, mem.host_pointer, alloc_size);

      /* A call to device_memory::host_free() should normally be preceded by a call to
       * device_memory::device_free(), so host memory allocated by a device is handled
       * properly. Here (and in OptiXDevice::generic_alloc()) the current host memory can be
       * assumed to come from device_memory::host_alloc(), not from a device. */
      mem.host_free();
      mem.host_pointer = shared_pointer;
    }
    mem.shared_pointer = shared_pointer;
    mem.shared_counter++;
  }
  entry->use_mapped_host = uses_mapped_host;

  return entry;
}
|
||||
|
||||
/* Release the storage that `generic_alloc` set up for `mem` and drop its entry from
 * `device_mem_map`. Does nothing when `mem` has no device pointer. */
void GPUDevice::generic_free(device_memory &mem)
{
  if (!mem.device_pointer) {
    return;
  }

  thread_scoped_lock map_lock(device_mem_map_mutex);
  DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
  const Mem &entry = device_mem_map[&mem];

  if (!entry.use_mapped_host) {
    /* Free device memory. */
    free_device((void *)mem.device_pointer);
    device_mem_in_use -= mem.device_size;
  }
  else {
    /* Mapped host memory is reference counted, so it is only freed once the last user
     * releases it. */
    assert(mem.shared_pointer);
    if (mem.shared_pointer) {
      assert(mem.shared_counter > 0);
      if (--mem.shared_counter == 0) {
        /* The host pointer may alias the shared allocation; clear it so it does not
         * dangle after the free below. */
        if (mem.host_pointer == mem.shared_pointer) {
          mem.host_pointer = 0;
        }
        free_host(mem.shared_pointer);
        mem.shared_pointer = 0;
      }
    }
    map_host_used -= mem.device_size;
  }

  stats.mem_free(mem.device_size);
  mem.device_pointer = 0;
  mem.device_size = 0;

  device_mem_map.erase(device_mem_map.find(&mem));
}
|
||||
|
||||
/* Copy the contents of `mem.host_pointer` to the device allocation of `mem`.
 *
 * Nothing is copied when either pointer is missing, or when this device uses the mapped host
 * allocation and the host pointer is that same allocation. */
void GPUDevice::generic_copy_to(device_memory &mem)
{
  const bool has_both_pointers = mem.host_pointer && mem.device_pointer;
  if (!has_both_pointers) {
    return;
  }

  /* If use_mapped_host of mem is false, the current device only uses device memory allocated
   * by the backend device allocation regardless of mem.host_pointer and mem.shared_pointer,
   * and should copy data from mem.host_pointer. */
  thread_scoped_lock map_lock(device_mem_map_mutex);
  const bool host_pointer_is_mapping = device_mem_map[&mem].use_mapped_host &&
                                       mem.host_pointer == mem.shared_pointer;
  if (host_pointer_is_mapping) {
    return;
  }

  copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size());
}
|
||||
|
||||
/* DeviceInfo */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -81,7 +81,7 @@ class DeviceInfo {
|
||||
bool has_gpu_queue; /* Device supports GPU queue. */
|
||||
bool use_metalrt; /* Use MetalRT to accelerate ray queries (Metal only). */
|
||||
KernelOptimizationLevel kernel_optimization_level; /* Optimization level applied to path tracing
|
||||
* kernels (Metal only). */
|
||||
kernels (Metal only). */
|
||||
DenoiserTypeMask denoisers; /* Supported denoiser types. */
|
||||
int cpu_threads;
|
||||
vector<DeviceInfo> multi_devices;
|
||||
@@ -182,7 +182,7 @@ class Device {
|
||||
{
|
||||
}
|
||||
|
||||
/* Report status and return true if device is ready for rendering. */
|
||||
/* Return true if device is ready for rendering, or report status if not. */
|
||||
virtual bool is_ready(string & /*status*/) const
|
||||
{
|
||||
return true;
|
||||
@@ -309,93 +309,6 @@ class Device {
|
||||
static uint devices_initialized_mask;
|
||||
};
|
||||
|
||||
/* Device, which is GPU, with some common functionality for GPU back-ends. */
|
||||
class GPUDevice : public Device {
|
||||
protected:
|
||||
GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
|
||||
: Device(info_, stats_, profiler_),
|
||||
texture_info(this, "texture_info", MEM_GLOBAL),
|
||||
need_texture_info(false),
|
||||
can_map_host(false),
|
||||
map_host_used(0),
|
||||
map_host_limit(0),
|
||||
device_texture_headroom(0),
|
||||
device_working_headroom(0),
|
||||
device_mem_map(),
|
||||
device_mem_map_mutex(),
|
||||
move_texture_to_host(false),
|
||||
device_mem_in_use(0)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
virtual ~GPUDevice() noexcept(false);
|
||||
|
||||
/* For GPUs that can use bindless textures in some way or another. */
|
||||
device_vector<TextureInfo> texture_info;
|
||||
bool need_texture_info;
|
||||
/* Returns true if the texture info was copied to the device (meaning, some more
|
||||
* re-initialization might be needed). */
|
||||
virtual bool load_texture_info();
|
||||
|
||||
protected:
|
||||
/* Memory allocation, only accessed through device_memory. */
|
||||
friend class device_memory;
|
||||
|
||||
bool can_map_host;
|
||||
size_t map_host_used;
|
||||
size_t map_host_limit;
|
||||
size_t device_texture_headroom;
|
||||
size_t device_working_headroom;
|
||||
typedef unsigned long long texMemObject;
|
||||
typedef unsigned long long arrayMemObject;
|
||||
struct Mem {
|
||||
Mem() : texobject(0), array(0), use_mapped_host(false)
|
||||
{
|
||||
}
|
||||
|
||||
texMemObject texobject;
|
||||
arrayMemObject array;
|
||||
|
||||
/* If true, a mapped host memory in shared_pointer is being used. */
|
||||
bool use_mapped_host;
|
||||
};
|
||||
typedef map<device_memory *, Mem> MemMap;
|
||||
MemMap device_mem_map;
|
||||
thread_mutex device_mem_map_mutex;
|
||||
bool move_texture_to_host;
|
||||
/* Simple counter which will try to track amount of used device memory */
|
||||
size_t device_mem_in_use;
|
||||
|
||||
virtual void init_host_memory(size_t preferred_texture_headroom = 0,
|
||||
size_t preferred_working_headroom = 0);
|
||||
virtual void move_textures_to_host(size_t size, bool for_texture);
|
||||
|
||||
/* Allocation, deallocation and copy functions, with corresponding
|
||||
* support of device/host allocations. */
|
||||
virtual GPUDevice::Mem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
|
||||
virtual void generic_free(device_memory &mem);
|
||||
virtual void generic_copy_to(device_memory &mem);
|
||||
|
||||
/* total - amount of device memory, free - amount of available device memory */
|
||||
virtual void get_device_memory_info(size_t &total, size_t &free) = 0;
|
||||
|
||||
virtual bool alloc_device(void *&device_pointer, size_t size) = 0;
|
||||
|
||||
virtual void free_device(void *device_pointer) = 0;
|
||||
|
||||
virtual bool alloc_host(void *&shared_pointer, size_t size) = 0;
|
||||
|
||||
virtual void free_host(void *shared_pointer) = 0;
|
||||
|
||||
/* This function should return device pointer corresponding to shared pointer, which
|
||||
* is host buffer, allocated in `alloc_host`. The function should `true`, if such
|
||||
* address transformation is possible and `false` otherwise. */
|
||||
virtual void transform_host_pointer(void *&device_pointer, void *&shared_pointer) = 0;
|
||||
|
||||
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) = 0;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __DEVICE_H__ */
|
||||
|
@@ -53,12 +53,8 @@ void HIPDevice::set_error(const string &error)
|
||||
}
|
||||
|
||||
HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
: GPUDevice(info, stats, profiler)
|
||||
: Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
|
||||
{
|
||||
/* Verify that base class types can be used with specific backend types */
|
||||
static_assert(sizeof(texMemObject) == sizeof(hipTextureObject_t));
|
||||
static_assert(sizeof(arrayMemObject) == sizeof(hArray));
|
||||
|
||||
first_error = true;
|
||||
|
||||
hipDevId = info.num;
|
||||
@@ -69,6 +65,12 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
|
||||
need_texture_info = false;
|
||||
|
||||
device_texture_headroom = 0;
|
||||
device_working_headroom = 0;
|
||||
move_texture_to_host = false;
|
||||
map_host_limit = 0;
|
||||
map_host_used = 0;
|
||||
can_map_host = 0;
|
||||
pitch_alignment = 0;
|
||||
|
||||
/* Initialize HIP. */
|
||||
@@ -89,9 +91,7 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
/* hipDeviceMapHost for mapping host memory when out of device memory.
|
||||
* hipDeviceLmemResizeToMax for reserving local memory ahead of render,
|
||||
* so we can predict which memory to map to host. */
|
||||
int value;
|
||||
hip_assert(hipDeviceGetAttribute(&value, hipDeviceAttributeCanMapHostMemory, hipDevice));
|
||||
can_map_host = value != 0;
|
||||
hip_assert(hipDeviceGetAttribute(&can_map_host, hipDeviceAttributeCanMapHostMemory, hipDevice));
|
||||
|
||||
hip_assert(
|
||||
hipDeviceGetAttribute(&pitch_alignment, hipDeviceAttributeTexturePitchAlignment, hipDevice));
|
||||
@@ -460,57 +460,305 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
|
||||
# endif
|
||||
}
|
||||
|
||||
void HIPDevice::get_device_memory_info(size_t &total, size_t &free)
|
||||
/* Initialize the limits used when spilling allocations to mapped host memory.
 *
 * Sets `map_host_limit` from the physical RAM reported by system_physical_ram():
 * - RAM / 2 > 4 GB: limit = RAM - 4 GB (keep 4 GB free),
 * - otherwise:      limit = RAM / 2 (keep half free),
 * - RAM unknown (0): limit = 0 and a warning is logged.
 * Also sets `device_working_headroom` (32 MB) and `device_texture_headroom` (128 MB),
 * then logs the resulting mapped host memory limit. */
void HIPDevice::init_host_memory()
|
||||
{
|
||||
/* Limit amount of host mapped memory, because allocating too much can
|
||||
* cause system instability. Leave at least half or 4 GB of system
|
||||
* memory free, whichever is smaller. */
|
||||
/* The LL suffix on the last factor promotes the product to 64 bits (4 GB). */
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
|
||||
size_t system_ram = system_physical_ram();
|
||||
|
||||
if (system_ram > 0) {
|
||||
if (system_ram / 2 > default_limit) {
|
||||
map_host_limit = system_ram - default_limit;
|
||||
}
|
||||
else {
|
||||
map_host_limit = system_ram / 2;
|
||||
}
|
||||
}
|
||||
else {
|
||||
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
|
||||
map_host_limit = 0;
|
||||
}
|
||||
|
||||
/* Amount of device memory to keep free after texture memory
|
||||
* and working memory allocations respectively. We set the working
|
||||
* memory limit headroom lower so that some space is left after all
|
||||
* texture memory allocations. */
|
||||
device_working_headroom = 32 * 1024 * 1024LL; // 32MB
|
||||
device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
|
||||
|
||||
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
|
||||
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
|
||||
}
|
||||
|
||||
/* Copy the `texture_info` array to the device if it has been flagged as needing an update
 * (`need_texture_info`). Does nothing when the flag is not set. */
void HIPDevice::load_texture_info()
|
||||
{
|
||||
if (need_texture_info) {
|
||||
/* Unset flag before copying, so this does not loop indefinitely if the copy below calls
|
||||
* into 'move_textures_to_host' (which calls 'load_texture_info' again). */
|
||||
need_texture_info = false;
|
||||
texture_info.copy_to_device();
|
||||
}
|
||||
}
|
||||
|
||||
/* Move allocations tracked in `hip_mem_map` out of device memory until at least `size` bytes
 * have been moved or no movable allocation is left.
 *
 * \param size: Number of bytes that should be released from device memory.
 * \param for_texture: When true, only image textures (`data_height > 1`) are candidates;
 * otherwise any MEM_TEXTURE / MEM_GLOBAL allocation other than `texture_info` may be moved.
 *
 * While the loop runs `move_texture_to_host` is set, which makes generic_alloc() skip the
 * device-memory path. The actual move is triggered through device_memory::device_copy_to()
 * (presumably this re-enters the allocation functions of this device - confirm against
 * device_memory). Afterwards the texture info array is reloaded via load_texture_info(). */
void HIPDevice::move_textures_to_host(size_t size, bool for_texture)
|
||||
{
|
||||
/* Break out of recursive call, which can happen when moving memory on a multi device. */
|
||||
/* NOTE(review): this is a plain function-local `static bool` shared by all HIPDevice
 * instances; it is read here without holding `move_mutex` - confirm this is intended. */
static bool any_device_moving_textures_to_host = false;
|
||||
if (any_device_moving_textures_to_host) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Signal to reallocate textures in host memory only. */
|
||||
move_texture_to_host = true;
|
||||
|
||||
while (size > 0) {
|
||||
/* Find suitable memory allocation to move. */
|
||||
device_memory *max_mem = NULL;
|
||||
size_t max_size = 0;
|
||||
bool max_is_image = false;
|
||||
|
||||
/* The map is only locked while searching; the lock is released before the move below. */
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
foreach (HIPMemMap::value_type &pair, hip_mem_map) {
|
||||
device_memory &mem = *pair.first;
|
||||
HIPMem *cmem = &pair.second;
|
||||
|
||||
/* Can only move textures allocated on this device (and not those from peer devices).
|
||||
* And need to ignore memory that is already on the host. */
|
||||
if (!mem.is_resident(this) || cmem->use_mapped_host) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
|
||||
(&mem != &texture_info);
|
||||
bool is_image = is_texture && (mem.data_height > 1);
|
||||
|
||||
/* Can't move this type of memory. */
|
||||
if (!is_texture || cmem->array) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* For other textures, only move image textures. */
|
||||
if (for_texture && !is_image) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Try to move largest allocation, prefer moving images. */
|
||||
/* The bools compare as 0/1, so an image always wins over a non-image candidate. */
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
|
||||
max_is_image = is_image;
|
||||
max_size = mem.device_size;
|
||||
max_mem = &mem;
|
||||
}
|
||||
}
|
||||
lock.unlock();
|
||||
|
||||
/* Move to host memory. This part is mutex protected since
|
||||
* multiple HIP devices could be moving the memory. The
|
||||
* first one will do it, and the rest will adopt the pointer. */
|
||||
if (max_mem) {
|
||||
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
|
||||
|
||||
static thread_mutex move_mutex;
|
||||
thread_scoped_lock lock(move_mutex);
|
||||
|
||||
any_device_moving_textures_to_host = true;
|
||||
|
||||
/* Potentially need to call back into multi device, so pointer mapping
|
||||
* and peer devices are updated. This is also necessary since the device
|
||||
* pointer may just be a key here, so cannot be accessed and freed directly.
|
||||
* Unfortunately it does mean that memory is reallocated on all other
|
||||
* devices as well, which is potentially dangerous when still in use (since
|
||||
* a thread rendering on another device would only be caught in this mutex
|
||||
* if it so happens to do an allocation at the same time as well). */
|
||||
max_mem->device_copy_to();
|
||||
/* Subtract the moved size from the remaining request, clamping at zero. */
size = (max_size >= size) ? 0 : size - max_size;
|
||||
|
||||
any_device_moving_textures_to_host = false;
|
||||
}
|
||||
else {
|
||||
/* Nothing left that can be moved. */
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
|
||||
move_texture_to_host = false;
|
||||
|
||||
/* Update texture info array with new pointers. */
|
||||
load_texture_info();
|
||||
}
|
||||
|
||||
/* Allocate storage for `mem`, preferring device memory and falling back to mapped host memory.
 *
 * \param mem: Memory to allocate for. On return `mem.device_pointer` and `mem.device_size`
 * are set; when mapped host memory is used `mem.shared_pointer` / `mem.shared_counter` are
 * updated and `mem.host_pointer` may be replaced by the shared allocation.
 * \param pitch_padding: Extra bytes added to `mem.memory_size()` for the allocation.
 * \return Pointer to the entry for `mem` in `hip_mem_map`, or NULL when no memory could be
 * allocated (an error is reported through set_error() in that case).
 *
 * Order of attempts:
 * 1. If the request plus headroom does not fit in free device memory, try to make room with
 *    move_textures_to_host() (not for image textures, and only if `can_map_host`).
 * 2. Allocate with hipMalloc() when the request plus headroom fits.
 * 3. Otherwise, if `can_map_host`, adopt an existing `mem.shared_pointer` or allocate mapped
 *    host memory with hipHostMalloc() while staying under `map_host_limit`. */
HIPDevice::HIPMem *HIPDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
|
||||
{
|
||||
HIPContextScope scope(this);
|
||||
|
||||
hipDeviceptr_t device_pointer = 0;
|
||||
size_t size = mem.memory_size() + pitch_padding;
|
||||
|
||||
/* Start from "out of memory" so that skipping the device path falls through to host. */
hipError_t mem_alloc_result = hipErrorOutOfMemory;
|
||||
const char *status = "";
|
||||
|
||||
/* First try allocating in device memory, respecting headroom. We make
|
||||
* an exception for texture info. It is small and frequently accessed,
|
||||
* so treat it as working memory.
|
||||
*
|
||||
* If there is not enough room for working memory, we will try to move
|
||||
* textures to host memory, assuming the performance impact would have
|
||||
* been worse for working memory. */
|
||||
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
|
||||
bool is_image = is_texture && (mem.data_height > 1);
|
||||
|
||||
size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
|
||||
|
||||
size_t total = 0, free = 0;
|
||||
/* NOTE(review): the result of hipMemGetInfo() is not checked; on failure `free` stays 0. */
hipMemGetInfo(&free, &total);
|
||||
|
||||
/* Move textures to host memory if needed. */
|
||||
if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
|
||||
move_textures_to_host(size + headroom - free, is_texture);
|
||||
/* Refresh the free-memory figure after textures may have been moved. */
hipMemGetInfo(&free, &total);
|
||||
}
|
||||
|
||||
/* Allocate in device memory. */
|
||||
if (!move_texture_to_host && (size + headroom) < free) {
|
||||
mem_alloc_result = hipMalloc(&device_pointer, size);
|
||||
if (mem_alloc_result == hipSuccess) {
|
||||
status = " in device memory";
|
||||
}
|
||||
}
|
||||
|
||||
/* Fall back to mapped host memory if needed and possible. */
|
||||
|
||||
void *shared_pointer = 0;
|
||||
|
||||
if (mem_alloc_result != hipSuccess && can_map_host) {
|
||||
if (mem.shared_pointer) {
|
||||
/* Another device already allocated host memory. */
|
||||
mem_alloc_result = hipSuccess;
|
||||
shared_pointer = mem.shared_pointer;
|
||||
}
|
||||
else if (map_host_used + size < map_host_limit) {
|
||||
/* Allocate host memory ourselves. */
|
||||
mem_alloc_result = hipHostMalloc(
|
||||
&shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
|
||||
|
||||
assert((mem_alloc_result == hipSuccess && shared_pointer != 0) ||
|
||||
(mem_alloc_result != hipSuccess && shared_pointer == 0));
|
||||
}
|
||||
|
||||
if (mem_alloc_result == hipSuccess) {
|
||||
hip_assert(hipHostGetDevicePointer(&device_pointer, shared_pointer, 0));
|
||||
/* Counted for this device in both cases above, i.e. also when an existing shared
 * allocation from another device is adopted. */
map_host_used += size;
|
||||
status = " in host memory";
|
||||
}
|
||||
}
|
||||
|
||||
if (mem_alloc_result != hipSuccess) {
|
||||
status = " failed, out of device and host memory";
|
||||
set_error("System is out of GPU and shared host memory");
|
||||
}
|
||||
|
||||
if (mem.name) {
|
||||
VLOG_WORK << "Buffer allocate: " << mem.name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")" << status;
|
||||
}
|
||||
|
||||
/* NOTE(review): size and stats are recorded even when the allocation failed and
 * `device_pointer` is still 0; NULL is returned just below in that case. */
mem.device_pointer = (device_ptr)device_pointer;
|
||||
mem.device_size = size;
|
||||
stats.mem_alloc(size);
|
||||
|
||||
if (!mem.device_pointer) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Insert into map of allocations. */
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
HIPMem *cmem = &hip_mem_map[&mem];
|
||||
if (shared_pointer != 0) {
|
||||
/* Replace host pointer with our host allocation. Only works if
|
||||
* HIP memory layout is the same and has no pitch padding. Also
|
||||
* does not work if we move textures to host during a render,
|
||||
* since other devices might be using the memory. */
|
||||
|
||||
if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
|
||||
mem.host_pointer != shared_pointer) {
|
||||
memcpy(shared_pointer, mem.host_pointer, size);
|
||||
|
||||
/* A call to device_memory::host_free() should be preceded by
|
||||
* a call to device_memory::device_free() for host memory
|
||||
* allocated by a device to be handled properly. Two exceptions
|
||||
* are here and a call in OptiXDevice::generic_alloc(), where
|
||||
* the current host memory can be assumed to be allocated by
|
||||
* device_memory::host_alloc(), not by a device. */
|
||||
|
||||
mem.host_free();
|
||||
mem.host_pointer = shared_pointer;
|
||||
}
|
||||
mem.shared_pointer = shared_pointer;
|
||||
/* Reference count of devices using the shared host allocation. */
mem.shared_counter++;
|
||||
cmem->use_mapped_host = true;
|
||||
}
|
||||
else {
|
||||
cmem->use_mapped_host = false;
|
||||
}
|
||||
|
||||
return cmem;
|
||||
}
|
||||
|
||||
bool HIPDevice::alloc_device(void *&device_pointer, size_t size)
|
||||
void HIPDevice::generic_copy_to(device_memory &mem)
|
||||
{
|
||||
HIPContextScope scope(this);
|
||||
if (!mem.host_pointer || !mem.device_pointer) {
|
||||
return;
|
||||
}
|
||||
|
||||
hipError_t mem_alloc_result = hipMalloc((hipDeviceptr_t *)&device_pointer, size);
|
||||
return mem_alloc_result == hipSuccess;
|
||||
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
|
||||
* hipMalloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
|
||||
* mem.host_pointer. */
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
const HIPContextScope scope(this);
|
||||
hip_assert(
|
||||
hipMemcpyHtoD((hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size()));
|
||||
}
|
||||
}
|
||||
|
||||
void HIPDevice::free_device(void *device_pointer)
|
||||
void HIPDevice::generic_free(device_memory &mem)
|
||||
{
|
||||
HIPContextScope scope(this);
|
||||
if (mem.device_pointer) {
|
||||
HIPContextScope scope(this);
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
|
||||
const HIPMem &cmem = hip_mem_map[&mem];
|
||||
|
||||
hip_assert(hipFree((hipDeviceptr_t)device_pointer));
|
||||
}
|
||||
/* If cmem.use_mapped_host is true, reference counting is used
|
||||
* to safely free a mapped host memory. */
|
||||
|
||||
bool HIPDevice::alloc_host(void *&shared_pointer, size_t size)
|
||||
{
|
||||
HIPContextScope scope(this);
|
||||
if (cmem.use_mapped_host) {
|
||||
assert(mem.shared_pointer);
|
||||
if (mem.shared_pointer) {
|
||||
assert(mem.shared_counter > 0);
|
||||
if (--mem.shared_counter == 0) {
|
||||
if (mem.host_pointer == mem.shared_pointer) {
|
||||
mem.host_pointer = 0;
|
||||
}
|
||||
hipHostFree(mem.shared_pointer);
|
||||
mem.shared_pointer = 0;
|
||||
}
|
||||
}
|
||||
map_host_used -= mem.device_size;
|
||||
}
|
||||
else {
|
||||
/* Free device memory. */
|
||||
hip_assert(hipFree(mem.device_pointer));
|
||||
}
|
||||
|
||||
hipError_t mem_alloc_result = hipHostMalloc(
|
||||
&shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
|
||||
return mem_alloc_result == hipSuccess;
|
||||
}
|
||||
|
||||
void HIPDevice::free_host(void *shared_pointer)
|
||||
{
|
||||
HIPContextScope scope(this);
|
||||
|
||||
hipHostFree(shared_pointer);
|
||||
}
|
||||
|
||||
void HIPDevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
|
||||
{
|
||||
HIPContextScope scope(this);
|
||||
|
||||
hip_assert(hipHostGetDevicePointer((hipDeviceptr_t *)&device_pointer, shared_pointer, 0));
|
||||
}
|
||||
|
||||
void HIPDevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
|
||||
{
|
||||
const HIPContextScope scope(this);
|
||||
|
||||
hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer, host_pointer, size));
|
||||
hip_mem_map.erase(hip_mem_map.find(&mem));
|
||||
}
|
||||
}
|
||||
|
||||
void HIPDevice::mem_alloc(device_memory &mem)
|
||||
@@ -575,8 +823,8 @@ void HIPDevice::mem_zero(device_memory &mem)
|
||||
|
||||
/* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
|
||||
* regardless of mem.host_pointer and mem.shared_pointer. */
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
const HIPContextScope scope(this);
|
||||
hip_assert(hipMemsetD8((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size()));
|
||||
}
|
||||
@@ -703,19 +951,19 @@ void HIPDevice::tex_alloc(device_texture &mem)
|
||||
return;
|
||||
}
|
||||
|
||||
Mem *cmem = NULL;
|
||||
HIPMem *cmem = NULL;
|
||||
hArray array_3d = NULL;
|
||||
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
|
||||
size_t dst_pitch = src_pitch;
|
||||
|
||||
if (!mem.is_resident(this)) {
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
cmem = &hip_mem_map[&mem];
|
||||
cmem->texobject = 0;
|
||||
|
||||
if (mem.data_depth > 1) {
|
||||
array_3d = (hArray)mem.device_pointer;
|
||||
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
|
||||
cmem->array = array_3d;
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
dst_pitch = align_up(src_pitch, pitch_alignment);
|
||||
@@ -759,10 +1007,10 @@ void HIPDevice::tex_alloc(device_texture &mem)
|
||||
mem.device_size = size;
|
||||
stats.mem_alloc(size);
|
||||
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
cmem = &hip_mem_map[&mem];
|
||||
cmem->texobject = 0;
|
||||
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
|
||||
cmem->array = array_3d;
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
/* 2D texture, using pitch aligned linear memory. */
|
||||
@@ -847,8 +1095,8 @@ void HIPDevice::tex_alloc(device_texture &mem)
|
||||
texDesc.filterMode = filter_mode;
|
||||
texDesc.flags = HIP_TRSF_NORMALIZED_COORDINATES;
|
||||
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
cmem = &hip_mem_map[&mem];
|
||||
|
||||
hip_assert(hipTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
|
||||
|
||||
@@ -863,9 +1111,9 @@ void HIPDevice::tex_free(device_texture &mem)
|
||||
{
|
||||
if (mem.device_pointer) {
|
||||
HIPContextScope scope(this);
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
|
||||
const Mem &cmem = device_mem_map[&mem];
|
||||
thread_scoped_lock lock(hip_mem_map_mutex);
|
||||
DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
|
||||
const HIPMem &cmem = hip_mem_map[&mem];
|
||||
|
||||
if (cmem.texobject) {
|
||||
/* Free bindless texture. */
|
||||
@@ -874,16 +1122,16 @@ void HIPDevice::tex_free(device_texture &mem)
|
||||
|
||||
if (!mem.is_resident(this)) {
|
||||
/* Do not free memory here, since it was allocated on a different device. */
|
||||
device_mem_map.erase(device_mem_map.find(&mem));
|
||||
hip_mem_map.erase(hip_mem_map.find(&mem));
|
||||
}
|
||||
else if (cmem.array) {
|
||||
/* Free array. */
|
||||
hipArrayDestroy(reinterpret_cast<hArray>(cmem.array));
|
||||
hipArrayDestroy(cmem.array);
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
|
||||
device_mem_map.erase(device_mem_map.find(&mem));
|
||||
hip_mem_map.erase(hip_mem_map.find(&mem));
|
||||
}
|
||||
else {
|
||||
lock.unlock();
|
||||
@@ -905,7 +1153,7 @@ bool HIPDevice::should_use_graphics_interop()
|
||||
* possible, but from the empiric measurements it can be considerably slower than using naive
|
||||
* pixels copy. */
|
||||
|
||||
/* Disable graphics interop for now, because of driver bug in 21.40. See #92972 */
|
||||
/* Disable graphics interop for now, because of driver bug in 21.40. See T92972 */
|
||||
# if 0
|
||||
HIPContextScope scope(this);
|
||||
|
||||
|
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
class DeviceQueue;
|
||||
|
||||
class HIPDevice : public GPUDevice {
|
||||
class HIPDevice : public Device {
|
||||
|
||||
friend class HIPContextScope;
|
||||
|
||||
@@ -26,11 +26,36 @@ class HIPDevice : public GPUDevice {
|
||||
hipDevice_t hipDevice;
|
||||
hipCtx_t hipContext;
|
||||
hipModule_t hipModule;
|
||||
size_t device_texture_headroom;
|
||||
size_t device_working_headroom;
|
||||
bool move_texture_to_host;
|
||||
size_t map_host_used;
|
||||
size_t map_host_limit;
|
||||
int can_map_host;
|
||||
int pitch_alignment;
|
||||
int hipDevId;
|
||||
int hipDevArchitecture;
|
||||
bool first_error;
|
||||
|
||||
/* Per-allocation bookkeeping stored in `hip_mem_map`, keyed by the `device_memory` it
 * belongs to. All members start out as zero / false. */
struct HIPMem {
|
||||
HIPMem() : texobject(0), array(0), use_mapped_host(false)
|
||||
{
|
||||
}
|
||||
|
||||
/* Texture object handle; 0 when none has been created for this allocation. */
hipTextureObject_t texobject;
|
||||
/* HIP array handle; 0 when the allocation is not backed by an array. */
hArray array;
|
||||
|
||||
/* If true, a mapped host memory in shared_pointer is being used. */
|
||||
bool use_mapped_host;
|
||||
};
|
||||
typedef map<device_memory *, HIPMem> HIPMemMap;
|
||||
HIPMemMap hip_mem_map;
|
||||
thread_mutex hip_mem_map_mutex;
|
||||
|
||||
/* Bindless Textures */
|
||||
device_vector<TextureInfo> texture_info;
|
||||
bool need_texture_info;
|
||||
|
||||
HIPDeviceKernels kernels;
|
||||
|
||||
static bool have_precompiled_kernels();
|
||||
@@ -56,13 +81,17 @@ class HIPDevice : public GPUDevice {
|
||||
virtual bool load_kernels(const uint kernel_features) override;
|
||||
void reserve_local_memory(const uint kernel_features);
|
||||
|
||||
virtual void get_device_memory_info(size_t &total, size_t &free) override;
|
||||
virtual bool alloc_device(void *&device_pointer, size_t size) override;
|
||||
virtual void free_device(void *device_pointer) override;
|
||||
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
|
||||
virtual void free_host(void *shared_pointer) override;
|
||||
virtual void transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
|
||||
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;
|
||||
void init_host_memory();
|
||||
|
||||
void load_texture_info();
|
||||
|
||||
void move_textures_to_host(size_t size, bool for_texture);
|
||||
|
||||
HIPMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
|
||||
|
||||
void generic_copy_to(device_memory &mem);
|
||||
|
||||
void generic_free(device_memory &mem);
|
||||
|
||||
void mem_alloc(device_memory &mem) override;
|
||||
|
||||
|
@@ -51,7 +51,7 @@ static inline bool hipSupportsDevice(const int hipDevId)
|
||||
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
|
||||
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
|
||||
|
||||
return (major >= 9);
|
||||
return (major >= 10);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -108,10 +108,9 @@ template<> struct device_type_traits<uint2> {
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint3> {
|
||||
/* uint3 has different size depending on the device, can't use it for interchanging
|
||||
* memory between CPU and GPU.
|
||||
*
|
||||
* Leave body empty to trigger a compile error if used. */
|
||||
static const DataType data_type = TYPE_UINT;
|
||||
static const size_t num_elements = 3;
|
||||
static_assert(sizeof(uint3) == num_elements * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint4> {
|
||||
@@ -133,10 +132,9 @@ template<> struct device_type_traits<int2> {
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<int3> {
|
||||
/* int3 has different size depending on the device, can't use it for interchanging
|
||||
* memory between CPU and GPU.
|
||||
*
|
||||
* Leave body empty to trigger a compile error if used. */
|
||||
static const DataType data_type = TYPE_INT;
|
||||
static const size_t num_elements = 4;
|
||||
static_assert(sizeof(int3) == num_elements * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<int4> {
|
||||
@@ -249,8 +247,6 @@ class device_memory {
|
||||
bool is_resident(Device *sub_device) const;
|
||||
|
||||
protected:
|
||||
friend class Device;
|
||||
friend class GPUDevice;
|
||||
friend class CUDADevice;
|
||||
friend class OptiXDevice;
|
||||
friend class HIPDevice;
|
||||
|
@@ -55,8 +55,9 @@ void device_metal_info(vector<DeviceInfo> &devices)
|
||||
info.denoisers = DENOISER_NONE;
|
||||
info.id = id;
|
||||
|
||||
info.has_nanovdb = MetalInfo::get_device_vendor(device) == METAL_GPU_APPLE;
|
||||
info.has_light_tree = MetalInfo::get_device_vendor(device) != METAL_GPU_AMD;
|
||||
if (MetalInfo::get_device_vendor(device) == METAL_GPU_AMD) {
|
||||
info.has_light_tree = false;
|
||||
}
|
||||
|
||||
devices.push_back(info);
|
||||
device_index++;
|
||||
|
@@ -29,8 +29,7 @@ class MetalDevice : public Device {
|
||||
id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
|
||||
nil; /* encoder used for fetching device pointers from MTLBuffers */
|
||||
string source[PSO_NUM];
|
||||
string kernels_md5[PSO_NUM];
|
||||
string global_defines_md5[PSO_NUM];
|
||||
string source_md5[PSO_NUM];
|
||||
|
||||
bool capture_enabled = false;
|
||||
|
||||
@@ -68,12 +67,9 @@ class MetalDevice : public Device {
|
||||
std::recursive_mutex metal_mem_map_mutex;
|
||||
|
||||
/* Bindless Textures */
|
||||
bool is_texture(const TextureInfo &tex);
|
||||
device_vector<TextureInfo> texture_info;
|
||||
bool need_texture_info;
|
||||
id<MTLArgumentEncoder> mtlTextureArgEncoder = nil;
|
||||
id<MTLArgumentEncoder> mtlBufferArgEncoder = nil;
|
||||
id<MTLBuffer> buffer_bindings_1d = nil;
|
||||
id<MTLBuffer> texture_bindings_2d = nil;
|
||||
id<MTLBuffer> texture_bindings_3d = nil;
|
||||
std::vector<id<MTLTexture>> texture_slot_map;
|
||||
@@ -116,10 +112,6 @@ class MetalDevice : public Device {
|
||||
|
||||
bool use_local_atomic_sort() const;
|
||||
|
||||
string preprocess_source(MetalPipelineType pso_type,
|
||||
const uint kernel_features,
|
||||
string *source = nullptr);
|
||||
|
||||
bool make_source_and_check_if_compile_needed(MetalPipelineType pso_type);
|
||||
|
||||
void make_source(MetalPipelineType pso_type, const uint kernel_features);
|
||||
|
@@ -91,6 +91,11 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
}
|
||||
}
|
||||
|
||||
texture_bindings_2d = [mtlDevice newBufferWithLength:4096 options:default_storage_mode];
|
||||
texture_bindings_3d = [mtlDevice newBufferWithLength:4096 options:default_storage_mode];
|
||||
|
||||
stats.mem_alloc(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
|
||||
|
||||
switch (device_vendor) {
|
||||
default:
|
||||
break;
|
||||
@@ -100,7 +105,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
}
|
||||
case METAL_GPU_AMD: {
|
||||
max_threads_per_threadgroup = 128;
|
||||
use_metalrt = info.use_metalrt;
|
||||
break;
|
||||
}
|
||||
case METAL_GPU_APPLE: {
|
||||
@@ -151,16 +155,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
arg_desc_texture.dataType = MTLDataTypeTexture;
|
||||
arg_desc_texture.access = MTLArgumentAccessReadOnly;
|
||||
mtlTextureArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_texture ]];
|
||||
MTLArgumentDescriptor *arg_desc_buffer = [[MTLArgumentDescriptor alloc] init];
|
||||
arg_desc_buffer.dataType = MTLDataTypePointer;
|
||||
arg_desc_buffer.access = MTLArgumentAccessReadOnly;
|
||||
mtlBufferArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_buffer ]];
|
||||
|
||||
buffer_bindings_1d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
|
||||
texture_bindings_2d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
|
||||
texture_bindings_3d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
|
||||
stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
|
||||
texture_bindings_3d.allocatedSize);
|
||||
|
||||
/* command queue for non-tracing work on the GPU */
|
||||
mtlGeneralCommandQueue = [mtlDevice newCommandQueue];
|
||||
@@ -185,8 +179,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
arg_desc_tex.dataType = MTLDataTypePointer;
|
||||
arg_desc_tex.access = MTLArgumentAccessReadOnly;
|
||||
|
||||
arg_desc_tex.index = index++;
|
||||
[ancillary_desc addObject:[arg_desc_tex copy]]; /* metal_buf_1d */
|
||||
arg_desc_tex.index = index++;
|
||||
[ancillary_desc addObject:[arg_desc_tex copy]]; /* metal_tex_2d */
|
||||
arg_desc_tex.index = index++;
|
||||
@@ -232,15 +224,11 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
mtlAncillaryArgEncoder = [mtlDevice newArgumentEncoderWithArguments:ancillary_desc];
|
||||
|
||||
// preparing the blas arg encoder
|
||||
if (@available(macos 11.0, *)) {
|
||||
if (use_metalrt) {
|
||||
MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init];
|
||||
arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure;
|
||||
arg_desc_blas.access = MTLArgumentAccessReadOnly;
|
||||
mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]];
|
||||
[arg_desc_blas release];
|
||||
}
|
||||
}
|
||||
MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init];
|
||||
arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure;
|
||||
arg_desc_blas.access = MTLArgumentAccessReadOnly;
|
||||
mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]];
|
||||
[arg_desc_blas release];
|
||||
|
||||
for (int i = 0; i < ancillary_desc.count; i++) {
|
||||
[ancillary_desc[i] release];
|
||||
@@ -260,26 +248,22 @@ MetalDevice::~MetalDevice()
|
||||
* existing_devices_mutex). */
|
||||
thread_scoped_lock lock(existing_devices_mutex);
|
||||
|
||||
int num_resources = texture_info.size();
|
||||
for (int res = 0; res < num_resources; res++) {
|
||||
if (is_texture(texture_info[res])) {
|
||||
[texture_slot_map[res] release];
|
||||
texture_slot_map[res] = nil;
|
||||
for (auto &tex : texture_slot_map) {
|
||||
if (tex) {
|
||||
[tex release];
|
||||
tex = nil;
|
||||
}
|
||||
}
|
||||
|
||||
flush_delayed_free_list();
|
||||
|
||||
if (texture_bindings_2d) {
|
||||
stats.mem_free(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
|
||||
texture_bindings_3d.allocatedSize);
|
||||
[buffer_bindings_1d release];
|
||||
stats.mem_free(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
|
||||
|
||||
[texture_bindings_2d release];
|
||||
[texture_bindings_3d release];
|
||||
}
|
||||
[mtlTextureArgEncoder release];
|
||||
[mtlBufferKernelParamsEncoder release];
|
||||
[mtlBufferArgEncoder release];
|
||||
[mtlASArgEncoder release];
|
||||
[mtlAncillaryArgEncoder release];
|
||||
[mtlGeneralCommandQueue release];
|
||||
@@ -310,9 +294,7 @@ bool MetalDevice::use_local_atomic_sort() const
|
||||
return DebugFlags().metal.use_local_atomic_sort;
|
||||
}
|
||||
|
||||
string MetalDevice::preprocess_source(MetalPipelineType pso_type,
|
||||
const uint kernel_features,
|
||||
string *source)
|
||||
void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
|
||||
{
|
||||
string global_defines;
|
||||
if (use_adaptive_compilation()) {
|
||||
@@ -345,9 +327,6 @@ string MetalDevice::preprocess_source(MetalPipelineType pso_type,
|
||||
break;
|
||||
case METAL_GPU_APPLE:
|
||||
global_defines += "#define __KERNEL_METAL_APPLE__\n";
|
||||
# ifdef WITH_NANOVDB
|
||||
global_defines += "#define WITH_NANOVDB\n";
|
||||
# endif
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -355,61 +334,6 @@ string MetalDevice::preprocess_source(MetalPipelineType pso_type,
|
||||
NSOperatingSystemVersion macos_ver = [processInfo operatingSystemVersion];
|
||||
global_defines += "#define __KERNEL_METAL_MACOS__ " + to_string(macos_ver.majorVersion) + "\n";
|
||||
|
||||
/* Replace specific KernelData "dot" dereferences with a Metal function_constant identifier of
|
||||
* the same character length. Build a string of all active constant values which is then hashed
|
||||
* in order to identify the PSO.
|
||||
*/
|
||||
if (pso_type != PSO_GENERIC) {
|
||||
if (source) {
|
||||
const double starttime = time_dt();
|
||||
|
||||
# define KERNEL_STRUCT_BEGIN(name, parent) \
|
||||
string_replace_same_length(*source, "kernel_data." #parent ".", "kernel_data_" #parent "_");
|
||||
|
||||
bool next_member_is_specialized = true;
|
||||
|
||||
# define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE next_member_is_specialized = false;
|
||||
|
||||
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
|
||||
if (!next_member_is_specialized) { \
|
||||
string_replace( \
|
||||
*source, "kernel_data_" #parent "_" #name, "kernel_data." #parent ".__unused_" #name); \
|
||||
next_member_is_specialized = true; \
|
||||
}
|
||||
|
||||
# include "kernel/data_template.h"
|
||||
|
||||
# undef KERNEL_STRUCT_MEMBER
|
||||
# undef KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
|
||||
# undef KERNEL_STRUCT_BEGIN
|
||||
|
||||
metal_printf("KernelData patching took %.1f ms\n", (time_dt() - starttime) * 1000.0);
|
||||
}
|
||||
|
||||
/* Opt in to all of available specializations. This can be made more granular for the
|
||||
* PSO_SPECIALIZED_INTERSECT case in order to minimize the number of specialization requests,
|
||||
* but the overhead should be negligible as these are very quick to (re)build and aren't
|
||||
* serialized to disk via MTLBinaryArchives.
|
||||
*/
|
||||
global_defines += "#define __KERNEL_USE_DATA_CONSTANTS__\n";
|
||||
}
|
||||
|
||||
# if 0
|
||||
metal_printf("================\n%s================\n",
|
||||
global_defines.c_str());
|
||||
# endif
|
||||
|
||||
if (source) {
|
||||
*source = global_defines + *source;
|
||||
}
|
||||
|
||||
MD5Hash md5;
|
||||
md5.append(global_defines);
|
||||
return md5.get_hex();
|
||||
}
|
||||
|
||||
void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
|
||||
{
|
||||
string &source = this->source[pso_type];
|
||||
source = "\n#include \"kernel/device/metal/kernel.metal\"\n";
|
||||
source = path_source_replace_includes(source, path_get("source"));
|
||||
@@ -418,7 +342,62 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
|
||||
* With Metal function constants we can generate a single variant of the kernel source which can
|
||||
* be repeatedly respecialized.
|
||||
*/
|
||||
global_defines_md5[pso_type] = preprocess_source(pso_type, kernel_features, &source);
|
||||
string baked_constants;
|
||||
|
||||
/* Replace specific KernelData "dot" dereferences with a Metal function_constant identifier of
|
||||
* the same character length. Build a string of all active constant values which is then hashed
|
||||
* in order to identify the PSO.
|
||||
*/
|
||||
if (pso_type != PSO_GENERIC) {
|
||||
const double starttime = time_dt();
|
||||
|
||||
# define KERNEL_STRUCT_BEGIN(name, parent) \
|
||||
string_replace_same_length(source, "kernel_data." #parent ".", "kernel_data_" #parent "_");
|
||||
|
||||
bool next_member_is_specialized = true;
|
||||
|
||||
# define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE next_member_is_specialized = false;
|
||||
|
||||
/* Add constants to md5 so that 'get_best_pipeline' is able to return a suitable match. */
|
||||
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
|
||||
if (next_member_is_specialized) { \
|
||||
baked_constants += string(#parent "." #name "=") + \
|
||||
to_string(_type(launch_params.data.parent.name)) + "\n"; \
|
||||
} \
|
||||
else { \
|
||||
string_replace( \
|
||||
source, "kernel_data_" #parent "_" #name, "kernel_data." #parent ".__unused_" #name); \
|
||||
next_member_is_specialized = true; \
|
||||
}
|
||||
|
||||
# include "kernel/data_template.h"
|
||||
|
||||
/* Opt in to all of available specializations. This can be made more granular for the
|
||||
* PSO_SPECIALIZED_INTERSECT case in order to minimize the number of specialization requests,
|
||||
* but the overhead should be negligible as these are very quick to (re)build and aren't
|
||||
* serialized to disk via MTLBinaryArchives.
|
||||
*/
|
||||
global_defines += "#define __KERNEL_USE_DATA_CONSTANTS__\n";
|
||||
|
||||
metal_printf("KernelData patching took %.1f ms\n", (time_dt() - starttime) * 1000.0);
|
||||
}
|
||||
|
||||
source = global_defines + source;
|
||||
# if 0
|
||||
metal_printf("================\n%s================\n\%s================\n",
|
||||
global_defines.c_str(),
|
||||
baked_constants.c_str());
|
||||
# endif
|
||||
|
||||
/* Generate an MD5 from the source and include any baked constants. This is used when caching
|
||||
* PSOs. */
|
||||
MD5Hash md5;
|
||||
md5.append(baked_constants);
|
||||
md5.append(source);
|
||||
if (use_metalrt) {
|
||||
md5.append(std::to_string(kernel_features & METALRT_FEATURE_MASK));
|
||||
}
|
||||
source_md5[pso_type] = md5.get_hex();
|
||||
}
|
||||
|
||||
bool MetalDevice::load_kernels(const uint _kernel_features)
|
||||
@@ -452,49 +431,9 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
|
||||
|
||||
bool MetalDevice::make_source_and_check_if_compile_needed(MetalPipelineType pso_type)
|
||||
{
|
||||
string defines_md5 = preprocess_source(pso_type, kernel_features);
|
||||
|
||||
/* Rebuild the source string if the injected block of #defines has changed. */
|
||||
if (global_defines_md5[pso_type] != defines_md5) {
|
||||
if (this->source[pso_type].empty()) {
|
||||
make_source(pso_type, kernel_features);
|
||||
}
|
||||
|
||||
string constant_values;
|
||||
if (pso_type != PSO_GENERIC) {
|
||||
bool next_member_is_specialized = true;
|
||||
|
||||
# define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE next_member_is_specialized = false;
|
||||
|
||||
/* Add specialization constants to md5 so that 'get_best_pipeline' is able to return a suitable
|
||||
* match. */
|
||||
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
|
||||
if (next_member_is_specialized) { \
|
||||
constant_values += string(#parent "." #name "=") + \
|
||||
to_string(_type(launch_params.data.parent.name)) + "\n"; \
|
||||
} \
|
||||
else { \
|
||||
next_member_is_specialized = true; \
|
||||
}
|
||||
|
||||
# include "kernel/data_template.h"
|
||||
|
||||
# undef KERNEL_STRUCT_MEMBER
|
||||
# undef KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
|
||||
|
||||
# if 0
|
||||
metal_printf("================\n%s================\n",
|
||||
constant_values.c_str());
|
||||
# endif
|
||||
}
|
||||
|
||||
MD5Hash md5;
|
||||
md5.append(constant_values);
|
||||
md5.append(source[pso_type]);
|
||||
if (use_metalrt) {
|
||||
md5.append(string_printf("metalrt_features=%d", kernel_features & METALRT_FEATURE_MASK));
|
||||
}
|
||||
kernels_md5[pso_type] = md5.get_hex();
|
||||
|
||||
return MetalDeviceKernels::should_load_kernels(this, pso_type);
|
||||
}
|
||||
|
||||
@@ -581,11 +520,6 @@ void MetalDevice::compile_and_load(int device_id, MetalPipelineType pso_type)
|
||||
thread_scoped_lock lock(existing_devices_mutex);
|
||||
if (MetalDevice *instance = get_device_by_ID(device_id, lock)) {
|
||||
if (mtlLibrary) {
|
||||
if (error && [error localizedDescription]) {
|
||||
VLOG_WARNING << "MSL compilation messages: "
|
||||
<< [[error localizedDescription] UTF8String];
|
||||
}
|
||||
|
||||
instance->mtlLibrary[pso_type] = mtlLibrary;
|
||||
|
||||
starttime = time_dt();
|
||||
@@ -607,11 +541,6 @@ void MetalDevice::compile_and_load(int device_id, MetalPipelineType pso_type)
|
||||
}
|
||||
}
|
||||
|
||||
bool MetalDevice::is_texture(const TextureInfo &tex)
|
||||
{
|
||||
return (tex.depth > 0 || tex.height > 0);
|
||||
}
|
||||
|
||||
void MetalDevice::load_texture_info()
|
||||
{
|
||||
if (need_texture_info) {
|
||||
@@ -623,20 +552,21 @@ void MetalDevice::load_texture_info()
|
||||
|
||||
for (int tex = 0; tex < num_textures; tex++) {
|
||||
uint64_t offset = tex * sizeof(void *);
|
||||
if (is_texture(texture_info[tex]) && texture_slot_map[tex]) {
|
||||
id<MTLTexture> metal_texture = texture_slot_map[tex];
|
||||
|
||||
id<MTLTexture> metal_texture = texture_slot_map[tex];
|
||||
if (!metal_texture) {
|
||||
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
|
||||
[mtlTextureArgEncoder setTexture:nil atIndex:0];
|
||||
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
|
||||
[mtlTextureArgEncoder setTexture:nil atIndex:0];
|
||||
}
|
||||
else {
|
||||
MTLTextureType type = metal_texture.textureType;
|
||||
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
|
||||
[mtlTextureArgEncoder setTexture:type == MTLTextureType2D ? metal_texture : nil atIndex:0];
|
||||
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
|
||||
[mtlTextureArgEncoder setTexture:type == MTLTextureType3D ? metal_texture : nil atIndex:0];
|
||||
}
|
||||
else {
|
||||
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
|
||||
[mtlTextureArgEncoder setTexture:nil atIndex:0];
|
||||
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
|
||||
[mtlTextureArgEncoder setTexture:nil atIndex:0];
|
||||
}
|
||||
}
|
||||
if (default_storage_mode == MTLResourceStorageModeManaged) {
|
||||
[texture_bindings_2d didModifyRange:NSMakeRange(0, num_textures * sizeof(void *))];
|
||||
@@ -655,7 +585,7 @@ void MetalDevice::erase_allocation(device_memory &mem)
|
||||
if (it != metal_mem_map.end()) {
|
||||
MetalMem *mmem = it->second.get();
|
||||
|
||||
/* blank out reference to MetalMem* in the launch params (fixes crash #94736) */
|
||||
/* blank out reference to MetalMem* in the launch params (fixes crash T94736) */
|
||||
if (mmem->pointer_index >= 0) {
|
||||
device_ptr *pointers = (device_ptr *)&launch_params;
|
||||
pointers[mmem->pointer_index] = 0;
|
||||
@@ -809,6 +739,7 @@ void MetalDevice::generic_free(device_memory &mem)
|
||||
mem.shared_pointer = 0;
|
||||
|
||||
/* Free device memory. */
|
||||
delayed_free_list.push_back(mmem.mtlBuffer);
|
||||
mmem.mtlBuffer = nil;
|
||||
}
|
||||
|
||||
@@ -930,11 +861,6 @@ void MetalDevice::cancel()
|
||||
|
||||
bool MetalDevice::is_ready(string &status) const
|
||||
{
|
||||
if (!error_msg.empty()) {
|
||||
/* Avoid hanging if we had an error. */
|
||||
return true;
|
||||
}
|
||||
|
||||
int num_loaded = MetalDeviceKernels::get_loaded_kernel_count(this, PSO_GENERIC);
|
||||
if (num_loaded < DEVICE_KERNEL_NUM) {
|
||||
status = string_printf("%d / %d render kernels loaded (may take a few minutes the first time)",
|
||||
@@ -942,17 +868,6 @@ bool MetalDevice::is_ready(string &status) const
|
||||
DEVICE_KERNEL_NUM);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (int num_requests = MetalDeviceKernels::num_incomplete_specialization_requests()) {
|
||||
status = string_printf("%d kernels to optimize", num_requests);
|
||||
}
|
||||
else if (kernel_specialization_level == PSO_SPECIALIZED_INTERSECT) {
|
||||
status = "Using optimized intersection kernels";
|
||||
}
|
||||
else if (kernel_specialization_level == PSO_SPECIALIZED_SHADE) {
|
||||
status = "Using optimized kernels";
|
||||
}
|
||||
|
||||
metal_printf("MetalDevice::is_ready(...) --> true\n");
|
||||
return true;
|
||||
}
|
||||
@@ -989,7 +904,7 @@ void MetalDevice::optimize_for_scene(Scene *scene)
|
||||
}
|
||||
|
||||
if (specialize_in_background) {
|
||||
if (MetalDeviceKernels::num_incomplete_specialization_requests() == 0) {
|
||||
if (!MetalDeviceKernels::any_specialization_happening_now()) {
|
||||
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
|
||||
specialize_kernels_fn);
|
||||
}
|
||||
@@ -1059,7 +974,7 @@ void MetalDevice::global_free(device_memory &mem)
|
||||
|
||||
void MetalDevice::tex_alloc_as_buffer(device_texture &mem)
|
||||
{
|
||||
MetalDevice::MetalMem *mmem = generic_alloc(mem);
|
||||
generic_alloc(mem);
|
||||
generic_copy_to(mem);
|
||||
|
||||
/* Resize once */
|
||||
@@ -1068,32 +983,27 @@ void MetalDevice::tex_alloc_as_buffer(device_texture &mem)
|
||||
/* Allocate some slots in advance, to reduce amount
|
||||
* of re-allocations. */
|
||||
texture_info.resize(round_up(slot + 1, 128));
|
||||
texture_slot_map.resize(round_up(slot + 1, 128));
|
||||
}
|
||||
|
||||
mem.info.data = (uint64_t)mem.device_pointer;
|
||||
|
||||
/* Set Mapping and tag that we need to (re-)upload to device */
|
||||
texture_info[slot] = mem.info;
|
||||
uint64_t offset = slot * sizeof(void *);
|
||||
[mtlBufferArgEncoder setArgumentBuffer:buffer_bindings_1d offset:offset];
|
||||
[mtlBufferArgEncoder setBuffer:mmem->mtlBuffer offset:0 atIndex:0];
|
||||
texture_info[slot].data = *(uint64_t *)((uint64_t)buffer_bindings_1d.contents + offset);
|
||||
texture_slot_map[slot] = nil;
|
||||
need_texture_info = true;
|
||||
}
|
||||
|
||||
void MetalDevice::tex_alloc(device_texture &mem)
|
||||
{
|
||||
/* Check that dimensions fit within maximum allowable size.
|
||||
* If 1D texture is allocated, use 1D buffer.
|
||||
* See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
|
||||
if (mem.data_height > 0) {
|
||||
if (mem.data_width > 16384 || mem.data_height > 16384) {
|
||||
set_error(string_printf(
|
||||
"Texture exceeds maximum allowed size of 16384 x 16384 (requested: %zu x %zu)",
|
||||
mem.data_width,
|
||||
mem.data_height));
|
||||
return;
|
||||
}
|
||||
if (mem.data_width > 16384 || mem.data_height > 16384) {
|
||||
set_error(string_printf(
|
||||
"Texture exceeds maximum allowed size of 16384 x 16384 (requested: %zu x %zu)",
|
||||
mem.data_width,
|
||||
mem.data_height));
|
||||
return;
|
||||
}
|
||||
|
||||
MTLStorageMode storage_mode = MTLStorageModeManaged;
|
||||
if (@available(macos 10.15, *)) {
|
||||
if ([mtlDevice hasUnifiedMemory] &&
|
||||
@@ -1233,9 +1143,8 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
||||
bytesPerRow:src_pitch];
|
||||
}
|
||||
else {
|
||||
assert(0);
|
||||
/* 1D texture, using linear memory. */
|
||||
tex_alloc_as_buffer(mem);
|
||||
return;
|
||||
}
|
||||
|
||||
mem.device_pointer = (device_ptr)mtlTexture;
|
||||
@@ -1259,22 +1168,17 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
||||
ssize_t min_buffer_length = sizeof(void *) * texture_info.size();
|
||||
if (!texture_bindings_2d || (texture_bindings_2d.length < min_buffer_length)) {
|
||||
if (texture_bindings_2d) {
|
||||
delayed_free_list.push_back(buffer_bindings_1d);
|
||||
delayed_free_list.push_back(texture_bindings_2d);
|
||||
delayed_free_list.push_back(texture_bindings_3d);
|
||||
|
||||
stats.mem_free(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
|
||||
texture_bindings_3d.allocatedSize);
|
||||
stats.mem_free(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
|
||||
}
|
||||
buffer_bindings_1d = [mtlDevice newBufferWithLength:min_buffer_length
|
||||
options:default_storage_mode];
|
||||
texture_bindings_2d = [mtlDevice newBufferWithLength:min_buffer_length
|
||||
options:default_storage_mode];
|
||||
texture_bindings_3d = [mtlDevice newBufferWithLength:min_buffer_length
|
||||
options:default_storage_mode];
|
||||
|
||||
stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
|
||||
texture_bindings_3d.allocatedSize);
|
||||
stats.mem_alloc(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1301,18 +1205,12 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
||||
|
||||
void MetalDevice::tex_free(device_texture &mem)
|
||||
{
|
||||
if (mem.data_depth == 0 && mem.data_height == 0) {
|
||||
generic_free(mem);
|
||||
return;
|
||||
}
|
||||
|
||||
if (metal_mem_map.count(&mem)) {
|
||||
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
|
||||
MetalMem &mmem = *metal_mem_map.at(&mem);
|
||||
|
||||
assert(texture_slot_map[mem.slot] == mmem.mtlTexture);
|
||||
if (texture_slot_map[mem.slot] == mmem.mtlTexture)
|
||||
texture_slot_map[mem.slot] = nil;
|
||||
texture_slot_map[mem.slot] = nil;
|
||||
|
||||
if (mmem.mtlTexture) {
|
||||
/* Free bindless texture. */
|
||||
|
@@ -63,7 +63,8 @@ enum MetalPipelineType {
|
||||
};
|
||||
|
||||
# define METALRT_FEATURE_MASK \
|
||||
(KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD)
|
||||
(KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD | \
|
||||
KERNEL_FEATURE_OBJECT_MOTION)
|
||||
|
||||
const char *kernel_type_as_string(MetalPipelineType pso_type);
|
||||
|
||||
@@ -75,12 +76,12 @@ struct MetalKernelPipeline {
|
||||
|
||||
id<MTLLibrary> mtlLibrary = nil;
|
||||
MetalPipelineType pso_type;
|
||||
string kernels_md5;
|
||||
string source_md5;
|
||||
size_t usage_count = 0;
|
||||
|
||||
KernelData kernel_data_;
|
||||
bool use_metalrt;
|
||||
uint32_t kernel_features = 0;
|
||||
uint32_t metalrt_features = 0;
|
||||
|
||||
int threads_per_threadgroup;
|
||||
|
||||
@@ -103,7 +104,7 @@ struct MetalKernelPipeline {
|
||||
/* Cache of Metal kernels for each DeviceKernel. */
|
||||
namespace MetalDeviceKernels {
|
||||
|
||||
int num_incomplete_specialization_requests();
|
||||
bool any_specialization_happening_now();
|
||||
int get_loaded_kernel_count(MetalDevice const *device, MetalPipelineType pso_type);
|
||||
bool should_load_kernels(MetalDevice const *device, MetalPipelineType pso_type);
|
||||
bool load(MetalDevice *device, MetalPipelineType pso_type);
|
||||
|
@@ -161,12 +161,25 @@ ShaderCache::~ShaderCache()
|
||||
running = false;
|
||||
cond_var.notify_all();
|
||||
|
||||
metal_printf("Waiting for ShaderCache threads... (incomplete_requests = %d)\n",
|
||||
int(incomplete_requests));
|
||||
int num_incomplete = int(incomplete_requests);
|
||||
if (num_incomplete) {
|
||||
/* Shutting down the app with incomplete shader compilation requests. Give 1 second's grace for
|
||||
* clean shutdown. */
|
||||
metal_printf("ShaderCache busy (incomplete_requests = %d)...\n", num_incomplete);
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
num_incomplete = int(incomplete_requests);
|
||||
}
|
||||
|
||||
if (num_incomplete && !MetalDeviceKernels::is_benchmark_warmup()) {
|
||||
metal_printf("ShaderCache still busy (incomplete_requests = %d). Terminating...\n",
|
||||
num_incomplete);
|
||||
std::terminate();
|
||||
}
|
||||
|
||||
metal_printf("ShaderCache idle. Shutting down.\n");
|
||||
for (auto &thread : compile_threads) {
|
||||
thread.join();
|
||||
}
|
||||
metal_printf("ShaderCache shut down.\n");
|
||||
}
|
||||
|
||||
void ShaderCache::wait_for_all()
|
||||
@@ -279,7 +292,7 @@ bool ShaderCache::should_load_kernel(DeviceKernel device_kernel,
|
||||
/* check whether the kernel has already been requested / cached */
|
||||
thread_scoped_lock lock(cache_mutex);
|
||||
for (auto &pipeline : pipelines[device_kernel]) {
|
||||
if (pipeline->kernels_md5 == device->kernels_md5[pso_type]) {
|
||||
if (pipeline->source_md5 == device->source_md5[pso_type]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -319,7 +332,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
|
||||
memcpy(&pipeline->kernel_data_, &device->launch_params.data, sizeof(pipeline->kernel_data_));
|
||||
pipeline->pso_type = pso_type;
|
||||
pipeline->mtlDevice = mtlDevice;
|
||||
pipeline->kernels_md5 = device->kernels_md5[pso_type];
|
||||
pipeline->source_md5 = device->source_md5[pso_type];
|
||||
pipeline->mtlLibrary = device->mtlLibrary[pso_type];
|
||||
pipeline->device_kernel = device_kernel;
|
||||
pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
|
||||
@@ -331,7 +344,9 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
|
||||
|
||||
/* metalrt options */
|
||||
pipeline->use_metalrt = device->use_metalrt;
|
||||
pipeline->kernel_features = device->kernel_features;
|
||||
pipeline->metalrt_features = device->use_metalrt ?
|
||||
(device->kernel_features & METALRT_FEATURE_MASK) :
|
||||
0;
|
||||
|
||||
{
|
||||
thread_scoped_lock lock(cache_mutex);
|
||||
@@ -342,36 +357,65 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
|
||||
|
||||
MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const MetalDevice *device)
|
||||
{
|
||||
while (running) {
|
||||
/* Search all loaded pipelines with matching kernels_md5 checksums. */
|
||||
MetalKernelPipeline *best_match = nullptr;
|
||||
/* metalrt options */
|
||||
bool use_metalrt = device->use_metalrt;
|
||||
bool device_metalrt_hair = use_metalrt && device->kernel_features & KERNEL_FEATURE_HAIR;
|
||||
bool device_metalrt_hair_thick = use_metalrt &&
|
||||
device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
|
||||
bool device_metalrt_pointcloud = use_metalrt &&
|
||||
device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
|
||||
bool device_metalrt_motion = use_metalrt &&
|
||||
device->kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
|
||||
|
||||
MetalKernelPipeline *best_pipeline = nullptr;
|
||||
while (!best_pipeline) {
|
||||
{
|
||||
thread_scoped_lock lock(cache_mutex);
|
||||
for (auto &candidate : pipelines[kernel]) {
|
||||
if (candidate->loaded &&
|
||||
candidate->kernels_md5 == device->kernels_md5[candidate->pso_type]) {
|
||||
/* Replace existing match if candidate is more specialized. */
|
||||
if (!best_match || candidate->pso_type > best_match->pso_type) {
|
||||
best_match = candidate.get();
|
||||
for (auto &pipeline : pipelines[kernel]) {
|
||||
if (!pipeline->loaded) {
|
||||
/* still loading - ignore */
|
||||
continue;
|
||||
}
|
||||
|
||||
bool pipeline_metalrt_hair = pipeline->metalrt_features & KERNEL_FEATURE_HAIR;
|
||||
bool pipeline_metalrt_hair_thick = pipeline->metalrt_features & KERNEL_FEATURE_HAIR_THICK;
|
||||
bool pipeline_metalrt_pointcloud = pipeline->metalrt_features & KERNEL_FEATURE_POINTCLOUD;
|
||||
bool pipeline_metalrt_motion = use_metalrt &&
|
||||
pipeline->metalrt_features & KERNEL_FEATURE_OBJECT_MOTION;
|
||||
|
||||
if (pipeline->use_metalrt != use_metalrt || pipeline_metalrt_hair != device_metalrt_hair ||
|
||||
pipeline_metalrt_hair_thick != device_metalrt_hair_thick ||
|
||||
pipeline_metalrt_pointcloud != device_metalrt_pointcloud ||
|
||||
pipeline_metalrt_motion != device_metalrt_motion) {
|
||||
/* wrong combination of metalrt options */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pipeline->pso_type != PSO_GENERIC) {
|
||||
if (pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_INTERSECT] ||
|
||||
pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_SHADE]) {
|
||||
best_pipeline = pipeline.get();
|
||||
}
|
||||
}
|
||||
else if (!best_pipeline) {
|
||||
best_pipeline = pipeline.get();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (best_match) {
|
||||
if (best_match->usage_count == 0 && best_match->pso_type != PSO_GENERIC) {
|
||||
metal_printf("Swapping in %s version of %s\n",
|
||||
kernel_type_as_string(best_match->pso_type),
|
||||
device_kernel_as_string(kernel));
|
||||
}
|
||||
best_match->usage_count += 1;
|
||||
return best_match;
|
||||
if (!best_pipeline) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
|
||||
/* Spin until a matching kernel is loaded, or we're shutting down. */
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
return nullptr;
|
||||
|
||||
if (best_pipeline->usage_count == 0 && best_pipeline->pso_type != PSO_GENERIC) {
|
||||
metal_printf("Swapping in %s version of %s\n",
|
||||
kernel_type_as_string(best_pipeline->pso_type),
|
||||
device_kernel_as_string(kernel));
|
||||
}
|
||||
best_pipeline->usage_count += 1;
|
||||
|
||||
return best_pipeline;
|
||||
}
|
||||
|
||||
bool MetalKernelPipeline::should_use_binary_archive() const
|
||||
@@ -384,12 +428,11 @@ bool MetalKernelPipeline::should_use_binary_archive() const
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Workaround for issues using Binary Archives on non-Apple Silicon systems. */
|
||||
MetalGPUVendor gpu_vendor = MetalInfo::get_device_vendor(mtlDevice);
|
||||
if (gpu_vendor != METAL_GPU_APPLE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Workaround for Intel GPU having issue using Binary Archives */
|
||||
MetalGPUVendor gpu_vendor = MetalInfo::get_device_vendor(mtlDevice);
|
||||
if (gpu_vendor == METAL_GPU_INTEL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (pso_type == PSO_GENERIC) {
|
||||
@@ -397,10 +440,8 @@ bool MetalKernelPipeline::should_use_binary_archive() const
|
||||
return true;
|
||||
}
|
||||
|
||||
if ((device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND &&
|
||||
device_kernel <= DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW) ||
|
||||
(device_kernel >= DEVICE_KERNEL_SHADER_EVAL_DISPLACE &&
|
||||
device_kernel <= DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY)) {
|
||||
if (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND &&
|
||||
device_kernel <= DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW) {
|
||||
/* Archive all shade kernels - they take a long time to compile. */
|
||||
return true;
|
||||
}
|
||||
@@ -526,14 +567,18 @@ void MetalKernelPipeline::compile()
|
||||
NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
|
||||
NSArray *linked_functions = nil;
|
||||
|
||||
bool metalrt_hair = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR);
|
||||
bool metalrt_hair_thick = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR_THICK);
|
||||
bool metalrt_pointcloud = use_metalrt && (metalrt_features & KERNEL_FEATURE_POINTCLOUD);
|
||||
|
||||
if (use_metalrt) {
|
||||
id<MTLFunction> curve_intersect_default = nil;
|
||||
id<MTLFunction> curve_intersect_shadow = nil;
|
||||
id<MTLFunction> point_intersect_default = nil;
|
||||
id<MTLFunction> point_intersect_shadow = nil;
|
||||
if (kernel_features & KERNEL_FEATURE_HAIR) {
|
||||
if (metalrt_hair) {
|
||||
/* Add curve intersection programs. */
|
||||
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
|
||||
if (metalrt_hair_thick) {
|
||||
/* Slower programs for thick hair since that also slows down ribbons.
|
||||
* Ideally this should not be needed. */
|
||||
curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_ALL];
|
||||
@@ -544,7 +589,7 @@ void MetalKernelPipeline::compile()
|
||||
curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON_SHADOW];
|
||||
}
|
||||
}
|
||||
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
|
||||
if (metalrt_pointcloud) {
|
||||
point_intersect_default = rt_intersection_function[METALRT_FUNC_POINT];
|
||||
point_intersect_shadow = rt_intersection_function[METALRT_FUNC_POINT_SHADOW];
|
||||
}
|
||||
@@ -621,8 +666,6 @@ void MetalKernelPipeline::compile()
|
||||
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
|
||||
|
||||
bool use_binary_archive = should_use_binary_archive();
|
||||
bool loading_existing_archive = false;
|
||||
bool creating_new_archive = false;
|
||||
|
||||
id<MTLBinaryArchive> archive = nil;
|
||||
string metalbin_path;
|
||||
@@ -631,11 +674,20 @@ void MetalKernelPipeline::compile()
|
||||
NSProcessInfo *processInfo = [NSProcessInfo processInfo];
|
||||
string osVersion = [[processInfo operatingSystemVersionString] UTF8String];
|
||||
MD5Hash local_md5;
|
||||
local_md5.append(kernels_md5);
|
||||
local_md5.append(source_md5);
|
||||
local_md5.append(osVersion);
|
||||
local_md5.append((uint8_t *)&this->threads_per_threadgroup,
|
||||
sizeof(this->threads_per_threadgroup));
|
||||
|
||||
string options;
|
||||
if (use_metalrt && kernel_has_intersection(device_kernel)) {
|
||||
/* incorporate any MetalRT specializations into the archive name */
|
||||
options += string_printf(".hair_%d.hair_thick_%d.pointcloud_%d",
|
||||
metalrt_hair ? 1 : 0,
|
||||
metalrt_hair_thick ? 1 : 0,
|
||||
metalrt_pointcloud ? 1 : 0);
|
||||
}
|
||||
|
||||
/* Replace non-alphanumerical characters with underscores. */
|
||||
string device_name = [mtlDevice.name UTF8String];
|
||||
for (char &c : device_name) {
|
||||
@@ -647,141 +699,77 @@ void MetalKernelPipeline::compile()
|
||||
metalbin_name = device_name;
|
||||
metalbin_name = path_join(metalbin_name, device_kernel_as_string(device_kernel));
|
||||
metalbin_name = path_join(metalbin_name, kernel_type_as_string(pso_type));
|
||||
metalbin_name = path_join(metalbin_name, local_md5.get_hex() + ".bin");
|
||||
metalbin_name = path_join(metalbin_name, local_md5.get_hex() + options + ".bin");
|
||||
|
||||
metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
|
||||
path_create_directories(metalbin_path);
|
||||
|
||||
/* Check if shader binary exists on disk, and if so, update the file timestamp for LRU purging
|
||||
* to work as intended. */
|
||||
loading_existing_archive = path_cache_kernel_exists_and_mark_used(metalbin_path);
|
||||
creating_new_archive = !loading_existing_archive;
|
||||
|
||||
if (@available(macOS 11.0, *)) {
|
||||
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
|
||||
if (loading_existing_archive) {
|
||||
/* Retrieve shader binary from disk, and update the file timestamp for LRU purging to work as
|
||||
* intended. */
|
||||
if (use_binary_archive && path_cache_kernel_exists_and_mark_used(metalbin_path)) {
|
||||
if (@available(macOS 11.0, *)) {
|
||||
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
|
||||
archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
|
||||
}
|
||||
NSError *error = nil;
|
||||
archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:&error];
|
||||
if (!archive) {
|
||||
const char *err = error ? [[error localizedDescription] UTF8String] : nullptr;
|
||||
metal_printf("newBinaryArchiveWithDescriptor failed: %s\n", err ? err : "nil");
|
||||
}
|
||||
[archiveDesc release];
|
||||
|
||||
if (loading_existing_archive) {
|
||||
pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
|
||||
computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
|
||||
archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
|
||||
[archiveDesc release];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool recreate_archive = false;
|
||||
|
||||
/* Lambda to do the actual pipeline compilation. */
|
||||
auto do_compilation = [&]() {
|
||||
__block bool compilation_finished = false;
|
||||
__block string error_str;
|
||||
|
||||
if (loading_existing_archive) {
|
||||
/* Use the blocking variant of newComputePipelineStateWithDescriptor if an archive exists on
|
||||
* disk. It should load almost instantaneously, and will fail gracefully when loading a
|
||||
* corrupt archive (unlike the async variant). */
|
||||
NSError *error = nil;
|
||||
pipeline = [mtlDevice newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
|
||||
options:pipelineOptions
|
||||
reflection:nullptr
|
||||
error:&error];
|
||||
const char *err = error ? [[error localizedDescription] UTF8String] : nullptr;
|
||||
error_str = err ? err : "nil";
|
||||
}
|
||||
else {
|
||||
/* TODO / MetalRT workaround:
|
||||
* Workaround for a crash when addComputePipelineFunctionsWithDescriptor is called *after*
|
||||
* newComputePipelineStateWithDescriptor with linked functions (i.e. with MetalRT enabled).
|
||||
* Ideally we would like to call newComputePipelineStateWithDescriptor (async) first so we
|
||||
* can bail out if needed, but we can stop the crash by flipping the order when there are
|
||||
* linked functions. However when addComputePipelineFunctionsWithDescriptor is called first
|
||||
* it will block while it builds the pipeline, offering no way of bailing out. */
|
||||
auto addComputePipelineFunctionsWithDescriptor = [&]() {
|
||||
if (creating_new_archive && ShaderCache::running) {
|
||||
NSError *error;
|
||||
if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
|
||||
error:&error]) {
|
||||
NSString *errStr = [error localizedDescription];
|
||||
metal_printf("Failed to add PSO to archive:\n%s\n",
|
||||
errStr ? [errStr UTF8String] : "nil");
|
||||
}
|
||||
}
|
||||
};
|
||||
if (linked_functions) {
|
||||
addComputePipelineFunctionsWithDescriptor();
|
||||
}
|
||||
|
||||
/* Use the async variant of newComputePipelineStateWithDescriptor if no archive exists on
|
||||
* disk. This allows us to respond to app shutdown. */
|
||||
[mtlDevice
|
||||
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
|
||||
options:pipelineOptions
|
||||
completionHandler:^(id<MTLComputePipelineState> computePipelineState,
|
||||
MTLComputePipelineReflection *reflection,
|
||||
NSError *error) {
|
||||
pipeline = computePipelineState;
|
||||
|
||||
/* Retain the pipeline so we can use it safely past the completion
|
||||
* handler. */
|
||||
if (pipeline) {
|
||||
[pipeline retain];
|
||||
}
|
||||
const char *err = error ?
|
||||
[[error localizedDescription] UTF8String] :
|
||||
nullptr;
|
||||
error_str = err ? err : "nil";
|
||||
|
||||
compilation_finished = true;
|
||||
}];
|
||||
|
||||
/* Immediately wait for either the compilation to finish or for app shutdown. */
|
||||
while (ShaderCache::running && !compilation_finished) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(5));
|
||||
}
|
||||
|
||||
/* Add pipeline into the new archive (unless we did it earlier). */
|
||||
if (pipeline && !linked_functions) {
|
||||
addComputePipelineFunctionsWithDescriptor();
|
||||
__block bool creating_new_archive = false;
|
||||
if (@available(macOS 11.0, *)) {
|
||||
if (use_binary_archive) {
|
||||
if (!archive) {
|
||||
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
|
||||
archiveDesc.url = nil;
|
||||
archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
|
||||
creating_new_archive = true;
|
||||
}
|
||||
computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
|
||||
pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
|
||||
}
|
||||
|
||||
if (!pipeline) {
|
||||
metal_printf(
|
||||
"newComputePipelineStateWithDescriptor failed for \"%s\"%s. "
|
||||
"Error:\n%s\n",
|
||||
device_kernel_as_string((DeviceKernel)device_kernel),
|
||||
(archive && !recreate_archive) ? " Archive may be incomplete or corrupt - attempting "
|
||||
"recreation.." :
|
||||
"",
|
||||
error_str.c_str());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
double starttime = time_dt();
|
||||
|
||||
do_compilation();
|
||||
/* Block on load to ensure we continue with a valid kernel function */
|
||||
if (creating_new_archive) {
|
||||
starttime = time_dt();
|
||||
NSError *error;
|
||||
if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
|
||||
error:&error]) {
|
||||
NSString *errStr = [error localizedDescription];
|
||||
metal_printf("Failed to add PSO to archive:\n%s\n", errStr ? [errStr UTF8String] : "nil");
|
||||
}
|
||||
}
|
||||
|
||||
/* An archive might have a corrupt entry and fail to materialize the pipeline. This shouldn't
|
||||
* happen, but if it does we recreate it. */
|
||||
pipeline = [mtlDevice newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
|
||||
options:pipelineOptions
|
||||
reflection:nullptr
|
||||
error:&error];
|
||||
|
||||
bool recreate_archive = false;
|
||||
if (pipeline == nil && archive) {
|
||||
NSString *errStr = [error localizedDescription];
|
||||
metal_printf(
|
||||
"Failed to create compute pipeline state \"%s\" from archive - attempting recreation... "
|
||||
"(error: %s)\n",
|
||||
device_kernel_as_string((DeviceKernel)device_kernel),
|
||||
errStr ? [errStr UTF8String] : "nil");
|
||||
pipeline = [mtlDevice newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
|
||||
options:MTLPipelineOptionNone
|
||||
reflection:nullptr
|
||||
error:&error];
|
||||
recreate_archive = true;
|
||||
pipelineOptions = MTLPipelineOptionNone;
|
||||
path_remove(metalbin_path);
|
||||
|
||||
do_compilation();
|
||||
}
|
||||
|
||||
double duration = time_dt() - starttime;
|
||||
|
||||
if (pipeline == nil) {
|
||||
NSString *errStr = [error localizedDescription];
|
||||
error_str = string_printf("Failed to create compute pipeline state \"%s\", error: \n",
|
||||
device_kernel_as_string((DeviceKernel)device_kernel));
|
||||
error_str += (errStr ? [errStr UTF8String] : "nil");
|
||||
metal_printf("%16s | %2d | %-55s | %7.2fs | FAILED!\n",
|
||||
kernel_type_as_string(pso_type),
|
||||
device_kernel,
|
||||
@@ -801,8 +789,7 @@ void MetalKernelPipeline::compile()
|
||||
if (creating_new_archive || recreate_archive) {
|
||||
if (![archive serializeToURL:[NSURL fileURLWithPath:@(metalbin_path.c_str())]
|
||||
error:&error]) {
|
||||
metal_printf("Failed to save binary archive to %s, error:\n%s\n",
|
||||
metalbin_path.c_str(),
|
||||
metal_printf("Failed to save binary archive, error:\n%s\n",
|
||||
[[error localizedDescription] UTF8String]);
|
||||
}
|
||||
else {
|
||||
@@ -870,15 +857,16 @@ void MetalDeviceKernels::wait_for_all()
|
||||
}
|
||||
}
|
||||
|
||||
int MetalDeviceKernels::num_incomplete_specialization_requests()
|
||||
bool MetalDeviceKernels::any_specialization_happening_now()
|
||||
{
|
||||
/* Return true if any ShaderCaches have ongoing specialization requests (typically there will be
|
||||
* only 1). */
|
||||
int total = 0;
|
||||
for (int i = 0; i < g_shaderCacheCount; i++) {
|
||||
total += g_shaderCache[i].second->incomplete_specialization_requests;
|
||||
if (g_shaderCache[i].second->incomplete_specialization_requests > 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return total;
|
||||
return false;
|
||||
}
|
||||
|
||||
int MetalDeviceKernels::get_loaded_kernel_count(MetalDevice const *device,
|
||||
|
@@ -278,7 +278,7 @@ int MetalDeviceQueue::num_concurrent_states(const size_t state_size) const
|
||||
if (metal_device_->device_vendor == METAL_GPU_APPLE) {
|
||||
result *= 4;
|
||||
|
||||
/* Increasing the state count doesn't notably benefit M1-family systems. */
|
||||
/* Increasing the state count doesn't notably benefit M1-family systems. */
|
||||
if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) != APPLE_M1) {
|
||||
size_t system_ram = system_physical_ram();
|
||||
size_t allocated_so_far = [metal_device_->mtlDevice currentAllocatedSize];
|
||||
@@ -477,21 +477,17 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->texture_bindings_3d
|
||||
offset:0
|
||||
atIndex:1];
|
||||
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->buffer_bindings_1d
|
||||
offset:0
|
||||
atIndex:2];
|
||||
|
||||
if (@available(macos 12.0, *)) {
|
||||
if (metal_device_->use_metalrt) {
|
||||
if (metal_device_->bvhMetalRT) {
|
||||
id<MTLAccelerationStructure> accel_struct = metal_device_->bvhMetalRT->accel_struct;
|
||||
[metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:3];
|
||||
[metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:2];
|
||||
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_buffer
|
||||
offset:0
|
||||
atIndex:8];
|
||||
atIndex:7];
|
||||
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_lookup_buffer
|
||||
offset:0
|
||||
atIndex:9];
|
||||
atIndex:8];
|
||||
}
|
||||
|
||||
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
|
||||
@@ -501,13 +497,13 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
atIndex:1];
|
||||
[metal_device_->mtlAncillaryArgEncoder
|
||||
setIntersectionFunctionTable:metal_kernel_pso->intersection_func_table[table]
|
||||
atIndex:4 + table];
|
||||
atIndex:3 + table];
|
||||
[mtlComputeCommandEncoder useResource:metal_kernel_pso->intersection_func_table[table]
|
||||
usage:MTLResourceUsageRead];
|
||||
}
|
||||
else {
|
||||
[metal_device_->mtlAncillaryArgEncoder setIntersectionFunctionTable:nil
|
||||
atIndex:4 + table];
|
||||
atIndex:3 + table];
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -878,7 +874,6 @@ void MetalDeviceQueue::prepare_resources(DeviceKernel kernel)
|
||||
/* ancillaries */
|
||||
[mtlComputeEncoder_ useResource:metal_device_->texture_bindings_2d usage:MTLResourceUsageRead];
|
||||
[mtlComputeEncoder_ useResource:metal_device_->texture_bindings_3d usage:MTLResourceUsageRead];
|
||||
[mtlComputeEncoder_ useResource:metal_device_->buffer_bindings_1d usage:MTLResourceUsageRead];
|
||||
}
|
||||
|
||||
id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel kernel)
|
||||
|
@@ -103,7 +103,7 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
|
||||
}
|
||||
|
||||
/* If the system has both an AMD GPU (discrete) and an Intel one (integrated), prefer the AMD
|
||||
* one. This can be overridden with CYCLES_METAL_FORCE_INTEL. */
|
||||
* one. This can be overriden with CYCLES_METAL_FORCE_INTEL. */
|
||||
bool has_usable_amd_gpu = false;
|
||||
if (@available(macos 12.3, *)) {
|
||||
for (id<MTLDevice> device in MTLCopyAllDevices()) {
|
||||
|
@@ -1437,9 +1437,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
|
||||
BVHOptiX *const blas = static_cast<BVHOptiX *>(ob->get_geometry()->bvh);
|
||||
OptixTraversableHandle handle = blas->traversable_handle;
|
||||
if (handle == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
OptixInstance &instance = instances[num_instances++];
|
||||
memset(&instance, 0, sizeof(instance));
|
||||
|
@@ -1343,7 +1343,7 @@ void PathTrace::guiding_prepare_structures()
|
||||
* per update to be limited, for reproducible results and reasonable training size.
|
||||
*
|
||||
* Idea: we could stochastically discard samples with a probability of 1/num_samples_per_update
|
||||
* we can then update only after the num_samples_per_update iterations are rendered. */
|
||||
* we can then update only after the num_samples_per_update iterations are rendered. */
|
||||
render_scheduler_.set_limit_samples_per_update(4);
|
||||
}
|
||||
else {
|
||||
|
@@ -94,7 +94,7 @@ class PathTrace {
|
||||
void set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling);
|
||||
|
||||
/* Set the parameters for guiding.
|
||||
* Use to setup the guiding structures before each rendering iteration. */
|
||||
* Use to setup the guiding structures before each rendering iteration.*/
|
||||
void set_guiding_params(const GuidingParams ¶ms, const bool reset);
|
||||
|
||||
/* Sets output driver for render buffer output. */
|
||||
@@ -119,7 +119,7 @@ class PathTrace {
|
||||
*/
|
||||
void cancel();
|
||||
|
||||
/* Copy an entire render buffer to/from the path trace. */
|
||||
/* Copy an entire render buffer to/from the path trace. */
|
||||
|
||||
/* Copy happens via CPU side buffer: data will be copied from every device of the path trace, and
|
||||
* the data will be copied to the device of the given render buffers. */
|
||||
@@ -294,7 +294,7 @@ class PathTrace {
|
||||
* rendering iteration. */
|
||||
unique_ptr<openpgl::cpp::SampleStorage> guiding_sample_data_storage_;
|
||||
|
||||
/* The number of already performed training iterations for the guiding field. */
|
||||
/* The number of already performed training iterations for the guiding field.*/
|
||||
int guiding_update_count = 0;
|
||||
#endif
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user