1
1

Merge branch 'main', update to changes

This commit is contained in:
2023-02-17 15:37:16 +01:00
2193 changed files with 84394 additions and 55814 deletions

View File

@@ -1,8 +0,0 @@
{
"project_id" : "Blender",
"conduit_uri" : "https://developer.blender.org/",
"phabricator.uri" : "https://developer.blender.org/",
"git.default-relative-commit" : "origin/master",
"arc.land.update.default" : "rebase",
"arc.land.onto.default" : "master"
}

View File

@@ -236,6 +236,8 @@ ForEachMacros:
- LOOP_UNSELECTED_POINTS
- LOOP_VISIBLE_KEYS
- LOOP_VISIBLE_POINTS
- LIGHT_FOREACH_BEGIN_DIRECTIONAL
- LIGHT_FOREACH_BEGIN_LOCAL
- LISTBASE_CIRCULAR_BACKWARD_BEGIN
- LISTBASE_CIRCULAR_FORWARD_BEGIN
- LISTBASE_FOREACH

View File

@@ -0,0 +1,5 @@
${CommitTitle}
${CommitBody}
Pull Request #${PullRequestIndex}

View File

@@ -0,0 +1,3 @@
${PullRequestTitle}
Pull Request #${PullRequestIndex}

View File

@@ -0,0 +1,44 @@
name: Bug Report
about: File a bug report
labels:
- "type::Report"
- "status::Needs Triage"
- "priority::Normal"
body:
- type: markdown
attributes:
value: |
### Instructions
First time reporting? See [tips](https://wiki.blender.org/wiki/Process/Bug_Reports).
* Use **Help > Report a Bug** in Blender to fill system information and exact Blender version.
* Test [daily builds](https://builder.blender.org/) to verify if the issue is already fixed.
* Test [previous versions](https://download.blender.org/release/) to find an older working version.
* For feature requests, feedback, questions or build issues, see [communication channels](https://wiki.blender.org/wiki/Communication/Contact#User_Feedback_and_Requests).
* If there are multiple bugs, make multiple bug reports.
- type: textarea
id: body
attributes:
label: "Description"
hide_label: true
value: |
**System Information**
Operating system:
Graphics card:
**Blender Version**
Broken: (example: 2.80, edbf15d3c044, master, 2018-11-28, as found on the splash screen)
Worked: (newest version of Blender that worked as expected)
**Short description of error**
**Exact steps for others to reproduce the error**
Based on the default startup or an attached .blend file (as simple as possible).
- type: markdown
attributes:
value: |
### Help the developers
Bug fixing is important, the developers will handle reports swiftly. For that reason, carefully provide exact steps and a **small and simple .blend file** to reproduce the problem. You do your half of the work, then we do our half!

View File

@@ -0,0 +1 @@
blank_issues_enabled: false

View File

@@ -0,0 +1,10 @@
name: Design
about: Create a design task (for developers only)
labels:
- "type::Design"
body:
- type: textarea
id: body
attributes:
label: "Description"
hide_label: true

View File

@@ -0,0 +1,10 @@
name: To Do
about: Create a to do task (for developers only)
labels:
- "type::To Do"
body:
- type: textarea
id: body
attributes:
label: "Description"
hide_label: true

View File

@@ -0,0 +1,17 @@
name: Pull Request
about: Contribute code to Blender
body:
- type: markdown
attributes:
value: |
### Instructions
Guides to [contributing code](https://wiki.blender.org/index.php/Dev:Doc/Process/Contributing_Code) and effective [code review](https://wiki.blender.org/index.php/Dev:Doc/Tools/Code_Review).
By submitting code here, you agree that the code is (compatible with) GNU GPL v2 or later.
- type: textarea
id: body
attributes:
label: "Description"
hide_label: true

View File

@@ -1,5 +1,4 @@
This repository is only used as a mirror of git.blender.org. Blender development happens on
https://developer.blender.org.
This repository is only used as a mirror. Blender development happens on projects.blender.org.
To get started with contributing code, please see:
https://wiki.blender.org/wiki/Process/Contributing_Code

3
.github/stale.yml vendored
View File

@@ -15,8 +15,7 @@ staleLabel: stale
# Comment to post when closing a stale Issue or Pull Request.
closeComment: >
This issue has been automatically closed, because this repository is only
used as a mirror of git.blender.org. Blender development happens on
developer.blender.org.
used as a mirror. Blender development happens on projects.blender.org.
To get started contributing code, please read:
https://wiki.blender.org/wiki/Process/Contributing_Code

8
.gitmodules vendored
View File

@@ -1,20 +1,20 @@
[submodule "release/scripts/addons"]
path = release/scripts/addons
url = ../blender-addons.git
branch = master
branch = main
ignore = all
[submodule "release/scripts/addons_contrib"]
path = release/scripts/addons_contrib
url = ../blender-addons-contrib.git
branch = master
branch = main
ignore = all
[submodule "release/datafiles/locale"]
path = release/datafiles/locale
url = ../blender-translations.git
branch = master
branch = main
ignore = all
[submodule "source/tools"]
path = source/tools
url = ../blender-dev-tools.git
branch = master
branch = main
ignore = all

View File

@@ -167,14 +167,26 @@ get_blender_version()
option(WITH_BLENDER "Build blender (disable to build only the blender player)" ON)
mark_as_advanced(WITH_BLENDER)
if(APPLE)
# In future, can be used with `quicklookthumbnailing/qlthumbnailreply` to create file
# thumbnails for say Finder. Turn it off for now.
option(WITH_BLENDER_THUMBNAILER "Build \"blender-thumbnailer\" thumbnail extraction utility" OFF)
elseif(WIN32)
option(WITH_BLENDER_THUMBNAILER "Build \"BlendThumb.dll\" helper for Windows explorer integration" ON)
if(WIN32)
option(WITH_BLENDER_THUMBNAILER "\
Build \"BlendThumb.dll\" helper for Windows explorer integration to support extracting \
thumbnails from `.blend` files."
ON
)
else()
option(WITH_BLENDER_THUMBNAILER "Build \"blender-thumbnailer\" thumbnail extraction utility" ON)
set(_option_default ON)
if(APPLE)
# In future, can be used with `quicklookthumbnailing/qlthumbnailreply`
# to create file thumbnails for say Finder.
# Turn it off for now, even though it can build on APPLE, it's not likely to be useful.
set(_option_default OFF)
endif()
option(WITH_BLENDER_THUMBNAILER "\
Build stand-alone \"blender-thumbnailer\" command-line thumbnail extraction utility, \
intended for use by file-managers to extract PNG images from `.blend` files."
${_option_default}
)
unset(_option_default)
endif()
option(WITH_INTERNATIONAL "Enable I18N (International fonts and text)" ON)
@@ -214,14 +226,19 @@ option(WITH_BULLET "Enable Bullet (Physics Engine)" ON)
option(WITH_SYSTEM_BULLET "Use the systems bullet library (currently unsupported due to missing features in upstream!)" )
mark_as_advanced(WITH_SYSTEM_BULLET)
option(WITH_OPENCOLORIO "Enable OpenColorIO color management" ON)
set(_option_default ON)
if(APPLE)
# There's no OpenXR runtime in sight for macOS, neither is code well
# tested there -> disable it by default.
option(WITH_XR_OPENXR "Enable VR features through the OpenXR specification" OFF)
mark_as_advanced(WITH_XR_OPENXR)
else()
option(WITH_XR_OPENXR "Enable VR features through the OpenXR specification" ON)
set(_option_default OFF)
endif()
option(WITH_XR_OPENXR "Enable VR features through the OpenXR specification" ${_option_default})
if(APPLE)
mark_as_advanced(WITH_XR_OPENXR)
endif()
unset(_option_default)
option(WITH_GMP "Enable features depending on GMP (Exact Boolean)" ON)
# Compositor
@@ -353,12 +370,13 @@ else()
set(WITH_COREAUDIO OFF)
endif()
if(NOT WIN32)
set(_option_default ON)
if(APPLE)
option(WITH_JACK "Enable JACK Support (http://www.jackaudio.org)" OFF)
else()
option(WITH_JACK "Enable JACK Support (http://www.jackaudio.org)" ON)
set(_option_default OFF)
endif()
option(WITH_JACK_DYNLOAD "Enable runtime dynamic JACK libraries loading" OFF)
option(WITH_JACK "Enable JACK Support (http://www.jackaudio.org)" ${_option_default})
unset(_option_default)
option(WITH_JACK_DYNLOAD "Enable runtime dynamic JACK libraries loading" OFF)
else()
set(WITH_JACK OFF)
endif()
@@ -399,6 +417,26 @@ mark_as_advanced(WITH_SYSTEM_GLOG)
# Freestyle
option(WITH_FREESTYLE "Enable Freestyle (advanced edges rendering)" ON)
# Libraries.
if(UNIX AND NOT APPLE)
# Optionally build without pre-compiled libraries.
# NOTE: this could be supported on all platforms however in practice UNIX is the only platform
# that has good support for detecting installed libraries.
option(WITH_LIBS_PRECOMPILED "\
Detect and link against pre-compiled libraries (typically found under \"../lib/\"). \
Disabling this option will use the system libraries although cached paths \
that point to pre-compiled libraries will be left as-is."
ON
)
mark_as_advanced(WITH_LIBS_PRECOMPILED)
option(WITH_STATIC_LIBS "Try to link with static libraries, as much as possible, to make blender more portable across distributions" OFF)
if(WITH_STATIC_LIBS)
option(WITH_BOOST_ICU "Boost uses ICU library (required for linking with static Boost built with libicu)." OFF)
mark_as_advanced(WITH_BOOST_ICU)
endif()
endif()
# Misc
if(WIN32 OR APPLE)
option(WITH_INPUT_IME "Enable Input Method Editor (IME) for complex Asian character input" ON)
@@ -406,11 +444,6 @@ endif()
option(WITH_INPUT_NDOF "Enable NDOF input devices (SpaceNavigator and friends)" ON)
if(UNIX AND NOT APPLE)
option(WITH_INSTALL_PORTABLE "Install redistributable runtime, otherwise install into CMAKE_INSTALL_PREFIX" ON)
option(WITH_STATIC_LIBS "Try to link with static libraries, as much as possible, to make blender more portable across distributions" OFF)
if(WITH_STATIC_LIBS)
option(WITH_BOOST_ICU "Boost uses ICU library (required for linking with static Boost built with libicu)." OFF)
mark_as_advanced(WITH_BOOST_ICU)
endif()
endif()
option(WITH_PYTHON_INSTALL "Copy system python into the blender install folder" ON)
@@ -592,8 +625,10 @@ mark_as_advanced(
# Vulkan
option(WITH_VULKAN_BACKEND "Enable Vulkan as graphics backend (only for development)" OFF)
option(WITH_VULKAN_GUARDEDALLOC "Use guardedalloc for host allocations done inside Vulkan (development option)" OFF)
mark_as_advanced(
WITH_VULKAN_BACKEND
WITH_VULKAN_GUARDEDALLOC
)
# Metal
@@ -993,6 +1028,8 @@ set(PLATFORM_LINKLIBS "")
# - CMAKE_EXE_LINKER_FLAGS_DEBUG
set(PLATFORM_LINKFLAGS "")
set(PLATFORM_LINKFLAGS_DEBUG "")
set(PLATFORM_LINKFLAGS_RELEASE "")
set(PLATFORM_LINKFLAGS_EXECUTABLE "")
if(NOT CMAKE_BUILD_TYPE MATCHES "Release")
if(WITH_COMPILER_ASAN)
@@ -1206,13 +1243,6 @@ if(WITH_OPENGL)
add_definitions(-DWITH_OPENGL)
endif()
#-----------------------------------------------------------------------------
# Configure Vulkan.
if(WITH_VULKAN_BACKEND)
list(APPEND BLENDER_GL_LIBRARIES ${VULKAN_LIBRARIES})
endif()
# -----------------------------------------------------------------------------
# Configure Metal
@@ -1262,12 +1292,14 @@ endif()
# -----------------------------------------------------------------------------
# Configure Bullet
if(WITH_BULLET AND WITH_SYSTEM_BULLET)
find_package(Bullet)
set_and_warn_library_found("Bullet" BULLET_FOUND WITH_BULLET)
else()
set(BULLET_INCLUDE_DIRS "${CMAKE_SOURCE_DIR}/extern/bullet2/src")
# set(BULLET_LIBRARIES "")
if(WITH_BULLET)
if(WITH_SYSTEM_BULLET)
find_package(Bullet)
set_and_warn_library_found("Bullet" BULLET_FOUND WITH_BULLET)
else()
set(BULLET_INCLUDE_DIRS "${CMAKE_SOURCE_DIR}/extern/bullet2/src")
set(BULLET_LIBRARIES "extern_bullet")
endif()
endif()
@@ -1427,6 +1459,9 @@ if(CMAKE_COMPILER_IS_GNUCC)
add_check_c_compiler_flag(C_WARNINGS C_WARN_TYPE_LIMITS -Wtype-limits)
add_check_c_compiler_flag(C_WARNINGS C_WARN_FORMAT_SIGN -Wformat-signedness)
add_check_c_compiler_flag(C_WARNINGS C_WARN_RESTRICT -Wrestrict)
# Useful but too many false positives and inconvenient to suppress each occurrence.
add_check_c_compiler_flag(C_WARNINGS C_WARN_NO_STRINGOP_OVERREAD -Wno-stringop-overread)
add_check_c_compiler_flag(C_WARNINGS C_WARN_NO_STRINGOP_OVERFLOW -Wno-stringop-overflow)
# C-only.
add_check_c_compiler_flag(C_WARNINGS C_WARN_NO_NULL -Wnonnull)
@@ -1466,6 +1501,9 @@ if(CMAKE_COMPILER_IS_GNUCC)
add_check_cxx_compiler_flag(CXX_WARNINGS CXX_WARN_RESTRICT -Wrestrict)
add_check_cxx_compiler_flag(CXX_WARNINGS CXX_WARN_NO_SUGGEST_OVERRIDE -Wno-suggest-override)
add_check_cxx_compiler_flag(CXX_WARNINGS CXX_WARN_UNINITIALIZED -Wuninitialized)
# Useful but too many false positives and inconvenient to suppress each occurrence.
add_check_cxx_compiler_flag(CXX_WARNINGS CXX_WARN_NO_STRINGOP_OVERREAD -Wno-stringop-overread)
add_check_cxx_compiler_flag(CXX_WARNINGS CXX_WARN_NO_STRINGOP_OVERFLOW -Wno-stringop-overflow)
# causes too many warnings
if(NOT APPLE)

View File

@@ -71,6 +71,13 @@ Static Source Code Checking
* check_mypy: Checks all Python scripts using mypy,
see: source/tools/check_source/check_mypy_config.py scripts which are included.
Documentation Checking
* check_wiki_file_structure:
Check the WIKI documentation for the source-tree's file structure
matches Blender's source-code.
See: https://wiki.blender.org/wiki/Source/File_Structure
Spell Checkers
This runs the spell checker from the developer tools repositor.
@@ -292,7 +299,11 @@ else
ifneq ("$(wildcard $(DEPS_BUILD_DIR)/build.ninja)","")
DEPS_BUILD_COMMAND:=ninja
else
DEPS_BUILD_COMMAND:=make -s
ifeq ($(OS), Darwin)
DEPS_BUILD_COMMAND:=make -s
else
DEPS_BUILD_COMMAND:="$(BLENDER_DIR)/build_files/build_environment/linux/make_deps_wrapper.sh" -s
endif
endif
endif
@@ -391,7 +402,7 @@ endif
deps: .FORCE
@echo
@echo Configuring dependencies in \"$(DEPS_BUILD_DIR)\"
@echo Configuring dependencies in \"$(DEPS_BUILD_DIR)\", install to \"$(DEPS_INSTALL_DIR)\"
@cmake -H"$(DEPS_SOURCE_DIR)" \
-B"$(DEPS_BUILD_DIR)" \
@@ -481,6 +492,10 @@ check_smatch: .FORCE
check_mypy: .FORCE
@$(PYTHON) "$(BLENDER_DIR)/source/tools/check_source/check_mypy.py"
check_wiki_file_structure: .FORCE
@PYTHONIOENCODING=utf_8 $(PYTHON) \
"$(BLENDER_DIR)/source/tools/check_wiki/check_wiki_file_structure.py"
check_spelling_py: .FORCE
@cd "$(BUILD_DIR)" ; \
PYTHONIOENCODING=utf_8 $(PYTHON) \

38
README.md Normal file
View File

@@ -0,0 +1,38 @@
<!--
Keep this document short & concise,
linking to external resources instead of including content in-line.
See 'release/text/readme.html' for the end user read-me.
-->
Blender
=======
Blender is the free and open source 3D creation suite.
It supports the entirety of the 3D pipeline-modeling, rigging, animation, simulation, rendering, compositing,
motion tracking and video editing.
![Blender screenshot](https://code.blender.org/wp-content/uploads/2018/12/springrg.jpg "Blender screenshot")
Project Pages
-------------
- [Main Website](http://www.blender.org)
- [Reference Manual](https://docs.blender.org/manual/en/latest/index.html)
- [User Community](https://www.blender.org/community/)
Development
-----------
- [Build Instructions](https://wiki.blender.org/wiki/Building_Blender)
- [Code Review & Bug Tracker](https://projects.blender.org)
- [Developer Forum](https://devtalk.blender.org)
- [Developer Documentation](https://wiki.blender.org)
License
-------
Blender as a whole is licensed under the GNU General Public License, Version 3.
Individual files may have a different, but compatible license.
See [blender.org/about/license](https://www.blender.org/about/license) for details.

View File

@@ -2,7 +2,7 @@
# LLVM does not switch over to cpp17 until llvm 16 and building ealier versions with
# MSVC is leading to some crashes in ISPC. Switch back to their default on all platforms
# for now.
# for now.
string(REPLACE "-DCMAKE_CXX_STANDARD=17" " " DPCPP_CMAKE_FLAGS "${DEFAULT_CMAKE_FLAGS}")
if(WIN32)

View File

@@ -10,7 +10,7 @@ ExternalProject_Add(external_epoxy
URL_HASH ${EPOXY_HASH_TYPE}=${EPOXY_HASH}
PREFIX ${BUILD_DIR}/epoxy
PATCH_COMMAND ${PATCH_CMD} -p 1 -N -d ${BUILD_DIR}/epoxy/src/external_epoxy/ < ${PATCH_DIR}/epoxy.diff
CONFIGURE_COMMAND ${CONFIGURE_ENV} && ${MESON} setup --prefix ${LIBDIR}/epoxy --default-library ${EPOXY_LIB_TYPE} --libdir lib ${BUILD_DIR}/epoxy/src/external_epoxy-build ${BUILD_DIR}/epoxy/src/external_epoxy -Dtests=false
CONFIGURE_COMMAND ${CONFIGURE_ENV} && ${MESON} setup --prefix ${LIBDIR}/epoxy --default-library ${EPOXY_LIB_TYPE} --libdir lib ${BUILD_DIR}/epoxy/src/external_epoxy-build ${BUILD_DIR}/epoxy/src/external_epoxy -Dtests=false ${MESON_BUILD_TYPE}
BUILD_COMMAND ninja
INSTALL_COMMAND ninja install
)

View File

@@ -9,7 +9,7 @@ ExternalProject_Add(external_fribidi
URL_HASH ${FRIBIDI_HASH_TYPE}=${FRIBIDI_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/fribidi
CONFIGURE_COMMAND ${MESON} setup --prefix ${LIBDIR}/fribidi -Ddocs=false --default-library static --libdir lib ${BUILD_DIR}/fribidi/src/external_fribidi-build ${BUILD_DIR}/fribidi/src/external_fribidi
CONFIGURE_COMMAND ${MESON} setup --prefix ${LIBDIR}/fribidi ${MESON_BUILD_TYPE} -Ddocs=false --default-library static --libdir lib ${BUILD_DIR}/fribidi/src/external_fribidi-build ${BUILD_DIR}/fribidi/src/external_fribidi
BUILD_COMMAND ninja
INSTALL_COMMAND ninja install
INSTALL_DIR ${LIBDIR}/fribidi

View File

@@ -22,7 +22,7 @@ elseif(UNIX AND NOT APPLE)
)
endif()
# Boolean crashes with Arm assembly, see T103423.
# Boolean crashes with Arm assembly, see #103423.
if(BLENDER_PLATFORM_ARM)
set(GMP_OPTIONS
${GMP_OPTIONS}

View File

@@ -21,6 +21,7 @@ set(HARFBUZZ_EXTRA_OPTIONS
# Only used for command line utilities,
# disable as this would add an addition & unnecessary build-dependency.
-Dcairo=disabled
${MESON_BUILD_TYPE}
)
ExternalProject_Add(external_harfbuzz
@@ -59,3 +60,10 @@ if(BUILD_MODE STREQUAL Release AND WIN32)
DEPENDEES install
)
endif()
if(BUILD_MODE STREQUAL Debug AND WIN32)
ExternalProject_Add_Step(external_harfbuzz after_install
COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/harfbuzz/lib/libharfbuzz.a ${HARVEST_TARGET}/harfbuzz/lib/libharfbuzz_d.lib
DEPENDEES install
)
endif()

View File

@@ -40,7 +40,8 @@ ExternalProject_Add(external_igc_llvm
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0004-OpenCL-support-cl_ext_float_atomics.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0005-OpenCL-Add-cl_khr_integer_dot_product.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch &&
${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0003-Add-missing-include-limit-in-benchmark.patch
)
add_dependencies(
external_igc_llvm
@@ -55,9 +56,6 @@ ExternalProject_Add(external_igc_spirv_translator
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0001-update-SPIR-V-headers-for-SPV_INTEL_split_barrier.patch &&
${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0002-Add-support-for-split-barriers-extension-SPV_INTEL_s.patch &&
${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0003-Support-cl_bf16_conversions.patch
)
add_dependencies(
external_igc_spirv_translator

View File

@@ -42,7 +42,7 @@ endif()
# LLVM does not switch over to cpp17 until llvm 16 and building ealier versions with
# MSVC is leading to some crashes in ISPC. Switch back to their default on all platforms
# for now.
# for now.
string(REPLACE "-DCMAKE_CXX_STANDARD=17" " " LLVM_CMAKE_FLAGS "${DEFAULT_CMAKE_FLAGS}")
# short project name due to long filename issues on windows

View File

@@ -15,7 +15,7 @@ llvm-config = '${LIBDIR}/llvm/bin/llvm-config'"
)
set(MESA_EXTRA_FLAGS
-Dbuildtype=release
${MESON_BUILD_TYPE}
-Dc_args=${MESA_CFLAGS}
-Dcpp_args=${MESA_CXXFLAGS}
-Dc_link_args=${MESA_LDFLAGS}

View File

@@ -16,8 +16,10 @@ message("BuildMode = ${BUILD_MODE}")
if(BUILD_MODE STREQUAL "Debug")
set(LIBDIR ${CMAKE_CURRENT_BINARY_DIR}/Debug)
set(MESON_BUILD_TYPE -Dbuildtype=debug)
else()
set(LIBDIR ${CMAKE_CURRENT_BINARY_DIR}/Release)
set(MESON_BUILD_TYPE -Dbuildtype=release)
endif()
set(DOWNLOAD_DIR "${CMAKE_CURRENT_BINARY_DIR}/downloads" CACHE STRING "Path for downloaded files")

View File

@@ -88,6 +88,19 @@ else()
export LDFLAGS=${PYTHON_LDFLAGS} &&
export PKG_CONFIG_PATH=${LIBDIR}/ffi/lib/pkgconfig)
# NOTE: untested on APPLE so far.
if(NOT APPLE)
set(PYTHON_CONFIGURE_EXTRA_ARGS
${PYTHON_CONFIGURE_EXTRA_ARGS}
# Used on most release Linux builds (Fedora for e.g.),
# increases build times noticeably with the benefit of a modest speedup at runtime.
--enable-optimizations
# While LTO is OK when building on the same system, it's incompatible across GCC versions,
# making it impractical for developers to build against, so keep it disabled.
# `--with-lto`
)
endif()
ExternalProject_Add(external_python
URL file://${PACKAGE_DIR}/${PYTHON_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}

View File

@@ -10,9 +10,9 @@ if(WIN32)
DOWNLOAD_DIR ${DOWNLOAD_DIR}
URL_HASH ${SSL_HASH_TYPE}=${SSL_HASH}
PREFIX ${BUILD_DIR}/ssl
CONFIGURE_COMMAND echo "."
BUILD_COMMAND echo "."
INSTALL_COMMAND echo "."
CONFIGURE_COMMAND echo "."
BUILD_COMMAND echo "."
INSTALL_COMMAND echo "."
INSTALL_DIR ${LIBDIR}/ssl
)
else()
@@ -46,4 +46,4 @@ else()
INSTALL_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/ssl/src/external_ssl/ && make install
INSTALL_DIR ${LIBDIR}/ssl
)
endif()
endif()

View File

@@ -29,7 +29,7 @@ elseif(UNIX)
set(USD_PLATFORM_FLAGS
-DPYTHON_INCLUDE_DIR=${LIBDIR}/python/include/python${PYTHON_SHORT_VERSION}/
-DPYTHON_LIBRARY=${LIBDIR}/tbb/lib/${LIBPREFIX}${TBB_LIBRARY}${SHAREDLIBEXT}
)
)
if(APPLE)
set(USD_SHARED_LINKER_FLAGS "-Xlinker -undefined -Xlinker dynamic_lookup")

View File

@@ -668,9 +668,9 @@ set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
# compiler, the versions used are taken from the following location
# https://github.com/intel/intel-graphics-compiler/releases
set(IGC_VERSION 1.0.12149.1)
set(IGC_VERSION 1.0.13064.7)
set(IGC_URI https://github.com/intel/intel-graphics-compiler/archive/refs/tags/igc-${IGC_VERSION}.tar.gz)
set(IGC_HASH 44f67f24e3bc5130f9f062533abf8154782a9d0a992bc19b498639a8521ae836)
set(IGC_HASH a929abd4cca2b293961ec0437ee4b3b2147bd3b2c8a3c423af78c0c359b2e5ae)
set(IGC_HASH_TYPE SHA256)
set(IGC_FILE igc-${IGC_VERSION}.tar.gz)
@@ -690,15 +690,15 @@ set(IGC_LLVM_FILE ${IGC_LLVM_VERSION}.tar.gz)
#
# WARNING WARNING WARNING
set(IGC_OPENCL_CLANG_VERSION 363a5262d8c7cff3fb28f3bdb5d85c8d7e91c1bb)
set(IGC_OPENCL_CLANG_VERSION ee31812ea8b89d08c2918f045d11a19bd33525c5)
set(IGC_OPENCL_CLANG_URI https://github.com/intel/opencl-clang/archive/${IGC_OPENCL_CLANG_VERSION}.tar.gz)
set(IGC_OPENCL_CLANG_HASH aa8cf72bb239722ce8ce44f79413c6887ecc8ca18477dd520aa5c4809756da9a)
set(IGC_OPENCL_CLANG_HASH 1db6735bbcfaa31e8a9ba39f121d6bafa806ea8919e9f56782d6aaa67771ddda)
set(IGC_OPENCL_CLANG_HASH_TYPE SHA256)
set(IGC_OPENCL_CLANG_FILE opencl-clang-${IGC_OPENCL_CLANG_VERSION}.tar.gz)
set(IGC_VCINTRINSICS_VERSION v0.5.0)
set(IGC_VCINTRINSICS_VERSION v0.11.0)
set(IGC_VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/refs/tags/${IGC_VCINTRINSICS_VERSION}.tar.gz)
set(IGC_VCINTRINSICS_HASH 70bb47c5e32173cf61514941e83ae7c7eb4485e6d2fca60cfa1f50d4f42c41f2)
set(IGC_VCINTRINSICS_HASH e5acd5626ce7fa6d41ce154c50ac805eda734ee66af94ef28e680ac2ad81bb9f)
set(IGC_VCINTRINSICS_HASH_TYPE SHA256)
set(IGC_VCINTRINSICS_FILE vc-intrinsics-${IGC_VCINTRINSICS_VERSION}.tar.gz)
@@ -714,9 +714,9 @@ set(IGC_SPIRV_TOOLS_HASH 6e19900e948944243024aedd0a201baf3854b377b9cc7a386553bc1
set(IGC_SPIRV_TOOLS_HASH_TYPE SHA256)
set(IGC_SPIRV_TOOLS_FILE SPIR-V-Tools-${IGC_SPIRV_TOOLS_VERSION}.tar.gz)
set(IGC_SPIRV_TRANSLATOR_VERSION a31ffaeef77e23d500b3ea3d35e0c42ff5648ad9)
set(IGC_SPIRV_TRANSLATOR_VERSION d739c01d65ec00dee64dedd40deed805216a7193)
set(IGC_SPIRV_TRANSLATOR_URI https://github.com/KhronosGroup/SPIRV-LLVM-Translator/archive/${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
set(IGC_SPIRV_TRANSLATOR_HASH 9e26c96a45341b8f8af521bacea20e752623346340addd02af95d669f6e89252)
set(IGC_SPIRV_TRANSLATOR_HASH ddc0cc9ccbe59dadeaf291012d59de142b2e9f2b124dbb634644d39daddaa13e)
set(IGC_SPIRV_TRANSLATOR_HASH_TYPE SHA256)
set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
@@ -724,15 +724,15 @@ set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.
### Intel Graphics Compiler DEPS END ###
########################################
set(GMMLIB_VERSION intel-gmmlib-22.1.8)
set(GMMLIB_VERSION intel-gmmlib-22.3.0)
set(GMMLIB_URI https://github.com/intel/gmmlib/archive/refs/tags/${GMMLIB_VERSION}.tar.gz)
set(GMMLIB_HASH bf23e9a3742b4fb98c7666c9e9b29f3219e4b2fb4d831aaf4eed71f5e2d17368)
set(GMMLIB_HASH c1f33e1519edfc527127baeb0436b783430dfd256c643130169a3a71dc86aff9)
set(GMMLIB_HASH_TYPE SHA256)
set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)
set(OCLOC_VERSION 22.38.24278)
set(OCLOC_VERSION 22.49.25018.21)
set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
set(OCLOC_HASH db0c542fccd651e6404b15a74d46027f1ce0eda8dc9e25a40cbb6c0faef257ee)
set(OCLOC_HASH 92362dae08b503a34e5d3820ed284198c452bcd5e7504d90eb69887b20492c06)
set(OCLOC_HASH_TYPE SHA256)
set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)

View File

@@ -13,7 +13,7 @@ ExternalProject_Add(external_wayland
# NOTE: `-lm` is needed for `libxml2` which is a static library that uses `libm.so`,
# without this, math symbols such as `floor` aren't found.
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH=${LIBDIR}/expat/lib/pkgconfig:${LIBDIR}/xml2/lib/pkgconfig:${LIBDIR}/ffi/lib/pkgconfig:$PKG_CONFIG_PATH
${MESON} --prefix ${LIBDIR}/wayland -Ddocumentation=false -Dtests=false -D "c_link_args=-L${LIBDIR}/ffi/lib -lm" . ../external_wayland
${MESON} --prefix ${LIBDIR}/wayland ${MESON_BUILD_TYPE} -Ddocumentation=false -Dtests=false -D "c_link_args=-L${LIBDIR}/ffi/lib -lm" . ../external_wayland
BUILD_COMMAND ninja
INSTALL_COMMAND ninja install
)

View File

@@ -7,7 +7,7 @@ ExternalProject_Add(external_wayland_protocols
PREFIX ${BUILD_DIR}/wayland-protocols
# Use `-E` so the `PKG_CONFIG_PATH` can be defined to link against our own WAYLAND.
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH=${LIBDIR}/wayland/lib64/pkgconfig:$PKG_CONFIG_PATH
${MESON} --prefix ${LIBDIR}/wayland-protocols . ../external_wayland_protocols -Dtests=false
${MESON} --prefix ${LIBDIR}/wayland-protocols ${MESON_BUILD_TYPE} . ../external_wayland_protocols -Dtests=false
BUILD_COMMAND ninja
INSTALL_COMMAND ninja install
)

View File

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-or-later
if(WIN32)
set(XML2_EXTRA_ARGS
set(XML2_EXTRA_ARGS
-DLIBXML2_WITH_ZLIB=OFF
-DLIBXML2_WITH_LZMA=OFF
-DLIBXML2_WITH_PYTHON=OFF

View File

@@ -0,0 +1,74 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: GPL-2.0-or-later
# This script ensures:
# - One dependency is built at a time.
# - That dependency uses all available cores.
#
# Without this, simply calling `make -j$(nproc)` from the `${CMAKE_BUILD_DIR}/deps/`
# directory will build many projects at once.
#
# This is undesirable for the following reasons:
#
# - The output from projects is mixed together,
# making it difficult to track down the cause of a build failure.
#
# - Larger dependencies such as LLVM can bottleneck the build process,
# making it necessary to cancel the build and manually run build commands in each directory.
#
# - Building many projects at once means canceling (Control-C) can lead to the build being in an undefined state.
# It's possible canceling happens as a patch is being applied or files are being copied.
# (steps that aren't part of the compilation process where it's typically safe to cancel).
if [[ -z "$MY_MAKE_CALL_LEVEL" ]]; then
export MY_MAKE_CALL_LEVEL=0
export MY_MAKEFLAGS=$MAKEFLAGS
# Extract the jobs argument (`-jN`, `-j N`, `--jobs=N`).
add_next=0
for i in "$@"; do
case $i in
-j*)
export MY_JOBS_ARG=$i
if [ "$MY_JOBS_ARG" = "-j" ]; then
add_next=1
fi
;;
--jobs=*)
shift # past argument=value
MY_JOBS_ARG=$i
;;
*)
if (( add_next == 1 )); then
MY_JOBS_ARG="$MY_JOBS_ARG $i"
add_next=0
fi
;;
esac
done
unset i add_next
if [[ -z "$MY_JOBS_ARG" ]]; then
MY_JOBS_ARG="-j$(nproc)"
fi
export MY_JOBS_ARG
# Support user defined `MAKEFLAGS`.
export MAKEFLAGS="$MY_MAKEFLAGS -j1"
else
export MY_MAKE_CALL_LEVEL=$(( MY_MAKE_CALL_LEVEL + 1 ))
if (( MY_MAKE_CALL_LEVEL == 1 )); then
# Important to set jobs to 1, otherwise user defined jobs argument is used.
export MAKEFLAGS="$MY_MAKEFLAGS -j1"
elif (( MY_MAKE_CALL_LEVEL == 2 )); then
# This is the level used by each sub-project.
export MAKEFLAGS="$MY_MAKEFLAGS $MY_JOBS_ARG"
fi
# Else leave `MY_MAKEFLAGS` flags as-is, avoids setting a high number of jobs on recursive
# calls (which may easily run out of memory). Let the job-server handle the rest.
fi
# Useful for troubleshooting the wrapper.
# echo "Call level: $MY_MAKE_CALL_LEVEL, args=$@".
# Call actual make but ensure recursive calls run via this script.
exec make MAKE="$0" "$@"

View File

@@ -1,7 +1,7 @@
diff -Naur external_igc_opencl_clang.orig/CMakeLists.txt external_igc_opencl_clang/CMakeLists.txt
--- external_igc_opencl_clang.orig/CMakeLists.txt 2022-03-16 05:51:10 -0600
+++ external_igc_opencl_clang/CMakeLists.txt 2022-05-23 10:40:09 -0600
@@ -126,22 +126,24 @@
@@ -147,22 +147,24 @@
)
endif()

View File

@@ -19,9 +19,13 @@ ENDIF()
SET(_moltenvk_SEARCH_DIRS
${MOLTENVK_ROOT_DIR}
${LIBDIR}/vulkan/MoltenVK
)
# FIXME: These finder modules typically don't use LIBDIR,
# this should be set by `./build_files/cmake/platform/` instead.
IF(DEFINED LIBDIR)
SET(_moltenvk_SEARCH_DIRS ${_moltenvk_SEARCH_DIRS} ${LIBDIR}/moltenvk)
ENDIF()
FIND_PATH(MOLTENVK_INCLUDE_DIR
NAMES

View File

@@ -17,9 +17,13 @@ ENDIF()
SET(_optix_SEARCH_DIRS
${OPTIX_ROOT_DIR}
"$ENV{PROGRAMDATA}/NVIDIA Corporation/OptiX SDK 7.3.0"
)
# TODO: Which environment uses this?
if(DEFINED ENV{PROGRAMDATA})
list(APPEND _optix_SEARCH_DIRS "$ENV{PROGRAMDATA}/NVIDIA Corporation/OptiX SDK 7.3.0")
endif()
FIND_PATH(OPTIX_INCLUDE_DIR
NAMES
optix.h

View File

@@ -67,6 +67,8 @@ ENDIF()
STRING(REPLACE "." "" PYTHON_VERSION_NO_DOTS ${PYTHON_VERSION})
SET(_PYTHON_ABI_FLAGS "")
SET(_python_SEARCH_DIRS
${PYTHON_ROOT_DIR}
"$ENV{HOME}/py${PYTHON_VERSION_NO_DOTS}"

View File

@@ -0,0 +1,63 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2023 Blender Foundation.
# - Find ShaderC libraries
# Find the ShaderC includes and libraries
# This module defines
# SHADERC_INCLUDE_DIRS, where to find MoltenVK headers, Set when
# SHADERC_INCLUDE_DIR is found.
# SHADERC_LIBRARIES, libraries to link against to use ShaderC.
# SHADERC_ROOT_DIR, The base directory to search for ShaderC.
# This can also be an environment variable.
# SHADERC_FOUND, If false, do not try to use ShaderC.
#
# If SHADERC_ROOT_DIR was defined in the environment, use it.
IF(NOT SHADERC_ROOT_DIR AND NOT $ENV{SHADERC_ROOT_DIR} STREQUAL "")
SET(SHADERC_ROOT_DIR $ENV{SHADERC_ROOT_DIR})
ENDIF()
SET(_shaderc_SEARCH_DIRS
${SHADERC_ROOT_DIR}
)
# FIXME: These finder modules typically don't use LIBDIR,
# this should be set by `./build_files/cmake/platform/` instead.
IF(DEFINED LIBDIR)
SET(_shaderc_SEARCH_DIRS ${_shaderc_SEARCH_DIRS} ${LIBDIR}/shaderc)
ENDIF()
FIND_PATH(SHADERC_INCLUDE_DIR
NAMES
shaderc/shaderc.h
HINTS
${_shaderc_SEARCH_DIRS}
PATH_SUFFIXES
include
)
FIND_LIBRARY(SHADERC_LIBRARY
NAMES
shaderc_combined
HINTS
${_shaderc_SEARCH_DIRS}
PATH_SUFFIXES
lib
)
# handle the QUIETLY and REQUIRED arguments and set SHADERC_FOUND to TRUE if
# all listed variables are TRUE
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(ShaderC DEFAULT_MSG SHADERC_LIBRARY SHADERC_INCLUDE_DIR)
IF(SHADERC_FOUND)
SET(SHADERC_LIBRARIES ${SHADERC_LIBRARY})
SET(SHADERC_INCLUDE_DIRS ${SHADERC_INCLUDE_DIR})
ENDIF()
MARK_AS_ADVANCED(
SHADERC_INCLUDE_DIR
SHADERC_LIBRARY
)
UNSET(_shaderc_SEARCH_DIRS)

View File

@@ -0,0 +1,63 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2023 Blender Foundation.
# - Find Vulkan libraries
# Find the Vulkan includes and libraries
# This module defines
# VULKAN_INCLUDE_DIRS, where to find Vulkan headers, Set when
# VULKAN_INCLUDE_DIR is found.
# VULKAN_LIBRARIES, libraries to link against to use Vulkan.
# VULKAN_ROOT_DIR, The base directory to search for Vulkan.
# This can also be an environment variable.
# VULKAN_FOUND, If false, do not try to use Vulkan.
#
# If VULKAN_ROOT_DIR was defined in the environment, use it.
IF(NOT VULKAN_ROOT_DIR AND NOT $ENV{VULKAN_ROOT_DIR} STREQUAL "")
SET(VULKAN_ROOT_DIR $ENV{VULKAN_ROOT_DIR})
ENDIF()
SET(_vulkan_SEARCH_DIRS
${VULKAN_ROOT_DIR}
)
# FIXME: These finder modules typically don't use LIBDIR,
# this should be set by `./build_files/cmake/platform/` instead.
IF(DEFINED LIBDIR)
SET(_vulkan_SEARCH_DIRS ${_vulkan_SEARCH_DIRS} ${LIBDIR}/vulkan)
ENDIF()
FIND_PATH(VULKAN_INCLUDE_DIR
NAMES
vulkan/vulkan.h
HINTS
${_vulkan_SEARCH_DIRS}
PATH_SUFFIXES
include
)
FIND_LIBRARY(VULKAN_LIBRARY
NAMES
vulkan
HINTS
${_vulkan_SEARCH_DIRS}
PATH_SUFFIXES
lib
)
# handle the QUIETLY and REQUIRED arguments and set VULKAN_FOUND to TRUE if
# all listed variables are TRUE
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(Vulkan DEFAULT_MSG VULKAN_LIBRARY VULKAN_INCLUDE_DIR)
IF(VULKAN_FOUND)
SET(VULKAN_LIBRARIES ${VULKAN_LIBRARY})
SET(VULKAN_INCLUDE_DIRS ${VULKAN_INCLUDE_DIR})
ENDIF()
MARK_AS_ADVANCED(
VULKAN_INCLUDE_DIR
VULKAN_LIBRARY
)
UNSET(_vulkan_SEARCH_DIRS)

View File

@@ -23,19 +23,19 @@ if(EXISTS ${SOURCE_DIR}/.git)
if(MY_WC_BRANCH STREQUAL "HEAD")
# Detached HEAD, check whether commit hash is reachable
# in the master branch
# in the main branch
execute_process(COMMAND git rev-parse --short=12 HEAD
WORKING_DIRECTORY ${SOURCE_DIR}
OUTPUT_VARIABLE MY_WC_HASH
OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND git branch --list master blender-v* --contains ${MY_WC_HASH}
execute_process(COMMAND git branch --list main blender-v* --contains ${MY_WC_HASH}
WORKING_DIRECTORY ${SOURCE_DIR}
OUTPUT_VARIABLE _git_contains_check
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT _git_contains_check STREQUAL "")
set(MY_WC_BRANCH "master")
set(MY_WC_BRANCH "main")
else()
execute_process(COMMAND git show-ref --tags -d
WORKING_DIRECTORY ${SOURCE_DIR}
@@ -48,7 +48,7 @@ if(EXISTS ${SOURCE_DIR}/.git)
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(_git_tag_hashes MATCHES "${_git_head_hash}")
set(MY_WC_BRANCH "master")
set(MY_WC_BRANCH "main")
else()
execute_process(COMMAND git branch --contains ${MY_WC_HASH}
WORKING_DIRECTORY ${SOURCE_DIR}

View File

@@ -6,18 +6,80 @@
import re
import sys
from typing import Optional
cmakelists_file = sys.argv[-1]
def main():
def count_backslashes_before_pos(file_data: str, pos: int) -> int:
slash_count = 0
pos -= 1
while pos >= 0:
if file_data[pos] != '\\':
break
pos -= 1
slash_count += 1
return slash_count
def extract_cmake_string_at_pos(file_data: str, pos_beg: int) -> Optional[str]:
assert file_data[pos_beg - 1] == '"'
pos = pos_beg
# Dummy assignment.
pos_end = pos_beg
while True:
pos_next = file_data.find('"', pos)
if pos_next == -1:
raise Exception("Un-terminated string (parse error?)")
count_slashes = count_backslashes_before_pos(file_data, pos_next)
if (count_slashes % 2) == 0:
pos_end = pos_next
# Found the closing quote.
break
# The quote was back-slash escaped, step over it.
pos = pos_next + 1
file_data[pos_next]
assert file_data[pos_end] == '"'
if pos_beg == pos_end:
return None
# See: https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#escape-sequences
text = file_data[pos_beg: pos_end].replace(
# Handle back-slash literals.
"\\\\", "\\",
).replace(
# Handle tabs.
"\\t", "\t",
).replace(
# Handle escaped quotes.
"\\\"", "\"",
).replace(
# Handle tabs.
"\\;", ";",
).replace(
# Handle trailing newlines.
"\\\n", "",
)
return text
def main() -> None:
options = []
for l in open(cmakelists_file, 'r').readlines():
if not l.lstrip().startswith('#'):
l_option = re.sub(r'.*\boption\s*\(\s*(WITH_[a-zA-Z0-9_]+)\s+\"(.*)\"\s*.*', r'\g<1> - \g<2>', l)
if l_option != l:
l_option = l_option.strip()
if l_option.startswith('WITH_'):
options.append(l_option)
with open(cmakelists_file, 'r', encoding="utf-8") as fh:
file_data = fh.read()
for m in re.finditer(r"^\s*option\s*\(\s*(WITH_[a-zA-Z0-9_]+)\s+(\")", file_data, re.MULTILINE):
option_name = m.group(1)
option_descr = extract_cmake_string_at_pos(file_data, m.span(2)[1])
if option_descr is None:
# Possibly a parsing error, at least show something.
option_descr = "(UNDOCUMENTED)"
options.append("{:s}: {:s}".format(option_name, option_descr))
print('\n'.join(options))

View File

@@ -85,7 +85,7 @@ if(NOT APPLE)
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_ONEAPI_BINARIES ON CACHE BOOL "" FORCE)
endif()

View File

@@ -11,11 +11,11 @@
mkdir ~/blender-git
cd ~/blender-git
git clone http://git.blender.org/blender.git
git clone https://projects.blender.org/blender/blender.git
cd blender
git submodule update --init --recursive
git submodule foreach git checkout master
git submodule foreach git pull --rebase origin master
git submodule foreach git checkout main
git submodule foreach git pull --rebase origin main
# create build dir
mkdir ~/blender-git/build-cmake
@@ -35,7 +35,7 @@ ln -s ~/blender-git/build-cmake/bin/blender ~/blender-git/blender/blender.bin
echo ""
echo "* Useful Commands *"
echo " Run Blender: ~/blender-git/blender/blender.bin"
echo " Update Blender: git pull --rebase; git submodule foreach git pull --rebase origin master"
echo " Update Blender: git pull --rebase; git submodule foreach git pull --rebase origin main"
echo " Reconfigure Blender: cd ~/blender-git/build-cmake ; cmake ."
echo " Build Blender: cd ~/blender-git/build-cmake ; make"
echo ""

View File

@@ -544,13 +544,15 @@ endfunction()
function(setup_platform_linker_libs
target
)
# jemalloc must be early in the list, to be before pthread (see T57998)
# jemalloc must be early in the list, to be before pthread (see #57998).
if(WITH_MEM_JEMALLOC)
target_link_libraries(${target} ${JEMALLOC_LIBRARIES})
endif()
if(WIN32 AND NOT UNIX)
target_link_libraries(${target} ${PTHREADS_LIBRARIES})
if(DEFINED PTHREADS_LIBRARIES)
target_link_libraries(${target} ${PTHREADS_LIBRARIES})
endif()
endif()
# target_link_libraries(${target} ${PLATFORM_LINKLIBS} ${CMAKE_DL_LIBS})
@@ -1115,7 +1117,7 @@ function(find_python_package
# endif()
# Not set, so initialize.
else()
string(REPLACE "." ";" _PY_VER_SPLIT "${PYTHON_VERSION}")
string(REPLACE "." ";" _PY_VER_SPLIT "${PYTHON_VERSION}")
list(GET _PY_VER_SPLIT 0 _PY_VER_MAJOR)
# re-cache
@@ -1262,7 +1264,7 @@ endmacro()
# Utility to gather and install precompiled shared libraries.
macro(add_bundled_libraries library_dir)
if(EXISTS ${LIBDIR})
if(DEFINED LIBDIR)
set(_library_dir ${LIBDIR}/${library_dir})
if(WIN32)
file(GLOB _all_library_versions ${_library_dir}/*\.dll)
@@ -1275,7 +1277,7 @@ macro(add_bundled_libraries library_dir)
list(APPEND PLATFORM_BUNDLED_LIBRARY_DIRS ${_library_dir})
unset(_all_library_versions)
unset(_library_dir)
endif()
endif()
endmacro()
macro(windows_install_shared_manifest)

View File

@@ -97,20 +97,8 @@ add_bundled_libraries(materialx/lib)
if(WITH_VULKAN_BACKEND)
find_package(MoltenVK REQUIRED)
if(EXISTS ${LIBDIR}/vulkan)
set(VULKAN_FOUND On)
set(VULKAN_ROOT_DIR ${LIBDIR}/vulkan/macOS)
set(VULKAN_INCLUDE_DIR ${VULKAN_ROOT_DIR}/include)
set(VULKAN_LIBRARY ${VULKAN_ROOT_DIR}/lib/libvulkan.1.dylib)
set(SHADERC_LIBRARY ${VULKAN_ROOT_DIR}/lib/libshaderc_combined.a)
set(VULKAN_INCLUDE_DIRS ${VULKAN_INCLUDE_DIR} ${MOLTENVK_INCLUDE_DIRS})
set(VULKAN_LIBRARIES ${VULKAN_LIBRARY} ${SHADERC_LIBRARY} ${MOLTENVK_LIBRARIES})
else()
message(WARNING "Vulkan SDK was not found, disabling WITH_VULKAN_BACKEND")
set(WITH_VULKAN_BACKEND OFF)
endif()
find_package(ShaderC REQUIRED)
find_package(Vulkan REQUIRED)
endif()
if(WITH_OPENSUBDIV)
@@ -452,7 +440,7 @@ string(APPEND PLATFORM_LINKFLAGS " -stdlib=libc++")
# Make stack size more similar to Embree, required for Embree.
string(APPEND PLATFORM_LINKFLAGS_EXECUTABLE " -Wl,-stack_size,0x100000")
# Suppress ranlib "has no symbols" warnings (workaround for T48250)
# Suppress ranlib "has no symbols" warnings (workaround for #48250).
set(CMAKE_C_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
# llvm-ranlib doesn't support this flag. Xcode's libtool does.

View File

@@ -1,7 +1,12 @@
# SPDX-License-Identifier: GPL-2.0-or-later
# Copyright 2022 Blender Foundation. All rights reserved.
# Auto update existing CMake caches for new libraries
# Auto update existing CMake caches for new libraries.
# Assert that `LIBDIR` is defined.
if(NOT (DEFINED LIBDIR))
message(FATAL_ERROR "Logical error, expected 'LIBDIR' to be defined!")
endif()
# Clear cached variables whose name matches `pattern`.
function(unset_cache_variables pattern)

View File

@@ -4,38 +4,52 @@
# Libraries configuration for any *nix system including Linux and Unix (excluding APPLE).
# Detect precompiled library directory
if(NOT DEFINED LIBDIR)
# Path to a locally compiled libraries.
set(LIBDIR_NAME ${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR})
string(TOLOWER ${LIBDIR_NAME} LIBDIR_NAME)
set(LIBDIR_NATIVE_ABI ${CMAKE_SOURCE_DIR}/../lib/${LIBDIR_NAME})
# Path to precompiled libraries with known glibc 2.28 ABI.
set(LIBDIR_GLIBC228_ABI ${CMAKE_SOURCE_DIR}/../lib/linux_x86_64_glibc_228)
if(NOT WITH_LIBS_PRECOMPILED)
unset(LIBDIR)
else()
if(NOT DEFINED LIBDIR)
# Path to a locally compiled libraries.
set(LIBDIR_NAME ${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR})
string(TOLOWER ${LIBDIR_NAME} LIBDIR_NAME)
set(LIBDIR_NATIVE_ABI ${CMAKE_SOURCE_DIR}/../lib/${LIBDIR_NAME})
# Choose the best suitable libraries.
if(EXISTS ${LIBDIR_NATIVE_ABI})
set(LIBDIR ${LIBDIR_NATIVE_ABI})
set(WITH_LIBC_MALLOC_HOOK_WORKAROUND True)
elseif(EXISTS ${LIBDIR_GLIBC228_ABI})
set(LIBDIR ${LIBDIR_GLIBC228_ABI})
if(WITH_MEM_JEMALLOC)
# jemalloc provides malloc hooks.
set(WITH_LIBC_MALLOC_HOOK_WORKAROUND False)
else()
# Path to precompiled libraries with known glibc 2.28 ABI.
set(LIBDIR_GLIBC228_ABI ${CMAKE_SOURCE_DIR}/../lib/linux_x86_64_glibc_228)
# Choose the best suitable libraries.
if(EXISTS ${LIBDIR_NATIVE_ABI})
set(LIBDIR ${LIBDIR_NATIVE_ABI})
set(WITH_LIBC_MALLOC_HOOK_WORKAROUND True)
elseif(EXISTS ${LIBDIR_GLIBC228_ABI})
set(LIBDIR ${LIBDIR_GLIBC228_ABI})
if(WITH_MEM_JEMALLOC)
# jemalloc provides malloc hooks.
set(WITH_LIBC_MALLOC_HOOK_WORKAROUND False)
else()
set(WITH_LIBC_MALLOC_HOOK_WORKAROUND True)
endif()
endif()
# Avoid namespace pollustion.
unset(LIBDIR_NATIVE_ABI)
unset(LIBDIR_GLIBC228_ABI)
endif()
# Avoid namespace pollustion.
unset(LIBDIR_NATIVE_ABI)
unset(LIBDIR_GLIBC228_ABI)
if(NOT (EXISTS ${LIBDIR}))
message(STATUS
"Unable to find LIBDIR: ${LIBDIR}, system libraries may be used "
"(disable WITH_LIBS_PRECOMPILED to suppress this message)."
)
unset(LIBDIR)
endif()
endif()
# Support restoring this value once pre-compiled libraries have been handled.
set(WITH_STATIC_LIBS_INIT ${WITH_STATIC_LIBS})
if(EXISTS ${LIBDIR})
if(DEFINED LIBDIR)
message(STATUS "Using pre-compiled LIBDIR: ${LIBDIR}")
file(GLOB LIB_SUBDIRS ${LIBDIR}/*)
@@ -85,7 +99,7 @@ endmacro()
# These are libraries that may be precompiled. For this we disable searching in
# the system directories so that we don't accidentally use them instead.
if(EXISTS ${LIBDIR})
if(DEFINED LIBDIR)
without_system_libs_begin()
endif()
@@ -97,6 +111,7 @@ find_package_wrapper(Epoxy REQUIRED)
if(WITH_VULKAN_BACKEND)
find_package_wrapper(Vulkan REQUIRED)
find_package_wrapper(ShaderC REQUIRED)
endif()
function(check_freetype_for_brotli)
@@ -114,7 +129,7 @@ endfunction()
if(NOT WITH_SYSTEM_FREETYPE)
# FreeType compiled with Brotli compression for woff2.
find_package_wrapper(Freetype REQUIRED)
if(EXISTS ${LIBDIR})
if(DEFINED LIBDIR)
find_package_wrapper(Brotli REQUIRED)
# NOTE: This is done on WIN32 & APPLE but fails on some Linux systems.
@@ -141,7 +156,7 @@ if(WITH_PYTHON)
if(WITH_PYTHON_MODULE AND NOT WITH_INSTALL_PORTABLE)
# Installing into `site-packages`, warn when installing into `./../lib/`
# which script authors almost certainly don't want.
if(EXISTS ${LIBDIR})
if(DEFINED LIBDIR)
path_is_prefix(LIBDIR PYTHON_SITE_PACKAGES _is_prefix)
if(_is_prefix)
message(WARNING "
@@ -217,7 +232,7 @@ if(WITH_CODEC_SNDFILE)
endif()
if(WITH_CODEC_FFMPEG)
if(EXISTS ${LIBDIR})
if(DEFINED LIBDIR)
set(FFMPEG_ROOT_DIR ${LIBDIR}/ffmpeg)
# Override FFMPEG components to also include static library dependencies
# included with precompiled libraries, and to ensure correct link order.
@@ -232,7 +247,7 @@ if(WITH_CODEC_FFMPEG)
vpx
x264
xvidcore)
if(EXISTS ${LIBDIR}/ffmpeg/lib/libaom.a)
if((DEFINED LIBDIR) AND (EXISTS ${LIBDIR}/ffmpeg/lib/libaom.a))
list(APPEND FFMPEG_FIND_COMPONENTS aom)
endif()
elseif(FFMPEG)
@@ -430,10 +445,13 @@ if(WITH_OPENIMAGEIO)
${PNG_LIBRARIES}
${JPEG_LIBRARIES}
${ZLIB_LIBRARIES}
${BOOST_LIBRARIES}
)
set(OPENIMAGEIO_DEFINITIONS "")
if(WITH_BOOST)
list(APPEND OPENIMAGEIO_LIBRARIES "${BOOST_LIBRARIES}")
endif()
if(WITH_IMAGE_TIFF)
list(APPEND OPENIMAGEIO_LIBRARIES "${TIFF_LIBRARY}")
endif()
@@ -451,7 +469,7 @@ add_bundled_libraries(openimageio/lib)
if(WITH_OPENCOLORIO)
find_package_wrapper(OpenColorIO 2.0.0)
set(OPENCOLORIO_DEFINITIONS)
set(OPENCOLORIO_DEFINITIONS "")
set_and_warn_library_found("OpenColorIO" OPENCOLORIO_FOUND WITH_OPENCOLORIO)
endif()
add_bundled_libraries(opencolorio/lib)
@@ -466,7 +484,7 @@ if(WITH_OPENIMAGEDENOISE)
endif()
if(WITH_LLVM)
if(EXISTS ${LIBDIR})
if(DEFINED LIBDIR)
set(LLVM_STATIC ON)
endif()
@@ -480,7 +498,7 @@ if(WITH_LLVM)
endif()
# Symbol conflicts with same UTF library used by OpenCollada
if(EXISTS ${LIBDIR})
if(DEFINED LIBDIR)
if(WITH_OPENCOLLADA AND (${LLVM_VERSION} VERSION_LESS "4.0.0"))
list(REMOVE_ITEM OPENCOLLADA_LIBRARIES ${OPENCOLLADA_UTF_LIBRARY})
endif()
@@ -536,7 +554,7 @@ if(WITH_CYCLES AND WITH_CYCLES_PATH_GUIDING)
endif()
endif()
if(EXISTS ${LIBDIR})
if(DEFINED LIBDIR)
without_system_libs_end()
endif()
@@ -551,9 +569,14 @@ else()
endif()
find_package(Threads REQUIRED)
list(APPEND PLATFORM_LINKLIBS ${CMAKE_THREAD_LIBS_INIT})
# used by other platforms
set(PTHREADS_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
# `FindThreads` documentation notes that this may be empty
# with the system libraries provide threading functionality.
if(CMAKE_THREAD_LIBS_INIT)
list(APPEND PLATFORM_LINKLIBS ${CMAKE_THREAD_LIBS_INIT})
# used by other platforms
set(PTHREADS_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
endif()
if(CMAKE_DL_LIBS)
list(APPEND PLATFORM_LINKLIBS ${CMAKE_DL_LIBS})
@@ -575,7 +598,7 @@ add_definitions(-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE
#
# Keep last, so indirectly linked libraries don't override our own pre-compiled libs.
if(EXISTS ${LIBDIR})
if(DEFINED LIBDIR)
# Clear the prefix path as it causes the `LIBDIR` to override system locations.
unset(CMAKE_PREFIX_PATH)
@@ -631,7 +654,7 @@ if(WITH_GHOST_WAYLAND)
# When dynamically linked WAYLAND is used and `${LIBDIR}/wayland` is present,
# there is no need to search for the libraries as they are not needed for building.
# Only the headers are needed which can reference the known paths.
if(EXISTS "${LIBDIR}/wayland" AND WITH_GHOST_WAYLAND_DYNLOAD)
if((DEFINED LIBDIR) AND (EXISTS "${LIBDIR}/wayland" AND WITH_GHOST_WAYLAND_DYNLOAD))
set(_use_system_wayland OFF)
else()
set(_use_system_wayland ON)
@@ -695,7 +718,7 @@ if(WITH_GHOST_WAYLAND)
add_definitions(-DWITH_GHOST_WAYLAND_LIBDECOR)
endif()
if(EXISTS "${LIBDIR}/wayland/bin/wayland-scanner")
if((DEFINED LIBDIR) AND (EXISTS "${LIBDIR}/wayland/bin/wayland-scanner"))
set(WAYLAND_SCANNER "${LIBDIR}/wayland/bin/wayland-scanner")
else()
pkg_get_variable(WAYLAND_SCANNER wayland-scanner wayland_scanner)

View File

@@ -121,7 +121,7 @@ if(WITH_WINDOWS_BUNDLE_CRT)
include(InstallRequiredSystemLibraries)
# ucrtbase(d).dll cannot be in the manifest, due to the way windows 10 handles
# redirects for this dll, for details see T88813.
# redirects for this dll, for details see #88813.
foreach(lib ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS})
string(FIND ${lib} "ucrtbase" pos)
if(NOT pos EQUAL -1)
@@ -295,7 +295,7 @@ unset(MATERIALX_LIB_FOLDER_EXISTS)
if(NOT MSVC_CLANG AND # Available with MSVC 15.7+ but not for CLANG.
NOT WITH_WINDOWS_SCCACHE AND # And not when sccache is enabled
NOT VS_CLANG_TIDY) # Clang-tidy does not like these options
add_compile_options(/experimental:external /external:templates- /external:I "${LIBDIR}" /external:W0)
add_compile_options(/experimental:external /external:I "${LIBDIR}" /external:W0)
endif()
# Add each of our libraries to our cmake_prefix_path so find_package() could work

View File

@@ -5,16 +5,16 @@
update-code:
git:
submodules:
- branch: master
- branch: main
commit_id: HEAD
path: release/scripts/addons
- branch: master
- branch: main
commit_id: HEAD
path: release/scripts/addons_contrib
- branch: master
- branch: main
commit_id: HEAD
path: release/datafiles/locale
- branch: master
- branch: main
commit_id: HEAD
path: source/tools
svn:
@@ -43,6 +43,10 @@ update-code:
branch: trunk
commit_id: HEAD
path: lib/benchmarks
assets:
branch: trunk
commit_id: HEAD
path: lib/assets
#
# Buildbot only configs
@@ -59,7 +63,7 @@ buildbot:
optix:
version: '7.3.0'
ocloc:
version: '101.3430'
version: '101.4032'
cmake:
default:
version: any

View File

@@ -24,7 +24,7 @@ import os
import re
import platform
import string
import setuptools # type: ignore
import setuptools
import sys
from typing import (
@@ -58,7 +58,7 @@ Each Blender release supports one Python version, and the package is only compat
## Source Code
* [Releases](https://download.blender.org/source/)
* Repository: [git.blender.org/blender.git](https://git.blender.org/gitweb/gitweb.cgi/blender.git)
* Repository: [projects.blender.org/blender/blender.git](https://projects.blender.org/blender/blender)
## Credits
@@ -208,7 +208,7 @@ def main() -> None:
return paths
# Ensure this wheel is marked platform specific.
class BinaryDistribution(setuptools.dist.Distribution): # type: ignore
class BinaryDistribution(setuptools.dist.Distribution):
def has_ext_modules(self) -> bool:
return True

View File

@@ -13,10 +13,10 @@ import sys
import make_utils
from make_utils import call
# Parse arguments
# Parse arguments.
def parse_arguments():
def parse_arguments() -> argparse.Namespace:
parser = argparse.ArgumentParser()
parser.add_argument("--ctest-command", default="ctest")
parser.add_argument("--cmake-command", default="cmake")

View File

@@ -42,6 +42,7 @@ def parse_arguments() -> argparse.Namespace:
parser.add_argument("--svn-branch", default=None)
parser.add_argument("--git-command", default="git")
parser.add_argument("--use-linux-libraries", action="store_true")
parser.add_argument("--architecture", type=str, choices=("x86_64", "amd64", "arm64",))
return parser.parse_args()
@@ -51,6 +52,17 @@ def get_blender_git_root() -> str:
# Setup for precompiled libraries and tests from svn.
def get_effective_architecture(args: argparse.Namespace):
if args.architecture:
return args.architecture
# Check platform.version to detect arm64 with x86_64 python binary.
if "ARM64" in platform.version():
return "arm64"
return platform.machine().lower()
def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None:
svn_non_interactive = [args.svn_command, '--non-interactive']
@@ -58,11 +70,11 @@ def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None
svn_url = make_utils.svn_libraries_base_url(release_version, args.svn_branch)
# Checkout precompiled libraries
architecture = get_effective_architecture(args)
if sys.platform == 'darwin':
# Check platform.version to detect arm64 with x86_64 python binary.
if platform.machine() == 'arm64' or ('ARM64' in platform.version()):
if architecture == 'arm64':
lib_platform = "darwin_arm64"
elif platform.machine() == 'x86_64':
elif architecture == 'x86_64':
lib_platform = "darwin"
else:
lib_platform = None
@@ -104,17 +116,30 @@ def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None
svn_url_tests = svn_url + lib_tests
call(svn_non_interactive + ["checkout", svn_url_tests, lib_tests_dirpath])
# Update precompiled libraries and tests
lib_assets = "assets"
lib_assets_dirpath = os.path.join(lib_dirpath, lib_assets)
if not os.path.exists(lib_assets_dirpath):
print_stage("Checking out Assets")
if make_utils.command_missing(args.svn_command):
sys.stderr.write("svn not found, can't checkout assets\n")
sys.exit(1)
svn_url_assets = svn_url + lib_assets
call(svn_non_interactive + ["checkout", svn_url_assets, lib_assets_dirpath])
# Update precompiled libraries, assets and tests
if not os.path.isdir(lib_dirpath):
print("Library path: %r, not found, skipping" % lib_dirpath)
else:
paths_local_and_remote = []
if os.path.exists(os.path.join(lib_dirpath, ".svn")):
print_stage("Updating Precompiled Libraries and Tests (one repository)")
print_stage("Updating Precompiled Libraries, Assets and Tests (one repository)")
paths_local_and_remote.append((lib_dirpath, svn_url))
else:
print_stage("Updating Precompiled Libraries and Tests (multiple repositories)")
print_stage("Updating Precompiled Libraries, Assets and Tests (multiple repositories)")
# Separate paths checked out.
for dirname in os.listdir(lib_dirpath):
if dirname.startswith("."):
@@ -157,7 +182,7 @@ def git_update_skip(args: argparse.Namespace, check_remote_exists: bool = True)
return "rebase or merge in progress, complete it first"
# Abort if uncommitted changes.
changes = check_output([args.git_command, 'status', '--porcelain', '--untracked-files=no'])
changes = check_output([args.git_command, 'status', '--porcelain', '--untracked-files=no', '--ignore-submodules'])
if len(changes) != 0:
return "you have unstaged changes"
@@ -189,8 +214,8 @@ def submodules_update(
sys.exit(1)
# Update submodules to appropriate given branch,
# falling back to master if none is given and/or found in a sub-repository.
branch_fallback = "master"
# falling back to main if none is given and/or found in a sub-repository.
branch_fallback = "main"
if not branch:
branch = branch_fallback
@@ -243,14 +268,15 @@ if __name__ == "__main__":
blender_skip_msg = ""
submodules_skip_msg = ""
# Test if we are building a specific release version.
branch = make_utils.git_branch(args.git_command)
if branch == 'HEAD':
sys.stderr.write('Blender git repository is in detached HEAD state, must be in a branch\n')
sys.exit(1)
tag = make_utils.git_tag(args.git_command)
release_version = make_utils.git_branch_release_version(branch, tag)
blender_version = make_utils. parse_blender_version()
if blender_version.cycle != 'alpha':
major = blender_version.version // 100
minor = blender_version.version % 100
branch = f"blender-v{major}.{minor}-release"
release_version = f"{major}.{minor}"
else:
branch = 'main'
release_version = None
if not args.no_libraries:
svn_update(args, release_version)

View File

@@ -3,9 +3,9 @@ if NOT exist "%BLENDER_DIR%\source\tools\.git" (
if not "%GIT%" == "" (
"%GIT%" submodule update --init --recursive --progress
if errorlevel 1 goto FAIL
"%GIT%" submodule foreach git checkout master
"%GIT%" submodule foreach git checkout main
if errorlevel 1 goto FAIL
"%GIT%" submodule foreach git pull --rebase origin master
"%GIT%" submodule foreach git pull --rebase origin main
if errorlevel 1 goto FAIL
goto EOF
) else (

View File

@@ -38,7 +38,7 @@ PROJECT_NAME = Blender
# could be handy for archiving the generated documentation or if some version
# control system is used.
PROJECT_NUMBER = V3.5
PROJECT_NUMBER = V3.6
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a

View File

@@ -37,7 +37,7 @@ def draw_callback_px(self, context):
# BLF drawing routine
font_id = font_info["font_id"]
blf.position(font_id, 2, 80, 0)
blf.size(font_id, 50, 72)
blf.size(font_id, 50)
blf.draw(font_id, "Hello World")

View File

@@ -13,16 +13,16 @@ Blender stores 4 main arrays to define mesh geometry.
- :class:`Mesh.polygons`: (reference a range of loops)
Each polygon reference a slice in the loop array, this way, polygons do not store vertices or corner data such as UV's directly,
Each polygon references a slice in the loop array, this way, polygons do not store vertices or corner data such as UVs directly,
only a reference to loops that the polygon uses.
:class:`Mesh.loops`, :class:`Mesh.uv_layers` :class:`Mesh.vertex_colors` are all aligned so the same polygon loop
indices can be used to find the UV's and vertex colors as with as the vertices.
indices can be used to find the UVs and vertex colors as with as the vertices.
To compare mesh API options see: :ref:`NGons and Tessellation Faces <info_gotcha_mesh_faces>`
This example script prints the vertices and UV's for each polygon, assumes the active object is a mesh with UVs.
This example script prints the vertices and UVs for each polygon, assumes the active object is a mesh with UVs.
"""
import bpy

View File

@@ -476,7 +476,7 @@ MODULE_GROUPING = {
# -------------------------------BLENDER----------------------------------------
# converting bytes to strings, due to T30154
# Converting bytes to strings, due to #30154.
BLENDER_REVISION = str(bpy.app.build_hash, 'utf_8')
BLENDER_REVISION_TIMESTAMP = bpy.app.build_commit_timestamp
@@ -487,7 +487,7 @@ BLENDER_VERSION_DOTS = "%d.%d" % (bpy.app.version[0], bpy.app.version[1])
if BLENDER_REVISION != "Unknown":
# SHA1 Git hash
BLENDER_VERSION_HASH = BLENDER_REVISION
BLENDER_VERSION_HASH_HTML_LINK = "<a href=https://developer.blender.org/rB%s>%s</a>" % (
BLENDER_VERSION_HASH_HTML_LINK = "<a href=https://projects.blender.org/blender/blender/commit/%s>%s</a>" % (
BLENDER_VERSION_HASH, BLENDER_VERSION_HASH,
)
BLENDER_VERSION_DATE = time.strftime("%d/%m/%Y", time.localtime(BLENDER_REVISION_TIMESTAMP))
@@ -647,7 +647,7 @@ def undocumented_message(module_name, type_name, identifier):
module_name, type_name, identifier,
)
return "Undocumented, consider `contributing <https://developer.blender.org/T51061>`__."
return "Undocumented, consider `contributing <https://developer.blender.org/>`__."
def range_str(val):
@@ -1816,9 +1816,9 @@ def pyrna2sphinx(basepath):
# operators
def write_ops():
API_BASEURL = "https://developer.blender.org/diffusion/B/browse/master/release/scripts"
API_BASEURL_ADDON = "https://developer.blender.org/diffusion/BA"
API_BASEURL_ADDON_CONTRIB = "https://developer.blender.org/diffusion/BAC"
API_BASEURL = "https://projects.blender.org/blender/blender/src/branch/main/release/scripts"
API_BASEURL_ADDON = "https://projects.blender.org/blender/blender-addons"
API_BASEURL_ADDON_CONTRIB = "https://projects.blender.org/blender/blender-addons-contrib"
op_modules = {}
op = None
@@ -2098,6 +2098,8 @@ def write_rst_types_index(basepath):
fw(title_string("Types (bpy.types)", "="))
fw(".. module:: bpy.types\n\n")
fw(".. toctree::\n")
# Only show top-level entries (avoids unreasonably large pages).
fw(" :maxdepth: 1\n")
fw(" :glob:\n\n")
fw(" bpy.types.*\n\n")
@@ -2124,6 +2126,8 @@ def write_rst_ops_index(basepath):
write_example_ref("", fw, "bpy.ops")
fw(".. toctree::\n")
fw(" :caption: Submodules\n")
# Only show top-level entries (avoids unreasonably large pages).
fw(" :maxdepth: 1\n")
fw(" :glob:\n\n")
fw(" bpy.ops.*\n\n")
file.close()
@@ -2196,7 +2200,7 @@ def write_rst_enum_items(basepath, key, key_no_prefix, enum_items):
Write a single page for a static enum in RST.
This helps avoiding very large lists being in-lined in many places which is an issue
especially with icons in ``bpy.types.UILayout``. See T87008.
especially with icons in ``bpy.types.UILayout``. See #87008.
"""
filepath = os.path.join(basepath, "%s.rst" % key_no_prefix)
with open(filepath, "w", encoding="utf-8") as fh:

View File

@@ -156,7 +156,7 @@ var Popover = function() {
},
getNamed : function(v) {
$.each(all_versions, function(ix, title) {
if (ix === "master" || ix === "latest") {
if (ix === "master" || ix === "main" || ix === "latest") {
var m = title.match(/\d\.\d[\w\d\.]*/)[0];
if (parseFloat(m) == v) {
v = ix;

View File

@@ -513,17 +513,19 @@ if(WITH_FFTW)
src/fx/Convolver.cpp
src/fx/ConvolverReader.cpp
src/fx/ConvolverSound.cpp
src/fx/Equalizer.cpp
src/fx/FFTConvolver.cpp
src/fx/HRTF.cpp
src/fx/ImpulseResponse.cpp
src/util/FFTPlan.cpp
)
set(FFTW_HDR
include/fx/BinauralSound.h
include/fx/BinauralSound.h
include/fx/BinauralReader.h
include/fx/Convolver.h
include/fx/ConvolverReader.h
include/fx/ConvolverSound.h
include/fx/Equalizer.h
include/fx/FFTConvolver.h
include/fx/HRTF.h
include/fx/HRTFLoader.h

View File

@@ -54,6 +54,7 @@
#ifdef WITH_CONVOLUTION
#include "fx/BinauralSound.h"
#include "fx/ConvolverSound.h"
#include "fx/Equalizer.h"
#endif
#include <cassert>
@@ -768,4 +769,14 @@ AUD_API AUD_Sound* AUD_Sound_Binaural(AUD_Sound* sound, AUD_HRTF* hrtfs, AUD_Sou
}
}
AUD_API AUD_Sound* AUD_Sound_equalize(AUD_Sound* sound, float *definition, int size, float maxFreqEq, int sizeConversion)
{
assert(sound);
std::shared_ptr<Buffer> buf = std::shared_ptr<Buffer>(new Buffer(sizeof(float)*size));
std::memcpy(buf->getBuffer(), definition, sizeof(float)*size);
AUD_Sound *equalizer=new AUD_Sound(new Equalizer(*sound, buf, size, maxFreqEq, sizeConversion));
return equalizer;
}
#endif

View File

@@ -397,6 +397,16 @@ extern AUD_API AUD_Sound* AUD_Sound_mutable(AUD_Sound* sound);
#ifdef WITH_CONVOLUTION
extern AUD_API AUD_Sound* AUD_Sound_Convolver(AUD_Sound* sound, AUD_ImpulseResponse* filter, AUD_ThreadPool* threadPool);
extern AUD_API AUD_Sound* AUD_Sound_Binaural(AUD_Sound* sound, AUD_HRTF* hrtfs, AUD_Source* source, AUD_ThreadPool* threadPool);
/**
* Creates an Equalizer for the sound
* \param sound The handle of the sound
* \param definition buffer of size*sizeof(float) with the array of equalization values
* \param maxFreqEq Maximum frequency refered by the array
* \param sizeConversion Size of the transformation. Must be 2^number (for example 1024, 2048,...)
* \return A handle to the Equalizer refered to that sound
*/
extern AUD_API AUD_Sound* AUD_Sound_equalize(AUD_Sound* sound, float *definition, int size, float maxFreqEq, int sizeConversion);
#endif
#ifdef __cplusplus

View File

@@ -53,6 +53,7 @@ extern AUD_API AUD_Handle* AUD_pauseAfter(AUD_Handle* handle, double seconds);
* \param buffer The buffer to write to. Must have a size of 3*4*length.
* \param length How many samples to read from the sound.
* \param samples_per_second How many samples to read per second of the sound.
* \param interrupt Must point to a short that equals 0. If it is set to a non-zero value, the method will be interrupted and return 0.
* \return How many samples really have been read. Always <= length.
*/
extern AUD_API int AUD_readSound(AUD_Sound* sound, float* buffer, int length, int samples_per_second, short* interrupt);

View File

@@ -5,12 +5,12 @@ import os
import codecs
import numpy
from distutils.core import setup, Extension
from setuptools import setup, Extension
if len(sys.argv) > 2 and sys.argv[1] == '--build-docs':
import subprocess
from distutils.core import Distribution
from distutils.command.build import build
from setuptools import Distribution
from setuptools.command.build import build
dist = Distribution()
cmd = build(dist)

106
extern/audaspace/include/fx/Equalizer.h vendored Normal file
View File

@@ -0,0 +1,106 @@
/*******************************************************************************
* Copyright 2022 Marcos Perez Gonzalez
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#pragma once
/**
* @file Equalizer.h
* @ingroup fx
* The Equalizer class.
*/
#include <memory>
#include <vector>
#include "ISound.h"
#include "ImpulseResponse.h"
AUD_NAMESPACE_BEGIN
class Buffer;
class ImpulseResponse;
/**
* This class represents a sound that can be modified depending on a given impulse response.
*/
class AUD_API Equalizer : public ISound
{
private:
/**
* A pointer to the imput sound.
*/
std::shared_ptr<ISound> m_sound;
/**
* Local definition of Equalizer
*/
std::shared_ptr<Buffer> m_bufEQ;
/**
* A pointer to the impulse response.
*/
std::shared_ptr<ImpulseResponse> m_impulseResponse;
/**
* delete copy constructor and operator=
*/
Equalizer(const Equalizer&) = delete;
Equalizer& operator=(const Equalizer&) = delete;
/**
* Create ImpulseResponse from the definition in the Buffer,
* using at the end a minimum phase change
*/
std::shared_ptr<ImpulseResponse> createImpulseResponse();
/**
* Create an Impulse Response with minimum phase distortion using Homomorphic
* The input is an Impulse Response
*/
std::shared_ptr<Buffer> minimumPhaseFilterHomomorphic(std::shared_ptr<Buffer> original, int lOriginal, int lWork);
/**
* Create an Impulse Response with minimum phase distortion using Hilbert
* The input is an Impulse Response
*/
std::shared_ptr<Buffer> minimumPhaseFilterHilbert(std::shared_ptr<Buffer> original, int lOriginal, int lWork);
public:
/**
* Creates a new Equalizer.
* \param sound The sound that will be equalized
*/
Equalizer(std::shared_ptr<ISound> sound, std::shared_ptr<Buffer> bufEQ, int externalSizeEq, float maxFreqEq, int sizeConversion);
virtual ~Equalizer();
virtual std::shared_ptr<IReader> createReader();
/*
* Length of the external equalizer definition. It must be the number of "float" positions of the Buffer
*/
int external_size_eq;
/*
* Length of the internal equalizer definition
*/
int filter_length;
/*
* Maximum frequency used in the equalizer definition
*/
float maxFreqEq;
};
AUD_NAMESPACE_END

View File

@@ -27,7 +27,7 @@
AUD_NAMESPACE_BEGIN
BinauralReader::BinauralReader(std::shared_ptr<IReader> reader, std::shared_ptr<HRTF> hrtfs, std::shared_ptr<Source> source, std::shared_ptr<ThreadPool> threadPool, std::shared_ptr<FFTPlan> plan) :
m_reader(reader), m_hrtfs(hrtfs), m_source(source), m_N(plan->getSize()), m_threadPool(threadPool), m_position(0), m_eosReader(false), m_eosTail(false), m_transition(false), m_transPos(CROSSFADE_SAMPLES*NUM_OUTCHANNELS)
m_position(0), m_reader(reader), m_hrtfs(hrtfs), m_source(source), m_N(plan->getSize()), m_transition(false), m_transPos(CROSSFADE_SAMPLES*NUM_OUTCHANNELS), m_eosReader(false), m_eosTail(false), m_threadPool(threadPool)
{
if(m_hrtfs->isEmpty())
AUD_THROW(StateException, "The provided HRTF object is empty");

View File

@@ -23,7 +23,7 @@
AUD_NAMESPACE_BEGIN
Convolver::Convolver(std::shared_ptr<std::vector<std::shared_ptr<std::vector<std::complex<sample_t>>>>> ir, int irLength, std::shared_ptr<ThreadPool> threadPool, std::shared_ptr<FFTPlan> plan) :
m_N(plan->getSize()), m_M(plan->getSize()/2), m_L(plan->getSize()/2), m_irBuffers(ir), m_irLength(irLength), m_threadPool(threadPool), m_numThreads(std::min(threadPool->getNumOfThreads(), static_cast<unsigned int>(m_irBuffers->size() - 1))), m_tailCounter(0), m_eos(false)
m_N(plan->getSize()), m_M(plan->getSize()/2), m_L(plan->getSize()/2), m_irBuffers(ir), m_numThreads(std::min(threadPool->getNumOfThreads(), static_cast<unsigned int>(m_irBuffers->size() - 1))), m_threadPool(threadPool), m_irLength(irLength), m_tailCounter(0), m_eos(false)
{
m_resetFlag = false;

View File

@@ -24,7 +24,7 @@
AUD_NAMESPACE_BEGIN
ConvolverReader::ConvolverReader(std::shared_ptr<IReader> reader, std::shared_ptr<ImpulseResponse> ir, std::shared_ptr<ThreadPool> threadPool, std::shared_ptr<FFTPlan> plan) :
m_reader(reader), m_ir(ir), m_N(plan->getSize()), m_eosReader(false), m_eosTail(false), m_inChannels(reader->getSpecs().channels), m_irChannels(ir->getSpecs().channels), m_threadPool(threadPool), m_position(0)
m_position(0), m_reader(reader), m_ir(ir), m_N(plan->getSize()), m_eosReader(false), m_eosTail(false), m_inChannels(reader->getSpecs().channels), m_irChannels(ir->getSpecs().channels), m_threadPool(threadPool)
{
m_nChannelThreads = std::min((int)threadPool->getNumOfThreads(), m_inChannels);
m_futures.resize(m_nChannelThreads);

367
extern/audaspace/src/fx/Equalizer.cpp vendored Normal file
View File

@@ -0,0 +1,367 @@
/*******************************************************************************
* Copyright 2022 Marcos Perez Gonzalez
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#include "fx/Equalizer.h"
#include <chrono>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <string>
#include "Exception.h"
#include "fx/ConvolverReader.h"
#include "fx/ImpulseResponse.h"
#include "util/Buffer.h"
#include "util/FFTPlan.h"
#include "util/ThreadPool.h"
AUD_NAMESPACE_BEGIN
Equalizer::Equalizer(std::shared_ptr<ISound> sound, std::shared_ptr<Buffer> bufEQ, int externalSizeEq, float maxFreqEq, int sizeConversion) : m_sound(sound), m_bufEQ(bufEQ)
{
this->maxFreqEq = maxFreqEq;
this->external_size_eq = externalSizeEq;
filter_length = sizeConversion;
}
Equalizer::~Equalizer()
{
}
std::shared_ptr<IReader> Equalizer::createReader()
{
std::shared_ptr<FFTPlan> fp = std::shared_ptr<FFTPlan>(new FFTPlan(filter_length));
// 2 threads to start with
return std::shared_ptr<ConvolverReader>(new ConvolverReader(m_sound->createReader(), createImpulseResponse(), std::shared_ptr<ThreadPool>(new ThreadPool(2)), fp));
}
float calculateValueArray(float* data, float minX, float maxX, int length, float posX)
{
if(posX < minX)
return 1.0;
if(posX > maxX)
return data[length - 1];
float interval = (maxX - minX) / (float) length;
int idx = (int) ((posX - minX) / interval);
return data[idx];
}
void complex_prod(float a, float b, float c, float d, float* r, float* imag)
{
float prod1 = a * c;
float prod2 = b * d;
float prod3 = (a + b) * (c + d);
// Real Part
*r = prod1 - prod2;
// Imaginary Part
*imag = prod3 - (prod1 + prod2);
}
/**
* The creation of the ImpuseResponse which will be convoluted with the sound
*
* The implementation is based on scikit-signal
*/
std::shared_ptr<ImpulseResponse> Equalizer::createImpulseResponse()
{
std::shared_ptr<FFTPlan> fp = std::shared_ptr<FFTPlan>(new FFTPlan(filter_length));
fftwf_complex* buffer = (fftwf_complex*) fp->getBuffer();
std::memset(buffer, 0, filter_length * sizeof(fftwf_complex));
std::shared_ptr<IReader> soundReader = m_sound.get()->createReader();
Specs specsSound = soundReader.get()->getSpecs();
int sampleRate = specsSound.rate;
for(unsigned i = 0; i < filter_length / 2; i++)
{
double freq = (((float) i) / (float) filter_length) * (float) sampleRate;
double dbGain = calculateValueArray(m_bufEQ->getBuffer(), 0.0, maxFreqEq, external_size_eq, freq);
// gain = 10^(decibels / 20.0)
// 0 db = 1
// 20 db = 10
// 40 db = 100
float gain = (float) pow(10.0, dbGain / 20.0);
if(i == filter_length / 2 - 1)
{
gain = 0;
}
// IMPORTANT!!!! It is needed for the minimum phase step.
// Without this, the amplitude would be square rooted
//
gain *= gain;
// Calculation of exponential with std.. or "by hand"
/*
std::complex<float> preShift= std::complex<float>(0.0, -(filter_length - 1)
/ 2. * M_PI * freq / ( sampleRate/2)); std::complex<float> shift =
std::exp(preShift);
std::complex<float> cGain = gain * shift;
*/
float imaginary_shift = -(filter_length - 1) / 2. * M_PI * freq / (sampleRate / 2);
float cGain_real = gain * cos(imaginary_shift);
float cGain_imag = gain * sin(imaginary_shift);
int i2 = filter_length - i - 1;
buffer[i][0] = cGain_real; // Real
buffer[i][1] = cGain_imag; // Imag
if(i > 0 && i2 < filter_length)
{
buffer[i2][0] = cGain_real; // Real
buffer[i2][1] = cGain_imag; // Imag
}
}
// In place. From Complex to sample_t
fp->IFFT(buffer);
// Window Hamming
sample_t* pt_sample_t = (sample_t*) buffer;
float half_filter = ((float) filter_length) / 2.0;
for(int i = 0; i < filter_length; i++)
{
// Centered in filter_length/2
float window = 0.54 - 0.46 * cos((2 * M_PI * (float) i) / (float) (filter_length - 1));
pt_sample_t[i] *= window;
}
std::shared_ptr<Buffer> b2 = std::shared_ptr<Buffer>(new Buffer(filter_length * sizeof(sample_t)));
sample_t* buffer_real = (sample_t*) buffer;
sample_t* buffer2 = b2->getBuffer();
float normaliziter = (float) filter_length;
for(int i = 0; i < filter_length; i++)
{
buffer2[i] = (buffer_real[i] / normaliziter);
}
fp->freeBuffer(buffer);
//
// Here b2 is the buffer with a "valid" FIR (remember the squared amplitude
//
std::shared_ptr<Buffer> ir_minimum = minimumPhaseFilterHomomorphic(b2, filter_length, -1);
Specs specsIR;
specsIR.rate = sampleRate;
specsIR.channels = CHANNELS_MONO;
return std::shared_ptr<ImpulseResponse>(new ImpulseResponse(std::shared_ptr<StreamBuffer>(new StreamBuffer(ir_minimum, specsIR)), fp));
}
std::shared_ptr<Buffer> Equalizer::minimumPhaseFilterHomomorphic(std::shared_ptr<Buffer> original, int lOriginal, int lWork)
{
void* b_orig = original->getBuffer();
if(lWork < lOriginal || lWork < 0)
{
lWork = (int) pow(2, ceil(log2((float) (2 * (lOriginal - 1) / 0.01))));
}
std::shared_ptr<FFTPlan> fp = std::shared_ptr<FFTPlan>(new FFTPlan(lWork, 0.1));
fftwf_complex* buffer = (fftwf_complex*) fp->getBuffer();
sample_t* b_work = (sample_t*) buffer;
// Padding with 0
std::memset(b_work, 0, lWork * sizeof(sample_t));
std::memcpy(b_work, b_orig, lOriginal * sizeof(sample_t));
fp->FFT(b_work);
for(int i = 0; i < lWork / 2; i++)
{
buffer[i][0] = fabs(sqrt(buffer[i][0] * buffer[i][0] + buffer[i][1] * buffer[i][1]));
buffer[i][1] = 0.0;
int conjugate = lWork - i - 1;
buffer[conjugate][0] = buffer[i][0];
buffer[conjugate][1] = 0.0;
}
double threshold = pow(10.0, -7);
float logThreshold = (float) log(threshold);
// take 0.25*log(|H|**2) = 0.5*log(|H|)
for(int i = 0; i < lWork; i++)
{
if(buffer[i][0] < threshold)
{
buffer[i][0] = 0.5 * logThreshold;
}
else
{
buffer[i][0] = 0.5 * log(buffer[i][0]);
}
}
fp->IFFT(buffer);
// homomorphic filter
int stop = (lOriginal + 1) / 2;
b_work[0] = b_work[0] / (float) lWork;
for(int i = 1; i < stop; i++)
{
b_work[i] = b_work[i] / (float) lWork * 2.0;
}
for(int i = stop; i < lWork; i++)
{
b_work[i] = 0;
}
fp->FFT(buffer);
// EXP
// e^x = e^ (a+bi)= e^a * e^bi = e^a * (cos b + i sin b)
for(int i = 0; i < lWork / 2; i++)
{
float new_real;
float new_imag;
new_real = exp(buffer[i][0]) * cos(buffer[i][1]);
new_imag = exp(buffer[i][0]) * sin(buffer[i][1]);
buffer[i][0] = new_real;
buffer[i][1] = new_imag;
int conjugate = lWork - i - 1;
buffer[conjugate][0] = new_real;
buffer[conjugate][1] = new_imag;
}
// IFFT
fp->IFFT(buffer);
// Create new clean Buffer with only the result and normalization
int lOut = (lOriginal / 2) + lOriginal % 2;
std::shared_ptr<Buffer> bOut = std::shared_ptr<Buffer>(new Buffer(sizeof(float) * lOut));
float* bbOut = (float*) bOut->getBuffer();
// Copy and normalize
for(int i = 0; i < lOut; i++)
{
bbOut[i] = b_work[i] / (float) lWork;
}
fp->freeBuffer(buffer);
return bOut;
}
std::shared_ptr<Buffer> Equalizer::minimumPhaseFilterHilbert(std::shared_ptr<Buffer> original, int lOriginal, int lWork)
{
void* b_orig = original->getBuffer();
if(lWork < lOriginal || lWork < 0)
{
lWork = (int) pow(2, ceil(log2((float) (2 * (lOriginal - 1) / 0.01))));
}
std::shared_ptr<FFTPlan> fp = std::shared_ptr<FFTPlan>(new FFTPlan(lWork, 0.1));
fftwf_complex* buffer = (fftwf_complex*) fp->getBuffer();
sample_t* b_work = (sample_t*) buffer;
// Padding with 0
std::memset(b_work, 0, lWork * sizeof(sample_t));
std::memcpy(b_work, b_orig, lOriginal * sizeof(sample_t));
fp->FFT(b_work);
float mymax, mymin;
float n_half = (float) (lOriginal >> 1);
for(int i = 0; i < lWork; i++)
{
float w = ((float) i) * 2.0 * M_PI / (float) lWork * n_half;
float f1 = cos(w);
float f2 = sin(w);
float f3, f4;
complex_prod(buffer[i][0], buffer[i][1], f1, f2, &f3, &f4);
buffer[i][0] = f3;
buffer[i][1] = 0.0;
if(i == 0)
{
mymax = f3;
mymin = f3;
}
else
{
if(f3 < mymin)
mymin = f3;
if(f3 > mymax)
mymax = f3;
}
}
float dp = mymax - 1;
float ds = 0 - mymin;
float S = 4.0 / pow(2, (sqrt(1 + dp + ds) + sqrt(1 - dp + ds)));
for(int i = 0; i < lWork; i++)
{
buffer[i][0] = sqrt((buffer[i][0] + ds) * S) + 1.0E-10;
}
fftwf_complex* buffer_tmp = (fftwf_complex*) std::malloc(lWork * sizeof(fftwf_complex));
std::memcpy(buffer_tmp, buffer, lWork * sizeof(fftwf_complex));
//
// Hilbert transform
//
int midpt = lWork >> 1;
for(int i = 0; i < lWork; i++)
buffer[i][0] = log(buffer[i][0]);
fp->IFFT(buffer);
b_work[0] = 0.0;
for(int i = 1; i < midpt; i++)
{
b_work[i] /= (float) lWork;
}
b_work[midpt] = 0.0;
for(int i = midpt + 1; i < lWork; i++)
{
b_work[i] /= (-1.0 * lWork);
}
fp->FFT(b_work);
// Exp
for(int i = 0; i < lWork; i++)
{
float base = exp(buffer[i][0]);
buffer[i][0] = base * cos(buffer[i][1]);
buffer[i][1] = base * sin(buffer[i][1]);
complex_prod(buffer_tmp[i][0], buffer_tmp[i][1], buffer[i][0], buffer[i][1], &(buffer[i][0]), &(buffer[i][1]));
}
std::free(buffer_tmp);
fp->IFFT(buffer);
//
// Copy and normalization
//
int n_out = n_half + lOriginal % 2;
std::shared_ptr<Buffer> b_minimum = std::shared_ptr<Buffer>(new Buffer(n_out * sizeof(sample_t)));
std::memcpy(b_minimum->getBuffer(), buffer, n_out * sizeof(sample_t));
sample_t* b_final = (sample_t*) b_minimum->getBuffer();
for(int i = 0; i < n_out; i++)
{
b_final[i] /= (float) lWork;
}
return b_minimum;
}
AUD_NAMESPACE_END

View File

@@ -22,7 +22,7 @@
AUD_NAMESPACE_BEGIN
FFTConvolver::FFTConvolver(std::shared_ptr<std::vector<std::complex<sample_t>>> ir, std::shared_ptr<FFTPlan> plan) :
m_plan(plan), m_N(plan->getSize()), m_M(plan->getSize()/2), m_L(plan->getSize()/2), m_tailPos(0), m_irBuffer(ir)
m_plan(plan), m_N(plan->getSize()), m_M(plan->getSize()/2), m_L(plan->getSize()/2), m_irBuffer(ir), m_tailPos(0)
{
m_tail = (float*)calloc(m_M - 1, sizeof(float));
m_realBufLen = ((m_N / 2) + 1) * 2;

View File

@@ -75,7 +75,7 @@ void HRTFLoader::loadHRTFs(std::shared_ptr<HRTF> hrtfs, char ear, const std::str
if(ear == 'L')
azim = 360 - azim;
}
catch(std::exception& e)
catch(...)
{
AUD_THROW(FileException, "The HRTF name doesn't follow the naming scheme: " + filename);
}
@@ -86,4 +86,4 @@ void HRTFLoader::loadHRTFs(std::shared_ptr<HRTF> hrtfs, char ear, const std::str
return;
}
AUD_NAMESPACE_END
AUD_NAMESPACE_END

View File

@@ -78,7 +78,7 @@ void HRTFLoader::loadHRTFs(std::shared_ptr<HRTF> hrtfs, char ear, const std::str
if(ear == 'L')
azim = 360 - azim;
}
catch(std::exception& e)
catch(...)
{
AUD_THROW(FileException, "The HRTF name doesn't follow the naming scheme: " + filename);
}
@@ -90,4 +90,4 @@ void HRTFLoader::loadHRTFs(std::shared_ptr<HRTF> hrtfs, char ear, const std::str
return;
}
AUD_NAMESPACE_END
AUD_NAMESPACE_END

View File

@@ -1,5 +1,5 @@
Project: Blender
URL: https://git.blender.org/blender.git
URL: https://projects.blender.org/blender/blender.git
License: Apache 2.0
Upstream version: N/A
Local modifications: None

View File

@@ -13,10 +13,12 @@ endif()
# Exporting functions from the blender binary gives linker warnings on Apple arm64 systems.
# Silence them here.
if(APPLE AND ("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64"))
if(CMAKE_COMPILER_IS_GNUCXX OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
string(APPEND CMAKE_C_FLAGS " -fvisibility=hidden")
string(APPEND CMAKE_CXX_FLAGS " -fvisibility=hidden")
if(APPLE)
if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
if(CMAKE_COMPILER_IS_GNUCXX OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
string(APPEND CMAKE_C_FLAGS " -fvisibility=hidden")
string(APPEND CMAKE_CXX_FLAGS " -fvisibility=hidden")
endif()
endif()
endif()
@@ -261,9 +263,11 @@ set(LIB
blender_add_lib(extern_mantaflow "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
# The VDB libs above are only added to as INTERFACE libs by blender_add_lib,
# meaning extern_mantaflow itself actually does not have a dependency on the
# openvdb libraries, and CMAKE is free to link the vdb libs before
# extern_mantaflow causing linker errors on linux. By explicitly declaring
# a dependency here, cmake will do the right thing.
target_link_libraries(extern_mantaflow PRIVATE ${OPENVDB_LIBRARIES})
if(WITH_OPENVDB)
# The VDB libs above are only added to as INTERFACE libs by blender_add_lib,
# meaning extern_mantaflow itself actually does not have a dependency on the
# openvdb libraries, and CMAKE is free to link the vdb libs before
# extern_mantaflow causing linker errors on linux. By explicitly declaring
# a dependency here, cmake will do the right thing.
target_link_libraries(extern_mantaflow PRIVATE ${OPENVDB_LIBRARIES})
endif()

View File

@@ -7,6 +7,7 @@ set(INC
set(INC_SYS
${VULKAN_INCLUDE_DIRS}
${MOLTENVK_INCLUDE_DIRS}
)
set(SRC

View File

@@ -0,0 +1,15 @@
diff --git a/extern/vulkan_memory_allocator/vk_mem_alloc.h b/extern/vulkan_memory_allocator/vk_mem_alloc.h
index 60f572038c0..63a9994ba46 100644
--- a/extern/vulkan_memory_allocator/vk_mem_alloc.h
+++ b/extern/vulkan_memory_allocator/vk_mem_alloc.h
@@ -13371,8 +13371,8 @@ bool VmaDefragmentationContext_T::IncrementCounters(VkDeviceSize bytes)
// Early return when max found
if (++m_PassStats.allocationsMoved >= m_MaxPassAllocations || m_PassStats.bytesMoved >= m_MaxPassBytes)
{
- VMA_ASSERT(m_PassStats.allocationsMoved == m_MaxPassAllocations ||
- m_PassStats.bytesMoved == m_MaxPassBytes && "Exceeded maximal pass threshold!");
+ VMA_ASSERT((m_PassStats.allocationsMoved == m_MaxPassAllocations ||
+ m_PassStats.bytesMoved == m_MaxPassBytes) && "Exceeded maximal pass threshold!");
return true;
}
return false;

File diff suppressed because it is too large Load Diff

View File

@@ -85,15 +85,11 @@ elseif(WIN32 AND MSVC AND NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
# there is no /arch:SSE3, but intrinsics are available anyway
if(CMAKE_CL_64)
set(CYCLES_SSE2_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
set(CYCLES_SSE3_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_AVX2_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
else()
set(CYCLES_SSE2_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_SSE3_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_SSE41_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_AVX2_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
endif()
@@ -126,11 +122,7 @@ elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
endif()
set(CYCLES_SSE2_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -msse -msse2")
set(CYCLES_SSE3_KERNEL_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS} -msse3 -mssse3")
set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS} -msse4.1")
if(CXX_HAS_AVX)
set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS} -mavx")
endif()
set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS} -msse3 -mssse3 -msse4.1")
if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS} -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c")
endif()
@@ -144,13 +136,8 @@ elseif(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "Intel")
if(CXX_HAS_SSE)
set(CYCLES_SSE2_KERNEL_FLAGS "/QxSSE2")
set(CYCLES_SSE3_KERNEL_FLAGS "/QxSSSE3")
set(CYCLES_SSE41_KERNEL_FLAGS "/QxSSE4.1")
if(CXX_HAS_AVX)
set(CYCLES_AVX_KERNEL_FLAGS "/arch:AVX")
endif()
if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "/QxCORE-AVX2")
endif()
@@ -174,13 +161,8 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
set(CYCLES_SSE2_KERNEL_FLAGS "-xsse2")
endif()
set(CYCLES_SSE3_KERNEL_FLAGS "-xssse3")
set(CYCLES_SSE41_KERNEL_FLAGS "-xsse4.1")
if(CXX_HAS_AVX)
set(CYCLES_AVX_KERNEL_FLAGS "-xavx")
endif()
if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "-xcore-avx2")
endif()
@@ -190,15 +172,10 @@ endif()
if(CXX_HAS_SSE)
add_definitions(
-DWITH_KERNEL_SSE2
-DWITH_KERNEL_SSE3
-DWITH_KERNEL_SSE41
)
endif()
if(CXX_HAS_AVX)
add_definitions(-DWITH_KERNEL_AVX)
endif()
if(CXX_HAS_AVX2)
add_definitions(-DWITH_KERNEL_AVX2)
endif()

View File

@@ -12,6 +12,7 @@ from bpy.props import (
PointerProperty,
StringProperty,
)
from bpy.app.translations import pgettext_iface as iface_
from math import pi
@@ -951,9 +952,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
return _cycles.debug_flags_update(scene)
debug_use_cpu_avx2: BoolProperty(name="AVX2", default=True)
debug_use_cpu_avx: BoolProperty(name="AVX", default=True)
debug_use_cpu_sse41: BoolProperty(name="SSE41", default=True)
debug_use_cpu_sse3: BoolProperty(name="SSE3", default=True)
debug_use_cpu_sse2: BoolProperty(name="SSE2", default=True)
debug_bvh_layout: EnumProperty(
name="BVH Layout",
@@ -1666,30 +1665,51 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.label(text="No compatible GPUs found for Cycles", icon='INFO')
if device_type == 'CUDA':
col.label(text="Requires NVIDIA GPU with compute capability 3.0", icon='BLANK1')
compute_capability = "3.0"
col.label(text=iface_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
icon='BLANK1', translate=False)
elif device_type == 'OPTIX':
col.label(text="Requires NVIDIA GPU with compute capability 5.0", icon='BLANK1')
col.label(text="and NVIDIA driver version 470 or newer", icon='BLANK1')
compute_capability = "5.0"
driver_version = "470"
col.label(text=iface_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
icon='BLANK1', translate=False)
col.label(text="and NVIDIA driver version %s or newer" % driver_version,
icon='BLANK1', translate=False)
elif device_type == 'HIP':
import sys
if sys.platform[:3] == "win":
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
elif sys.platform.startswith("linux"):
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text="and AMD driver version 22.10 or newer", icon='BLANK1')
if True:
col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
else:
import sys
if sys.platform[:3] == "win":
driver_version = "21.Q4"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
icon='BLANK1', translate=False)
elif sys.platform.startswith("linux"):
driver_version = "22.10"
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text=iface_("and AMD driver version %s or newer") % driver_version, icon='BLANK1',
translate=False)
elif device_type == 'ONEAPI':
import sys
if sys.platform.startswith("win"):
driver_version = "101.4032"
col.label(text="Requires Intel GPU with Xe-HPG architecture", icon='BLANK1')
col.label(text="and Windows driver version 101.3430 or newer", icon='BLANK1')
col.label(text=iface_("and Windows driver version %s or newer") % driver_version,
icon='BLANK1', translate=False)
elif sys.platform.startswith("linux"):
driver_version = "1.3.24931"
col.label(text="Requires Intel GPU with Xe-HPG architecture and", icon='BLANK1')
col.label(text=" - intel-level-zero-gpu version 1.3.23904 or newer", icon='BLANK1')
col.label(text=iface_(" - intel-level-zero-gpu version %s or newer") % driver_version,
icon='BLANK1', translate=False)
col.label(text=" - oneAPI Level-Zero Loader", icon='BLANK1')
elif device_type == 'METAL':
col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')
col.label(text="or AMD with macOS 12.3 or newer", icon='BLANK1')
silicon_mac_version = "12.2"
amd_mac_version = "12.3"
col.label(text=iface_("Requires Apple Silicon with macOS %s or newer") % silicon_mac_version,
icon='BLANK1', translate=False)
col.label(text=iface_("or AMD with macOS %s or newer") % amd_mac_version, icon='BLANK1',
translate=False)
return
for device in devices:
@@ -1725,12 +1745,21 @@ class CyclesPreferences(bpy.types.AddonPreferences):
if compute_device_type == 'METAL':
import platform
# MetalRT only works on Apple Silicon at present, pending argument encoding fixes on AMD
# Kernel specialization is only viable on Apple Silicon at present due to relative compilation speed
if platform.machine() == 'arm64':
import re
is_navi_2 = False
for device in devices:
if re.search(r"((RX)|(Pro)|(PRO))\s+W?6\d00X", device.name):
is_navi_2 = True
break
# MetalRT only works on Apple Silicon and Navi2.
is_arm64 = platform.machine() == 'arm64'
if is_arm64 or is_navi_2:
col = layout.column()
col.use_property_split = True
col.prop(self, "kernel_optimization_level")
# Kernel specialization is only supported on Apple Silicon
if is_arm64:
col.prop(self, "kernel_optimization_level")
col.prop(self, "use_metalrt")
def draw(self, context):

View File

@@ -20,7 +20,7 @@ class CyclesPresetPanel(PresetPanel, Panel):
@staticmethod
def post_cb(context):
# Modify an arbitrary built-in scene property to force a depsgraph
# update, because add-on properties don't. (see T62325)
# update, because add-on properties don't. (see #62325)
render = context.scene.render
render.filter_size = render.filter_size
@@ -2112,9 +2112,7 @@ class CYCLES_RENDER_PT_debug(CyclesDebugButtonsPanel, Panel):
row = col.row(align=True)
row.prop(cscene, "debug_use_cpu_sse2", toggle=True)
row.prop(cscene, "debug_use_cpu_sse3", toggle=True)
row.prop(cscene, "debug_use_cpu_sse41", toggle=True)
row.prop(cscene, "debug_use_cpu_avx", toggle=True)
row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
col.prop(cscene, "debug_bvh_layout", text="BVH")

View File

@@ -105,11 +105,12 @@ GPUShader *BlenderFallbackDisplayShader::bind(int width, int height)
/* Bind shader now to enable uniform assignment. */
GPU_shader_bind(shader_program_);
GPU_shader_uniform_int(shader_program_, image_texture_location_, 0);
int slot = 0;
GPU_shader_uniform_int_ex(shader_program_, image_texture_location_, 1, 1, &slot);
float size[2];
size[0] = width;
size[1] = height;
GPU_shader_uniform_vector(shader_program_, fullscreen_location_, 2, 1, size);
GPU_shader_uniform_float_ex(shader_program_, fullscreen_location_, 2, 1, size);
return shader_program_;
}

View File

@@ -20,7 +20,7 @@ BlenderImageLoader::BlenderImageLoader(BL::Image b_image,
: b_image(b_image),
frame(frame),
tile_number(tile_number),
/* Don't free cache for preview render to avoid race condition from T93560, to be fixed
/* Don't free cache for preview render to avoid race condition from #93560, to be fixed
* properly later as we are close to release. */
free_cache(!is_preview_render && !b_image.has_data())
{
@@ -72,7 +72,7 @@ bool BlenderImageLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaDat
metadata.colorspace = u_colorspace_raw;
}
else {
/* In some cases (e.g. T94135), the colorspace setting in Blender gets updated as part of the
/* In some cases (e.g. #94135), the colorspace setting in Blender gets updated as part of the
* metadata queries in this function, so update the colorspace setting here. */
PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
metadata.colorspace = get_enum_identifier(colorspace_ptr, "name");

View File

@@ -24,7 +24,7 @@ void BlenderSync::sync_light(BL::Object &b_parent,
Light *light = light_map.find(key);
/* Check if the transform was modified, in case a linked collection is moved we do not get a
* specific depsgraph update (T88515). This also mimics the behavior for Objects. */
* specific depsgraph update (#88515). This also mimics the behavior for Objects. */
const bool tfm_updated = (light && light->get_tfm() != tfm);
/* Update if either object or light data changed. */
@@ -48,6 +48,8 @@ void BlenderSync::sync_light(BL::Object &b_parent,
case BL::Light::type_SPOT: {
BL::SpotLight b_spot_light(b_light);
light->set_size(b_spot_light.shadow_soft_size());
light->set_axisu(transform_get_column(&tfm, 0));
light->set_axisv(transform_get_column(&tfm, 1));
light->set_light_type(LIGHT_SPOT);
light->set_spot_angle(b_spot_light.spot_size());
light->set_spot_smooth(b_spot_light.spot_blend());

View File

@@ -63,9 +63,7 @@ static void debug_flags_sync_from_scene(BL::Scene b_scene)
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
/* Synchronize CPU flags. */
flags.cpu.avx2 = get_boolean(cscene, "debug_use_cpu_avx2");
flags.cpu.avx = get_boolean(cscene, "debug_use_cpu_avx");
flags.cpu.sse41 = get_boolean(cscene, "debug_use_cpu_sse41");
flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3");
flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2");
flags.cpu.bvh_layout = (BVHLayout)get_enum(cscene, "debug_bvh_layout");
/* Synchronize CUDA flags. */
@@ -96,7 +94,7 @@ void python_thread_state_restore(void **python_thread_state)
*python_thread_state = NULL;
}
static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce)
static const char *PyC_UnicodeAsBytes(PyObject *py_str, PyObject **coerce)
{
const char *result = PyUnicode_AsUTF8(py_str);
if (result) {
@@ -133,8 +131,8 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)
}
PyObject *path_coerce = nullptr, *user_path_coerce = nullptr;
path_init(PyC_UnicodeAsByte(path, &path_coerce),
PyC_UnicodeAsByte(user_path, &user_path_coerce));
path_init(PyC_UnicodeAsBytes(path, &path_coerce),
PyC_UnicodeAsBytes(user_path, &user_path_coerce));
Py_XDECREF(path_coerce);
Py_XDECREF(user_path_coerce);

View File

@@ -404,7 +404,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
* point we know that we've got everything to render current view layer.
*/
/* At the moment we only free if we are not doing multi-view
* (or if we are rendering the last view). See T58142/D4239 for discussion.
* (or if we are rendering the last view). See #58142/D4239 for discussion.
*/
if (view_index == num_views - 1) {
free_blender_memory_if_possible();

View File

@@ -26,7 +26,7 @@
CCL_NAMESPACE_BEGIN
typedef map<void *, ShaderInput *> PtrInputMap;
typedef unordered_multimap<void *, ShaderInput *> PtrInputMap;
typedef map<void *, ShaderOutput *> PtrOutputMap;
typedef map<string, ConvertNode *> ProxyMap;
@@ -1251,7 +1251,9 @@ static void add_nodes(Scene *scene,
ConvertNode *proxy = graph->create_node<ConvertNode>(to_socket_type, to_socket_type, true);
input_map[b_link.from_socket().ptr.data] = proxy->inputs[0];
/* Muted nodes can result in multiple Cycles input sockets mapping to the same Blender
* input socket, so this needs to be a multimap. */
input_map.emplace(b_link.from_socket().ptr.data, proxy->inputs[0]);
output_map[b_link.to_socket().ptr.data] = proxy->outputs[0];
graph->add(proxy);
@@ -1286,7 +1288,7 @@ static void add_nodes(Scene *scene,
/* register the proxy node for internal binding */
group_proxy_input_map[b_input.identifier()] = proxy;
input_map[b_input.ptr.data] = proxy->inputs[0];
input_map.emplace(b_input.ptr.data, proxy->inputs[0]);
set_default_value(proxy->inputs[0], b_input, b_data, b_ntree);
}
@@ -1338,7 +1340,7 @@ static void add_nodes(Scene *scene,
if (proxy_it != proxy_output_map.end()) {
ConvertNode *proxy = proxy_it->second;
input_map[b_input.ptr.data] = proxy->inputs[0];
input_map.emplace(b_input.ptr.data, proxy->inputs[0]);
set_default_value(proxy->inputs[0], b_input, b_data, b_ntree);
}
@@ -1369,7 +1371,7 @@ static void add_nodes(Scene *scene,
/* XXX should not happen, report error? */
continue;
}
input_map[b_input.ptr.data] = input;
input_map.emplace(b_input.ptr.data, input);
set_default_value(input, b_input, b_data, b_ntree);
}
@@ -1401,20 +1403,23 @@ static void add_nodes(Scene *scene,
BL::NodeSocket b_from_sock = b_link.from_socket();
BL::NodeSocket b_to_sock = b_link.to_socket();
ShaderOutput *output = 0;
ShaderInput *input = 0;
ShaderOutput *output = nullptr;
PtrOutputMap::iterator output_it = output_map.find(b_from_sock.ptr.data);
if (output_it != output_map.end())
output = output_it->second;
PtrInputMap::iterator input_it = input_map.find(b_to_sock.ptr.data);
if (input_it != input_map.end())
input = input_it->second;
/* either node may be NULL when the node was not exported, typically
/* either socket may be NULL when the node was not exported, typically
* because the node type is not supported */
if (output && input)
graph->connect(output, input);
if (output != nullptr) {
ShaderOutput *output = output_it->second;
auto inputs = input_map.equal_range(b_to_sock.ptr.data);
for (PtrInputMap::iterator input_it = inputs.first; input_it != inputs.second; ++input_it) {
ShaderInput *input = input_it->second;
if (input != nullptr) {
graph->connect(output, input);
}
}
}
}
}

View File

@@ -766,7 +766,7 @@ void BlenderSync::free_data_after_sync(BL::Depsgraph &b_depsgraph)
(BlenderSession::headless || is_interface_locked) &&
/* Baking re-uses the depsgraph multiple times, clearing crashes
* reading un-evaluated mesh data which isn't aligned with the
* geometry we're baking, see T71012. */
* geometry we're baking, see #71012. */
!scene->bake_manager->get_baking() &&
/* Persistent data must main caches for performance and correctness. */
!is_persistent_data;

View File

@@ -42,12 +42,15 @@ endif()
###########################################################################
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
find_package(HIP)
set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
set(WITH_CYCLES_HIP_BINARIES OFF)
message(STATUS "HIP temporarily disabled due to compiler bugs")
if(HIP_FOUND)
message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
endif()
# find_package(HIP)
# set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
# if(HIP_FOUND)
# message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
# endif()
endif()
if(NOT WITH_HIP_DYNLOAD)

View File

@@ -111,8 +111,10 @@ macro(cycles_external_libraries_append libraries)
endif()
if(WITH_OPENIMAGEDENOISE)
list(APPEND ${libraries} ${OPENIMAGEDENOISE_LIBRARIES})
if(APPLE AND "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
list(APPEND ${libraries} "-framework Accelerate")
if(APPLE)
if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
list(APPEND ${libraries} "-framework Accelerate")
endif()
endif()
endif()
if(WITH_ALEMBIC)
@@ -136,7 +138,15 @@ macro(cycles_external_libraries_append libraries)
${PYTHON_LIBRARIES}
${ZLIB_LIBRARIES}
${CMAKE_DL_LIBS}
${PTHREADS_LIBRARIES}
)
if(DEFINED PTHREADS_LIBRARIES)
list(APPEND ${libraries}
${PTHREADS_LIBRARIES}
)
endif()
list(APPEND ${libraries}
${PLATFORM_LINKLIBS}
)

View File

@@ -45,9 +45,7 @@ string device_cpu_capabilities()
{
string capabilities = "";
capabilities += system_cpu_support_sse2() ? "SSE2 " : "";
capabilities += system_cpu_support_sse3() ? "SSE3 " : "";
capabilities += system_cpu_support_sse41() ? "SSE41 " : "";
capabilities += system_cpu_support_avx() ? "AVX " : "";
capabilities += system_cpu_support_avx2() ? "AVX2" : "";
if (capabilities[capabilities.size() - 1] == ' ')
capabilities.resize(capabilities.size() - 1);

View File

@@ -9,8 +9,7 @@ CCL_NAMESPACE_BEGIN
#define KERNEL_FUNCTIONS(name) \
KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse2, name), \
KERNEL_NAME_EVAL(cpu_sse3, name), KERNEL_NAME_EVAL(cpu_sse41, name), \
KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)
KERNEL_NAME_EVAL(cpu_sse41, name), KERNEL_NAME_EVAL(cpu_avx2, name)
#define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
#define REGISTER_KERNEL_FILM_CONVERT(name) \

View File

@@ -17,13 +17,10 @@ template<typename FunctionType> class CPUKernelFunction {
public:
CPUKernelFunction(FunctionType kernel_default,
FunctionType kernel_sse2,
FunctionType kernel_sse3,
FunctionType kernel_sse41,
FunctionType kernel_avx,
FunctionType kernel_avx2)
{
kernel_info_ = get_best_kernel_info(
kernel_default, kernel_sse2, kernel_sse3, kernel_sse41, kernel_avx, kernel_avx2);
kernel_info_ = get_best_kernel_info(kernel_default, kernel_sse2, kernel_sse41, kernel_avx2);
}
template<typename... Args> inline auto operator()(Args... args) const
@@ -60,16 +57,12 @@ template<typename FunctionType> class CPUKernelFunction {
KernelInfo get_best_kernel_info(FunctionType kernel_default,
FunctionType kernel_sse2,
FunctionType kernel_sse3,
FunctionType kernel_sse41,
FunctionType kernel_avx,
FunctionType kernel_avx2)
{
/* Silence warnings about unused variables when compiling without some architectures. */
(void)kernel_sse2;
(void)kernel_sse3;
(void)kernel_sse41;
(void)kernel_avx;
(void)kernel_avx2;
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
@@ -78,24 +71,12 @@ template<typename FunctionType> class CPUKernelFunction {
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) {
return KernelInfo("AVX", kernel_avx);
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) {
return KernelInfo("SSE4.1", kernel_sse41);
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) {
return KernelInfo("SSE3", kernel_sse3);
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
return KernelInfo("SSE2", kernel_sse2);

View File

@@ -53,8 +53,12 @@ void CUDADevice::set_error(const string &error)
}
CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
: GPUDevice(info, stats, profiler)
{
/* Verify that base class types can be used with specific backend types */
static_assert(sizeof(texMemObject) == sizeof(CUtexObject));
static_assert(sizeof(arrayMemObject) == sizeof(CUarray));
first_error = true;
cuDevId = info.num;
@@ -65,12 +69,6 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
need_texture_info = false;
device_texture_headroom = 0;
device_working_headroom = 0;
move_texture_to_host = false;
map_host_limit = 0;
map_host_used = 0;
can_map_host = 0;
pitch_alignment = 0;
/* Initialize CUDA. */
@@ -91,8 +89,9 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
/* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
* CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
* so we can predict which memory to map to host. */
cuda_assert(
cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
int value;
cuda_assert(cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
can_map_host = value != 0;
cuda_assert(cuDeviceGetAttribute(
&pitch_alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
@@ -499,311 +498,57 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
# endif
}
void CUDADevice::init_host_memory()
{
/* Limit amount of host mapped memory, because allocating too much can
* cause system instability. Leave at least half or 4 GB of system
* memory free, whichever is smaller. */
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
size_t system_ram = system_physical_ram();
if (system_ram > 0) {
if (system_ram / 2 > default_limit) {
map_host_limit = system_ram - default_limit;
}
else {
map_host_limit = system_ram / 2;
}
}
else {
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
map_host_limit = 0;
}
/* Amount of device memory to keep is free after texture memory
* and working memory allocations respectively. We set the working
* memory limit headroom lower so that some space is left after all
* texture memory allocations. */
device_working_headroom = 32 * 1024 * 1024LL; // 32MB
device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
}
void CUDADevice::load_texture_info()
{
if (need_texture_info) {
/* Unset flag before copying, so this does not loop indefinitely if the copy below calls
* into 'move_textures_to_host' (which calls 'load_texture_info' again). */
need_texture_info = false;
texture_info.copy_to_device();
}
}
void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
{
/* Break out of recursive call, which can happen when moving memory on a multi device. */
static bool any_device_moving_textures_to_host = false;
if (any_device_moving_textures_to_host) {
return;
}
/* Signal to reallocate textures in host memory only. */
move_texture_to_host = true;
while (size > 0) {
/* Find suitable memory allocation to move. */
device_memory *max_mem = NULL;
size_t max_size = 0;
bool max_is_image = false;
thread_scoped_lock lock(cuda_mem_map_mutex);
foreach (CUDAMemMap::value_type &pair, cuda_mem_map) {
device_memory &mem = *pair.first;
CUDAMem *cmem = &pair.second;
/* Can only move textures allocated on this device (and not those from peer devices).
* And need to ignore memory that is already on the host. */
if (!mem.is_resident(this) || cmem->use_mapped_host) {
continue;
}
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
(&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
/* Can't move this type of memory. */
if (!is_texture || cmem->array) {
continue;
}
/* For other textures, only move image textures. */
if (for_texture && !is_image) {
continue;
}
/* Try to move largest allocation, prefer moving images. */
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
max_is_image = is_image;
max_size = mem.device_size;
max_mem = &mem;
}
}
lock.unlock();
/* Move to host memory. This part is mutex protected since
* multiple CUDA devices could be moving the memory. The
* first one will do it, and the rest will adopt the pointer. */
if (max_mem) {
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
static thread_mutex move_mutex;
thread_scoped_lock lock(move_mutex);
any_device_moving_textures_to_host = true;
/* Potentially need to call back into multi device, so pointer mapping
* and peer devices are updated. This is also necessary since the device
* pointer may just be a key here, so cannot be accessed and freed directly.
* Unfortunately it does mean that memory is reallocated on all other
* devices as well, which is potentially dangerous when still in use (since
* a thread rendering on another devices would only be caught in this mutex
* if it so happens to do an allocation at the same time as well. */
max_mem->device_copy_to();
size = (max_size >= size) ? 0 : size - max_size;
any_device_moving_textures_to_host = false;
}
else {
break;
}
}
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
move_texture_to_host = false;
/* Update texture info array with new pointers. */
load_texture_info();
}
CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_padding)
void CUDADevice::get_device_memory_info(size_t &total, size_t &free)
{
CUDAContextScope scope(this);
CUdeviceptr device_pointer = 0;
size_t size = mem.memory_size() + pitch_padding;
CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
const char *status = "";
/* First try allocating in device memory, respecting headroom. We make
* an exception for texture info. It is small and frequently accessed,
* so treat it as working memory.
*
* If there is not enough room for working memory, we will try to move
* textures to host memory, assuming the performance impact would have
* been worse for working memory. */
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
size_t total = 0, free = 0;
cuMemGetInfo(&free, &total);
/* Move textures to host memory if needed. */
if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
move_textures_to_host(size + headroom - free, is_texture);
cuMemGetInfo(&free, &total);
}
/* Allocate in device memory. */
if (!move_texture_to_host && (size + headroom) < free) {
mem_alloc_result = cuMemAlloc(&device_pointer, size);
if (mem_alloc_result == CUDA_SUCCESS) {
status = " in device memory";
}
}
/* Fall back to mapped host memory if needed and possible. */
void *shared_pointer = 0;
if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) {
if (mem.shared_pointer) {
/* Another device already allocated host memory. */
mem_alloc_result = CUDA_SUCCESS;
shared_pointer = mem.shared_pointer;
}
else if (map_host_used + size < map_host_limit) {
/* Allocate host memory ourselves. */
mem_alloc_result = cuMemHostAlloc(
&shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
(mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
}
if (mem_alloc_result == CUDA_SUCCESS) {
cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0));
map_host_used += size;
status = " in host memory";
}
}
if (mem_alloc_result != CUDA_SUCCESS) {
if (mem.type == MEM_DEVICE_ONLY) {
status = " failed, out of device memory";
set_error("System is out of GPU memory");
}
else {
status = " failed, out of device and host memory";
set_error("System is out of GPU and shared host memory");
}
}
if (mem.name) {
VLOG_WORK << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")" << status;
}
mem.device_pointer = (device_ptr)device_pointer;
mem.device_size = size;
stats.mem_alloc(size);
if (!mem.device_pointer) {
return NULL;
}
/* Insert into map of allocations. */
thread_scoped_lock lock(cuda_mem_map_mutex);
CUDAMem *cmem = &cuda_mem_map[&mem];
if (shared_pointer != 0) {
/* Replace host pointer with our host allocation. Only works if
* CUDA memory layout is the same and has no pitch padding. Also
* does not work if we move textures to host during a render,
* since other devices might be using the memory. */
if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
mem.host_pointer != shared_pointer) {
memcpy(shared_pointer, mem.host_pointer, size);
/* A Call to device_memory::host_free() should be preceded by
* a call to device_memory::device_free() for host memory
* allocated by a device to be handled properly. Two exceptions
* are here and a call in OptiXDevice::generic_alloc(), where
* the current host memory can be assumed to be allocated by
* device_memory::host_alloc(), not by a device */
mem.host_free();
mem.host_pointer = shared_pointer;
}
mem.shared_pointer = shared_pointer;
mem.shared_counter++;
cmem->use_mapped_host = true;
}
else {
cmem->use_mapped_host = false;
}
return cmem;
}
void CUDADevice::generic_copy_to(device_memory &mem)
bool CUDADevice::alloc_device(void *&device_pointer, size_t size)
{
if (!mem.host_pointer || !mem.device_pointer) {
return;
}
CUDAContextScope scope(this);
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
* cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
* mem.host_pointer. */
thread_scoped_lock lock(cuda_mem_map_mutex);
if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const CUDAContextScope scope(this);
cuda_assert(
cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size()));
}
CUresult mem_alloc_result = cuMemAlloc((CUdeviceptr *)&device_pointer, size);
return mem_alloc_result == CUDA_SUCCESS;
}
void CUDADevice::generic_free(device_memory &mem)
void CUDADevice::free_device(void *device_pointer)
{
if (mem.device_pointer) {
CUDAContextScope scope(this);
thread_scoped_lock lock(cuda_mem_map_mutex);
DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
const CUDAMem &cmem = cuda_mem_map[&mem];
CUDAContextScope scope(this);
/* If cmem.use_mapped_host is true, reference counting is used
* to safely free a mapped host memory. */
cuda_assert(cuMemFree((CUdeviceptr)device_pointer));
}
if (cmem.use_mapped_host) {
assert(mem.shared_pointer);
if (mem.shared_pointer) {
assert(mem.shared_counter > 0);
if (--mem.shared_counter == 0) {
if (mem.host_pointer == mem.shared_pointer) {
mem.host_pointer = 0;
}
cuMemFreeHost(mem.shared_pointer);
mem.shared_pointer = 0;
}
}
map_host_used -= mem.device_size;
}
else {
/* Free device memory. */
cuda_assert(cuMemFree(mem.device_pointer));
}
bool CUDADevice::alloc_host(void *&shared_pointer, size_t size)
{
CUDAContextScope scope(this);
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
CUresult mem_alloc_result = cuMemHostAlloc(
&shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
return mem_alloc_result == CUDA_SUCCESS;
}
cuda_mem_map.erase(cuda_mem_map.find(&mem));
}
void CUDADevice::free_host(void *shared_pointer)
{
CUDAContextScope scope(this);
cuMemFreeHost(shared_pointer);
}
bool CUDADevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
{
CUDAContextScope scope(this);
cuda_assert(cuMemHostGetDevicePointer_v2((CUdeviceptr *)&device_pointer, shared_pointer, 0));
return true;
}
void CUDADevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
{
const CUDAContextScope scope(this);
cuda_assert(cuMemcpyHtoD((CUdeviceptr)device_pointer, host_pointer, size));
}
void CUDADevice::mem_alloc(device_memory &mem)
@@ -868,8 +613,8 @@ void CUDADevice::mem_zero(device_memory &mem)
/* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
* regardless of mem.host_pointer and mem.shared_pointer. */
thread_scoped_lock lock(cuda_mem_map_mutex);
if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
thread_scoped_lock lock(device_mem_map_mutex);
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const CUDAContextScope scope(this);
cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
}
@@ -994,19 +739,19 @@ void CUDADevice::tex_alloc(device_texture &mem)
return;
}
CUDAMem *cmem = NULL;
Mem *cmem = NULL;
CUarray array_3d = NULL;
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
size_t dst_pitch = src_pitch;
if (!mem.is_resident(this)) {
thread_scoped_lock lock(cuda_mem_map_mutex);
cmem = &cuda_mem_map[&mem];
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
cmem->texobject = 0;
if (mem.data_depth > 1) {
array_3d = (CUarray)mem.device_pointer;
cmem->array = array_3d;
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
}
else if (mem.data_height > 0) {
dst_pitch = align_up(src_pitch, pitch_alignment);
@@ -1050,10 +795,10 @@ void CUDADevice::tex_alloc(device_texture &mem)
mem.device_size = size;
stats.mem_alloc(size);
thread_scoped_lock lock(cuda_mem_map_mutex);
cmem = &cuda_mem_map[&mem];
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
cmem->texobject = 0;
cmem->array = array_3d;
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
}
else if (mem.data_height > 0) {
/* 2D texture, using pitch aligned linear memory. */
@@ -1137,8 +882,8 @@ void CUDADevice::tex_alloc(device_texture &mem)
texDesc.filterMode = filter_mode;
texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
thread_scoped_lock lock(cuda_mem_map_mutex);
cmem = &cuda_mem_map[&mem];
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
@@ -1153,9 +898,9 @@ void CUDADevice::tex_free(device_texture &mem)
{
if (mem.device_pointer) {
CUDAContextScope scope(this);
thread_scoped_lock lock(cuda_mem_map_mutex);
DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
const CUDAMem &cmem = cuda_mem_map[&mem];
thread_scoped_lock lock(device_mem_map_mutex);
DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
const Mem &cmem = device_mem_map[&mem];
if (cmem.texobject) {
/* Free bindless texture. */
@@ -1164,16 +909,16 @@ void CUDADevice::tex_free(device_texture &mem)
if (!mem.is_resident(this)) {
/* Do not free memory here, since it was allocated on a different device. */
cuda_mem_map.erase(cuda_mem_map.find(&mem));
device_mem_map.erase(device_mem_map.find(&mem));
}
else if (cmem.array) {
/* Free array. */
cuArrayDestroy(cmem.array);
cuArrayDestroy(reinterpret_cast<CUarray>(cmem.array));
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
cuda_mem_map.erase(cuda_mem_map.find(&mem));
device_mem_map.erase(device_mem_map.find(&mem));
}
else {
lock.unlock();

View File

@@ -21,7 +21,7 @@ CCL_NAMESPACE_BEGIN
class DeviceQueue;
class CUDADevice : public Device {
class CUDADevice : public GPUDevice {
friend class CUDAContextScope;
@@ -29,36 +29,11 @@ class CUDADevice : public Device {
CUdevice cuDevice;
CUcontext cuContext;
CUmodule cuModule;
size_t device_texture_headroom;
size_t device_working_headroom;
bool move_texture_to_host;
size_t map_host_used;
size_t map_host_limit;
int can_map_host;
int pitch_alignment;
int cuDevId;
int cuDevArchitecture;
bool first_error;
struct CUDAMem {
CUDAMem() : texobject(0), array(0), use_mapped_host(false)
{
}
CUtexObject texobject;
CUarray array;
/* If true, a mapped host memory in shared_pointer is being used. */
bool use_mapped_host;
};
typedef map<device_memory *, CUDAMem> CUDAMemMap;
CUDAMemMap cuda_mem_map;
thread_mutex cuda_mem_map_mutex;
/* Bindless Textures */
device_vector<TextureInfo> texture_info;
bool need_texture_info;
CUDADeviceKernels kernels;
static bool have_precompiled_kernels();
@@ -88,17 +63,13 @@ class CUDADevice : public Device {
void reserve_local_memory(const uint kernel_features);
void init_host_memory();
void load_texture_info();
void move_textures_to_host(size_t size, bool for_texture);
CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
void generic_copy_to(device_memory &mem);
void generic_free(device_memory &mem);
virtual void get_device_memory_info(size_t &total, size_t &free) override;
virtual bool alloc_device(void *&device_pointer, size_t size) override;
virtual void free_device(void *device_pointer) override;
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
virtual void free_host(void *shared_pointer) override;
virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;
void mem_alloc(device_memory &mem) override;

View File

@@ -452,6 +452,320 @@ void *Device::get_cpu_osl_memory()
return nullptr;
}
GPUDevice::~GPUDevice() noexcept(false)
{
}
bool GPUDevice::load_texture_info()
{
if (need_texture_info) {
/* Unset flag before copying, so this does not loop indefinitely if the copy below calls
* into 'move_textures_to_host' (which calls 'load_texture_info' again). */
need_texture_info = false;
texture_info.copy_to_device();
return true;
}
else {
return false;
}
}
void GPUDevice::init_host_memory(size_t preferred_texture_headroom,
size_t preferred_working_headroom)
{
/* Limit amount of host mapped memory, because allocating too much can
* cause system instability. Leave at least half or 4 GB of system
* memory free, whichever is smaller. */
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
size_t system_ram = system_physical_ram();
if (system_ram > 0) {
if (system_ram / 2 > default_limit) {
map_host_limit = system_ram - default_limit;
}
else {
map_host_limit = system_ram / 2;
}
}
else {
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
map_host_limit = 0;
}
/* Amount of device memory to keep free after texture memory
* and working memory allocations respectively. We set the working
* memory limit headroom lower than the working one so there
* is space left for it. */
device_working_headroom = preferred_working_headroom > 0 ? preferred_working_headroom :
32 * 1024 * 1024LL; // 32MB
device_texture_headroom = preferred_texture_headroom > 0 ? preferred_texture_headroom :
128 * 1024 * 1024LL; // 128MB
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
}
void GPUDevice::move_textures_to_host(size_t size, bool for_texture)
{
/* Break out of recursive call, which can happen when moving memory on a multi device. */
static bool any_device_moving_textures_to_host = false;
if (any_device_moving_textures_to_host) {
return;
}
/* Signal to reallocate textures in host memory only. */
move_texture_to_host = true;
while (size > 0) {
/* Find suitable memory allocation to move. */
device_memory *max_mem = NULL;
size_t max_size = 0;
bool max_is_image = false;
thread_scoped_lock lock(device_mem_map_mutex);
foreach (MemMap::value_type &pair, device_mem_map) {
device_memory &mem = *pair.first;
Mem *cmem = &pair.second;
/* Can only move textures allocated on this device (and not those from peer devices).
* And need to ignore memory that is already on the host. */
if (!mem.is_resident(this) || cmem->use_mapped_host) {
continue;
}
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
(&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
/* Can't move this type of memory. */
if (!is_texture || cmem->array) {
continue;
}
/* For other textures, only move image textures. */
if (for_texture && !is_image) {
continue;
}
/* Try to move largest allocation, prefer moving images. */
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
max_is_image = is_image;
max_size = mem.device_size;
max_mem = &mem;
}
}
lock.unlock();
/* Move to host memory. This part is mutex protected since
* multiple backend devices could be moving the memory. The
* first one will do it, and the rest will adopt the pointer. */
if (max_mem) {
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
static thread_mutex move_mutex;
thread_scoped_lock lock(move_mutex);
any_device_moving_textures_to_host = true;
/* Potentially need to call back into multi device, so pointer mapping
* and peer devices are updated. This is also necessary since the device
* pointer may just be a key here, so cannot be accessed and freed directly.
* Unfortunately it does mean that memory is reallocated on all other
* devices as well, which is potentially dangerous when still in use (since
* a thread rendering on another devices would only be caught in this mutex
* if it so happens to do an allocation at the same time as well. */
max_mem->device_copy_to();
size = (max_size >= size) ? 0 : size - max_size;
any_device_moving_textures_to_host = false;
}
else {
break;
}
}
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
move_texture_to_host = false;
/* Update texture info array with new pointers. */
load_texture_info();
}
GPUDevice::Mem *GPUDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
{
void *device_pointer = 0;
size_t size = mem.memory_size() + pitch_padding;
bool mem_alloc_result = false;
const char *status = "";
/* First try allocating in device memory, respecting headroom. We make
* an exception for texture info. It is small and frequently accessed,
* so treat it as working memory.
*
* If there is not enough room for working memory, we will try to move
* textures to host memory, assuming the performance impact would have
* been worse for working memory. */
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
size_t total = 0, free = 0;
get_device_memory_info(total, free);
/* Move textures to host memory if needed. */
if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
move_textures_to_host(size + headroom - free, is_texture);
get_device_memory_info(total, free);
}
/* Allocate in device memory. */
if (!move_texture_to_host && (size + headroom) < free) {
mem_alloc_result = alloc_device(device_pointer, size);
if (mem_alloc_result) {
device_mem_in_use += size;
status = " in device memory";
}
}
/* Fall back to mapped host memory if needed and possible. */
void *shared_pointer = 0;
if (!mem_alloc_result && can_map_host && mem.type != MEM_DEVICE_ONLY) {
if (mem.shared_pointer) {
/* Another device already allocated host memory. */
mem_alloc_result = true;
shared_pointer = mem.shared_pointer;
}
else if (map_host_used + size < map_host_limit) {
/* Allocate host memory ourselves. */
mem_alloc_result = alloc_host(shared_pointer, size);
assert((mem_alloc_result && shared_pointer != 0) ||
(!mem_alloc_result && shared_pointer == 0));
}
if (mem_alloc_result) {
assert(transform_host_pointer(device_pointer, shared_pointer));
map_host_used += size;
status = " in host memory";
}
}
if (!mem_alloc_result) {
if (mem.type == MEM_DEVICE_ONLY) {
status = " failed, out of device memory";
set_error("System is out of GPU memory");
}
else {
status = " failed, out of device and host memory";
set_error("System is out of GPU and shared host memory");
}
}
if (mem.name) {
VLOG_WORK << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")" << status;
}
mem.device_pointer = (device_ptr)device_pointer;
mem.device_size = size;
stats.mem_alloc(size);
if (!mem.device_pointer) {
return NULL;
}
/* Insert into map of allocations. */
thread_scoped_lock lock(device_mem_map_mutex);
Mem *cmem = &device_mem_map[&mem];
if (shared_pointer != 0) {
/* Replace host pointer with our host allocation. Only works if
* memory layout is the same and has no pitch padding. Also
* does not work if we move textures to host during a render,
* since other devices might be using the memory. */
if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
mem.host_pointer != shared_pointer) {
memcpy(shared_pointer, mem.host_pointer, size);
/* A Call to device_memory::host_free() should be preceded by
* a call to device_memory::device_free() for host memory
* allocated by a device to be handled properly. Two exceptions
* are here and a call in OptiXDevice::generic_alloc(), where
* the current host memory can be assumed to be allocated by
* device_memory::host_alloc(), not by a device */
mem.host_free();
mem.host_pointer = shared_pointer;
}
mem.shared_pointer = shared_pointer;
mem.shared_counter++;
cmem->use_mapped_host = true;
}
else {
cmem->use_mapped_host = false;
}
return cmem;
}
void GPUDevice::generic_free(device_memory &mem)
{
if (mem.device_pointer) {
thread_scoped_lock lock(device_mem_map_mutex);
DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
const Mem &cmem = device_mem_map[&mem];
/* If cmem.use_mapped_host is true, reference counting is used
* to safely free a mapped host memory. */
if (cmem.use_mapped_host) {
assert(mem.shared_pointer);
if (mem.shared_pointer) {
assert(mem.shared_counter > 0);
if (--mem.shared_counter == 0) {
if (mem.host_pointer == mem.shared_pointer) {
mem.host_pointer = 0;
}
free_host(mem.shared_pointer);
mem.shared_pointer = 0;
}
}
map_host_used -= mem.device_size;
}
else {
/* Free device memory. */
free_device((void *)mem.device_pointer);
device_mem_in_use -= mem.device_size;
}
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
device_mem_map.erase(device_mem_map.find(&mem));
}
}
void GPUDevice::generic_copy_to(device_memory &mem)
{
if (!mem.host_pointer || !mem.device_pointer) {
return;
}
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
* backend device allocation regardless of mem.host_pointer and mem.shared_pointer, and should
* copy data from mem.host_pointer. */
thread_scoped_lock lock(device_mem_map_mutex);
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size());
}
}
/* DeviceInfo */
CCL_NAMESPACE_END

View File

@@ -309,6 +309,93 @@ class Device {
static uint devices_initialized_mask;
};
/* Device, which is GPU, with some common functionality for GPU backends */
class GPUDevice : public Device {
protected:
GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
: Device(info_, stats_, profiler_),
texture_info(this, "texture_info", MEM_GLOBAL),
need_texture_info(false),
can_map_host(false),
map_host_used(0),
map_host_limit(0),
device_texture_headroom(0),
device_working_headroom(0),
device_mem_map(),
device_mem_map_mutex(),
move_texture_to_host(false),
device_mem_in_use(0)
{
}
public:
virtual ~GPUDevice() noexcept(false);
/* For GPUs that can use bindless textures in some way or another. */
device_vector<TextureInfo> texture_info;
bool need_texture_info;
/* Returns true if the texture info was copied to the device (meaning, some more
* re-initialization might be needed). */
virtual bool load_texture_info();
protected:
/* Memory allocation, only accessed through device_memory. */
friend class device_memory;
bool can_map_host;
size_t map_host_used;
size_t map_host_limit;
size_t device_texture_headroom;
size_t device_working_headroom;
typedef unsigned long long texMemObject;
typedef unsigned long long arrayMemObject;
struct Mem {
Mem() : texobject(0), array(0), use_mapped_host(false)
{
}
texMemObject texobject;
arrayMemObject array;
/* If true, a mapped host memory in shared_pointer is being used. */
bool use_mapped_host;
};
typedef map<device_memory *, Mem> MemMap;
MemMap device_mem_map;
thread_mutex device_mem_map_mutex;
bool move_texture_to_host;
/* Simple counter which will try to track amount of used device memory */
size_t device_mem_in_use;
virtual void init_host_memory(size_t preferred_texture_headroom = 0,
size_t preferred_working_headroom = 0);
virtual void move_textures_to_host(size_t size, bool for_texture);
/* Allocation, deallocation and copy functions, with corresponding
* support of device/host allocations. */
virtual GPUDevice::Mem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
virtual void generic_free(device_memory &mem);
virtual void generic_copy_to(device_memory &mem);
/* total - amount of device memory, free - amount of available device memory */
virtual void get_device_memory_info(size_t &total, size_t &free) = 0;
virtual bool alloc_device(void *&device_pointer, size_t size) = 0;
virtual void free_device(void *device_pointer) = 0;
virtual bool alloc_host(void *&shared_pointer, size_t size) = 0;
virtual void free_host(void *shared_pointer) = 0;
/* This function should return device pointer corresponding to shared pointer, which
* is host buffer, allocated in `alloc_host`. The function should `true`, if such
* address transformation is possible and `false` otherwise. */
virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) = 0;
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) = 0;
};
CCL_NAMESPACE_END
#endif /* __DEVICE_H__ */

View File

@@ -53,8 +53,12 @@ void HIPDevice::set_error(const string &error)
}
HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
: GPUDevice(info, stats, profiler)
{
/* Verify that base class types can be used with specific backend types */
static_assert(sizeof(texMemObject) == sizeof(hipTextureObject_t));
static_assert(sizeof(arrayMemObject) == sizeof(hArray));
first_error = true;
hipDevId = info.num;
@@ -65,12 +69,6 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
need_texture_info = false;
device_texture_headroom = 0;
device_working_headroom = 0;
move_texture_to_host = false;
map_host_limit = 0;
map_host_used = 0;
can_map_host = 0;
pitch_alignment = 0;
/* Initialize HIP. */
@@ -91,7 +89,9 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
/* hipDeviceMapHost for mapping host memory when out of device memory.
* hipDeviceLmemResizeToMax for reserving local memory ahead of render,
* so we can predict which memory to map to host. */
hip_assert(hipDeviceGetAttribute(&can_map_host, hipDeviceAttributeCanMapHostMemory, hipDevice));
int value;
hip_assert(hipDeviceGetAttribute(&value, hipDeviceAttributeCanMapHostMemory, hipDevice));
can_map_host = value != 0;
hip_assert(
hipDeviceGetAttribute(&pitch_alignment, hipDeviceAttributeTexturePitchAlignment, hipDevice));
@@ -460,305 +460,58 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
# endif
}
void HIPDevice::init_host_memory()
{
/* Limit amount of host mapped memory, because allocating too much can
* cause system instability. Leave at least half or 4 GB of system
* memory free, whichever is smaller. */
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
size_t system_ram = system_physical_ram();
if (system_ram > 0) {
if (system_ram / 2 > default_limit) {
map_host_limit = system_ram - default_limit;
}
else {
map_host_limit = system_ram / 2;
}
}
else {
VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
map_host_limit = 0;
}
/* Amount of device memory to keep is free after texture memory
* and working memory allocations respectively. We set the working
* memory limit headroom lower so that some space is left after all
* texture memory allocations. */
device_working_headroom = 32 * 1024 * 1024LL; // 32MB
device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
<< " bytes. (" << string_human_readable_size(map_host_limit) << ")";
}
void HIPDevice::load_texture_info()
{
if (need_texture_info) {
/* Unset flag before copying, so this does not loop indefinitely if the copy below calls
* into 'move_textures_to_host' (which calls 'load_texture_info' again). */
need_texture_info = false;
texture_info.copy_to_device();
}
}
void HIPDevice::move_textures_to_host(size_t size, bool for_texture)
{
/* Break out of recursive call, which can happen when moving memory on a multi device. */
static bool any_device_moving_textures_to_host = false;
if (any_device_moving_textures_to_host) {
return;
}
/* Signal to reallocate textures in host memory only. */
move_texture_to_host = true;
while (size > 0) {
/* Find suitable memory allocation to move. */
device_memory *max_mem = NULL;
size_t max_size = 0;
bool max_is_image = false;
thread_scoped_lock lock(hip_mem_map_mutex);
foreach (HIPMemMap::value_type &pair, hip_mem_map) {
device_memory &mem = *pair.first;
HIPMem *cmem = &pair.second;
/* Can only move textures allocated on this device (and not those from peer devices).
* And need to ignore memory that is already on the host. */
if (!mem.is_resident(this) || cmem->use_mapped_host) {
continue;
}
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
(&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
/* Can't move this type of memory. */
if (!is_texture || cmem->array) {
continue;
}
/* For other textures, only move image textures. */
if (for_texture && !is_image) {
continue;
}
/* Try to move largest allocation, prefer moving images. */
if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
max_is_image = is_image;
max_size = mem.device_size;
max_mem = &mem;
}
}
lock.unlock();
/* Move to host memory. This part is mutex protected since
* multiple HIP devices could be moving the memory. The
* first one will do it, and the rest will adopt the pointer. */
if (max_mem) {
VLOG_WORK << "Move memory from device to host: " << max_mem->name;
static thread_mutex move_mutex;
thread_scoped_lock lock(move_mutex);
any_device_moving_textures_to_host = true;
/* Potentially need to call back into multi device, so pointer mapping
* and peer devices are updated. This is also necessary since the device
* pointer may just be a key here, so cannot be accessed and freed directly.
* Unfortunately it does mean that memory is reallocated on all other
* devices as well, which is potentially dangerous when still in use (since
* a thread rendering on another devices would only be caught in this mutex
* if it so happens to do an allocation at the same time as well. */
max_mem->device_copy_to();
size = (max_size >= size) ? 0 : size - max_size;
any_device_moving_textures_to_host = false;
}
else {
break;
}
}
/* Unset flag before texture info is reloaded, since it should stay in device memory. */
move_texture_to_host = false;
/* Update texture info array with new pointers. */
load_texture_info();
}
HIPDevice::HIPMem *HIPDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
void HIPDevice::get_device_memory_info(size_t &total, size_t &free)
{
HIPContextScope scope(this);
hipDeviceptr_t device_pointer = 0;
size_t size = mem.memory_size() + pitch_padding;
hipError_t mem_alloc_result = hipErrorOutOfMemory;
const char *status = "";
/* First try allocating in device memory, respecting headroom. We make
* an exception for texture info. It is small and frequently accessed,
* so treat it as working memory.
*
* If there is not enough room for working memory, we will try to move
* textures to host memory, assuming the performance impact would have
* been worse for working memory. */
bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
size_t total = 0, free = 0;
hipMemGetInfo(&free, &total);
/* Move textures to host memory if needed. */
if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
move_textures_to_host(size + headroom - free, is_texture);
hipMemGetInfo(&free, &total);
}
/* Allocate in device memory. */
if (!move_texture_to_host && (size + headroom) < free) {
mem_alloc_result = hipMalloc(&device_pointer, size);
if (mem_alloc_result == hipSuccess) {
status = " in device memory";
}
}
/* Fall back to mapped host memory if needed and possible. */
void *shared_pointer = 0;
if (mem_alloc_result != hipSuccess && can_map_host) {
if (mem.shared_pointer) {
/* Another device already allocated host memory. */
mem_alloc_result = hipSuccess;
shared_pointer = mem.shared_pointer;
}
else if (map_host_used + size < map_host_limit) {
/* Allocate host memory ourselves. */
mem_alloc_result = hipHostMalloc(
&shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
assert((mem_alloc_result == hipSuccess && shared_pointer != 0) ||
(mem_alloc_result != hipSuccess && shared_pointer == 0));
}
if (mem_alloc_result == hipSuccess) {
hip_assert(hipHostGetDevicePointer(&device_pointer, shared_pointer, 0));
map_host_used += size;
status = " in host memory";
}
}
if (mem_alloc_result != hipSuccess) {
status = " failed, out of device and host memory";
set_error("System is out of GPU and shared host memory");
}
if (mem.name) {
VLOG_WORK << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")" << status;
}
mem.device_pointer = (device_ptr)device_pointer;
mem.device_size = size;
stats.mem_alloc(size);
if (!mem.device_pointer) {
return NULL;
}
/* Insert into map of allocations. */
thread_scoped_lock lock(hip_mem_map_mutex);
HIPMem *cmem = &hip_mem_map[&mem];
if (shared_pointer != 0) {
/* Replace host pointer with our host allocation. Only works if
* HIP memory layout is the same and has no pitch padding. Also
* does not work if we move textures to host during a render,
* since other devices might be using the memory. */
if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
mem.host_pointer != shared_pointer) {
memcpy(shared_pointer, mem.host_pointer, size);
/* A Call to device_memory::host_free() should be preceded by
* a call to device_memory::device_free() for host memory
* allocated by a device to be handled properly. Two exceptions
* are here and a call in OptiXDevice::generic_alloc(), where
* the current host memory can be assumed to be allocated by
* device_memory::host_alloc(), not by a device */
mem.host_free();
mem.host_pointer = shared_pointer;
}
mem.shared_pointer = shared_pointer;
mem.shared_counter++;
cmem->use_mapped_host = true;
}
else {
cmem->use_mapped_host = false;
}
return cmem;
}
void HIPDevice::generic_copy_to(device_memory &mem)
bool HIPDevice::alloc_device(void *&device_pointer, size_t size)
{
if (!mem.host_pointer || !mem.device_pointer) {
return;
}
HIPContextScope scope(this);
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
* hipMalloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
* mem.host_pointer. */
thread_scoped_lock lock(hip_mem_map_mutex);
if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const HIPContextScope scope(this);
hip_assert(
hipMemcpyHtoD((hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size()));
}
hipError_t mem_alloc_result = hipMalloc((hipDeviceptr_t *)&device_pointer, size);
return mem_alloc_result == hipSuccess;
}
void HIPDevice::generic_free(device_memory &mem)
void HIPDevice::free_device(void *device_pointer)
{
if (mem.device_pointer) {
HIPContextScope scope(this);
thread_scoped_lock lock(hip_mem_map_mutex);
DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
const HIPMem &cmem = hip_mem_map[&mem];
HIPContextScope scope(this);
/* If cmem.use_mapped_host is true, reference counting is used
* to safely free a mapped host memory. */
hip_assert(hipFree((hipDeviceptr_t)device_pointer));
}
if (cmem.use_mapped_host) {
assert(mem.shared_pointer);
if (mem.shared_pointer) {
assert(mem.shared_counter > 0);
if (--mem.shared_counter == 0) {
if (mem.host_pointer == mem.shared_pointer) {
mem.host_pointer = 0;
}
hipHostFree(mem.shared_pointer);
mem.shared_pointer = 0;
}
}
map_host_used -= mem.device_size;
}
else {
/* Free device memory. */
hip_assert(hipFree(mem.device_pointer));
}
bool HIPDevice::alloc_host(void *&shared_pointer, size_t size)
{
HIPContextScope scope(this);
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
hipError_t mem_alloc_result = hipHostMalloc(
&shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
hip_mem_map.erase(hip_mem_map.find(&mem));
}
return mem_alloc_result == hipSuccess;
}
void HIPDevice::free_host(void *shared_pointer)
{
HIPContextScope scope(this);
hipHostFree(shared_pointer);
}
bool HIPDevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
{
HIPContextScope scope(this);
hip_assert(hipHostGetDevicePointer((hipDeviceptr_t *)&device_pointer, shared_pointer, 0));
return true;
}
void HIPDevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
{
const HIPContextScope scope(this);
hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer, host_pointer, size));
}
void HIPDevice::mem_alloc(device_memory &mem)
@@ -823,8 +576,8 @@ void HIPDevice::mem_zero(device_memory &mem)
/* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
* regardless of mem.host_pointer and mem.shared_pointer. */
thread_scoped_lock lock(hip_mem_map_mutex);
if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
thread_scoped_lock lock(device_mem_map_mutex);
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const HIPContextScope scope(this);
hip_assert(hipMemsetD8((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size()));
}
@@ -951,19 +704,19 @@ void HIPDevice::tex_alloc(device_texture &mem)
return;
}
HIPMem *cmem = NULL;
Mem *cmem = NULL;
hArray array_3d = NULL;
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
size_t dst_pitch = src_pitch;
if (!mem.is_resident(this)) {
thread_scoped_lock lock(hip_mem_map_mutex);
cmem = &hip_mem_map[&mem];
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
cmem->texobject = 0;
if (mem.data_depth > 1) {
array_3d = (hArray)mem.device_pointer;
cmem->array = array_3d;
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
}
else if (mem.data_height > 0) {
dst_pitch = align_up(src_pitch, pitch_alignment);
@@ -1007,10 +760,10 @@ void HIPDevice::tex_alloc(device_texture &mem)
mem.device_size = size;
stats.mem_alloc(size);
thread_scoped_lock lock(hip_mem_map_mutex);
cmem = &hip_mem_map[&mem];
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
cmem->texobject = 0;
cmem->array = array_3d;
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
}
else if (mem.data_height > 0) {
/* 2D texture, using pitch aligned linear memory. */
@@ -1095,8 +848,8 @@ void HIPDevice::tex_alloc(device_texture &mem)
texDesc.filterMode = filter_mode;
texDesc.flags = HIP_TRSF_NORMALIZED_COORDINATES;
thread_scoped_lock lock(hip_mem_map_mutex);
cmem = &hip_mem_map[&mem];
thread_scoped_lock lock(device_mem_map_mutex);
cmem = &device_mem_map[&mem];
hip_assert(hipTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
@@ -1111,9 +864,9 @@ void HIPDevice::tex_free(device_texture &mem)
{
if (mem.device_pointer) {
HIPContextScope scope(this);
thread_scoped_lock lock(hip_mem_map_mutex);
DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
const HIPMem &cmem = hip_mem_map[&mem];
thread_scoped_lock lock(device_mem_map_mutex);
DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
const Mem &cmem = device_mem_map[&mem];
if (cmem.texobject) {
/* Free bindless texture. */
@@ -1122,16 +875,16 @@ void HIPDevice::tex_free(device_texture &mem)
if (!mem.is_resident(this)) {
/* Do not free memory here, since it was allocated on a different device. */
hip_mem_map.erase(hip_mem_map.find(&mem));
device_mem_map.erase(device_mem_map.find(&mem));
}
else if (cmem.array) {
/* Free array. */
hipArrayDestroy(cmem.array);
hipArrayDestroy(reinterpret_cast<hArray>(cmem.array));
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
hip_mem_map.erase(hip_mem_map.find(&mem));
device_mem_map.erase(device_mem_map.find(&mem));
}
else {
lock.unlock();
@@ -1153,7 +906,7 @@ bool HIPDevice::should_use_graphics_interop()
* possible, but from the empiric measurements it can be considerably slower than using naive
* pixels copy. */
/* Disable graphics interop for now, because of driver bug in 21.40. See T92972 */
/* Disable graphics interop for now, because of driver bug in 21.40. See #92972 */
# if 0
HIPContextScope scope(this);

View File

@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
class DeviceQueue;
class HIPDevice : public Device {
class HIPDevice : public GPUDevice {
friend class HIPContextScope;
@@ -26,36 +26,11 @@ class HIPDevice : public Device {
hipDevice_t hipDevice;
hipCtx_t hipContext;
hipModule_t hipModule;
size_t device_texture_headroom;
size_t device_working_headroom;
bool move_texture_to_host;
size_t map_host_used;
size_t map_host_limit;
int can_map_host;
int pitch_alignment;
int hipDevId;
int hipDevArchitecture;
bool first_error;
struct HIPMem {
HIPMem() : texobject(0), array(0), use_mapped_host(false)
{
}
hipTextureObject_t texobject;
hArray array;
/* If true, a mapped host memory in shared_pointer is being used. */
bool use_mapped_host;
};
typedef map<device_memory *, HIPMem> HIPMemMap;
HIPMemMap hip_mem_map;
thread_mutex hip_mem_map_mutex;
/* Bindless Textures */
device_vector<TextureInfo> texture_info;
bool need_texture_info;
HIPDeviceKernels kernels;
static bool have_precompiled_kernels();
@@ -81,17 +56,13 @@ class HIPDevice : public Device {
virtual bool load_kernels(const uint kernel_features) override;
void reserve_local_memory(const uint kernel_features);
void init_host_memory();
void load_texture_info();
void move_textures_to_host(size_t size, bool for_texture);
HIPMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
void generic_copy_to(device_memory &mem);
void generic_free(device_memory &mem);
virtual void get_device_memory_info(size_t &total, size_t &free) override;
virtual bool alloc_device(void *&device_pointer, size_t size) override;
virtual void free_device(void *device_pointer) override;
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
virtual void free_host(void *shared_pointer) override;
virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;
void mem_alloc(device_memory &mem) override;

View File

@@ -73,6 +73,10 @@ const char *device_kernel_as_string(DeviceKernel kernel)
return "integrator_terminated_paths_array";
case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
return "integrator_sorted_paths_array";
case DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS:
return "integrator_sort_bucket_pass";
case DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS:
return "integrator_sort_write_pass";
case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
return "integrator_compact_paths_array";
case DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES:

View File

@@ -247,6 +247,8 @@ class device_memory {
bool is_resident(Device *sub_device) const;
protected:
friend class Device;
friend class GPUDevice;
friend class CUDADevice;
friend class OptiXDevice;
friend class HIPDevice;

View File

@@ -21,6 +21,7 @@ class BVHMetal : public BVH {
API_AVAILABLE(macos(11.0))
vector<id<MTLAccelerationStructure>> blas_array;
vector<uint32_t> blas_lookup;
bool motion_blur = false;

View File

@@ -816,6 +816,11 @@ bool BVHMetal::build_TLAS(Progress &progress,
uint32_t instance_index = 0;
uint32_t motion_transform_index = 0;
// allocate look up buffer for wost case scenario
uint64_t count = objects.size();
blas_lookup.resize(count);
for (Object *ob : objects) {
/* Skip non-traceable objects */
if (!ob->is_traceable())
@@ -843,12 +848,15 @@ bool BVHMetal::build_TLAS(Progress &progress,
/* Set user instance ID to object index */
int object_index = ob->get_device_index();
uint32_t user_id = uint32_t(object_index);
int currIndex = instance_index++;
assert(user_id < blas_lookup.size());
blas_lookup[user_id] = accel_struct_index;
/* Bake into the appropriate descriptor */
if (motion_blur) {
MTLAccelerationStructureMotionInstanceDescriptor *instances =
(MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++];
MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];
desc.accelerationStructureIndex = accel_struct_index;
desc.userID = user_id;
@@ -894,7 +902,7 @@ bool BVHMetal::build_TLAS(Progress &progress,
else {
MTLAccelerationStructureUserIDInstanceDescriptor *instances =
(MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++];
MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];
desc.accelerationStructureIndex = accel_struct_index;
desc.userID = user_id;

Some files were not shown because too many files have changed in this diff Show More