Compare commits

..

19 Commits

Author SHA1 Message Date
01f509232b Merge branch 'master' into refactor-mesh-sharp-edge-generic 2023-01-10 13:05:56 -05:00
5609f61bb3 Merge branch 'master' into refactor-mesh-sharp-edge-generic 2023-01-09 13:16:58 -05:00
a357adf6cd Remove dot from attribute name 2023-01-04 23:58:13 -05:00
b18fd83883 Cleanup: Use static cast 2023-01-04 22:54:43 -05:00
4c2c114324 Use VArraySpan for sharp edge face set conversion 2023-01-04 22:54:35 -05:00
e31f5fde34 Fix sharp edge data transfer modifier 2023-01-04 22:47:20 -05:00
cbbab81b9e Cleanup: Use helper variable for better formatting 2023-01-04 22:47:06 -05:00
5fce94f825 Move code to make diff smaller 2023-01-04 22:46:50 -05:00
2e87364e3c Fix sharp tagging in Curve to Mesh conversion 2023-01-04 22:18:18 -05:00
8fa7bcb59a Fix bad BMesh Mesh conversion logic 2023-01-04 22:18:05 -05:00
d9f71d3133 Fix crash in BMesh Mesh conversion 2023-01-04 20:55:38 -05:00
892fbf5ab6 Merge branch 'master' into refactor-mesh-sharp-edge-generic 2023-01-04 20:27:53 -05:00
c998d56b1e Add missing sharp edge handling when converting to and from BMesh 2023-01-03 22:34:26 -05:00
0178d3e4bb Add missing attribute provider 2023-01-03 22:32:35 -05:00
8e81e3b229 Fix variable name 2023-01-03 22:32:22 -05:00
155ec559fb Merge branch 'master' into refactor-mesh-sharp-edge-generic 2023-01-03 20:29:16 -05:00
f4292febb4 Various fixes 2023-01-03 20:28:02 -05:00
c5331f134d Merge branch 'master' into refactor-mesh-sharp-edge-generic 2023-01-03 20:04:51 -05:00
4e3b1ef861 Mesh: Move sharp edges to generic attribute
Initial compiling version, many tests don't pass yet.
2023-01-03 17:22:19 -05:00
1132 changed files with 17657 additions and 29461 deletions

View File

@@ -1,41 +0,0 @@
name: Bug Report
about: File a bug report
labels:
- bug
body:
- type: markdown
attributes:
value: |
### Instructions
First time reporting? See [tips](https://wiki.blender.org/wiki/Process/Bug_Reports) and [walkthrough video](https://www.youtube.com/watch?v=JTD0OJq_rF4).
* Use **Help > Report a Bug** in Blender to fill system information and exact Blender version.
* Test [daily builds](https://builder.blender.org/) to verify if the issue is already fixed.
* Test [previous versions](https://download.blender.org/release/) to find an older working version.
* For feature requests, feedback, questions or build issues, see [communication channels](https://wiki.blender.org/wiki/Communication/Contact#User_Feedback_and_Requests).
* If there are multiple bugs, make multiple bug reports.
- type: textarea
id: body
attributes:
label: "Description"
value: |
**System Information**
Operating system:
Graphics card:
**Blender Version**
Broken: (example: 2.80, edbf15d3c044, master, 2018-11-28, as found on the splash screen)
Worked: (newest version of Blender that worked as expected)
**Short description of error**
**Exact steps for others to reproduce the error**
Based on the default startup or an attached .blend file (as simple as possible).
- type: markdown
attributes:
value: |
### Help the developers
Bug fixing is important, the developers will handle reports swiftly. For that reason, carefully provide exact steps and a **small and simple .blend file** to reproduce the problem. You do your half of the work, then we do our half!

View File

@@ -1 +0,0 @@
blank_issues_enabled: false

View File

@@ -1,9 +0,0 @@
name: Design
about: Create a design task (for developers only)
labels:
- design
body:
- type: textarea
id: body
attributes:
label: "Description"

View File

@@ -1,9 +0,0 @@
name: To Do
about: Create a to do task (for developers only)
labels:
- todo
body:
- type: textarea
id: body
attributes:
label: "Description"

View File

@@ -1,20 +0,0 @@
name: Pull Request
about: Contribute code to Blender
body:
- type: markdown
attributes:
value: |
### Instructions
Guides to [contributing code](https://wiki.blender.org/index.php/Dev:Doc/Process/Contributing_Code) and effective [code review](https://wiki.blender.org/index.php/Dev:Doc/Tools/Code_Review).
By submitting code here, you agree that the code is (compatible with) GNU GPL v2 or later.
- type: textarea
id: body
attributes:
label: "Description"
value: |
Description of the problem that is addressed in the patch.
Description of the proposed solution and its implementation.

View File

@@ -399,26 +399,6 @@ mark_as_advanced(WITH_SYSTEM_GLOG)
# Freestyle
option(WITH_FREESTYLE "Enable Freestyle (advanced edges rendering)" ON)
# Libraries.
if(UNIX AND NOT APPLE)
# Optionally build without pre-compiled libraries.
# NOTE: this could be supported on all platforms however in practice UNIX is the only platform
# that has good support for detecting installed libraries.
option(WITH_LIBS_PRECOMPILED "\
Detect and link against pre-compiled libraries (typically found under \"../lib/\"). \
Disabling this option will use the system libraries although cached paths \
that point to pre-compiled libraries will be left as-is."
ON
)
mark_as_advanced(WITH_LIBS_PRECOMPILED)
option(WITH_STATIC_LIBS "Try to link with static libraries, as much as possible, to make blender more portable across distributions" OFF)
if(WITH_STATIC_LIBS)
option(WITH_BOOST_ICU "Boost uses ICU library (required for linking with static Boost built with libicu)." OFF)
mark_as_advanced(WITH_BOOST_ICU)
endif()
endif()
# Misc
if(WIN32 OR APPLE)
option(WITH_INPUT_IME "Enable Input Method Editor (IME) for complex Asian character input" ON)
@@ -426,6 +406,11 @@ endif()
option(WITH_INPUT_NDOF "Enable NDOF input devices (SpaceNavigator and friends)" ON)
if(UNIX AND NOT APPLE)
option(WITH_INSTALL_PORTABLE "Install redistributable runtime, otherwise install into CMAKE_INSTALL_PREFIX" ON)
option(WITH_STATIC_LIBS "Try to link with static libraries, as much as possible, to make blender more portable across distributions" OFF)
if(WITH_STATIC_LIBS)
option(WITH_BOOST_ICU "Boost uses ICU library (required for linking with static Boost built with libicu)." OFF)
mark_as_advanced(WITH_BOOST_ICU)
endif()
endif()
option(WITH_PYTHON_INSTALL "Copy system python into the blender install folder" ON)
@@ -506,7 +491,7 @@ endif()
if(NOT APPLE)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
endif()
@@ -620,6 +605,10 @@ else()
set(WITH_METAL_BACKEND OFF)
endif()
if(WITH_METAL_BACKEND)
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.15" CACHE STRING "Minimum OS X deployment version" FORCE)
endif()
if(WIN32)
getDefaultWindowsPrefixBase(CMAKE_GENERIC_PROGRAM_FILES)
set(CPACK_INSTALL_PREFIX ${CMAKE_GENERIC_PROGRAM_FILES}/${})
@@ -1008,8 +997,6 @@ set(PLATFORM_LINKLIBS "")
# - CMAKE_EXE_LINKER_FLAGS_DEBUG
set(PLATFORM_LINKFLAGS "")
set(PLATFORM_LINKFLAGS_DEBUG "")
set(PLATFORM_LINKFLAGS_RELEASE "")
set(PLATFORM_LINKFLAGS_EXECUTABLE "")
if(NOT CMAKE_BUILD_TYPE MATCHES "Release")
if(WITH_COMPILER_ASAN)
@@ -1279,14 +1266,12 @@ endif()
# -----------------------------------------------------------------------------
# Configure Bullet
if(WITH_BULLET)
if(WITH_SYSTEM_BULLET)
find_package(Bullet)
set_and_warn_library_found("Bullet" BULLET_FOUND WITH_BULLET)
else()
set(BULLET_INCLUDE_DIRS "${CMAKE_SOURCE_DIR}/extern/bullet2/src")
set(BULLET_LIBRARIES "extern_bullet")
endif()
if(WITH_BULLET AND WITH_SYSTEM_BULLET)
find_package(Bullet)
set_and_warn_library_found("Bullet" BULLET_FOUND WITH_BULLET)
else()
set(BULLET_INCLUDE_DIRS "${CMAKE_SOURCE_DIR}/extern/bullet2/src")
# set(BULLET_LIBRARIES "")
endif()
@@ -1446,9 +1431,6 @@ if(CMAKE_COMPILER_IS_GNUCC)
add_check_c_compiler_flag(C_WARNINGS C_WARN_TYPE_LIMITS -Wtype-limits)
add_check_c_compiler_flag(C_WARNINGS C_WARN_FORMAT_SIGN -Wformat-signedness)
add_check_c_compiler_flag(C_WARNINGS C_WARN_RESTRICT -Wrestrict)
# Useful but too many false positives and inconvenient to suppress each occurrence.
add_check_c_compiler_flag(C_WARNINGS C_WARN_NO_STRINGOP_OVERREAD -Wno-stringop-overread)
add_check_c_compiler_flag(C_WARNINGS C_WARN_NO_STRINGOP_OVERFLOW -Wno-stringop-overflow)
# C-only.
add_check_c_compiler_flag(C_WARNINGS C_WARN_NO_NULL -Wnonnull)
@@ -1488,9 +1470,6 @@ if(CMAKE_COMPILER_IS_GNUCC)
add_check_cxx_compiler_flag(CXX_WARNINGS CXX_WARN_RESTRICT -Wrestrict)
add_check_cxx_compiler_flag(CXX_WARNINGS CXX_WARN_NO_SUGGEST_OVERRIDE -Wno-suggest-override)
add_check_cxx_compiler_flag(CXX_WARNINGS CXX_WARN_UNINITIALIZED -Wuninitialized)
# Useful but too many false positives and inconvenient to suppress each occurrence.
add_check_cxx_compiler_flag(CXX_WARNINGS CXX_WARN_NO_STRINGOP_OVERREAD -Wno-stringop-overread)
add_check_cxx_compiler_flag(CXX_WARNINGS CXX_WARN_NO_STRINGOP_OVERFLOW -Wno-stringop-overflow)
# causes too many warnings
if(NOT APPLE)

View File

@@ -1,38 +0,0 @@
<!--
Keep this document short & concise,
linking to external resources instead of including content in-line.
See 'release/text/readme.html' for the end user read-me.
-->
Blender
=======
Blender is the free and open source 3D creation suite.
It supports the entirety of the 3D pipeline-modeling, rigging, animation, simulation, rendering, compositing,
motion tracking and video editing.
![Blender screenshot](https://code.blender.org/wp-content/uploads/2018/12/springrg.jpg "Blender screenshot")
Project Pages
-------------
- [Main Website](http://www.blender.org)
- [Reference Manual](https://docs.blender.org/manual/en/latest/index.html)
- [User Community](https://www.blender.org/community/)
Development
-----------
- [Build Instructions](https://wiki.blender.org/wiki/Building_Blender)
- [Code Review & Bug Tracker](https://developer.blender.org)
- [Developer Forum](https://devtalk.blender.org)
- [Developer Documentation](https://wiki.blender.org)
License
-------
Blender as a whole is licensed under the GNU General Public License, Version 3.
Individual files may have a different, but compatible license.
See [blender.org/about/license](https://www.blender.org/about/license) for details.

View File

@@ -29,7 +29,7 @@ elseif(UNIX)
set(USD_PLATFORM_FLAGS
-DPYTHON_INCLUDE_DIR=${LIBDIR}/python/include/python${PYTHON_SHORT_VERSION}/
-DPYTHON_LIBRARY=${LIBDIR}/tbb/lib/${LIBPREFIX}${TBB_LIBRARY}${SHAREDLIBEXT}
)
)
if(APPLE)
set(USD_SHARED_LINKER_FLAGS "-Xlinker -undefined -Xlinker dynamic_lookup")

View File

@@ -19,13 +19,9 @@ ENDIF()
SET(_moltenvk_SEARCH_DIRS
${MOLTENVK_ROOT_DIR}
${LIBDIR}/vulkan/MoltenVK
)
# FIXME: These finder modules typically don't use LIBDIR,
# this should be set by `./build_files/cmake/platform/` instead.
IF(DEFINED LIBDIR)
SET(_moltenvk_SEARCH_DIRS ${_moltenvk_SEARCH_DIRS} ${LIBDIR}/vulkan/MoltenVK)
ENDIF()
FIND_PATH(MOLTENVK_INCLUDE_DIR
NAMES

View File

@@ -17,13 +17,9 @@ ENDIF()
SET(_optix_SEARCH_DIRS
${OPTIX_ROOT_DIR}
"$ENV{PROGRAMDATA}/NVIDIA Corporation/OptiX SDK 7.3.0"
)
# TODO: Which environment uses this?
if(DEFINED ENV{PROGRAMDATA})
list(APPEND _optix_SEARCH_DIRS "$ENV{PROGRAMDATA}/NVIDIA Corporation/OptiX SDK 7.3.0")
endif()
FIND_PATH(OPTIX_INCLUDE_DIR
NAMES
optix.h

View File

@@ -67,8 +67,6 @@ ENDIF()
STRING(REPLACE "." "" PYTHON_VERSION_NO_DOTS ${PYTHON_VERSION})
SET(_PYTHON_ABI_FLAGS "")
SET(_python_SEARCH_DIRS
${PYTHON_ROOT_DIR}
"$ENV{HOME}/py${PYTHON_VERSION_NO_DOTS}"

View File

@@ -6,80 +6,18 @@
import re
import sys
from typing import Optional
cmakelists_file = sys.argv[-1]
def count_backslashes_before_pos(file_data: str, pos: int) -> int:
slash_count = 0
pos -= 1
while pos >= 0:
if file_data[pos] != '\\':
break
pos -= 1
slash_count += 1
return slash_count
def extract_cmake_string_at_pos(file_data: str, pos_beg: int) -> Optional[str]:
assert file_data[pos_beg - 1] == '"'
pos = pos_beg
# Dummy assignment.
pos_end = pos_beg
while True:
pos_next = file_data.find('"', pos)
if pos_next == -1:
raise Exception("Un-terminated string (parse error?)")
count_slashes = count_backslashes_before_pos(file_data, pos_next)
if (count_slashes % 2) == 0:
pos_end = pos_next
# Found the closing quote.
break
# The quote was back-slash escaped, step over it.
pos = pos_next + 1
file_data[pos_next]
assert file_data[pos_end] == '"'
if pos_beg == pos_end:
return None
# See: https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#escape-sequences
text = file_data[pos_beg: pos_end].replace(
# Handle back-slash literals.
"\\\\", "\\",
).replace(
# Handle tabs.
"\\t", "\t",
).replace(
# Handle escaped quotes.
"\\\"", "\"",
).replace(
# Handle tabs.
"\\;", ";",
).replace(
# Handle trailing newlines.
"\\\n", "",
)
return text
def main() -> None:
def main():
options = []
with open(cmakelists_file, 'r', encoding="utf-8") as fh:
file_data = fh.read()
for m in re.finditer(r"^\s*option\s*\(\s*(WITH_[a-zA-Z0-9_]+)\s+(\")", file_data, re.MULTILINE):
option_name = m.group(1)
option_descr = extract_cmake_string_at_pos(file_data, m.span(2)[1])
if option_descr is None:
# Possibly a parsing error, at least show something.
option_descr = "(UNDOCUMENTED)"
options.append("{:s}: {:s}".format(option_name, option_descr))
for l in open(cmakelists_file, 'r').readlines():
if not l.lstrip().startswith('#'):
l_option = re.sub(r'.*\boption\s*\(\s*(WITH_[a-zA-Z0-9_]+)\s+\"(.*)\"\s*.*', r'\g<1> - \g<2>', l)
if l_option != l:
l_option = l_option.strip()
if l_option.startswith('WITH_'):
options.append(l_option)
print('\n'.join(options))

View File

@@ -550,9 +550,7 @@ function(setup_platform_linker_libs
endif()
if(WIN32 AND NOT UNIX)
if(DEFINED PTHREADS_LIBRARIES)
target_link_libraries(${target} ${PTHREADS_LIBRARIES})
endif()
target_link_libraries(${target} ${PTHREADS_LIBRARIES})
endif()
# target_link_libraries(${target} ${PLATFORM_LINKLIBS} ${CMAKE_DL_LIBS})
@@ -1117,7 +1115,7 @@ function(find_python_package
# endif()
# Not set, so initialize.
else()
string(REPLACE "." ";" _PY_VER_SPLIT "${PYTHON_VERSION}")
string(REPLACE "." ";" _PY_VER_SPLIT "${PYTHON_VERSION}")
list(GET _PY_VER_SPLIT 0 _PY_VER_MAJOR)
# re-cache
@@ -1264,7 +1262,7 @@ endmacro()
# Utility to gather and install precompiled shared libraries.
macro(add_bundled_libraries library_dir)
if(DEFINED LIBDIR)
if(EXISTS ${LIBDIR})
set(_library_dir ${LIBDIR}/${library_dir})
if(WIN32)
file(GLOB _all_library_versions ${_library_dir}/*\.dll)
@@ -1277,7 +1275,7 @@ macro(add_bundled_libraries library_dir)
list(APPEND PLATFORM_BUNDLED_LIBRARY_DIRS ${_library_dir})
unset(_all_library_versions)
unset(_library_dir)
endif()
endif()
endmacro()
macro(windows_install_shared_manifest)

View File

@@ -155,8 +155,8 @@ if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
# M1 chips run Big Sur onwards.
set(OSX_MIN_DEPLOYMENT_TARGET 11.00)
else()
# 10.15 is our min. target, if you use higher sdk, weak linking happens
set(OSX_MIN_DEPLOYMENT_TARGET 10.15)
# 10.13 is our min. target, if you use higher sdk, weak linking happens
set(OSX_MIN_DEPLOYMENT_TARGET 10.13)
endif()
set(CMAKE_OSX_DEPLOYMENT_TARGET "${OSX_MIN_DEPLOYMENT_TARGET}" CACHE STRING "" FORCE)

View File

@@ -1,12 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-or-later
# Copyright 2022 Blender Foundation. All rights reserved.
# Auto update existing CMake caches for new libraries.
# Assert that `LIBDIR` is defined.
if(NOT (DEFINED LIBDIR))
message(FATAL_ERROR "Logical error, expected 'LIBDIR' to be defined!")
endif()
# Auto update existing CMake caches for new libraries
# Clear cached variables whose name matches `pattern`.
function(unset_cache_variables pattern)

View File

@@ -4,52 +4,38 @@
# Libraries configuration for any *nix system including Linux and Unix (excluding APPLE).
# Detect precompiled library directory
if(NOT DEFINED LIBDIR)
# Path to a locally compiled libraries.
set(LIBDIR_NAME ${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR})
string(TOLOWER ${LIBDIR_NAME} LIBDIR_NAME)
set(LIBDIR_NATIVE_ABI ${CMAKE_SOURCE_DIR}/../lib/${LIBDIR_NAME})
if(NOT WITH_LIBS_PRECOMPILED)
unset(LIBDIR)
else()
if(NOT DEFINED LIBDIR)
# Path to a locally compiled libraries.
set(LIBDIR_NAME ${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR})
string(TOLOWER ${LIBDIR_NAME} LIBDIR_NAME)
set(LIBDIR_NATIVE_ABI ${CMAKE_SOURCE_DIR}/../lib/${LIBDIR_NAME})
# Path to precompiled libraries with known glibc 2.28 ABI.
set(LIBDIR_GLIBC228_ABI ${CMAKE_SOURCE_DIR}/../lib/linux_x86_64_glibc_228)
# Path to precompiled libraries with known glibc 2.28 ABI.
set(LIBDIR_GLIBC228_ABI ${CMAKE_SOURCE_DIR}/../lib/linux_x86_64_glibc_228)
# Choose the best suitable libraries.
if(EXISTS ${LIBDIR_NATIVE_ABI})
set(LIBDIR ${LIBDIR_NATIVE_ABI})
# Choose the best suitable libraries.
if(EXISTS ${LIBDIR_NATIVE_ABI})
set(LIBDIR ${LIBDIR_NATIVE_ABI})
set(WITH_LIBC_MALLOC_HOOK_WORKAROUND True)
elseif(EXISTS ${LIBDIR_GLIBC228_ABI})
set(LIBDIR ${LIBDIR_GLIBC228_ABI})
if(WITH_MEM_JEMALLOC)
# jemalloc provides malloc hooks.
set(WITH_LIBC_MALLOC_HOOK_WORKAROUND False)
else()
set(WITH_LIBC_MALLOC_HOOK_WORKAROUND True)
elseif(EXISTS ${LIBDIR_GLIBC228_ABI})
set(LIBDIR ${LIBDIR_GLIBC228_ABI})
if(WITH_MEM_JEMALLOC)
# jemalloc provides malloc hooks.
set(WITH_LIBC_MALLOC_HOOK_WORKAROUND False)
else()
set(WITH_LIBC_MALLOC_HOOK_WORKAROUND True)
endif()
endif()
# Avoid namespace pollustion.
unset(LIBDIR_NATIVE_ABI)
unset(LIBDIR_GLIBC228_ABI)
endif()
if(NOT (EXISTS ${LIBDIR}))
message(STATUS
"Unable to find LIBDIR: ${LIBDIR}, system libraries may be used "
"(disable WITH_LIBS_PRECOMPILED to suppress this message)."
)
unset(LIBDIR)
endif()
# Avoid namespace pollustion.
unset(LIBDIR_NATIVE_ABI)
unset(LIBDIR_GLIBC228_ABI)
endif()
# Support restoring this value once pre-compiled libraries have been handled.
set(WITH_STATIC_LIBS_INIT ${WITH_STATIC_LIBS})
if(DEFINED LIBDIR)
if(EXISTS ${LIBDIR})
message(STATUS "Using pre-compiled LIBDIR: ${LIBDIR}")
file(GLOB LIB_SUBDIRS ${LIBDIR}/*)
@@ -99,7 +85,7 @@ endmacro()
# These are libraries that may be precompiled. For this we disable searching in
# the system directories so that we don't accidentally use them instead.
if(DEFINED LIBDIR)
if(EXISTS ${LIBDIR})
without_system_libs_begin()
endif()
@@ -128,7 +114,7 @@ endfunction()
if(NOT WITH_SYSTEM_FREETYPE)
# FreeType compiled with Brotli compression for woff2.
find_package_wrapper(Freetype REQUIRED)
if(DEFINED LIBDIR)
if(EXISTS ${LIBDIR})
find_package_wrapper(Brotli REQUIRED)
# NOTE: This is done on WIN32 & APPLE but fails on some Linux systems.
@@ -155,7 +141,7 @@ if(WITH_PYTHON)
if(WITH_PYTHON_MODULE AND NOT WITH_INSTALL_PORTABLE)
# Installing into `site-packages`, warn when installing into `./../lib/`
# which script authors almost certainly don't want.
if(DEFINED LIBDIR)
if(EXISTS ${LIBDIR})
path_is_prefix(LIBDIR PYTHON_SITE_PACKAGES _is_prefix)
if(_is_prefix)
message(WARNING "
@@ -231,7 +217,7 @@ if(WITH_CODEC_SNDFILE)
endif()
if(WITH_CODEC_FFMPEG)
if(DEFINED LIBDIR)
if(EXISTS ${LIBDIR})
set(FFMPEG_ROOT_DIR ${LIBDIR}/ffmpeg)
# Override FFMPEG components to also include static library dependencies
# included with precompiled libraries, and to ensure correct link order.
@@ -246,7 +232,7 @@ if(WITH_CODEC_FFMPEG)
vpx
x264
xvidcore)
if((DEFINED LIBDIR) AND (EXISTS ${LIBDIR}/ffmpeg/lib/libaom.a))
if(EXISTS ${LIBDIR}/ffmpeg/lib/libaom.a)
list(APPEND FFMPEG_FIND_COMPONENTS aom)
endif()
elseif(FFMPEG)
@@ -444,13 +430,10 @@ if(WITH_OPENIMAGEIO)
${PNG_LIBRARIES}
${JPEG_LIBRARIES}
${ZLIB_LIBRARIES}
${BOOST_LIBRARIES}
)
set(OPENIMAGEIO_DEFINITIONS "")
if(WITH_BOOST)
list(APPEND OPENIMAGEIO_LIBRARIES "${BOOST_LIBRARIES}")
endif()
if(WITH_IMAGE_TIFF)
list(APPEND OPENIMAGEIO_LIBRARIES "${TIFF_LIBRARY}")
endif()
@@ -468,7 +451,7 @@ add_bundled_libraries(openimageio/lib)
if(WITH_OPENCOLORIO)
find_package_wrapper(OpenColorIO 2.0.0)
set(OPENCOLORIO_DEFINITIONS "")
set(OPENCOLORIO_DEFINITIONS)
set_and_warn_library_found("OpenColorIO" OPENCOLORIO_FOUND WITH_OPENCOLORIO)
endif()
add_bundled_libraries(opencolorio/lib)
@@ -483,7 +466,7 @@ if(WITH_OPENIMAGEDENOISE)
endif()
if(WITH_LLVM)
if(DEFINED LIBDIR)
if(EXISTS ${LIBDIR})
set(LLVM_STATIC ON)
endif()
@@ -497,7 +480,7 @@ if(WITH_LLVM)
endif()
# Symbol conflicts with same UTF library used by OpenCollada
if(DEFINED LIBDIR)
if(EXISTS ${LIBDIR})
if(WITH_OPENCOLLADA AND (${LLVM_VERSION} VERSION_LESS "4.0.0"))
list(REMOVE_ITEM OPENCOLLADA_LIBRARIES ${OPENCOLLADA_UTF_LIBRARY})
endif()
@@ -553,7 +536,7 @@ if(WITH_CYCLES AND WITH_CYCLES_PATH_GUIDING)
endif()
endif()
if(DEFINED LIBDIR)
if(EXISTS ${LIBDIR})
without_system_libs_end()
endif()
@@ -568,14 +551,9 @@ else()
endif()
find_package(Threads REQUIRED)
# `FindThreads` documentation notes that this may be empty
# with the system libraries provide threading functionality.
if(CMAKE_THREAD_LIBS_INIT)
list(APPEND PLATFORM_LINKLIBS ${CMAKE_THREAD_LIBS_INIT})
# used by other platforms
set(PTHREADS_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
endif()
list(APPEND PLATFORM_LINKLIBS ${CMAKE_THREAD_LIBS_INIT})
# used by other platforms
set(PTHREADS_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
if(CMAKE_DL_LIBS)
list(APPEND PLATFORM_LINKLIBS ${CMAKE_DL_LIBS})
@@ -597,7 +575,7 @@ add_definitions(-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE
#
# Keep last, so indirectly linked libraries don't override our own pre-compiled libs.
if(DEFINED LIBDIR)
if(EXISTS ${LIBDIR})
# Clear the prefix path as it causes the `LIBDIR` to override system locations.
unset(CMAKE_PREFIX_PATH)
@@ -653,7 +631,7 @@ if(WITH_GHOST_WAYLAND)
# When dynamically linked WAYLAND is used and `${LIBDIR}/wayland` is present,
# there is no need to search for the libraries as they are not needed for building.
# Only the headers are needed which can reference the known paths.
if((DEFINED LIBDIR) AND (EXISTS "${LIBDIR}/wayland" AND WITH_GHOST_WAYLAND_DYNLOAD))
if(EXISTS "${LIBDIR}/wayland" AND WITH_GHOST_WAYLAND_DYNLOAD)
set(_use_system_wayland OFF)
else()
set(_use_system_wayland ON)
@@ -717,7 +695,7 @@ if(WITH_GHOST_WAYLAND)
add_definitions(-DWITH_GHOST_WAYLAND_LIBDECOR)
endif()
if((DEFINED LIBDIR) AND (EXISTS "${LIBDIR}/wayland/bin/wayland-scanner"))
if(EXISTS "${LIBDIR}/wayland/bin/wayland-scanner")
set(WAYLAND_SCANNER "${LIBDIR}/wayland/bin/wayland-scanner")
else()
pkg_get_variable(WAYLAND_SCANNER wayland-scanner wayland_scanner)

View File

@@ -43,10 +43,6 @@ update-code:
branch: trunk
commit_id: HEAD
path: lib/benchmarks
assets:
branch: trunk
commit_id: HEAD
path: lib/assets
#
# Buildbot only configs
@@ -63,7 +59,7 @@ buildbot:
optix:
version: '7.3.0'
ocloc:
version: '101.4032'
version: '101.3430'
cmake:
default:
version: any

View File

@@ -104,30 +104,17 @@ def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None
svn_url_tests = svn_url + lib_tests
call(svn_non_interactive + ["checkout", svn_url_tests, lib_tests_dirpath])
lib_assets = "assets"
lib_assets_dirpath = os.path.join(lib_dirpath, lib_assets)
if not os.path.exists(lib_assets_dirpath):
print_stage("Checking out Assets")
if make_utils.command_missing(args.svn_command):
sys.stderr.write("svn not found, can't checkout assets\n")
sys.exit(1)
svn_url_assets = svn_url + lib_assets
call(svn_non_interactive + ["checkout", svn_url_assets, lib_assets_dirpath])
# Update precompiled libraries, assets and tests
# Update precompiled libraries and tests
if not os.path.isdir(lib_dirpath):
print("Library path: %r, not found, skipping" % lib_dirpath)
else:
paths_local_and_remote = []
if os.path.exists(os.path.join(lib_dirpath, ".svn")):
print_stage("Updating Precompiled Libraries, Assets and Tests (one repository)")
print_stage("Updating Precompiled Libraries and Tests (one repository)")
paths_local_and_remote.append((lib_dirpath, svn_url))
else:
print_stage("Updating Precompiled Libraries, Assets and Tests (multiple repositories)")
print_stage("Updating Precompiled Libraries and Tests (multiple repositories)")
# Separate paths checked out.
for dirname in os.listdir(lib_dirpath):
if dirname.startswith("."):

View File

@@ -13,16 +13,16 @@ Blender stores 4 main arrays to define mesh geometry.
- :class:`Mesh.polygons`: (reference a range of loops)
Each polygon references a slice in the loop array, this way, polygons do not store vertices or corner data such as UVs directly,
Each polygon reference a slice in the loop array, this way, polygons do not store vertices or corner data such as UV's directly,
only a reference to loops that the polygon uses.
:class:`Mesh.loops`, :class:`Mesh.uv_layers` :class:`Mesh.vertex_colors` are all aligned so the same polygon loop
indices can be used to find the UVs and vertex colors as with as the vertices.
indices can be used to find the UV's and vertex colors as with as the vertices.
To compare mesh API options see: :ref:`NGons and Tessellation Faces <info_gotcha_mesh_faces>`
This example script prints the vertices and UVs for each polygon, assumes the active object is a mesh with UVs.
This example script prints the vertices and UV's for each polygon, assumes the active object is a mesh with UVs.
"""
import bpy

View File

@@ -2098,8 +2098,6 @@ def write_rst_types_index(basepath):
fw(title_string("Types (bpy.types)", "="))
fw(".. module:: bpy.types\n\n")
fw(".. toctree::\n")
# Only show top-level entries (avoids unreasonably large pages).
fw(" :maxdepth: 1\n")
fw(" :glob:\n\n")
fw(" bpy.types.*\n\n")
@@ -2126,8 +2124,6 @@ def write_rst_ops_index(basepath):
write_example_ref("", fw, "bpy.ops")
fw(".. toctree::\n")
fw(" :caption: Submodules\n")
# Only show top-level entries (avoids unreasonably large pages).
fw(" :maxdepth: 1\n")
fw(" :glob:\n\n")
fw(" bpy.ops.*\n\n")
file.close()

View File

@@ -513,19 +513,17 @@ if(WITH_FFTW)
src/fx/Convolver.cpp
src/fx/ConvolverReader.cpp
src/fx/ConvolverSound.cpp
src/fx/Equalizer.cpp
src/fx/FFTConvolver.cpp
src/fx/HRTF.cpp
src/fx/ImpulseResponse.cpp
src/util/FFTPlan.cpp
)
set(FFTW_HDR
include/fx/BinauralSound.h
include/fx/BinauralSound.h
include/fx/BinauralReader.h
include/fx/Convolver.h
include/fx/ConvolverReader.h
include/fx/ConvolverSound.h
include/fx/Equalizer.h
include/fx/FFTConvolver.h
include/fx/HRTF.h
include/fx/HRTFLoader.h

View File

@@ -54,7 +54,6 @@
#ifdef WITH_CONVOLUTION
#include "fx/BinauralSound.h"
#include "fx/ConvolverSound.h"
#include "fx/Equalizer.h"
#endif
#include <cassert>
@@ -769,14 +768,4 @@ AUD_API AUD_Sound* AUD_Sound_Binaural(AUD_Sound* sound, AUD_HRTF* hrtfs, AUD_Sou
}
}
AUD_API AUD_Sound* AUD_Sound_equalize(AUD_Sound* sound, float *definition, int size, float maxFreqEq, int sizeConversion)
{
assert(sound);
std::shared_ptr<Buffer> buf = std::shared_ptr<Buffer>(new Buffer(sizeof(float)*size));
std::memcpy(buf->getBuffer(), definition, sizeof(float)*size);
AUD_Sound *equalizer=new AUD_Sound(new Equalizer(*sound, buf, size, maxFreqEq, sizeConversion));
return equalizer;
}
#endif

View File

@@ -397,16 +397,6 @@ extern AUD_API AUD_Sound* AUD_Sound_mutable(AUD_Sound* sound);
#ifdef WITH_CONVOLUTION
extern AUD_API AUD_Sound* AUD_Sound_Convolver(AUD_Sound* sound, AUD_ImpulseResponse* filter, AUD_ThreadPool* threadPool);
extern AUD_API AUD_Sound* AUD_Sound_Binaural(AUD_Sound* sound, AUD_HRTF* hrtfs, AUD_Source* source, AUD_ThreadPool* threadPool);
/**
* Creates an Equalizer for the sound
* \param sound The handle of the sound
* \param definition buffer of size*sizeof(float) with the array of equalization values
* \param maxFreqEq Maximum frequency refered by the array
* \param sizeConversion Size of the transformation. Must be 2^number (for example 1024, 2048,...)
* \return A handle to the Equalizer refered to that sound
*/
extern AUD_API AUD_Sound* AUD_Sound_equalize(AUD_Sound* sound, float *definition, int size, float maxFreqEq, int sizeConversion);
#endif
#ifdef __cplusplus

View File

@@ -53,7 +53,6 @@ extern AUD_API AUD_Handle* AUD_pauseAfter(AUD_Handle* handle, double seconds);
* \param buffer The buffer to write to. Must have a size of 3*4*length.
* \param length How many samples to read from the sound.
* \param samples_per_second How many samples to read per second of the sound.
* \param interrupt Must point to a short that equals 0. If it is set to a non-zero value, the method will be interrupted and return 0.
* \return How many samples really have been read. Always <= length.
*/
extern AUD_API int AUD_readSound(AUD_Sound* sound, float* buffer, int length, int samples_per_second, short* interrupt);

View File

@@ -5,12 +5,12 @@ import os
import codecs
import numpy
from setuptools import setup, Extension
from distutils.core import setup, Extension
if len(sys.argv) > 2 and sys.argv[1] == '--build-docs':
import subprocess
from setuptools import Distribution
from setuptools.command.build import build
from distutils.core import Distribution
from distutils.command.build import build
dist = Distribution()
cmd = build(dist)

View File

@@ -1,106 +0,0 @@
/*******************************************************************************
* Copyright 2022 Marcos Perez Gonzalez
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#pragma once
/**
* @file Equalizer.h
* @ingroup fx
* The Equalizer class.
*/
#include <memory>
#include <vector>
#include "ISound.h"
#include "ImpulseResponse.h"
AUD_NAMESPACE_BEGIN
class Buffer;
class ImpulseResponse;
/**
* This class represents a sound that can be modified depending on a given impulse response.
*/
class AUD_API Equalizer : public ISound
{
private:
/**
* A pointer to the imput sound.
*/
std::shared_ptr<ISound> m_sound;
/**
* Local definition of Equalizer
*/
std::shared_ptr<Buffer> m_bufEQ;
/**
* A pointer to the impulse response.
*/
std::shared_ptr<ImpulseResponse> m_impulseResponse;
/**
* delete copy constructor and operator=
*/
Equalizer(const Equalizer&) = delete;
Equalizer& operator=(const Equalizer&) = delete;
/**
* Create ImpulseResponse from the definition in the Buffer,
* using at the end a minimum phase change
*/
std::shared_ptr<ImpulseResponse> createImpulseResponse();
/**
* Create an Impulse Response with minimum phase distortion using Homomorphic
* The input is an Impulse Response
*/
std::shared_ptr<Buffer> minimumPhaseFilterHomomorphic(std::shared_ptr<Buffer> original, int lOriginal, int lWork);
/**
* Create an Impulse Response with minimum phase distortion using Hilbert
* The input is an Impulse Response
*/
std::shared_ptr<Buffer> minimumPhaseFilterHilbert(std::shared_ptr<Buffer> original, int lOriginal, int lWork);
public:
/**
* Creates a new Equalizer.
* \param sound The sound that will be equalized
*/
Equalizer(std::shared_ptr<ISound> sound, std::shared_ptr<Buffer> bufEQ, int externalSizeEq, float maxFreqEq, int sizeConversion);
virtual ~Equalizer();
virtual std::shared_ptr<IReader> createReader();
/*
* Length of the external equalizer definition. It must be the number of "float" positions of the Buffer
*/
int external_size_eq;
/*
* Length of the internal equalizer definition
*/
int filter_length;
/*
* Maximum frequency used in the equalizer definition
*/
float maxFreqEq;
};
AUD_NAMESPACE_END

View File

@@ -27,7 +27,7 @@
AUD_NAMESPACE_BEGIN
BinauralReader::BinauralReader(std::shared_ptr<IReader> reader, std::shared_ptr<HRTF> hrtfs, std::shared_ptr<Source> source, std::shared_ptr<ThreadPool> threadPool, std::shared_ptr<FFTPlan> plan) :
m_position(0), m_reader(reader), m_hrtfs(hrtfs), m_source(source), m_N(plan->getSize()), m_transition(false), m_transPos(CROSSFADE_SAMPLES*NUM_OUTCHANNELS), m_eosReader(false), m_eosTail(false), m_threadPool(threadPool)
m_reader(reader), m_hrtfs(hrtfs), m_source(source), m_N(plan->getSize()), m_threadPool(threadPool), m_position(0), m_eosReader(false), m_eosTail(false), m_transition(false), m_transPos(CROSSFADE_SAMPLES*NUM_OUTCHANNELS)
{
if(m_hrtfs->isEmpty())
AUD_THROW(StateException, "The provided HRTF object is empty");

View File

@@ -23,7 +23,7 @@
AUD_NAMESPACE_BEGIN
Convolver::Convolver(std::shared_ptr<std::vector<std::shared_ptr<std::vector<std::complex<sample_t>>>>> ir, int irLength, std::shared_ptr<ThreadPool> threadPool, std::shared_ptr<FFTPlan> plan) :
m_N(plan->getSize()), m_M(plan->getSize()/2), m_L(plan->getSize()/2), m_irBuffers(ir), m_numThreads(std::min(threadPool->getNumOfThreads(), static_cast<unsigned int>(m_irBuffers->size() - 1))), m_threadPool(threadPool), m_irLength(irLength), m_tailCounter(0), m_eos(false)
m_N(plan->getSize()), m_M(plan->getSize()/2), m_L(plan->getSize()/2), m_irBuffers(ir), m_irLength(irLength), m_threadPool(threadPool), m_numThreads(std::min(threadPool->getNumOfThreads(), static_cast<unsigned int>(m_irBuffers->size() - 1))), m_tailCounter(0), m_eos(false)
{
m_resetFlag = false;

View File

@@ -24,7 +24,7 @@
AUD_NAMESPACE_BEGIN
ConvolverReader::ConvolverReader(std::shared_ptr<IReader> reader, std::shared_ptr<ImpulseResponse> ir, std::shared_ptr<ThreadPool> threadPool, std::shared_ptr<FFTPlan> plan) :
m_position(0), m_reader(reader), m_ir(ir), m_N(plan->getSize()), m_eosReader(false), m_eosTail(false), m_inChannels(reader->getSpecs().channels), m_irChannels(ir->getSpecs().channels), m_threadPool(threadPool)
m_reader(reader), m_ir(ir), m_N(plan->getSize()), m_eosReader(false), m_eosTail(false), m_inChannels(reader->getSpecs().channels), m_irChannels(ir->getSpecs().channels), m_threadPool(threadPool), m_position(0)
{
m_nChannelThreads = std::min((int)threadPool->getNumOfThreads(), m_inChannels);
m_futures.resize(m_nChannelThreads);

View File

@@ -1,367 +0,0 @@
/*******************************************************************************
* Copyright 2022 Marcos Perez Gonzalez
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#include "fx/Equalizer.h"
#include <chrono>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <string>
#include "Exception.h"
#include "fx/ConvolverReader.h"
#include "fx/ImpulseResponse.h"
#include "util/Buffer.h"
#include "util/FFTPlan.h"
#include "util/ThreadPool.h"
AUD_NAMESPACE_BEGIN
Equalizer::Equalizer(std::shared_ptr<ISound> sound, std::shared_ptr<Buffer> bufEQ, int externalSizeEq, float maxFreqEq, int sizeConversion) : m_sound(sound), m_bufEQ(bufEQ)
{
this->maxFreqEq = maxFreqEq;
this->external_size_eq = externalSizeEq;
filter_length = sizeConversion;
}
Equalizer::~Equalizer()
{
}
std::shared_ptr<IReader> Equalizer::createReader()
{
std::shared_ptr<FFTPlan> fp = std::shared_ptr<FFTPlan>(new FFTPlan(filter_length));
// 2 threads to start with
return std::shared_ptr<ConvolverReader>(new ConvolverReader(m_sound->createReader(), createImpulseResponse(), std::shared_ptr<ThreadPool>(new ThreadPool(2)), fp));
}
float calculateValueArray(float* data, float minX, float maxX, int length, float posX)
{
if(posX < minX)
return 1.0;
if(posX > maxX)
return data[length - 1];
float interval = (maxX - minX) / (float) length;
int idx = (int) ((posX - minX) / interval);
return data[idx];
}
void complex_prod(float a, float b, float c, float d, float* r, float* imag)
{
float prod1 = a * c;
float prod2 = b * d;
float prod3 = (a + b) * (c + d);
// Real Part
*r = prod1 - prod2;
// Imaginary Part
*imag = prod3 - (prod1 + prod2);
}
/**
* The creation of the ImpuseResponse which will be convoluted with the sound
*
* The implementation is based on scikit-signal
*/
std::shared_ptr<ImpulseResponse> Equalizer::createImpulseResponse()
{
std::shared_ptr<FFTPlan> fp = std::shared_ptr<FFTPlan>(new FFTPlan(filter_length));
fftwf_complex* buffer = (fftwf_complex*) fp->getBuffer();
std::memset(buffer, 0, filter_length * sizeof(fftwf_complex));
std::shared_ptr<IReader> soundReader = m_sound.get()->createReader();
Specs specsSound = soundReader.get()->getSpecs();
int sampleRate = specsSound.rate;
for(unsigned i = 0; i < filter_length / 2; i++)
{
double freq = (((float) i) / (float) filter_length) * (float) sampleRate;
double dbGain = calculateValueArray(m_bufEQ->getBuffer(), 0.0, maxFreqEq, external_size_eq, freq);
// gain = 10^(decibels / 20.0)
// 0 db = 1
// 20 db = 10
// 40 db = 100
float gain = (float) pow(10.0, dbGain / 20.0);
if(i == filter_length / 2 - 1)
{
gain = 0;
}
// IMPORTANT!!!! It is needed for the minimum phase step.
// Without this, the amplitude would be square rooted
//
gain *= gain;
// Calculation of exponential with std.. or "by hand"
/*
std::complex<float> preShift= std::complex<float>(0.0, -(filter_length - 1)
/ 2. * M_PI * freq / ( sampleRate/2)); std::complex<float> shift =
std::exp(preShift);
std::complex<float> cGain = gain * shift;
*/
float imaginary_shift = -(filter_length - 1) / 2. * M_PI * freq / (sampleRate / 2);
float cGain_real = gain * cos(imaginary_shift);
float cGain_imag = gain * sin(imaginary_shift);
int i2 = filter_length - i - 1;
buffer[i][0] = cGain_real; // Real
buffer[i][1] = cGain_imag; // Imag
if(i > 0 && i2 < filter_length)
{
buffer[i2][0] = cGain_real; // Real
buffer[i2][1] = cGain_imag; // Imag
}
}
// In place. From Complex to sample_t
fp->IFFT(buffer);
// Window Hamming
sample_t* pt_sample_t = (sample_t*) buffer;
float half_filter = ((float) filter_length) / 2.0;
for(int i = 0; i < filter_length; i++)
{
// Centered in filter_length/2
float window = 0.54 - 0.46 * cos((2 * M_PI * (float) i) / (float) (filter_length - 1));
pt_sample_t[i] *= window;
}
std::shared_ptr<Buffer> b2 = std::shared_ptr<Buffer>(new Buffer(filter_length * sizeof(sample_t)));
sample_t* buffer_real = (sample_t*) buffer;
sample_t* buffer2 = b2->getBuffer();
float normaliziter = (float) filter_length;
for(int i = 0; i < filter_length; i++)
{
buffer2[i] = (buffer_real[i] / normaliziter);
}
fp->freeBuffer(buffer);
//
// Here b2 is the buffer with a "valid" FIR (remember the squared amplitude
//
std::shared_ptr<Buffer> ir_minimum = minimumPhaseFilterHomomorphic(b2, filter_length, -1);
Specs specsIR;
specsIR.rate = sampleRate;
specsIR.channels = CHANNELS_MONO;
return std::shared_ptr<ImpulseResponse>(new ImpulseResponse(std::shared_ptr<StreamBuffer>(new StreamBuffer(ir_minimum, specsIR)), fp));
}
std::shared_ptr<Buffer> Equalizer::minimumPhaseFilterHomomorphic(std::shared_ptr<Buffer> original, int lOriginal, int lWork)
{
void* b_orig = original->getBuffer();
if(lWork < lOriginal || lWork < 0)
{
lWork = (int) pow(2, ceil(log2((float) (2 * (lOriginal - 1) / 0.01))));
}
std::shared_ptr<FFTPlan> fp = std::shared_ptr<FFTPlan>(new FFTPlan(lWork, 0.1));
fftwf_complex* buffer = (fftwf_complex*) fp->getBuffer();
sample_t* b_work = (sample_t*) buffer;
// Padding with 0
std::memset(b_work, 0, lWork * sizeof(sample_t));
std::memcpy(b_work, b_orig, lOriginal * sizeof(sample_t));
fp->FFT(b_work);
for(int i = 0; i < lWork / 2; i++)
{
buffer[i][0] = fabs(sqrt(buffer[i][0] * buffer[i][0] + buffer[i][1] * buffer[i][1]));
buffer[i][1] = 0.0;
int conjugate = lWork - i - 1;
buffer[conjugate][0] = buffer[i][0];
buffer[conjugate][1] = 0.0;
}
double threshold = pow(10.0, -7);
float logThreshold = (float) log(threshold);
// take 0.25*log(|H|**2) = 0.5*log(|H|)
for(int i = 0; i < lWork; i++)
{
if(buffer[i][0] < threshold)
{
buffer[i][0] = 0.5 * logThreshold;
}
else
{
buffer[i][0] = 0.5 * log(buffer[i][0]);
}
}
fp->IFFT(buffer);
// homomorphic filter
int stop = (lOriginal + 1) / 2;
b_work[0] = b_work[0] / (float) lWork;
for(int i = 1; i < stop; i++)
{
b_work[i] = b_work[i] / (float) lWork * 2.0;
}
for(int i = stop; i < lWork; i++)
{
b_work[i] = 0;
}
fp->FFT(buffer);
// EXP
// e^x = e^ (a+bi)= e^a * e^bi = e^a * (cos b + i sin b)
for(int i = 0; i < lWork / 2; i++)
{
float new_real;
float new_imag;
new_real = exp(buffer[i][0]) * cos(buffer[i][1]);
new_imag = exp(buffer[i][0]) * sin(buffer[i][1]);
buffer[i][0] = new_real;
buffer[i][1] = new_imag;
int conjugate = lWork - i - 1;
buffer[conjugate][0] = new_real;
buffer[conjugate][1] = new_imag;
}
// IFFT
fp->IFFT(buffer);
// Create new clean Buffer with only the result and normalization
int lOut = (lOriginal / 2) + lOriginal % 2;
std::shared_ptr<Buffer> bOut = std::shared_ptr<Buffer>(new Buffer(sizeof(float) * lOut));
float* bbOut = (float*) bOut->getBuffer();
// Copy and normalize
for(int i = 0; i < lOut; i++)
{
bbOut[i] = b_work[i] / (float) lWork;
}
fp->freeBuffer(buffer);
return bOut;
}
std::shared_ptr<Buffer> Equalizer::minimumPhaseFilterHilbert(std::shared_ptr<Buffer> original, int lOriginal, int lWork)
{
void* b_orig = original->getBuffer();
if(lWork < lOriginal || lWork < 0)
{
lWork = (int) pow(2, ceil(log2((float) (2 * (lOriginal - 1) / 0.01))));
}
std::shared_ptr<FFTPlan> fp = std::shared_ptr<FFTPlan>(new FFTPlan(lWork, 0.1));
fftwf_complex* buffer = (fftwf_complex*) fp->getBuffer();
sample_t* b_work = (sample_t*) buffer;
// Padding with 0
std::memset(b_work, 0, lWork * sizeof(sample_t));
std::memcpy(b_work, b_orig, lOriginal * sizeof(sample_t));
fp->FFT(b_work);
float mymax, mymin;
float n_half = (float) (lOriginal >> 1);
for(int i = 0; i < lWork; i++)
{
float w = ((float) i) * 2.0 * M_PI / (float) lWork * n_half;
float f1 = cos(w);
float f2 = sin(w);
float f3, f4;
complex_prod(buffer[i][0], buffer[i][1], f1, f2, &f3, &f4);
buffer[i][0] = f3;
buffer[i][1] = 0.0;
if(i == 0)
{
mymax = f3;
mymin = f3;
}
else
{
if(f3 < mymin)
mymin = f3;
if(f3 > mymax)
mymax = f3;
}
}
float dp = mymax - 1;
float ds = 0 - mymin;
float S = 4.0 / pow(2, (sqrt(1 + dp + ds) + sqrt(1 - dp + ds)));
for(int i = 0; i < lWork; i++)
{
buffer[i][0] = sqrt((buffer[i][0] + ds) * S) + 1.0E-10;
}
fftwf_complex* buffer_tmp = (fftwf_complex*) std::malloc(lWork * sizeof(fftwf_complex));
std::memcpy(buffer_tmp, buffer, lWork * sizeof(fftwf_complex));
//
// Hilbert transform
//
int midpt = lWork >> 1;
for(int i = 0; i < lWork; i++)
buffer[i][0] = log(buffer[i][0]);
fp->IFFT(buffer);
b_work[0] = 0.0;
for(int i = 1; i < midpt; i++)
{
b_work[i] /= (float) lWork;
}
b_work[midpt] = 0.0;
for(int i = midpt + 1; i < lWork; i++)
{
b_work[i] /= (-1.0 * lWork);
}
fp->FFT(b_work);
// Exp
for(int i = 0; i < lWork; i++)
{
float base = exp(buffer[i][0]);
buffer[i][0] = base * cos(buffer[i][1]);
buffer[i][1] = base * sin(buffer[i][1]);
complex_prod(buffer_tmp[i][0], buffer_tmp[i][1], buffer[i][0], buffer[i][1], &(buffer[i][0]), &(buffer[i][1]));
}
std::free(buffer_tmp);
fp->IFFT(buffer);
//
// Copy and normalization
//
int n_out = n_half + lOriginal % 2;
std::shared_ptr<Buffer> b_minimum = std::shared_ptr<Buffer>(new Buffer(n_out * sizeof(sample_t)));
std::memcpy(b_minimum->getBuffer(), buffer, n_out * sizeof(sample_t));
sample_t* b_final = (sample_t*) b_minimum->getBuffer();
for(int i = 0; i < n_out; i++)
{
b_final[i] /= (float) lWork;
}
return b_minimum;
}
AUD_NAMESPACE_END

View File

@@ -22,7 +22,7 @@
AUD_NAMESPACE_BEGIN
FFTConvolver::FFTConvolver(std::shared_ptr<std::vector<std::complex<sample_t>>> ir, std::shared_ptr<FFTPlan> plan) :
m_plan(plan), m_N(plan->getSize()), m_M(plan->getSize()/2), m_L(plan->getSize()/2), m_irBuffer(ir), m_tailPos(0)
m_plan(plan), m_N(plan->getSize()), m_M(plan->getSize()/2), m_L(plan->getSize()/2), m_tailPos(0), m_irBuffer(ir)
{
m_tail = (float*)calloc(m_M - 1, sizeof(float));
m_realBufLen = ((m_N / 2) + 1) * 2;

View File

@@ -75,7 +75,7 @@ void HRTFLoader::loadHRTFs(std::shared_ptr<HRTF> hrtfs, char ear, const std::str
if(ear == 'L')
azim = 360 - azim;
}
catch(...)
catch(std::exception& e)
{
AUD_THROW(FileException, "The HRTF name doesn't follow the naming scheme: " + filename);
}
@@ -86,4 +86,4 @@ void HRTFLoader::loadHRTFs(std::shared_ptr<HRTF> hrtfs, char ear, const std::str
return;
}
AUD_NAMESPACE_END
AUD_NAMESPACE_END

View File

@@ -78,7 +78,7 @@ void HRTFLoader::loadHRTFs(std::shared_ptr<HRTF> hrtfs, char ear, const std::str
if(ear == 'L')
azim = 360 - azim;
}
catch(...)
catch(std::exception& e)
{
AUD_THROW(FileException, "The HRTF name doesn't follow the naming scheme: " + filename);
}
@@ -90,4 +90,4 @@ void HRTFLoader::loadHRTFs(std::shared_ptr<HRTF> hrtfs, char ear, const std::str
return;
}
AUD_NAMESPACE_END
AUD_NAMESPACE_END

View File

@@ -13,12 +13,10 @@ endif()
# Exporting functions from the blender binary gives linker warnings on Apple arm64 systems.
# Silence them here.
if(APPLE)
if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
if(CMAKE_COMPILER_IS_GNUCXX OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
string(APPEND CMAKE_C_FLAGS " -fvisibility=hidden")
string(APPEND CMAKE_CXX_FLAGS " -fvisibility=hidden")
endif()
if(APPLE AND ("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64"))
if(CMAKE_COMPILER_IS_GNUCXX OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
string(APPEND CMAKE_C_FLAGS " -fvisibility=hidden")
string(APPEND CMAKE_CXX_FLAGS " -fvisibility=hidden")
endif()
endif()
@@ -263,11 +261,9 @@ set(LIB
blender_add_lib(extern_mantaflow "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
if(WITH_OPENVDB)
# The VDB libs above are only added to as INTERFACE libs by blender_add_lib,
# meaning extern_mantaflow itself actually does not have a dependency on the
# openvdb libraries, and CMAKE is free to link the vdb libs before
# extern_mantaflow causing linker errors on linux. By explicitly declaring
# a dependency here, cmake will do the right thing.
target_link_libraries(extern_mantaflow PRIVATE ${OPENVDB_LIBRARIES})
endif()
# The VDB libs above are only added to as INTERFACE libs by blender_add_lib,
# meaning extern_mantaflow itself actually does not have a dependency on the
# openvdb libraries, and CMAKE is free to link the vdb libs before
# extern_mantaflow causing linker errors on linux. By explicitly declaring
# a dependency here, cmake will do the right thing.
target_link_libraries(extern_mantaflow PRIVATE ${OPENVDB_LIBRARIES})

View File

@@ -85,11 +85,15 @@ elseif(WIN32 AND MSVC AND NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
# there is no /arch:SSE3, but intrinsics are available anyway
if(CMAKE_CL_64)
set(CYCLES_SSE2_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
set(CYCLES_SSE3_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS}")
set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_AVX2_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
else()
set(CYCLES_SSE2_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_SSE3_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_SSE41_KERNEL_FLAGS "/arch:SSE2 ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_AVX2_ARCH_FLAGS} ${CYCLES_KERNEL_FLAGS}")
endif()
@@ -122,7 +126,11 @@ elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
endif()
set(CYCLES_SSE2_KERNEL_FLAGS "${CYCLES_KERNEL_FLAGS} -msse -msse2")
set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS} -msse3 -mssse3 -msse4.1")
set(CYCLES_SSE3_KERNEL_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS} -msse3 -mssse3")
set(CYCLES_SSE41_KERNEL_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS} -msse4.1")
if(CXX_HAS_AVX)
set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS} -mavx")
endif()
if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS} -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c")
endif()
@@ -136,8 +144,13 @@ elseif(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "Intel")
if(CXX_HAS_SSE)
set(CYCLES_SSE2_KERNEL_FLAGS "/QxSSE2")
set(CYCLES_SSE3_KERNEL_FLAGS "/QxSSSE3")
set(CYCLES_SSE41_KERNEL_FLAGS "/QxSSE4.1")
if(CXX_HAS_AVX)
set(CYCLES_AVX_KERNEL_FLAGS "/arch:AVX")
endif()
if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "/QxCORE-AVX2")
endif()
@@ -161,8 +174,13 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
set(CYCLES_SSE2_KERNEL_FLAGS "-xsse2")
endif()
set(CYCLES_SSE3_KERNEL_FLAGS "-xssse3")
set(CYCLES_SSE41_KERNEL_FLAGS "-xsse4.1")
if(CXX_HAS_AVX)
set(CYCLES_AVX_KERNEL_FLAGS "-xavx")
endif()
if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "-xcore-avx2")
endif()
@@ -172,10 +190,15 @@ endif()
if(CXX_HAS_SSE)
add_definitions(
-DWITH_KERNEL_SSE2
-DWITH_KERNEL_SSE3
-DWITH_KERNEL_SSE41
)
endif()
if(CXX_HAS_AVX)
add_definitions(-DWITH_KERNEL_AVX)
endif()
if(CXX_HAS_AVX2)
add_definitions(-DWITH_KERNEL_AVX2)
endif()

View File

@@ -951,7 +951,9 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
return _cycles.debug_flags_update(scene)
debug_use_cpu_avx2: BoolProperty(name="AVX2", default=True)
debug_use_cpu_avx: BoolProperty(name="AVX", default=True)
debug_use_cpu_sse41: BoolProperty(name="SSE41", default=True)
debug_use_cpu_sse3: BoolProperty(name="SSE3", default=True)
debug_use_cpu_sse2: BoolProperty(name="SSE2", default=True)
debug_bvh_layout: EnumProperty(
name="BVH Layout",
@@ -1671,19 +1673,19 @@ class CyclesPreferences(bpy.types.AddonPreferences):
elif device_type == 'HIP':
import sys
if sys.platform[:3] == "win":
col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
elif sys.platform.startswith("linux"):
col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
col.label(text="and AMD driver version 22.10 or newer", icon='BLANK1')
elif device_type == 'ONEAPI':
import sys
if sys.platform.startswith("win"):
col.label(text="Requires Intel GPU with Xe-HPG architecture", icon='BLANK1')
col.label(text="and Windows driver version 101.4032 or newer", icon='BLANK1')
col.label(text="and Windows driver version 101.3430 or newer", icon='BLANK1')
elif sys.platform.startswith("linux"):
col.label(text="Requires Intel GPU with Xe-HPG architecture and", icon='BLANK1')
col.label(text=" - intel-level-zero-gpu version 1.3.24931 or newer", icon='BLANK1')
col.label(text=" - intel-level-zero-gpu version 1.3.23904 or newer", icon='BLANK1')
col.label(text=" - oneAPI Level-Zero Loader", icon='BLANK1')
elif device_type == 'METAL':
col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')

View File

@@ -2112,7 +2112,9 @@ class CYCLES_RENDER_PT_debug(CyclesDebugButtonsPanel, Panel):
row = col.row(align=True)
row.prop(cscene, "debug_use_cpu_sse2", toggle=True)
row.prop(cscene, "debug_use_cpu_sse3", toggle=True)
row.prop(cscene, "debug_use_cpu_sse41", toggle=True)
row.prop(cscene, "debug_use_cpu_avx", toggle=True)
row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
col.prop(cscene, "debug_bvh_layout", text="BVH")

View File

@@ -721,6 +721,8 @@ static void draw_tile(const float2 &zoom,
return;
}
GPU_texture_bind(texture.gpu_texture, 0);
/* Trick to keep sharp rendering without jagged edges on all GPUs.
*
* The idea here is to enforce driver to use linear interpolation when the image is not zoomed
@@ -733,14 +735,14 @@ static void draw_tile(const float2 &zoom,
const float zoomed_height = draw_tile.params.size.y * zoom.y;
if (texture.width != draw_tile.params.size.x || texture.height != draw_tile.params.size.y) {
/* Resolution divider is different from 1, force nearest interpolation. */
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_DEFAULT, 0, false);
GPU_texture_filter_mode(texture.gpu_texture, false);
}
else if (zoomed_width - draw_tile.params.size.x > 0.5f ||
zoomed_height - draw_tile.params.size.y > 0.5f) {
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_DEFAULT, 0, false);
GPU_texture_filter_mode(texture.gpu_texture, false);
}
else {
GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_FILTER, 0, false);
GPU_texture_filter_mode(texture.gpu_texture, true);
}
/* Draw at the parameters for which the texture has been updated for. This allows to always draw

View File

@@ -63,7 +63,9 @@ static void debug_flags_sync_from_scene(BL::Scene b_scene)
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
/* Synchronize CPU flags. */
flags.cpu.avx2 = get_boolean(cscene, "debug_use_cpu_avx2");
flags.cpu.avx = get_boolean(cscene, "debug_use_cpu_avx");
flags.cpu.sse41 = get_boolean(cscene, "debug_use_cpu_sse41");
flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3");
flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2");
flags.cpu.bvh_layout = (BVHLayout)get_enum(cscene, "debug_bvh_layout");
/* Synchronize CUDA flags. */

View File

@@ -26,7 +26,7 @@
CCL_NAMESPACE_BEGIN
typedef unordered_multimap<void *, ShaderInput *> PtrInputMap;
typedef map<void *, ShaderInput *> PtrInputMap;
typedef map<void *, ShaderOutput *> PtrOutputMap;
typedef map<string, ConvertNode *> ProxyMap;
@@ -1251,9 +1251,7 @@ static void add_nodes(Scene *scene,
ConvertNode *proxy = graph->create_node<ConvertNode>(to_socket_type, to_socket_type, true);
/* Muted nodes can result in multiple Cycles input sockets mapping to the same Blender
* input socket, so this needs to be a multimap. */
input_map.emplace(b_link.from_socket().ptr.data, proxy->inputs[0]);
input_map[b_link.from_socket().ptr.data] = proxy->inputs[0];
output_map[b_link.to_socket().ptr.data] = proxy->outputs[0];
graph->add(proxy);
@@ -1288,7 +1286,7 @@ static void add_nodes(Scene *scene,
/* register the proxy node for internal binding */
group_proxy_input_map[b_input.identifier()] = proxy;
input_map.emplace(b_input.ptr.data, proxy->inputs[0]);
input_map[b_input.ptr.data] = proxy->inputs[0];
set_default_value(proxy->inputs[0], b_input, b_data, b_ntree);
}
@@ -1340,7 +1338,7 @@ static void add_nodes(Scene *scene,
if (proxy_it != proxy_output_map.end()) {
ConvertNode *proxy = proxy_it->second;
input_map.emplace(b_input.ptr.data, proxy->inputs[0]);
input_map[b_input.ptr.data] = proxy->inputs[0];
set_default_value(proxy->inputs[0], b_input, b_data, b_ntree);
}
@@ -1371,7 +1369,7 @@ static void add_nodes(Scene *scene,
/* XXX should not happen, report error? */
continue;
}
input_map.emplace(b_input.ptr.data, input);
input_map[b_input.ptr.data] = input;
set_default_value(input, b_input, b_data, b_ntree);
}
@@ -1403,23 +1401,20 @@ static void add_nodes(Scene *scene,
BL::NodeSocket b_from_sock = b_link.from_socket();
BL::NodeSocket b_to_sock = b_link.to_socket();
ShaderOutput *output = nullptr;
ShaderOutput *output = 0;
ShaderInput *input = 0;
PtrOutputMap::iterator output_it = output_map.find(b_from_sock.ptr.data);
if (output_it != output_map.end())
output = output_it->second;
PtrInputMap::iterator input_it = input_map.find(b_to_sock.ptr.data);
if (input_it != input_map.end())
input = input_it->second;
/* either socket may be NULL when the node was not exported, typically
/* either node may be NULL when the node was not exported, typically
* because the node type is not supported */
if (output != nullptr) {
ShaderOutput *output = output_it->second;
auto inputs = input_map.equal_range(b_to_sock.ptr.data);
for (PtrInputMap::iterator input_it = inputs.first; input_it != inputs.second; ++input_it) {
ShaderInput *input = input_it->second;
if (input != nullptr) {
graph->connect(output, input);
}
}
}
if (output && input)
graph->connect(output, input);
}
}

View File

@@ -111,10 +111,8 @@ macro(cycles_external_libraries_append libraries)
endif()
if(WITH_OPENIMAGEDENOISE)
list(APPEND ${libraries} ${OPENIMAGEDENOISE_LIBRARIES})
if(APPLE)
if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
list(APPEND ${libraries} "-framework Accelerate")
endif()
if(APPLE AND "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
list(APPEND ${libraries} "-framework Accelerate")
endif()
endif()
if(WITH_ALEMBIC)
@@ -138,15 +136,7 @@ macro(cycles_external_libraries_append libraries)
${PYTHON_LIBRARIES}
${ZLIB_LIBRARIES}
${CMAKE_DL_LIBS}
)
if(DEFINED PTHREADS_LIBRARIES)
list(APPEND ${libraries}
${PTHREADS_LIBRARIES}
)
endif()
list(APPEND ${libraries}
${PTHREADS_LIBRARIES}
${PLATFORM_LINKLIBS}
)

View File

@@ -45,7 +45,9 @@ string device_cpu_capabilities()
{
string capabilities = "";
capabilities += system_cpu_support_sse2() ? "SSE2 " : "";
capabilities += system_cpu_support_sse3() ? "SSE3 " : "";
capabilities += system_cpu_support_sse41() ? "SSE41 " : "";
capabilities += system_cpu_support_avx() ? "AVX " : "";
capabilities += system_cpu_support_avx2() ? "AVX2" : "";
if (capabilities[capabilities.size() - 1] == ' ')
capabilities.resize(capabilities.size() - 1);

View File

@@ -9,7 +9,8 @@ CCL_NAMESPACE_BEGIN
#define KERNEL_FUNCTIONS(name) \
KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse2, name), \
KERNEL_NAME_EVAL(cpu_sse41, name), KERNEL_NAME_EVAL(cpu_avx2, name)
KERNEL_NAME_EVAL(cpu_sse3, name), KERNEL_NAME_EVAL(cpu_sse41, name), \
KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)
#define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
#define REGISTER_KERNEL_FILM_CONVERT(name) \

View File

@@ -17,10 +17,13 @@ template<typename FunctionType> class CPUKernelFunction {
public:
CPUKernelFunction(FunctionType kernel_default,
FunctionType kernel_sse2,
FunctionType kernel_sse3,
FunctionType kernel_sse41,
FunctionType kernel_avx,
FunctionType kernel_avx2)
{
kernel_info_ = get_best_kernel_info(kernel_default, kernel_sse2, kernel_sse41, kernel_avx2);
kernel_info_ = get_best_kernel_info(
kernel_default, kernel_sse2, kernel_sse3, kernel_sse41, kernel_avx, kernel_avx2);
}
template<typename... Args> inline auto operator()(Args... args) const
@@ -57,12 +60,16 @@ template<typename FunctionType> class CPUKernelFunction {
KernelInfo get_best_kernel_info(FunctionType kernel_default,
FunctionType kernel_sse2,
FunctionType kernel_sse3,
FunctionType kernel_sse41,
FunctionType kernel_avx,
FunctionType kernel_avx2)
{
/* Silence warnings about unused variables when compiling without some architectures. */
(void)kernel_sse2;
(void)kernel_sse3;
(void)kernel_sse41;
(void)kernel_avx;
(void)kernel_avx2;
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
@@ -71,12 +78,24 @@ template<typename FunctionType> class CPUKernelFunction {
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) {
return KernelInfo("AVX", kernel_avx);
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) {
return KernelInfo("SSE4.1", kernel_sse41);
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) {
return KernelInfo("SSE3", kernel_sse3);
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
return KernelInfo("SSE2", kernel_sse2);

View File

@@ -51,7 +51,7 @@ static inline bool hipSupportsDevice(const int hipDevId)
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
return (major >= 10);
return (major >= 9);
}
CCL_NAMESPACE_END

View File

@@ -327,21 +327,10 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
# define KERNEL_STRUCT_BEGIN(name, parent) \
string_replace_same_length(source, "kernel_data." #parent ".", "kernel_data_" #parent "_");
bool next_member_is_specialized = true;
# define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE next_member_is_specialized = false;
/* Add constants to md5 so that 'get_best_pipeline' is able to return a suitable match. */
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
if (next_member_is_specialized) { \
baked_constants += string(#parent "." #name "=") + \
to_string(_type(launch_params.data.parent.name)) + "\n"; \
} \
else { \
string_replace( \
source, "kernel_data_" #parent "_" #name, "kernel_data." #parent ".__unused_" #name); \
next_member_is_specialized = true; \
}
baked_constants += string(#parent "." #name "=") + \
to_string(_type(launch_params.data.parent.name)) + "\n";
# include "kernel/data_template.h"

View File

@@ -49,18 +49,6 @@ struct ShaderCache {
if (MetalInfo::get_device_vendor(mtlDevice) == METAL_GPU_APPLE) {
switch (MetalInfo::get_apple_gpu_architecture(mtlDevice)) {
default:
case APPLE_M2_BIG:
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES] = {384, 128};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA] = {640, 128};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST] = {1024, 64};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW] = {704, 704};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE] = {640, 32};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY] = {896, 768};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND] = {512, 128};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW] = {32, 32};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] = {768, 576};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY] = {896, 768};
break;
case APPLE_M2:
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES] = {32, 32};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA] = {832, 32};
@@ -460,18 +448,13 @@ static MTLFunctionConstantValues *GetConstantValues(KernelData const *data = nul
if (!data) {
data = &zero_data;
}
[constant_values setConstantValue:&zero_data type:MTLDataType_int atIndex:Kernel_DummyConstant];
bool next_member_is_specialized = true;
# define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE next_member_is_specialized = false;
int zero_int = 0;
[constant_values setConstantValue:&zero_int type:MTLDataType_int atIndex:Kernel_DummyConstant];
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
[constant_values setConstantValue:next_member_is_specialized ? (void *)&data->parent.name : \
(void *)&zero_data \
[constant_values setConstantValue:&data->parent.name \
type:MTLDataType_##_type \
atIndex:KernelData_##parent##_##name]; \
next_member_is_specialized = true;
atIndex:KernelData_##parent##_##name];
# include "kernel/data_template.h"

View File

@@ -278,8 +278,7 @@ int MetalDeviceQueue::num_concurrent_states(const size_t state_size) const
if (metal_device_->device_vendor == METAL_GPU_APPLE) {
result *= 4;
/* Increasing the state count doesn't notably benefit M1-family systems. */
if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) != APPLE_M1) {
if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) == APPLE_M2) {
size_t system_ram = system_physical_ram();
size_t allocated_so_far = [metal_device_->mtlDevice currentAllocatedSize];
size_t max_recommended_working_set = [metal_device_->mtlDevice recommendedMaxWorkingSetSize];

View File

@@ -29,7 +29,6 @@ enum AppleGPUArchitecture {
APPLE_UNKNOWN,
APPLE_M1,
APPLE_M2,
APPLE_M2_BIG,
};
/* Contains static Metal helper functions. */

View File

@@ -52,7 +52,7 @@ AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
return APPLE_M1;
}
else if (strstr(device_name, "M2")) {
return get_apple_gpu_core_count(device) <= 10 ? APPLE_M2 : APPLE_M2_BIG;
return APPLE_M2;
}
return APPLE_UNKNOWN;
}

View File

@@ -631,9 +631,9 @@ bool OneapiDevice::enqueue_kernel(KernelContext *kernel_context,
/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
* since Windows driver 101.3268. */
/* The same min compute-runtime version is currently required across Windows and Linux.
* For Windows driver 101.4032, compute-runtime version is 24931. */
static const int lowest_supported_driver_version_win = 1014032;
static const int lowest_supported_driver_version_neo = 24931;
* For Windows driver 101.3430, compute-runtime version is 23904. */
static const int lowest_supported_driver_version_win = 1013430;
static const int lowest_supported_driver_version_neo = 23904;
int OneapiDevice::parse_driver_build_version(const sycl::device &device)
{

View File

@@ -5,9 +5,6 @@ set(INC
..
)
set(INC_SYS
)
set(SRC
node.cpp
node_type.cpp

View File

@@ -5,9 +5,6 @@ set(INC
..
)
set(INC_SYS
)
set(SRC
adaptive_sampling.cpp
denoiser.cpp

View File

@@ -14,7 +14,9 @@ set(INC_SYS
set(SRC_KERNEL_DEVICE_CPU
device/cpu/kernel.cpp
device/cpu/kernel_sse2.cpp
device/cpu/kernel_sse3.cpp
device/cpu/kernel_sse41.cpp
device/cpu/kernel_avx.cpp
device/cpu/kernel_avx2.cpp
)
@@ -732,25 +734,25 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
endif()
# SYCL_CPP_FLAGS is a variable that the user can set to pass extra compiler options
set(sycl_compiler_flags
${CMAKE_CURRENT_SOURCE_DIR}/${SRC_KERNEL_DEVICE_ONEAPI}
-fsycl
-fsycl-unnamed-lambda
-fdelayed-template-parsing
-mllvm -inlinedefault-threshold=250
-mllvm -inlinehint-threshold=350
-fsycl-device-code-split=per_kernel
-fsycl-max-parallel-link-jobs=${SYCL_OFFLINE_COMPILER_PARALLEL_JOBS}
-shared
-DWITH_ONEAPI
-ffast-math
-DNDEBUG
-O2
-o ${cycles_kernel_oneapi_lib}
-I${CMAKE_CURRENT_SOURCE_DIR}/..
${SYCL_CPP_FLAGS}
)
${CMAKE_CURRENT_SOURCE_DIR}/${SRC_KERNEL_DEVICE_ONEAPI}
-fsycl
-fsycl-unnamed-lambda
-fdelayed-template-parsing
-mllvm -inlinedefault-threshold=250
-mllvm -inlinehint-threshold=350
-fsycl-device-code-split=per_kernel
-fsycl-max-parallel-link-jobs=${SYCL_OFFLINE_COMPILER_PARALLEL_JOBS}
-shared
-DWITH_ONEAPI
-ffast-math
-DNDEBUG
-O2
-o ${cycles_kernel_oneapi_lib}
-I${CMAKE_CURRENT_SOURCE_DIR}/..
${SYCL_CPP_FLAGS}
)
if(WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
if (WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
list(APPEND sycl_compiler_flags -DWITH_ONEAPI_SYCL_HOST_TASK)
endif()
@@ -938,9 +940,14 @@ set_source_files_properties(device/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${CY
if(CXX_HAS_SSE)
set_source_files_properties(device/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(device/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
set_source_files_properties(device/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX)
set_source_files_properties(device/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX2)
set_source_files_properties(device/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
endif()

View File

@@ -63,9 +63,8 @@ ccl_device void kernel_background_evaluate(KernelGlobals kg,
shader_setup_from_background(kg, &sd, ray_P, ray_D, ray_time);
/* Evaluate shader.
* This is being evaluated for all BSDFs, so path flag does not contain a specific type.
* However, we want to flag the ray visibility to ignore the sun in the background map. */
const uint32_t path_flag = PATH_RAY_EMISSION | PATH_RAY_IMPORTANCE_BAKE;
* This is being evaluated for all BSDFs, so path flag does not contain a specific type. */
const uint32_t path_flag = PATH_RAY_EMISSION;
surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT &
~(KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_NODE_LIGHT_PATH)>(
kg, INTEGRATOR_STATE_NULL, &sd, NULL, path_flag);

View File

@@ -102,9 +102,10 @@ ccl_device_inline float shift_cos_in(float cos_in, const float frequency_multipl
return val;
}
ccl_device_inline bool bsdf_is_transmission(ccl_private const ShaderClosure *sc, const float3 wo)
ccl_device_inline bool bsdf_is_transmission(ccl_private const ShaderClosure *sc,
const float3 omega_in)
{
return dot(sc->N, wo) < 0.0f;
return dot(sc->N, omega_in) < 0.0f;
}
ccl_device_inline int bsdf_sample(KernelGlobals kg,
@@ -113,7 +114,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf,
ccl_private float2 *sampled_roughness,
ccl_private float *eta)
@@ -125,43 +126,43 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
switch (sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
label = bsdf_diffuse_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
label = bsdf_diffuse_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
*sampled_roughness = one_float2();
*eta = 1.0f;
break;
#if defined(__SVM__) || defined(__OSL__)
case CLOSURE_BSDF_OREN_NAYAR_ID:
label = bsdf_oren_nayar_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
label = bsdf_oren_nayar_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
*sampled_roughness = one_float2();
*eta = 1.0f;
break;
# ifdef __OSL__
case CLOSURE_BSDF_PHONG_RAMP_ID:
label = bsdf_phong_ramp_sample(
sc, Ng, sd->wi, randu, randv, eval, wo, pdf, sampled_roughness);
sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, sampled_roughness);
*eta = 1.0f;
break;
case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
label = bsdf_diffuse_ramp_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
label = bsdf_diffuse_ramp_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
*sampled_roughness = one_float2();
*eta = 1.0f;
break;
# endif
case CLOSURE_BSDF_TRANSLUCENT_ID:
label = bsdf_translucent_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
label = bsdf_translucent_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
*sampled_roughness = one_float2();
*eta = 1.0f;
break;
case CLOSURE_BSDF_REFLECTION_ID:
label = bsdf_reflection_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf, eta);
label = bsdf_reflection_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, eta);
*sampled_roughness = zero_float2();
break;
case CLOSURE_BSDF_REFRACTION_ID:
label = bsdf_refraction_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf, eta);
label = bsdf_refraction_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, eta);
*sampled_roughness = zero_float2();
break;
case CLOSURE_BSDF_TRANSPARENT_ID:
label = bsdf_transparent_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
label = bsdf_transparent_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
*sampled_roughness = zero_float2();
*eta = 1.0f;
break;
@@ -170,65 +171,85 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
label = bsdf_microfacet_ggx_sample(
kg, sc, Ng, sd->wi, randu, randv, eval, wo, pdf, sampled_roughness, eta);
kg, sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, sampled_roughness, eta);
break;
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
label = bsdf_microfacet_multi_ggx_sample(
kg, sc, Ng, sd->wi, randu, randv, eval, wo, pdf, &sd->lcg_state, sampled_roughness, eta);
label = bsdf_microfacet_multi_ggx_sample(kg,
sc,
Ng,
sd->I,
randu,
randv,
eval,
omega_in,
pdf,
&sd->lcg_state,
sampled_roughness,
eta);
break;
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
label = bsdf_microfacet_multi_ggx_glass_sample(
kg, sc, Ng, sd->wi, randu, randv, eval, wo, pdf, &sd->lcg_state, sampled_roughness, eta);
label = bsdf_microfacet_multi_ggx_glass_sample(kg,
sc,
Ng,
sd->I,
randu,
randv,
eval,
omega_in,
pdf,
&sd->lcg_state,
sampled_roughness,
eta);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
label = bsdf_microfacet_beckmann_sample(
kg, sc, Ng, sd->wi, randu, randv, eval, wo, pdf, sampled_roughness, eta);
kg, sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, sampled_roughness, eta);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
label = bsdf_ashikhmin_shirley_sample(
sc, Ng, sd->wi, randu, randv, eval, wo, pdf, sampled_roughness);
sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, sampled_roughness);
*eta = 1.0f;
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
label = bsdf_ashikhmin_velvet_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
label = bsdf_ashikhmin_velvet_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
*sampled_roughness = one_float2();
*eta = 1.0f;
break;
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
label = bsdf_diffuse_toon_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
label = bsdf_diffuse_toon_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
*sampled_roughness = one_float2();
*eta = 1.0f;
break;
case CLOSURE_BSDF_GLOSSY_TOON_ID:
label = bsdf_glossy_toon_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
label = bsdf_glossy_toon_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
// double check if this is valid
*sampled_roughness = one_float2();
*eta = 1.0f;
break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
label = bsdf_hair_reflection_sample(
sc, Ng, sd->wi, randu, randv, eval, wo, pdf, sampled_roughness);
sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, sampled_roughness);
*eta = 1.0f;
break;
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
label = bsdf_hair_transmission_sample(
sc, Ng, sd->wi, randu, randv, eval, wo, pdf, sampled_roughness);
sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, sampled_roughness);
*eta = 1.0f;
break;
case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
label = bsdf_principled_hair_sample(
kg, sc, sd, randu, randv, eval, wo, pdf, sampled_roughness, eta);
kg, sc, sd, randu, randv, eval, omega_in, pdf, sampled_roughness, eta);
break;
case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
label = bsdf_principled_diffuse_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
label = bsdf_principled_diffuse_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
*sampled_roughness = one_float2();
*eta = 1.0f;
break;
case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
label = bsdf_principled_sheen_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
label = bsdf_principled_sheen_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
*sampled_roughness = one_float2();
*eta = 1.0f;
break;
@@ -253,12 +274,12 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
const float frequency_multiplier =
kernel_data_fetch(objects, sd->object).shadow_terminator_shading_offset;
if (frequency_multiplier > 1.0f) {
const float cosNO = dot(*wo, sc->N);
*eval *= shift_cos_in(cosNO, frequency_multiplier);
const float cosNI = dot(*omega_in, sc->N);
*eval *= shift_cos_in(cosNI, frequency_multiplier);
}
if (label & LABEL_DIFFUSE) {
if (!isequal(sc->N, sd->N)) {
*eval *= bump_shadowing_term(sd->N, sc->N, *wo);
*eval *= bump_shadowing_term(sd->N, sc->N, *omega_in);
}
}
}
@@ -405,7 +426,7 @@ ccl_device_inline void bsdf_roughness_eta(const KernelGlobals kg,
ccl_device_inline int bsdf_label(const KernelGlobals kg,
ccl_private const ShaderClosure *sc,
const float3 wo)
const float3 omega_in)
{
/* For curves use the smooth normal, particularly for ribbons the geometric
* normal gives too much darkening otherwise. */
@@ -461,8 +482,8 @@ ccl_device_inline int bsdf_label(const KernelGlobals kg,
}
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
label = (bsdf_is_transmission(sc, wo)) ? LABEL_TRANSMIT | LABEL_GLOSSY :
LABEL_REFLECT | LABEL_GLOSSY;
label = (bsdf_is_transmission(sc, omega_in)) ? LABEL_TRANSMIT | LABEL_GLOSSY :
LABEL_REFLECT | LABEL_GLOSSY;
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
label = LABEL_REFLECT | LABEL_GLOSSY;
@@ -483,7 +504,7 @@ ccl_device_inline int bsdf_label(const KernelGlobals kg,
label = LABEL_TRANSMIT | LABEL_GLOSSY;
break;
case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
if (bsdf_is_transmission(sc, wo))
if (bsdf_is_transmission(sc, omega_in))
label = LABEL_TRANSMIT | LABEL_GLOSSY;
else
label = LABEL_REFLECT | LABEL_GLOSSY;
@@ -522,83 +543,83 @@ ccl_device_inline
bsdf_eval(KernelGlobals kg,
ccl_private ShaderData *sd,
ccl_private const ShaderClosure *sc,
const float3 wo,
const float3 omega_in,
ccl_private float *pdf)
{
Spectrum eval = zero_spectrum();
switch (sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
eval = bsdf_diffuse_eval(sc, sd->wi, wo, pdf);
eval = bsdf_diffuse_eval(sc, sd->I, omega_in, pdf);
break;
#if defined(__SVM__) || defined(__OSL__)
case CLOSURE_BSDF_OREN_NAYAR_ID:
eval = bsdf_oren_nayar_eval(sc, sd->wi, wo, pdf);
eval = bsdf_oren_nayar_eval(sc, sd->I, omega_in, pdf);
break;
# ifdef __OSL__
case CLOSURE_BSDF_PHONG_RAMP_ID:
eval = bsdf_phong_ramp_eval(sc, sd->wi, wo, pdf);
eval = bsdf_phong_ramp_eval(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
eval = bsdf_diffuse_ramp_eval(sc, sd->wi, wo, pdf);
eval = bsdf_diffuse_ramp_eval(sc, sd->I, omega_in, pdf);
break;
# endif
case CLOSURE_BSDF_TRANSLUCENT_ID:
eval = bsdf_translucent_eval(sc, sd->wi, wo, pdf);
eval = bsdf_translucent_eval(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_REFLECTION_ID:
eval = bsdf_reflection_eval(sc, sd->wi, wo, pdf);
eval = bsdf_reflection_eval(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_REFRACTION_ID:
eval = bsdf_refraction_eval(sc, sd->wi, wo, pdf);
eval = bsdf_refraction_eval(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_TRANSPARENT_ID:
eval = bsdf_transparent_eval(sc, sd->wi, wo, pdf);
eval = bsdf_transparent_eval(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
eval = bsdf_microfacet_ggx_eval(sc, sd->N, sd->wi, wo, pdf);
eval = bsdf_microfacet_ggx_eval(sc, sd->N, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
eval = bsdf_microfacet_multi_ggx_eval(sc, sd->N, sd->wi, wo, pdf, &sd->lcg_state);
eval = bsdf_microfacet_multi_ggx_eval(sc, sd->N, sd->I, omega_in, pdf, &sd->lcg_state);
break;
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
eval = bsdf_microfacet_multi_ggx_glass_eval(sc, sd->wi, wo, pdf, &sd->lcg_state);
eval = bsdf_microfacet_multi_ggx_glass_eval(sc, sd->I, omega_in, pdf, &sd->lcg_state);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
eval = bsdf_microfacet_beckmann_eval(sc, sd->N, sd->wi, wo, pdf);
eval = bsdf_microfacet_beckmann_eval(sc, sd->N, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
eval = bsdf_ashikhmin_shirley_eval(sc, sd->N, sd->wi, wo, pdf);
eval = bsdf_ashikhmin_shirley_eval(sc, sd->N, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
eval = bsdf_ashikhmin_velvet_eval(sc, sd->wi, wo, pdf);
eval = bsdf_ashikhmin_velvet_eval(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
eval = bsdf_diffuse_toon_eval(sc, sd->wi, wo, pdf);
eval = bsdf_diffuse_toon_eval(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_GLOSSY_TOON_ID:
eval = bsdf_glossy_toon_eval(sc, sd->wi, wo, pdf);
eval = bsdf_glossy_toon_eval(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
eval = bsdf_principled_hair_eval(kg, sd, sc, wo, pdf);
eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf);
break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
eval = bsdf_hair_reflection_eval(sc, sd->wi, wo, pdf);
eval = bsdf_hair_reflection_eval(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
eval = bsdf_hair_transmission_eval(sc, sd->wi, wo, pdf);
eval = bsdf_hair_transmission_eval(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
eval = bsdf_principled_diffuse_eval(sc, sd->wi, wo, pdf);
eval = bsdf_principled_diffuse_eval(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
eval = bsdf_principled_sheen_eval(sc, sd->wi, wo, pdf);
eval = bsdf_principled_sheen_eval(sc, sd->I, omega_in, pdf);
break;
#endif
default:
@@ -607,7 +628,7 @@ ccl_device_inline
if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
if (!isequal(sc->N, sd->N)) {
eval *= bump_shadowing_term(sd->N, sc->N, wo);
eval *= bump_shadowing_term(sd->N, sc->N, omega_in);
}
}
@@ -615,9 +636,9 @@ ccl_device_inline
const float frequency_multiplier =
kernel_data_fetch(objects, sd->object).shadow_terminator_shading_offset;
if (frequency_multiplier > 1.0f) {
const float cosNO = dot(wo, sc->N);
if (cosNO >= 0.0f) {
eval *= shift_cos_in(cosNO, frequency_multiplier);
const float cosNI = dot(omega_in, sc->N);
if (cosNI >= 0.0f) {
eval *= shift_cos_in(cosNI, frequency_multiplier);
}
}

View File

@@ -41,20 +41,20 @@ ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float rough
ccl_device_forceinline Spectrum bsdf_ashikhmin_shirley_eval(ccl_private const ShaderClosure *sc,
const float3 Ng,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
const float cosNgO = dot(Ng, wo);
const float cosNgI = dot(Ng, omega_in);
float3 N = bsdf->N;
float NdotI = dot(N, wi);
float NdotO = dot(N, wo);
float NdotI = dot(N, I); /* in Cycles/OSL convention I is omega_out */
float NdotO = dot(N, omega_in); /* and consequently we use for O omaga_in ;) */
float out = 0.0f;
if ((cosNgO < 0.0f) || fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f ||
if ((cosNgI < 0.0f) || fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f ||
!(NdotI > 0.0f && NdotO > 0.0f)) {
*pdf = 0.0f;
return zero_spectrum();
@@ -62,15 +62,15 @@ ccl_device_forceinline Spectrum bsdf_ashikhmin_shirley_eval(ccl_private const Sh
NdotI = fmaxf(NdotI, 1e-6f);
NdotO = fmaxf(NdotO, 1e-6f);
float3 H = normalize(wi + wo);
float HdotI = fmaxf(fabsf(dot(H, wi)), 1e-6f);
float3 H = normalize(omega_in + I);
float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f);
float HdotN = fmaxf(dot(H, N), 1e-6f);
/* pump from original paper
* (first derivative disc., but cancels the HdotI in the pdf nicely) */
float pump = 1.0f / fmaxf(1e-6f, (HdotI * fmaxf(NdotI, NdotO)));
float pump = 1.0f / fmaxf(1e-6f, (HdotI * fmaxf(NdotO, NdotI)));
/* pump from d-brdf paper */
/*float pump = 1.0f / fmaxf(1e-4f, ((NdotI + NdotO) * (NdotI * NdotO))); */
/*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */
float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
@@ -124,11 +124,11 @@ ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(float n_x,
ccl_device int bsdf_ashikhmin_shirley_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf,
ccl_private float2 *sampled_roughness)
{
@@ -137,7 +137,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(ccl_private const ShaderClosure *sc
float3 N = bsdf->N;
int label = LABEL_REFLECT | LABEL_GLOSSY;
float NdotI = dot(N, wi);
float NdotI = dot(N, I);
if (!(NdotI > 0.0f)) {
*pdf = 0.0f;
*eval = zero_spectrum();
@@ -198,12 +198,12 @@ ccl_device int bsdf_ashikhmin_shirley_sample(ccl_private const ShaderClosure *sc
/* half vector to world space */
float3 H = h.x * X + h.y * Y + h.z * N;
float HdotI = dot(H, wi);
float HdotI = dot(H, I);
if (HdotI < 0.0f)
H = -H;
/* reflect wi on H to get wo */
*wo = -wi + (2.0f * HdotI) * H;
/* reflect I on H to get omega_in */
*omega_in = -I + (2.0f * HdotI) * H;
if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) {
/* Some high number for MIS. */
@@ -213,7 +213,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(ccl_private const ShaderClosure *sc
}
else {
/* leave the rest to eval */
*eval = bsdf_ashikhmin_shirley_eval(sc, N, wi, *wo, pdf);
*eval = bsdf_ashikhmin_shirley_eval(sc, N, I, *omega_in, pdf);
}
return label;

View File

@@ -32,35 +32,35 @@ ccl_device int bsdf_ashikhmin_velvet_setup(ccl_private VelvetBsdf *bsdf)
}
ccl_device Spectrum bsdf_ashikhmin_velvet_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
ccl_private const VelvetBsdf *bsdf = (ccl_private const VelvetBsdf *)sc;
float m_invsigma2 = bsdf->invsigma2;
float3 N = bsdf->N;
float cosNI = dot(N, wi);
float cosNO = dot(N, wo);
if (!(cosNI > 0 && cosNO > 0)) {
float cosNO = dot(N, I);
float cosNI = dot(N, omega_in);
if (!(cosNO > 0 && cosNI > 0)) {
*pdf = 0.0f;
return zero_spectrum();
}
float3 H = normalize(wi + wo);
float3 H = normalize(omega_in + I);
float cosNH = dot(N, H);
float cosHI = fabsf(dot(wi, H));
float cosHO = fabsf(dot(I, H));
if (!(fabsf(cosNH) < 1.0f - 1e-5f && cosHI > 1e-5f)) {
if (!(fabsf(cosNH) < 1.0f - 1e-5f && cosHO > 1e-5f)) {
*pdf = 0.0f;
return zero_spectrum();
}
float cosNHdivHI = cosNH / cosHI;
cosNHdivHI = fmaxf(cosNHdivHI, 1e-5f);
float cosNHdivHO = cosNH / cosHO;
cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
float fac1 = 2 * fabsf(cosNHdivHI * cosNI);
float fac2 = 2 * fabsf(cosNHdivHI * cosNO);
float fac1 = 2 * fabsf(cosNHdivHO * cosNO);
float fac2 = 2 * fabsf(cosNHdivHO * cosNI);
float sinNH2 = 1 - cosNH * cosNH;
float sinNH4 = sinNH2 * sinNH2;
@@ -69,7 +69,7 @@ ccl_device Spectrum bsdf_ashikhmin_velvet_eval(ccl_private const ShaderClosure *
float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
float G = fminf(1.0f, fminf(fac1, fac2)); // TODO: derive G from D analytically
float out = 0.25f * (D * G) / cosNI;
float out = 0.25f * (D * G) / cosNO;
*pdf = 0.5f * M_1_PI_F;
return make_spectrum(out);
@@ -77,11 +77,11 @@ ccl_device Spectrum bsdf_ashikhmin_velvet_eval(ccl_private const ShaderClosure *
ccl_device int bsdf_ashikhmin_velvet_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf)
{
ccl_private const VelvetBsdf *bsdf = (ccl_private const VelvetBsdf *)sc;
@@ -90,32 +90,32 @@ ccl_device int bsdf_ashikhmin_velvet_sample(ccl_private const ShaderClosure *sc,
// we are viewing the surface from above - send a ray out with uniform
// distribution over the hemisphere
sample_uniform_hemisphere(N, randu, randv, wo, pdf);
sample_uniform_hemisphere(N, randu, randv, omega_in, pdf);
if (!(dot(Ng, *wo) > 0)) {
if (!(dot(Ng, *omega_in) > 0)) {
*pdf = 0.0f;
*eval = zero_spectrum();
return LABEL_NONE;
}
float3 H = normalize(wi + *wo);
float3 H = normalize(*omega_in + I);
float cosNI = dot(N, wi);
float cosNO = dot(N, *wo);
float cosHI = fabsf(dot(wi, H));
float cosNI = dot(N, *omega_in);
float cosNO = dot(N, I);
float cosNH = dot(N, H);
float cosHO = fabsf(dot(I, H));
if (!(fabsf(cosNI) > 1e-5f && fabsf(cosNH) < 1.0f - 1e-5f && cosHI > 1e-5f)) {
if (!(fabsf(cosNO) > 1e-5f && fabsf(cosNH) < 1.0f - 1e-5f && cosHO > 1e-5f)) {
*pdf = 0.0f;
*eval = zero_spectrum();
return LABEL_NONE;
}
float cosNHdivHI = cosNH / cosHI;
cosNHdivHI = fmaxf(cosNHdivHI, 1e-5f);
float cosNHdivHO = cosNH / cosHO;
cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
float fac1 = 2 * fabsf(cosNHdivHI * cosNI);
float fac2 = 2 * fabsf(cosNHdivHI * cosNO);
float fac1 = 2 * fabsf(cosNHdivHO * cosNO);
float fac2 = 2 * fabsf(cosNHdivHO * cosNI);
float sinNH2 = 1 - cosNH * cosNH;
float sinNH4 = sinNH2 * sinNH2;
@@ -124,7 +124,7 @@ ccl_device int bsdf_ashikhmin_velvet_sample(ccl_private const ShaderClosure *sc,
float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
float G = fminf(1.0f, fminf(fac1, fac2)); // TODO: derive G from D analytically
float power = 0.25f * (D * G) / cosNI;
float power = 0.25f * (D * G) / cosNO;
*eval = make_spectrum(power);

View File

@@ -27,34 +27,34 @@ ccl_device int bsdf_diffuse_setup(ccl_private DiffuseBsdf *bsdf)
}
ccl_device Spectrum bsdf_diffuse_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
float3 N = bsdf->N;
float cosNO = fmaxf(dot(N, wo), 0.0f) * M_1_PI_F;
*pdf = cosNO;
return make_spectrum(cosNO);
float cos_pi = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
*pdf = cos_pi;
return make_spectrum(cos_pi);
}
ccl_device int bsdf_diffuse_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf)
{
ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
float3 N = bsdf->N;
// distribution over the hemisphere
sample_cos_hemisphere(N, randu, randv, wo, pdf);
sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
if (dot(Ng, *wo) > 0.0f) {
if (dot(Ng, *omega_in) > 0.0f) {
*eval = make_spectrum(*pdf);
}
else {
@@ -73,25 +73,25 @@ ccl_device int bsdf_translucent_setup(ccl_private DiffuseBsdf *bsdf)
}
ccl_device Spectrum bsdf_translucent_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
float3 N = bsdf->N;
float cosNO = fmaxf(-dot(N, wo), 0.0f) * M_1_PI_F;
*pdf = cosNO;
return make_spectrum(cosNO);
float cos_pi = fmaxf(-dot(N, omega_in), 0.0f) * M_1_PI_F;
*pdf = cos_pi;
return make_spectrum(cos_pi);
}
ccl_device int bsdf_translucent_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf)
{
ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
@@ -99,8 +99,8 @@ ccl_device int bsdf_translucent_sample(ccl_private const ShaderClosure *sc,
// we are viewing the surface from the right side - send a ray out with cosine
// distribution over the hemisphere
sample_cos_hemisphere(-N, randu, randv, wo, pdf);
if (dot(Ng, *wo) < 0) {
sample_cos_hemisphere(-N, randu, randv, omega_in, pdf);
if (dot(Ng, *omega_in) < 0) {
*eval = make_spectrum(*pdf);
}
else {

View File

@@ -48,17 +48,17 @@ ccl_device void bsdf_diffuse_ramp_blur(ccl_private ShaderClosure *sc, float roug
}
ccl_device Spectrum bsdf_diffuse_ramp_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc;
float3 N = bsdf->N;
float cosNO = fmaxf(dot(N, wo), 0.0f);
if (cosNO >= 0.0f) {
*pdf = cosNO * M_1_PI_F;
return rgb_to_spectrum(bsdf_diffuse_ramp_get_color(bsdf->colors, cosNO) * M_1_PI_F);
float cos_pi = fmaxf(dot(N, omega_in), 0.0f);
if (cos_pi >= 0.0f) {
*pdf = cos_pi * M_1_PI_F;
return rgb_to_spectrum(bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F);
}
else {
*pdf = 0.0f;
@@ -68,20 +68,20 @@ ccl_device Spectrum bsdf_diffuse_ramp_eval(ccl_private const ShaderClosure *sc,
ccl_device int bsdf_diffuse_ramp_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf)
{
const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc;
float3 N = bsdf->N;
// distribution over the hemisphere
sample_cos_hemisphere(N, randu, randv, wo, pdf);
sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
if (dot(Ng, *wo) > 0.0f) {
if (dot(Ng, *omega_in) > 0.0f) {
*eval = rgb_to_spectrum(bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F);
}
else {

View File

@@ -38,12 +38,12 @@ ccl_device int bsdf_hair_transmission_setup(ccl_private HairBsdf *bsdf)
}
ccl_device Spectrum bsdf_hair_reflection_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
if (dot(bsdf->N, wo) < 0.0f) {
if (dot(bsdf->N, omega_in) < 0.0f) {
*pdf = 0.0f;
return zero_spectrum();
}
@@ -53,16 +53,16 @@ ccl_device Spectrum bsdf_hair_reflection_eval(ccl_private const ShaderClosure *s
float roughness1 = bsdf->roughness1;
float roughness2 = bsdf->roughness2;
float Iz = dot(Tg, wi);
float3 locy = normalize(wi - Tg * Iz);
float Iz = dot(Tg, I);
float3 locy = normalize(I - Tg * Iz);
float theta_r = M_PI_2_F - fast_acosf(Iz);
float wo_z = dot(Tg, wo);
float3 wo_y = normalize(wo - Tg * wo_z);
float omega_in_z = dot(Tg, omega_in);
float3 omega_in_y = normalize(omega_in - Tg * omega_in_z);
float theta_i = M_PI_2_F - fast_acosf(wo_z);
float cosphi_i = dot(wo_y, locy);
float theta_i = M_PI_2_F - fast_acosf(omega_in_z);
float cosphi_i = dot(omega_in_y, locy);
if (M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f) {
*pdf = 0.0f;
@@ -90,12 +90,12 @@ ccl_device Spectrum bsdf_hair_reflection_eval(ccl_private const ShaderClosure *s
}
ccl_device Spectrum bsdf_hair_transmission_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
if (dot(bsdf->N, wo) >= 0.0f) {
if (dot(bsdf->N, omega_in) >= 0.0f) {
*pdf = 0.0f;
return zero_spectrum();
}
@@ -104,16 +104,16 @@ ccl_device Spectrum bsdf_hair_transmission_eval(ccl_private const ShaderClosure
float3 Tg = bsdf->T;
float roughness1 = bsdf->roughness1;
float roughness2 = bsdf->roughness2;
float Iz = dot(Tg, wi);
float3 locy = normalize(wi - Tg * Iz);
float Iz = dot(Tg, I);
float3 locy = normalize(I - Tg * Iz);
float theta_r = M_PI_2_F - fast_acosf(Iz);
float wo_z = dot(Tg, wo);
float3 wo_y = normalize(wo - Tg * wo_z);
float omega_in_z = dot(Tg, omega_in);
float3 omega_in_y = normalize(omega_in - Tg * omega_in_z);
float theta_i = M_PI_2_F - fast_acosf(wo_z);
float phi_i = fast_acosf(dot(wo_y, locy));
float theta_i = M_PI_2_F - fast_acosf(omega_in_z);
float phi_i = fast_acosf(dot(omega_in_y, locy));
if (M_PI_2_F - fabsf(theta_i) < 0.001f) {
*pdf = 0.0f;
@@ -142,11 +142,11 @@ ccl_device Spectrum bsdf_hair_transmission_eval(ccl_private const ShaderClosure
ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf,
ccl_private float2 *sampled_roughness)
{
@@ -156,8 +156,8 @@ ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc,
float roughness1 = bsdf->roughness1;
float roughness2 = bsdf->roughness2;
*sampled_roughness = make_float2(roughness1, roughness2);
float Iz = dot(Tg, wi);
float3 locy = normalize(wi - Tg * Iz);
float Iz = dot(Tg, I);
float3 locy = normalize(I - Tg * Iz);
float3 locx = cross(locy, Tg);
float theta_r = M_PI_2_F - fast_acosf(Iz);
@@ -182,7 +182,7 @@ ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc,
float sinphi, cosphi;
fast_sincosf(phi, &sinphi, &cosphi);
*wo = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
*omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
*pdf = fabsf(phi_pdf * theta_pdf);
if (M_PI_2_F - fabsf(theta_i) < 0.001f)
@@ -195,11 +195,11 @@ ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc,
ccl_device int bsdf_hair_transmission_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf,
ccl_private float2 *sampled_roughness)
{
@@ -209,8 +209,8 @@ ccl_device int bsdf_hair_transmission_sample(ccl_private const ShaderClosure *sc
float roughness1 = bsdf->roughness1;
float roughness2 = bsdf->roughness2;
*sampled_roughness = make_float2(roughness1, roughness2);
float Iz = dot(Tg, wi);
float3 locy = normalize(wi - Tg * Iz);
float Iz = dot(Tg, I);
float3 locy = normalize(I - Tg * Iz);
float3 locx = cross(locy, Tg);
float theta_r = M_PI_2_F - fast_acosf(Iz);
@@ -235,7 +235,7 @@ ccl_device int bsdf_hair_transmission_sample(ccl_private const ShaderClosure *sc
float sinphi, cosphi;
fast_sincosf(phi, &sinphi, &cosphi);
*wo = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
*omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
*pdf = fabsf(phi_pdf * theta_pdf);
if (M_PI_2_F - fabsf(theta_i) < 0.001f) {
@@ -247,7 +247,7 @@ ccl_device int bsdf_hair_transmission_sample(ccl_private const ShaderClosure *sc
/* TODO(sergey): Should always be negative, but seems some precision issue
* is involved here.
*/
kernel_assert(dot(locy, *wo) < 1e-4f);
kernel_assert(dot(locy, *omega_in) < 1e-4f);
return LABEL_TRANSMIT | LABEL_GLOSSY;
}

View File

@@ -41,6 +41,11 @@ static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairBSDF),
static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairExtra),
"PrincipledHairExtra is too large!");
ccl_device_inline float cos_from_sin(const float s)
{
return safe_sqrtf(1.0f - s * s);
}
/* Gives the change in direction in the normal plane for the given angles and p-th-order
* scattering. */
ccl_device_inline float delta_phi(int p, float gamma_o, float gamma_t)
@@ -174,7 +179,7 @@ ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd,
/* Compute local frame, aligned to curve tangent and ray direction. */
float3 X = safe_normalize(sd->dPdu);
float3 Y = safe_normalize(cross(X, sd->wi));
float3 Y = safe_normalize(cross(X, sd->I));
float3 Z = safe_normalize(cross(X, Y));
/* h -1..0..1 means the rays goes from grazing the hair, to hitting it at
@@ -254,7 +259,7 @@ ccl_device_inline void hair_alpha_angles(float sin_theta_i,
ccl_device Spectrum bsdf_principled_hair_eval(KernelGlobals kg,
ccl_private const ShaderData *sd,
ccl_private const ShaderClosure *sc,
const float3 wo,
const float3 omega_in,
ccl_private float *pdf)
{
kernel_assert(isfinite_safe(sd->P) && isfinite_safe(sd->ray_length));
@@ -266,13 +271,12 @@ ccl_device Spectrum bsdf_principled_hair_eval(KernelGlobals kg,
kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
const float3 Z = safe_normalize(cross(X, Y));
/* local_I is the illumination direction. */
const float3 local_O = make_float3(dot(sd->wi, X), dot(sd->wi, Y), dot(sd->wi, Z));
const float3 local_I = make_float3(dot(wo, X), dot(wo, Y), dot(wo, Z));
const float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
const float3 wi = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
const float sin_theta_o = local_O.x;
const float sin_theta_o = wo.x;
const float cos_theta_o = cos_from_sin(sin_theta_o);
const float phi_o = atan2f(local_O.z, local_O.y);
const float phi_o = atan2f(wo.z, wo.y);
const float sin_theta_t = sin_theta_o / bsdf->eta;
const float cos_theta_t = cos_from_sin(sin_theta_t);
@@ -291,9 +295,9 @@ ccl_device Spectrum bsdf_principled_hair_eval(KernelGlobals kg,
hair_attenuation(
kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap, Ap_energy);
const float sin_theta_i = local_I.x;
const float sin_theta_i = wi.x;
const float cos_theta_i = cos_from_sin(sin_theta_i);
const float phi_i = atan2f(local_I.z, local_I.y);
const float phi_i = atan2f(wi.z, wi.y);
const float phi = phi_i - phi_o;
@@ -339,7 +343,7 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf,
ccl_private float2 *sampled_roughness,
ccl_private float *eta)
@@ -355,16 +359,16 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
const float3 Z = safe_normalize(cross(X, Y));
const float3 local_O = make_float3(dot(sd->wi, X), dot(sd->wi, Y), dot(sd->wi, Z));
const float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
float2 u[2];
u[0] = make_float2(randu, randv);
u[1].x = lcg_step_float(&sd->lcg_state);
u[1].y = lcg_step_float(&sd->lcg_state);
const float sin_theta_o = local_O.x;
const float sin_theta_o = wo.x;
const float cos_theta_o = cos_from_sin(sin_theta_o);
const float phi_o = atan2f(local_O.z, local_O.y);
const float phi_o = atan2f(wo.z, wo.y);
const float sin_theta_t = sin_theta_o / bsdf->eta;
const float cos_theta_t = cos_from_sin(sin_theta_t);
@@ -454,7 +458,7 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
*eval = F;
*pdf = F_energy;
*wo = X * sin_theta_i + Y * cos_theta_i * cosf(phi_i) + Z * cos_theta_i * sinf(phi_i);
*omega_in = X * sin_theta_i + Y * cos_theta_i * cosf(phi_i) + Z * cos_theta_i * sinf(phi_i);
return LABEL_GLOSSY | ((p == 0) ? LABEL_REFLECT : LABEL_TRANSMIT);
}

File diff suppressed because it is too large Load Diff

View File

@@ -43,7 +43,7 @@ ccl_device_forceinline float2 mf_sampleP22_11(const float cosI,
return make_float2(r * cosf(phi), r * sinf(phi));
}
const float sinI = sin_from_cos(cosI);
const float sinI = safe_sqrtf(1.0f - cosI * cosI);
const float tanI = sinI / cosI;
const float projA = 0.5f * (cosI + 1.0f);
if (projA < 0.0001f)
@@ -417,15 +417,15 @@ ccl_device int bsdf_microfacet_multi_ggx_refraction_setup(ccl_private Microfacet
ccl_device Spectrum bsdf_microfacet_multi_ggx_eval(ccl_private const ShaderClosure *sc,
const float3 Ng,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf,
ccl_private uint *lcg_state)
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
const float cosNgO = dot(Ng, wo);
const float cosNgI = dot(Ng, omega_in);
if ((cosNgO < 0.0f) || bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
if ((cosNgI < 0.0f) || bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
*pdf = 0.0f;
return zero_spectrum();
}
@@ -434,7 +434,7 @@ ccl_device Spectrum bsdf_microfacet_multi_ggx_eval(ccl_private const ShaderClosu
Z = bsdf->N;
/* Ensure that the both directions are on the outside w.r.t. the shading normal. */
if (dot(Z, wi) <= 0.0f || dot(Z, wo) <= 0.0f) {
if (dot(Z, I) <= 0.0f || dot(Z, omega_in) <= 0.0f) {
*pdf = 0.0f;
return zero_spectrum();
}
@@ -447,21 +447,21 @@ ccl_device Spectrum bsdf_microfacet_multi_ggx_eval(ccl_private const ShaderClosu
else
make_orthonormals(Z, &X, &Y);
float3 local_I = make_float3(dot(wi, X), dot(wi, Y), dot(wi, Z));
float3 local_O = make_float3(dot(wo, X), dot(wo, Y), dot(wo, Z));
float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
if (is_aniso)
*pdf = mf_ggx_aniso_pdf(local_I, local_O, make_float2(bsdf->alpha_x, bsdf->alpha_y));
*pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
else
*pdf = mf_ggx_pdf(local_I, local_O, bsdf->alpha_x);
*pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
if (*pdf <= 0.f) {
*pdf = 0.f;
return make_float3(0.f, 0.f, 0.f);
}
return mf_eval_glossy(local_I,
local_O,
return mf_eval_glossy(localI,
localO,
true,
bsdf->extra->color,
bsdf->alpha_x,
@@ -475,11 +475,11 @@ ccl_device Spectrum bsdf_microfacet_multi_ggx_eval(ccl_private const ShaderClosu
ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg,
ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf,
ccl_private uint *lcg_state,
ccl_private float2 *sampled_roughness,
@@ -491,7 +491,7 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg,
Z = bsdf->N;
/* Ensure that the view direction is on the outside w.r.t. the shading normal. */
if (dot(Z, wi) <= 0.0f) {
if (dot(Z, I) <= 0.0f) {
*pdf = 0.0f;
return LABEL_NONE;
}
@@ -499,8 +499,8 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg,
/* Special case: Extremely low roughness.
* Don't bother with microfacets, just do specular reflection. */
if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
*wo = 2 * dot(Z, wi) * Z - wi;
if (dot(Ng, *wo) <= 0.0f) {
*omega_in = 2 * dot(Z, I) * Z - I;
if (dot(Ng, *omega_in) <= 0.0f) {
*pdf = 0.0f;
return LABEL_NONE;
}
@@ -520,11 +520,11 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg,
else
make_orthonormals(Z, &X, &Y);
float3 local_I = make_float3(dot(wi, X), dot(wi, Y), dot(wi, Z));
float3 local_O;
float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
float3 localO;
*eval = mf_sample_glossy(local_I,
&local_O,
*eval = mf_sample_glossy(localI,
&localO,
bsdf->extra->color,
bsdf->alpha_x,
bsdf->alpha_y,
@@ -532,18 +532,18 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg,
bsdf->ior,
use_fresnel,
bsdf->extra->cspec0);
*wo = X * local_O.x + Y * local_O.y + Z * local_O.z;
*omega_in = X * localO.x + Y * localO.y + Z * localO.z;
/* Ensure that the light direction is on the outside w.r.t. the geometry normal. */
if (dot(Ng, *wo) <= 0.0f) {
if (dot(Ng, *omega_in) <= 0.0f) {
*pdf = 0.0f;
return LABEL_NONE;
}
if (is_aniso)
*pdf = mf_ggx_aniso_pdf(local_I, local_O, make_float2(bsdf->alpha_x, bsdf->alpha_y));
*pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
else
*pdf = mf_ggx_pdf(local_I, local_O, bsdf->alpha_x);
*pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
*pdf = fmaxf(0.f, *pdf);
*eval *= *pdf;
@@ -581,8 +581,8 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(ccl_private Microfa
}
ccl_device Spectrum bsdf_microfacet_multi_ggx_glass_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf,
ccl_private uint *lcg_state)
{
@@ -597,17 +597,17 @@ ccl_device Spectrum bsdf_microfacet_multi_ggx_glass_eval(ccl_private const Shade
Z = bsdf->N;
make_orthonormals(Z, &X, &Y);
float3 local_I = make_float3(dot(wi, X), dot(wi, Y), dot(wi, Z));
float3 local_O = make_float3(dot(wo, X), dot(wo, Y), dot(wo, Z));
float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
const bool is_transmission = local_O.z < 0.0f;
const bool is_transmission = localO.z < 0.0f;
const bool use_fresnel = !is_transmission &&
(bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID);
*pdf = mf_glass_pdf(local_I, local_O, bsdf->alpha_x, bsdf->ior);
*pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
kernel_assert(*pdf >= 0.f);
return mf_eval_glass(local_I,
local_O,
return mf_eval_glass(localI,
localO,
!is_transmission,
bsdf->extra->color,
bsdf->alpha_x,
@@ -621,11 +621,11 @@ ccl_device Spectrum bsdf_microfacet_multi_ggx_glass_eval(ccl_private const Shade
ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg,
ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf,
ccl_private uint *lcg_state,
ccl_private float2 *sampled_roughness,
@@ -642,16 +642,16 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg,
if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
float3 R, T;
bool inside;
float fresnel = fresnel_dielectric(bsdf->ior, Z, wi, &R, &T, &inside);
float fresnel = fresnel_dielectric(bsdf->ior, Z, I, &R, &T, &inside);
*pdf = 1e6f;
*eval = make_spectrum(1e6f);
if (randu < fresnel) {
*wo = R;
*omega_in = R;
return LABEL_REFLECT | LABEL_SINGULAR;
}
else {
*wo = T;
*omega_in = T;
return LABEL_TRANSMIT | LABEL_SINGULAR;
}
}
@@ -660,11 +660,11 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg,
make_orthonormals(Z, &X, &Y);
float3 local_I = make_float3(dot(wi, X), dot(wi, Y), dot(wi, Z));
float3 local_O;
float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
float3 localO;
*eval = mf_sample_glass(local_I,
&local_O,
*eval = mf_sample_glass(localI,
&localO,
bsdf->extra->color,
bsdf->alpha_x,
bsdf->alpha_y,
@@ -672,12 +672,12 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg,
bsdf->ior,
use_fresnel,
bsdf->extra->cspec0);
*pdf = mf_glass_pdf(local_I, local_O, bsdf->alpha_x, bsdf->ior);
*pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
kernel_assert(*pdf >= 0.f);
*eval *= *pdf;
*wo = X * local_O.x + Y * local_O.y + Z * local_O.z;
if (local_O.z * local_I.z > 0.0f) {
*omega_in = X * localO.x + Y * localO.y + Z * localO.z;
if (localO.z * localI.z > 0.0f) {
return LABEL_REFLECT | LABEL_GLOSSY;
}
else {

View File

@@ -48,14 +48,14 @@ ccl_device int bsdf_oren_nayar_setup(ccl_private OrenNayarBsdf *bsdf)
}
ccl_device Spectrum bsdf_oren_nayar_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc;
if (dot(bsdf->N, wo) > 0.0f) {
if (dot(bsdf->N, omega_in) > 0.0f) {
*pdf = 0.5f * M_1_PI_F;
return bsdf_oren_nayar_get_intensity(sc, bsdf->N, wi, wo);
return bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, omega_in);
}
else {
*pdf = 0.0f;
@@ -65,18 +65,18 @@ ccl_device Spectrum bsdf_oren_nayar_eval(ccl_private const ShaderClosure *sc,
ccl_device int bsdf_oren_nayar_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf)
{
ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc;
sample_uniform_hemisphere(bsdf->N, randu, randv, wo, pdf);
sample_uniform_hemisphere(bsdf->N, randu, randv, omega_in, pdf);
if (dot(Ng, *wo) > 0.0f) {
*eval = bsdf_oren_nayar_get_intensity(sc, bsdf->N, wi, *wo);
if (dot(Ng, *omega_in) > 0.0f) {
*eval = bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, *omega_in);
}
else {
*pdf = 0.0f;

View File

@@ -45,23 +45,23 @@ ccl_device int bsdf_phong_ramp_setup(ccl_private PhongRampBsdf *bsdf)
}
ccl_device Spectrum bsdf_phong_ramp_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
ccl_private const PhongRampBsdf *bsdf = (ccl_private const PhongRampBsdf *)sc;
float m_exponent = bsdf->exponent;
float cosNI = dot(bsdf->N, wi);
float cosNO = dot(bsdf->N, wo);
float cosNI = dot(bsdf->N, omega_in);
float cosNO = dot(bsdf->N, I);
if (cosNI > 0 && cosNO > 0) {
// reflect the view vector
float3 R = (2 * cosNI) * bsdf->N - wi;
float cosRO = dot(R, wo);
if (cosRO > 0) {
float cosp = powf(cosRO, m_exponent);
float3 R = (2 * cosNO) * bsdf->N - I;
float cosRI = dot(R, omega_in);
if (cosRI > 0) {
float cosp = powf(cosRI, m_exponent);
float common = 0.5f * M_1_PI_F * cosp;
float out = cosNO * (m_exponent + 2) * common;
float out = cosNI * (m_exponent + 2) * common;
*pdf = (m_exponent + 1) * common;
return rgb_to_spectrum(bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out);
}
@@ -77,39 +77,39 @@ ccl_device_inline float phong_ramp_exponent_to_roughness(float exponent)
ccl_device int bsdf_phong_ramp_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf,
ccl_private float2 *sampled_roughness)
{
ccl_private const PhongRampBsdf *bsdf = (ccl_private const PhongRampBsdf *)sc;
float cosNI = dot(bsdf->N, wi);
float cosNO = dot(bsdf->N, I);
float m_exponent = bsdf->exponent;
const float m_roughness = phong_ramp_exponent_to_roughness(m_exponent);
*sampled_roughness = make_float2(m_roughness, m_roughness);
if (cosNI > 0) {
if (cosNO > 0) {
// reflect the view vector
float3 R = (2 * cosNI) * bsdf->N - wi;
float3 R = (2 * cosNO) * bsdf->N - I;
float3 T, B;
make_orthonormals(R, &T, &B);
float phi = M_2PI_F * randu;
float cosTheta = powf(randv, 1 / (m_exponent + 1));
float sinTheta2 = 1 - cosTheta * cosTheta;
float sinTheta = sinTheta2 > 0 ? sqrtf(sinTheta2) : 0;
*wo = (cosf(phi) * sinTheta) * T + (sinf(phi) * sinTheta) * B + (cosTheta)*R;
if (dot(Ng, *wo) > 0.0f) {
*omega_in = (cosf(phi) * sinTheta) * T + (sinf(phi) * sinTheta) * B + (cosTheta)*R;
if (dot(Ng, *omega_in) > 0.0f) {
// common terms for pdf and eval
float cosNO = dot(bsdf->N, *wo);
float cosNI = dot(bsdf->N, *omega_in);
// make sure the direction we chose is still in the right hemisphere
if (cosNO > 0) {
if (cosNI > 0) {
float cosp = powf(cosTheta, m_exponent);
float common = 0.5f * M_1_PI_F * cosp;
*pdf = (m_exponent + 1) * common;
float out = cosNO * (m_exponent + 2) * common;
float out = cosNI * (m_exponent + 2) * common;
*eval = rgb_to_spectrum(bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out);
}
}

View File

@@ -110,17 +110,17 @@ ccl_device int bsdf_principled_diffuse_setup(ccl_private PrincipledDiffuseBsdf *
}
ccl_device Spectrum bsdf_principled_diffuse_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *)sc;
const float3 N = bsdf->N;
if (dot(N, wo) > 0.0f) {
const float3 V = wi;
const float3 L = wo;
*pdf = fmaxf(dot(N, wo), 0.0f) * M_1_PI_F;
if (dot(N, omega_in) > 0.0f) {
const float3 V = I; // outgoing
const float3 L = omega_in; // incoming
*pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
return bsdf_principled_diffuse_compute_brdf(bsdf, N, V, L, pdf);
}
else {
@@ -131,21 +131,21 @@ ccl_device Spectrum bsdf_principled_diffuse_eval(ccl_private const ShaderClosure
ccl_device int bsdf_principled_diffuse_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf)
{
ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *)sc;
float3 N = bsdf->N;
sample_cos_hemisphere(N, randu, randv, wo, pdf);
sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
if (dot(Ng, *wo) > 0) {
*eval = bsdf_principled_diffuse_compute_brdf(bsdf, N, wi, *wo, pdf);
if (dot(Ng, *omega_in) > 0) {
*eval = bsdf_principled_diffuse_compute_brdf(bsdf, N, I, *omega_in, pdf);
}
else {
*pdf = 0.0f;

View File

@@ -54,25 +54,25 @@ ccl_device int bsdf_principled_sheen_setup(ccl_private const ShaderData *sd,
ccl_private PrincipledSheenBsdf *bsdf)
{
bsdf->type = CLOSURE_BSDF_PRINCIPLED_SHEEN_ID;
bsdf->avg_value = calculate_avg_principled_sheen_brdf(bsdf->N, sd->wi);
bsdf->avg_value = calculate_avg_principled_sheen_brdf(bsdf->N, sd->I);
bsdf->sample_weight *= bsdf->avg_value;
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
ccl_device Spectrum bsdf_principled_sheen_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
ccl_private const PrincipledSheenBsdf *bsdf = (ccl_private const PrincipledSheenBsdf *)sc;
const float3 N = bsdf->N;
if (dot(N, wo) > 0.0f) {
const float3 V = wi;
const float3 L = wo;
if (dot(N, omega_in) > 0.0f) {
const float3 V = I; // outgoing
const float3 L = omega_in; // incoming
const float3 H = normalize(L + V);
*pdf = fmaxf(dot(N, wo), 0.0f) * M_1_PI_F;
*pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
return calculate_principled_sheen_brdf(N, V, L, H, pdf);
}
else {
@@ -83,23 +83,23 @@ ccl_device Spectrum bsdf_principled_sheen_eval(ccl_private const ShaderClosure *
ccl_device int bsdf_principled_sheen_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf)
{
ccl_private const PrincipledSheenBsdf *bsdf = (ccl_private const PrincipledSheenBsdf *)sc;
float3 N = bsdf->N;
sample_cos_hemisphere(N, randu, randv, wo, pdf);
sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
if (dot(Ng, *wo) > 0) {
float3 H = normalize(wi + *wo);
if (dot(Ng, *omega_in) > 0) {
float3 H = normalize(I + *omega_in);
*eval = calculate_principled_sheen_brdf(N, wi, *wo, H, pdf);
*eval = calculate_principled_sheen_brdf(N, I, *omega_in, H, pdf);
}
else {
*eval = zero_spectrum();

View File

@@ -19,8 +19,8 @@ ccl_device int bsdf_reflection_setup(ccl_private MicrofacetBsdf *bsdf)
}
ccl_device Spectrum bsdf_reflection_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
*pdf = 0.0f;
@@ -29,11 +29,11 @@ ccl_device Spectrum bsdf_reflection_eval(ccl_private const ShaderClosure *sc,
ccl_device int bsdf_reflection_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf,
ccl_private float *eta)
{
@@ -42,10 +42,10 @@ ccl_device int bsdf_reflection_sample(ccl_private const ShaderClosure *sc,
*eta = bsdf->ior;
// only one direction is possible
float cosNI = dot(N, wi);
if (cosNI > 0) {
*wo = (2 * cosNI) * N - wi;
if (dot(Ng, *wo) > 0) {
float cosNO = dot(N, I);
if (cosNO > 0) {
*omega_in = (2 * cosNO) * N - I;
if (dot(Ng, *omega_in) > 0) {
/* Some high number for MIS. */
*pdf = 1e6f;
*eval = make_spectrum(1e6f);

View File

@@ -19,8 +19,8 @@ ccl_device int bsdf_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
}
ccl_device Spectrum bsdf_refraction_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
*pdf = 0.0f;
@@ -29,11 +29,11 @@ ccl_device Spectrum bsdf_refraction_eval(ccl_private const ShaderClosure *sc,
ccl_device int bsdf_refraction_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf,
ccl_private float *eta)
{
@@ -46,13 +46,13 @@ ccl_device int bsdf_refraction_sample(ccl_private const ShaderClosure *sc,
float3 R, T;
bool inside;
float fresnel;
fresnel = fresnel_dielectric(m_eta, N, wi, &R, &T, &inside);
fresnel = fresnel_dielectric(m_eta, N, I, &R, &T, &inside);
if (!inside && fresnel != 1.0f) {
/* Some high number for MIS. */
*pdf = 1e6f;
*eval = make_spectrum(1e6f);
*wo = T;
*omega_in = T;
}
else {
*pdf = 0.0f;

View File

@@ -50,17 +50,17 @@ ccl_device float bsdf_toon_get_sample_angle(float max_angle, float smooth)
}
ccl_device Spectrum bsdf_diffuse_toon_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
float cosNO = dot(bsdf->N, wo);
float cosNI = dot(bsdf->N, omega_in);
if (cosNO >= 0.0f) {
if (cosNI >= 0.0f) {
float max_angle = bsdf->size * M_PI_2_F;
float smooth = bsdf->smooth * M_PI_2_F;
float angle = safe_acosf(fmaxf(cosNO, 0.0f));
float angle = safe_acosf(fmaxf(cosNI, 0.0f));
float eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
@@ -78,11 +78,11 @@ ccl_device Spectrum bsdf_diffuse_toon_eval(ccl_private const ShaderClosure *sc,
ccl_device int bsdf_diffuse_toon_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf)
{
ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
@@ -92,9 +92,9 @@ ccl_device int bsdf_diffuse_toon_sample(ccl_private const ShaderClosure *sc,
float angle = sample_angle * randu;
if (sample_angle > 0.0f) {
sample_uniform_cone(bsdf->N, sample_angle, randu, randv, wo, pdf);
sample_uniform_cone(bsdf->N, sample_angle, randu, randv, omega_in, pdf);
if (dot(Ng, *wo) > 0.0f) {
if (dot(Ng, *omega_in) > 0.0f) {
*eval = make_spectrum(*pdf * bsdf_toon_get_intensity(max_angle, smooth, angle));
}
else {
@@ -122,22 +122,22 @@ ccl_device int bsdf_glossy_toon_setup(ccl_private ToonBsdf *bsdf)
}
ccl_device Spectrum bsdf_glossy_toon_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
float max_angle = bsdf->size * M_PI_2_F;
float smooth = bsdf->smooth * M_PI_2_F;
float cosNI = dot(bsdf->N, wi);
float cosNO = dot(bsdf->N, wo);
float cosNI = dot(bsdf->N, omega_in);
float cosNO = dot(bsdf->N, I);
if (cosNI > 0 && cosNO > 0) {
/* reflect the view vector */
float3 R = (2 * cosNI) * bsdf->N - wi;
float cosRO = dot(R, wo);
float3 R = (2 * cosNO) * bsdf->N - I;
float cosRI = dot(R, omega_in);
float angle = safe_acosf(fmaxf(cosRO, 0.0f));
float angle = safe_acosf(fmaxf(cosRI, 0.0f));
float eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
@@ -151,32 +151,32 @@ ccl_device Spectrum bsdf_glossy_toon_eval(ccl_private const ShaderClosure *sc,
ccl_device int bsdf_glossy_toon_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf)
{
ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
float max_angle = bsdf->size * M_PI_2_F;
float smooth = bsdf->smooth * M_PI_2_F;
float cosNI = dot(bsdf->N, wi);
float cosNO = dot(bsdf->N, I);
if (cosNI > 0) {
if (cosNO > 0) {
/* reflect the view vector */
float3 R = (2 * cosNI) * bsdf->N - wi;
float3 R = (2 * cosNO) * bsdf->N - I;
float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
float angle = sample_angle * randu;
sample_uniform_cone(R, sample_angle, randu, randv, wo, pdf);
sample_uniform_cone(R, sample_angle, randu, randv, omega_in, pdf);
if (dot(Ng, *wo) > 0.0f) {
float cosNO = dot(bsdf->N, *wo);
if (dot(Ng, *omega_in) > 0.0f) {
float cosNI = dot(bsdf->N, *omega_in);
/* make sure the direction we chose is still in the right hemisphere */
if (cosNO > 0) {
if (cosNI > 0) {
*eval = make_spectrum(*pdf * bsdf_toon_get_intensity(max_angle, smooth, angle));
}
else {

View File

@@ -60,8 +60,8 @@ ccl_device void bsdf_transparent_setup(ccl_private ShaderData *sd,
}
ccl_device Spectrum bsdf_transparent_eval(ccl_private const ShaderClosure *sc,
const float3 wi,
const float3 wo,
const float3 I,
const float3 omega_in,
ccl_private float *pdf)
{
*pdf = 0.0f;
@@ -70,15 +70,15 @@ ccl_device Spectrum bsdf_transparent_eval(ccl_private const ShaderClosure *sc,
ccl_device int bsdf_transparent_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf)
{
// only one direction is possible
*wo = -wi;
*omega_in = -I;
*pdf = 1;
*eval = one_spectrum();
return LABEL_TRANSMIT | LABEL_TRANSPARENT;

View File

@@ -293,7 +293,7 @@ ccl_device int bssrdf_setup(ccl_private ShaderData *sd,
/* Ad-hoc weight adjustment to avoid retro-reflection taking away half the
* samples from BSSRDF. */
bsdf->sample_weight *= bsdf_principled_diffuse_retro_reflection_sample_weight(bsdf, sd->wi);
bsdf->sample_weight *= bsdf_principled_diffuse_retro_reflection_sample_weight(bsdf, sd->I);
}
}

View File

@@ -36,24 +36,27 @@ ccl_device void emission_setup(ccl_private ShaderData *sd, const Spectrum weight
}
}
/* return the probability distribution function in the direction wi,
/* return the probability distribution function in the direction I,
* given the parameters and the light's surface normal. This MUST match
* the PDF computed by sample(). */
ccl_device float emissive_pdf(const float3 Ng, const float3 wi)
ccl_device float emissive_pdf(const float3 Ng, const float3 I)
{
float cosNI = fabsf(dot(Ng, wi));
return (cosNI > 0.0f) ? 1.0f : 0.0f;
float cosNO = fabsf(dot(Ng, I));
return (cosNO > 0.0f) ? 1.0f : 0.0f;
}
ccl_device void emissive_sample(
const float3 Ng, float randu, float randv, ccl_private float3 *wi, ccl_private float *pdf)
ccl_device void emissive_sample(const float3 Ng,
float randu,
float randv,
ccl_private float3 *omega_out,
ccl_private float *pdf)
{
/* todo: not implemented and used yet */
}
ccl_device Spectrum emissive_simple_eval(const float3 Ng, const float3 wi)
ccl_device Spectrum emissive_simple_eval(const float3 Ng, const float3 I)
{
float res = emissive_pdf(Ng, wi);
float res = emissive_pdf(Ng, I);
return make_spectrum(res);
}

View File

@@ -49,18 +49,18 @@ ccl_device int volume_henyey_greenstein_setup(ccl_private HenyeyGreensteinVolume
}
ccl_device Spectrum volume_henyey_greenstein_eval_phase(ccl_private const ShaderVolumeClosure *svc,
const float3 wi,
float3 wo,
const float3 I,
float3 omega_in,
ccl_private float *pdf)
{
float g = svc->g;
/* note that wi points towards the viewer */
/* note that I points towards the viewer */
if (fabsf(g) < 1e-3f) {
*pdf = M_1_PI_F * 0.25f;
}
else {
float cos_theta = dot(-wi, wo);
float cos_theta = dot(-I, omega_in);
*pdf = single_peaked_henyey_greenstein(cos_theta, g);
}
@@ -88,7 +88,7 @@ henyey_greenstrein_sample(float3 D, float g, float randu, float randv, ccl_priva
}
}
float sin_theta = sin_from_cos(cos_theta);
float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta);
float phi = M_2PI_F * randv;
float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta);
@@ -100,17 +100,17 @@ henyey_greenstrein_sample(float3 D, float g, float randu, float randv, ccl_priva
}
ccl_device int volume_henyey_greenstein_sample(ccl_private const ShaderVolumeClosure *svc,
float3 wi,
float3 I,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf)
{
float g = svc->g;
/* note that wi points towards the viewer and so is used negated */
*wo = henyey_greenstrein_sample(-wi, g, randu, randv, pdf);
/* note that I points towards the viewer and so is used negated */
*omega_in = henyey_greenstrein_sample(-I, g, randu, randv, pdf);
*eval = make_spectrum(*pdf); /* perfect importance sampling */
return LABEL_VOLUME_SCATTER;
@@ -120,10 +120,10 @@ ccl_device int volume_henyey_greenstein_sample(ccl_private const ShaderVolumeClo
ccl_device Spectrum volume_phase_eval(ccl_private const ShaderData *sd,
ccl_private const ShaderVolumeClosure *svc,
float3 wo,
float3 omega_in,
ccl_private float *pdf)
{
return volume_henyey_greenstein_eval_phase(svc, sd->wi, wo, pdf);
return volume_henyey_greenstein_eval_phase(svc, sd->I, omega_in, pdf);
}
ccl_device int volume_phase_sample(ccl_private const ShaderData *sd,
@@ -131,10 +131,10 @@ ccl_device int volume_phase_sample(ccl_private const ShaderData *sd,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf)
{
return volume_henyey_greenstein_sample(svc, sd->wi, randu, randv, eval, wo, pdf);
return volume_henyey_greenstein_sample(svc, sd->I, randu, randv, eval, omega_in, pdf);
}
/* Volume sampling utilities. */

View File

@@ -10,9 +10,6 @@
#ifndef KERNEL_STRUCT_MEMBER
# define KERNEL_STRUCT_MEMBER(parent, type, name)
#endif
#ifndef KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
# define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
#endif
/* Background. */
@@ -182,12 +179,9 @@ KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_indirect)
KERNEL_STRUCT_MEMBER(integrator, int, use_caustics)
/* Sampling pattern. */
KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern)
KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
/* Sobol pattern. */
KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
KERNEL_STRUCT_MEMBER(integrator, int, tabulated_sobol_sequence_size)
KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
KERNEL_STRUCT_MEMBER(integrator, int, sobol_index_mask)
KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
/* Volume render. */
KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
KERNEL_STRUCT_MEMBER(integrator, int, volume_max_steps)
@@ -222,5 +216,4 @@ KERNEL_STRUCT_END(KernelSVMUsage)
#undef KERNEL_STRUCT_BEGIN
#undef KERNEL_STRUCT_MEMBER
#undef KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
#undef KERNEL_STRUCT_END

View File

@@ -35,9 +35,15 @@ void kernel_global_memory_copy(KernelGlobalsCPU *kg, const char *name, void *mem
#define KERNEL_ARCH cpu_sse2
#include "kernel/device/cpu/kernel_arch.h"
#define KERNEL_ARCH cpu_sse3
#include "kernel/device/cpu/kernel_arch.h"
#define KERNEL_ARCH cpu_sse41
#include "kernel/device/cpu/kernel_arch.h"
#define KERNEL_ARCH cpu_avx
#include "kernel/device/cpu/kernel_arch.h"
#define KERNEL_ARCH cpu_avx2
#include "kernel/device/cpu/kernel_arch.h"

View File

@@ -0,0 +1,26 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
/* Optimized CPU kernel entry points. This file is compiled with AVX
* optimization flags and nearly all functions inlined, while kernel.cpp
* is compiled without for other CPU's. */
#include "util/optimization.h"
#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
# define KERNEL_STUB
#else
/* SSE optimization disabled for now on 32 bit, see bug T36316. */
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE__
# define __KERNEL_SSE2__
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
# define __KERNEL_SSE41__
# define __KERNEL_AVX__
# endif
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
#include "kernel/device/cpu/kernel.h"
#define KERNEL_ARCH cpu_avx
#include "kernel/device/cpu/kernel_arch_impl.h"

View File

@@ -0,0 +1,23 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
* optimization flags and nearly all functions inlined, while kernel.cpp
* is compiled without for other CPU's. */
#include "util/optimization.h"
#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
# define KERNEL_STUB
#else
/* SSE optimization disabled for now on 32 bit, see bug T36316. */
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE2__
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
# endif
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
#include "kernel/device/cpu/kernel.h"
#define KERNEL_ARCH cpu_sse3
#include "kernel/device/cpu/kernel_arch_impl.h"

View File

@@ -195,15 +195,7 @@ using sycl::half;
#define fmodf(x, y) sycl::fmod((x), (y))
#define lgammaf(x) sycl::lgamma((x))
/* sycl::native::cos precision is not sufficient and -ffast-math lets
* the current DPC++ compiler overload sycl::cos with it.
* We work around this issue by directly calling the spirv implementation which
* provides greater precision. */
#if defined(__SYCL_DEVICE_ONLY__) && defined(__SPIR__)
# define cosf(x) __spirv_ocl_cos(((float)(x)))
#else
# define cosf(x) sycl::cos(((float)(x)))
#endif
#define cosf(x) sycl::native::cos(((float)(x)))
#define sinf(x) sycl::native::sin(((float)(x)))
#define powf(x, y) sycl::native::powr(((float)(x)), ((float)(y)))
#define tanf(x) sycl::native::tan(((float)(x)))

View File

@@ -252,7 +252,7 @@ ccl_device float3 curve_tangent_normal(KernelGlobals kg, ccl_private const Shade
if (sd->type & PRIMITIVE_CURVE) {
tgN = -(-sd->wi - sd->dPdu * (dot(sd->dPdu, -sd->wi) / len_squared(sd->dPdu)));
tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu, -sd->I) / len_squared(sd->dPdu)));
tgN = normalize(tgN);
/* need to find suitable scaled gd for corrected normal */

View File

@@ -720,7 +720,7 @@ ccl_device_inline void curve_shader_setup(KernelGlobals kg,
const float3 tangent = normalize(dPdu);
const float3 bitangent = normalize(cross(tangent, -D));
const float sine = sd->v;
const float cosine = cos_from_sin(sine);
const float cosine = safe_sqrtf(1.0f - sine * sine);
sd->N = normalize(sine * bitangent - cosine * normalize(cross(tangent, bitangent)));
# if 0
@@ -738,7 +738,7 @@ ccl_device_inline void curve_shader_setup(KernelGlobals kg,
/* NOTE: It is possible that P will be the same as P_inside (precision issues, or very small
* radius). In this case use the view direction to approximate the normal. */
const float3 P_inside = float4_to_float3(catmull_rom_basis_eval(P_curve, sd->u));
const float3 N = (!isequal(P, P_inside)) ? normalize(P - P_inside) : -sd->wi;
const float3 N = (!isequal(P, P_inside)) ? normalize(P - P_inside) : -sd->I;
sd->N = N;
sd->v = 0.0f;
@@ -757,7 +757,7 @@ ccl_device_inline void curve_shader_setup(KernelGlobals kg,
}
sd->P = P;
sd->Ng = (sd->type & PRIMITIVE_CURVE_RIBBON) ? sd->wi : sd->N;
sd->Ng = (sd->type & PRIMITIVE_CURVE_RIBBON) ? sd->I : sd->N;
sd->dPdv = cross(sd->dPdu, sd->Ng);
sd->shader = kernel_data_fetch(curves, sd->prim).shader_id;
}

View File

@@ -55,7 +55,7 @@ ccl_device_inline void shader_setup_from_ray(KernelGlobals kg,
#endif
/* Read ray data into shader globals. */
sd->wi = -ray->D;
sd->I = -ray->D;
#ifdef __HAIR__
if (sd->type & PRIMITIVE_CURVE) {
@@ -111,7 +111,7 @@ ccl_device_inline void shader_setup_from_ray(KernelGlobals kg,
sd->flag = kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).flags;
/* backfacing test */
bool backfacing = (dot(sd->Ng, sd->wi) < 0.0f);
bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
if (backfacing) {
sd->flag |= SD_BACKFACING;
@@ -152,7 +152,7 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals kg,
sd->P = P;
sd->N = Ng;
sd->Ng = Ng;
sd->wi = I;
sd->I = I;
sd->shader = shader;
if (prim != PRIM_NONE)
sd->type = PRIMITIVE_TRIANGLE;
@@ -185,7 +185,7 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals kg,
object_position_transform_auto(kg, sd, &sd->P);
object_normal_transform_auto(kg, sd, &sd->Ng);
sd->N = sd->Ng;
object_dir_transform_auto(kg, sd, &sd->wi);
object_dir_transform_auto(kg, sd, &sd->I);
}
if (sd->type == PRIMITIVE_TRIANGLE) {
@@ -227,7 +227,7 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals kg,
/* backfacing test */
if (sd->prim != PRIM_NONE) {
bool backfacing = (dot(sd->Ng, sd->wi) < 0.0f);
bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
if (backfacing) {
sd->flag |= SD_BACKFACING;
@@ -341,7 +341,7 @@ ccl_device void shader_setup_from_curve(KernelGlobals kg,
}
/* No view direction, normals or bitangent. */
sd->wi = zero_float3();
sd->I = zero_float3();
sd->N = zero_float3();
sd->Ng = zero_float3();
#ifdef __DPDU__
@@ -372,7 +372,7 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals kg,
sd->P = ray_D;
sd->N = -ray_D;
sd->Ng = -ray_D;
sd->wi = -ray_D;
sd->I = -ray_D;
sd->shader = kernel_data.background.surface_shader;
sd->flag = kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).flags;
sd->object_flag = 0;
@@ -412,7 +412,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals kg,
sd->P = ray->P + ray->D * ray->tmin;
sd->N = -ray->D;
sd->Ng = -ray->D;
sd->wi = -ray->D;
sd->I = -ray->D;
sd->shader = SHADER_NONE;
sd->flag = 0;
sd->object_flag = 0;

View File

@@ -44,7 +44,7 @@ ccl_device_forceinline void guiding_record_surface_segment(KernelGlobals kg,
state->guiding.path_segment = kg->opgl_path_segment_storage->NextSegment();
openpgl::cpp::SetPosition(state->guiding.path_segment, guiding_point3f(sd->P));
openpgl::cpp::SetDirectionOut(state->guiding.path_segment, guiding_vec3f(sd->wi));
openpgl::cpp::SetDirectionOut(state->guiding.path_segment, guiding_vec3f(sd->I));
openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, false);
openpgl::cpp::SetScatteredContribution(state->guiding.path_segment, zero);
openpgl::cpp::SetDirectContribution(state->guiding.path_segment, zero);
@@ -60,7 +60,7 @@ ccl_device_forceinline void guiding_record_surface_bounce(KernelGlobals kg,
const Spectrum weight,
const float pdf,
const float3 N,
const float3 wo,
const float3 omega_in,
const float2 roughness,
const float eta)
{
@@ -78,7 +78,7 @@ ccl_device_forceinline void guiding_record_surface_bounce(KernelGlobals kg,
openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, guiding_vec3f(one_float3()));
openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, false);
openpgl::cpp::SetNormal(state->guiding.path_segment, guiding_vec3f(normal));
openpgl::cpp::SetDirectionIn(state->guiding.path_segment, guiding_vec3f(wo));
openpgl::cpp::SetDirectionIn(state->guiding.path_segment, guiding_vec3f(omega_in));
openpgl::cpp::SetPDFDirectionIn(state->guiding.path_segment, pdf);
openpgl::cpp::SetScatteringWeight(state->guiding.path_segment, guiding_vec3f(weight_rgb));
openpgl::cpp::SetIsDelta(state->guiding.path_segment, is_delta);
@@ -113,7 +113,7 @@ ccl_device_forceinline void guiding_record_surface_emission(KernelGlobals kg,
ccl_device_forceinline void guiding_record_bssrdf_segment(KernelGlobals kg,
IntegratorState state,
const float3 P,
const float3 wi)
const float3 I)
{
#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
if (!kernel_data.integrator.train_guiding) {
@@ -124,7 +124,7 @@ ccl_device_forceinline void guiding_record_bssrdf_segment(KernelGlobals kg,
state->guiding.path_segment = kg->opgl_path_segment_storage->NextSegment();
openpgl::cpp::SetPosition(state->guiding.path_segment, guiding_point3f(P));
openpgl::cpp::SetDirectionOut(state->guiding.path_segment, guiding_vec3f(wi));
openpgl::cpp::SetDirectionOut(state->guiding.path_segment, guiding_vec3f(I));
openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, true);
openpgl::cpp::SetScatteredContribution(state->guiding.path_segment, zero);
openpgl::cpp::SetDirectContribution(state->guiding.path_segment, zero);
@@ -166,7 +166,7 @@ ccl_device_forceinline void guiding_record_bssrdf_bounce(KernelGlobals kg,
IntegratorState state,
const float pdf,
const float3 N,
const float3 wo,
const float3 omega_in,
const Spectrum weight,
const Spectrum albedo)
{
@@ -181,7 +181,7 @@ ccl_device_forceinline void guiding_record_bssrdf_bounce(KernelGlobals kg,
openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, false);
openpgl::cpp::SetNormal(state->guiding.path_segment, guiding_vec3f(normal));
openpgl::cpp::SetDirectionIn(state->guiding.path_segment, guiding_vec3f(wo));
openpgl::cpp::SetDirectionIn(state->guiding.path_segment, guiding_vec3f(omega_in));
openpgl::cpp::SetPDFDirectionIn(state->guiding.path_segment, pdf);
openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, guiding_vec3f(weight_rgb));
#endif
@@ -222,7 +222,7 @@ ccl_device_forceinline void guiding_record_volume_bounce(KernelGlobals kg,
ccl_private const ShaderData *sd,
const Spectrum weight,
const float pdf,
const float3 wo,
const float3 omega_in,
const float roughness)
{
#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
@@ -237,7 +237,7 @@ ccl_device_forceinline void guiding_record_volume_bounce(KernelGlobals kg,
openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, true);
openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, guiding_vec3f(one_float3()));
openpgl::cpp::SetNormal(state->guiding.path_segment, guiding_vec3f(normal));
openpgl::cpp::SetDirectionIn(state->guiding.path_segment, guiding_vec3f(wo));
openpgl::cpp::SetDirectionIn(state->guiding.path_segment, guiding_vec3f(omega_in));
openpgl::cpp::SetPDFDirectionIn(state->guiding.path_segment, pdf);
openpgl::cpp::SetScatteringWeight(state->guiding.path_segment, guiding_vec3f(weight_rgb));
openpgl::cpp::SetIsDelta(state->guiding.path_segment, false);
@@ -467,13 +467,13 @@ ccl_device_forceinline bool guiding_bsdf_init(KernelGlobals kg,
ccl_device_forceinline float guiding_bsdf_sample(KernelGlobals kg,
IntegratorState state,
const float2 rand_bsdf,
ccl_private float3 *wo)
ccl_private float3 *omega_in)
{
#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
pgl_vec3f pgl_wo;
pgl_vec3f wo;
const pgl_point2f rand = openpgl::cpp::Point2(rand_bsdf.x, rand_bsdf.y);
const float pdf = kg->opgl_surface_sampling_distribution->SamplePDF(rand, pgl_wo);
*wo = make_float3(pgl_wo.x, pgl_wo.y, pgl_wo.z);
const float pdf = kg->opgl_surface_sampling_distribution->SamplePDF(rand, wo);
*omega_in = make_float3(wo.x, wo.y, wo.z);
return pdf;
#else
return 0.0f;
@@ -482,10 +482,10 @@ ccl_device_forceinline float guiding_bsdf_sample(KernelGlobals kg,
ccl_device_forceinline float guiding_bsdf_pdf(KernelGlobals kg,
IntegratorState state,
const float3 wo)
const float3 omega_in)
{
#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
return kg->opgl_surface_sampling_distribution->PDF(guiding_vec3f(wo));
return kg->opgl_surface_sampling_distribution->PDF(guiding_vec3f(omega_in));
#else
return 0.0f;
#endif
@@ -520,13 +520,13 @@ ccl_device_forceinline bool guiding_phase_init(KernelGlobals kg,
ccl_device_forceinline float guiding_phase_sample(KernelGlobals kg,
IntegratorState state,
const float2 rand_phase,
ccl_private float3 *wo)
ccl_private float3 *omega_in)
{
#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
pgl_vec3f pgl_wo;
pgl_vec3f wo;
const pgl_point2f rand = openpgl::cpp::Point2(rand_phase.x, rand_phase.y);
const float pdf = kg->opgl_volume_sampling_distribution->SamplePDF(rand, pgl_wo);
*wo = make_float3(pgl_wo.x, pgl_wo.y, pgl_wo.z);
const float pdf = kg->opgl_volume_sampling_distribution->SamplePDF(rand, wo);
*omega_in = make_float3(wo.x, wo.y, wo.z);
return pdf;
#else
return 0.0f;
@@ -535,10 +535,10 @@ ccl_device_forceinline float guiding_phase_sample(KernelGlobals kg,
ccl_device_forceinline float guiding_phase_pdf(KernelGlobals kg,
IntegratorState state,
const float3 wo)
const float3 omega_in)
{
#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
return kg->opgl_volume_sampling_distribution->PDF(guiding_vec3f(wo));
return kg->opgl_volume_sampling_distribution->PDF(guiding_vec3f(omega_in));
#else
return 0.0f;
#endif

View File

@@ -607,22 +607,24 @@ ccl_device_forceinline Spectrum mnee_eval_bsdf_contribution(ccl_private ShaderCl
{
ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)closure;
float cosNI = dot(bsdf->N, wi);
float cosNO = dot(bsdf->N, wo);
float cosNO = dot(bsdf->N, wi);
float cosNI = dot(bsdf->N, wo);
float3 Ht = normalize(-(bsdf->ior * wo + wi));
float cosHI = dot(Ht, wi);
float cosHO = dot(Ht, wi);
float alpha2 = bsdf->alpha_x * bsdf->alpha_y;
float cosThetaM = dot(bsdf->N, Ht);
/* Now calculate G1(i, m) and G1(o, m). */
float G;
if (bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID) {
G = bsdf_G<MicrofacetType::BECKMANN>(alpha2, cosNI, cosNO);
/* Eq. 26, 27: now calculate G1(i,m) and G1(o,m). */
G = bsdf_beckmann_G1(bsdf->alpha_x, cosNO) * bsdf_beckmann_G1(bsdf->alpha_x, cosNI);
}
else { /* bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID assumed */
G = bsdf_G<MicrofacetType::GGX>(alpha2, cosNI, cosNO);
/* Eq. 34: now calculate G1(i,m) and G1(o,m). */
G = (2.f / (1.f + safe_sqrtf(1.f + alpha2 * (1.f - cosNO * cosNO) / (cosNO * cosNO)))) *
(2.f / (1.f + safe_sqrtf(1.f + alpha2 * (1.f - cosNI * cosNI) / (cosNI * cosNI))));
}
/*
@@ -633,7 +635,7 @@ ccl_device_forceinline Spectrum mnee_eval_bsdf_contribution(ccl_private ShaderCl
* contribution = bsdf_do * |do/dh| * |n.wo / n.h| / pdf_dh
* = (1 - F) * G * |h.wi / (n.wi * n.h^2)|
*/
return bsdf->weight * G * fabsf(cosHI / (cosNI * sqr(cosThetaM)));
return bsdf->weight * G * fabsf(cosHO / (cosNO * sqr(cosThetaM)));
}
/* Compute transfer matrix determinant |T1| = |dx1/dxn| (and |dh/dx| in the process) */
@@ -704,9 +706,9 @@ ccl_device_forceinline bool mnee_compute_transfer_matrix(ccl_private const Shade
float ilo = -eta * ilh;
float cos_theta = dot(wo, m.n);
float sin_theta = sin_from_cos(cos_theta);
float sin_theta = safe_sqrtf(1.f - sqr(cos_theta));
float cos_phi = dot(wo, s);
float sin_phi = sin_from_cos(cos_phi);
float sin_phi = safe_sqrtf(1.f - sqr(cos_phi));
/* Wo = (cos_phi * sin_theta) * s + (sin_phi * sin_theta) * t + cos_theta * n. */
float3 dH_dtheta = ilo * (cos_theta * (cos_phi * s + sin_phi * t) - sin_theta * m.n);

View File

@@ -235,6 +235,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
light_sample_to_surface_shadow_ray(kg, sd, &ls, &ray);
}
const bool is_light = light_sample_is_light(&ls);
/* Branch off shadow kernel. */
IntegratorShadowState shadow_state = integrator_shadow_path_init(
kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false);
@@ -262,6 +264,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
/* Copy state from main path to shadow path. */
uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag);
shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
const Spectrum unlit_throughput = INTEGRATOR_STATE(state, path, throughput);
const Spectrum throughput = unlit_throughput * bsdf_eval_sum(&bsdf_eval);
@@ -361,7 +364,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
/* BSDF closure, sample direction. */
float bsdf_pdf = 0.0f, unguided_bsdf_pdf = 0.0f;
BsdfEval bsdf_eval ccl_optional_struct_init;
float3 bsdf_wo ccl_optional_struct_init;
float3 bsdf_omega_in ccl_optional_struct_init;
int label;
float2 bsdf_sampled_roughness = make_float2(1.0f, 1.0f);
@@ -375,7 +378,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
sc,
rand_bsdf,
&bsdf_eval,
&bsdf_wo,
&bsdf_omega_in,
&bsdf_pdf,
&unguided_bsdf_pdf,
&bsdf_sampled_roughness,
@@ -395,7 +398,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
sc,
rand_bsdf,
&bsdf_eval,
&bsdf_wo,
&bsdf_omega_in,
&bsdf_pdf,
&bsdf_sampled_roughness,
&bsdf_eta);
@@ -413,7 +416,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
}
else {
/* Setup ray with changed origin and direction. */
const float3 D = normalize(bsdf_wo);
const float3 D = normalize(bsdf_omega_in);
INTEGRATOR_STATE_WRITE(state, ray, P) = integrate_surface_ray_offset(kg, sd, sd->P, D);
INTEGRATOR_STATE_WRITE(state, ray, D) = D;
INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
@@ -452,7 +455,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
bsdf_weight,
bsdf_pdf,
sd->N,
normalize(bsdf_wo),
normalize(bsdf_omega_in),
bsdf_sampled_roughness,
bsdf_eta);

View File

@@ -821,6 +821,7 @@ ccl_device_forceinline void integrate_volume_direct_light(
/* Create shadow ray. */
Ray ray ccl_optional_struct_init;
light_sample_to_volume_shadow_ray(kg, sd, &ls, P, &ray);
const bool is_light = light_sample_is_light(&ls);
/* Branch off shadow kernel. */
IntegratorShadowState shadow_state = integrator_shadow_path_init(
@@ -837,6 +838,7 @@ ccl_device_forceinline void integrate_volume_direct_light(
const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce);
const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce);
uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag);
shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
const Spectrum throughput_phase = throughput * bsdf_eval_sum(&phase_eval);
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
@@ -910,7 +912,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
/* Phase closure, sample direction. */
float phase_pdf = 0.0f, unguided_phase_pdf = 0.0f;
BsdfEval phase_eval ccl_optional_struct_init;
float3 phase_wo ccl_optional_struct_init;
float3 phase_omega_in ccl_optional_struct_init;
float sampled_roughness = 1.0f;
int label;
@@ -922,7 +924,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
svc,
rand_phase,
&phase_eval,
&phase_wo,
&phase_omega_in,
&phase_pdf,
&unguided_phase_pdf,
&sampled_roughness);
@@ -936,8 +938,15 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
else
# endif
{
label = volume_shader_phase_sample(
kg, sd, phases, svc, rand_phase, &phase_eval, &phase_wo, &phase_pdf, &sampled_roughness);
label = volume_shader_phase_sample(kg,
sd,
phases,
svc,
rand_phase,
&phase_eval,
&phase_omega_in,
&phase_pdf,
&sampled_roughness);
if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) {
return false;
@@ -948,7 +957,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
/* Setup ray. */
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_wo);
INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in);
INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
# ifdef __RAY_DIFFERENTIALS__
@@ -962,7 +971,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
/* Add phase function sampling data to the path segment. */
guiding_record_volume_bounce(
kg, state, sd, phase_weight, phase_pdf, normalize(phase_wo), sampled_roughness);
kg, state, sd, phase_weight, phase_pdf, normalize(phase_omega_in), sampled_roughness);
/* Update throughput. */
const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
@@ -1067,7 +1076,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
float3 transmittance_weight = spectrum_to_rgb(
safe_divide_color(result.indirect_throughput, initial_throughput));
guiding_record_volume_transmission(kg, state, transmittance_weight);
guiding_record_volume_segment(kg, state, direct_P, sd.wi);
guiding_record_volume_segment(kg, state, direct_P, sd.I);
guiding_generated_new_segment = true;
unlit_throughput = result.indirect_throughput / continuation_probability;
rand_phase_guiding = path_state_rng_1D(kg, &rng_state, PRNG_VOLUME_PHASE_GUIDING_DISTANCE);
@@ -1130,7 +1139,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
# if defined(__PATH_GUIDING__)
# if PATH_GUIDING_LEVEL >= 1
if (!guiding_generated_new_segment) {
guiding_record_volume_segment(kg, state, sd.P, sd.wi);
guiding_record_volume_segment(kg, state, sd.P, sd.I);
}
# endif
# if PATH_GUIDING_LEVEL >= 4

View File

@@ -136,7 +136,7 @@ ccl_device_forceinline float diffusion_length_dwivedi(float alpha)
ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, float randv)
{
float sin_theta = sin_from_cos(cos_theta);
float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta);
float phi = M_2PI_F * randv;
float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta);

View File

@@ -174,14 +174,14 @@ ccl_device_inline void surface_shader_prepare_closures(KernelGlobals kg,
#if 0
ccl_device_inline void surface_shader_validate_bsdf_sample(const KernelGlobals kg,
const ShaderClosure *sc,
const float3 wo,
const float3 omega_in,
const int org_label,
const float2 org_roughness,
const float org_eta)
{
/* Validate the the bsdf_label and bsdf_roughness_eta functions
* by estimating the values after a bsdf sample. */
const int comp_label = bsdf_label(kg, sc, wo);
const int comp_label = bsdf_label(kg, sc, omega_in);
kernel_assert(org_label == comp_label);
float2 comp_roughness;
@@ -218,7 +218,7 @@ ccl_device_forceinline bool _surface_shader_exclude(ClosureType type, uint light
ccl_device_inline float _surface_shader_bsdf_eval_mis(KernelGlobals kg,
ccl_private ShaderData *sd,
const float3 wo,
const float3 omega_in,
ccl_private const ShaderClosure *skip_sc,
ccl_private BsdfEval *result_eval,
float sum_pdf,
@@ -237,7 +237,7 @@ ccl_device_inline float _surface_shader_bsdf_eval_mis(KernelGlobals kg,
if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
if (CLOSURE_IS_BSDF(sc->type) && !_surface_shader_exclude(sc->type, light_shader_flags)) {
float bsdf_pdf = 0.0f;
Spectrum eval = bsdf_eval(kg, sd, sc, wo, &bsdf_pdf);
Spectrum eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
if (bsdf_pdf != 0.0f) {
bsdf_eval_accum(result_eval, sc->type, eval * sc->weight);
@@ -254,7 +254,7 @@ ccl_device_inline float _surface_shader_bsdf_eval_mis(KernelGlobals kg,
ccl_device_inline float surface_shader_bsdf_eval_pdfs(const KernelGlobals kg,
ccl_private ShaderData *sd,
const float3 wo,
const float3 omega_in,
ccl_private BsdfEval *result_eval,
ccl_private float *pdfs,
const uint light_shader_flags)
@@ -270,7 +270,7 @@ ccl_device_inline float surface_shader_bsdf_eval_pdfs(const KernelGlobals kg,
if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
if (CLOSURE_IS_BSDF(sc->type) && !_surface_shader_exclude(sc->type, light_shader_flags)) {
float bsdf_pdf = 0.0f;
Spectrum eval = bsdf_eval(kg, sd, sc, wo, &bsdf_pdf);
Spectrum eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
kernel_assert(bsdf_pdf >= 0.0f);
if (bsdf_pdf != 0.0f) {
bsdf_eval_accum(result_eval, sc->type, eval * sc->weight);
@@ -310,20 +310,20 @@ ccl_device_inline
surface_shader_bsdf_eval(KernelGlobals kg,
IntegratorState state,
ccl_private ShaderData *sd,
const float3 wo,
const float3 omega_in,
ccl_private BsdfEval *bsdf_eval,
const uint light_shader_flags)
{
bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, zero_spectrum());
float pdf = _surface_shader_bsdf_eval_mis(
kg, sd, wo, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags);
kg, sd, omega_in, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags);
#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
if (state->guiding.use_surface_guiding) {
const float guiding_sampling_prob = state->guiding.surface_guiding_sampling_prob;
const float bssrdf_sampling_prob = state->guiding.bssrdf_sampling_prob;
const float guide_pdf = guiding_bsdf_pdf(kg, state, wo);
const float guide_pdf = guiding_bsdf_pdf(kg, state, omega_in);
pdf = (guiding_sampling_prob * guide_pdf * (1.0f - bssrdf_sampling_prob)) +
(1.0f - guiding_sampling_prob) * pdf;
}
@@ -407,7 +407,7 @@ ccl_device int surface_shader_bsdf_guided_sample_closure(KernelGlobals kg,
ccl_private const ShaderClosure *sc,
const float2 rand_bsdf,
ccl_private BsdfEval *bsdf_eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *bsdf_pdf,
ccl_private float *unguided_bsdf_pdf,
ccl_private float2 *sampled_rougness,
@@ -443,14 +443,14 @@ ccl_device int surface_shader_bsdf_guided_sample_closure(KernelGlobals kg,
if (sample_guiding) {
/* Sample guiding distribution. */
guide_pdf = guiding_bsdf_sample(kg, state, rand_bsdf, wo);
guide_pdf = guiding_bsdf_sample(kg, state, rand_bsdf, omega_in);
*bsdf_pdf = 0.0f;
if (guide_pdf != 0.0f) {
float unguided_bsdf_pdfs[MAX_CLOSURE];
*unguided_bsdf_pdf = surface_shader_bsdf_eval_pdfs(
kg, sd, *wo, bsdf_eval, unguided_bsdf_pdfs, 0);
kg, sd, *omega_in, bsdf_eval, unguided_bsdf_pdfs, 0);
*bsdf_pdf = (guiding_sampling_prob * guide_pdf * (1.0f - bssrdf_sampling_prob)) +
((1.0f - guiding_sampling_prob) * (*unguided_bsdf_pdf));
float sum_pdfs = 0.0f;
@@ -471,7 +471,7 @@ ccl_device int surface_shader_bsdf_guided_sample_closure(KernelGlobals kg,
* the sum of all unguided_bsdf_pdfs is just < 1.0f. */
idx = (rand_bsdf_guiding > sum_pdfs) ? sd->num_closure - 1 : idx;
label = bsdf_label(kg, &sd->closure[idx], *wo);
label = bsdf_label(kg, &sd->closure[idx], *omega_in);
}
}
@@ -483,11 +483,19 @@ ccl_device int surface_shader_bsdf_guided_sample_closure(KernelGlobals kg,
else {
/* Sample BSDF. */
*bsdf_pdf = 0.0f;
label = bsdf_sample(
kg, sd, sc, rand_bsdf.x, rand_bsdf.y, &eval, wo, unguided_bsdf_pdf, sampled_rougness, eta);
label = bsdf_sample(kg,
sd,
sc,
rand_bsdf.x,
rand_bsdf.y,
&eval,
omega_in,
unguided_bsdf_pdf,
sampled_rougness,
eta);
# if 0
if (*unguided_bsdf_pdf > 0.0f) {
surface_shader_validate_bsdf_sample(kg, sc, *wo, label, sampled_roughness, eta);
surface_shader_validate_bsdf_sample(kg, sc, *omega_in, label, sampled_roughness, eta);
}
# endif
@@ -499,13 +507,13 @@ ccl_device int surface_shader_bsdf_guided_sample_closure(KernelGlobals kg,
if (sd->num_closure > 1) {
float sweight = sc->sample_weight;
*unguided_bsdf_pdf = _surface_shader_bsdf_eval_mis(
kg, sd, *wo, sc, bsdf_eval, (*unguided_bsdf_pdf) * sweight, sweight, 0);
kg, sd, *omega_in, sc, bsdf_eval, (*unguided_bsdf_pdf) * sweight, sweight, 0);
kernel_assert(reduce_min(bsdf_eval_sum(bsdf_eval)) >= 0.0f);
}
*bsdf_pdf = *unguided_bsdf_pdf;
if (use_surface_guiding) {
guide_pdf = guiding_bsdf_pdf(kg, state, *wo);
guide_pdf = guiding_bsdf_pdf(kg, state, *omega_in);
*bsdf_pdf *= 1.0f - guiding_sampling_prob;
*bsdf_pdf += guiding_sampling_prob * guide_pdf * (1.0f - bssrdf_sampling_prob);
}
@@ -525,7 +533,7 @@ ccl_device int surface_shader_bsdf_sample_closure(KernelGlobals kg,
ccl_private const ShaderClosure *sc,
const float2 rand_bsdf,
ccl_private BsdfEval *bsdf_eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf,
ccl_private float2 *sampled_roughness,
ccl_private float *eta)
@@ -538,14 +546,15 @@ ccl_device int surface_shader_bsdf_sample_closure(KernelGlobals kg,
*pdf = 0.0f;
label = bsdf_sample(
kg, sd, sc, rand_bsdf.x, rand_bsdf.y, &eval, wo, pdf, sampled_roughness, eta);
kg, sd, sc, rand_bsdf.x, rand_bsdf.y, &eval, omega_in, pdf, sampled_roughness, eta);
if (*pdf != 0.0f) {
bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight);
if (sd->num_closure > 1) {
float sweight = sc->sample_weight;
*pdf = _surface_shader_bsdf_eval_mis(kg, sd, *wo, sc, bsdf_eval, *pdf * sweight, sweight, 0);
*pdf = _surface_shader_bsdf_eval_mis(
kg, sd, *omega_in, sc, bsdf_eval, *pdf * sweight, sweight, 0);
}
}
else {
@@ -749,7 +758,7 @@ ccl_device Spectrum surface_shader_background(ccl_private const ShaderData *sd)
ccl_device Spectrum surface_shader_emission(ccl_private const ShaderData *sd)
{
if (sd->flag & SD_EMISSION) {
return emissive_simple_eval(sd->Ng, sd->wi) * sd->closure_emission_background;
return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background;
}
else {
return zero_spectrum();

View File

@@ -202,7 +202,7 @@ ccl_device_inline ccl_private const ShaderVolumeClosure *volume_shader_phase_pic
ccl_device_inline float _volume_shader_phase_eval_mis(ccl_private const ShaderData *sd,
ccl_private const ShaderVolumePhases *phases,
const float3 wo,
const float3 omega_in,
int skip_phase,
ccl_private BsdfEval *result_eval,
float sum_pdf,
@@ -214,7 +214,7 @@ ccl_device_inline float _volume_shader_phase_eval_mis(ccl_private const ShaderDa
ccl_private const ShaderVolumeClosure *svc = &phases->closure[i];
float phase_pdf = 0.0f;
Spectrum eval = volume_phase_eval(sd, svc, wo, &phase_pdf);
Spectrum eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf);
if (phase_pdf != 0.0f) {
bsdf_eval_accum(result_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
@@ -230,11 +230,11 @@ ccl_device_inline float _volume_shader_phase_eval_mis(ccl_private const ShaderDa
ccl_device float volume_shader_phase_eval(KernelGlobals kg,
ccl_private const ShaderData *sd,
ccl_private const ShaderVolumeClosure *svc,
const float3 wo,
const float3 omega_in,
ccl_private BsdfEval *phase_eval)
{
float phase_pdf = 0.0f;
Spectrum eval = volume_phase_eval(sd, svc, wo, &phase_pdf);
Spectrum eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf);
if (phase_pdf != 0.0f) {
bsdf_eval_accum(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
@@ -247,17 +247,17 @@ ccl_device float volume_shader_phase_eval(KernelGlobals kg,
IntegratorState state,
ccl_private const ShaderData *sd,
ccl_private const ShaderVolumePhases *phases,
const float3 wo,
const float3 omega_in,
ccl_private BsdfEval *phase_eval)
{
bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_spectrum());
float pdf = _volume_shader_phase_eval_mis(sd, phases, wo, -1, phase_eval, 0.0f, 0.0f);
float pdf = _volume_shader_phase_eval_mis(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f);
# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
if (state->guiding.use_volume_guiding) {
const float guiding_sampling_prob = state->guiding.volume_guiding_sampling_prob;
const float guide_pdf = guiding_phase_pdf(kg, state, wo);
const float guide_pdf = guiding_phase_pdf(kg, state, omega_in);
pdf = (guiding_sampling_prob * guide_pdf) + (1.0f - guiding_sampling_prob) * pdf;
}
# endif
@@ -272,7 +272,7 @@ ccl_device int volume_shader_phase_guided_sample(KernelGlobals kg,
ccl_private const ShaderVolumeClosure *svc,
const float2 rand_phase,
ccl_private BsdfEval *phase_eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *phase_pdf,
ccl_private float *unguided_phase_pdf,
ccl_private float *sampled_roughness)
@@ -304,11 +304,11 @@ ccl_device int volume_shader_phase_guided_sample(KernelGlobals kg,
if (sample_guiding) {
/* Sample guiding distribution. */
guide_pdf = guiding_phase_sample(kg, state, rand_phase, wo);
guide_pdf = guiding_phase_sample(kg, state, rand_phase, omega_in);
*phase_pdf = 0.0f;
if (guide_pdf != 0.0f) {
*unguided_phase_pdf = volume_shader_phase_eval(kg, sd, svc, *wo, phase_eval);
*unguided_phase_pdf = volume_shader_phase_eval(kg, sd, svc, *omega_in, phase_eval);
*phase_pdf = (guiding_sampling_prob * guide_pdf) +
((1.0f - guiding_sampling_prob) * (*unguided_phase_pdf));
label = LABEL_VOLUME_SCATTER;
@@ -318,14 +318,14 @@ ccl_device int volume_shader_phase_guided_sample(KernelGlobals kg,
/* Sample phase. */
*phase_pdf = 0.0f;
label = volume_phase_sample(
sd, svc, rand_phase.x, rand_phase.y, &eval, wo, unguided_phase_pdf);
sd, svc, rand_phase.x, rand_phase.y, &eval, omega_in, unguided_phase_pdf);
if (*unguided_phase_pdf != 0.0f) {
bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
*phase_pdf = *unguided_phase_pdf;
if (use_volume_guiding) {
guide_pdf = guiding_phase_pdf(kg, state, *wo);
guide_pdf = guiding_phase_pdf(kg, state, *omega_in);
*phase_pdf *= 1.0f - guiding_sampling_prob;
*phase_pdf += guiding_sampling_prob * guide_pdf;
}
@@ -349,7 +349,7 @@ ccl_device int volume_shader_phase_sample(KernelGlobals kg,
ccl_private const ShaderVolumeClosure *svc,
float2 rand_phase,
ccl_private BsdfEval *phase_eval,
ccl_private float3 *wo,
ccl_private float3 *omega_in,
ccl_private float *pdf,
ccl_private float *sampled_roughness)
{
@@ -357,7 +357,7 @@ ccl_device int volume_shader_phase_sample(KernelGlobals kg,
Spectrum eval = zero_spectrum();
*pdf = 0.0f;
int label = volume_phase_sample(sd, svc, rand_phase.x, rand_phase.y, &eval, wo, pdf);
int label = volume_phase_sample(sd, svc, rand_phase.x, rand_phase.y, &eval, omega_in, pdf);
if (*pdf != 0.0f) {
bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);

View File

@@ -102,7 +102,7 @@ ccl_device float area_light_spread_attenuation(const float3 D,
/* The factor M_PI_F comes from integrating the radiance over the hemisphere */
return (cos_a > 0.9999997f) ? M_PI_F : 0.0f;
}
const float sin_a = sin_from_cos(cos_a);
const float sin_a = safe_sqrtf(1.0f - sqr(cos_a));
const float tan_a = sin_a / cos_a;
return max((tan_half_spread - tan_a) * normalize_spread, 0.0f);
}

View File

@@ -88,6 +88,13 @@ light_sample_shader_eval(KernelGlobals kg,
return eval;
}
/* Test if light sample is from a light or emission from geometry. */
ccl_device_inline bool light_sample_is_light(ccl_private const LightSample *ccl_restrict ls)
{
/* return if it's a lamp for shadow pass */
return (ls->prim == PRIM_NONE && ls->type != LIGHT_BACKGROUND);
}
/* Early path termination of shadow rays. */
ccl_device_inline bool light_sample_terminate(KernelGlobals kg,
ccl_private const LightSample *ccl_restrict ls,

View File

@@ -47,6 +47,11 @@ ccl_device float light_tree_cos_bounding_box_angle(const BoundingBox bbox,
return cos_theta_u;
}
ccl_device_forceinline float sin_from_cos(const float c)
{
return safe_sqrtf(1.0f - sqr(c));
}
/* Compute vector v as in Fig .8. P_v is the corresponding point along the ray. */
ccl_device float3 compute_v(
const float3 centroid, const float3 P, const float3 D, const float3 bcone_axis, const float t)

View File

@@ -63,7 +63,7 @@ ccl_device_forceinline float triangle_light_pdf(KernelGlobals kg,
const float3 e2 = V[2] - V[1];
const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
const float3 N = cross(e0, e1);
const float distance_to_plane = fabsf(dot(N, sd->wi * t)) / dot(N, N);
const float distance_to_plane = fabsf(dot(N, sd->I * t)) / dot(N, N);
const float area = 0.5f * len(N);
float pdf;
@@ -71,7 +71,7 @@ ccl_device_forceinline float triangle_light_pdf(KernelGlobals kg,
if (longest_edge_squared > distance_to_plane * distance_to_plane) {
/* sd contains the point on the light source
* calculate Px, the point that we're shading */
const float3 Px = sd->P + sd->wi * t;
const float3 Px = sd->P + sd->I * t;
const float3 v0_p = V[0] - Px;
const float3 v1_p = V[1] - Px;
const float3 v2_p = V[2] - Px;
@@ -99,7 +99,7 @@ ccl_device_forceinline float triangle_light_pdf(KernelGlobals kg,
return 0.0f;
}
pdf = triangle_light_pdf_area_sampling(sd->Ng, sd->wi, t) / area;
pdf = triangle_light_pdf_area_sampling(sd->Ng, sd->I, t) / area;
}
/* Belongs in distribution.h but can reuse computations here. */
@@ -218,7 +218,7 @@ ccl_device_forceinline bool triangle_light_sample(KernelGlobals kg,
/* Finally, select a random point along the edge of the new triangle
* That point on the spherical triangle is the sampled ray direction */
const float z = 1.0f - randv * (1.0f - dot(C_, B));
ls->D = z * B + sin_from_cos(z) * safe_normalize(C_ - dot(C_, B) * B);
ls->D = z * B + safe_sqrtf(1.0f - z * z) * safe_normalize(C_ - dot(C_, B) * B);
/* calculate intersection with the planar triangle */
if (!ray_triangle_intersect(

View File

@@ -80,7 +80,7 @@ ccl_device void osl_closure_diffuse_setup(KernelGlobals kg,
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
sd->flag |= bsdf_diffuse_setup(bsdf);
}
@@ -101,7 +101,7 @@ ccl_device void osl_closure_oren_nayar_setup(KernelGlobals kg,
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->roughness = closure->roughness;
sd->flag |= bsdf_oren_nayar_setup(bsdf);
@@ -123,7 +123,7 @@ ccl_device void osl_closure_translucent_setup(KernelGlobals kg,
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
sd->flag |= bsdf_translucent_setup(bsdf);
}
@@ -144,7 +144,7 @@ ccl_device void osl_closure_reflection_setup(KernelGlobals kg,
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
sd->flag |= bsdf_reflection_setup(bsdf);
}
@@ -165,7 +165,7 @@ ccl_device void osl_closure_refraction_setup(KernelGlobals kg,
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->ior = closure->ior;
sd->flag |= bsdf_refraction_setup(bsdf);
@@ -199,7 +199,7 @@ ccl_device void osl_closure_microfacet_setup(KernelGlobals kg,
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
bsdf->alpha_y = closure->alpha_y;
bsdf->ior = closure->ior;
@@ -257,7 +257,7 @@ ccl_device void osl_closure_microfacet_ggx_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
sd->flag |= bsdf_microfacet_ggx_isotropic_setup(bsdf);
@@ -280,7 +280,7 @@ ccl_device void osl_closure_microfacet_ggx_aniso_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
bsdf->alpha_y = closure->alpha_y;
bsdf->T = closure->T;
@@ -305,7 +305,7 @@ ccl_device void osl_closure_microfacet_ggx_refraction_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
bsdf->ior = closure->ior;
@@ -337,7 +337,7 @@ ccl_device void osl_closure_microfacet_ggx_fresnel_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
bsdf->alpha_y = bsdf->alpha_x;
bsdf->ior = closure->ior;
@@ -375,7 +375,7 @@ ccl_device void osl_closure_microfacet_ggx_aniso_fresnel_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
bsdf->alpha_y = closure->alpha_y;
bsdf->ior = closure->ior;
@@ -418,7 +418,7 @@ ccl_device void osl_closure_microfacet_multi_ggx_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
bsdf->alpha_y = bsdf->alpha_x;
bsdf->ior = 1.0f;
@@ -459,7 +459,7 @@ ccl_device void osl_closure_microfacet_multi_ggx_glass_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
bsdf->alpha_y = bsdf->alpha_x;
bsdf->ior = closure->ior;
@@ -500,7 +500,7 @@ ccl_device void osl_closure_microfacet_multi_ggx_aniso_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
bsdf->alpha_y = closure->alpha_y;
bsdf->ior = 1.0f;
@@ -543,7 +543,7 @@ ccl_device void osl_closure_microfacet_multi_ggx_fresnel_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
bsdf->alpha_y = bsdf->alpha_x;
bsdf->ior = closure->ior;
@@ -584,7 +584,7 @@ ccl_device void osl_closure_microfacet_multi_ggx_glass_fresnel_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
bsdf->alpha_y = bsdf->alpha_x;
bsdf->ior = closure->ior;
@@ -625,7 +625,7 @@ ccl_device void osl_closure_microfacet_multi_ggx_aniso_fresnel_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
bsdf->alpha_y = closure->alpha_y;
bsdf->ior = closure->ior;
@@ -659,7 +659,7 @@ ccl_device void osl_closure_microfacet_beckmann_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
sd->flag |= bsdf_microfacet_beckmann_isotropic_setup(bsdf);
@@ -682,7 +682,7 @@ ccl_device void osl_closure_microfacet_beckmann_aniso_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
bsdf->alpha_y = closure->alpha_y;
bsdf->T = closure->T;
@@ -707,7 +707,7 @@ ccl_device void osl_closure_microfacet_beckmann_refraction_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
bsdf->ior = closure->ior;
@@ -733,7 +733,7 @@ ccl_device void osl_closure_ashikhmin_velvet_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->sigma = closure->sigma;
sd->flag |= bsdf_ashikhmin_velvet_setup(bsdf);
@@ -756,7 +756,7 @@ ccl_device void osl_closure_ashikhmin_shirley_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->alpha_x;
bsdf->alpha_y = closure->alpha_y;
bsdf->T = closure->T;
@@ -780,7 +780,7 @@ ccl_device void osl_closure_diffuse_toon_setup(KernelGlobals kg,
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->size = closure->size;
bsdf->smooth = closure->smooth;
@@ -803,7 +803,7 @@ ccl_device void osl_closure_glossy_toon_setup(KernelGlobals kg,
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->size = closure->size;
bsdf->smooth = closure->smooth;
@@ -829,7 +829,7 @@ ccl_device void osl_closure_principled_diffuse_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->roughness = closure->roughness;
sd->flag |= bsdf_principled_diffuse_setup(bsdf);
@@ -852,7 +852,7 @@ ccl_device void osl_closure_principled_sheen_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->avg_value = 0.0f;
sd->flag |= bsdf_principled_sheen_setup(sd, bsdf);
@@ -876,7 +876,7 @@ ccl_device void osl_closure_principled_clearcoat_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->alpha_x = closure->clearcoat_roughness;
bsdf->alpha_y = closure->clearcoat_roughness;
bsdf->ior = 1.5f;
@@ -948,7 +948,7 @@ ccl_device void osl_closure_diffuse_ramp_setup(KernelGlobals kg,
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8);
if (!bsdf->colors) {
@@ -973,7 +973,7 @@ ccl_device void osl_closure_phong_ramp_setup(KernelGlobals kg,
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->exponent = closure->exponent;
bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8);
@@ -1024,7 +1024,7 @@ ccl_device void osl_closure_bssrdf_setup(KernelGlobals kg,
/* create one closure per color channel */
bssrdf->albedo = closure->albedo;
bssrdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bssrdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bssrdf->roughness = closure->roughness;
bssrdf->anisotropy = clamp(closure->anisotropy, 0.0f, 0.9f);
@@ -1049,7 +1049,7 @@ ccl_device void osl_closure_hair_reflection_setup(KernelGlobals kg,
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->T = closure->T;
bsdf->roughness1 = closure->roughness1;
bsdf->roughness2 = closure->roughness2;
@@ -1075,7 +1075,7 @@ ccl_device void osl_closure_hair_transmission_setup(
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->T = closure->T;
bsdf->roughness1 = closure->roughness1;
bsdf->roughness2 = closure->roughness2;
@@ -1107,7 +1107,7 @@ ccl_device void osl_closure_principled_hair_setup(KernelGlobals kg,
return;
}
bsdf->N = ensure_valid_reflection(sd->Ng, sd->wi, closure->N);
bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
bsdf->sigma = closure->sigma;
bsdf->v = closure->v;
bsdf->s = closure->s;

View File

@@ -25,13 +25,13 @@ ccl_device_inline void shaderdata_to_shaderglobals(KernelGlobals kg,
ccl_private ShaderGlobals *globals)
{
const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
const differential3 dI = differential_from_compact(sd->wi, sd->dI);
const differential3 dI = differential_from_compact(sd->I, sd->dI);
/* copy from shader data to shader globals */
globals->P = sd->P;
globals->dPdx = dP.dx;
globals->dPdy = dP.dy;
globals->I = sd->wi;
globals->I = sd->I;
globals->dIdx = dI.dx;
globals->dIdy = dI.dy;
globals->N = sd->N;

View File

@@ -20,7 +20,6 @@
#include "kernel/osl/globals.h"
#include "kernel/osl/services.h"
#include "kernel/osl/types.h"
#include "util/foreach.h"
#include "util/log.h"
@@ -120,8 +119,6 @@ ustring OSLRenderServices::u_u("u");
ustring OSLRenderServices::u_v("v");
ustring OSLRenderServices::u_empty;
ImageManager *OSLRenderServices::image_manager = nullptr;
OSLRenderServices::OSLRenderServices(OSL::TextureSystem *texture_system, int device_type)
: OSL::RendererServices(texture_system), device_type_(device_type)
{
@@ -1157,7 +1154,7 @@ TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring file
/* For non-OIIO textures, just return a pointer to our own OSLTextureHandle. */
if (it != textures.end()) {
if (it->second->type != OSLTextureHandle::OIIO) {
return reinterpret_cast<TextureSystem::TextureHandle *>(it->second.get());
return (TextureSystem::TextureHandle *)it->second.get();
}
}
@@ -1176,53 +1173,16 @@ TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring file
/* Assign OIIO texture handle and return. */
it->second->oiio_handle = handle;
return reinterpret_cast<TextureSystem::TextureHandle *>(it->second.get());
return (TextureSystem::TextureHandle *)it->second.get();
}
else {
/* Construct GPU texture handle for existing textures. */
if (it != textures.end()) {
switch (it->second->type) {
case OSLTextureHandle::OIIO:
return NULL;
case OSLTextureHandle::SVM:
if (!it->second->handle.empty() && it->second->handle.get_manager() != image_manager) {
it.clear();
break;
}
return reinterpret_cast<TextureSystem::TextureHandle *>(OSL_TEXTURE_HANDLE_TYPE_SVM |
it->second->svm_slots[0].y);
case OSLTextureHandle::IES:
if (!it->second->handle.empty() && it->second->handle.get_manager() != image_manager) {
it.clear();
break;
}
return reinterpret_cast<TextureSystem::TextureHandle *>(OSL_TEXTURE_HANDLE_TYPE_IES |
it->second->svm_slots[0].y);
case OSLTextureHandle::AO:
return reinterpret_cast<TextureSystem::TextureHandle *>(
OSL_TEXTURE_HANDLE_TYPE_AO_OR_BEVEL | 1);
case OSLTextureHandle::BEVEL:
return reinterpret_cast<TextureSystem::TextureHandle *>(
OSL_TEXTURE_HANDLE_TYPE_AO_OR_BEVEL | 2);
}
if (it != textures.end() && it->second->type == OSLTextureHandle::SVM &&
it->second->svm_slots[0].w == -1) {
return reinterpret_cast<TextureSystem::TextureHandle *>(
static_cast<uintptr_t>(it->second->svm_slots[0].y + 1));
}
if (!image_manager) {
return NULL;
}
/* Load new textures using SVM image manager. */
ImageHandle handle = image_manager->add_image(filename.string(), ImageParams());
if (handle.empty()) {
return NULL;
}
if (!textures.insert(filename, new OSLTextureHandle(handle))) {
return NULL;
}
return reinterpret_cast<TextureSystem::TextureHandle *>(OSL_TEXTURE_HANDLE_TYPE_SVM |
handle.svm_slot());
return NULL;
}
}
@@ -1760,8 +1720,8 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg,
return set_attribute_float3(f, type, derivatives, val);
}
else if (name == u_I) {
const differential3 dI = differential_from_compact(sd->wi, sd->dI);
float3 f[3] = {sd->wi, dI.dx, dI.dy};
const differential3 dI = differential_from_compact(sd->I, sd->dI);
float3 f[3] = {sd->I, dI.dx, dI.dy};
return set_attribute_float3(f, type, derivatives, val);
}
else if (name == u_u) {

View File

@@ -16,8 +16,6 @@
#include <OSL/oslexec.h>
#include <OSL/rendererservices.h>
#include "scene/image.h"
#ifdef WITH_PTEX
class PtexCache;
#endif
@@ -56,20 +54,10 @@ struct OSLTextureHandle : public OIIO::RefCnt {
{
}
OSLTextureHandle(const ImageHandle &handle)
: type(SVM),
svm_slots(handle.get_svm_slots()),
oiio_handle(nullptr),
processor(nullptr),
handle(handle)
{
}
Type type;
vector<int4> svm_slots;
OSL::TextureSystem::TextureHandle *oiio_handle;
ColorSpaceProcessor *processor;
ImageHandle handle;
};
typedef OIIO::intrusive_ptr<OSLTextureHandle> OSLTextureHandleRef;
@@ -336,8 +324,6 @@ class OSLRenderServices : public OSL::RendererServices {
* shading system. */
OSLTextureHandleMap textures;
static ImageManager *image_manager;
private:
int device_type_;
};

View File

@@ -1443,8 +1443,6 @@ OSL_NOISE_IMPL(osl_snoise, snoise)
/* Texturing */
#include "kernel/svm/ies.h"
ccl_device_extern ccl_private OSLTextureOptions *osl_get_texture_options(
ccl_private ShaderGlobals *sg)
{
@@ -1550,31 +1548,25 @@ ccl_device_extern bool osl_texture(ccl_private ShaderGlobals *sg,
ccl_private float *dalphady,
ccl_private void *errormessage)
{
const unsigned int type = OSL_TEXTURE_HANDLE_TYPE(texture_handle);
const unsigned int slot = OSL_TEXTURE_HANDLE_SLOT(texture_handle);
switch (type) {
case OSL_TEXTURE_HANDLE_TYPE_SVM: {
const float4 rgba = kernel_tex_image_interp(nullptr, slot, s, 1.0f - t);
if (nchannels > 0)
result[0] = rgba.x;
if (nchannels > 1)
result[1] = rgba.y;
if (nchannels > 2)
result[2] = rgba.z;
if (alpha)
*alpha = rgba.w;
return true;
}
case OSL_TEXTURE_HANDLE_TYPE_IES: {
if (nchannels > 0)
result[0] = kernel_ies_interp(nullptr, slot, s, t);
return true;
}
default: {
return false;
}
if (!texture_handle) {
return false;
}
/* Only SVM textures are supported. */
int id = static_cast<int>(reinterpret_cast<size_t>(texture_handle) - 1);
const float4 rgba = kernel_tex_image_interp(nullptr, id, s, 1.0f - t);
if (nchannels > 0)
result[0] = rgba.x;
if (nchannels > 1)
result[1] = rgba.y;
if (nchannels > 2)
result[2] = rgba.z;
if (alpha)
*alpha = rgba.w;
return true;
}
ccl_device_extern bool osl_texture3d(ccl_private ShaderGlobals *sg,
@@ -1594,26 +1586,25 @@ ccl_device_extern bool osl_texture3d(ccl_private ShaderGlobals *sg,
ccl_private float *dalphady,
ccl_private void *errormessage)
{
const unsigned int type = OSL_TEXTURE_HANDLE_TYPE(texture_handle);
const unsigned int slot = OSL_TEXTURE_HANDLE_SLOT(texture_handle);
switch (type) {
case OSL_TEXTURE_HANDLE_TYPE_SVM: {
const float4 rgba = kernel_tex_image_interp_3d(nullptr, slot, *P, INTERPOLATION_NONE);
if (nchannels > 0)
result[0] = rgba.x;
if (nchannels > 1)
result[1] = rgba.y;
if (nchannels > 2)
result[2] = rgba.z;
if (alpha)
*alpha = rgba.w;
return true;
}
default: {
return false;
}
if (!texture_handle) {
return false;
}
/* Only SVM textures are supported. */
int id = static_cast<int>(reinterpret_cast<size_t>(texture_handle) - 1);
const float4 rgba = kernel_tex_image_interp_3d(nullptr, id, *P, INTERPOLATION_NONE);
if (nchannels > 0)
result[0] = rgba.x;
if (nchannels > 1)
result[1] = rgba.y;
if (nchannels > 2)
result[2] = rgba.z;
if (alpha)
*alpha = rgba.w;
return true;
}
ccl_device_extern bool osl_environment(ccl_private ShaderGlobals *sg,

View File

@@ -111,8 +111,8 @@ shader node_principled_bsdf(string distribution = "Multiscatter GGX",
float eta = backfacing() ? 1.0 / f : f;
if (distribution == "GGX" || Roughness <= 5e-2) {
float cosNI = dot(Normal, I);
float Fr = fresnel_dielectric_cos(cosNI, eta);
float cosNO = dot(Normal, I);
float Fr = fresnel_dielectric_cos(cosNO, eta);
float refl_roughness = Roughness;
if (Roughness <= 1e-2)

Some files were not shown because too many files have changed in this diff Show More