Compare commits
37 Commits
ge_2df_tex
...
surface-de
Author | SHA1 | Date | |
---|---|---|---|
a2ed635a73 | |||
b3aead8fd7 | |||
733b5b8c66 | |||
5a17cb4c08 | |||
e5e44c01f2 | |||
a3e32e2ab5 | |||
e843f42e66 | |||
96f6ec07fb | |||
c38e19ca67 | |||
96d66c7e4d | |||
46821f072d | |||
f870343208 | |||
cf1a7e3944 | |||
cf660b2a02 | |||
6f3957770d | |||
7608f366c7 | |||
8c220c57f9 | |||
a300f80043 | |||
22ce298d73 | |||
3469aa47c1 | |||
097a560bc9 | |||
1b7623fc06 | |||
c546256563 | |||
5c263a9050 | |||
8745cd825a | |||
28622ae81e | |||
d6c7163c06 | |||
0bb57759ec | |||
5e1d438d5e | |||
0721bc0ac4 | |||
7ca0894a17 | |||
751496437b | |||
3014601f3b | |||
b80971ce10 | |||
68f5ce194b | |||
1e9003aea5 | |||
95701b0b04 |
@@ -445,7 +445,6 @@ option(WITH_BOOST "Enable features depending on boost" ON)
|
||||
|
||||
# Unit testsing
|
||||
option(WITH_GTESTS "Enable GTest unit testing" OFF)
|
||||
option(WITH_OPENGL_TESTS "Enable OpenGL related unit testing (Experimental)" OFF)
|
||||
|
||||
|
||||
# Documentation
|
||||
@@ -519,20 +518,18 @@ endif()
|
||||
option(WITH_LEGACY_DEPSGRAPH "Build Blender with legacy dependency graph" ON)
|
||||
mark_as_advanced(WITH_LEGACY_DEPSGRAPH)
|
||||
|
||||
if(WIN32)
|
||||
# Use hardcoded paths or find_package to find externals
|
||||
option(WITH_WINDOWS_FIND_MODULES "Use find_package to locate libraries" OFF)
|
||||
mark_as_advanced(WITH_WINDOWS_FIND_MODULES)
|
||||
# Use hardcoded paths or find_package to find externals
|
||||
option(WITH_WINDOWS_FIND_MODULES "Use find_package to locate libraries" OFF)
|
||||
mark_as_advanced(WITH_WINDOWS_FIND_MODULES)
|
||||
|
||||
option(WITH_WINDOWS_CODESIGN "Use signtool to sign the final binary." OFF)
|
||||
mark_as_advanced(WITH_WINDOWS_CODESIGN)
|
||||
option(WITH_WINDOWS_CODESIGN "Use signtool to sign the final binary." OFF)
|
||||
mark_as_advanced(WITH_WINDOWS_CODESIGN)
|
||||
|
||||
set(WINDOWS_CODESIGN_PFX CACHE FILEPATH "Path to pfx file to use for codesigning.")
|
||||
mark_as_advanced(WINDOWS_CODESIGN_PFX)
|
||||
set(WINDOWS_CODESIGN_PFX CACHE FILEPATH "Path to pfx file to use for codesigning.")
|
||||
mark_as_advanced(WINDOWS_CODESIGN_PFX)
|
||||
|
||||
set(WINDOWS_CODESIGN_PFX_PASSWORD CACHE STRING "password for pfx file used for codesigning.")
|
||||
mark_as_advanced(WINDOWS_CODESIGN_PFX_PASSWORD)
|
||||
endif()
|
||||
set(WINDOWS_CODESIGN_PFX_PASSWORD CACHE STRING "password for pfx file used for codesigning.")
|
||||
mark_as_advanced(WINDOWS_CODESIGN_PFX_PASSWORD)
|
||||
|
||||
# avoid using again
|
||||
option_defaults_clear()
|
||||
@@ -631,12 +628,6 @@ if(APPLE)
|
||||
# to silence sdk not found warning, just overrides CMAKE_OSX_SYSROOT
|
||||
set(CMAKE_XCODE_ATTRIBUTE_SDKROOT macosx${OSX_SYSTEM})
|
||||
endif()
|
||||
|
||||
# QuickTime framework is no longer available in SDK 10.12+
|
||||
if(WITH_CODEC_QUICKTIME AND ${OSX_SYSTEM} VERSION_GREATER 10.11)
|
||||
set(WITH_CODEC_QUICKTIME OFF)
|
||||
message(STATUS "QuickTime not supported by SDK ${OSX_SYSTEM}, disabling WITH_CODEC_QUICKTIME")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(OSX_SYSTEM MATCHES 10.9)
|
||||
@@ -726,7 +717,7 @@ if(NOT WITH_BOOST)
|
||||
macro(set_and_warn
|
||||
_setting _val)
|
||||
if(${${_setting}})
|
||||
message(STATUS "'WITH_BOOST' is disabled: forcing 'set(${_setting} ${_val})'")
|
||||
message(STATUS "'WITH_BOOST' is disabled: forceing 'set(${_setting} ${_val})'")
|
||||
endif()
|
||||
set(${_setting} ${_val})
|
||||
endmacro()
|
||||
@@ -870,7 +861,7 @@ endif()
|
||||
# linux only, not cached
|
||||
set(WITH_BINRELOC OFF)
|
||||
|
||||
# MACOSX only, set to avoid uninitialized
|
||||
# MAXOSX only, set to avoid uninitialized
|
||||
set(EXETYPE "")
|
||||
|
||||
# C/C++ flags
|
||||
@@ -927,7 +918,7 @@ if(WITH_X11)
|
||||
if(WITH_X11_ALPHA)
|
||||
find_library(X11_Xrender_LIB Xrender ${X11_LIB_SEARCH_PATH})
|
||||
mark_as_advanced(X11_Xrender_LIB)
|
||||
if(X11_Xrender_LIB)
|
||||
if (X11_Xrender_LIB)
|
||||
list(APPEND PLATFORM_LINKLIBS ${X11_Xrender_LIB})
|
||||
else()
|
||||
set(WITH_X11_ALPHA OFF)
|
||||
@@ -1576,7 +1567,7 @@ if(WITH_CXX11)
|
||||
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
# TODO(sergey): Do we want c++11 or gnu-c++11 here?
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
|
||||
elseif(MSVC)
|
||||
elseif(MSVC12)
|
||||
# Nothing special is needed, C++11 features are available by default.
|
||||
else()
|
||||
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER_ID} is not supported for C++11 build yet")
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- mode: gnumakefile; tab-width: 4; indent-tabs-mode: t; -*-
|
||||
# -*- mode: gnumakefile; tab-width: 8; indent-tabs-mode: t; -*-
|
||||
# vim: tabstop=4
|
||||
#
|
||||
# ##### BEGIN GPL LICENSE BLOCK #####
|
||||
@@ -113,7 +113,7 @@ CMAKE_CONFIG = cmake $(BUILD_CMAKE_ARGS) \
|
||||
# X11 spesific
|
||||
ifdef DISPLAY
|
||||
CMAKE_CONFIG_TOOL = cmake-gui
|
||||
else
|
||||
else
|
||||
CMAKE_CONFIG_TOOL = ccmake
|
||||
endif
|
||||
|
||||
@@ -127,7 +127,7 @@ all: .FORCE
|
||||
# # if test ! -f $(BUILD_DIR)/CMakeCache.txt ; then \
|
||||
# # $(CMAKE_CONFIG); \
|
||||
# # fi
|
||||
|
||||
|
||||
# # do this always incase of failed initial build, could be smarter here...
|
||||
@$(CMAKE_CONFIG)
|
||||
|
||||
|
@@ -289,7 +289,7 @@ NO_BUILD=false
|
||||
NO_CONFIRM=false
|
||||
USE_CXX11=false
|
||||
|
||||
PYTHON_VERSION="3.5.2"
|
||||
PYTHON_VERSION="3.5.1"
|
||||
PYTHON_VERSION_MIN="3.5"
|
||||
PYTHON_FORCE_BUILD=false
|
||||
PYTHON_FORCE_REBUILD=false
|
||||
@@ -322,7 +322,7 @@ OPENEXR_FORCE_REBUILD=false
|
||||
OPENEXR_SKIP=false
|
||||
_with_built_openexr=false
|
||||
|
||||
OIIO_VERSION="1.7.8"
|
||||
OIIO_VERSION="1.6.9"
|
||||
OIIO_VERSION_MIN="1.6.0"
|
||||
OIIO_VERSION_MAX="1.9.0" # UNKNOWN currently # Not supported by current OSL...
|
||||
OIIO_FORCE_BUILD=false
|
||||
@@ -337,14 +337,14 @@ LLVM_FORCE_REBUILD=false
|
||||
LLVM_SKIP=false
|
||||
|
||||
# OSL needs to be compiled for now!
|
||||
OSL_VERSION="1.7.5"
|
||||
OSL_VERSION="1.7.3"
|
||||
OSL_VERSION_MIN=$OSL_VERSION
|
||||
OSL_FORCE_BUILD=false
|
||||
OSL_FORCE_REBUILD=false
|
||||
OSL_SKIP=false
|
||||
|
||||
# OpenSubdiv needs to be compiled for now
|
||||
OSD_VERSION="3.1.1"
|
||||
OSD_VERSION="3.0.5"
|
||||
OSD_VERSION_MIN=$OSD_VERSION
|
||||
OSD_FORCE_BUILD=false
|
||||
OSD_FORCE_REBUILD=false
|
||||
@@ -372,7 +372,7 @@ OPENCOLLADA_FORCE_BUILD=false
|
||||
OPENCOLLADA_FORCE_REBUILD=false
|
||||
OPENCOLLADA_SKIP=false
|
||||
|
||||
FFMPEG_VERSION="3.2.1"
|
||||
FFMPEG_VERSION="2.8.4"
|
||||
FFMPEG_VERSION_MIN="2.8.4"
|
||||
FFMPEG_FORCE_BUILD=false
|
||||
FFMPEG_FORCE_REBUILD=false
|
||||
@@ -795,7 +795,7 @@ CXXFLAGS_BACK=$CXXFLAGS
|
||||
if [ "$USE_CXX11" = true ]; then
|
||||
WARNING "You are trying to use c++11, this *should* go smoothely with any very recent distribution
|
||||
However, if you are experiencing linking errors (also when building Blender itself), please try the following:
|
||||
* Re-run this script with '--build-all --force-all' options.
|
||||
* Re-run this script with `--build-all --force-all` options.
|
||||
* Ensure your gcc version is at the very least 4.8, if possible you should really rather use gcc-5.1 or above.
|
||||
|
||||
Please note that until the transition to C++11-built libraries if completed in your distribution, situation will
|
||||
@@ -2480,7 +2480,7 @@ compile_FFmpeg() {
|
||||
--enable-avfilter --disable-vdpau \
|
||||
--disable-bzlib --disable-libgsm --disable-libspeex \
|
||||
--enable-pthreads --enable-zlib --enable-stripping --enable-runtime-cpudetect \
|
||||
--disable-vaapi --disable-nonfree --enable-gpl \
|
||||
--disable-vaapi --disable-libfaac --disable-nonfree --enable-gpl \
|
||||
--disable-postproc --disable-librtmp --disable-libopencore-amrnb \
|
||||
--disable-libopencore-amrwb --disable-libdc1394 --disable-version3 --disable-outdev=sdl \
|
||||
--disable-libxcb \
|
||||
|
@@ -297,8 +297,8 @@ def generic_builder(id, libdir='', branch='', rsync=False):
|
||||
# Builders
|
||||
|
||||
add_builder(c, 'mac_x86_64_10_6_cmake', 'darwin-9.x.universal', generic_builder, hour=5)
|
||||
# add_builder(c, 'linux_glibc211_i686_cmake', '', generic_builder, hour=1)
|
||||
# add_builder(c, 'linux_glibc211_x86_64_cmake', '', generic_builder, hour=2)
|
||||
add_builder(c, 'linux_glibc211_i686_cmake', '', generic_builder, hour=1)
|
||||
add_builder(c, 'linux_glibc211_x86_64_cmake', '', generic_builder, hour=2)
|
||||
add_builder(c, 'linux_glibc219_i686_cmake', '', generic_builder, hour=3)
|
||||
add_builder(c, 'linux_glibc219_x86_64_cmake', '', generic_builder, hour=4)
|
||||
add_builder(c, 'win32_cmake_vc2013', 'windows_vc12', generic_builder, hour=3)
|
||||
|
@@ -72,8 +72,10 @@ if 'cmake' in builder:
|
||||
# Set up OSX architecture
|
||||
if builder.endswith('x86_64_10_6_cmake'):
|
||||
cmake_extra_options.append('-DCMAKE_OSX_ARCHITECTURES:STRING=x86_64')
|
||||
cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda8-hack/bin/nvcc')
|
||||
cmake_extra_options.append('-DWITH_CODEC_QUICKTIME=OFF')
|
||||
cmake_extra_options.append('-DCMAKE_OSX_DEPLOYMENT_TARGET=10.6')
|
||||
build_cubins = False
|
||||
|
||||
|
||||
elif builder.startswith('win'):
|
||||
|
@@ -45,7 +45,7 @@ macro(BLENDER_SRC_GTEST_EX NAME SRC EXTRA_LIBS DO_ADD_TEST)
|
||||
RUNTIME_OUTPUT_DIRECTORY_DEBUG "${TESTS_OUTPUT_DIR}"
|
||||
INCLUDE_DIRECTORIES "${TEST_INC}")
|
||||
if(${DO_ADD_TEST})
|
||||
add_test(NAME ${NAME}_test COMMAND ${TESTS_OUTPUT_DIR}/${NAME}_test WORKING_DIRECTORY $<TARGET_FILE_DIR:blender>)
|
||||
add_test(${NAME}_test ${TESTS_OUTPUT_DIR}/${NAME}_test)
|
||||
endif()
|
||||
endif()
|
||||
endmacro()
|
||||
|
@@ -56,7 +56,7 @@ if(EXISTS ${SOURCE_DIR}/.git)
|
||||
string(REGEX REPLACE "[\r\n]+" ";" _git_contains_branches "${_git_contains_branches}")
|
||||
string(REGEX REPLACE ";[ \t]+" ";" _git_contains_branches "${_git_contains_branches}")
|
||||
foreach(_branch ${_git_contains_branches})
|
||||
if(NOT "${_branch}" MATCHES "\\(HEAD.*")
|
||||
if (NOT "${_branch}" MATCHES "\\(HEAD.*")
|
||||
set(MY_WC_BRANCH "${_branch}")
|
||||
break()
|
||||
endif()
|
||||
|
@@ -416,7 +416,14 @@ function(setup_liblinks
|
||||
target_link_libraries(${target} ${OPENCOLORIO_LIBRARIES})
|
||||
endif()
|
||||
if(WITH_OPENSUBDIV OR WITH_CYCLES_OPENSUBDIV)
|
||||
if(WIN32 AND NOT UNIX)
|
||||
file_list_suffix(OPENSUBDIV_LIBRARIES_DEBUG "${OPENSUBDIV_LIBRARIES}" "_d")
|
||||
target_link_libraries_debug(${target} "${OPENSUBDIV_LIBRARIES_DEBUG}")
|
||||
target_link_libraries_optimized(${target} "${OPENSUBDIV_LIBRARIES}")
|
||||
unset(OPENSUBDIV_LIBRARIES_DEBUG)
|
||||
else()
|
||||
target_link_libraries(${target} ${OPENSUBDIV_LIBRARIES})
|
||||
endif()
|
||||
endif()
|
||||
if(WITH_OPENVDB)
|
||||
target_link_libraries(${target} ${OPENVDB_LIBRARIES} ${TBB_LIBRARIES})
|
||||
@@ -1574,24 +1581,24 @@ macro(openmp_delayload
|
||||
endmacro()
|
||||
|
||||
MACRO(WINDOWS_SIGN_TARGET target)
|
||||
if(WITH_WINDOWS_CODESIGN)
|
||||
if(!SIGNTOOL_EXE)
|
||||
if (WITH_WINDOWS_CODESIGN)
|
||||
if (!SIGNTOOL_EXE)
|
||||
error("Codesigning is enabled, but signtool is not found")
|
||||
else()
|
||||
if(WINDOWS_CODESIGN_PFX_PASSWORD)
|
||||
if (WINDOWS_CODESIGN_PFX_PASSWORD)
|
||||
set(CODESIGNPASSWORD /p ${WINDOWS_CODESIGN_PFX_PASSWORD})
|
||||
else()
|
||||
if($ENV{PFXPASSWORD})
|
||||
if ($ENV{PFXPASSWORD})
|
||||
set(CODESIGNPASSWORD /p $ENV{PFXPASSWORD})
|
||||
else()
|
||||
message(FATAL_ERROR "WITH_WINDOWS_CODESIGN is on but WINDOWS_CODESIGN_PFX_PASSWORD not set, and environment variable PFXPASSWORD not found, unable to sign code.")
|
||||
message( FATAL_ERROR "WITH_WINDOWS_CODESIGN is on but WINDOWS_CODESIGN_PFX_PASSWORD not set, and environment variable PFXPASSWORD not found, unable to sign code.")
|
||||
endif()
|
||||
endif()
|
||||
add_custom_command(TARGET ${target}
|
||||
POST_BUILD
|
||||
COMMAND ${SIGNTOOL_EXE} sign /f ${WINDOWS_CODESIGN_PFX} ${CODESIGNPASSWORD} $<TARGET_FILE:${target}>
|
||||
VERBATIM
|
||||
)
|
||||
POST_BUILD
|
||||
COMMAND ${SIGNTOOL_EXE} sign /f ${WINDOWS_CODESIGN_PFX} ${CODESIGNPASSWORD} $<TARGET_FILE:${target}>
|
||||
VERBATIM
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
ENDMACRO()
|
||||
|
@@ -1,7 +1,5 @@
|
||||
string(TIMESTAMP CURRENT_YEAR "%Y")
|
||||
|
||||
set(PROJECT_DESCRIPTION "Blender is the free and open source 3D creation suite software.")
|
||||
set(PROJECT_COPYRIGHT "Copyright (C) 2001-${CURRENT_YEAR} Blender Foundation")
|
||||
set(PROJECT_DESCRIPTION "Blender is a very fast and versatile 3D modeller/renderer.")
|
||||
set(PROJECT_COPYRIGHT "Copyright (C) 2001-2012 Blender Foundation")
|
||||
set(PROJECT_CONTACT "foundation@blender.org")
|
||||
set(PROJECT_VENDOR "Blender Foundation")
|
||||
|
||||
@@ -40,8 +38,8 @@ unset(MY_WC_HASH)
|
||||
# Force Package Name
|
||||
execute_process(COMMAND date "+%Y%m%d" OUTPUT_VARIABLE CPACK_DATE OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
string(TOLOWER ${PROJECT_NAME} PROJECT_NAME_LOWER)
|
||||
if(MSVC)
|
||||
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
|
||||
if (MSVC)
|
||||
if ("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
|
||||
set(PACKAGE_ARCH windows64)
|
||||
else()
|
||||
set(PACKAGE_ARCH windows32)
|
||||
@@ -50,7 +48,7 @@ else(MSVC)
|
||||
set(PACKAGE_ARCH ${CMAKE_SYSTEM_PROCESSOR})
|
||||
endif()
|
||||
|
||||
if(CPACK_OVERRIDE_PACKAGENAME)
|
||||
if (CPACK_OVERRIDE_PACKAGENAME)
|
||||
set(CPACK_PACKAGE_FILE_NAME ${CPACK_OVERRIDE_PACKAGENAME}-${PACKAGE_ARCH})
|
||||
else()
|
||||
set(CPACK_PACKAGE_FILE_NAME ${PROJECT_NAME_LOWER}-${MAJOR_VERSION}.${MINOR_VERSION}.${PATCH_VERSION}-git${CPACK_DATE}.${BUILD_REV}-${PACKAGE_ARCH})
|
||||
@@ -137,3 +135,4 @@ unset(MINOR_VERSION)
|
||||
unset(PATCH_VERSION)
|
||||
|
||||
unset(BUILD_REV)
|
||||
|
||||
|
@@ -158,7 +158,7 @@ if(WITH_CODEC_FFMPEG)
|
||||
mp3lame swscale x264 xvidcore theora theoradec theoraenc vorbis vorbisenc vorbisfile ogg
|
||||
)
|
||||
if(WITH_CXX11)
|
||||
set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} schroedinger orc vpx webp swresample)
|
||||
set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} schroedinger orc vpx)
|
||||
endif()
|
||||
set(FFMPEG_LIBPATH ${FFMPEG}/lib)
|
||||
endif()
|
||||
@@ -316,9 +316,6 @@ if(WITH_OPENIMAGEIO)
|
||||
${OPENEXR_LIBRARIES}
|
||||
${ZLIB_LIBRARIES}
|
||||
)
|
||||
if(WITH_CXX11)
|
||||
set(OPENIMAGEIO_LIBRARIES ${OPENIMAGEIO_LIBRARIES} ${LIBDIR}/ffmpeg/lib/libwebp.a)
|
||||
endif()
|
||||
set(OPENIMAGEIO_LIBPATH
|
||||
${OPENIMAGEIO}/lib
|
||||
${JPEG_LIBPATH}
|
||||
|
@@ -33,7 +33,7 @@ endmacro()
|
||||
macro(windows_find_package package_name
|
||||
)
|
||||
if(WITH_WINDOWS_FIND_MODULES)
|
||||
find_package(${package_name})
|
||||
find_package( ${package_name})
|
||||
endif(WITH_WINDOWS_FIND_MODULES)
|
||||
endmacro()
|
||||
|
||||
@@ -112,7 +112,7 @@ set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /ignore:4221")
|
||||
|
||||
# MSVC only, Mingw doesnt need
|
||||
if(CMAKE_CL_64)
|
||||
set(PLATFORM_LINKFLAGS "/MACHINE:X64 ${PLATFORM_LINKFLAGS}")
|
||||
set(PLATFORM_LINKFLAGS "/MACHINE:X64 /OPT:NOREF ${PLATFORM_LINKFLAGS}")
|
||||
else()
|
||||
set(PLATFORM_LINKFLAGS "/MACHINE:IX86 /LARGEADDRESSAWARE ${PLATFORM_LINKFLAGS}")
|
||||
endif()
|
||||
@@ -238,14 +238,14 @@ if(WITH_CODEC_FFMPEG)
|
||||
windows_find_package(FFMPEG)
|
||||
if(NOT FFMPEG_FOUND)
|
||||
warn_hardcoded_paths(ffmpeg)
|
||||
set(FFMPEG_LIBRARY_VERSION 57)
|
||||
set(FFMPEG_LIBRARY_VERSION_AVU 55)
|
||||
set(FFMPEG_LIBRARY_VERSION 55)
|
||||
set(FFMPEG_LIBRARY_VERSION_AVU 52)
|
||||
set(FFMPEG_LIBRARIES
|
||||
${LIBDIR}/ffmpeg/lib/avcodec.lib
|
||||
${LIBDIR}/ffmpeg/lib/avformat.lib
|
||||
${LIBDIR}/ffmpeg/lib/avdevice.lib
|
||||
${LIBDIR}/ffmpeg/lib/avutil.lib
|
||||
${LIBDIR}/ffmpeg/lib/swscale.lib
|
||||
${LIBDIR}/ffmpeg/lib/avcodec-${FFMPEG_LIBRARY_VERSION}.lib
|
||||
${LIBDIR}/ffmpeg/lib/avformat-${FFMPEG_LIBRARY_VERSION}.lib
|
||||
${LIBDIR}/ffmpeg/lib/avdevice-${FFMPEG_LIBRARY_VERSION}.lib
|
||||
${LIBDIR}/ffmpeg/lib/avutil-${FFMPEG_LIBRARY_VERSION_AVU}.lib
|
||||
${LIBDIR}/ffmpeg/lib/swscale-2.lib
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
@@ -380,7 +380,6 @@ if(WITH_OPENIMAGEIO)
|
||||
set(OPENCOLORIO_DEFINITIONS "-DOCIO_STATIC_BUILD")
|
||||
set(OPENIMAGEIO_IDIFF "${OPENIMAGEIO}/bin/idiff.exe")
|
||||
add_definitions(-DOIIO_STATIC_BUILD)
|
||||
add_definitions(-DOIIO_NO_SSE=1)
|
||||
endif()
|
||||
|
||||
if(WITH_LLVM)
|
||||
@@ -446,20 +445,10 @@ if(WITH_MOD_CLOTH_ELTOPO)
|
||||
endif()
|
||||
|
||||
if(WITH_OPENSUBDIV OR WITH_CYCLES_OPENSUBDIV)
|
||||
set(OPENSUBDIV_INCLUDE_DIR ${LIBDIR}/opensubdiv/include)
|
||||
set(OPENSUBDIV_LIBPATH ${LIBDIR}/opensubdiv/lib)
|
||||
set(OPENSUBDIV_LIBRARIES optimized ${OPENSUBDIV_LIBPATH}/osdCPU.lib
|
||||
optimized ${OPENSUBDIV_LIBPATH}/osdGPU.lib
|
||||
debug ${OPENSUBDIV_LIBPATH}/osdCPU_d.lib
|
||||
debug ${OPENSUBDIV_LIBPATH}/osdGPU_d.lib
|
||||
)
|
||||
set(OPENSUBDIV_HAS_OPENMP TRUE)
|
||||
set(OPENSUBDIV_HAS_TBB FALSE)
|
||||
set(OPENSUBDIV_HAS_OPENCL TRUE)
|
||||
set(OPENSUBDIV_HAS_CUDA FALSE)
|
||||
set(OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK TRUE)
|
||||
set(OPENSUBDIV_HAS_GLSL_COMPUTE TRUE)
|
||||
windows_find_package(OpenSubdiv)
|
||||
set(OPENSUBDIV_INCLUDE_DIR ${LIBDIR}/opensubdiv/include)
|
||||
set(OPENSUBDIV_LIBPATH ${LIBDIR}/opensubdiv/lib)
|
||||
set(OPENSUBDIV_LIBRARIES ${OPENSUBDIV_LIBPATH}/osdCPU.lib ${OPENSUBDIV_LIBPATH}/osdGPU.lib)
|
||||
find_package(OpenSubdiv)
|
||||
endif()
|
||||
|
||||
if(WITH_SDL)
|
||||
|
@@ -681,7 +681,7 @@ Image classes
|
||||
|
||||
.. attribute:: zbuff
|
||||
|
||||
Use depth component of render as grayscale color - suitable for texture source.
|
||||
Use depth component of render as grey scale color - suitable for texture source.
|
||||
|
||||
:type: bool
|
||||
|
||||
@@ -817,7 +817,7 @@ Image classes
|
||||
|
||||
.. attribute:: zbuff
|
||||
|
||||
Use depth component of viewport as grayscale color - suitable for texture source.
|
||||
Use depth component of viewport as grey scale color - suitable for texture source.
|
||||
|
||||
:type: bool
|
||||
|
||||
@@ -1260,8 +1260,8 @@ Filter classes
|
||||
|
||||
.. class:: FilterGray
|
||||
|
||||
Filter for grayscale effect.
|
||||
Proportions of R, G and B contributions in the output grayscale are 28:151:77.
|
||||
Filter for gray scale effect.
|
||||
Proportions of R, G and B contributions in the output gray scale are 28:151:77.
|
||||
|
||||
.. attribute:: previous
|
||||
|
||||
|
@@ -405,7 +405,7 @@ base class --- :class:`SCA_IObject`
|
||||
|
||||
.. note::
|
||||
|
||||
This attribute is experimental and may be removed (but probably wont be).
|
||||
This attribute is experemental and may be removed (but probably wont be).
|
||||
|
||||
.. note::
|
||||
|
||||
@@ -419,7 +419,7 @@ base class --- :class:`SCA_IObject`
|
||||
|
||||
.. note::
|
||||
|
||||
This attribute is experimental and may be removed (but probably wont be).
|
||||
This attribute is experemental and may be removed (but probably wont be).
|
||||
|
||||
.. note::
|
||||
|
||||
@@ -453,7 +453,7 @@ base class --- :class:`SCA_IObject`
|
||||
|
||||
.. attribute:: childrenRecursive
|
||||
|
||||
all children of this object including children's children, (read-only).
|
||||
all children of this object including childrens children, (read-only).
|
||||
|
||||
:type: :class:`CListValue` of :class:`KX_GameObject`'s
|
||||
|
||||
@@ -536,7 +536,7 @@ base class --- :class:`SCA_IObject`
|
||||
|
||||
.. method:: getAxisVect(vect)
|
||||
|
||||
Returns the axis vector rotates by the object's worldspace orientation.
|
||||
Returns the axis vector rotates by the objects worldspace orientation.
|
||||
This is the equivalent of multiplying the vector by the orientation matrix.
|
||||
|
||||
:arg vect: a vector to align the axis.
|
||||
@@ -596,7 +596,7 @@ base class --- :class:`SCA_IObject`
|
||||
|
||||
Gets the game object's linear velocity.
|
||||
|
||||
This method returns the game object's velocity through it's center of mass, ie no angular velocity component.
|
||||
This method returns the game object's velocity through it's centre of mass, ie no angular velocity component.
|
||||
|
||||
:arg local:
|
||||
* False: you get the "global" velocity ie: relative to world orientation.
|
||||
@@ -609,7 +609,7 @@ base class --- :class:`SCA_IObject`
|
||||
|
||||
Sets the game object's linear velocity.
|
||||
|
||||
This method sets game object's velocity through it's center of mass,
|
||||
This method sets game object's velocity through it's centre of mass,
|
||||
ie no angular velocity component.
|
||||
|
||||
This requires a dynamic object.
|
||||
@@ -814,7 +814,7 @@ base class --- :class:`SCA_IObject`
|
||||
# do something
|
||||
pass
|
||||
|
||||
The face parameter determines the orientation of the normal.
|
||||
The face paremeter determines the orientation of the normal.
|
||||
|
||||
* 0 => hit normal is always oriented towards the ray origin (as if you casted the ray from outside)
|
||||
* 1 => hit normal is the real face normal (only for mesh object, otherwise face has no effect)
|
||||
@@ -911,7 +911,7 @@ base class --- :class:`SCA_IObject`
|
||||
|
||||
.. note::
|
||||
|
||||
The gameObject argument has an advantage that it can convert from a mesh with modifiers applied (such as the Subdivision Surface modifier).
|
||||
The gameObject argument has an advantage that it can convert from a mesh with modifiers applied (such as subsurf).
|
||||
|
||||
.. warning::
|
||||
|
||||
@@ -919,7 +919,7 @@ base class --- :class:`SCA_IObject`
|
||||
|
||||
.. warning::
|
||||
|
||||
If the object is a part of a compound object it will fail (parent or child)
|
||||
If the object is a part of a combound object it will fail (parent or child)
|
||||
|
||||
.. warning::
|
||||
|
||||
|
@@ -12,7 +12,7 @@ contents: dir(bgl). A simple search on the web can point to more
|
||||
than enough material to teach OpenGL programming, from books to many
|
||||
collections of tutorials.
|
||||
|
||||
Here is a comprehensive `list of books <https://www.khronos.org/developers/books/>`__ (non free).
|
||||
Here is a comprehensive `list of books <https://www.opengl.org/documentation/books/>`__ (non free).
|
||||
The `arcsynthesis tutorials <https://web.archive.org/web/20150225192611/http://www.arcsynthesis.org/gltut/index.html>`__
|
||||
is one of the best resources to learn modern OpenGL and
|
||||
`g-truc <http://www.g-truc.net/post-opengl-samples.html#menu>`__
|
||||
@@ -2067,7 +2067,7 @@ offers a set of extensive examples, including advanced features.
|
||||
:arg length: Returns the length of the string returned in source (excluding the null terminator).
|
||||
:type source: :class:`bgl.Buffer` char.
|
||||
:arg source: Specifies an array of characters that is used to return the source code string.
|
||||
|
||||
|
||||
|
||||
.. function:: glShaderSource(shader, shader_string):
|
||||
|
||||
|
@@ -204,7 +204,7 @@ Lets say we want to access the texture of a brush via Python, to adjust its ``co
|
||||
|
||||
- Start in the default scene and enable 'Sculpt' mode from the 3D-View header.
|
||||
- From the toolbar expand the **Texture** panel and add a new texture.
|
||||
*Notice the texture button its self doesn't have very useful links (you can check the tooltips).*
|
||||
*Notice the texture button its self doesn't have very useful links (you can check the tool-tips).*
|
||||
- The contrast setting isn't exposed in the sculpt toolbar, so view the texture in the properties panel...
|
||||
|
||||
- In the properties button select the Texture context.
|
||||
|
@@ -19,7 +19,7 @@ This is a typical Python environment so tutorials on how to write Python scripts
|
||||
will work running the scripts in Blender too.
|
||||
Blender provides the :mod:`bpy` module to the Python interpreter.
|
||||
This module can be imported in a script and gives access to Blender data, classes, and functions.
|
||||
Scripts that deal with Blender data will need to import this module.
|
||||
Scripts that deal with Blender data will need to import this module.
|
||||
|
||||
Here is a simple example of moving a vertex of the object named **Cube**:
|
||||
|
||||
@@ -80,7 +80,7 @@ To run as modules:
|
||||
|
||||
|
||||
Add-ons
|
||||
-------
|
||||
------
|
||||
|
||||
Some of Blenders functionality is best kept optional,
|
||||
alongside scripts loaded at startup we have add-ons which are kept in their own directory ``scripts/addons``,
|
||||
@@ -213,7 +213,7 @@ A simple Blender/Python module can look like this:
|
||||
bpy.utils.register_class(SimpleOperator)
|
||||
|
||||
def unregister():
|
||||
bpy.utils.unregister_class(SimpleOperator)
|
||||
bpy.utils.unregister_class(SimpleOperator)
|
||||
|
||||
if __name__ == "__main__":
|
||||
register()
|
||||
@@ -327,7 +327,7 @@ Say you want to store material settings for a custom engine.
|
||||
.. note::
|
||||
|
||||
*The class must be registered before being used in a property, failing to do so will raise an error:*
|
||||
|
||||
|
||||
``ValueError: bpy_struct "Material" registration error: my_custom_props could not register``
|
||||
|
||||
|
||||
@@ -429,3 +429,4 @@ Calling these operators:
|
||||
>>> bpy.ops.object.operator_2()
|
||||
Hello World OBJECT_OT_operator_2
|
||||
{'FINISHED'}
|
||||
|
||||
|
@@ -427,9 +427,9 @@ if BLENDER_REVISION != "Unknown":
|
||||
BLENDER_VERSION_DOTS += " " + BLENDER_REVISION # '2.62.1 SHA1'
|
||||
|
||||
BLENDER_VERSION_PATH = "_".join(blender_version_strings) # '2_62_1'
|
||||
if bpy.app.version_cycle in {"rc", "release"}:
|
||||
# '2_62a_release'
|
||||
BLENDER_VERSION_PATH = "%s%s_release" % ("_".join(blender_version_strings[:2]), bpy.app.version_char)
|
||||
if bpy.app.version_cycle == "release":
|
||||
BLENDER_VERSION_PATH = "%s%s_release" % ("_".join(blender_version_strings[:2]),
|
||||
bpy.app.version_char) # '2_62_release'
|
||||
|
||||
# --------------------------DOWNLOADABLE FILES----------------------------------
|
||||
|
||||
@@ -1565,9 +1565,9 @@ def pyrna2sphinx(basepath):
|
||||
|
||||
# operators
|
||||
def write_ops():
|
||||
API_BASEURL = "https://developer.blender.org/diffusion/B/browse/master/release/scripts "
|
||||
API_BASEURL_ADDON = "https://developer.blender.org/diffusion/BA"
|
||||
API_BASEURL_ADDON_CONTRIB = "https://developer.blender.org/diffusion/BAC"
|
||||
API_BASEURL = "https://developer.blender.org/diffusion/B/browse/master/release/scripts/ "
|
||||
API_BASEURL_ADDON = "https://developer.blender.org/diffusion/BA/"
|
||||
API_BASEURL_ADDON_CONTRIB = "https://developer.blender.org/diffusion/BAC/"
|
||||
|
||||
op_modules = {}
|
||||
for op in ops.values():
|
||||
@@ -1632,9 +1632,13 @@ def write_sphinx_conf_py(basepath):
|
||||
file = open(filepath, "w", encoding="utf-8")
|
||||
fw = file.write
|
||||
|
||||
fw("import sys, os\n\n")
|
||||
fw("extensions = ['sphinx.ext.intersphinx']\n\n")
|
||||
fw("intersphinx_mapping = {'blender_manual': ('https://docs.blender.org/manual/en/dev/', None)}\n\n")
|
||||
fw("import sys, os\n")
|
||||
fw("\n")
|
||||
fw("extensions = ['sphinx.ext.intersphinx']\n")
|
||||
fw("\n")
|
||||
fw("intersphinx_mapping = {'blender_manual': ('https://www.blender.org/manual/', None)}\n")
|
||||
fw("\n")
|
||||
|
||||
fw("project = 'Blender'\n")
|
||||
# fw("master_doc = 'index'\n")
|
||||
fw("copyright = u'Blender Foundation'\n")
|
||||
@@ -1651,16 +1655,12 @@ def write_sphinx_conf_py(basepath):
|
||||
|
||||
# not helpful since the source is generated, adds to upload size.
|
||||
fw("html_copy_source = False\n")
|
||||
fw("html_show_sphinx = False\n")
|
||||
fw("html_split_index = True\n")
|
||||
fw("\n")
|
||||
|
||||
# needed for latex, pdf gen
|
||||
fw("latex_elements = {\n")
|
||||
fw(" 'papersize': 'a4paper',\n")
|
||||
fw("}\n\n")
|
||||
|
||||
fw("latex_documents = [ ('contents', 'contents.tex', 'Blender Index', 'Blender Foundation', 'manual'), ]\n")
|
||||
fw("latex_paper_size = 'a4paper'\n")
|
||||
file.close()
|
||||
|
||||
|
||||
|
@@ -41,9 +41,9 @@ import tempfile
|
||||
import zipfile
|
||||
|
||||
|
||||
DEFAULT_RSYNC_SERVER = "docs.blender.org"
|
||||
DEFAULT_RSYNC_SERVER = "www.blender.org"
|
||||
DEFAULT_RSYNC_ROOT = "/api/"
|
||||
DEFAULT_SYMLINK_ROOT = "/data/www/vhosts/docs.blender.org/api"
|
||||
DEFAULT_SYMLINK_ROOT = "/data/www/vhosts/www.blender.org/api"
|
||||
|
||||
|
||||
def argparse_create():
|
||||
@@ -96,11 +96,6 @@ def main():
|
||||
|
||||
rsync_base = "rsync://%s@%s:%s" % (args.user, args.rsync_server, args.rsync_root)
|
||||
|
||||
blenver = blenver_zip = ""
|
||||
api_name = ""
|
||||
branch = ""
|
||||
is_release = False
|
||||
|
||||
# I) Update local mirror using rsync.
|
||||
rsync_mirror_cmd = ("rsync", "--delete-after", "-avzz", rsync_base, args.mirror_dir)
|
||||
subprocess.run(rsync_mirror_cmd, env=dict(os.environ, RSYNC_PASSWORD=args.password))
|
||||
@@ -113,24 +108,19 @@ def main():
|
||||
subprocess.run(doc_gen_cmd)
|
||||
|
||||
# III) Get Blender version info.
|
||||
blenver = blenver_zip = ""
|
||||
getver_file = os.path.join(tmp_dir, "blendver.txt")
|
||||
getver_script = (""
|
||||
"import sys, bpy\n"
|
||||
"with open(sys.argv[-1], 'w') as f:\n"
|
||||
" is_release = bpy.app.version_cycle in {'rc', 'release'}\n"
|
||||
" branch = bpy.app.build_branch.split()[0].decode()\n"
|
||||
" f.write('%d\\n' % is_release)\n"
|
||||
" f.write('%s\\n' % branch)\n"
|
||||
" f.write('%d.%d%s\\n' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
|
||||
" if is_release else '%s\\n' % branch)\n"
|
||||
" f.write('%d_%d%s_release' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
|
||||
" if is_release else '%d_%d_%d' % bpy.app.version)\n")
|
||||
" f.write('%d_%d%s_release\\n' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
|
||||
" if bpy.app.version_cycle in {'rc', 'release'} else '%d_%d_%d\\n' % bpy.app.version)\n"
|
||||
" f.write('%d_%d_%d' % bpy.app.version)\n")
|
||||
get_ver_cmd = (args.blender, "--background", "-noaudio", "--factory-startup", "--python-exit-code", "1",
|
||||
"--python-expr", getver_script, "--", getver_file)
|
||||
subprocess.run(get_ver_cmd)
|
||||
with open(getver_file) as f:
|
||||
is_release, branch, blenver, blenver_zip = f.read().split("\n")
|
||||
is_release = bool(int(is_release))
|
||||
blenver, blenver_zip = f.read().split("\n")
|
||||
os.remove(getver_file)
|
||||
|
||||
# IV) Build doc.
|
||||
@@ -142,7 +132,7 @@ def main():
|
||||
os.chdir(curr_dir)
|
||||
|
||||
# V) Cleanup existing matching dir in server mirror (if any), and copy new doc.
|
||||
api_name = blenver
|
||||
api_name = "blender_python_api_%s" % blenver
|
||||
api_dir = os.path.join(args.mirror_dir, api_name)
|
||||
if os.path.exists(api_dir):
|
||||
shutil.rmtree(api_dir)
|
||||
@@ -160,15 +150,19 @@ def main():
|
||||
os.rename(zip_path, os.path.join(api_dir, "%s.zip" % zip_name))
|
||||
|
||||
# VII) Create symlinks and html redirects.
|
||||
#~ os.symlink(os.path.join(DEFAULT_SYMLINK_ROOT, api_name, "contents.html"), os.path.join(api_dir, "index.html"))
|
||||
os.symlink("./contents.html", os.path.join(api_dir, "index.html"))
|
||||
if is_release:
|
||||
symlink = os.path.join(args.mirror_dir, "current")
|
||||
if blenver.endswith("release"):
|
||||
symlink = os.path.join(args.mirror_dir, "blender_python_api_current")
|
||||
os.remove(symlink)
|
||||
os.symlink("./%s" % api_name, symlink)
|
||||
with open(os.path.join(args.mirror_dir, "250PythonDoc/index.html"), 'w') as f:
|
||||
f.write("<html><head><title>Redirecting...</title><meta http-equiv=\"REFRESH\""
|
||||
"content=\"0;url=../%s/\"></head><body>Redirecting...</body></html>" % api_name)
|
||||
elif branch == "master":
|
||||
else:
|
||||
symlink = os.path.join(args.mirror_dir, "blender_python_api_master")
|
||||
os.remove(symlink)
|
||||
os.symlink("./%s" % api_name, symlink)
|
||||
with open(os.path.join(args.mirror_dir, "blender_python_api/index.html"), 'w') as f:
|
||||
f.write("<html><head><title>Redirecting...</title><meta http-equiv=\"REFRESH\""
|
||||
"content=\"0;url=../%s/\"></head><body>Redirecting...</body></html>" % api_name)
|
||||
|
@@ -77,7 +77,7 @@ namespace std {
|
||||
void resize(size_type new_size)
|
||||
{ resize(new_size, T()); }
|
||||
|
||||
#if defined(_VECTOR_) && (_MSC_VER<1910)
|
||||
#if defined(_VECTOR_)
|
||||
// workaround MSVC std::vector implementation
|
||||
void resize(size_type new_size, const value_type& x)
|
||||
{
|
||||
@@ -110,7 +110,7 @@ namespace std {
|
||||
vector_base::insert(vector_base::end(), new_size - vector_base::size(), x);
|
||||
}
|
||||
#else
|
||||
// either GCC 4.1, MSVC2017 or non-GCC
|
||||
// either GCC 4.1 or non-GCC
|
||||
// default implementation which should always work.
|
||||
void resize(size_type new_size, const value_type& x)
|
||||
{
|
||||
|
2
extern/cuew/include/cuew.h
vendored
2
extern/cuew/include/cuew.h
vendored
@@ -114,7 +114,7 @@ extern "C" {
|
||||
#define cuGLGetDevices cuGLGetDevices_v2
|
||||
|
||||
/* Types. */
|
||||
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined (__aarch64__)
|
||||
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
|
||||
typedef unsigned long long CUdeviceptr;
|
||||
#else
|
||||
typedef unsigned int CUdeviceptr;
|
||||
|
2
extern/curve_fit_nd/curve_fit_nd.h
vendored
2
extern/curve_fit_nd/curve_fit_nd.h
vendored
@@ -137,7 +137,7 @@ int curve_fit_cubic_to_points_refit_db(
|
||||
const double error_threshold,
|
||||
const unsigned int calc_flag,
|
||||
const unsigned int *corners,
|
||||
const unsigned int corners_len,
|
||||
unsigned int corners_len,
|
||||
const double corner_angle,
|
||||
|
||||
double **r_cubic_array, unsigned int *r_cubic_array_len,
|
||||
|
4
extern/gflags/README.blender
vendored
4
extern/gflags/README.blender
vendored
@@ -18,8 +18,6 @@ Local modifications:
|
||||
- Applied some modifications from fork https://github.com/Nazg-Gul/gflags.git
|
||||
(see https://github.com/gflags/gflags/pull/129)
|
||||
|
||||
- Avoid attempt of acquiring mutex lock in FlagRegistry::GlobalRegistry when
|
||||
- Avoid attemot of acquiring mutex lock in FlagRegistry::GlobalRegistry when
|
||||
doing static flags initialization. See d81dd2d in Blender repository.
|
||||
|
||||
- Made `google::{anonymous}::FlagValue::ValueSize() const` inlined, so it does
|
||||
not trigger strict compiler warning.
|
4
extern/gflags/src/gflags.cc
vendored
4
extern/gflags/src/gflags.cc
vendored
@@ -218,7 +218,7 @@ class FlagValue {
|
||||
bool Equal(const FlagValue& x) const;
|
||||
FlagValue* New() const; // creates a new one with default value
|
||||
void CopyFrom(const FlagValue& x);
|
||||
inline int ValueSize() const;
|
||||
int ValueSize() const;
|
||||
|
||||
// Calls the given validate-fn on value_buffer_, and returns
|
||||
// whatever it returns. But first casts validate_fn_proto to a
|
||||
@@ -443,7 +443,7 @@ void FlagValue::CopyFrom(const FlagValue& x) {
|
||||
}
|
||||
}
|
||||
|
||||
inline int FlagValue::ValueSize() const {
|
||||
int FlagValue::ValueSize() const {
|
||||
if (type_ > FV_MAX_INDEX) {
|
||||
assert(false); // unknown type
|
||||
return 0;
|
||||
|
@@ -60,10 +60,6 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
|
||||
# include <type_traits>
|
||||
#endif
|
||||
|
||||
#include "gtest/gtest-message.h"
|
||||
#include "gtest/internal/gtest-string.h"
|
||||
#include "gtest/internal/gtest-filepath.h"
|
||||
@@ -858,7 +854,6 @@ struct AddReference<T&> { typedef T& type; }; // NOLINT
|
||||
template <typename From, typename To>
|
||||
class ImplicitlyConvertible {
|
||||
private:
|
||||
#if !((__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800))
|
||||
// We need the following helper functions only for their types.
|
||||
// They have no implementations.
|
||||
|
||||
@@ -879,7 +874,6 @@ class ImplicitlyConvertible {
|
||||
// implicitly converted to type To.
|
||||
static char Helper(To);
|
||||
static char (&Helper(...))[2]; // NOLINT
|
||||
#endif
|
||||
|
||||
// We have to put the 'public' section after the 'private' section,
|
||||
// or MSVC refuses to compile the code.
|
||||
@@ -889,8 +883,6 @@ class ImplicitlyConvertible {
|
||||
// instantiation. The simplest workaround is to use its C++0x type traits
|
||||
// functions (C++Builder 2009 and above only).
|
||||
static const bool value = __is_convertible(From, To);
|
||||
#elif (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
|
||||
static const bool value = std::is_convertible<From, To>::value;
|
||||
#else
|
||||
// MSVC warns about implicitly converting from double to int for
|
||||
// possible loss of data, so we need to temporarily disable the
|
||||
|
@@ -34,7 +34,7 @@ add_subdirectory(mikktspace)
|
||||
add_subdirectory(glew-mx)
|
||||
add_subdirectory(eigen)
|
||||
|
||||
if(WITH_GAMEENGINE_DECKLINK)
|
||||
if (WITH_GAMEENGINE_DECKLINK)
|
||||
add_subdirectory(decklink)
|
||||
endif()
|
||||
|
||||
@@ -62,7 +62,7 @@ if(WITH_IK_ITASC)
|
||||
add_subdirectory(itasc)
|
||||
endif()
|
||||
|
||||
if(WITH_GAMEENGINE)
|
||||
if(WITH_IK_SOLVER OR WITH_GAMEENGINE OR WITH_MOD_BOOLEAN)
|
||||
add_subdirectory(moto)
|
||||
endif()
|
||||
|
||||
|
@@ -101,11 +101,11 @@ ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x);
|
||||
ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x);
|
||||
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);
|
||||
|
||||
ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x);
|
||||
ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x);
|
||||
ATOMIC_INLINE unsigned int atomic_fetch_and_add_u(unsigned int *p, unsigned int x);
|
||||
ATOMIC_INLINE unsigned int atomic_fetch_and_sub_u(unsigned int *p, unsigned int x);
|
||||
ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsigned int _new);
|
||||
ATOMIC_INLINE unsigned atomic_add_and_fetch_u(unsigned *p, unsigned x);
|
||||
ATOMIC_INLINE unsigned atomic_sub_and_fetch_u(unsigned *p, unsigned x);
|
||||
ATOMIC_INLINE unsigned atomic_fetch_and_add_u(unsigned *p, unsigned x);
|
||||
ATOMIC_INLINE unsigned atomic_fetch_and_sub_u(unsigned *p, unsigned x);
|
||||
ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new);
|
||||
|
||||
/* WARNING! Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation,
|
||||
* which means they are only efficient if collisions are highly unlikely (i.e. if probability of two threads
|
||||
|
@@ -113,58 +113,58 @@ ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new)
|
||||
|
||||
/******************************************************************************/
|
||||
/* unsigned operations. */
|
||||
ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x)
|
||||
ATOMIC_INLINE unsigned atomic_add_and_fetch_u(unsigned *p, unsigned x)
|
||||
{
|
||||
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
|
||||
assert(sizeof(unsigned) == LG_SIZEOF_INT);
|
||||
|
||||
#if (LG_SIZEOF_INT == 8)
|
||||
return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
|
||||
return (unsigned)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
|
||||
#elif (LG_SIZEOF_INT == 4)
|
||||
return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
|
||||
return (unsigned)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
|
||||
#endif
|
||||
}
|
||||
|
||||
ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x)
|
||||
ATOMIC_INLINE unsigned atomic_sub_and_fetch_u(unsigned *p, unsigned x)
|
||||
{
|
||||
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
|
||||
assert(sizeof(unsigned) == LG_SIZEOF_INT);
|
||||
|
||||
#if (LG_SIZEOF_INT == 8)
|
||||
return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
|
||||
return (unsigned)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
|
||||
#elif (LG_SIZEOF_INT == 4)
|
||||
return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
|
||||
return (unsigned)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
|
||||
#endif
|
||||
}
|
||||
|
||||
ATOMIC_INLINE unsigned int atomic_fetch_and_add_u(unsigned int *p, unsigned int x)
|
||||
ATOMIC_INLINE unsigned atomic_fetch_and_add_u(unsigned *p, unsigned x)
|
||||
{
|
||||
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
|
||||
assert(sizeof(unsigned) == LG_SIZEOF_INT);
|
||||
|
||||
#if (LG_SIZEOF_INT == 8)
|
||||
return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x);
|
||||
return (unsigned)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x);
|
||||
#elif (LG_SIZEOF_INT == 4)
|
||||
return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x);
|
||||
return (unsigned)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x);
|
||||
#endif
|
||||
}
|
||||
|
||||
ATOMIC_INLINE unsigned int atomic_fetch_and_sub_u(unsigned int *p, unsigned int x)
|
||||
ATOMIC_INLINE unsigned atomic_fetch_and_sub_u(unsigned *p, unsigned x)
|
||||
{
|
||||
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
|
||||
assert(sizeof(unsigned) == LG_SIZEOF_INT);
|
||||
|
||||
#if (LG_SIZEOF_INT == 8)
|
||||
return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
|
||||
return (unsigned)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
|
||||
#elif (LG_SIZEOF_INT == 4)
|
||||
return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
|
||||
return (unsigned)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
|
||||
#endif
|
||||
}
|
||||
|
||||
ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsigned int _new)
|
||||
ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new)
|
||||
{
|
||||
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
|
||||
assert(sizeof(unsigned) == LG_SIZEOF_INT);
|
||||
|
||||
#if (LG_SIZEOF_INT == 8)
|
||||
return (unsigned int)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
|
||||
return (unsigned)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
|
||||
#elif (LG_SIZEOF_INT == 4)
|
||||
return (unsigned int)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
|
||||
return (unsigned)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@@ -110,10 +110,10 @@ void AUD_LimiterReader::read(int& length, bool& eos, sample_t* buffer)
|
||||
eos = true;
|
||||
}
|
||||
|
||||
if(position < int(m_start * rate))
|
||||
if(position < m_start * rate)
|
||||
{
|
||||
int len2 = length;
|
||||
for(int len = int(m_start * rate) - position;
|
||||
for(int len = m_start * rate - position;
|
||||
len2 == length && !eos;
|
||||
len -= length)
|
||||
{
|
||||
|
@@ -365,7 +365,6 @@ bool AUD_SoftwareDevice::AUD_SoftwareHandle::seek(float position)
|
||||
if(!m_status)
|
||||
return false;
|
||||
|
||||
m_pitch->setPitch(m_user_pitch);
|
||||
m_reader->seek((int)(position * m_reader->getSpecs().rate));
|
||||
|
||||
if(m_status == AUD_STATUS_STOPPED)
|
||||
|
@@ -74,7 +74,7 @@ elseif(CMAKE_COMPILER_IS_GNUCC)
|
||||
if(CXX_HAS_AVX2)
|
||||
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c -mfpmath=sse")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -fno-finite-math-only")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
|
||||
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
check_cxx_compiler_flag(-msse CXX_HAS_SSE)
|
||||
check_cxx_compiler_flag(-mavx CXX_HAS_AVX)
|
||||
@@ -90,7 +90,7 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
if(CXX_HAS_AVX2)
|
||||
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -fno-finite-math-only")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
|
||||
endif()
|
||||
|
||||
if(CXX_HAS_SSE)
|
||||
|
@@ -72,17 +72,20 @@ static void session_print(const string& str)
|
||||
|
||||
static void session_print_status()
|
||||
{
|
||||
int sample, tile;
|
||||
double total_time, sample_time, render_time;
|
||||
string status, substatus;
|
||||
|
||||
/* get status */
|
||||
float progress = options.session->progress.get_progress();
|
||||
sample = options.session->progress.get_sample();
|
||||
options.session->progress.get_tile(tile, total_time, sample_time, render_time);
|
||||
options.session->progress.get_status(status, substatus);
|
||||
|
||||
if(substatus != "")
|
||||
status += ": " + substatus;
|
||||
|
||||
/* print status */
|
||||
status = string_printf("Progress %05.2f %s", (double) progress*100, status.c_str());
|
||||
status = string_printf("Sample %d %s", sample, status.c_str());
|
||||
session_print(status);
|
||||
}
|
||||
|
||||
@@ -164,12 +167,13 @@ static void display_info(Progress& progress)
|
||||
latency = (elapsed - last);
|
||||
last = elapsed;
|
||||
|
||||
double total_time, sample_time;
|
||||
int sample, tile;
|
||||
double total_time, sample_time, render_time;
|
||||
string status, substatus;
|
||||
|
||||
progress.get_time(total_time, sample_time);
|
||||
sample = progress.get_sample();
|
||||
progress.get_tile(tile, total_time, sample_time, render_time);
|
||||
progress.get_status(status, substatus);
|
||||
float progress_val = progress.get_progress();
|
||||
|
||||
if(substatus != "")
|
||||
status += ": " + substatus;
|
||||
@@ -180,10 +184,10 @@ static void display_info(Progress& progress)
|
||||
"%s"
|
||||
" Time: %.2f"
|
||||
" Latency: %.4f"
|
||||
" Progress: %05.2f"
|
||||
" Sample: %d"
|
||||
" Average: %.4f"
|
||||
" Interactive: %s",
|
||||
status.c_str(), total_time, latency, (double) progress_val*100, sample_time, interactive.c_str());
|
||||
status.c_str(), total_time, latency, sample, sample_time, interactive.c_str());
|
||||
|
||||
view_display_info(str.c_str());
|
||||
|
||||
|
@@ -523,7 +523,7 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node)
|
||||
|
||||
/* we don't yet support arbitrary attributes, for now add vertex
|
||||
* coordinates as generated coordinates if requested */
|
||||
if(mesh->need_attribute(state.scene, ATTR_STD_GENERATED)) {
|
||||
if (mesh->need_attribute(state.scene, ATTR_STD_GENERATED)) {
|
||||
Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED);
|
||||
memcpy(attr->data_float3(), mesh->verts.data(), sizeof(float3)*mesh->verts.size());
|
||||
}
|
||||
|
@@ -25,7 +25,6 @@ set(SRC
|
||||
blender_camera.cpp
|
||||
blender_mesh.cpp
|
||||
blender_object.cpp
|
||||
blender_object_cull.cpp
|
||||
blender_particles.cpp
|
||||
blender_curves.cpp
|
||||
blender_logging.cpp
|
||||
@@ -36,7 +35,6 @@ set(SRC
|
||||
blender_texture.cpp
|
||||
|
||||
CCL_api.h
|
||||
blender_object_cull.h
|
||||
blender_sync.h
|
||||
blender_session.h
|
||||
blender_texture.h
|
||||
|
@@ -23,25 +23,11 @@ bl_info = {
|
||||
"location": "Info header, render engine menu",
|
||||
"description": "Cycles Render Engine integration",
|
||||
"warning": "",
|
||||
"wiki_url": "https://docs.blender.org/manual/en/dev/render/cycles/",
|
||||
"wiki_url": "https://www.blender.org/manual/render/cycles/index.html",
|
||||
"tracker_url": "",
|
||||
"support": 'OFFICIAL',
|
||||
"category": "Render"}
|
||||
|
||||
# Support 'reload' case.
|
||||
if "bpy" in locals():
|
||||
import importlib
|
||||
if "engine" in locals():
|
||||
importlib.reload(engine)
|
||||
if "version_update" in locals():
|
||||
importlib.reload(version_update)
|
||||
if "ui" in locals():
|
||||
importlib.reload(ui)
|
||||
if "properties" in locals():
|
||||
importlib.reload(properties)
|
||||
if "presets" in locals():
|
||||
importlib.reload(presets)
|
||||
|
||||
import bpy
|
||||
|
||||
from . import (
|
||||
@@ -107,13 +93,7 @@ def engine_exit():
|
||||
engine.exit()
|
||||
|
||||
|
||||
classes = (
|
||||
CyclesRender,
|
||||
)
|
||||
|
||||
|
||||
def register():
|
||||
from bpy.utils import register_class
|
||||
from . import ui
|
||||
from . import properties
|
||||
from . import presets
|
||||
@@ -128,15 +108,12 @@ def register():
|
||||
properties.register()
|
||||
ui.register()
|
||||
presets.register()
|
||||
|
||||
for cls in classes:
|
||||
register_class(cls)
|
||||
bpy.utils.register_module(__name__)
|
||||
|
||||
bpy.app.handlers.version_update.append(version_update.do_versions)
|
||||
|
||||
|
||||
def unregister():
|
||||
from bpy.utils import unregister_class
|
||||
from . import ui
|
||||
from . import properties
|
||||
from . import presets
|
||||
@@ -147,6 +124,4 @@ def unregister():
|
||||
ui.unregister()
|
||||
properties.unregister()
|
||||
presets.unregister()
|
||||
|
||||
for cls in classes:
|
||||
unregister_class(cls)
|
||||
bpy.utils.unregister_module(__name__)
|
||||
|
@@ -50,24 +50,6 @@ def _workaround_buggy_drivers():
|
||||
_cycles.opencl_disable()
|
||||
|
||||
|
||||
def _configure_argument_parser():
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description="Cycles Addon argument parser")
|
||||
parser.add_argument("--cycles-resumable-num-chunks",
|
||||
help="Number of chunks to split sample range into",
|
||||
default=None)
|
||||
parser.add_argument("--cycles-resumable-current-chunk",
|
||||
help="Current chunk of samples range to render",
|
||||
default=None)
|
||||
parser.add_argument("--cycles-resumable-start-chunk",
|
||||
help="Start chunk to render",
|
||||
default=None)
|
||||
parser.add_argument("--cycles-resumable-end-chunk",
|
||||
help="End chunk to render",
|
||||
default=None)
|
||||
return parser
|
||||
|
||||
|
||||
def _parse_command_line():
|
||||
import sys
|
||||
|
||||
@@ -75,22 +57,25 @@ def _parse_command_line():
|
||||
if "--" not in argv:
|
||||
return
|
||||
|
||||
parser = _configure_argument_parser()
|
||||
args, unknown = parser.parse_known_args(argv[argv.index("--") + 1:])
|
||||
argv = argv[argv.index("--") + 1:]
|
||||
|
||||
if args.cycles_resumable_num_chunks is not None:
|
||||
if args.cycles_resumable_current_chunk is not None:
|
||||
import _cycles
|
||||
_cycles.set_resumable_chunk(
|
||||
int(args.cycles_resumable_num_chunks),
|
||||
int(args.cycles_resumable_current_chunk))
|
||||
elif args.cycles_resumable_start_chunk is not None and \
|
||||
args.cycles_resumable_end_chunk:
|
||||
import _cycles
|
||||
_cycles.set_resumable_chunk_range(
|
||||
int(args.cycles_resumable_num_chunks),
|
||||
int(args.cycles_resumable_start_chunk),
|
||||
int(args.cycles_resumable_end_chunk))
|
||||
num_resumable_chunks = None
|
||||
current_resumable_chunk = None
|
||||
|
||||
# TODO(sergey): Add some nice error ptins if argument is not used properly.
|
||||
idx = 0
|
||||
while idx < len(argv) - 1:
|
||||
arg = argv[idx]
|
||||
if arg == '--cycles-resumable-num-chunks':
|
||||
num_resumable_chunks = int(argv[idx + 1])
|
||||
elif arg == '--cycles-resumable-current-chunk':
|
||||
current_resumable_chunk = int(argv[idx + 1])
|
||||
idx += 1
|
||||
|
||||
if num_resumable_chunks is not None and current_resumable_chunk is not None:
|
||||
import _cycles
|
||||
_cycles.set_resumable_chunks(num_resumable_chunks,
|
||||
current_resumable_chunk)
|
||||
|
||||
|
||||
def init():
|
||||
|
@@ -82,23 +82,12 @@ class AddPresetSampling(AddPresetBase, Operator):
|
||||
preset_subdir = "cycles/sampling"
|
||||
|
||||
|
||||
classes = (
|
||||
AddPresetIntegrator,
|
||||
AddPresetSampling,
|
||||
)
|
||||
|
||||
|
||||
def register():
|
||||
from bpy.utils import register_class
|
||||
for cls in classes:
|
||||
register_class(cls)
|
||||
pass
|
||||
|
||||
|
||||
def unregister():
|
||||
from bpy.utils import unregister_class
|
||||
for cls in classes:
|
||||
unregister_class(cls)
|
||||
|
||||
pass
|
||||
|
||||
if __name__ == "__main__":
|
||||
register()
|
||||
|
@@ -288,7 +288,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
description="Probabilistically terminate light samples when the light contribution is below this threshold (more noise but faster rendering). "
|
||||
"Zero disables the test and never ignores lights",
|
||||
min=0.0, max=1.0,
|
||||
default=0.01,
|
||||
default=0.05,
|
||||
)
|
||||
|
||||
cls.caustics_reflective = BoolProperty(
|
||||
@@ -528,12 +528,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
description="Use special type BVH optimized for hair (uses more ram but renders faster)",
|
||||
default=True,
|
||||
)
|
||||
cls.debug_bvh_time_steps = IntProperty(
|
||||
name="BVH Time Steps",
|
||||
description="Split BVH primitives by this number of time steps to speed up render time in cost of memory",
|
||||
default=0,
|
||||
min=0, max=16,
|
||||
)
|
||||
cls.tile_order = EnumProperty(
|
||||
name="Tile Order",
|
||||
description="Tile order for rendering",
|
||||
@@ -638,20 +632,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
items=enum_texture_limit
|
||||
)
|
||||
|
||||
cls.ao_bounces = IntProperty(
|
||||
name="AO Bounces",
|
||||
default=0,
|
||||
description="Approximate indirect light with background tinted ambient occlusion at the specified bounce, 0 disables this feature",
|
||||
min=0, max=1024,
|
||||
)
|
||||
|
||||
cls.ao_bounces_render = IntProperty(
|
||||
name="AO Bounces Render",
|
||||
default=0,
|
||||
description="Approximate indirect light with background tinted ambient occlusion at the specified bounce, 0 disables this feature",
|
||||
min=0, max=1024,
|
||||
)
|
||||
|
||||
# Various fine-tuning debug flags
|
||||
|
||||
def devices_update_callback(self, context):
|
||||
@@ -665,10 +645,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
cls.debug_use_cpu_sse3 = BoolProperty(name="SSE3", default=True)
|
||||
cls.debug_use_cpu_sse2 = BoolProperty(name="SSE2", default=True)
|
||||
cls.debug_use_qbvh = BoolProperty(name="QBVH", default=True)
|
||||
cls.debug_use_cpu_split_kernel = BoolProperty(name="Split Kernel", default=False)
|
||||
|
||||
cls.debug_use_cuda_adaptive_compile = BoolProperty(name="Adaptive Compile", default=False)
|
||||
cls.debug_use_cuda_split_kernel = BoolProperty(name="Split Kernel", default=False)
|
||||
|
||||
cls.debug_opencl_kernel_type = EnumProperty(
|
||||
name="OpenCL Kernel Type",
|
||||
@@ -695,8 +673,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
update=devices_update_callback
|
||||
)
|
||||
|
||||
cls.debug_opencl_kernel_single_program = BoolProperty(name="Single Program", default=False, update=devices_update_callback);
|
||||
|
||||
cls.debug_use_opencl_debug = BoolProperty(name="Debug OpenCL", default=False)
|
||||
|
||||
@classmethod
|
||||
|
@@ -86,10 +86,12 @@ def use_sample_all_lights(context):
|
||||
|
||||
return cscene.sample_all_lights_direct or cscene.sample_all_lights_indirect
|
||||
|
||||
def show_device_active(context):
|
||||
cscene = context.scene.cycles
|
||||
if cscene.device != 'GPU':
|
||||
def show_device_selection(context):
|
||||
type = get_device_type(context)
|
||||
if type == 'NETWORK':
|
||||
return True
|
||||
if not type in {'CUDA', 'OPENCL'}:
|
||||
return False
|
||||
return context.user_preferences.addons[__package__].preferences.has_active_device()
|
||||
|
||||
|
||||
@@ -215,7 +217,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
|
||||
draw_samples_info(layout, context)
|
||||
|
||||
|
||||
class CyclesRender_PT_geometry(CyclesButtonsPanel, Panel):
|
||||
class CyclesRender_PT_geometery(CyclesButtonsPanel, Panel):
|
||||
bl_label = "Geometry"
|
||||
bl_options = {'DEFAULT_CLOSED'}
|
||||
|
||||
@@ -224,7 +226,6 @@ class CyclesRender_PT_geometry(CyclesButtonsPanel, Panel):
|
||||
|
||||
scene = context.scene
|
||||
cscene = scene.cycles
|
||||
ccscene = scene.cycles_curves
|
||||
|
||||
if cscene.feature_set == 'EXPERIMENTAL':
|
||||
split = layout.split()
|
||||
@@ -251,25 +252,6 @@ class CyclesRender_PT_geometry(CyclesButtonsPanel, Panel):
|
||||
row.prop(cscene, "volume_step_size")
|
||||
row.prop(cscene, "volume_max_steps")
|
||||
|
||||
layout.prop(ccscene, "use_curves", text="Use Hair")
|
||||
col = layout.column()
|
||||
col.active = ccscene.use_curves
|
||||
|
||||
col.prop(ccscene, "primitive", text="Primitive")
|
||||
col.prop(ccscene, "shape", text="Shape")
|
||||
|
||||
if not (ccscene.primitive in {'CURVE_SEGMENTS', 'LINE_SEGMENTS'} and ccscene.shape == 'RIBBONS'):
|
||||
col.prop(ccscene, "cull_backfacing", text="Cull back-faces")
|
||||
|
||||
if ccscene.primitive == 'TRIANGLES' and ccscene.shape == 'THICK':
|
||||
col.prop(ccscene, "resolution", text="Resolution")
|
||||
elif ccscene.primitive == 'CURVE_SEGMENTS':
|
||||
col.prop(ccscene, "subdivisions", text="Curve subdivisions")
|
||||
|
||||
row = col.row()
|
||||
row.prop(ccscene, "minimum_width", text="Min Pixels")
|
||||
row.prop(ccscene, "maximum_width", text="Max Ext.")
|
||||
|
||||
|
||||
class CyclesRender_PT_light_paths(CyclesButtonsPanel, Panel):
|
||||
bl_label = "Light Paths"
|
||||
@@ -430,10 +412,6 @@ class CyclesRender_PT_performance(CyclesButtonsPanel, Panel):
|
||||
col.prop(cscene, "debug_use_spatial_splits")
|
||||
col.prop(cscene, "debug_use_hair_bvh")
|
||||
|
||||
row = col.row()
|
||||
row.active = not cscene.debug_use_spatial_splits
|
||||
row.prop(cscene, "debug_bvh_time_steps")
|
||||
|
||||
|
||||
class CyclesRender_PT_layer_options(CyclesButtonsPanel, Panel):
|
||||
bl_label = "Layer"
|
||||
@@ -789,13 +767,10 @@ class CyclesObject_PT_cycles_settings(CyclesButtonsPanel, Panel):
|
||||
col = layout.column()
|
||||
col.label(text="Performance:")
|
||||
row = col.row()
|
||||
sub = row.row()
|
||||
sub.active = scene.render.use_simplify and cscene.use_camera_cull
|
||||
sub.prop(cob, "use_camera_cull")
|
||||
|
||||
sub = row.row()
|
||||
sub.active = scene.render.use_simplify and cscene.use_distance_cull
|
||||
sub.prop(cob, "use_distance_cull")
|
||||
row.active = scene.render.use_simplify and cscene.use_camera_cull
|
||||
row.prop(cob, "use_camera_cull")
|
||||
row.active = scene.render.use_simplify and cscene.use_distance_cull
|
||||
row.prop(cob, "use_distance_cull")
|
||||
|
||||
|
||||
class CYCLES_OT_use_shading_nodes(Operator):
|
||||
@@ -1036,11 +1011,10 @@ class CyclesWorld_PT_ambient_occlusion(CyclesButtonsPanel, Panel):
|
||||
layout = self.layout
|
||||
|
||||
light = context.world.light_settings
|
||||
scene = context.scene
|
||||
|
||||
row = layout.row()
|
||||
sub = row.row()
|
||||
sub.active = light.use_ambient_occlusion or scene.render.use_simplify
|
||||
sub.active = light.use_ambient_occlusion
|
||||
sub.prop(light, "ao_factor", text="Factor")
|
||||
row.prop(light, "distance", text="Distance")
|
||||
|
||||
@@ -1417,6 +1391,43 @@ class CyclesParticle_PT_textures(CyclesButtonsPanel, Panel):
|
||||
layout.template_ID(slot, "texture", new="texture.new")
|
||||
|
||||
|
||||
class CyclesRender_PT_CurveRendering(CyclesButtonsPanel, Panel):
|
||||
bl_label = "Cycles Hair Rendering"
|
||||
bl_context = "particle"
|
||||
|
||||
@classmethod
|
||||
def poll(cls, context):
|
||||
psys = context.particle_system
|
||||
return CyclesButtonsPanel.poll(context) and psys and psys.settings.type == 'HAIR'
|
||||
|
||||
def draw_header(self, context):
|
||||
ccscene = context.scene.cycles_curves
|
||||
self.layout.prop(ccscene, "use_curves", text="")
|
||||
|
||||
def draw(self, context):
|
||||
layout = self.layout
|
||||
|
||||
scene = context.scene
|
||||
ccscene = scene.cycles_curves
|
||||
|
||||
layout.active = ccscene.use_curves
|
||||
|
||||
layout.prop(ccscene, "primitive", text="Primitive")
|
||||
layout.prop(ccscene, "shape", text="Shape")
|
||||
|
||||
if not (ccscene.primitive in {'CURVE_SEGMENTS', 'LINE_SEGMENTS'} and ccscene.shape == 'RIBBONS'):
|
||||
layout.prop(ccscene, "cull_backfacing", text="Cull back-faces")
|
||||
|
||||
if ccscene.primitive == 'TRIANGLES' and ccscene.shape == 'THICK':
|
||||
layout.prop(ccscene, "resolution", text="Resolution")
|
||||
elif ccscene.primitive == 'CURVE_SEGMENTS':
|
||||
layout.prop(ccscene, "subdivisions", text="Curve subdivisions")
|
||||
|
||||
row = layout.row()
|
||||
row.prop(ccscene, "minimum_width", text="Min Pixels")
|
||||
row.prop(ccscene, "maximum_width", text="Max Ext.")
|
||||
|
||||
|
||||
class CyclesRender_PT_bake(CyclesButtonsPanel, Panel):
|
||||
bl_label = "Bake"
|
||||
bl_context = "render"
|
||||
@@ -1516,18 +1527,15 @@ class CyclesRender_PT_debug(CyclesButtonsPanel, Panel):
|
||||
row.prop(cscene, "debug_use_cpu_avx", toggle=True)
|
||||
row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
|
||||
col.prop(cscene, "debug_use_qbvh")
|
||||
col.prop(cscene, "debug_use_cpu_split_kernel")
|
||||
|
||||
col = layout.column()
|
||||
col.label('CUDA Flags:')
|
||||
col.prop(cscene, "debug_use_cuda_adaptive_compile")
|
||||
col.prop(cscene, "debug_use_cuda_split_kernel")
|
||||
|
||||
col = layout.column()
|
||||
col.label('OpenCL Flags:')
|
||||
col.prop(cscene, "debug_opencl_kernel_type", text="Kernel")
|
||||
col.prop(cscene, "debug_opencl_device_type", text="Device")
|
||||
col.prop(cscene, "debug_opencl_kernel_single_program", text="Single Program")
|
||||
col.prop(cscene, "debug_use_opencl_debug", text="Debug")
|
||||
|
||||
|
||||
@@ -1614,13 +1622,6 @@ class CyclesScene_PT_simplify(CyclesButtonsPanel, Panel):
|
||||
row.active = cscene.use_distance_cull
|
||||
row.prop(cscene, "distance_cull_margin", text="Distance")
|
||||
|
||||
split = layout.split()
|
||||
col = split.column()
|
||||
col.prop(cscene, "ao_bounces")
|
||||
|
||||
col = split.column()
|
||||
col.prop(cscene, "ao_bounces_render")
|
||||
|
||||
def draw_device(self, context):
|
||||
scene = context.scene
|
||||
layout = self.layout
|
||||
@@ -1634,7 +1635,7 @@ def draw_device(self, context):
|
||||
split = layout.split(percentage=1/3)
|
||||
split.label("Device:")
|
||||
row = split.row()
|
||||
row.active = show_device_active(context)
|
||||
row.active = show_device_selection(context)
|
||||
row.prop(cscene, "device", text="")
|
||||
|
||||
if engine.with_osl() and use_cpu(context):
|
||||
@@ -1713,75 +1714,17 @@ def get_panels():
|
||||
|
||||
return panels
|
||||
|
||||
|
||||
classes = (
|
||||
CYCLES_MT_sampling_presets,
|
||||
CYCLES_MT_integrator_presets,
|
||||
CyclesRender_PT_sampling,
|
||||
CyclesRender_PT_geometry,
|
||||
CyclesRender_PT_light_paths,
|
||||
CyclesRender_PT_motion_blur,
|
||||
CyclesRender_PT_film,
|
||||
CyclesRender_PT_performance,
|
||||
CyclesRender_PT_layer_options,
|
||||
CyclesRender_PT_layer_passes,
|
||||
CyclesRender_PT_views,
|
||||
Cycles_PT_post_processing,
|
||||
CyclesCamera_PT_dof,
|
||||
Cycles_PT_context_material,
|
||||
CyclesObject_PT_motion_blur,
|
||||
CyclesObject_PT_cycles_settings,
|
||||
CYCLES_OT_use_shading_nodes,
|
||||
CyclesLamp_PT_preview,
|
||||
CyclesLamp_PT_lamp,
|
||||
CyclesLamp_PT_nodes,
|
||||
CyclesLamp_PT_spot,
|
||||
CyclesWorld_PT_preview,
|
||||
CyclesWorld_PT_surface,
|
||||
CyclesWorld_PT_volume,
|
||||
CyclesWorld_PT_ambient_occlusion,
|
||||
CyclesWorld_PT_mist,
|
||||
CyclesWorld_PT_ray_visibility,
|
||||
CyclesWorld_PT_settings,
|
||||
CyclesMaterial_PT_preview,
|
||||
CyclesMaterial_PT_surface,
|
||||
CyclesMaterial_PT_volume,
|
||||
CyclesMaterial_PT_displacement,
|
||||
CyclesMaterial_PT_settings,
|
||||
CyclesTexture_PT_context,
|
||||
CyclesTexture_PT_node,
|
||||
CyclesTexture_PT_mapping,
|
||||
CyclesTexture_PT_colors,
|
||||
CyclesParticle_PT_textures,
|
||||
CyclesRender_PT_bake,
|
||||
CyclesRender_PT_debug,
|
||||
CyclesParticle_PT_CurveSettings,
|
||||
CyclesScene_PT_simplify,
|
||||
)
|
||||
|
||||
|
||||
def register():
|
||||
from bpy.utils import register_class
|
||||
|
||||
bpy.types.RENDER_PT_render.append(draw_device)
|
||||
bpy.types.VIEW3D_HT_header.append(draw_pause)
|
||||
|
||||
for panel in get_panels():
|
||||
panel.COMPAT_ENGINES.add('CYCLES')
|
||||
|
||||
for cls in classes:
|
||||
register_class(cls)
|
||||
|
||||
|
||||
def unregister():
|
||||
from bpy.utils import unregister_class
|
||||
|
||||
bpy.types.RENDER_PT_render.remove(draw_device)
|
||||
bpy.types.VIEW3D_HT_header.remove(draw_pause)
|
||||
|
||||
for panel in get_panels():
|
||||
if 'CYCLES' in panel.COMPAT_ENGINES:
|
||||
panel.COMPAT_ENGINES.remove('CYCLES')
|
||||
|
||||
for cls in classes:
|
||||
unregister_class(cls)
|
||||
|
@@ -29,6 +29,24 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Utilities */
|
||||
|
||||
/* Hair curve functions */
|
||||
|
||||
void curveinterp_v3_v3v3v3v3(float3 *p, float3 *v1, float3 *v2, float3 *v3, float3 *v4, const float w[4]);
|
||||
void interp_weights(float t, float data[4]);
|
||||
float shaperadius(float shape, float root, float tip, float time);
|
||||
void InterpolateKeySegments(int seg, int segno, int key, int curve, float3 *keyloc, float *time, ParticleCurveData *CData);
|
||||
bool ObtainCacheParticleUV(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int uv_num);
|
||||
bool ObtainCacheParticleVcol(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int vcol_num);
|
||||
bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background);
|
||||
void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData);
|
||||
void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
|
||||
float3 RotCam, bool is_ortho);
|
||||
void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resolution);
|
||||
void ExportCurveTriangleUV(ParticleCurveData *CData, int vert_offset, int resol, float3 *uvdata);
|
||||
void ExportCurveTriangleVcol(ParticleCurveData *CData, int vert_offset, int resol, uchar4 *cdata);
|
||||
|
||||
ParticleCurveData::ParticleCurveData()
|
||||
{
|
||||
}
|
||||
@@ -37,7 +55,7 @@ ParticleCurveData::~ParticleCurveData()
|
||||
{
|
||||
}
|
||||
|
||||
static void interp_weights(float t, float data[4])
|
||||
void interp_weights(float t, float data[4])
|
||||
{
|
||||
/* Cardinal curve interpolation */
|
||||
float t2 = t * t;
|
||||
@@ -50,19 +68,17 @@ static void interp_weights(float t, float data[4])
|
||||
data[3] = fc * t3 - fc * t2;
|
||||
}
|
||||
|
||||
static void curveinterp_v3_v3v3v3v3(float3 *p,
|
||||
float3 *v1, float3 *v2, float3 *v3, float3 *v4,
|
||||
const float w[4])
|
||||
void curveinterp_v3_v3v3v3v3(float3 *p, float3 *v1, float3 *v2, float3 *v3, float3 *v4, const float w[4])
|
||||
{
|
||||
p->x = v1->x * w[0] + v2->x * w[1] + v3->x * w[2] + v4->x * w[3];
|
||||
p->y = v1->y * w[0] + v2->y * w[1] + v3->y * w[2] + v4->y * w[3];
|
||||
p->z = v1->z * w[0] + v2->z * w[1] + v3->z * w[2] + v4->z * w[3];
|
||||
}
|
||||
|
||||
static float shaperadius(float shape, float root, float tip, float time)
|
||||
float shaperadius(float shape, float root, float tip, float time)
|
||||
{
|
||||
float radius = 1.0f - time;
|
||||
|
||||
|
||||
if(shape != 0.0f) {
|
||||
if(shape < 0.0f)
|
||||
radius = powf(radius, 1.0f + shape);
|
||||
@@ -74,13 +90,7 @@ static float shaperadius(float shape, float root, float tip, float time)
|
||||
|
||||
/* curve functions */
|
||||
|
||||
static void InterpolateKeySegments(int seg,
|
||||
int segno,
|
||||
int key,
|
||||
int curve,
|
||||
float3 *keyloc,
|
||||
float *time,
|
||||
ParticleCurveData *CData)
|
||||
void InterpolateKeySegments(int seg, int segno, int key, int curve, float3 *keyloc, float *time, ParticleCurveData *CData)
|
||||
{
|
||||
float3 ckey_loc1 = CData->curvekey_co[key];
|
||||
float3 ckey_loc2 = ckey_loc1;
|
||||
@@ -109,11 +119,7 @@ static void InterpolateKeySegments(int seg,
|
||||
curveinterp_v3_v3v3v3v3(keyloc, &ckey_loc1, &ckey_loc2, &ckey_loc3, &ckey_loc4, t);
|
||||
}
|
||||
|
||||
static bool ObtainCacheParticleData(Mesh *mesh,
|
||||
BL::Mesh *b_mesh,
|
||||
BL::Object *b_ob,
|
||||
ParticleCurveData *CData,
|
||||
bool background)
|
||||
bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background)
|
||||
{
|
||||
int curvenum = 0;
|
||||
int keyno = 0;
|
||||
@@ -137,7 +143,7 @@ static bool ObtainCacheParticleData(Mesh *mesh,
|
||||
int totparts = b_psys.particles.length();
|
||||
int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.draw_percentage() / 100.0f);
|
||||
int totcurves = totchild;
|
||||
|
||||
|
||||
if(b_part.child_type() == 0 || totchild == 0)
|
||||
totcurves += totparts;
|
||||
|
||||
@@ -155,7 +161,7 @@ static bool ObtainCacheParticleData(Mesh *mesh,
|
||||
CData->psys_shader.push_back_slow(shader);
|
||||
|
||||
float radius = get_float(cpsys, "radius_scale") * 0.5f;
|
||||
|
||||
|
||||
CData->psys_rootradius.push_back_slow(radius * get_float(cpsys, "root_width"));
|
||||
CData->psys_tipradius.push_back_slow(radius * get_float(cpsys, "tip_width"));
|
||||
CData->psys_shape.push_back_slow(get_float(cpsys, "shape"));
|
||||
@@ -175,7 +181,7 @@ static bool ObtainCacheParticleData(Mesh *mesh,
|
||||
for(; pa_no < totparts+totchild; pa_no++) {
|
||||
int keynum = 0;
|
||||
CData->curve_firstkey.push_back_slow(keyno);
|
||||
|
||||
|
||||
float curve_length = 0.0f;
|
||||
float3 pcKey;
|
||||
for(int step_no = 0; step_no < ren_step; step_no++) {
|
||||
@@ -207,12 +213,7 @@ static bool ObtainCacheParticleData(Mesh *mesh,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ObtainCacheParticleUV(Mesh *mesh,
|
||||
BL::Mesh *b_mesh,
|
||||
BL::Object *b_ob,
|
||||
ParticleCurveData *CData,
|
||||
bool background,
|
||||
int uv_num)
|
||||
bool ObtainCacheParticleUV(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int uv_num)
|
||||
{
|
||||
if(!(mesh && b_mesh && b_ob && CData))
|
||||
return false;
|
||||
@@ -230,7 +231,7 @@ static bool ObtainCacheParticleUV(Mesh *mesh,
|
||||
int totparts = b_psys.particles.length();
|
||||
int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.draw_percentage() / 100.0f);
|
||||
int totcurves = totchild;
|
||||
|
||||
|
||||
if(b_part.child_type() == 0 || totchild == 0)
|
||||
totcurves += totparts;
|
||||
|
||||
@@ -266,12 +267,7 @@ static bool ObtainCacheParticleUV(Mesh *mesh,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ObtainCacheParticleVcol(Mesh *mesh,
|
||||
BL::Mesh *b_mesh,
|
||||
BL::Object *b_ob,
|
||||
ParticleCurveData *CData,
|
||||
bool background,
|
||||
int vcol_num)
|
||||
bool ObtainCacheParticleVcol(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int vcol_num)
|
||||
{
|
||||
if(!(mesh && b_mesh && b_ob && CData))
|
||||
return false;
|
||||
@@ -289,7 +285,7 @@ static bool ObtainCacheParticleVcol(Mesh *mesh,
|
||||
int totparts = b_psys.particles.length();
|
||||
int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.draw_percentage() / 100.0f);
|
||||
int totcurves = totchild;
|
||||
|
||||
|
||||
if(b_part.child_type() == 0 || totchild == 0)
|
||||
totcurves += totparts;
|
||||
|
||||
@@ -337,16 +333,16 @@ static void set_resolution(BL::Object *b_ob, BL::Scene *scene, bool render)
|
||||
}
|
||||
}
|
||||
|
||||
static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
|
||||
float3 RotCam, bool is_ortho)
|
||||
void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
|
||||
float3 RotCam, bool is_ortho)
|
||||
{
|
||||
int vertexno = mesh->verts.size();
|
||||
int vertexindex = vertexno;
|
||||
int numverts = 0, numtris = 0;
|
||||
|
||||
/* compute and reserve size of arrays */
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
|
||||
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
|
||||
continue;
|
||||
|
||||
@@ -358,8 +354,8 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
|
||||
mesh->reserve_mesh(mesh->verts.size() + numverts, mesh->num_triangles() + numtris);
|
||||
|
||||
/* actually export */
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
|
||||
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
|
||||
continue;
|
||||
|
||||
@@ -384,7 +380,7 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
|
||||
|
||||
if(curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)
|
||||
v1 = CData->curvekey_co[curvekey] - CData->curvekey_co[max(curvekey - 1, CData->curve_firstkey[curve])];
|
||||
else
|
||||
else
|
||||
v1 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey - 1];
|
||||
|
||||
time = CData->curvekey_time[curvekey]/CData->curve_length[curve];
|
||||
@@ -411,7 +407,6 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
|
||||
}
|
||||
}
|
||||
|
||||
mesh->resize_mesh(mesh->verts.size(), mesh->triangles.size());
|
||||
mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
|
||||
mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
|
||||
mesh->add_face_normals();
|
||||
@@ -421,30 +416,28 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
|
||||
/* texture coords still needed */
|
||||
}
|
||||
|
||||
static void ExportCurveTriangleGeometry(Mesh *mesh,
|
||||
ParticleCurveData *CData,
|
||||
int resolution)
|
||||
void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resolution)
|
||||
{
|
||||
int vertexno = mesh->verts.size();
|
||||
int vertexindex = vertexno;
|
||||
int numverts = 0, numtris = 0;
|
||||
|
||||
/* compute and reserve size of arrays */
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
|
||||
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
|
||||
continue;
|
||||
|
||||
numverts += (CData->curve_keynum[curve] - 1)*resolution + resolution;
|
||||
numtris += (CData->curve_keynum[curve] - 1)*2*resolution;
|
||||
numverts += (CData->curve_keynum[curve] - 2)*2*resolution + resolution;
|
||||
numtris += (CData->curve_keynum[curve] - 2)*resolution;
|
||||
}
|
||||
}
|
||||
|
||||
mesh->reserve_mesh(mesh->verts.size() + numverts, mesh->num_triangles() + numtris);
|
||||
|
||||
/* actually export */
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
|
||||
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
|
||||
continue;
|
||||
|
||||
@@ -546,7 +539,6 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
|
||||
}
|
||||
}
|
||||
|
||||
mesh->resize_mesh(mesh->verts.size(), mesh->triangles.size());
|
||||
mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
|
||||
mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
|
||||
mesh->add_face_normals();
|
||||
@@ -556,7 +548,7 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
|
||||
/* texture coords still needed */
|
||||
}
|
||||
|
||||
static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
|
||||
void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
|
||||
{
|
||||
int num_keys = 0;
|
||||
int num_curves = 0;
|
||||
@@ -565,13 +557,13 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
|
||||
return;
|
||||
|
||||
Attribute *attr_intercept = NULL;
|
||||
|
||||
|
||||
if(mesh->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT))
|
||||
attr_intercept = mesh->curve_attributes.add(ATTR_STD_CURVE_INTERCEPT);
|
||||
|
||||
/* compute and reserve size of arrays */
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
|
||||
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
|
||||
continue;
|
||||
|
||||
@@ -590,8 +582,8 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
|
||||
num_curves = 0;
|
||||
|
||||
/* actually export */
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
|
||||
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
|
||||
continue;
|
||||
|
||||
@@ -685,13 +677,8 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
|
||||
/* in case of new attribute, we verify if there really was any motion */
|
||||
if(new_attribute) {
|
||||
if(i != numkeys || !have_motion) {
|
||||
/* No motion or hair "topology" changed, remove attributes again. */
|
||||
if(i != numkeys) {
|
||||
VLOG(1) << "Hair topology changed, removing attribute.";
|
||||
}
|
||||
else {
|
||||
VLOG(1) << "No motion, removing attribute.";
|
||||
}
|
||||
/* no motion, remove attributes again */
|
||||
VLOG(1) << "No motion, removing attribute";
|
||||
mesh->curve_attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
}
|
||||
else if(time_index > 0) {
|
||||
@@ -711,10 +698,7 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
|
||||
}
|
||||
}
|
||||
|
||||
static void ExportCurveTriangleUV(ParticleCurveData *CData,
|
||||
int vert_offset,
|
||||
int resol,
|
||||
float3 *uvdata)
|
||||
void ExportCurveTriangleUV(ParticleCurveData *CData, int vert_offset, int resol, float3 *uvdata)
|
||||
{
|
||||
if(uvdata == NULL)
|
||||
return;
|
||||
@@ -724,8 +708,8 @@ static void ExportCurveTriangleUV(ParticleCurveData *CData,
|
||||
|
||||
int vertexindex = vert_offset;
|
||||
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
|
||||
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
|
||||
continue;
|
||||
|
||||
@@ -759,18 +743,15 @@ static void ExportCurveTriangleUV(ParticleCurveData *CData,
|
||||
}
|
||||
}
|
||||
|
||||
static void ExportCurveTriangleVcol(ParticleCurveData *CData,
|
||||
int vert_offset,
|
||||
int resol,
|
||||
uchar4 *cdata)
|
||||
void ExportCurveTriangleVcol(ParticleCurveData *CData, int vert_offset, int resol, uchar4 *cdata)
|
||||
{
|
||||
if(cdata == NULL)
|
||||
return;
|
||||
|
||||
int vertexindex = vert_offset;
|
||||
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
|
||||
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
|
||||
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
|
||||
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
|
||||
continue;
|
||||
|
||||
@@ -892,7 +873,7 @@ void BlenderSync::sync_curves(Mesh *mesh,
|
||||
}
|
||||
|
||||
/* obtain general settings */
|
||||
const bool use_curves = scene->curve_system_manager->use_curves;
|
||||
bool use_curves = scene->curve_system_manager->use_curves;
|
||||
|
||||
if(!(use_curves && b_ob.mode() != b_ob.mode_PARTICLE_EDIT)) {
|
||||
if(!motion)
|
||||
@@ -900,11 +881,11 @@ void BlenderSync::sync_curves(Mesh *mesh,
|
||||
return;
|
||||
}
|
||||
|
||||
const int primitive = scene->curve_system_manager->primitive;
|
||||
const int triangle_method = scene->curve_system_manager->triangle_method;
|
||||
const int resolution = scene->curve_system_manager->resolution;
|
||||
const size_t vert_num = mesh->verts.size();
|
||||
const size_t tri_num = mesh->num_triangles();
|
||||
int primitive = scene->curve_system_manager->primitive;
|
||||
int triangle_method = scene->curve_system_manager->triangle_method;
|
||||
int resolution = scene->curve_system_manager->resolution;
|
||||
size_t vert_num = mesh->verts.size();
|
||||
size_t tri_num = mesh->num_triangles();
|
||||
int used_res = 1;
|
||||
|
||||
/* extract particle hair data - should be combined with connecting to mesh later*/
|
||||
@@ -1063,3 +1044,4 @@ void BlenderSync::sync_curves(Mesh *mesh,
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
|
@@ -27,7 +27,6 @@
|
||||
#include "subd_patch.h"
|
||||
#include "subd_split.h"
|
||||
|
||||
#include "util_algorithm.h"
|
||||
#include "util_foreach.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_math.h"
|
||||
@@ -526,177 +525,69 @@ static void attr_create_uv_map(Scene *scene,
|
||||
}
|
||||
|
||||
/* Create vertex pointiness attributes. */
|
||||
|
||||
/* Compare vertices by sum of their coordinates. */
|
||||
class VertexAverageComparator {
|
||||
public:
|
||||
VertexAverageComparator(const array<float3>& verts)
|
||||
: verts_(verts) {
|
||||
}
|
||||
|
||||
bool operator()(const int& vert_idx_a, const int& vert_idx_b)
|
||||
{
|
||||
const float3 &vert_a = verts_[vert_idx_a];
|
||||
const float3 &vert_b = verts_[vert_idx_b];
|
||||
if(vert_a == vert_b) {
|
||||
/* Special case for doubles, so we ensure ordering. */
|
||||
return vert_idx_a > vert_idx_b;
|
||||
}
|
||||
const float x1 = vert_a.x + vert_a.y + vert_a.z;
|
||||
const float x2 = vert_b.x + vert_b.y + vert_b.z;
|
||||
return x1 < x2;
|
||||
}
|
||||
|
||||
protected:
|
||||
const array<float3>& verts_;
|
||||
};
|
||||
|
||||
static void attr_create_pointiness(Scene *scene,
|
||||
Mesh *mesh,
|
||||
BL::Mesh& b_mesh,
|
||||
bool subdivision)
|
||||
{
|
||||
if(!mesh->need_attribute(scene, ATTR_STD_POINTINESS)) {
|
||||
return;
|
||||
}
|
||||
const int num_verts = b_mesh.vertices.length();
|
||||
/* STEP 1: Find out duplicated vertices and point duplicates to a single
|
||||
* original vertex.
|
||||
*/
|
||||
vector<int> sorted_vert_indeices(num_verts);
|
||||
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
|
||||
sorted_vert_indeices[vert_index] = vert_index;
|
||||
}
|
||||
VertexAverageComparator compare(mesh->verts);
|
||||
sort(sorted_vert_indeices.begin(), sorted_vert_indeices.end(), compare);
|
||||
/* This array stores index of the original vertex for the given vertex
|
||||
* index.
|
||||
*/
|
||||
vector<int> vert_orig_index(num_verts);
|
||||
for(int sorted_vert_index = 0;
|
||||
sorted_vert_index < num_verts;
|
||||
++sorted_vert_index)
|
||||
{
|
||||
const int vert_index = sorted_vert_indeices[sorted_vert_index];
|
||||
const float3 &vert_co = mesh->verts[vert_index];
|
||||
bool found = false;
|
||||
for(int other_sorted_vert_index = sorted_vert_index + 1;
|
||||
other_sorted_vert_index < num_verts;
|
||||
++other_sorted_vert_index)
|
||||
{
|
||||
const int other_vert_index =
|
||||
sorted_vert_indeices[other_sorted_vert_index];
|
||||
const float3 &other_vert_co = mesh->verts[other_vert_index];
|
||||
/* We are too far away now, we wouldn't have duplicate. */
|
||||
if((other_vert_co.x + other_vert_co.y + other_vert_co.z) -
|
||||
(vert_co.x + vert_co.y + vert_co.z) > 3 * FLT_EPSILON)
|
||||
{
|
||||
break;
|
||||
if(mesh->need_attribute(scene, ATTR_STD_POINTINESS)) {
|
||||
const int numverts = b_mesh.vertices.length();
|
||||
AttributeSet& attributes = (subdivision)? mesh->subd_attributes: mesh->attributes;
|
||||
Attribute *attr = attributes.add(ATTR_STD_POINTINESS);
|
||||
float *data = attr->data_float();
|
||||
int *counter = new int[numverts];
|
||||
float *raw_data = new float[numverts];
|
||||
float3 *edge_accum = new float3[numverts];
|
||||
|
||||
/* Calculate pointiness using single ring neighborhood. */
|
||||
memset(counter, 0, sizeof(int) * numverts);
|
||||
memset(raw_data, 0, sizeof(float) * numverts);
|
||||
memset(edge_accum, 0, sizeof(float3) * numverts);
|
||||
BL::Mesh::edges_iterator e;
|
||||
int i = 0;
|
||||
for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++i) {
|
||||
int v0 = b_mesh.edges[i].vertices()[0],
|
||||
v1 = b_mesh.edges[i].vertices()[1];
|
||||
float3 co0 = get_float3(b_mesh.vertices[v0].co()),
|
||||
co1 = get_float3(b_mesh.vertices[v1].co());
|
||||
float3 edge = normalize(co1 - co0);
|
||||
edge_accum[v0] += edge;
|
||||
edge_accum[v1] += -edge;
|
||||
++counter[v0];
|
||||
++counter[v1];
|
||||
}
|
||||
i = 0;
|
||||
BL::Mesh::vertices_iterator v;
|
||||
for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v, ++i) {
|
||||
if(counter[i] > 0) {
|
||||
float3 normal = get_float3(b_mesh.vertices[i].normal());
|
||||
float angle = safe_acosf(dot(normal, edge_accum[i] / counter[i]));
|
||||
raw_data[i] = angle * M_1_PI_F;
|
||||
}
|
||||
/* Found duplicate. */
|
||||
if(len_squared(other_vert_co - vert_co) < FLT_EPSILON) {
|
||||
found = true;
|
||||
vert_orig_index[vert_index] = other_vert_index;
|
||||
break;
|
||||
else {
|
||||
raw_data[i] = 0.0f;
|
||||
}
|
||||
}
|
||||
if(!found) {
|
||||
vert_orig_index[vert_index] = vert_index;
|
||||
|
||||
/* Blur vertices to approximate 2 ring neighborhood. */
|
||||
memset(counter, 0, sizeof(int) * numverts);
|
||||
memcpy(data, raw_data, sizeof(float) * numverts);
|
||||
i = 0;
|
||||
for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++i) {
|
||||
int v0 = b_mesh.edges[i].vertices()[0],
|
||||
v1 = b_mesh.edges[i].vertices()[1];
|
||||
data[v0] += raw_data[v1];
|
||||
data[v1] += raw_data[v0];
|
||||
++counter[v0];
|
||||
++counter[v1];
|
||||
}
|
||||
}
|
||||
/* Make sure we always points to the very first orig vertex. */
|
||||
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
|
||||
int orig_index = vert_orig_index[vert_index];
|
||||
while(orig_index != vert_orig_index[orig_index]) {
|
||||
orig_index = vert_orig_index[orig_index];
|
||||
for(i = 0; i < numverts; ++i) {
|
||||
data[i] /= counter[i] + 1;
|
||||
}
|
||||
vert_orig_index[vert_index] = orig_index;
|
||||
}
|
||||
sorted_vert_indeices.free_memory();
|
||||
/* STEP 2: Calculate vertex normals taking into account their possible
|
||||
* duplicates which gets "welded" together.
|
||||
*/
|
||||
vector<float3> vert_normal(num_verts, make_float3(0.0f, 0.0f, 0.0f));
|
||||
/* First we accumulate all vertex normals in the original index. */
|
||||
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
|
||||
const float3 normal = get_float3(b_mesh.vertices[vert_index].normal());
|
||||
const int orig_index = vert_orig_index[vert_index];
|
||||
vert_normal[orig_index] += normal;
|
||||
}
|
||||
/* Then we normalize the accumulated result and flush it to all duplicates
|
||||
* as well.
|
||||
*/
|
||||
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
|
||||
const int orig_index = vert_orig_index[vert_index];
|
||||
vert_normal[vert_index] = normalize(vert_normal[orig_index]);
|
||||
}
|
||||
/* STEP 3: Calculate pointiness using single ring neighborhood. */
|
||||
vector<int> counter(num_verts, 0);
|
||||
vector<float> raw_data(num_verts, 0.0f);
|
||||
vector<float3> edge_accum(num_verts, make_float3(0.0f, 0.0f, 0.0f));
|
||||
BL::Mesh::edges_iterator e;
|
||||
EdgeMap visited_edges;
|
||||
int edge_index = 0;
|
||||
memset(&counter[0], 0, sizeof(int) * counter.size());
|
||||
for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++edge_index) {
|
||||
const int v0 = vert_orig_index[b_mesh.edges[edge_index].vertices()[0]],
|
||||
v1 = vert_orig_index[b_mesh.edges[edge_index].vertices()[1]];
|
||||
if(visited_edges.exists(v0, v1)) {
|
||||
continue;
|
||||
}
|
||||
visited_edges.insert(v0, v1);
|
||||
float3 co0 = get_float3(b_mesh.vertices[v0].co()),
|
||||
co1 = get_float3(b_mesh.vertices[v1].co());
|
||||
float3 edge = normalize(co1 - co0);
|
||||
edge_accum[v0] += edge;
|
||||
edge_accum[v1] += -edge;
|
||||
++counter[v0];
|
||||
++counter[v1];
|
||||
}
|
||||
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
|
||||
const int orig_index = vert_orig_index[vert_index];
|
||||
if(orig_index != vert_index) {
|
||||
/* Skip duplicates, they'll be overwritten later on. */
|
||||
continue;
|
||||
}
|
||||
if(counter[vert_index] > 0) {
|
||||
const float3 normal = vert_normal[vert_index];
|
||||
const float angle =
|
||||
safe_acosf(dot(normal,
|
||||
edge_accum[vert_index] / counter[vert_index]));
|
||||
raw_data[vert_index] = angle * M_1_PI_F;
|
||||
}
|
||||
else {
|
||||
raw_data[vert_index] = 0.0f;
|
||||
}
|
||||
}
|
||||
/* STEP 3: Blur vertices to approximate 2 ring neighborhood. */
|
||||
AttributeSet& attributes = (subdivision)? mesh->subd_attributes: mesh->attributes;
|
||||
Attribute *attr = attributes.add(ATTR_STD_POINTINESS);
|
||||
float *data = attr->data_float();
|
||||
memcpy(data, &raw_data[0], sizeof(float) * raw_data.size());
|
||||
memset(&counter[0], 0, sizeof(int) * counter.size());
|
||||
edge_index = 0;
|
||||
visited_edges.clear();
|
||||
for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++edge_index) {
|
||||
const int v0 = vert_orig_index[b_mesh.edges[edge_index].vertices()[0]],
|
||||
v1 = vert_orig_index[b_mesh.edges[edge_index].vertices()[1]];
|
||||
if(visited_edges.exists(v0, v1)) {
|
||||
continue;
|
||||
}
|
||||
visited_edges.insert(v0, v1);
|
||||
data[v0] += raw_data[v1];
|
||||
data[v1] += raw_data[v0];
|
||||
++counter[v0];
|
||||
++counter[v1];
|
||||
}
|
||||
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
|
||||
data[vert_index] /= counter[vert_index] + 1;
|
||||
}
|
||||
/* STEP 4: Copy attribute to the duplicated vertices. */
|
||||
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
|
||||
const int orig_index = vert_orig_index[vert_index];
|
||||
data[vert_index] = data[orig_index];
|
||||
|
||||
delete [] counter;
|
||||
delete [] raw_data;
|
||||
delete [] edge_accum;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -706,8 +597,8 @@ static void create_mesh(Scene *scene,
|
||||
Mesh *mesh,
|
||||
BL::Mesh& b_mesh,
|
||||
const vector<Shader*>& used_shaders,
|
||||
bool subdivision = false,
|
||||
bool subdivide_uvs = true)
|
||||
bool subdivision=false,
|
||||
bool subdivide_uvs=true)
|
||||
{
|
||||
/* count vertices and faces */
|
||||
int numverts = b_mesh.vertices.length();
|
||||
@@ -765,6 +656,9 @@ static void create_mesh(Scene *scene,
|
||||
generated[i++] = get_float3(v->undeformed_co())*size - loc;
|
||||
}
|
||||
|
||||
/* Create needed vertex attributes. */
|
||||
attr_create_pointiness(scene, mesh, b_mesh, subdivision);
|
||||
|
||||
/* create faces */
|
||||
vector<int> nverts(numfaces);
|
||||
vector<int> face_flags(numfaces, FACE_FLAG_NONE);
|
||||
@@ -777,19 +671,28 @@ static void create_mesh(Scene *scene,
|
||||
int shader = clamp(f->material_index(), 0, used_shaders.size()-1);
|
||||
bool smooth = f->use_smooth() || use_loop_normals;
|
||||
|
||||
/* split vertices if normal is different
|
||||
*
|
||||
* note all vertex attributes must have been set here so we can split
|
||||
* and copy attributes in split_vertex without remapping later */
|
||||
if(use_loop_normals) {
|
||||
BL::Array<float, 12> loop_normals = f->split_normals();
|
||||
|
||||
for(int i = 0; i < n; i++) {
|
||||
N[vi[i]] = make_float3(loop_normals[i * 3],
|
||||
loop_normals[i * 3 + 1],
|
||||
loop_normals[i * 3 + 2]);
|
||||
float3 loop_N = make_float3(loop_normals[i * 3], loop_normals[i * 3 + 1], loop_normals[i * 3 + 2]);
|
||||
|
||||
if(N[vi[i]] != loop_N) {
|
||||
int new_vi = mesh->split_vertex(vi[i]);
|
||||
|
||||
/* set new normal and vertex index */
|
||||
N = attr_N->data_float3();
|
||||
N[new_vi] = loop_N;
|
||||
vi[i] = new_vi;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Create triangles.
|
||||
*
|
||||
* NOTE: Autosmooth is already taken care about.
|
||||
*/
|
||||
/* create triangles */
|
||||
if(n == 4) {
|
||||
if(is_zero(cross(mesh->verts[vi[1]] - mesh->verts[vi[0]], mesh->verts[vi[2]] - mesh->verts[vi[0]])) ||
|
||||
is_zero(cross(mesh->verts[vi[2]] - mesh->verts[vi[0]], mesh->verts[vi[3]] - mesh->verts[vi[0]])))
|
||||
@@ -821,8 +724,24 @@ static void create_mesh(Scene *scene,
|
||||
|
||||
vi.reserve(n);
|
||||
for(int i = 0; i < n; i++) {
|
||||
/* NOTE: Autosmooth is already taken care about. */
|
||||
vi[i] = b_mesh.loops[p->loop_start() + i].vertex_index();
|
||||
|
||||
/* split vertices if normal is different
|
||||
*
|
||||
* note all vertex attributes must have been set here so we can split
|
||||
* and copy attributes in split_vertex without remapping later */
|
||||
if(use_loop_normals) {
|
||||
float3 loop_N = get_float3(b_mesh.loops[p->loop_start() + i].normal());
|
||||
|
||||
if(N[vi[i]] != loop_N) {
|
||||
int new_vi = mesh->split_vertex(vi[i]);
|
||||
|
||||
/* set new normal and vertex index */
|
||||
N = attr_N->data_float3();
|
||||
N[new_vi] = loop_N;
|
||||
vi[i] = new_vi;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* create subd faces */
|
||||
@@ -833,7 +752,6 @@ static void create_mesh(Scene *scene,
|
||||
/* Create all needed attributes.
|
||||
* The calculate functions will check whether they're needed or not.
|
||||
*/
|
||||
attr_create_pointiness(scene, mesh, b_mesh, subdivision);
|
||||
attr_create_vertex_color(scene, mesh, b_mesh, nverts, face_flags, subdivision);
|
||||
attr_create_uv_map(scene, mesh, b_mesh, nverts, face_flags, subdivision, subdivide_uvs);
|
||||
|
||||
@@ -1043,20 +961,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob,
|
||||
|
||||
mesh->subdivision_type = object_subdivision_type(b_ob, preview, experimental);
|
||||
|
||||
/* Disable adaptive subdivision while baking as the baking system
|
||||
* currently doesnt support the topology and will crash.
|
||||
*/
|
||||
if(scene->bake_manager->get_baking()) {
|
||||
mesh->subdivision_type = Mesh::SUBDIVISION_NONE;
|
||||
}
|
||||
|
||||
BL::Mesh b_mesh = object_to_mesh(b_data,
|
||||
b_ob,
|
||||
b_scene,
|
||||
true,
|
||||
!preview,
|
||||
need_undeformed,
|
||||
mesh->subdivision_type);
|
||||
BL::Mesh b_mesh = object_to_mesh(b_data, b_ob, b_scene, true, !preview, need_undeformed, mesh->subdivision_type);
|
||||
|
||||
if(b_mesh) {
|
||||
if(render_layer.use_surfaces && !hide_tris) {
|
||||
@@ -1181,13 +1086,7 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
|
||||
|
||||
if(ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) {
|
||||
/* get derived mesh */
|
||||
b_mesh = object_to_mesh(b_data,
|
||||
b_ob,
|
||||
b_scene,
|
||||
true,
|
||||
!preview,
|
||||
false,
|
||||
Mesh::SUBDIVISION_NONE);
|
||||
b_mesh = object_to_mesh(b_data, b_ob, b_scene, true, !preview, false, false);
|
||||
}
|
||||
|
||||
if(!b_mesh) {
|
||||
@@ -1258,12 +1157,10 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
|
||||
{
|
||||
/* no motion, remove attributes again */
|
||||
if(b_mesh.vertices.length() != numverts) {
|
||||
VLOG(1) << "Topology differs, disabling motion blur for object "
|
||||
<< b_ob.name();
|
||||
VLOG(1) << "Topology differs, disabling motion blur.";
|
||||
}
|
||||
else {
|
||||
VLOG(1) << "No actual deformation motion for object "
|
||||
<< b_ob.name();
|
||||
VLOG(1) << "No actual deformation motion for object " << b_ob.name();
|
||||
}
|
||||
mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
if(attr_mN)
|
||||
@@ -1294,3 +1191,4 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
|
@@ -25,7 +25,6 @@
|
||||
#include "particles.h"
|
||||
#include "shader.h"
|
||||
|
||||
#include "blender_object_cull.h"
|
||||
#include "blender_sync.h"
|
||||
#include "blender_util.h"
|
||||
|
||||
@@ -89,6 +88,143 @@ static uint object_ray_visibility(BL::Object& b_ob)
|
||||
return flag;
|
||||
}
|
||||
|
||||
/* Culling */
|
||||
|
||||
class BlenderObjectCulling
|
||||
{
|
||||
public:
|
||||
BlenderObjectCulling(Scene *scene, BL::Scene& b_scene)
|
||||
: use_scene_camera_cull(false),
|
||||
use_camera_cull(false),
|
||||
camera_cull_margin(0.0f),
|
||||
use_scene_distance_cull(false),
|
||||
use_distance_cull(false),
|
||||
distance_cull_margin(0.0f)
|
||||
{
|
||||
if(b_scene.render().use_simplify()) {
|
||||
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
|
||||
|
||||
use_scene_camera_cull = scene->camera->type != CAMERA_PANORAMA &&
|
||||
!b_scene.render().use_multiview() &&
|
||||
get_boolean(cscene, "use_camera_cull");
|
||||
use_scene_distance_cull = scene->camera->type != CAMERA_PANORAMA &&
|
||||
!b_scene.render().use_multiview() &&
|
||||
get_boolean(cscene, "use_distance_cull");
|
||||
|
||||
camera_cull_margin = get_float(cscene, "camera_cull_margin");
|
||||
distance_cull_margin = get_float(cscene, "distance_cull_margin");
|
||||
|
||||
if (distance_cull_margin == 0.0f) {
|
||||
use_scene_distance_cull = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void init_object(Scene *scene, BL::Object& b_ob)
|
||||
{
|
||||
if(!use_scene_camera_cull && !use_scene_distance_cull) {
|
||||
return;
|
||||
}
|
||||
|
||||
PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
|
||||
|
||||
use_camera_cull = use_scene_camera_cull && get_boolean(cobject, "use_camera_cull");
|
||||
use_distance_cull = use_scene_distance_cull && get_boolean(cobject, "use_distance_cull");
|
||||
|
||||
if(use_camera_cull || use_distance_cull) {
|
||||
/* Need to have proper projection matrix. */
|
||||
scene->camera->update();
|
||||
}
|
||||
}
|
||||
|
||||
bool test(Scene *scene, BL::Object& b_ob, Transform& tfm)
|
||||
{
|
||||
if(!use_camera_cull && !use_distance_cull) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Compute world space bounding box corners. */
|
||||
float3 bb[8];
|
||||
BL::Array<float, 24> boundbox = b_ob.bound_box();
|
||||
for(int i = 0; i < 8; ++i) {
|
||||
float3 p = make_float3(boundbox[3 * i + 0],
|
||||
boundbox[3 * i + 1],
|
||||
boundbox[3 * i + 2]);
|
||||
bb[i] = transform_point(&tfm, p);
|
||||
}
|
||||
|
||||
bool camera_culled = use_camera_cull && test_camera(scene, bb);
|
||||
bool distance_culled = use_distance_cull && test_distance(scene, bb);
|
||||
|
||||
return ((camera_culled && distance_culled) ||
|
||||
(camera_culled && !use_distance_cull) ||
|
||||
(distance_culled && !use_camera_cull));
|
||||
}
|
||||
|
||||
private:
|
||||
/* TODO(sergey): Not really optimal, consider approaches based on k-DOP in order
|
||||
* to reduce number of objects which are wrongly considered visible.
|
||||
*/
|
||||
bool test_camera(Scene *scene, float3 bb[8])
|
||||
{
|
||||
Camera *cam = scene->camera;
|
||||
Transform& worldtondc = cam->worldtondc;
|
||||
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
|
||||
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
|
||||
bool all_behind = true;
|
||||
for(int i = 0; i < 8; ++i) {
|
||||
float3 p = bb[i];
|
||||
float4 b = make_float4(p.x, p.y, p.z, 1.0f);
|
||||
float4 c = make_float4(dot(worldtondc.x, b),
|
||||
dot(worldtondc.y, b),
|
||||
dot(worldtondc.z, b),
|
||||
dot(worldtondc.w, b));
|
||||
p = float4_to_float3(c / c.w);
|
||||
if(c.z < 0.0f) {
|
||||
p.x = 1.0f - p.x;
|
||||
p.y = 1.0f - p.y;
|
||||
}
|
||||
if(c.z >= -camera_cull_margin) {
|
||||
all_behind = false;
|
||||
}
|
||||
bb_min = min(bb_min, p);
|
||||
bb_max = max(bb_max, p);
|
||||
}
|
||||
if(all_behind) {
|
||||
return true;
|
||||
}
|
||||
return (bb_min.x >= 1.0f + camera_cull_margin ||
|
||||
bb_min.y >= 1.0f + camera_cull_margin ||
|
||||
bb_max.x <= -camera_cull_margin ||
|
||||
bb_max.y <= -camera_cull_margin);
|
||||
}
|
||||
|
||||
bool test_distance(Scene *scene, float3 bb[8])
|
||||
{
|
||||
float3 camera_position = transform_get_column(&scene->camera->matrix, 3);
|
||||
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
|
||||
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
|
||||
|
||||
/* Find min & max points for x & y & z on bounding box */
|
||||
for(int i = 0; i < 8; ++i) {
|
||||
float3 p = bb[i];
|
||||
bb_min = min(bb_min, p);
|
||||
bb_max = max(bb_max, p);
|
||||
}
|
||||
|
||||
float3 closest_point = max(min(bb_max,camera_position),bb_min);
|
||||
return (len_squared(camera_position - closest_point) >
|
||||
distance_cull_margin * distance_cull_margin);
|
||||
}
|
||||
|
||||
bool use_scene_camera_cull;
|
||||
bool use_camera_cull;
|
||||
float camera_cull_margin;
|
||||
bool use_scene_distance_cull;
|
||||
bool use_distance_cull;
|
||||
float distance_cull_margin;
|
||||
};
|
||||
|
||||
/* Light */
|
||||
|
||||
void BlenderSync::sync_light(BL::Object& b_parent,
|
||||
|
@@ -1,149 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2016 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "camera.h"
|
||||
|
||||
#include "blender_object_cull.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
BlenderObjectCulling::BlenderObjectCulling(Scene *scene, BL::Scene& b_scene)
|
||||
: use_scene_camera_cull_(false),
|
||||
use_camera_cull_(false),
|
||||
camera_cull_margin_(0.0f),
|
||||
use_scene_distance_cull_(false),
|
||||
use_distance_cull_(false),
|
||||
distance_cull_margin_(0.0f)
|
||||
{
|
||||
if(b_scene.render().use_simplify()) {
|
||||
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
|
||||
|
||||
use_scene_camera_cull_ = scene->camera->type != CAMERA_PANORAMA &&
|
||||
!b_scene.render().use_multiview() &&
|
||||
get_boolean(cscene, "use_camera_cull");
|
||||
use_scene_distance_cull_ = scene->camera->type != CAMERA_PANORAMA &&
|
||||
!b_scene.render().use_multiview() &&
|
||||
get_boolean(cscene, "use_distance_cull");
|
||||
|
||||
camera_cull_margin_ = get_float(cscene, "camera_cull_margin");
|
||||
distance_cull_margin_ = get_float(cscene, "distance_cull_margin");
|
||||
|
||||
if(distance_cull_margin_ == 0.0f) {
|
||||
use_scene_distance_cull_ = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BlenderObjectCulling::init_object(Scene *scene, BL::Object& b_ob)
|
||||
{
|
||||
if(!use_scene_camera_cull_ && !use_scene_distance_cull_) {
|
||||
return;
|
||||
}
|
||||
|
||||
PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
|
||||
|
||||
use_camera_cull_ = use_scene_camera_cull_ && get_boolean(cobject, "use_camera_cull");
|
||||
use_distance_cull_ = use_scene_distance_cull_ && get_boolean(cobject, "use_distance_cull");
|
||||
|
||||
if(use_camera_cull_ || use_distance_cull_) {
|
||||
/* Need to have proper projection matrix. */
|
||||
scene->camera->update();
|
||||
}
|
||||
}
|
||||
|
||||
bool BlenderObjectCulling::test(Scene *scene, BL::Object& b_ob, Transform& tfm)
|
||||
{
|
||||
if(!use_camera_cull_ && !use_distance_cull_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Compute world space bounding box corners. */
|
||||
float3 bb[8];
|
||||
BL::Array<float, 24> boundbox = b_ob.bound_box();
|
||||
for(int i = 0; i < 8; ++i) {
|
||||
float3 p = make_float3(boundbox[3 * i + 0],
|
||||
boundbox[3 * i + 1],
|
||||
boundbox[3 * i + 2]);
|
||||
bb[i] = transform_point(&tfm, p);
|
||||
}
|
||||
|
||||
bool camera_culled = use_camera_cull_ && test_camera(scene, bb);
|
||||
bool distance_culled = use_distance_cull_ && test_distance(scene, bb);
|
||||
|
||||
return ((camera_culled && distance_culled) ||
|
||||
(camera_culled && !use_distance_cull_) ||
|
||||
(distance_culled && !use_camera_cull_));
|
||||
}
|
||||
|
||||
/* TODO(sergey): Not really optimal, consider approaches based on k-DOP in order
|
||||
* to reduce number of objects which are wrongly considered visible.
|
||||
*/
|
||||
bool BlenderObjectCulling::test_camera(Scene *scene, float3 bb[8])
|
||||
{
|
||||
Camera *cam = scene->camera;
|
||||
Transform& worldtondc = cam->worldtondc;
|
||||
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
|
||||
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
|
||||
bool all_behind = true;
|
||||
for(int i = 0; i < 8; ++i) {
|
||||
float3 p = bb[i];
|
||||
float4 b = make_float4(p.x, p.y, p.z, 1.0f);
|
||||
float4 c = make_float4(dot(worldtondc.x, b),
|
||||
dot(worldtondc.y, b),
|
||||
dot(worldtondc.z, b),
|
||||
dot(worldtondc.w, b));
|
||||
p = float4_to_float3(c / c.w);
|
||||
if(c.z < 0.0f) {
|
||||
p.x = 1.0f - p.x;
|
||||
p.y = 1.0f - p.y;
|
||||
}
|
||||
if(c.z >= -camera_cull_margin_) {
|
||||
all_behind = false;
|
||||
}
|
||||
bb_min = min(bb_min, p);
|
||||
bb_max = max(bb_max, p);
|
||||
}
|
||||
if(all_behind) {
|
||||
return true;
|
||||
}
|
||||
return (bb_min.x >= 1.0f + camera_cull_margin_ ||
|
||||
bb_min.y >= 1.0f + camera_cull_margin_ ||
|
||||
bb_max.x <= -camera_cull_margin_ ||
|
||||
bb_max.y <= -camera_cull_margin_);
|
||||
}
|
||||
|
||||
bool BlenderObjectCulling::test_distance(Scene *scene, float3 bb[8])
|
||||
{
|
||||
float3 camera_position = transform_get_column(&scene->camera->matrix, 3);
|
||||
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
|
||||
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
|
||||
|
||||
/* Find min & max points for x & y & z on bounding box */
|
||||
for(int i = 0; i < 8; ++i) {
|
||||
float3 p = bb[i];
|
||||
bb_min = min(bb_min, p);
|
||||
bb_max = max(bb_max, p);
|
||||
}
|
||||
|
||||
float3 closest_point = max(min(bb_max,camera_position),bb_min);
|
||||
return (len_squared(camera_position - closest_point) >
|
||||
distance_cull_margin_ * distance_cull_margin_);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2016 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef __BLENDER_OBJECT_CULL_H__
|
||||
#define __BLENDER_OBJECT_CULL_H__
|
||||
|
||||
#include "blender_sync.h"
|
||||
#include "util_types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class Scene;
|
||||
|
||||
class BlenderObjectCulling
|
||||
{
|
||||
public:
|
||||
BlenderObjectCulling(Scene *scene, BL::Scene& b_scene);
|
||||
|
||||
void init_object(Scene *scene, BL::Object& b_ob);
|
||||
bool test(Scene *scene, BL::Object& b_ob, Transform& tfm);
|
||||
|
||||
private:
|
||||
bool test_camera(Scene *scene, float3 bb[8]);
|
||||
bool test_distance(Scene *scene, float3 bb[8]);
|
||||
|
||||
bool use_scene_camera_cull_;
|
||||
bool use_camera_cull_;
|
||||
float camera_cull_margin_;
|
||||
bool use_scene_distance_cull_;
|
||||
bool use_distance_cull_;
|
||||
float distance_cull_margin_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __BLENDER_OBJECT_CULL_H__ */
|
@@ -67,10 +67,8 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
|
||||
flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3");
|
||||
flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2");
|
||||
flags.cpu.qbvh = get_boolean(cscene, "debug_use_qbvh");
|
||||
flags.cpu.split_kernel = get_boolean(cscene, "debug_use_cpu_split_kernel");
|
||||
/* Synchronize CUDA flags. */
|
||||
flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile");
|
||||
flags.cuda.split_kernel = get_boolean(cscene, "debug_use_cuda_split_kernel");
|
||||
/* Synchronize OpenCL kernel type. */
|
||||
switch(get_enum(cscene, "debug_opencl_kernel_type")) {
|
||||
case 0:
|
||||
@@ -106,7 +104,6 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
|
||||
}
|
||||
/* Synchronize other OpenCL flags. */
|
||||
flags.opencl.debug = get_boolean(cscene, "debug_use_opencl_debug");
|
||||
flags.opencl.single_program = get_boolean(cscene, "debug_opencl_kernel_single_program");
|
||||
return flags.opencl.device_type != opencl_device_type ||
|
||||
flags.opencl.kernel_type != opencl_kernel_type;
|
||||
}
|
||||
@@ -644,7 +641,7 @@ static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
|
||||
static PyObject *set_resumable_chunks_func(PyObject * /*self*/, PyObject *args)
|
||||
{
|
||||
int num_resumable_chunks, current_resumable_chunk;
|
||||
if(!PyArg_ParseTuple(args, "ii",
|
||||
@@ -679,53 +676,6 @@ static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
static PyObject *set_resumable_chunk_range_func(PyObject * /*self*/, PyObject *args)
|
||||
{
|
||||
int num_chunks, start_chunk, end_chunk;
|
||||
if(!PyArg_ParseTuple(args, "iii",
|
||||
&num_chunks,
|
||||
&start_chunk,
|
||||
&end_chunk)) {
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
if(num_chunks <= 0) {
|
||||
fprintf(stderr, "Cycles: Bad value for number of resumable chunks.\n");
|
||||
abort();
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
if(start_chunk < 1 || start_chunk > num_chunks) {
|
||||
fprintf(stderr, "Cycles: Bad value for start chunk number.\n");
|
||||
abort();
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
if(end_chunk < 1 || end_chunk > num_chunks) {
|
||||
fprintf(stderr, "Cycles: Bad value for start chunk number.\n");
|
||||
abort();
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
if(start_chunk > end_chunk) {
|
||||
fprintf(stderr, "Cycles: End chunk should be higher than start one.\n");
|
||||
abort();
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
VLOG(1) << "Initialized resumable render: "
|
||||
<< "num_resumable_chunks=" << num_chunks << ", "
|
||||
<< "start_resumable_chunk=" << start_chunk
|
||||
<< "end_resumable_chunk=" << end_chunk;
|
||||
BlenderSession::num_resumable_chunks = num_chunks;
|
||||
BlenderSession::start_resumable_chunk = start_chunk;
|
||||
BlenderSession::end_resumable_chunk = end_chunk;
|
||||
|
||||
printf("Cycles: Will render chunks %d to %d of %d\n",
|
||||
start_chunk,
|
||||
end_chunk,
|
||||
num_chunks);
|
||||
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
|
||||
{
|
||||
vector<DeviceInfo>& devices = Device::available_devices();
|
||||
@@ -765,8 +715,7 @@ static PyMethodDef methods[] = {
|
||||
{"debug_flags_reset", debug_flags_reset_func, METH_NOARGS, ""},
|
||||
|
||||
/* Resumable render */
|
||||
{"set_resumable_chunk", set_resumable_chunk_func, METH_VARARGS, ""},
|
||||
{"set_resumable_chunk_range", set_resumable_chunk_range_func, METH_VARARGS, ""},
|
||||
{"set_resumable_chunks", set_resumable_chunks_func, METH_VARARGS, ""},
|
||||
|
||||
/* Compute Device selection */
|
||||
{"get_device_types", get_device_types_func, METH_VARARGS, ""},
|
||||
|
@@ -46,8 +46,6 @@ CCL_NAMESPACE_BEGIN
|
||||
bool BlenderSession::headless = false;
|
||||
int BlenderSession::num_resumable_chunks = 0;
|
||||
int BlenderSession::current_resumable_chunk = 0;
|
||||
int BlenderSession::start_resumable_chunk = 0;
|
||||
int BlenderSession::end_resumable_chunk = 0;
|
||||
|
||||
BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
|
||||
BL::UserPreferences& b_userpref,
|
||||
@@ -70,7 +68,6 @@ BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
|
||||
background = true;
|
||||
last_redraw_time = 0.0;
|
||||
start_resize_time = 0.0;
|
||||
last_status_time = 0.0;
|
||||
}
|
||||
|
||||
BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
|
||||
@@ -96,7 +93,6 @@ BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
|
||||
background = false;
|
||||
last_redraw_time = 0.0;
|
||||
start_resize_time = 0.0;
|
||||
last_status_time = 0.0;
|
||||
}
|
||||
|
||||
BlenderSession::~BlenderSession()
|
||||
@@ -130,8 +126,8 @@ void BlenderSession::create_session()
|
||||
|
||||
/* setup callbacks for builtin image support */
|
||||
scene->image_manager->builtin_image_info_cb = function_bind(&BlenderSession::builtin_image_info, this, _1, _2, _3, _4, _5, _6, _7);
|
||||
scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3, _4);
|
||||
scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3, _4);
|
||||
scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3);
|
||||
scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3);
|
||||
|
||||
/* create session */
|
||||
session = new Session(session_params);
|
||||
@@ -309,16 +305,12 @@ static PassType get_pass_type(BL::RenderPass& b_pass)
|
||||
#ifdef WITH_CYCLES_DEBUG
|
||||
case BL::RenderPass::type_DEBUG:
|
||||
{
|
||||
switch(b_pass.debug_type()) {
|
||||
case BL::RenderPass::debug_type_BVH_TRAVERSED_NODES:
|
||||
return PASS_BVH_TRAVERSED_NODES;
|
||||
case BL::RenderPass::debug_type_BVH_TRAVERSED_INSTANCES:
|
||||
return PASS_BVH_TRAVERSED_INSTANCES;
|
||||
case BL::RenderPass::debug_type_BVH_INTERSECTIONS:
|
||||
return PASS_BVH_INTERSECTIONS;
|
||||
case BL::RenderPass::debug_type_RAY_BOUNCES:
|
||||
return PASS_RAY_BOUNCES;
|
||||
}
|
||||
if(b_pass.debug_type() == BL::RenderPass::debug_type_BVH_TRAVERSAL_STEPS)
|
||||
return PASS_BVH_TRAVERSAL_STEPS;
|
||||
if(b_pass.debug_type() == BL::RenderPass::debug_type_BVH_TRAVERSED_INSTANCES)
|
||||
return PASS_BVH_TRAVERSED_INSTANCES;
|
||||
if(b_pass.debug_type() == BL::RenderPass::debug_type_RAY_BOUNCES)
|
||||
return PASS_RAY_BOUNCES;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
@@ -588,7 +580,7 @@ static void populate_bake_data(BakeData *data, const
|
||||
BL::BakePixel bp = pixel_array;
|
||||
|
||||
int i;
|
||||
for(i = 0; i < num_pixels; i++) {
|
||||
for(i=0; i < num_pixels; i++) {
|
||||
if(bp.object_id() == object_id) {
|
||||
data->set(i, bp.primitive_id(), bp.uv(), bp.du_dx(), bp.du_dy(), bp.dv_dx(), bp.dv_dy());
|
||||
} else {
|
||||
@@ -938,13 +930,38 @@ void BlenderSession::get_status(string& status, string& substatus)
|
||||
|
||||
void BlenderSession::get_progress(float& progress, double& total_time, double& render_time)
|
||||
{
|
||||
session->progress.get_time(total_time, render_time);
|
||||
progress = session->progress.get_progress();
|
||||
double tile_time;
|
||||
int tile, sample, samples_per_tile;
|
||||
int tile_total = session->tile_manager.state.num_tiles;
|
||||
int samples = session->tile_manager.state.sample + 1;
|
||||
int total_samples = session->tile_manager.get_num_effective_samples();
|
||||
|
||||
session->progress.get_tile(tile, total_time, render_time, tile_time);
|
||||
|
||||
sample = session->progress.get_sample();
|
||||
samples_per_tile = session->tile_manager.get_num_effective_samples();
|
||||
|
||||
if(background && samples_per_tile && tile_total)
|
||||
progress = ((float)sample / (float)(tile_total * samples_per_tile));
|
||||
else if(!background && samples > 0 && total_samples != INT_MAX)
|
||||
progress = ((float)samples) / total_samples;
|
||||
else
|
||||
progress = 0.0;
|
||||
}
|
||||
|
||||
void BlenderSession::update_bake_progress()
|
||||
{
|
||||
float progress = session->progress.get_progress();
|
||||
float progress;
|
||||
int sample, samples_per_task, parts_total;
|
||||
|
||||
sample = session->progress.get_sample();
|
||||
samples_per_task = scene->bake_manager->num_samples;
|
||||
parts_total = scene->bake_manager->num_parts;
|
||||
|
||||
if(samples_per_task)
|
||||
progress = ((float)sample / (float)(parts_total * samples_per_task));
|
||||
else
|
||||
progress = 0.0;
|
||||
|
||||
if(progress != last_progress) {
|
||||
b_engine.update_progress(progress);
|
||||
@@ -993,14 +1010,10 @@ void BlenderSession::update_status_progress()
|
||||
if(substatus.size() > 0)
|
||||
status += " | " + substatus;
|
||||
|
||||
double current_time = time_dt();
|
||||
/* When rendering in a window, redraw the status at least once per second to keep the elapsed and remaining time up-to-date.
|
||||
* For headless rendering, only report when something significant changes to keep the console output readable. */
|
||||
if(status != last_status || (!headless && (current_time - last_status_time) > 1.0)) {
|
||||
if(status != last_status) {
|
||||
b_engine.update_stats("", (timestatus + scene + status).c_str());
|
||||
b_engine.update_memory_stats(mem_used, mem_peak);
|
||||
last_status = status;
|
||||
last_status_time = current_time;
|
||||
}
|
||||
if(progress != last_progress) {
|
||||
b_engine.update_progress(progress);
|
||||
@@ -1067,13 +1080,7 @@ int BlenderSession::builtin_image_frame(const string &builtin_name)
|
||||
return atoi(builtin_name.substr(last + 1, builtin_name.size() - last - 1).c_str());
|
||||
}
|
||||
|
||||
void BlenderSession::builtin_image_info(const string &builtin_name,
|
||||
void *builtin_data,
|
||||
bool &is_float,
|
||||
int &width,
|
||||
int &height,
|
||||
int &depth,
|
||||
int &channels)
|
||||
void BlenderSession::builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &depth, int &channels)
|
||||
{
|
||||
/* empty image */
|
||||
is_float = false;
|
||||
@@ -1151,67 +1158,60 @@ void BlenderSession::builtin_image_info(const string &builtin_name,
|
||||
}
|
||||
}
|
||||
|
||||
bool BlenderSession::builtin_image_pixels(const string &builtin_name,
|
||||
void *builtin_data,
|
||||
unsigned char *pixels,
|
||||
const size_t pixels_size)
|
||||
bool BlenderSession::builtin_image_pixels(const string &builtin_name, void *builtin_data, unsigned char *pixels)
|
||||
{
|
||||
if(!builtin_data) {
|
||||
if(!builtin_data)
|
||||
return false;
|
||||
}
|
||||
|
||||
const int frame = builtin_image_frame(builtin_name);
|
||||
int frame = builtin_image_frame(builtin_name);
|
||||
|
||||
PointerRNA ptr;
|
||||
RNA_id_pointer_create((ID*)builtin_data, &ptr);
|
||||
BL::Image b_image(ptr);
|
||||
|
||||
const int width = b_image.size()[0];
|
||||
const int height = b_image.size()[1];
|
||||
const int channels = b_image.channels();
|
||||
int width = b_image.size()[0];
|
||||
int height = b_image.size()[1];
|
||||
int channels = b_image.channels();
|
||||
|
||||
unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame);
|
||||
const size_t num_pixels = ((size_t)width) * height;
|
||||
unsigned char *image_pixels;
|
||||
image_pixels = image_get_pixels_for_frame(b_image, frame);
|
||||
size_t num_pixels = ((size_t)width) * height;
|
||||
|
||||
if(image_pixels && num_pixels * channels == pixels_size) {
|
||||
memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char));
|
||||
if(image_pixels) {
|
||||
memcpy(pixels, image_pixels, num_pixels * channels * sizeof(unsigned char));
|
||||
MEM_freeN(image_pixels);
|
||||
}
|
||||
else {
|
||||
if(channels == 1) {
|
||||
memset(pixels, 0, pixels_size * sizeof(unsigned char));
|
||||
memset(pixels, 0, num_pixels * sizeof(unsigned char));
|
||||
}
|
||||
else {
|
||||
const size_t num_pixels_safe = pixels_size / channels;
|
||||
unsigned char *cp = pixels;
|
||||
for(size_t i = 0; i < num_pixels_safe; i++, cp += channels) {
|
||||
for(size_t i = 0; i < num_pixels; i++, cp += channels) {
|
||||
cp[0] = 255;
|
||||
cp[1] = 0;
|
||||
cp[2] = 255;
|
||||
if(channels == 4) {
|
||||
if(channels == 4)
|
||||
cp[3] = 255;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Premultiply, byte images are always straight for Blender. */
|
||||
|
||||
/* premultiply, byte images are always straight for blender */
|
||||
unsigned char *cp = pixels;
|
||||
for(size_t i = 0; i < num_pixels; i++, cp += channels) {
|
||||
cp[0] = (cp[0] * cp[3]) >> 8;
|
||||
cp[1] = (cp[1] * cp[3]) >> 8;
|
||||
cp[2] = (cp[2] * cp[3]) >> 8;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
|
||||
void *builtin_data,
|
||||
float *pixels,
|
||||
const size_t pixels_size)
|
||||
bool BlenderSession::builtin_image_float_pixels(const string &builtin_name, void *builtin_data, float *pixels)
|
||||
{
|
||||
if(!builtin_data) {
|
||||
if(!builtin_data)
|
||||
return false;
|
||||
}
|
||||
|
||||
PointerRNA ptr;
|
||||
RNA_id_pointer_create((ID*)builtin_data, &ptr);
|
||||
@@ -1222,16 +1222,16 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
|
||||
BL::Image b_image(b_id);
|
||||
int frame = builtin_image_frame(builtin_name);
|
||||
|
||||
const int width = b_image.size()[0];
|
||||
const int height = b_image.size()[1];
|
||||
const int channels = b_image.channels();
|
||||
int width = b_image.size()[0];
|
||||
int height = b_image.size()[1];
|
||||
int channels = b_image.channels();
|
||||
|
||||
float *image_pixels;
|
||||
image_pixels = image_get_float_pixels_for_frame(b_image, frame);
|
||||
const size_t num_pixels = ((size_t)width) * height;
|
||||
size_t num_pixels = ((size_t)width) * height;
|
||||
|
||||
if(image_pixels && num_pixels * channels == pixels_size) {
|
||||
memcpy(pixels, image_pixels, pixels_size * sizeof(float));
|
||||
if(image_pixels) {
|
||||
memcpy(pixels, image_pixels, num_pixels * channels * sizeof(float));
|
||||
MEM_freeN(image_pixels);
|
||||
}
|
||||
else {
|
||||
@@ -1239,15 +1239,13 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
|
||||
memset(pixels, 0, num_pixels * sizeof(float));
|
||||
}
|
||||
else {
|
||||
const size_t num_pixels_safe = pixels_size / channels;
|
||||
float *fp = pixels;
|
||||
for(int i = 0; i < num_pixels_safe; i++, fp += channels) {
|
||||
for(int i = 0; i < num_pixels; i++, fp += channels) {
|
||||
fp[0] = 1.0f;
|
||||
fp[1] = 0.0f;
|
||||
fp[2] = 1.0f;
|
||||
if(channels == 4) {
|
||||
if(channels == 4)
|
||||
fp[3] = 1.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1259,9 +1257,8 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
|
||||
BL::Object b_ob(b_id);
|
||||
BL::SmokeDomainSettings b_domain = object_smoke_domain_find(b_ob);
|
||||
|
||||
if(!b_domain) {
|
||||
if(!b_domain)
|
||||
return false;
|
||||
}
|
||||
|
||||
int3 resolution = get_int3(b_domain.domain_resolution());
|
||||
int length, amplify = (b_domain.use_high_resolution())? b_domain.amplify() + 1: 1;
|
||||
@@ -1273,10 +1270,10 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
|
||||
amplify = 1;
|
||||
}
|
||||
|
||||
const int width = resolution.x * amplify;
|
||||
const int height = resolution.y * amplify;
|
||||
const int depth = resolution.z * amplify;
|
||||
const size_t num_pixels = ((size_t)width) * height * depth;
|
||||
int width = resolution.x * amplify;
|
||||
int height = resolution.y * amplify;
|
||||
int depth = resolution.z * amplify;
|
||||
size_t num_pixels = ((size_t)width) * height * depth;
|
||||
|
||||
if(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) {
|
||||
SmokeDomainSettings_density_grid_get_length(&b_domain.ptr, &length);
|
||||
@@ -1350,21 +1347,9 @@ void BlenderSession::update_resumable_tile_manager(int num_samples)
|
||||
return;
|
||||
}
|
||||
|
||||
const int num_samples_per_chunk = (int)ceilf((float)num_samples / num_resumable_chunks);
|
||||
|
||||
int range_start_sample, range_num_samples;
|
||||
if(current_resumable_chunk != 0) {
|
||||
/* Single chunk rendering. */
|
||||
range_start_sample = num_samples_per_chunk * (current_resumable_chunk - 1);
|
||||
range_num_samples = num_samples_per_chunk;
|
||||
}
|
||||
else {
|
||||
/* Ranged-chunks. */
|
||||
const int num_chunks = end_resumable_chunk - start_resumable_chunk + 1;
|
||||
range_start_sample = num_samples_per_chunk * (start_resumable_chunk - 1);
|
||||
range_num_samples = num_chunks * num_samples_per_chunk;
|
||||
}
|
||||
/* Make sure we don't overshoot. */
|
||||
int num_samples_per_chunk = (int)ceilf((float)num_samples / num_resumable_chunks);
|
||||
int range_start_sample = num_samples_per_chunk * (current_resumable_chunk - 1);
|
||||
int range_num_samples = num_samples_per_chunk;
|
||||
if(range_start_sample + range_num_samples > num_samples) {
|
||||
range_num_samples = num_samples - range_num_samples;
|
||||
}
|
||||
@@ -1372,9 +1357,6 @@ void BlenderSession::update_resumable_tile_manager(int num_samples)
|
||||
VLOG(1) << "Samples range start is " << range_start_sample << ", "
|
||||
<< "number of samples to render is " << range_num_samples;
|
||||
|
||||
scene->integrator->start_sample = range_start_sample;
|
||||
scene->integrator->tag_update(scene);
|
||||
|
||||
session->tile_manager.range_start_sample = range_start_sample;
|
||||
session->tile_manager.range_num_samples = range_num_samples;
|
||||
}
|
||||
|
@@ -113,7 +113,6 @@ public:
|
||||
string last_status;
|
||||
string last_error;
|
||||
float last_progress;
|
||||
double last_status_time;
|
||||
|
||||
int width, height;
|
||||
double start_resize_time;
|
||||
@@ -138,10 +137,6 @@ public:
|
||||
/* Current resumable chunk index to render. */
|
||||
static int current_resumable_chunk;
|
||||
|
||||
/* Alternative to single-chunk rendering to render a range of chunks. */
|
||||
static int start_resumable_chunk;
|
||||
static int end_resumable_chunk;
|
||||
|
||||
protected:
|
||||
void do_write_update_render_result(BL::RenderResult& b_rr,
|
||||
BL::RenderLayer& b_rlay,
|
||||
@@ -150,21 +145,9 @@ protected:
|
||||
void do_write_update_render_tile(RenderTile& rtile, bool do_update_only);
|
||||
|
||||
int builtin_image_frame(const string &builtin_name);
|
||||
void builtin_image_info(const string &builtin_name,
|
||||
void *builtin_data,
|
||||
bool &is_float,
|
||||
int &width,
|
||||
int &height,
|
||||
int &depth,
|
||||
int &channels);
|
||||
bool builtin_image_pixels(const string &builtin_name,
|
||||
void *builtin_data,
|
||||
unsigned char *pixels,
|
||||
const size_t pixels_size);
|
||||
bool builtin_image_float_pixels(const string &builtin_name,
|
||||
void *builtin_data,
|
||||
float *pixels,
|
||||
const size_t pixels_size);
|
||||
void builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &depth, int &channels);
|
||||
bool builtin_image_pixels(const string &builtin_name, void *builtin_data, unsigned char *pixels);
|
||||
bool builtin_image_float_pixels(const string &builtin_name, void *builtin_data, float *pixels);
|
||||
|
||||
/* Update tile manager to reflect resumable render settings. */
|
||||
void update_resumable_tile_manager(int num_samples);
|
||||
|
@@ -609,8 +609,7 @@ static ShaderNode *add_node(Scene *scene,
|
||||
bool is_builtin = b_image.packed_file() ||
|
||||
b_image.source() == BL::Image::source_GENERATED ||
|
||||
b_image.source() == BL::Image::source_MOVIE ||
|
||||
(b_engine.is_preview() &&
|
||||
b_image.source() != BL::Image::source_SEQUENCE);
|
||||
b_engine.is_preview();
|
||||
|
||||
if(is_builtin) {
|
||||
/* for builtin images we're using image datablock name to find an image to
|
||||
@@ -641,8 +640,7 @@ static ShaderNode *add_node(Scene *scene,
|
||||
image->filename.string(),
|
||||
image->builtin_data,
|
||||
get_image_interpolation(b_image_node),
|
||||
get_image_extension(b_image_node),
|
||||
image->use_alpha);
|
||||
get_image_extension(b_image_node));
|
||||
}
|
||||
}
|
||||
image->color_space = (NodeImageColorSpace)b_image_node.color_space();
|
||||
@@ -663,8 +661,7 @@ static ShaderNode *add_node(Scene *scene,
|
||||
bool is_builtin = b_image.packed_file() ||
|
||||
b_image.source() == BL::Image::source_GENERATED ||
|
||||
b_image.source() == BL::Image::source_MOVIE ||
|
||||
(b_engine.is_preview() &&
|
||||
b_image.source() != BL::Image::source_SEQUENCE);
|
||||
b_engine.is_preview();
|
||||
|
||||
if(is_builtin) {
|
||||
int scene_frame = b_scene.frame_current();
|
||||
@@ -689,8 +686,7 @@ static ShaderNode *add_node(Scene *scene,
|
||||
env->filename.string(),
|
||||
env->builtin_data,
|
||||
get_image_interpolation(b_env_node),
|
||||
EXTENSION_REPEAT,
|
||||
env->use_alpha);
|
||||
EXTENSION_REPEAT);
|
||||
}
|
||||
}
|
||||
env->color_space = (NodeImageColorSpace)b_env_node.color_space();
|
||||
@@ -827,8 +823,7 @@ static ShaderNode *add_node(Scene *scene,
|
||||
point_density->filename.string(),
|
||||
point_density->builtin_data,
|
||||
point_density->interpolation,
|
||||
EXTENSION_CLIP,
|
||||
true);
|
||||
EXTENSION_CLIP);
|
||||
}
|
||||
node = point_density;
|
||||
|
||||
|
@@ -322,15 +322,6 @@ void BlenderSync::sync_integrator()
|
||||
integrator->volume_samples = volume_samples;
|
||||
}
|
||||
|
||||
if(b_scene.render().use_simplify()) {
|
||||
if(preview) {
|
||||
integrator->ao_bounces = get_int(cscene, "ao_bounces");
|
||||
}
|
||||
else {
|
||||
integrator->ao_bounces = get_int(cscene, "ao_bounces_render");
|
||||
}
|
||||
}
|
||||
|
||||
if(integrator->modified(previntegrator))
|
||||
integrator->tag_update(scene);
|
||||
}
|
||||
@@ -507,7 +498,6 @@ SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
|
||||
|
||||
params.use_bvh_spatial_split = RNA_boolean_get(&cscene, "debug_use_spatial_splits");
|
||||
params.use_bvh_unaligned_nodes = RNA_boolean_get(&cscene, "debug_use_hair_bvh");
|
||||
params.num_bvh_time_steps = RNA_int_get(&cscene, "debug_bvh_time_steps");
|
||||
|
||||
if(background && params.shadingsystem != SHADINGSYSTEM_OSL)
|
||||
params.persistent_data = r.use_persistent_data();
|
||||
|
@@ -19,7 +19,6 @@
|
||||
|
||||
#include "mesh.h"
|
||||
|
||||
#include "util_algorithm.h"
|
||||
#include "util_map.h"
|
||||
#include "util_path.h"
|
||||
#include "util_set.h"
|
||||
@@ -49,12 +48,12 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
|
||||
bool apply_modifiers,
|
||||
bool render,
|
||||
bool calc_undeformed,
|
||||
Mesh::SubdivisionType subdivision_type)
|
||||
bool subdivision)
|
||||
{
|
||||
bool subsurf_mod_show_render;
|
||||
bool subsurf_mod_show_viewport;
|
||||
|
||||
if(subdivision_type != Mesh::SUBDIVISION_NONE) {
|
||||
if(subdivision) {
|
||||
BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length()-1];
|
||||
|
||||
subsurf_mod_show_render = subsurf_mod.show_render();
|
||||
@@ -66,7 +65,7 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
|
||||
|
||||
BL::Mesh me = data.meshes.new_from_object(scene, object, apply_modifiers, (render)? 2: 1, false, calc_undeformed);
|
||||
|
||||
if(subdivision_type != Mesh::SUBDIVISION_NONE) {
|
||||
if(subdivision) {
|
||||
BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length()-1];
|
||||
|
||||
subsurf_mod.show_render(subsurf_mod_show_render);
|
||||
@@ -75,14 +74,9 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
|
||||
|
||||
if((bool)me) {
|
||||
if(me.use_auto_smooth()) {
|
||||
if(subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK) {
|
||||
me.calc_normals_split();
|
||||
}
|
||||
else {
|
||||
me.split_faces(false);
|
||||
}
|
||||
me.calc_normals_split();
|
||||
}
|
||||
if(subdivision_type == Mesh::SUBDIVISION_NONE) {
|
||||
if(!subdivision) {
|
||||
me.calc_tessface(true);
|
||||
}
|
||||
}
|
||||
@@ -787,35 +781,6 @@ struct ParticleSystemKey {
|
||||
}
|
||||
};
|
||||
|
||||
class EdgeMap {
|
||||
public:
|
||||
EdgeMap() {
|
||||
}
|
||||
|
||||
void clear() {
|
||||
edges_.clear();
|
||||
}
|
||||
|
||||
void insert(int v0, int v1) {
|
||||
get_sorted_verts(v0, v1);
|
||||
edges_.insert(std::pair<int, int>(v0, v1));
|
||||
}
|
||||
|
||||
bool exists(int v0, int v1) {
|
||||
get_sorted_verts(v0, v1);
|
||||
return edges_.find(std::pair<int, int>(v0, v1)) != edges_.end();
|
||||
}
|
||||
|
||||
protected:
|
||||
void get_sorted_verts(int& v0, int& v1) {
|
||||
if(v0 > v1) {
|
||||
swap(v0, v1);
|
||||
}
|
||||
}
|
||||
|
||||
set< std::pair<int, int> > edges_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __BLENDER_UTIL_H__ */
|
||||
|
@@ -81,7 +81,6 @@ void BVH::build(Progress& progress)
|
||||
pack.prim_type,
|
||||
pack.prim_index,
|
||||
pack.prim_object,
|
||||
pack.prim_time,
|
||||
params,
|
||||
progress);
|
||||
BVHNode *root = bvh_build.run();
|
||||
@@ -257,10 +256,6 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
pack.leaf_nodes.resize(leaf_nodes_size);
|
||||
pack.object_node.resize(objects.size());
|
||||
|
||||
if(params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0) {
|
||||
pack.prim_time.resize(prim_index_size);
|
||||
}
|
||||
|
||||
int *pack_prim_index = (pack.prim_index.size())? &pack.prim_index[0]: NULL;
|
||||
int *pack_prim_type = (pack.prim_type.size())? &pack.prim_type[0]: NULL;
|
||||
int *pack_prim_object = (pack.prim_object.size())? &pack.prim_object[0]: NULL;
|
||||
@@ -269,7 +264,6 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
uint *pack_prim_tri_index = (pack.prim_tri_index.size())? &pack.prim_tri_index[0]: NULL;
|
||||
int4 *pack_nodes = (pack.nodes.size())? &pack.nodes[0]: NULL;
|
||||
int4 *pack_leaf_nodes = (pack.leaf_nodes.size())? &pack.leaf_nodes[0]: NULL;
|
||||
float2 *pack_prim_time = (pack.prim_time.size())? &pack.prim_time[0]: NULL;
|
||||
|
||||
/* merge */
|
||||
foreach(Object *ob, objects) {
|
||||
@@ -315,7 +309,6 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
int *bvh_prim_type = &bvh->pack.prim_type[0];
|
||||
uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0];
|
||||
uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0];
|
||||
float2 *bvh_prim_time = bvh->pack.prim_time.size()? &bvh->pack.prim_time[0]: NULL;
|
||||
|
||||
for(size_t i = 0; i < bvh_prim_index_size; i++) {
|
||||
if(bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
|
||||
@@ -331,9 +324,6 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
|
||||
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
|
||||
pack_prim_object[pack_prim_index_offset] = 0; // unused for instances
|
||||
if(bvh_prim_time != NULL) {
|
||||
pack_prim_time[pack_prim_index_offset] = bvh_prim_time[i];
|
||||
}
|
||||
pack_prim_index_offset++;
|
||||
}
|
||||
}
|
||||
@@ -855,8 +845,6 @@ void QBVH::pack_aligned_inner(const BVHStackEntry& e,
|
||||
bounds,
|
||||
child,
|
||||
e.node->m_visibility,
|
||||
e.node->m_time_from,
|
||||
e.node->m_time_to,
|
||||
num);
|
||||
}
|
||||
|
||||
@@ -864,17 +852,12 @@ void QBVH::pack_aligned_node(int idx,
|
||||
const BoundBox *bounds,
|
||||
const int *child,
|
||||
const uint visibility,
|
||||
const float time_from,
|
||||
const float time_to,
|
||||
const int num)
|
||||
{
|
||||
float4 data[BVH_QNODE_SIZE];
|
||||
memset(data, 0, sizeof(data));
|
||||
|
||||
data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
|
||||
data[0].y = time_from;
|
||||
data[0].z = time_to;
|
||||
|
||||
for(int i = 0; i < num; i++) {
|
||||
float3 bb_min = bounds[i].min;
|
||||
float3 bb_max = bounds[i].max;
|
||||
@@ -925,8 +908,6 @@ void QBVH::pack_unaligned_inner(const BVHStackEntry& e,
|
||||
bounds,
|
||||
child,
|
||||
e.node->m_visibility,
|
||||
e.node->m_time_from,
|
||||
e.node->m_time_to,
|
||||
num);
|
||||
}
|
||||
|
||||
@@ -935,16 +916,12 @@ void QBVH::pack_unaligned_node(int idx,
|
||||
const BoundBox *bounds,
|
||||
const int *child,
|
||||
const uint visibility,
|
||||
const float time_from,
|
||||
const float time_to,
|
||||
const int num)
|
||||
{
|
||||
float4 data[BVH_UNALIGNED_QNODE_SIZE];
|
||||
memset(data, 0, sizeof(data));
|
||||
|
||||
data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
|
||||
data[0].y = time_from;
|
||||
data[0].z = time_to;
|
||||
|
||||
for(int i = 0; i < num; i++) {
|
||||
Transform space = BVHUnaligned::compute_node_transform(
|
||||
@@ -1230,8 +1207,6 @@ void QBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
|
||||
child_bbox,
|
||||
&c[0],
|
||||
visibility,
|
||||
0.0f,
|
||||
1.0f,
|
||||
4);
|
||||
}
|
||||
else {
|
||||
@@ -1239,8 +1214,6 @@ void QBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
|
||||
child_bbox,
|
||||
&c[0],
|
||||
visibility,
|
||||
0.0f,
|
||||
1.0f,
|
||||
4);
|
||||
}
|
||||
}
|
||||
|
@@ -68,8 +68,6 @@ struct PackedBVH {
|
||||
array<int> prim_index;
|
||||
/* mapping from BVH primitive index, to the object id of that primitive. */
|
||||
array<int> prim_object;
|
||||
/* Time range of BVH primitive. */
|
||||
array<float2> prim_time;
|
||||
|
||||
/* index of the root node. */
|
||||
int root_index;
|
||||
@@ -177,8 +175,6 @@ protected:
|
||||
const BoundBox *bounds,
|
||||
const int *child,
|
||||
const uint visibility,
|
||||
const float time_from,
|
||||
const float time_to,
|
||||
const int num);
|
||||
|
||||
void pack_unaligned_inner(const BVHStackEntry& e,
|
||||
@@ -189,8 +185,6 @@ protected:
|
||||
const BoundBox *bounds,
|
||||
const int *child,
|
||||
const uint visibility,
|
||||
const float time_from,
|
||||
const float time_to,
|
||||
const int num);
|
||||
|
||||
/* refit */
|
||||
|
@@ -26,7 +26,6 @@
|
||||
#include "scene.h"
|
||||
#include "curves.h"
|
||||
|
||||
#include "util_algorithm.h"
|
||||
#include "util_debug.h"
|
||||
#include "util_foreach.h"
|
||||
#include "util_logging.h"
|
||||
@@ -93,14 +92,12 @@ BVHBuild::BVHBuild(const vector<Object*>& objects_,
|
||||
array<int>& prim_type_,
|
||||
array<int>& prim_index_,
|
||||
array<int>& prim_object_,
|
||||
array<float2>& prim_time_,
|
||||
const BVHParams& params_,
|
||||
Progress& progress_)
|
||||
: objects(objects_),
|
||||
prim_type(prim_type_),
|
||||
prim_index(prim_index_),
|
||||
prim_object(prim_object_),
|
||||
prim_time(prim_time_),
|
||||
params(params_),
|
||||
progress(progress_),
|
||||
progress_start_time(0.0),
|
||||
@@ -115,237 +112,81 @@ BVHBuild::~BVHBuild()
|
||||
|
||||
/* Adding References */
|
||||
|
||||
void BVHBuild::add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
|
||||
void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
|
||||
{
|
||||
const Attribute *attr_mP = NULL;
|
||||
if(mesh->has_motion_blur()) {
|
||||
attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
}
|
||||
const size_t num_triangles = mesh->num_triangles();
|
||||
for(uint j = 0; j < num_triangles; j++) {
|
||||
Mesh::Triangle t = mesh->get_triangle(j);
|
||||
const float3 *verts = &mesh->verts[0];
|
||||
if(attr_mP == NULL) {
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
t.bounds_grow(verts, bounds);
|
||||
if(bounds.valid()) {
|
||||
references.push_back(BVHReference(bounds,
|
||||
j,
|
||||
i,
|
||||
PRIMITIVE_TRIANGLE));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
}
|
||||
else if(params.num_motion_triangle_steps == 0 || params.use_spatial_split) {
|
||||
/* Motion triangles, simple case: single node for the whole
|
||||
* primitive. Lowest memory footprint and faster BVH build but
|
||||
* least optimal ray-tracing.
|
||||
*/
|
||||
/* TODO(sergey): Support motion steps for spatially split BVH. */
|
||||
const size_t num_verts = mesh->verts.size();
|
||||
const size_t num_steps = mesh->motion_steps;
|
||||
const float3 *vert_steps = attr_mP->data_float3();
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
t.bounds_grow(verts, bounds);
|
||||
for(size_t step = 0; step < num_steps - 1; step++) {
|
||||
t.bounds_grow(vert_steps + step*num_verts, bounds);
|
||||
}
|
||||
if(bounds.valid()) {
|
||||
references.push_back(
|
||||
BVHReference(bounds,
|
||||
j,
|
||||
i,
|
||||
PRIMITIVE_MOTION_TRIANGLE));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Motion triangles, trace optimized case: we split triangle
|
||||
* primitives into separate nodes for each of the time steps.
|
||||
* This way we minimize overlap of neighbor curve primitives.
|
||||
*/
|
||||
const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
|
||||
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
|
||||
const size_t num_verts = mesh->verts.size();
|
||||
const size_t num_steps = mesh->motion_steps;
|
||||
const float3 *vert_steps = attr_mP->data_float3();
|
||||
/* Calculate bounding box of the previous time step.
|
||||
* Will be reused later to avoid duplicated work on
|
||||
* calculating BVH time step boundbox.
|
||||
*/
|
||||
float3 prev_verts[3];
|
||||
t.motion_verts(verts,
|
||||
vert_steps,
|
||||
num_verts,
|
||||
num_steps,
|
||||
0.0f,
|
||||
prev_verts);
|
||||
BoundBox prev_bounds = BoundBox::empty;
|
||||
prev_bounds.grow(prev_verts[0]);
|
||||
prev_bounds.grow(prev_verts[1]);
|
||||
prev_bounds.grow(prev_verts[2]);
|
||||
/* Create all primitive time steps, */
|
||||
for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
|
||||
const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
|
||||
float3 curr_verts[3];
|
||||
t.motion_verts(verts,
|
||||
vert_steps,
|
||||
num_verts,
|
||||
num_steps,
|
||||
curr_time,
|
||||
curr_verts);
|
||||
BoundBox curr_bounds = BoundBox::empty;
|
||||
curr_bounds.grow(curr_verts[0]);
|
||||
curr_bounds.grow(curr_verts[1]);
|
||||
curr_bounds.grow(curr_verts[2]);
|
||||
BoundBox bounds = prev_bounds;
|
||||
bounds.grow(curr_bounds);
|
||||
if(bounds.valid()) {
|
||||
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
|
||||
references.push_back(
|
||||
BVHReference(bounds,
|
||||
j,
|
||||
i,
|
||||
PRIMITIVE_MOTION_TRIANGLE,
|
||||
prev_time,
|
||||
curr_time));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
/* Current time boundbox becomes previous one for the
|
||||
* next time step.
|
||||
*/
|
||||
prev_bounds = curr_bounds;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
|
||||
Attribute *attr_mP = NULL;
|
||||
|
||||
void BVHBuild::add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
|
||||
{
|
||||
const Attribute *curve_attr_mP = NULL;
|
||||
if(mesh->has_motion_blur()) {
|
||||
curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
if(mesh->has_motion_blur())
|
||||
attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
|
||||
size_t num_triangles = mesh->num_triangles();
|
||||
for(uint j = 0; j < num_triangles; j++) {
|
||||
Mesh::Triangle t = mesh->get_triangle(j);
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
PrimitiveType type = PRIMITIVE_TRIANGLE;
|
||||
|
||||
t.bounds_grow(&mesh->verts[0], bounds);
|
||||
|
||||
/* motion triangles */
|
||||
if(attr_mP) {
|
||||
size_t mesh_size = mesh->verts.size();
|
||||
size_t steps = mesh->motion_steps - 1;
|
||||
float3 *vert_steps = attr_mP->data_float3();
|
||||
|
||||
for(size_t i = 0; i < steps; i++)
|
||||
t.bounds_grow(vert_steps + i*mesh_size, bounds);
|
||||
|
||||
type = PRIMITIVE_MOTION_TRIANGLE;
|
||||
}
|
||||
|
||||
if(bounds.valid()) {
|
||||
references.push_back(BVHReference(bounds, j, i, type));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
}
|
||||
}
|
||||
const size_t num_curves = mesh->num_curves();
|
||||
for(uint j = 0; j < num_curves; j++) {
|
||||
const Mesh::Curve curve = mesh->get_curve(j);
|
||||
const float *curve_radius = &mesh->curve_radius[0];
|
||||
for(int k = 0; k < curve.num_keys - 1; k++) {
|
||||
if(curve_attr_mP == NULL) {
|
||||
/* Really simple logic for static hair. */
|
||||
|
||||
if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
|
||||
Attribute *curve_attr_mP = NULL;
|
||||
|
||||
if(mesh->has_motion_blur())
|
||||
curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
|
||||
size_t num_curves = mesh->num_curves();
|
||||
for(uint j = 0; j < num_curves; j++) {
|
||||
Mesh::Curve curve = mesh->get_curve(j);
|
||||
PrimitiveType type = PRIMITIVE_CURVE;
|
||||
|
||||
for(int k = 0; k < curve.num_keys - 1; k++) {
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
|
||||
curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bounds);
|
||||
|
||||
/* motion curve */
|
||||
if(curve_attr_mP) {
|
||||
size_t mesh_size = mesh->curve_keys.size();
|
||||
size_t steps = mesh->motion_steps - 1;
|
||||
float3 *key_steps = curve_attr_mP->data_float3();
|
||||
|
||||
for(size_t i = 0; i < steps; i++)
|
||||
curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bounds);
|
||||
|
||||
type = PRIMITIVE_MOTION_CURVE;
|
||||
}
|
||||
|
||||
if(bounds.valid()) {
|
||||
int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE, k);
|
||||
int packed_type = PRIMITIVE_PACK_SEGMENT(type, k);
|
||||
|
||||
references.push_back(BVHReference(bounds, j, i, packed_type));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
}
|
||||
else if(params.num_motion_curve_steps == 0 || params.use_spatial_split) {
|
||||
/* Simple case of motion curves: single node for the while
|
||||
* shutter time. Lowest memory usage but less optimal
|
||||
* rendering.
|
||||
*/
|
||||
/* TODO(sergey): Support motion steps for spatially split BVH. */
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
|
||||
const size_t num_keys = mesh->curve_keys.size();
|
||||
const size_t num_steps = mesh->motion_steps;
|
||||
const float3 *key_steps = curve_attr_mP->data_float3();
|
||||
for(size_t step = 0; step < num_steps - 1; step++) {
|
||||
curve.bounds_grow(k,
|
||||
key_steps + step*num_keys,
|
||||
curve_radius,
|
||||
bounds);
|
||||
}
|
||||
if(bounds.valid()) {
|
||||
int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
|
||||
references.push_back(BVHReference(bounds,
|
||||
j,
|
||||
i,
|
||||
packed_type));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Motion curves, trace optimized case: we split curve keys
|
||||
* primitives into separate nodes for each of the time steps.
|
||||
* This way we minimize overlap of neighbor curve primitives.
|
||||
*/
|
||||
const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
|
||||
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
|
||||
const size_t num_steps = mesh->motion_steps;
|
||||
const float3 *curve_keys = &mesh->curve_keys[0];
|
||||
const float3 *key_steps = curve_attr_mP->data_float3();
|
||||
const size_t num_keys = mesh->curve_keys.size();
|
||||
/* Calculate bounding box of the previous time step.
|
||||
* Will be reused later to avoid duplicated work on
|
||||
* calculating BVH time step boundbox.
|
||||
*/
|
||||
float4 prev_keys[4];
|
||||
curve.cardinal_motion_keys(curve_keys,
|
||||
curve_radius,
|
||||
key_steps,
|
||||
num_keys,
|
||||
num_steps,
|
||||
0.0f,
|
||||
k - 1, k, k + 1, k + 2,
|
||||
prev_keys);
|
||||
BoundBox prev_bounds = BoundBox::empty;
|
||||
curve.bounds_grow(prev_keys, prev_bounds);
|
||||
/* Create all primitive time steps, */
|
||||
for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
|
||||
const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
|
||||
float4 curr_keys[4];
|
||||
curve.cardinal_motion_keys(curve_keys,
|
||||
curve_radius,
|
||||
key_steps,
|
||||
num_keys,
|
||||
num_steps,
|
||||
curr_time,
|
||||
k - 1, k, k + 1, k + 2,
|
||||
curr_keys);
|
||||
BoundBox curr_bounds = BoundBox::empty;
|
||||
curve.bounds_grow(curr_keys, curr_bounds);
|
||||
BoundBox bounds = prev_bounds;
|
||||
bounds.grow(curr_bounds);
|
||||
if(bounds.valid()) {
|
||||
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
|
||||
int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
|
||||
references.push_back(BVHReference(bounds,
|
||||
j,
|
||||
i,
|
||||
packed_type,
|
||||
prev_time,
|
||||
curr_time));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
/* Current time boundbox becomes previous one for the
|
||||
* next time step.
|
||||
*/
|
||||
prev_bounds = curr_bounds;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
|
||||
{
|
||||
if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
|
||||
add_reference_triangles(root, center, mesh, i);
|
||||
}
|
||||
if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
|
||||
add_reference_curves(root, center, mesh, i);
|
||||
}
|
||||
}
|
||||
|
||||
void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i)
|
||||
{
|
||||
references.push_back(BVHReference(ob->bounds, -1, i, 0));
|
||||
@@ -359,7 +200,7 @@ static size_t count_curve_segments(Mesh *mesh)
|
||||
|
||||
for(size_t i = 0; i < num_curves; i++)
|
||||
num += mesh->get_curve(i).num_keys - 1;
|
||||
|
||||
|
||||
return num;
|
||||
}
|
||||
|
||||
@@ -467,9 +308,6 @@ BVHNode* BVHBuild::run()
|
||||
}
|
||||
spatial_free_index = 0;
|
||||
|
||||
need_prim_time = params.num_motion_curve_steps > 0 ||
|
||||
params.num_motion_triangle_steps > 0;
|
||||
|
||||
/* init progress updates */
|
||||
double build_start_time;
|
||||
build_start_time = progress_start_time = time_dt();
|
||||
@@ -480,12 +318,6 @@ BVHNode* BVHBuild::run()
|
||||
prim_type.resize(references.size());
|
||||
prim_index.resize(references.size());
|
||||
prim_object.resize(references.size());
|
||||
if(need_prim_time) {
|
||||
prim_time.resize(references.size());
|
||||
}
|
||||
else {
|
||||
prim_time.resize(0);
|
||||
}
|
||||
|
||||
/* build recursively */
|
||||
BVHNode *rootnode;
|
||||
@@ -512,7 +344,6 @@ BVHNode* BVHBuild::run()
|
||||
else {
|
||||
/*rotate(rootnode, 4, 5);*/
|
||||
rootnode->update_visibility();
|
||||
rootnode->update_time();
|
||||
}
|
||||
if(rootnode != NULL) {
|
||||
VLOG(1) << "BVH build statistics:\n"
|
||||
@@ -540,7 +371,7 @@ void BVHBuild::progress_update()
|
||||
{
|
||||
if(time_dt() - progress_start_time < 0.25)
|
||||
return;
|
||||
|
||||
|
||||
double progress_start = (double)progress_count/(double)progress_total;
|
||||
double duplicates = (double)(progress_total - progress_original_total)/(double)progress_total;
|
||||
|
||||
@@ -548,7 +379,7 @@ void BVHBuild::progress_update()
|
||||
progress_start * 100.0, duplicates * 100.0);
|
||||
|
||||
progress.set_substatus(msg);
|
||||
progress_start_time = time_dt();
|
||||
progress_start_time = time_dt();
|
||||
}
|
||||
|
||||
void BVHBuild::thread_build_node(InnerNode *inner,
|
||||
@@ -604,7 +435,6 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange& range,
|
||||
return false;
|
||||
|
||||
size_t num_triangles = 0;
|
||||
size_t num_motion_triangles = 0;
|
||||
size_t num_curves = 0;
|
||||
size_t num_motion_curves = 0;
|
||||
|
||||
@@ -615,16 +445,13 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange& range,
|
||||
num_curves++;
|
||||
if(ref.prim_type() & PRIMITIVE_MOTION_CURVE)
|
||||
num_motion_curves++;
|
||||
else if(ref.prim_type() & PRIMITIVE_TRIANGLE)
|
||||
else if(ref.prim_type() & PRIMITIVE_ALL_TRIANGLE)
|
||||
num_triangles++;
|
||||
else if(ref.prim_type() & PRIMITIVE_MOTION_TRIANGLE)
|
||||
num_motion_triangles++;
|
||||
}
|
||||
|
||||
return (num_triangles <= params.max_triangle_leaf_size) &&
|
||||
(num_motion_triangles <= params.max_motion_triangle_leaf_size) &&
|
||||
(num_curves <= params.max_curve_leaf_size) &&
|
||||
(num_motion_curves <= params.max_motion_curve_leaf_size);
|
||||
return (num_triangles < params.max_triangle_leaf_size) &&
|
||||
(num_curves < params.max_curve_leaf_size) &&
|
||||
(num_motion_curves < params.max_curve_leaf_size);
|
||||
}
|
||||
|
||||
/* multithreaded binning builder */
|
||||
@@ -860,29 +687,20 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start,
|
||||
prim_type[start] = ref->prim_type();
|
||||
prim_index[start] = ref->prim_index();
|
||||
prim_object[start] = ref->prim_object();
|
||||
if(need_prim_time) {
|
||||
prim_time[start] = make_float2(ref->time_from(), ref->time_to());
|
||||
}
|
||||
|
||||
uint visibility = objects[ref->prim_object()]->visibility;
|
||||
BVHNode *leaf_node = new LeafNode(ref->bounds(), visibility, start, start+1);
|
||||
leaf_node->m_time_from = ref->time_from();
|
||||
leaf_node->m_time_to = ref->time_to();
|
||||
return leaf_node;
|
||||
return new LeafNode(ref->bounds(), visibility, start, start+1);
|
||||
}
|
||||
else {
|
||||
int mid = num/2;
|
||||
BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid);
|
||||
BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid);
|
||||
BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid);
|
||||
BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid);
|
||||
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
bounds.grow(leaf0->m_bounds);
|
||||
bounds.grow(leaf1->m_bounds);
|
||||
|
||||
BVHNode *inner_node = new InnerNode(bounds, leaf0, leaf1);
|
||||
inner_node->m_time_from = min(leaf0->m_time_from, leaf1->m_time_from);
|
||||
inner_node->m_time_to = max(leaf0->m_time_to, leaf1->m_time_to);
|
||||
return inner_node;
|
||||
return new InnerNode(bounds, leaf0, leaf1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -905,13 +723,11 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
* can not control.
|
||||
*/
|
||||
typedef StackAllocator<256, int> LeafStackAllocator;
|
||||
typedef StackAllocator<256, float2> LeafTimeStackAllocator;
|
||||
typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;
|
||||
|
||||
vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL];
|
||||
vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL];
|
||||
vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL];
|
||||
vector<float2, LeafTimeStackAllocator> p_time[PRIMITIVE_NUM_TOTAL];
|
||||
vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM_TOTAL];
|
||||
|
||||
/* TODO(sergey): In theory we should be able to store references. */
|
||||
@@ -934,8 +750,6 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
p_type[type_index].push_back(ref.prim_type());
|
||||
p_index[type_index].push_back(ref.prim_index());
|
||||
p_object[type_index].push_back(ref.prim_object());
|
||||
p_time[type_index].push_back(make_float2(ref.time_from(),
|
||||
ref.time_to()));
|
||||
|
||||
bounds[type_index].grow(ref.bounds());
|
||||
visibility[type_index] |= objects[ref.prim_object()]->visibility;
|
||||
@@ -965,13 +779,9 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
vector<int, LeafStackAllocator> local_prim_type,
|
||||
local_prim_index,
|
||||
local_prim_object;
|
||||
vector<float2, LeafTimeStackAllocator> local_prim_time;
|
||||
local_prim_type.resize(num_new_prims);
|
||||
local_prim_index.resize(num_new_prims);
|
||||
local_prim_object.resize(num_new_prims);
|
||||
if(need_prim_time) {
|
||||
local_prim_time.resize(num_new_prims);
|
||||
}
|
||||
for(int i = 0; i < PRIMITIVE_NUM_TOTAL; ++i) {
|
||||
int num = (int)p_type[i].size();
|
||||
if(num != 0) {
|
||||
@@ -984,9 +794,6 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
local_prim_type[index] = p_type[i][j];
|
||||
local_prim_index[index] = p_index[i][j];
|
||||
local_prim_object[index] = p_object[i][j];
|
||||
if(need_prim_time) {
|
||||
local_prim_time[index] = p_time[i][j];
|
||||
}
|
||||
if(params.use_unaligned_nodes && !alignment_found) {
|
||||
alignment_found =
|
||||
unaligned_heuristic.compute_aligned_space(p_ref[i][j],
|
||||
@@ -997,16 +804,6 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
visibility[i],
|
||||
start_index,
|
||||
start_index + num);
|
||||
if(true) {
|
||||
float time_from = 1.0f, time_to = 0.0f;
|
||||
for(int j = 0; j < num; ++j) {
|
||||
const BVHReference &ref = p_ref[i][j];
|
||||
time_from = min(time_from, ref.time_from());
|
||||
time_to = max(time_to, ref.time_to());
|
||||
}
|
||||
leaf_node->m_time_from = time_from;
|
||||
leaf_node->m_time_to = time_to;
|
||||
}
|
||||
if(alignment_found) {
|
||||
/* Need to recalculate leaf bounds with new alignment. */
|
||||
leaf_node->m_bounds = BoundBox::empty;
|
||||
@@ -1053,17 +850,11 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
prim_type.reserve(reserve);
|
||||
prim_index.reserve(reserve);
|
||||
prim_object.reserve(reserve);
|
||||
if(need_prim_time) {
|
||||
prim_time.reserve(reserve);
|
||||
}
|
||||
}
|
||||
|
||||
prim_type.resize(range_end);
|
||||
prim_index.resize(range_end);
|
||||
prim_object.resize(range_end);
|
||||
if(need_prim_time) {
|
||||
prim_time.resize(range_end);
|
||||
}
|
||||
}
|
||||
spatial_spin_lock.unlock();
|
||||
|
||||
@@ -1072,9 +863,6 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
|
||||
memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
|
||||
memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
|
||||
if(need_prim_time) {
|
||||
memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
@@ -1087,9 +875,6 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
|
||||
memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
|
||||
memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
|
||||
if(need_prim_time) {
|
||||
memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1133,7 +918,7 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
|
||||
BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]);
|
||||
return new InnerNode(range.bounds(), leaves[0], inner);
|
||||
} else {
|
||||
/* Should be doing more branches if more primitive types added. */
|
||||
/* Should be doing more branches if more primitive types added. */
|
||||
assert(num_leaves <= 5);
|
||||
BoundBox inner_bounds_a = merge(leaves[0]->m_bounds, leaves[1]->m_bounds);
|
||||
BoundBox inner_bounds_b = merge(leaves[2]->m_bounds, leaves[3]->m_bounds);
|
||||
@@ -1166,7 +951,7 @@ void BVHBuild::rotate(BVHNode *node, int max_depth)
|
||||
/* nothing to rotate if we reached a leaf node. */
|
||||
if(node->is_leaf() || max_depth < 0)
|
||||
return;
|
||||
|
||||
|
||||
InnerNode *parent = (InnerNode*)node;
|
||||
|
||||
/* rotate all children first */
|
||||
|
@@ -48,7 +48,6 @@ public:
|
||||
array<int>& prim_type,
|
||||
array<int>& prim_index,
|
||||
array<int>& prim_object,
|
||||
array<float2>& prim_time,
|
||||
const BVHParams& params,
|
||||
Progress& progress);
|
||||
~BVHBuild();
|
||||
@@ -64,8 +63,6 @@ protected:
|
||||
friend class BVHObjectBinning;
|
||||
|
||||
/* Adding references. */
|
||||
void add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
|
||||
void add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
|
||||
void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
|
||||
void add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i);
|
||||
void add_references(BVHRange& root);
|
||||
@@ -113,9 +110,6 @@ protected:
|
||||
array<int>& prim_type;
|
||||
array<int>& prim_index;
|
||||
array<int>& prim_object;
|
||||
array<float2>& prim_time;
|
||||
|
||||
bool need_prim_time;
|
||||
|
||||
/* Build parameters. */
|
||||
BVHParams params;
|
||||
|
@@ -176,19 +176,6 @@ uint BVHNode::update_visibility()
|
||||
return m_visibility;
|
||||
}
|
||||
|
||||
void BVHNode::update_time()
|
||||
{
|
||||
if(!is_leaf()) {
|
||||
InnerNode *inner = (InnerNode*)this;
|
||||
BVHNode *child0 = inner->children[0];
|
||||
BVHNode *child1 = inner->children[1];
|
||||
child0->update_time();
|
||||
child1->update_time();
|
||||
m_time_from = min(child0->m_time_from, child1->m_time_from);
|
||||
m_time_to = max(child0->m_time_to, child1->m_time_to);
|
||||
}
|
||||
}
|
||||
|
||||
/* Inner Node */
|
||||
|
||||
void InnerNode::print(int depth) const
|
||||
|
@@ -47,9 +47,7 @@ class BVHNode
|
||||
{
|
||||
public:
|
||||
BVHNode() : m_is_unaligned(false),
|
||||
m_aligned_space(NULL),
|
||||
m_time_from(0.0f),
|
||||
m_time_to(1.0f)
|
||||
m_aligned_space(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -93,15 +91,12 @@ public:
|
||||
void deleteSubtree();
|
||||
|
||||
uint update_visibility();
|
||||
void update_time();
|
||||
|
||||
bool m_is_unaligned;
|
||||
|
||||
// TODO(sergey): Can be stored as 3x3 matrix, but better to have some
|
||||
// utilities and type defines in util_transform first.
|
||||
Transform *m_aligned_space;
|
||||
|
||||
float m_time_from, m_time_to;
|
||||
};
|
||||
|
||||
class InnerNode : public BVHNode
|
||||
|
@@ -43,9 +43,7 @@ public:
|
||||
/* number of primitives in leaf */
|
||||
int min_leaf_size;
|
||||
int max_triangle_leaf_size;
|
||||
int max_motion_triangle_leaf_size;
|
||||
int max_curve_leaf_size;
|
||||
int max_motion_curve_leaf_size;
|
||||
|
||||
/* object or mesh level bvh */
|
||||
bool top_level;
|
||||
@@ -61,17 +59,6 @@ public:
|
||||
*/
|
||||
bool use_unaligned_nodes;
|
||||
|
||||
/* Split time range to this number of steps and create leaf node for each
|
||||
* of these time steps.
|
||||
*
|
||||
* Speeds up rendering of motion curve primitives at the cost of higher
|
||||
* memory usage.
|
||||
*/
|
||||
int num_motion_curve_steps;
|
||||
|
||||
/* Same as above, but for triangle primitives. */
|
||||
int num_motion_triangle_steps;
|
||||
|
||||
/* fixed parameters */
|
||||
enum {
|
||||
MAX_DEPTH = 64,
|
||||
@@ -93,18 +80,13 @@ public:
|
||||
|
||||
min_leaf_size = 1;
|
||||
max_triangle_leaf_size = 8;
|
||||
max_motion_triangle_leaf_size = 8;
|
||||
max_curve_leaf_size = 1;
|
||||
max_motion_curve_leaf_size = 4;
|
||||
max_curve_leaf_size = 2;
|
||||
|
||||
top_level = false;
|
||||
use_qbvh = false;
|
||||
use_unaligned_nodes = false;
|
||||
|
||||
primitive_mask = PRIMITIVE_ALL;
|
||||
|
||||
num_motion_curve_steps = 0;
|
||||
num_motion_triangle_steps = 0;
|
||||
}
|
||||
|
||||
/* SAH costs */
|
||||
@@ -131,15 +113,8 @@ class BVHReference
|
||||
public:
|
||||
__forceinline BVHReference() {}
|
||||
|
||||
__forceinline BVHReference(const BoundBox& bounds_,
|
||||
int prim_index_,
|
||||
int prim_object_,
|
||||
int prim_type,
|
||||
float time_from = 0.0f,
|
||||
float time_to = 1.0f)
|
||||
: rbounds(bounds_),
|
||||
time_from_(time_from),
|
||||
time_to_(time_to)
|
||||
__forceinline BVHReference(const BoundBox& bounds_, int prim_index_, int prim_object_, int prim_type)
|
||||
: rbounds(bounds_)
|
||||
{
|
||||
rbounds.min.w = __int_as_float(prim_index_);
|
||||
rbounds.max.w = __int_as_float(prim_object_);
|
||||
@@ -150,9 +125,6 @@ public:
|
||||
__forceinline int prim_index() const { return __float_as_int(rbounds.min.w); }
|
||||
__forceinline int prim_object() const { return __float_as_int(rbounds.max.w); }
|
||||
__forceinline int prim_type() const { return type; }
|
||||
__forceinline float time_from() const { return time_from_; }
|
||||
__forceinline float time_to() const { return time_to_; }
|
||||
|
||||
|
||||
BVHReference& operator=(const BVHReference &arg) {
|
||||
if(&arg != this) {
|
||||
@@ -161,11 +133,9 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
BoundBox rbounds;
|
||||
uint type;
|
||||
float time_from_, time_to_;
|
||||
};
|
||||
|
||||
/* BVH Range
|
||||
|
@@ -3,7 +3,6 @@ set(INC
|
||||
.
|
||||
../graph
|
||||
../kernel
|
||||
../kernel/split
|
||||
../kernel/svm
|
||||
../kernel/osl
|
||||
../util
|
||||
@@ -34,7 +33,6 @@ set(SRC
|
||||
device_cuda.cpp
|
||||
device_multi.cpp
|
||||
device_opencl.cpp
|
||||
device_split_kernel.cpp
|
||||
device_task.cpp
|
||||
)
|
||||
|
||||
@@ -58,7 +56,6 @@ set(SRC_HEADERS
|
||||
device_memory.h
|
||||
device_intern.h
|
||||
device_network.h
|
||||
device_split_kernel.h
|
||||
device_task.h
|
||||
)
|
||||
|
||||
|
@@ -64,8 +64,6 @@ std::ostream& operator <<(std::ostream &os,
|
||||
<< string_from_bool(requested_features.use_integrator_branched) << std::endl;
|
||||
os << "Use Patch Evaluation: "
|
||||
<< string_from_bool(requested_features.use_patch_evaluation) << std::endl;
|
||||
os << "Use Transparent Shadows: "
|
||||
<< string_from_bool(requested_features.use_transparent) << std::endl;
|
||||
return os;
|
||||
}
|
||||
|
||||
@@ -80,7 +78,7 @@ Device::~Device()
|
||||
|
||||
void Device::pixels_alloc(device_memory& mem)
|
||||
{
|
||||
mem_alloc("pixels", mem, MEM_READ_WRITE);
|
||||
mem_alloc(mem, MEM_READ_WRITE);
|
||||
}
|
||||
|
||||
void Device::pixels_copy_from(device_memory& mem, int y, int w, int h)
|
||||
|
@@ -117,9 +117,6 @@ public:
|
||||
|
||||
/* Use OpenSubdiv patch evaluation */
|
||||
bool use_patch_evaluation;
|
||||
|
||||
/* Use Transparent shadows */
|
||||
bool use_transparent;
|
||||
|
||||
DeviceRequestedFeatures()
|
||||
{
|
||||
@@ -136,7 +133,6 @@ public:
|
||||
use_volume = false;
|
||||
use_integrator_branched = false;
|
||||
use_patch_evaluation = false;
|
||||
use_transparent = false;
|
||||
}
|
||||
|
||||
bool modified(const DeviceRequestedFeatures& requested_features)
|
||||
@@ -152,8 +148,7 @@ public:
|
||||
use_subsurface == requested_features.use_subsurface &&
|
||||
use_volume == requested_features.use_volume &&
|
||||
use_integrator_branched == requested_features.use_integrator_branched &&
|
||||
use_patch_evaluation == requested_features.use_patch_evaluation &&
|
||||
use_transparent == requested_features.use_transparent);
|
||||
use_patch_evaluation == requested_features.use_patch_evaluation);
|
||||
}
|
||||
|
||||
/* Convert the requested features structure to a build options,
|
||||
@@ -194,9 +189,6 @@ public:
|
||||
if(!use_patch_evaluation) {
|
||||
build_options += " -D__NO_PATCH_EVAL__";
|
||||
}
|
||||
if(!use_transparent && !use_volume) {
|
||||
build_options += " -D__NO_TRANSPARENT__";
|
||||
}
|
||||
return build_options;
|
||||
}
|
||||
};
|
||||
@@ -228,21 +220,12 @@ public:
|
||||
DeviceInfo info;
|
||||
virtual const string& error_message() { return error_msg; }
|
||||
bool have_error() { return !error_message().empty(); }
|
||||
virtual void set_error(const string& error)
|
||||
{
|
||||
if(!have_error()) {
|
||||
error_msg = error;
|
||||
}
|
||||
fprintf(stderr, "%s\n", error.c_str());
|
||||
fflush(stderr);
|
||||
}
|
||||
virtual bool show_samples() const { return false; }
|
||||
|
||||
/* statistics */
|
||||
Stats &stats;
|
||||
|
||||
/* regular memory */
|
||||
virtual void mem_alloc(const char *name, device_memory& mem, MemoryType type) = 0;
|
||||
virtual void mem_alloc(device_memory& mem, MemoryType type) = 0;
|
||||
virtual void mem_copy_to(device_memory& mem) = 0;
|
||||
virtual void mem_copy_from(device_memory& mem,
|
||||
int y, int w, int h, int elem) = 0;
|
||||
|
@@ -26,12 +26,10 @@
|
||||
|
||||
#include "device.h"
|
||||
#include "device_intern.h"
|
||||
#include "device_split_kernel.h"
|
||||
|
||||
#include "kernel.h"
|
||||
#include "kernel_compat_cpu.h"
|
||||
#include "kernel_types.h"
|
||||
#include "split/kernel_split_data.h"
|
||||
#include "kernel_globals.h"
|
||||
|
||||
#include "osl_shader.h"
|
||||
@@ -43,7 +41,6 @@
|
||||
#include "util_foreach.h"
|
||||
#include "util_function.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_map.h"
|
||||
#include "util_opengl.h"
|
||||
#include "util_progress.h"
|
||||
#include "util_system.h"
|
||||
@@ -51,93 +48,8 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class CPUDevice;
|
||||
|
||||
class CPUSplitKernel : public DeviceSplitKernel {
|
||||
CPUDevice *device;
|
||||
public:
|
||||
explicit CPUSplitKernel(CPUDevice *device);
|
||||
|
||||
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
|
||||
RenderTile& rtile,
|
||||
int num_global_elements,
|
||||
device_memory& kernel_globals,
|
||||
device_memory& kernel_data_,
|
||||
device_memory& split_data,
|
||||
device_memory& ray_state,
|
||||
device_memory& queue_index,
|
||||
device_memory& use_queues_flag,
|
||||
device_memory& work_pool_wgs);
|
||||
|
||||
virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
|
||||
virtual int2 split_kernel_local_size();
|
||||
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
|
||||
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
|
||||
};
|
||||
|
||||
class CPUDevice : public Device
|
||||
{
|
||||
static unordered_map<string, void*> kernel_functions;
|
||||
|
||||
static void register_kernel_function(const char* name, void* func)
|
||||
{
|
||||
kernel_functions[name] = func;
|
||||
}
|
||||
|
||||
/* Name of the CPU kernel flavour to use.
 *
 * The optimized flavours are tried in the order AVX2, AVX, SSE4.1, SSE3,
 * SSE2; the first one that is both compiled in and reported as supported by
 * the system wins. "cpu" is returned when none of them applies.
 */
static const char* get_arch_name()
{
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
	if(system_cpu_support_avx2()) {
		return "cpu_avx2";
	}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
	if(system_cpu_support_avx()) {
		return "cpu_avx";
	}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
	if(system_cpu_support_sse41()) {
		return "cpu_sse41";
	}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
	if(system_cpu_support_sse3()) {
		return "cpu_sse3";
	}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
	if(system_cpu_support_sse2()) {
		return "cpu_sse2";
	}
#endif
	/* Generic fallback. */
	return "cpu";
}
|
||||
|
||||
template<typename F>
|
||||
static F get_kernel_function(string name)
|
||||
{
|
||||
name = string("kernel_") + get_arch_name() + "_" + name;
|
||||
|
||||
unordered_map<string, void*>::iterator it = kernel_functions.find(name);
|
||||
|
||||
if(it == kernel_functions.end()) {
|
||||
assert(!"kernel function not found");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return (F)it->second;
|
||||
}
|
||||
|
||||
friend class CPUSplitKernel;
|
||||
|
||||
public:
|
||||
TaskPool task_pool;
|
||||
KernelGlobals kernel_globals;
|
||||
@@ -145,15 +57,10 @@ public:
|
||||
#ifdef WITH_OSL
|
||||
OSLGlobals osl_globals;
|
||||
#endif
|
||||
|
||||
bool use_split_kernel;
|
||||
|
||||
DeviceRequestedFeatures requested_features;
|
||||
|
||||
CPUDevice(DeviceInfo& info, Stats &stats, bool background)
|
||||
: Device(info, stats, background)
|
||||
{
|
||||
|
||||
#ifdef WITH_OSL
|
||||
kernel_globals.osl = &osl_globals;
|
||||
#endif
|
||||
@@ -198,28 +105,6 @@ public:
|
||||
{
|
||||
VLOG(1) << "Will be using regular kernels.";
|
||||
}
|
||||
|
||||
use_split_kernel = DebugFlags().cpu.split_kernel;
|
||||
if(use_split_kernel) {
|
||||
VLOG(1) << "Will be using split kernel.";
|
||||
}
|
||||
|
||||
kernel_cpu_register_functions(register_kernel_function);
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
|
||||
kernel_cpu_sse2_register_functions(register_kernel_function);
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
|
||||
kernel_cpu_sse3_register_functions(register_kernel_function);
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
|
||||
kernel_cpu_sse41_register_functions(register_kernel_function);
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
|
||||
kernel_cpu_avx_register_functions(register_kernel_function);
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
|
||||
kernel_cpu_avx2_register_functions(register_kernel_function);
|
||||
#endif
|
||||
}
|
||||
|
||||
~CPUDevice()
|
||||
@@ -227,25 +112,9 @@ public:
|
||||
task_pool.stop();
|
||||
}
|
||||
|
||||
virtual bool show_samples() const
|
||||
void mem_alloc(device_memory& mem, MemoryType /*type*/)
|
||||
{
|
||||
return (TaskScheduler::num_threads() == 1);
|
||||
}
|
||||
|
||||
void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
|
||||
{
|
||||
if(name) {
|
||||
VLOG(1) << "Buffer allocate: " << name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
}
|
||||
|
||||
mem.device_pointer = mem.data_pointer;
|
||||
|
||||
if(!mem.device_pointer) {
|
||||
mem.device_pointer = (device_ptr)malloc(mem.memory_size());
|
||||
}
|
||||
|
||||
mem.device_size = mem.memory_size();
|
||||
stats.mem_alloc(mem.device_size);
|
||||
}
|
||||
@@ -270,10 +139,6 @@ public:
|
||||
void mem_free(device_memory& mem)
|
||||
{
|
||||
if(mem.device_pointer) {
|
||||
if(!mem.data_pointer) {
|
||||
free((void*)mem.device_pointer);
|
||||
}
|
||||
|
||||
mem.device_pointer = 0;
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_size = 0;
|
||||
@@ -326,14 +191,8 @@ public:
|
||||
|
||||
void thread_run(DeviceTask *task)
|
||||
{
|
||||
if(task->type == DeviceTask::PATH_TRACE) {
|
||||
if(!use_split_kernel) {
|
||||
thread_path_trace(*task);
|
||||
}
|
||||
else {
|
||||
thread_path_trace_split(*task);
|
||||
}
|
||||
}
|
||||
if(task->type == DeviceTask::PATH_TRACE)
|
||||
thread_path_trace(*task);
|
||||
else if(task->type == DeviceTask::FILM_CONVERT)
|
||||
thread_film_convert(*task);
|
||||
else if(task->type == DeviceTask::SHADER)
|
||||
@@ -394,7 +253,7 @@ public:
|
||||
{
|
||||
path_trace_kernel = kernel_cpu_path_trace;
|
||||
}
|
||||
|
||||
|
||||
while(task.acquire_tile(this, tile)) {
|
||||
float *render_buffer = (float*)tile.buffer;
|
||||
uint *rng_state = (uint*)tile.rng_state;
|
||||
@@ -416,7 +275,7 @@ public:
|
||||
|
||||
tile.sample = sample + 1;
|
||||
|
||||
task.update_progress(&tile, tile.w*tile.h);
|
||||
task.update_progress(&tile);
|
||||
}
|
||||
|
||||
task.release_tile(tile);
|
||||
@@ -430,49 +289,6 @@ public:
|
||||
thread_kernel_globals_free(&kg);
|
||||
}
|
||||
|
||||
/* Path trace all tiles of `task` with the split kernel.
 *
 * A KernelGlobals buffer is allocated for the duration of the call and freed
 * again before returning, also when loading the split kernels fails.
 */
void thread_path_trace_split(DeviceTask& task)
{
	/* Cancellation only stops us when the queue does not have to be finished. */
	if(task_pool.canceled() && task.need_finish_queue == false) {
		return;
	}

	RenderTile tile;

	CPUSplitKernel split_kernel(this);

	/* allocate buffer for kernel globals */
	device_memory kgbuffer;
	kgbuffer.resize(sizeof(KernelGlobals));
	mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);

	KernelGlobals *kg = (KernelGlobals*)kgbuffer.device_pointer;
	*kg = thread_kernel_globals_init();

	requested_features.max_closure = MAX_CLOSURE;
	const bool kernels_loaded = split_kernel.load_kernels(requested_features);

	if(kernels_loaded) {
		while(task.acquire_tile(this, tile)) {
			device_memory data;
			split_kernel.path_trace(&task, tile, kgbuffer, data);

			task.release_tile(tile);

			if(task_pool.canceled() && task.need_finish_queue == false) {
				break;
			}
		}
	}

	/* Shared cleanup for both the failure and the regular path. */
	thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
	mem_free(kgbuffer);
}
|
||||
|
||||
void thread_film_convert(DeviceTask& task)
|
||||
{
|
||||
float sample_scale = 1.0f/(task.sample + 1);
|
||||
@@ -680,10 +496,6 @@ protected:
|
||||
|
||||
inline void thread_kernel_globals_free(KernelGlobals *kg)
|
||||
{
|
||||
if(kg == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if(kg->transparent_shadow_intersections != NULL) {
|
||||
free(kg->transparent_shadow_intersections);
|
||||
}
|
||||
@@ -698,175 +510,8 @@ protected:
|
||||
OSLShader::thread_free(kg);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Remember the requested features on the device. Always returns true. */
virtual bool load_kernels(DeviceRequestedFeatures& requested_features_)
{
	this->requested_features = requested_features_;
	return true;
}
|
||||
};
|
||||
|
||||
/* split kernel */
|
||||
|
||||
class CPUSplitKernelFunction : public SplitKernelFunction {
|
||||
public:
|
||||
CPUDevice* device;
|
||||
void (*func)(KernelGlobals *kg, KernelData *data);
|
||||
|
||||
CPUSplitKernelFunction(CPUDevice* device) : device(device), func(NULL) {}
|
||||
~CPUSplitKernelFunction() {}
|
||||
|
||||
virtual bool enqueue(const KernelDimensions& dim, device_memory& kernel_globals, device_memory& data)
|
||||
{
|
||||
if(!func) {
|
||||
return false;
|
||||
}
|
||||
|
||||
KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer;
|
||||
kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]);
|
||||
|
||||
for(int y = 0; y < dim.global_size[1]; y++) {
|
||||
for(int x = 0; x < dim.global_size[0]; x++) {
|
||||
kg->global_id = make_int2(x, y);
|
||||
|
||||
func(kg, (KernelData*)data.device_pointer);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/* Pass the device on to the DeviceSplitKernel base and keep a typed pointer
 * to it in this class as well.
 */
CPUSplitKernel::CPUSplitKernel(CPUDevice *device)
: DeviceSplitKernel(device),
  device(device)
{
}
|
||||
|
||||
bool CPUSplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim,
|
||||
RenderTile& rtile,
|
||||
int num_global_elements,
|
||||
device_memory& kernel_globals,
|
||||
device_memory& data,
|
||||
device_memory& split_data,
|
||||
device_memory& ray_state,
|
||||
device_memory& queue_index,
|
||||
device_memory& use_queues_flags,
|
||||
device_memory& work_pool_wgs)
|
||||
{
|
||||
typedef void(*data_init_t)(KernelGlobals *kg,
|
||||
ccl_constant KernelData *data,
|
||||
ccl_global void *split_data_buffer,
|
||||
int num_elements,
|
||||
ccl_global char *ray_state,
|
||||
ccl_global uint *rng_state,
|
||||
int start_sample,
|
||||
int end_sample,
|
||||
int sx, int sy, int sw, int sh, int offset, int stride,
|
||||
ccl_global int *Queue_index,
|
||||
int queuesize,
|
||||
ccl_global char *use_queues_flag,
|
||||
ccl_global unsigned int *work_pool_wgs,
|
||||
unsigned int num_samples,
|
||||
ccl_global float *buffer);
|
||||
|
||||
data_init_t data_init;
|
||||
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
|
||||
if(system_cpu_support_avx2()) {
|
||||
data_init = kernel_cpu_avx2_data_init;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
|
||||
if(system_cpu_support_avx()) {
|
||||
data_init = kernel_cpu_avx_data_init;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
|
||||
if(system_cpu_support_sse41()) {
|
||||
data_init = kernel_cpu_sse41_data_init;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
|
||||
if(system_cpu_support_sse3()) {
|
||||
data_init = kernel_cpu_sse3_data_init;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
|
||||
if(system_cpu_support_sse2()) {
|
||||
data_init = kernel_cpu_sse2_data_init;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
data_init = kernel_cpu_data_init;
|
||||
}
|
||||
|
||||
KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer;
|
||||
kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]);
|
||||
|
||||
for(int y = 0; y < dim.global_size[1]; y++) {
|
||||
for(int x = 0; x < dim.global_size[0]; x++) {
|
||||
kg->global_id = make_int2(x, y);
|
||||
|
||||
data_init((KernelGlobals*)kernel_globals.device_pointer,
|
||||
(KernelData*)data.device_pointer,
|
||||
(void*)split_data.device_pointer,
|
||||
num_global_elements,
|
||||
(char*)ray_state.device_pointer,
|
||||
(uint*)rtile.rng_state,
|
||||
rtile.start_sample,
|
||||
rtile.start_sample + rtile.num_samples,
|
||||
rtile.x,
|
||||
rtile.y,
|
||||
rtile.w,
|
||||
rtile.h,
|
||||
rtile.offset,
|
||||
rtile.stride,
|
||||
(int*)queue_index.device_pointer,
|
||||
dim.global_size[0] * dim.global_size[1],
|
||||
(char*)use_queues_flags.device_pointer,
|
||||
(uint*)work_pool_wgs.device_pointer,
|
||||
rtile.num_samples,
|
||||
(float*)rtile.buffer);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Create a CPUSplitKernelFunction for the kernel called `kernel_name`.
 *
 * Returns a newly allocated object, or NULL when the device has no function
 * registered under that name. The requested features are not used.
 */
SplitKernelFunction* CPUSplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&)
{
	typedef void(*split_func_t)(KernelGlobals*, KernelData*);

	CPUSplitKernelFunction *split_function = new CPUSplitKernelFunction(device);
	split_function->func = device->get_kernel_function<split_func_t>(kernel_name);

	if(split_function->func) {
		return split_function;
	}

	/* Lookup failed, do not leak the wrapper. */
	delete split_function;
	return NULL;
}
|
||||
|
||||
/* Local work size of the CPU split kernel: fixed at 1x1. */
int2 CPUSplitKernel::split_kernel_local_size()
{
	const int2 local_size = make_int2(1, 1);
	return local_size;
}
|
||||
|
||||
/* Global work size of the CPU split kernel: fixed at 64x1, none of the
 * arguments are looked at.
 */
int2 CPUSplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask * /*task*/)
{
	const int2 global_size = make_int2(64, 1);
	return global_size;
}
|
||||
|
||||
/* Size of the split state buffer for `num_threads` threads, as reported by
 * split_data_buffer_size() for the given kernel globals.
 */
uint64_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads)
{
	return split_data_buffer_size((KernelGlobals*)kernel_globals.device_pointer, num_threads);
}
|
||||
|
||||
unordered_map<string, void*> CPUDevice::kernel_functions;
|
||||
|
||||
Device *device_cpu_create(DeviceInfo& info, Stats &stats, bool background)
|
||||
{
|
||||
return new CPUDevice(info, stats, background);
|
||||
|
@@ -15,14 +15,12 @@
|
||||
*/
|
||||
|
||||
#include <climits>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "device.h"
|
||||
#include "device_intern.h"
|
||||
#include "device_split_kernel.h"
|
||||
|
||||
#include "buffers.h"
|
||||
|
||||
@@ -44,8 +42,6 @@
|
||||
#include "util_types.h"
|
||||
#include "util_time.h"
|
||||
|
||||
#include "split/kernel_split_data_types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifndef WITH_CUDA_DYNLOAD
|
||||
@@ -82,31 +78,6 @@ int cuewCompilerVersion(void)
|
||||
} /* namespace */
|
||||
#endif /* WITH_CUDA_DYNLOAD */
|
||||
|
||||
class CUDADevice;
|
||||
|
||||
class CUDASplitKernel : public DeviceSplitKernel {
|
||||
CUDADevice *device;
|
||||
public:
|
||||
explicit CUDASplitKernel(CUDADevice *device);
|
||||
|
||||
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
|
||||
|
||||
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
|
||||
RenderTile& rtile,
|
||||
int num_global_elements,
|
||||
device_memory& kernel_globals,
|
||||
device_memory& kernel_data_,
|
||||
device_memory& split_data,
|
||||
device_memory& ray_state,
|
||||
device_memory& queue_index,
|
||||
device_memory& use_queues_flag,
|
||||
device_memory& work_pool_wgs);
|
||||
|
||||
virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
|
||||
virtual int2 split_kernel_local_size();
|
||||
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
|
||||
};
|
||||
|
||||
class CUDADevice : public Device
|
||||
{
|
||||
public:
|
||||
@@ -144,12 +115,6 @@ public:
|
||||
return path_exists(cubins_path);
|
||||
}
|
||||
|
||||
virtual bool show_samples() const
|
||||
{
|
||||
/* The CUDADevice only processes one tile at a time, so showing samples is fine. */
|
||||
return true;
|
||||
}
|
||||
|
||||
/*#ifdef NDEBUG
|
||||
#define cuda_abort()
|
||||
#else
|
||||
@@ -159,7 +124,7 @@ public:
|
||||
{
|
||||
if(first_error) {
|
||||
fprintf(stderr, "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
|
||||
fprintf(stderr, "https://docs.blender.org/manual/en/dev/render/cycles/gpu_rendering.html\n\n");
|
||||
fprintf(stderr, "http://www.blender.org/manual/render/cycles/gpu_rendering.html\n\n");
|
||||
first_error = false;
|
||||
}
|
||||
}
|
||||
@@ -287,16 +252,11 @@ public:
|
||||
return DebugFlags().cuda.adaptive_compile;
|
||||
}
|
||||
|
||||
bool use_split_kernel()
|
||||
{
|
||||
return DebugFlags().cuda.split_kernel;
|
||||
}
|
||||
|
||||
/* Common NVCC flags which stay the same regardless of shading model,
|
||||
* kernel sources md5 and only depends on compiler or compilation settings.
|
||||
*/
|
||||
string compile_kernel_get_common_cflags(
|
||||
const DeviceRequestedFeatures& requested_features, bool split=false)
|
||||
const DeviceRequestedFeatures& requested_features)
|
||||
{
|
||||
const int cuda_version = cuewCompilerVersion();
|
||||
const int machine = system_cpu_bits();
|
||||
@@ -321,11 +281,6 @@ public:
|
||||
#ifdef WITH_CYCLES_DEBUG
|
||||
cflags += " -D__KERNEL_DEBUG__";
|
||||
#endif
|
||||
|
||||
if(split) {
|
||||
cflags += " -D__SPLIT__";
|
||||
}
|
||||
|
||||
return cflags;
|
||||
}
|
||||
|
||||
@@ -359,7 +314,7 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
string compile_kernel(const DeviceRequestedFeatures& requested_features, bool split=false)
|
||||
string compile_kernel(const DeviceRequestedFeatures& requested_features)
|
||||
{
|
||||
/* Compute cubin name. */
|
||||
int major, minor;
|
||||
@@ -368,8 +323,7 @@ public:
|
||||
|
||||
/* Attempt to use kernel provided with Blender. */
|
||||
if(!use_adaptive_compilation()) {
|
||||
const string cubin = path_get(string_printf(split ? "lib/kernel_split_sm_%d%d.cubin"
|
||||
: "lib/kernel_sm_%d%d.cubin",
|
||||
const string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin",
|
||||
major, minor));
|
||||
VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
|
||||
if(path_exists(cubin)) {
|
||||
@@ -379,7 +333,7 @@ public:
|
||||
}
|
||||
|
||||
const string common_cflags =
|
||||
compile_kernel_get_common_cflags(requested_features, split);
|
||||
compile_kernel_get_common_cflags(requested_features);
|
||||
|
||||
/* Try to use locally compiled kernel. */
|
||||
const string kernel_path = path_get("kernel");
|
||||
@@ -390,8 +344,7 @@ public:
|
||||
*/
|
||||
const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags);
|
||||
|
||||
const string cubin_file = string_printf(split ? "cycles_kernel_split_sm%d%d_%s.cubin"
|
||||
: "cycles_kernel_sm%d%d_%s.cubin",
|
||||
const string cubin_file = string_printf("cycles_kernel_sm%d%d_%s.cubin",
|
||||
major, minor,
|
||||
cubin_md5.c_str());
|
||||
const string cubin = path_cache_get(path_join("kernels", cubin_file));
|
||||
@@ -426,7 +379,7 @@ public:
|
||||
const char *nvcc = cuewCompilerPath();
|
||||
const string kernel = path_join(kernel_path,
|
||||
path_join("kernels",
|
||||
path_join("cuda", split ? "kernel_split.cu" : "kernel.cu")));
|
||||
path_join("cuda", "kernel.cu")));
|
||||
double starttime = time_dt();
|
||||
printf("Compiling CUDA kernel ...\n");
|
||||
|
||||
@@ -474,7 +427,7 @@ public:
|
||||
return false;
|
||||
|
||||
/* get kernel */
|
||||
string cubin = compile_kernel(requested_features, use_split_kernel());
|
||||
string cubin = compile_kernel(requested_features);
|
||||
|
||||
if(cubin == "")
|
||||
return false;
|
||||
@@ -507,14 +460,8 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
|
||||
void mem_alloc(device_memory& mem, MemoryType /*type*/)
|
||||
{
|
||||
if(name) {
|
||||
VLOG(1) << "Buffer allocate: " << name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
}
|
||||
|
||||
cuda_push_context();
|
||||
CUdeviceptr device_pointer;
|
||||
size_t size = mem.memory_size();
|
||||
@@ -551,9 +498,7 @@ public:
|
||||
|
||||
void mem_zero(device_memory& mem)
|
||||
{
|
||||
if(mem.data_pointer) {
|
||||
memset((void*)mem.data_pointer, 0, mem.memory_size());
|
||||
}
|
||||
memset((void*)mem.data_pointer, 0, mem.memory_size());
|
||||
|
||||
cuda_push_context();
|
||||
if(mem.device_pointer)
|
||||
@@ -666,7 +611,7 @@ public:
|
||||
/* Data Storage */
|
||||
if(interpolation == INTERPOLATION_NONE) {
|
||||
if(has_bindless_textures) {
|
||||
mem_alloc(NULL, mem, MEM_READ_ONLY);
|
||||
mem_alloc(mem, MEM_READ_ONLY);
|
||||
mem_copy_to(mem);
|
||||
|
||||
cuda_push_context();
|
||||
@@ -690,7 +635,7 @@ public:
|
||||
cuda_pop_context();
|
||||
}
|
||||
else {
|
||||
mem_alloc(NULL, mem, MEM_READ_ONLY);
|
||||
mem_alloc(mem, MEM_READ_ONLY);
|
||||
mem_copy_to(mem);
|
||||
|
||||
cuda_push_context();
|
||||
@@ -1307,48 +1252,25 @@ public:
|
||||
/* Upload Bindless Mapping */
|
||||
load_bindless_mapping();
|
||||
|
||||
if(!use_split_kernel()) {
|
||||
/* keep rendering tiles until done */
|
||||
while(task->acquire_tile(this, tile)) {
|
||||
int start_sample = tile.start_sample;
|
||||
int end_sample = tile.start_sample + tile.num_samples;
|
||||
|
||||
for(int sample = start_sample; sample < end_sample; sample++) {
|
||||
if(task->get_cancel()) {
|
||||
if(task->need_finish_queue == false)
|
||||
break;
|
||||
}
|
||||
|
||||
path_trace(tile, sample, branched);
|
||||
|
||||
tile.sample = sample + 1;
|
||||
|
||||
task->update_progress(&tile, tile.w*tile.h);
|
||||
}
|
||||
|
||||
task->release_tile(tile);
|
||||
}
|
||||
}
|
||||
else {
|
||||
DeviceRequestedFeatures requested_features;
|
||||
if(!use_adaptive_compilation()) {
|
||||
requested_features.max_closure = 64;
|
||||
}
|
||||
|
||||
CUDASplitKernel split_kernel(this);
|
||||
split_kernel.load_kernels(requested_features);
|
||||
|
||||
while(task->acquire_tile(this, tile)) {
|
||||
device_memory void_buffer;
|
||||
split_kernel.path_trace(task, tile, void_buffer, void_buffer);
|
||||
|
||||
task->release_tile(tile);
|
||||
/* keep rendering tiles until done */
|
||||
while(task->acquire_tile(this, tile)) {
|
||||
int start_sample = tile.start_sample;
|
||||
int end_sample = tile.start_sample + tile.num_samples;
|
||||
|
||||
for(int sample = start_sample; sample < end_sample; sample++) {
|
||||
if(task->get_cancel()) {
|
||||
if(task->need_finish_queue == false)
|
||||
break;
|
||||
}
|
||||
|
||||
path_trace(tile, sample, branched);
|
||||
|
||||
tile.sample = sample + 1;
|
||||
|
||||
task->update_progress(&tile);
|
||||
}
|
||||
|
||||
task->release_tile(tile);
|
||||
}
|
||||
}
|
||||
else if(task->type == DeviceTask::SHADER) {
|
||||
@@ -1401,223 +1323,8 @@ public:
|
||||
{
|
||||
task_pool.cancel();
|
||||
}
|
||||
|
||||
friend class CUDASplitKernelFunction;
|
||||
friend class CUDASplitKernel;
|
||||
};
|
||||
|
||||
/* redefine the cuda_assert macro so it can be used outside of the CUDADevice class
|
||||
* now that the definition of that class is complete
|
||||
*/
|
||||
#undef cuda_assert
|
||||
#define cuda_assert(stmt) \
|
||||
{ \
|
||||
CUresult result = stmt; \
|
||||
\
|
||||
if(result != CUDA_SUCCESS) { \
|
||||
string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \
|
||||
if(device->error_msg == "") \
|
||||
device->error_msg = message; \
|
||||
fprintf(stderr, "%s\n", message.c_str()); \
|
||||
/*cuda_abort();*/ \
|
||||
device->cuda_error_documentation(); \
|
||||
} \
|
||||
} (void)0
|
||||
|
||||
/* split kernel */
|
||||
|
||||
class CUDASplitKernelFunction : public SplitKernelFunction{
|
||||
CUDADevice* device;
|
||||
CUfunction func;
|
||||
public:
|
||||
CUDASplitKernelFunction(CUDADevice *device, CUfunction func) : device(device), func(func) {}
|
||||
|
||||
/* enqueue the kernel, returns false if there is an error */
|
||||
bool enqueue(const KernelDimensions &dim, device_memory &/*kg*/, device_memory &/*data*/)
|
||||
{
|
||||
return enqueue(dim, NULL);
|
||||
}
|
||||
|
||||
/* enqueue the kernel, returns false if there is an error */
|
||||
bool enqueue(const KernelDimensions &dim, void *args[])
|
||||
{
|
||||
device->cuda_push_context();
|
||||
|
||||
if(device->have_error())
|
||||
return false;
|
||||
|
||||
/* we ignore dim.local_size for now, as this is faster */
|
||||
int threads_per_block;
|
||||
cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func));
|
||||
|
||||
int xthreads = (int)sqrt(threads_per_block);
|
||||
int ythreads = (int)sqrt(threads_per_block);
|
||||
|
||||
int xblocks = (dim.global_size[0] + xthreads - 1)/xthreads;
|
||||
int yblocks = (dim.global_size[1] + ythreads - 1)/ythreads;
|
||||
|
||||
cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1));
|
||||
|
||||
cuda_assert(cuLaunchKernel(func,
|
||||
xblocks , yblocks, 1, /* blocks */
|
||||
xthreads, ythreads, 1, /* threads */
|
||||
0, 0, args, 0));
|
||||
|
||||
device->cuda_pop_context();
|
||||
|
||||
return !device->have_error();
|
||||
}
|
||||
};
|
||||
|
||||
CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device), device(device)
|
||||
{
|
||||
}
|
||||
|
||||
uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads)
|
||||
{
|
||||
device_vector<uint64_t> size_buffer;
|
||||
size_buffer.resize(1);
|
||||
device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
|
||||
|
||||
device->cuda_push_context();
|
||||
|
||||
uint threads = num_threads;
|
||||
CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer);
|
||||
|
||||
struct args_t {
|
||||
uint* num_threads;
|
||||
CUdeviceptr* size;
|
||||
};
|
||||
|
||||
args_t args = {
|
||||
&threads,
|
||||
&d_size
|
||||
};
|
||||
|
||||
CUfunction state_buffer_size;
|
||||
cuda_assert(cuModuleGetFunction(&state_buffer_size, device->cuModule, "kernel_cuda_state_buffer_size"));
|
||||
|
||||
cuda_assert(cuLaunchKernel(state_buffer_size,
|
||||
1, 1, 1,
|
||||
1, 1, 1,
|
||||
0, 0, &args, 0));
|
||||
|
||||
device->cuda_pop_context();
|
||||
|
||||
device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
|
||||
device->mem_free(size_buffer);
|
||||
|
||||
return *size_buffer.get_data();
|
||||
}
|
||||
|
||||
bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim,
|
||||
RenderTile& rtile,
|
||||
int num_global_elements,
|
||||
device_memory& /*kernel_globals*/,
|
||||
device_memory& /*kernel_data*/,
|
||||
device_memory& split_data,
|
||||
device_memory& ray_state,
|
||||
device_memory& queue_index,
|
||||
device_memory& use_queues_flag,
|
||||
device_memory& work_pool_wgs)
|
||||
{
|
||||
device->cuda_push_context();
|
||||
|
||||
CUdeviceptr d_split_data = device->cuda_device_ptr(split_data.device_pointer);
|
||||
CUdeviceptr d_ray_state = device->cuda_device_ptr(ray_state.device_pointer);
|
||||
CUdeviceptr d_queue_index = device->cuda_device_ptr(queue_index.device_pointer);
|
||||
CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer);
|
||||
CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer);
|
||||
|
||||
CUdeviceptr d_rng_state = device->cuda_device_ptr(rtile.rng_state);
|
||||
CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer);
|
||||
|
||||
int end_sample = rtile.start_sample + rtile.num_samples;
|
||||
int queue_size = dim.global_size[0] * dim.global_size[1];
|
||||
|
||||
struct args_t {
|
||||
CUdeviceptr* split_data_buffer;
|
||||
int* num_elements;
|
||||
CUdeviceptr* ray_state;
|
||||
CUdeviceptr* rng_state;
|
||||
int* start_sample;
|
||||
int* end_sample;
|
||||
int* sx;
|
||||
int* sy;
|
||||
int* sw;
|
||||
int* sh;
|
||||
int* offset;
|
||||
int* stride;
|
||||
CUdeviceptr* queue_index;
|
||||
int* queuesize;
|
||||
CUdeviceptr* use_queues_flag;
|
||||
CUdeviceptr* work_pool_wgs;
|
||||
int* num_samples;
|
||||
CUdeviceptr* buffer;
|
||||
};
|
||||
|
||||
args_t args = {
|
||||
&d_split_data,
|
||||
&num_global_elements,
|
||||
&d_ray_state,
|
||||
&d_rng_state,
|
||||
&rtile.start_sample,
|
||||
&end_sample,
|
||||
&rtile.x,
|
||||
&rtile.y,
|
||||
&rtile.w,
|
||||
&rtile.h,
|
||||
&rtile.offset,
|
||||
&rtile.stride,
|
||||
&d_queue_index,
|
||||
&queue_size,
|
||||
&d_use_queues_flag,
|
||||
&d_work_pool_wgs,
|
||||
&rtile.num_samples,
|
||||
&d_buffer
|
||||
};
|
||||
|
||||
CUfunction data_init;
|
||||
cuda_assert(cuModuleGetFunction(&data_init, device->cuModule, "kernel_cuda_path_trace_data_init"));
|
||||
if(device->have_error()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
CUDASplitKernelFunction(device, data_init).enqueue(dim, (void**)&args);
|
||||
|
||||
device->cuda_pop_context();
|
||||
|
||||
return !device->have_error();
|
||||
}
|
||||
|
||||
SplitKernelFunction* CUDASplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&)
|
||||
{
|
||||
CUfunction func;
|
||||
|
||||
device->cuda_push_context();
|
||||
|
||||
cuda_assert(cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data()));
|
||||
if(device->have_error()) {
|
||||
device->cuda_error_message(string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data()));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
device->cuda_pop_context();
|
||||
|
||||
return new CUDASplitKernelFunction(device, func);
|
||||
}
|
||||
|
||||
int2 CUDASplitKernel::split_kernel_local_size()
|
||||
{
|
||||
return make_int2(32, 1);
|
||||
}
|
||||
|
||||
int2 CUDASplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask */*task*/)
|
||||
{
|
||||
/* TODO(mai): implement something here to detect ideal work size */
|
||||
return make_int2(256, 256);
|
||||
}
|
||||
|
||||
bool device_cuda_init(void)
|
||||
{
|
||||
#ifdef WITH_CUDA_DYNLOAD
|
||||
|
@@ -48,8 +48,7 @@ enum DataType {
|
||||
TYPE_UINT,
|
||||
TYPE_INT,
|
||||
TYPE_FLOAT,
|
||||
TYPE_HALF,
|
||||
TYPE_UINT64,
|
||||
TYPE_HALF
|
||||
};
|
||||
|
||||
static inline size_t datatype_size(DataType datatype)
|
||||
@@ -60,7 +59,6 @@ static inline size_t datatype_size(DataType datatype)
|
||||
case TYPE_UINT: return sizeof(uint);
|
||||
case TYPE_INT: return sizeof(int);
|
||||
case TYPE_HALF: return sizeof(half);
|
||||
case TYPE_UINT64: return sizeof(uint64_t);
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
@@ -162,11 +160,6 @@ template<> struct device_type_traits<half4> {
|
||||
static const int num_elements = 4;
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint64_t> {
|
||||
static const DataType data_type = TYPE_UINT64;
|
||||
static const int num_elements = 1;
|
||||
};
|
||||
|
||||
/* Device Memory */
|
||||
|
||||
class device_memory
|
||||
@@ -187,27 +180,10 @@ public:
|
||||
/* device pointer */
|
||||
device_ptr device_pointer;
|
||||
|
||||
device_memory()
|
||||
{
|
||||
data_type = device_type_traits<uchar>::data_type;
|
||||
data_elements = device_type_traits<uchar>::num_elements;
|
||||
data_pointer = 0;
|
||||
data_size = 0;
|
||||
device_size = 0;
|
||||
data_width = 0;
|
||||
data_height = 0;
|
||||
data_depth = 0;
|
||||
device_pointer = 0;
|
||||
}
|
||||
protected:
|
||||
device_memory() {}
|
||||
virtual ~device_memory() { assert(!device_pointer); }
|
||||
|
||||
void resize(size_t size)
|
||||
{
|
||||
data_size = size;
|
||||
data_width = size;
|
||||
}
|
||||
|
||||
protected:
|
||||
/* no copying */
|
||||
device_memory(const device_memory&);
|
||||
device_memory& operator = (const device_memory&);
|
||||
@@ -222,8 +198,16 @@ public:
|
||||
{
|
||||
data_type = device_type_traits<T>::data_type;
|
||||
data_elements = device_type_traits<T>::num_elements;
|
||||
data_pointer = 0;
|
||||
data_size = 0;
|
||||
device_size = 0;
|
||||
data_width = 0;
|
||||
data_height = 0;
|
||||
data_depth = 0;
|
||||
|
||||
assert(data_elements > 0);
|
||||
|
||||
device_pointer = 0;
|
||||
}
|
||||
|
||||
virtual ~device_vector() {}
|
||||
@@ -282,7 +266,6 @@ public:
|
||||
data_height = 0;
|
||||
data_depth = 0;
|
||||
data_size = 0;
|
||||
device_pointer = 0;
|
||||
}
|
||||
|
||||
size_t size()
|
||||
|
@@ -89,14 +89,6 @@ public:
|
||||
return error_msg;
|
||||
}
|
||||
|
||||
virtual bool show_samples() const
|
||||
{
|
||||
if(devices.size() > 1) {
|
||||
return false;
|
||||
}
|
||||
return devices.front().device->show_samples();
|
||||
}
|
||||
|
||||
bool load_kernels(const DeviceRequestedFeatures& requested_features)
|
||||
{
|
||||
foreach(SubDevice& sub, devices)
|
||||
@@ -106,11 +98,11 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
void mem_alloc(const char *name, device_memory& mem, MemoryType type)
|
||||
void mem_alloc(device_memory& mem, MemoryType type)
|
||||
{
|
||||
foreach(SubDevice& sub, devices) {
|
||||
mem.device_pointer = 0;
|
||||
sub.device->mem_alloc(name, mem, type);
|
||||
sub.device->mem_alloc(mem, type);
|
||||
sub.ptr_map[unique_ptr] = mem.device_pointer;
|
||||
}
|
||||
|
||||
@@ -162,7 +154,6 @@ public:
|
||||
void mem_free(device_memory& mem)
|
||||
{
|
||||
device_ptr tmp = mem.device_pointer;
|
||||
stats.mem_free(mem.device_size);
|
||||
|
||||
foreach(SubDevice& sub, devices) {
|
||||
mem.device_pointer = sub.ptr_map[tmp];
|
||||
@@ -171,6 +162,7 @@ public:
|
||||
}
|
||||
|
||||
mem.device_pointer = 0;
|
||||
stats.mem_free(mem.device_size);
|
||||
}
|
||||
|
||||
void const_copy_to(const char *name, void *host, size_t size)
|
||||
@@ -202,7 +194,6 @@ public:
|
||||
void tex_free(device_memory& mem)
|
||||
{
|
||||
device_ptr tmp = mem.device_pointer;
|
||||
stats.mem_free(mem.device_size);
|
||||
|
||||
foreach(SubDevice& sub, devices) {
|
||||
mem.device_pointer = sub.ptr_map[tmp];
|
||||
@@ -211,6 +202,7 @@ public:
|
||||
}
|
||||
|
||||
mem.device_pointer = 0;
|
||||
stats.mem_free(mem.device_size);
|
||||
}
|
||||
|
||||
void pixels_alloc(device_memory& mem)
|
||||
|
@@ -51,11 +51,6 @@ public:
|
||||
|
||||
thread_mutex rpc_lock;
|
||||
|
||||
virtual bool show_samples() const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
NetworkDevice(DeviceInfo& info, Stats &stats, const char *address)
|
||||
: Device(info, stats, true), socket(io_service)
|
||||
{
|
||||
@@ -87,14 +82,8 @@ public:
|
||||
snd.write();
|
||||
}
|
||||
|
||||
void mem_alloc(const char *name, device_memory& mem, MemoryType type)
|
||||
void mem_alloc(device_memory& mem, MemoryType type)
|
||||
{
|
||||
if(name) {
|
||||
VLOG(1) << "Buffer allocate: " << name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
}
|
||||
|
||||
thread_scoped_lock lock(rpc_lock);
|
||||
|
||||
mem.device_pointer = ++mem_counter;
|
||||
@@ -487,7 +476,7 @@ protected:
|
||||
mem.data_pointer = 0;
|
||||
|
||||
/* perform the allocation on the actual device */
|
||||
device->mem_alloc(NULL, mem, type);
|
||||
device->mem_alloc(mem, type);
|
||||
|
||||
/* store a mapping to/from client_pointer and real device pointer */
|
||||
pointer_mapping_insert(client_pointer, mem.device_pointer);
|
||||
|
@@ -1,306 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2016 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "device_split_kernel.h"
|
||||
|
||||
#include "kernel_types.h"
|
||||
#include "kernel_split_data_types.h"
|
||||
|
||||
#include "util_time.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
static const double alpha = 0.1; /* alpha for rolling average */
|
||||
|
||||
DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
|
||||
{
|
||||
current_max_closure = -1;
|
||||
first_tile = true;
|
||||
|
||||
avg_time_per_sample = 0.0;
|
||||
|
||||
kernel_path_init = NULL;
|
||||
kernel_scene_intersect = NULL;
|
||||
kernel_lamp_emission = NULL;
|
||||
kernel_do_volume = NULL;
|
||||
kernel_queue_enqueue = NULL;
|
||||
kernel_indirect_background = NULL;
|
||||
kernel_shader_eval = NULL;
|
||||
kernel_holdout_emission_blurring_pathtermination_ao = NULL;
|
||||
kernel_subsurface_scatter = NULL;
|
||||
kernel_direct_lighting = NULL;
|
||||
kernel_shadow_blocked_ao = NULL;
|
||||
kernel_shadow_blocked_dl = NULL;
|
||||
kernel_next_iteration_setup = NULL;
|
||||
kernel_indirect_subsurface = NULL;
|
||||
kernel_buffer_update = NULL;
|
||||
}
|
||||
|
||||
DeviceSplitKernel::~DeviceSplitKernel()
|
||||
{
|
||||
device->mem_free(split_data);
|
||||
device->mem_free(ray_state);
|
||||
device->mem_free(use_queues_flag);
|
||||
device->mem_free(queue_index);
|
||||
device->mem_free(work_pool_wgs);
|
||||
|
||||
delete kernel_path_init;
|
||||
delete kernel_scene_intersect;
|
||||
delete kernel_lamp_emission;
|
||||
delete kernel_do_volume;
|
||||
delete kernel_queue_enqueue;
|
||||
delete kernel_indirect_background;
|
||||
delete kernel_shader_eval;
|
||||
delete kernel_holdout_emission_blurring_pathtermination_ao;
|
||||
delete kernel_subsurface_scatter;
|
||||
delete kernel_direct_lighting;
|
||||
delete kernel_shadow_blocked_ao;
|
||||
delete kernel_shadow_blocked_dl;
|
||||
delete kernel_next_iteration_setup;
|
||||
delete kernel_indirect_subsurface;
|
||||
delete kernel_buffer_update;
|
||||
}
|
||||
|
||||
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_features)
|
||||
{
|
||||
#define LOAD_KERNEL(name) \
|
||||
kernel_##name = get_split_kernel_function(#name, requested_features); \
|
||||
if(!kernel_##name) { \
|
||||
return false; \
|
||||
}
|
||||
|
||||
LOAD_KERNEL(path_init);
|
||||
LOAD_KERNEL(scene_intersect);
|
||||
LOAD_KERNEL(lamp_emission);
|
||||
LOAD_KERNEL(do_volume);
|
||||
LOAD_KERNEL(queue_enqueue);
|
||||
LOAD_KERNEL(indirect_background);
|
||||
LOAD_KERNEL(shader_eval);
|
||||
LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
|
||||
LOAD_KERNEL(subsurface_scatter);
|
||||
LOAD_KERNEL(direct_lighting);
|
||||
LOAD_KERNEL(shadow_blocked_ao);
|
||||
LOAD_KERNEL(shadow_blocked_dl);
|
||||
LOAD_KERNEL(next_iteration_setup);
|
||||
LOAD_KERNEL(indirect_subsurface);
|
||||
LOAD_KERNEL(buffer_update);
|
||||
|
||||
#undef LOAD_KERNEL
|
||||
|
||||
current_max_closure = requested_features.max_closure;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size)
|
||||
{
|
||||
uint64_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
|
||||
return max_buffer_size / size_per_element;
|
||||
}
|
||||
|
||||
bool DeviceSplitKernel::path_trace(DeviceTask *task,
|
||||
RenderTile& tile,
|
||||
device_memory& kgbuffer,
|
||||
device_memory& kernel_data)
|
||||
{
|
||||
if(device->have_error()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Get local size */
|
||||
size_t local_size[2];
|
||||
{
|
||||
int2 lsize = split_kernel_local_size();
|
||||
local_size[0] = lsize[0];
|
||||
local_size[1] = lsize[1];
|
||||
}
|
||||
|
||||
/* Set gloabl size */
|
||||
size_t global_size[2];
|
||||
{
|
||||
int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task);
|
||||
|
||||
/* Make sure that set work size is a multiple of local
|
||||
* work size dimensions.
|
||||
*/
|
||||
global_size[0] = round_up(gsize[0], local_size[0]);
|
||||
global_size[1] = round_up(gsize[1], local_size[1]);
|
||||
}
|
||||
|
||||
/* Number of elements in the global state buffer */
|
||||
int num_global_elements = global_size[0] * global_size[1];
|
||||
assert(num_global_elements % WORK_POOL_SIZE == 0);
|
||||
|
||||
/* Allocate all required global memory once. */
|
||||
if(first_tile) {
|
||||
first_tile = false;
|
||||
|
||||
/* Calculate max groups */
|
||||
|
||||
/* Denotes the maximum work groups possible w.r.t. current requested tile size. */
|
||||
unsigned int max_work_groups = num_global_elements / WORK_POOL_SIZE + 1;
|
||||
|
||||
/* Allocate work_pool_wgs memory. */
|
||||
work_pool_wgs.resize(max_work_groups * sizeof(unsigned int));
|
||||
device->mem_alloc("work_pool_wgs", work_pool_wgs, MEM_READ_WRITE);
|
||||
|
||||
queue_index.resize(NUM_QUEUES * sizeof(int));
|
||||
device->mem_alloc("queue_index", queue_index, MEM_READ_WRITE);
|
||||
|
||||
use_queues_flag.resize(sizeof(char));
|
||||
device->mem_alloc("use_queues_flag", use_queues_flag, MEM_READ_WRITE);
|
||||
|
||||
ray_state.resize(num_global_elements);
|
||||
device->mem_alloc("ray_state", ray_state, MEM_READ_WRITE);
|
||||
|
||||
split_data.resize(state_buffer_size(kgbuffer, kernel_data, num_global_elements));
|
||||
device->mem_alloc("split_data", split_data, MEM_READ_WRITE);
|
||||
}
|
||||
|
||||
#define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \
|
||||
if(device->have_error()) { \
|
||||
return false; \
|
||||
} \
|
||||
if(!kernel_##name->enqueue(KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
|
||||
return false; \
|
||||
}
|
||||
|
||||
tile.sample = tile.start_sample;
|
||||
|
||||
/* for exponential increase between tile updates */
|
||||
int time_multiplier = 1;
|
||||
|
||||
while(tile.sample < tile.start_sample + tile.num_samples) {
|
||||
/* to keep track of how long it takes to run a number of samples */
|
||||
double start_time = time_dt();
|
||||
|
||||
/* initial guess to start rolling average */
|
||||
const int initial_num_samples = 1;
|
||||
/* approx number of samples per second */
|
||||
int samples_per_second = (avg_time_per_sample > 0.0) ?
|
||||
int(double(time_multiplier) / avg_time_per_sample) + 1 : initial_num_samples;
|
||||
|
||||
RenderTile subtile = tile;
|
||||
subtile.start_sample = tile.sample;
|
||||
subtile.num_samples = min(samples_per_second, tile.start_sample + tile.num_samples - tile.sample);
|
||||
|
||||
if(device->have_error()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* reset state memory here as global size for data_init
|
||||
* kernel might not be large enough to do in kernel
|
||||
*/
|
||||
device->mem_zero(work_pool_wgs);
|
||||
device->mem_zero(split_data);
|
||||
device->mem_zero(ray_state);
|
||||
|
||||
if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
|
||||
subtile,
|
||||
num_global_elements,
|
||||
kgbuffer,
|
||||
kernel_data,
|
||||
split_data,
|
||||
ray_state,
|
||||
queue_index,
|
||||
use_queues_flag,
|
||||
work_pool_wgs))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size);
|
||||
|
||||
bool activeRaysAvailable = true;
|
||||
|
||||
while(activeRaysAvailable) {
|
||||
/* Do path-iteration in host [Enqueue Path-iteration kernels. */
|
||||
for(int PathIter = 0; PathIter < 16; PathIter++) {
|
||||
ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
|
||||
|
||||
if(task->get_cancel()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Decide if we should exit path-iteration in host. */
|
||||
device->mem_copy_from(ray_state, 0, global_size[0] * global_size[1] * sizeof(char), 1, 1);
|
||||
|
||||
activeRaysAvailable = false;
|
||||
|
||||
for(int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
|
||||
int8_t state = ray_state.get_data()[rayStateIter];
|
||||
|
||||
if(state != RAY_INACTIVE) {
|
||||
if(state == RAY_INVALID) {
|
||||
/* Something went wrong, abort to avoid looping endlessly. */
|
||||
device->set_error("Split kernel error: invalid ray state");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Not all rays are RAY_INACTIVE. */
|
||||
activeRaysAvailable = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(task->get_cancel()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
double time_per_sample = ((time_dt()-start_time) / subtile.num_samples);
|
||||
|
||||
if(avg_time_per_sample == 0.0) {
|
||||
/* start rolling average */
|
||||
avg_time_per_sample = time_per_sample;
|
||||
}
|
||||
else {
|
||||
avg_time_per_sample = alpha*time_per_sample + (1.0-alpha)*avg_time_per_sample;
|
||||
}
|
||||
|
||||
#undef ENQUEUE_SPLIT_KERNEL
|
||||
|
||||
tile.sample += subtile.num_samples;
|
||||
task->update_progress(&tile, tile.w*tile.h*subtile.num_samples);
|
||||
|
||||
time_multiplier = min(time_multiplier << 1, 10);
|
||||
|
||||
if(task->get_cancel()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
|
@@ -1,132 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2016 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef __DEVICE_SPLIT_KERNEL_H__
|
||||
#define __DEVICE_SPLIT_KERNEL_H__
|
||||
|
||||
#include "device.h"
|
||||
#include "buffers.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* When allocate global memory in chunks. We may not be able to
|
||||
* allocate exactly "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes in chunks;
|
||||
* Since some bytes may be needed for aligning chunks of memory;
|
||||
* This is the amount of memory that we dedicate for that purpose.
|
||||
*/
|
||||
#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
|
||||
|
||||
/* Types used for split kernel */
|
||||
|
||||
class KernelDimensions {
|
||||
public:
|
||||
size_t global_size[2];
|
||||
size_t local_size[2];
|
||||
|
||||
KernelDimensions(size_t global_size_[2], size_t local_size_[2])
|
||||
{
|
||||
memcpy(global_size, global_size_, sizeof(global_size));
|
||||
memcpy(local_size, local_size_, sizeof(local_size));
|
||||
}
|
||||
};
|
||||
|
||||
class SplitKernelFunction {
|
||||
public:
|
||||
virtual ~SplitKernelFunction() {}
|
||||
|
||||
/* enqueue the kernel, returns false if there is an error */
|
||||
virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) = 0;
|
||||
};
|
||||
|
||||
class DeviceSplitKernel {
|
||||
private:
|
||||
Device *device;
|
||||
|
||||
SplitKernelFunction *kernel_path_init;
|
||||
SplitKernelFunction *kernel_scene_intersect;
|
||||
SplitKernelFunction *kernel_lamp_emission;
|
||||
SplitKernelFunction *kernel_do_volume;
|
||||
SplitKernelFunction *kernel_queue_enqueue;
|
||||
SplitKernelFunction *kernel_indirect_background;
|
||||
SplitKernelFunction *kernel_shader_eval;
|
||||
SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
|
||||
SplitKernelFunction *kernel_subsurface_scatter;
|
||||
SplitKernelFunction *kernel_direct_lighting;
|
||||
SplitKernelFunction *kernel_shadow_blocked_ao;
|
||||
SplitKernelFunction *kernel_shadow_blocked_dl;
|
||||
SplitKernelFunction *kernel_next_iteration_setup;
|
||||
SplitKernelFunction *kernel_indirect_subsurface;
|
||||
SplitKernelFunction *kernel_buffer_update;
|
||||
|
||||
/* Global memory variables [porting]; These memory is used for
|
||||
* co-operation between different kernels; Data written by one
|
||||
* kernel will be available to another kernel via this global
|
||||
* memory.
|
||||
*/
|
||||
device_memory split_data;
|
||||
device_vector<uchar> ray_state;
|
||||
device_memory queue_index; /* Array of size num_queues * sizeof(int) that tracks the size of each queue. */
|
||||
|
||||
/* Flag to make sceneintersect and lampemission kernel use queues. */
|
||||
device_memory use_queues_flag;
|
||||
|
||||
/* Approximate time it takes to complete one sample */
|
||||
double avg_time_per_sample;
|
||||
|
||||
/* Work pool with respect to each work group. */
|
||||
device_memory work_pool_wgs;
|
||||
|
||||
/* clos_max value for which the kernels have been loaded currently. */
|
||||
int current_max_closure;
|
||||
|
||||
/* Marked True in constructor and marked false at the end of path_trace(). */
|
||||
bool first_tile;
|
||||
|
||||
public:
|
||||
explicit DeviceSplitKernel(Device* device);
|
||||
virtual ~DeviceSplitKernel();
|
||||
|
||||
bool load_kernels(const DeviceRequestedFeatures& requested_features);
|
||||
bool path_trace(DeviceTask *task,
|
||||
RenderTile& rtile,
|
||||
device_memory& kgbuffer,
|
||||
device_memory& kernel_data);
|
||||
|
||||
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
|
||||
size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size);
|
||||
|
||||
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
|
||||
RenderTile& rtile,
|
||||
int num_global_elements,
|
||||
device_memory& kernel_globals,
|
||||
device_memory& kernel_data_,
|
||||
device_memory& split_data,
|
||||
device_memory& ray_state,
|
||||
device_memory& queue_index,
|
||||
device_memory& use_queues_flag,
|
||||
device_memory& work_pool_wgs) = 0;
|
||||
|
||||
virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&) = 0;
|
||||
virtual int2 split_kernel_local_size() = 0;
|
||||
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task) = 0;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __DEVICE_SPLIT_KERNEL_H__ */
|
||||
|
||||
|
||||
|
@@ -19,8 +19,6 @@
|
||||
|
||||
#include "device_task.h"
|
||||
|
||||
#include "buffers.h"
|
||||
|
||||
#include "util_algorithm.h"
|
||||
#include "util_time.h"
|
||||
|
||||
@@ -101,18 +99,14 @@ void DeviceTask::split(list<DeviceTask>& tasks, int num, int max_size)
|
||||
}
|
||||
}
|
||||
|
||||
void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
|
||||
void DeviceTask::update_progress(RenderTile *rtile)
|
||||
{
|
||||
if((type != PATH_TRACE) &&
|
||||
(type != SHADER))
|
||||
return;
|
||||
|
||||
if(update_progress_sample) {
|
||||
if(pixel_samples == -1) {
|
||||
pixel_samples = shader_w;
|
||||
}
|
||||
update_progress_sample(pixel_samples, rtile? rtile->sample : 0);
|
||||
}
|
||||
if(update_progress_sample)
|
||||
update_progress_sample();
|
||||
|
||||
if(update_tile_sample) {
|
||||
double current_time = time_dt();
|
||||
|
@@ -51,17 +51,15 @@ public:
|
||||
int shader_filter;
|
||||
int shader_x, shader_w;
|
||||
|
||||
int passes_size;
|
||||
|
||||
explicit DeviceTask(Type type = PATH_TRACE);
|
||||
|
||||
int get_subtask_count(int num, int max_size = 0);
|
||||
void split(list<DeviceTask>& tasks, int num, int max_size = 0);
|
||||
|
||||
void update_progress(RenderTile *rtile, int pixel_samples = -1);
|
||||
void update_progress(RenderTile *rtile);
|
||||
|
||||
function<bool(Device *device, RenderTile&)> acquire_tile;
|
||||
function<void(long, int)> update_progress_sample;
|
||||
function<void(void)> update_progress_sample;
|
||||
function<void(RenderTile&)> update_tile_sample;
|
||||
function<void(RenderTile&)> release_tile;
|
||||
function<bool(void)> get_cancel;
|
||||
|
@@ -26,30 +26,30 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Define CYCLES_DISABLE_DRIVER_WORKAROUNDS to disable workaounds for testing */
|
||||
#ifndef CYCLES_DISABLE_DRIVER_WORKAROUNDS
|
||||
/* Work around AMD driver hangs by ensuring each command is finished before doing anything else. */
|
||||
# undef clEnqueueNDRangeKernel
|
||||
# define clEnqueueNDRangeKernel(a, b, c, d, e, f, g, h, i) \
|
||||
clFinish(a); \
|
||||
CLEW_GET_FUN(__clewEnqueueNDRangeKernel)(a, b, c, d, e, f, g, h, i); \
|
||||
clFinish(a);
|
||||
|
||||
# undef clEnqueueWriteBuffer
|
||||
# define clEnqueueWriteBuffer(a, b, c, d, e, f, g, h, i) \
|
||||
clFinish(a); \
|
||||
CLEW_GET_FUN(__clewEnqueueWriteBuffer)(a, b, c, d, e, f, g, h, i); \
|
||||
clFinish(a);
|
||||
|
||||
# undef clEnqueueReadBuffer
|
||||
# define clEnqueueReadBuffer(a, b, c, d, e, f, g, h, i) \
|
||||
clFinish(a); \
|
||||
CLEW_GET_FUN(__clewEnqueueReadBuffer)(a, b, c, d, e, f, g, h, i); \
|
||||
clFinish(a);
|
||||
#endif /* CYCLES_DISABLE_DRIVER_WORKAROUNDS */
|
||||
|
||||
#define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
|
||||
|
||||
/* Macro declarations used with split kernel */
|
||||
|
||||
/* Macro to enable/disable work-stealing */
|
||||
#define __WORK_STEALING__
|
||||
|
||||
#define SPLIT_KERNEL_LOCAL_SIZE_X 64
|
||||
#define SPLIT_KERNEL_LOCAL_SIZE_Y 1
|
||||
|
||||
/* This value may be tuned according to the scene we are rendering.
|
||||
*
|
||||
* Modifying PATH_ITER_INC_FACTOR value proportional to number of expected
|
||||
* ray-bounces will improve performance.
|
||||
*/
|
||||
#define PATH_ITER_INC_FACTOR 8
|
||||
|
||||
/* When allocate global memory in chunks. We may not be able to
|
||||
* allocate exactly "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes in chunks;
|
||||
* Since some bytes may be needed for aligning chunks of memory;
|
||||
* This is the amount of memory that we dedicate for that purpose.
|
||||
*/
|
||||
#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
|
||||
|
||||
struct OpenCLPlatformDevice {
|
||||
OpenCLPlatformDevice(cl_platform_id platform_id,
|
||||
const string& platform_name,
|
||||
@@ -90,7 +90,6 @@ public:
|
||||
cl_device_id device_id);
|
||||
static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
|
||||
bool force_all = false);
|
||||
static bool use_single_program();
|
||||
};
|
||||
|
||||
/* Thread safe cache for contexts and programs.
|
||||
@@ -249,7 +248,6 @@ public:
|
||||
|
||||
bool device_initialized;
|
||||
string platform_name;
|
||||
string device_name;
|
||||
|
||||
bool opencl_error(cl_int err);
|
||||
void opencl_error(const string& message);
|
||||
@@ -268,10 +266,10 @@ public:
|
||||
|
||||
/* Has to be implemented by the real device classes.
|
||||
* The base device will then load all these programs. */
|
||||
virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
|
||||
virtual void load_kernels(const DeviceRequestedFeatures& requested_features,
|
||||
vector<OpenCLProgram*> &programs) = 0;
|
||||
|
||||
void mem_alloc(const char *name, device_memory& mem, MemoryType type);
|
||||
void mem_alloc(device_memory& mem, MemoryType type);
|
||||
void mem_copy_to(device_memory& mem);
|
||||
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem);
|
||||
void mem_zero(device_memory& mem);
|
||||
@@ -328,39 +326,16 @@ protected:
|
||||
|
||||
class ArgumentWrapper {
|
||||
public:
|
||||
ArgumentWrapper() : size(0), pointer(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
ArgumentWrapper(device_memory& argument) : size(sizeof(void*)),
|
||||
pointer((void*)(&argument.device_pointer))
|
||||
{
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ArgumentWrapper(device_vector<T>& argument) : size(sizeof(void*)),
|
||||
pointer((void*)(&argument.device_pointer))
|
||||
{
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ArgumentWrapper() : size(0), pointer(NULL) {}
|
||||
template <typename T>
|
||||
ArgumentWrapper(T& argument) : size(sizeof(argument)),
|
||||
pointer(&argument)
|
||||
{
|
||||
}
|
||||
|
||||
pointer(&argument) { }
|
||||
ArgumentWrapper(int argument) : size(sizeof(int)),
|
||||
int_value(argument),
|
||||
pointer(&int_value)
|
||||
{
|
||||
}
|
||||
|
||||
pointer(&int_value) { }
|
||||
ArgumentWrapper(float argument) : size(sizeof(float)),
|
||||
float_value(argument),
|
||||
pointer(&float_value)
|
||||
{
|
||||
}
|
||||
|
||||
pointer(&float_value) { }
|
||||
size_t size;
|
||||
int int_value;
|
||||
float float_value;
|
||||
|
@@ -82,10 +82,9 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou
|
||||
cpPlatform = platform_device.platform_id;
|
||||
cdDevice = platform_device.device_id;
|
||||
platform_name = platform_device.platform_name;
|
||||
device_name = platform_device.device_name;
|
||||
VLOG(2) << "Creating new Cycles device for OpenCL platform "
|
||||
<< platform_name << ", device "
|
||||
<< device_name << ".";
|
||||
<< platform_device.device_name << ".";
|
||||
|
||||
{
|
||||
/* try to use cached context */
|
||||
@@ -114,16 +113,12 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou
|
||||
}
|
||||
|
||||
cqCommandQueue = clCreateCommandQueue(cxContext, cdDevice, 0, &ciErr);
|
||||
if(opencl_error(ciErr)) {
|
||||
opencl_error("OpenCL: Error creating command queue");
|
||||
if(opencl_error(ciErr))
|
||||
return;
|
||||
}
|
||||
|
||||
null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
|
||||
if(opencl_error(ciErr)) {
|
||||
opencl_error("OpenCL: Error creating memory buffer for NULL");
|
||||
if(opencl_error(ciErr))
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Device init success\n");
|
||||
device_initialized = true;
|
||||
@@ -196,8 +191,6 @@ string OpenCLDeviceBase::device_md5_hash(string kernel_custom_build_options)
|
||||
|
||||
bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_features)
|
||||
{
|
||||
VLOG(2) << "Loading kernels for platform " << platform_name
|
||||
<< ", device " << device_name << ".";
|
||||
/* Verify if device was initialized. */
|
||||
if(!device_initialized) {
|
||||
fprintf(stderr, "OpenCL: failed to initialize device.\n");
|
||||
@@ -213,14 +206,11 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
|
||||
base_program.add_kernel(ustring("convert_to_half_float"));
|
||||
base_program.add_kernel(ustring("shader"));
|
||||
base_program.add_kernel(ustring("bake"));
|
||||
base_program.add_kernel(ustring("zero_buffer"));
|
||||
|
||||
vector<OpenCLProgram*> programs;
|
||||
programs.push_back(&base_program);
|
||||
/* Call actual class to fill the vector with its programs. */
|
||||
if(!load_kernels(requested_features, programs)) {
|
||||
return false;
|
||||
}
|
||||
load_kernels(requested_features, programs);
|
||||
|
||||
/* Parallel compilation is supported by Cycles, but currently all OpenCL frameworks
|
||||
* serialize the calls internally, so it's not much use right now.
|
||||
@@ -252,14 +242,8 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
|
||||
return true;
|
||||
}
|
||||
|
||||
void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryType type)
|
||||
void OpenCLDeviceBase::mem_alloc(device_memory& mem, MemoryType type)
|
||||
{
|
||||
if(name) {
|
||||
VLOG(1) << "Buffer allocate: " << name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
}
|
||||
|
||||
size_t size = mem.memory_size();
|
||||
|
||||
cl_mem_flags mem_flag;
|
||||
@@ -327,61 +311,8 @@ void OpenCLDeviceBase::mem_copy_from(device_memory& mem, int y, int w, int h, in
|
||||
void OpenCLDeviceBase::mem_zero(device_memory& mem)
|
||||
{
|
||||
if(mem.device_pointer) {
|
||||
if(base_program.is_loaded()) {
|
||||
cl_kernel ckZeroBuffer = base_program(ustring("zero_buffer"));
|
||||
|
||||
size_t global_size[] = {1024, 1024};
|
||||
size_t num_threads = global_size[0] * global_size[1];
|
||||
|
||||
cl_mem d_buffer = CL_MEM_PTR(mem.device_pointer);
|
||||
cl_ulong d_offset = 0;
|
||||
cl_ulong d_size = 0;
|
||||
|
||||
while(d_offset < mem.memory_size()) {
|
||||
d_size = std::min<cl_ulong>(num_threads*sizeof(float4), mem.memory_size() - d_offset);
|
||||
|
||||
kernel_set_args(ckZeroBuffer, 0, d_buffer, d_size, d_offset);
|
||||
|
||||
ciErr = clEnqueueNDRangeKernel(cqCommandQueue,
|
||||
ckZeroBuffer,
|
||||
2,
|
||||
NULL,
|
||||
global_size,
|
||||
NULL,
|
||||
0,
|
||||
NULL,
|
||||
NULL);
|
||||
opencl_assert_err(ciErr, "clEnqueueNDRangeKernel");
|
||||
|
||||
d_offset += d_size;
|
||||
}
|
||||
}
|
||||
|
||||
if(mem.data_pointer) {
|
||||
memset((void*)mem.data_pointer, 0, mem.memory_size());
|
||||
}
|
||||
|
||||
if(!base_program.is_loaded()) {
|
||||
void* zero = (void*)mem.data_pointer;
|
||||
|
||||
if(!mem.data_pointer) {
|
||||
zero = util_aligned_malloc(mem.memory_size(), 16);
|
||||
memset(zero, 0, mem.memory_size());
|
||||
}
|
||||
|
||||
opencl_assert(clEnqueueWriteBuffer(cqCommandQueue,
|
||||
CL_MEM_PTR(mem.device_pointer),
|
||||
CL_TRUE,
|
||||
0,
|
||||
mem.memory_size(),
|
||||
zero,
|
||||
0,
|
||||
NULL, NULL));
|
||||
|
||||
if(!mem.data_pointer) {
|
||||
util_aligned_free(zero);
|
||||
}
|
||||
}
|
||||
memset((void*)mem.data_pointer, 0, mem.memory_size());
|
||||
mem_copy_to(mem);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -406,7 +337,7 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size)
|
||||
device_vector<uchar> *data = new device_vector<uchar>();
|
||||
data->copy((uchar*)host, size);
|
||||
|
||||
mem_alloc(name, *data, MEM_READ_ONLY);
|
||||
mem_alloc(*data, MEM_READ_ONLY);
|
||||
i = const_mem_map.insert(ConstMemMap::value_type(name, data)).first;
|
||||
}
|
||||
else {
|
||||
@@ -425,7 +356,7 @@ void OpenCLDeviceBase::tex_alloc(const char *name,
|
||||
VLOG(1) << "Texture allocate: " << name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
mem_alloc(NULL, mem, MEM_READ_ONLY);
|
||||
mem_alloc(mem, MEM_READ_ONLY);
|
||||
mem_copy_to(mem);
|
||||
assert(mem_map.find(name) == mem_map.end());
|
||||
mem_map.insert(MemMap::value_type(name, mem.device_pointer));
|
||||
|
@@ -39,16 +39,11 @@ public:
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool show_samples() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
|
||||
virtual void load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
|
||||
vector<OpenCLProgram*> &programs)
|
||||
{
|
||||
path_trace_program.add_kernel(ustring("path_trace"));
|
||||
programs.push_back(&path_trace_program);
|
||||
return true;
|
||||
}
|
||||
|
||||
~OpenCLDeviceMegaKernel()
|
||||
@@ -125,7 +120,7 @@ public:
|
||||
|
||||
tile.sample = sample + 1;
|
||||
|
||||
task->update_progress(&tile, tile.w*tile.h);
|
||||
task->update_progress(&tile);
|
||||
}
|
||||
|
||||
/* Complete kernel execution before releasing the tile */
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -19,7 +19,6 @@
|
||||
#include "opencl.h"
|
||||
|
||||
#include "util_logging.h"
|
||||
#include "util_md5.h"
|
||||
#include "util_path.h"
|
||||
#include "util_time.h"
|
||||
|
||||
@@ -310,8 +309,6 @@ bool OpenCLDeviceBase::OpenCLProgram::build_kernel(const string *debug_src)
|
||||
string build_options;
|
||||
build_options = device->kernel_build_options(debug_src) + kernel_build_options;
|
||||
|
||||
VLOG(1) << "Build options passed to clBuildProgram: '"
|
||||
<< build_options << "'.";
|
||||
cl_int ciErr = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
|
||||
|
||||
/* show warnings even if build is successful */
|
||||
@@ -339,13 +336,12 @@ bool OpenCLDeviceBase::OpenCLProgram::build_kernel(const string *debug_src)
|
||||
|
||||
bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
|
||||
{
|
||||
string source = "#include \"kernels/opencl/" + kernel_file + "\"\n";
|
||||
string source = "#include \"kernels/opencl/" + kernel_file + "\" // " + OpenCLCache::get_kernel_md5() + "\n";
|
||||
/* We compile kernels consisting of many files. Unfortunately, OpenCL
|
||||
* kernel caches do not seem to recognize changes in included files.
|
||||
* so we force recompile on changes by adding the md5 hash of all files.
|
||||
*/
|
||||
source = path_source_replace_includes(source, path_get("kernel"));
|
||||
source += "\n// " + util_md5_string(source) + "\n";
|
||||
|
||||
if(debug_src) {
|
||||
path_write_text(*debug_src, source);
|
||||
@@ -356,10 +352,10 @@ bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
|
||||
cl_int ciErr;
|
||||
|
||||
program = clCreateProgramWithSource(device->cxContext,
|
||||
1,
|
||||
&source_str,
|
||||
&source_len,
|
||||
&ciErr);
|
||||
1,
|
||||
&source_str,
|
||||
&source_len,
|
||||
&ciErr);
|
||||
|
||||
if(ciErr != CL_SUCCESS) {
|
||||
add_error(string("OpenCL program creation failed: ") + clewErrorString(ciErr));
|
||||
@@ -442,11 +438,7 @@ void OpenCLDeviceBase::OpenCLProgram::load()
|
||||
if(!program) {
|
||||
add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
|
||||
|
||||
/* need to create source to get md5 */
|
||||
string source = "#include \"kernels/opencl/" + kernel_file + "\"\n";
|
||||
source = path_source_replace_includes(source, path_get("kernel"));
|
||||
|
||||
string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
|
||||
string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + OpenCLCache::get_kernel_md5();
|
||||
basename = path_cache_get(path_join("kernels", basename));
|
||||
string clbin = basename + ".clbin";
|
||||
|
||||
@@ -552,11 +544,6 @@ bool OpenCLInfo::use_debug()
|
||||
return DebugFlags().opencl.debug;
|
||||
}
|
||||
|
||||
bool OpenCLInfo::use_single_program()
|
||||
{
|
||||
return DebugFlags().opencl.single_program;
|
||||
}
|
||||
|
||||
bool OpenCLInfo::kernel_use_advanced_shading(const string& platform)
|
||||
{
|
||||
/* keep this in sync with kernel_types.h! */
|
||||
@@ -605,19 +592,6 @@ bool OpenCLInfo::device_supported(const string& platform_name,
|
||||
sizeof(cl_device_type),
|
||||
&device_type,
|
||||
NULL);
|
||||
char device_name[1024] = "\0";
|
||||
clGetDeviceInfo(device_id,
|
||||
CL_DEVICE_NAME,
|
||||
sizeof(device_name),
|
||||
&device_name,
|
||||
NULL);
|
||||
/* It is possible to have an Iris GPU on AMD/Apple OpenCL framework
|
||||
* (aka, it will not be on Intel framework). This isn't supported
|
||||
* and needs an explicit blacklist.
|
||||
*/
|
||||
if(strstr(device_name, "Iris")) {
|
||||
return false;
|
||||
}
|
||||
if(platform_name == "AMD Accelerated Parallel Processing" &&
|
||||
device_type == CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
@@ -774,10 +748,10 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
|
||||
num_devices = 0;
|
||||
cl_int ciErr;
|
||||
if((ciErr = clGetDeviceIDs(platform_id,
|
||||
device_type,
|
||||
0,
|
||||
NULL,
|
||||
&num_devices)) != CL_SUCCESS || num_devices == 0)
|
||||
device_type,
|
||||
0,
|
||||
NULL,
|
||||
&num_devices)) != CL_SUCCESS || num_devices == 0)
|
||||
{
|
||||
FIRST_VLOG(2) << "Ignoring platform " << platform_name
|
||||
<< ", failed to fetch number of devices: " << string(clewErrorString(ciErr));
|
||||
|
@@ -13,28 +13,19 @@ set(INC_SYS
|
||||
|
||||
set(SRC
|
||||
kernels/cpu/kernel.cpp
|
||||
kernels/cpu/kernel_split.cpp
|
||||
kernels/opencl/kernel.cl
|
||||
kernels/opencl/kernel_state_buffer_size.cl
|
||||
kernels/opencl/kernel_split.cl
|
||||
kernels/opencl/kernel_data_init.cl
|
||||
kernels/opencl/kernel_path_init.cl
|
||||
kernels/opencl/kernel_queue_enqueue.cl
|
||||
kernels/opencl/kernel_scene_intersect.cl
|
||||
kernels/opencl/kernel_lamp_emission.cl
|
||||
kernels/opencl/kernel_do_volume.cl
|
||||
kernels/opencl/kernel_indirect_background.cl
|
||||
kernels/opencl/kernel_background_buffer_update.cl
|
||||
kernels/opencl/kernel_shader_eval.cl
|
||||
kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
|
||||
kernels/opencl/kernel_subsurface_scatter.cl
|
||||
kernels/opencl/kernel_direct_lighting.cl
|
||||
kernels/opencl/kernel_shadow_blocked_ao.cl
|
||||
kernels/opencl/kernel_shadow_blocked_dl.cl
|
||||
kernels/opencl/kernel_shadow_blocked.cl
|
||||
kernels/opencl/kernel_next_iteration_setup.cl
|
||||
kernels/opencl/kernel_indirect_subsurface.cl
|
||||
kernels/opencl/kernel_buffer_update.cl
|
||||
kernels/opencl/kernel_sum_all_radiance.cl
|
||||
kernels/cuda/kernel.cu
|
||||
kernels/cuda/kernel_split.cu
|
||||
)
|
||||
|
||||
set(SRC_BVH_HEADERS
|
||||
@@ -77,7 +68,6 @@ set(SRC_HEADERS
|
||||
kernel_path_common.h
|
||||
kernel_path_state.h
|
||||
kernel_path_surface.h
|
||||
kernel_path_subsurface.h
|
||||
kernel_path_volume.h
|
||||
kernel_projection.h
|
||||
kernel_queues.h
|
||||
@@ -98,10 +88,6 @@ set(SRC_KERNELS_CPU_HEADERS
|
||||
kernels/cpu/kernel_cpu_image.h
|
||||
)
|
||||
|
||||
set(SRC_KERNELS_CUDA_HEADERS
|
||||
kernels/cuda/kernel_config.h
|
||||
)
|
||||
|
||||
set(SRC_CLOSURE_HEADERS
|
||||
closure/alloc.h
|
||||
closure/bsdf.h
|
||||
@@ -178,8 +164,6 @@ set(SRC_GEOM_HEADERS
|
||||
geom/geom_curve.h
|
||||
geom/geom_motion_curve.h
|
||||
geom/geom_motion_triangle.h
|
||||
geom/geom_motion_triangle_intersect.h
|
||||
geom/geom_motion_triangle_shader.h
|
||||
geom/geom_object.h
|
||||
geom/geom_patch.h
|
||||
geom/geom_primitive.h
|
||||
@@ -203,25 +187,17 @@ set(SRC_UTIL_HEADERS
|
||||
)
|
||||
|
||||
set(SRC_SPLIT_HEADERS
|
||||
split/kernel_buffer_update.h
|
||||
split/kernel_background_buffer_update.h
|
||||
split/kernel_data_init.h
|
||||
split/kernel_direct_lighting.h
|
||||
split/kernel_do_volume.h
|
||||
split/kernel_holdout_emission_blurring_pathtermination_ao.h
|
||||
split/kernel_indirect_background.h
|
||||
split/kernel_indirect_subsurface.h
|
||||
split/kernel_lamp_emission.h
|
||||
split/kernel_next_iteration_setup.h
|
||||
split/kernel_path_init.h
|
||||
split/kernel_queue_enqueue.h
|
||||
split/kernel_scene_intersect.h
|
||||
split/kernel_shader_eval.h
|
||||
split/kernel_shadow_blocked_ao.h
|
||||
split/kernel_shadow_blocked_dl.h
|
||||
split/kernel_shadow_blocked.h
|
||||
split/kernel_split_common.h
|
||||
split/kernel_split_data.h
|
||||
split/kernel_split_data_types.h
|
||||
split/kernel_subsurface_scatter.h
|
||||
split/kernel_sum_all_radiance.h
|
||||
)
|
||||
|
||||
# CUDA module
|
||||
@@ -249,9 +225,8 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
endif()
|
||||
|
||||
# build for each arch
|
||||
set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu
|
||||
set(cuda_sources kernels/cuda/kernel.cu
|
||||
${SRC_HEADERS}
|
||||
${SRC_KERNELS_CUDA_HEADERS}
|
||||
${SRC_BVH_HEADERS}
|
||||
${SRC_SVM_HEADERS}
|
||||
${SRC_GEOM_HEADERS}
|
||||
@@ -260,22 +235,15 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
)
|
||||
set(cuda_cubins)
|
||||
|
||||
macro(CYCLES_CUDA_KERNEL_ADD arch split experimental)
|
||||
if(${split})
|
||||
set(cuda_extra_flags "-D__SPLIT__")
|
||||
set(cuda_cubin kernel_split)
|
||||
macro(CYCLES_CUDA_KERNEL_ADD arch experimental)
|
||||
if(${experimental})
|
||||
set(cuda_extra_flags "-D__KERNEL_EXPERIMENTAL__")
|
||||
set(cuda_cubin kernel_experimental_${arch}.cubin)
|
||||
else()
|
||||
set(cuda_extra_flags "")
|
||||
set(cuda_cubin kernel)
|
||||
set(cuda_cubin kernel_${arch}.cubin)
|
||||
endif()
|
||||
|
||||
if(${experimental})
|
||||
set(cuda_extra_flags ${cuda_extra_flags} -D__KERNEL_EXPERIMENTAL__)
|
||||
set(cuda_cubin ${cuda_cubin}_experimental)
|
||||
endif()
|
||||
|
||||
set(cuda_cubin ${cuda_cubin}_${arch}.cubin)
|
||||
|
||||
if(WITH_CYCLES_DEBUG)
|
||||
set(cuda_debug_flags "-D__KERNEL_DEBUG__")
|
||||
else()
|
||||
@@ -288,19 +256,13 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${cuda_nvcc_version}")
|
||||
set(cuda_math_flags "--use_fast_math")
|
||||
|
||||
if(split)
|
||||
set(cuda_kernel_src "/kernels/cuda/kernel_split.cu")
|
||||
else()
|
||||
set(cuda_kernel_src "/kernels/cuda/kernel.cu")
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${cuda_cubin}
|
||||
COMMAND ${cuda_nvcc_command}
|
||||
-arch=${arch}
|
||||
${CUDA_NVCC_FLAGS}
|
||||
-m${CUDA_BITS}
|
||||
--cubin ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
|
||||
--cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda/kernel.cu
|
||||
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
|
||||
--ptxas-options="-v"
|
||||
${cuda_arch_flags}
|
||||
@@ -327,12 +289,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
|
||||
foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
|
||||
# Compile regular kernel
|
||||
CYCLES_CUDA_KERNEL_ADD(${arch} FALSE FALSE)
|
||||
|
||||
if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES)
|
||||
# Compile split kernel
|
||||
CYCLES_CUDA_KERNEL_ADD(${arch} TRUE FALSE)
|
||||
endif()
|
||||
CYCLES_CUDA_KERNEL_ADD(${arch} FALSE)
|
||||
endforeach()
|
||||
|
||||
add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
|
||||
@@ -355,42 +312,31 @@ if(CXX_HAS_SSE)
|
||||
kernels/cpu/kernel_sse2.cpp
|
||||
kernels/cpu/kernel_sse3.cpp
|
||||
kernels/cpu/kernel_sse41.cpp
|
||||
kernels/cpu/kernel_split_sse2.cpp
|
||||
kernels/cpu/kernel_split_sse3.cpp
|
||||
kernels/cpu/kernel_split_sse41.cpp
|
||||
)
|
||||
|
||||
set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
|
||||
endif()
|
||||
|
||||
if(CXX_HAS_AVX)
|
||||
list(APPEND SRC
|
||||
kernels/cpu/kernel_avx.cpp
|
||||
kernels/cpu/kernel_split_avx.cpp
|
||||
)
|
||||
set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
|
||||
endif()
|
||||
|
||||
if(CXX_HAS_AVX2)
|
||||
list(APPEND SRC
|
||||
kernels/cpu/kernel_avx2.cpp
|
||||
kernels/cpu/kernel_split_avx2.cpp
|
||||
)
|
||||
set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
|
||||
set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
|
||||
endif()
|
||||
|
||||
add_library(cycles_kernel
|
||||
${SRC}
|
||||
${SRC_HEADERS}
|
||||
${SRC_KERNELS_CPU_HEADERS}
|
||||
${SRC_KERNELS_CUDA_HEADERS}
|
||||
${SRC_BVH_HEADERS}
|
||||
${SRC_CLOSURE_HEADERS}
|
||||
${SRC_SVM_HEADERS}
|
||||
@@ -413,28 +359,19 @@ endif()
|
||||
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
|
||||
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_state_buffer_size.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_do_volume.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_background.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_background_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_scatter.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_dl.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_sum_all_radiance.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_CUDA_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/bvh)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/closure)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm)
|
||||
|
@@ -357,7 +357,7 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(__VOLUME_RECORD_ALL__) || (defined(__SHADOW_RECORD_ALL__) && defined(__KERNEL_CPU__))
|
||||
#if defined(__SHADOW_RECORD_ALL__) || defined (__VOLUME_RECORD_ALL__)
|
||||
/* ToDo: Move to another file? */
|
||||
ccl_device int intersections_compare(const void *a, const void *b)
|
||||
{
|
||||
@@ -373,28 +373,5 @@ ccl_device int intersections_compare(const void *a, const void *b)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__SHADOW_RECORD_ALL__)
|
||||
ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
|
||||
{
|
||||
#ifdef __KERNEL_GPU__
|
||||
/* Use bubble sort which has more friendly memory pattern on GPU. */
|
||||
bool swapped;
|
||||
do {
|
||||
swapped = false;
|
||||
for(int j = 0; j < num_hits - 1; ++j) {
|
||||
if(hits[j].t > hits[j + 1].t) {
|
||||
struct Intersection tmp = hits[j];
|
||||
hits[j] = hits[j + 1];
|
||||
hits[j + 1] = tmp;
|
||||
swapped = true;
|
||||
}
|
||||
}
|
||||
--num_hits;
|
||||
} while(swapped);
|
||||
#else
|
||||
qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
|
||||
#endif
|
||||
}
|
||||
#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
|
@@ -454,7 +454,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
|
||||
Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
|
||||
|
||||
float3 aligned_dir0 = transform_direction(&space0, dir),
|
||||
aligned_dir1 = transform_direction(&space1, dir);
|
||||
aligned_dir1 = transform_direction(&space1, dir);;
|
||||
float3 aligned_P0 = transform_point(&space0, P),
|
||||
aligned_P1 = transform_point(&space1, P);
|
||||
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
|
||||
@@ -516,7 +516,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
|
||||
Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
|
||||
|
||||
float3 aligned_dir0 = transform_direction(&space0, dir),
|
||||
aligned_dir1 = transform_direction(&space1, dir);
|
||||
aligned_dir1 = transform_direction(&space1, dir);;
|
||||
float3 aligned_P0 = transform_point(&space0, P),
|
||||
aligned_P1 = transform_point(&space1, P);
|
||||
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
|
||||
|
@@ -187,7 +187,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
|
||||
/* primitive intersection */
|
||||
while(prim_addr < prim_addr2) {
|
||||
kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
|
||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||
|
||||
bool hit;
|
||||
|
||||
@@ -222,7 +222,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
#if BVH_FEATURE(BVH_HAIR)
|
||||
case PRIMITIVE_CURVE:
|
||||
case PRIMITIVE_MOTION_CURVE: {
|
||||
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
|
||||
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
|
||||
hit = bvh_cardinal_curve_intersect(kg,
|
||||
isect_array,
|
||||
@@ -232,7 +231,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
object,
|
||||
prim_addr,
|
||||
ray->time,
|
||||
curve_type,
|
||||
type,
|
||||
NULL,
|
||||
0, 0);
|
||||
}
|
||||
@@ -245,7 +244,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
object,
|
||||
prim_addr,
|
||||
ray->time,
|
||||
curve_type,
|
||||
type,
|
||||
NULL,
|
||||
0, 0);
|
||||
}
|
||||
@@ -309,9 +308,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
|
||||
# else
|
||||
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
|
||||
# endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
@@ -362,10 +361,12 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
}
|
||||
}
|
||||
else {
|
||||
float ignore_t = FLT_MAX;
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
|
||||
# endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
}
|
||||
|
@@ -72,19 +72,19 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
ss_isect->num_hits = 0;
|
||||
|
||||
const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object);
|
||||
if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
if(!(object_flag & SD_TRANSFORM_APPLIED)) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
isect_t = bvh_instance_motion_push(kg,
|
||||
subsurface_object,
|
||||
ray,
|
||||
&P,
|
||||
&dir,
|
||||
&idir,
|
||||
isect_t,
|
||||
&ob_itfm);
|
||||
bvh_instance_motion_push(kg,
|
||||
subsurface_object,
|
||||
ray,
|
||||
&P,
|
||||
&dir,
|
||||
&idir,
|
||||
&isect_t,
|
||||
&ob_itfm);
|
||||
#else
|
||||
isect_t = bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, isect_t);
|
||||
bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
|
||||
#endif
|
||||
object = subsurface_object;
|
||||
}
|
||||
|
@@ -213,7 +213,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
--stack_ptr;
|
||||
}
|
||||
}
|
||||
BVH_DEBUG_NEXT_NODE();
|
||||
BVH_DEBUG_NEXT_STEP();
|
||||
}
|
||||
|
||||
/* if node is leaf, fetch triangle list */
|
||||
@@ -235,7 +235,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
switch(type & PRIMITIVE_ALL) {
|
||||
case PRIMITIVE_TRIANGLE: {
|
||||
for(; prim_addr < prim_addr2; prim_addr++) {
|
||||
BVH_DEBUG_NEXT_INTERSECTION();
|
||||
BVH_DEBUG_NEXT_STEP();
|
||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||
if(triangle_intersect(kg,
|
||||
&isect_precalc,
|
||||
@@ -264,7 +264,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
case PRIMITIVE_MOTION_TRIANGLE: {
|
||||
for(; prim_addr < prim_addr2; prim_addr++) {
|
||||
BVH_DEBUG_NEXT_INTERSECTION();
|
||||
BVH_DEBUG_NEXT_STEP();
|
||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||
if(motion_triangle_intersect(kg,
|
||||
isect,
|
||||
@@ -296,9 +296,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
case PRIMITIVE_CURVE:
|
||||
case PRIMITIVE_MOTION_CURVE: {
|
||||
for(; prim_addr < prim_addr2; prim_addr++) {
|
||||
BVH_DEBUG_NEXT_INTERSECTION();
|
||||
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
|
||||
kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
|
||||
BVH_DEBUG_NEXT_STEP();
|
||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||
bool hit;
|
||||
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
|
||||
hit = bvh_cardinal_curve_intersect(kg,
|
||||
@@ -309,7 +308,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
object,
|
||||
prim_addr,
|
||||
ray->time,
|
||||
curve_type,
|
||||
type,
|
||||
lcg_state,
|
||||
difl,
|
||||
extmax);
|
||||
@@ -323,7 +322,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
object,
|
||||
prim_addr,
|
||||
ray->time,
|
||||
curve_type,
|
||||
type,
|
||||
lcg_state,
|
||||
difl,
|
||||
extmax);
|
||||
@@ -354,9 +353,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
# else
|
||||
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
# endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
@@ -391,9 +390,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
|
||||
/* instance pop */
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
# else
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
# endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
|
@@ -50,17 +50,12 @@ CCL_NAMESPACE_BEGIN
|
||||
#ifdef __KERNEL_DEBUG__
|
||||
# define BVH_DEBUG_INIT() \
|
||||
do { \
|
||||
isect->num_traversed_nodes = 0; \
|
||||
isect->num_traversal_steps = 0; \
|
||||
isect->num_traversed_instances = 0; \
|
||||
isect->num_intersections = 0; \
|
||||
} while(0)
|
||||
# define BVH_DEBUG_NEXT_NODE() \
|
||||
# define BVH_DEBUG_NEXT_STEP() \
|
||||
do { \
|
||||
++isect->num_traversed_nodes; \
|
||||
} while(0)
|
||||
# define BVH_DEBUG_NEXT_INTERSECTION() \
|
||||
do { \
|
||||
++isect->num_intersections; \
|
||||
++isect->num_traversal_steps; \
|
||||
} while(0)
|
||||
# define BVH_DEBUG_NEXT_INSTANCE() \
|
||||
do { \
|
||||
@@ -68,8 +63,7 @@ CCL_NAMESPACE_BEGIN
|
||||
} while(0)
|
||||
#else /* __KERNEL_DEBUG__ */
|
||||
# define BVH_DEBUG_INIT()
|
||||
# define BVH_DEBUG_NEXT_NODE()
|
||||
# define BVH_DEBUG_NEXT_INTERSECTION()
|
||||
# define BVH_DEBUG_NEXT_STEP()
|
||||
# define BVH_DEBUG_NEXT_INSTANCE()
|
||||
#endif /* __KERNEL_DEBUG__ */
|
||||
|
||||
|
@@ -236,11 +236,13 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
/* instance push */
|
||||
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
|
||||
int object_flag = kernel_tex_fetch(__object_flag, object);
|
||||
|
||||
if(object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
# else
|
||||
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
# endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
@@ -281,9 +283,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
|
||||
/* instance pop */
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
# else
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
# endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
@@ -287,11 +287,13 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
/* instance push */
|
||||
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
|
||||
int object_flag = kernel_tex_fetch(__object_flag, object);
|
||||
|
||||
if(object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
|
||||
# else
|
||||
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
|
||||
# endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
@@ -347,10 +349,11 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
}
|
||||
}
|
||||
else {
|
||||
float ignore_t = FLT_MAX;
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
|
||||
# endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
}
|
||||
|
@@ -106,20 +106,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
|
||||
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
|
||||
|
||||
if(false
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
|| ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
|
||||
#endif
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
|| UNLIKELY(ray->time < inodes.y)
|
||||
|| UNLIKELY(ray->time > inodes.z)
|
||||
#endif
|
||||
) {
|
||||
if((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0) {
|
||||
/* Pop. */
|
||||
node_addr = traversal_stack[stack_ptr].addr;
|
||||
--stack_ptr;
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
ssef dist;
|
||||
int child_mask = NODE_INTERSECT(kg,
|
||||
@@ -268,7 +262,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
|
||||
/* Primitive intersection. */
|
||||
while(prim_addr < prim_addr2) {
|
||||
kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
|
||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||
|
||||
bool hit;
|
||||
|
||||
@@ -303,7 +297,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
#if BVH_FEATURE(BVH_HAIR)
|
||||
case PRIMITIVE_CURVE:
|
||||
case PRIMITIVE_MOTION_CURVE: {
|
||||
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
|
||||
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
|
||||
hit = bvh_cardinal_curve_intersect(kg,
|
||||
isect_array,
|
||||
@@ -313,7 +306,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
object,
|
||||
prim_addr,
|
||||
ray->time,
|
||||
curve_type,
|
||||
type,
|
||||
NULL,
|
||||
0, 0);
|
||||
}
|
||||
@@ -326,7 +319,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
object,
|
||||
prim_addr,
|
||||
ray->time,
|
||||
curve_type,
|
||||
type,
|
||||
NULL,
|
||||
0, 0);
|
||||
}
|
||||
@@ -390,9 +383,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
|
||||
# else
|
||||
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
|
||||
# endif
|
||||
|
||||
num_hits_in_instance = 0;
|
||||
@@ -445,10 +438,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
}
|
||||
}
|
||||
else {
|
||||
float ignore_t = FLT_MAX;
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
|
||||
# endif
|
||||
}
|
||||
|
||||
|
@@ -61,19 +61,19 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
ss_isect->num_hits = 0;
|
||||
|
||||
const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object);
|
||||
if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
if(!(object_flag & SD_TRANSFORM_APPLIED)) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
isect_t = bvh_instance_motion_push(kg,
|
||||
subsurface_object,
|
||||
ray,
|
||||
&P,
|
||||
&dir,
|
||||
&idir,
|
||||
isect_t,
|
||||
&ob_itfm);
|
||||
bvh_instance_motion_push(kg,
|
||||
subsurface_object,
|
||||
ray,
|
||||
&P,
|
||||
&dir,
|
||||
&idir,
|
||||
&isect_t,
|
||||
&ob_itfm);
|
||||
#else
|
||||
isect_t = bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, isect_t);
|
||||
bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
|
||||
#endif
|
||||
object = subsurface_object;
|
||||
}
|
||||
|
@@ -117,10 +117,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
|
||||
|
||||
if(UNLIKELY(node_dist > isect->t)
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
|| UNLIKELY(ray->time < inodes.y)
|
||||
|| UNLIKELY(ray->time > inodes.z)
|
||||
#endif
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
|| (__float_as_uint(inodes.x) & visibility) == 0)
|
||||
#endif
|
||||
@@ -135,7 +131,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
int child_mask;
|
||||
ssef dist;
|
||||
|
||||
BVH_DEBUG_NEXT_NODE();
|
||||
BVH_DEBUG_NEXT_STEP();
|
||||
|
||||
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
|
||||
if(difl != 0.0f) {
|
||||
@@ -330,7 +326,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
switch(type & PRIMITIVE_ALL) {
|
||||
case PRIMITIVE_TRIANGLE: {
|
||||
for(; prim_addr < prim_addr2; prim_addr++) {
|
||||
BVH_DEBUG_NEXT_INTERSECTION();
|
||||
BVH_DEBUG_NEXT_STEP();
|
||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||
if(triangle_intersect(kg,
|
||||
&isect_precalc,
|
||||
@@ -351,7 +347,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
case PRIMITIVE_MOTION_TRIANGLE: {
|
||||
for(; prim_addr < prim_addr2; prim_addr++) {
|
||||
BVH_DEBUG_NEXT_INTERSECTION();
|
||||
BVH_DEBUG_NEXT_STEP();
|
||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||
if(motion_triangle_intersect(kg,
|
||||
isect,
|
||||
@@ -375,9 +371,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
case PRIMITIVE_CURVE:
|
||||
case PRIMITIVE_MOTION_CURVE: {
|
||||
for(; prim_addr < prim_addr2; prim_addr++) {
|
||||
BVH_DEBUG_NEXT_INTERSECTION();
|
||||
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
|
||||
kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
|
||||
BVH_DEBUG_NEXT_STEP();
|
||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||
bool hit;
|
||||
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
|
||||
hit = bvh_cardinal_curve_intersect(kg,
|
||||
@@ -388,7 +383,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
object,
|
||||
prim_addr,
|
||||
ray->time,
|
||||
curve_type,
|
||||
type,
|
||||
lcg_state,
|
||||
difl,
|
||||
extmax);
|
||||
@@ -402,7 +397,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
object,
|
||||
prim_addr,
|
||||
ray->time,
|
||||
curve_type,
|
||||
type,
|
||||
lcg_state,
|
||||
difl,
|
||||
extmax);
|
||||
@@ -468,9 +463,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
|
||||
/* Instance pop. */
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
# else
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
# endif
|
||||
|
||||
qbvh_near_far_idx_calc(idir,
|
||||
|
@@ -293,11 +293,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
/* Instance push. */
|
||||
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
|
||||
int object_flag = kernel_tex_fetch(__object_flag, object);
|
||||
|
||||
if(object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
# else
|
||||
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
# endif
|
||||
|
||||
qbvh_near_far_idx_calc(idir,
|
||||
@@ -341,9 +343,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
|
||||
/* Instance pop. */
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
# else
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
# endif
|
||||
|
||||
qbvh_near_far_idx_calc(idir,
|
||||
|
@@ -344,11 +344,13 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
/* Instance push. */
|
||||
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
|
||||
int object_flag = kernel_tex_fetch(__object_flag, object);
|
||||
|
||||
if(object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
|
||||
# else
|
||||
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
|
||||
# endif
|
||||
|
||||
qbvh_near_far_idx_calc(idir,
|
||||
@@ -406,10 +408,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
}
|
||||
}
|
||||
else {
|
||||
float ignore_t = FLT_MAX;
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
|
||||
# endif
|
||||
}
|
||||
|
||||
|
@@ -20,17 +20,17 @@ ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType ty
|
||||
{
|
||||
kernel_assert(size <= sizeof(ShaderClosure));
|
||||
|
||||
int num_closure = sd->num_closure;
|
||||
int num_closure_extra = sd->num_closure_extra;
|
||||
int num_closure = ccl_fetch(sd, num_closure);
|
||||
int num_closure_extra = ccl_fetch(sd, num_closure_extra);
|
||||
if(num_closure + num_closure_extra >= MAX_CLOSURE)
|
||||
return NULL;
|
||||
|
||||
ShaderClosure *sc = &sd->closure[num_closure];
|
||||
ShaderClosure *sc = &ccl_fetch(sd, closure)[num_closure];
|
||||
|
||||
sc->type = type;
|
||||
sc->weight = weight;
|
||||
|
||||
sd->num_closure++;
|
||||
ccl_fetch(sd, num_closure)++;
|
||||
|
||||
return sc;
|
||||
}
|
||||
@@ -44,25 +44,25 @@ ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
|
||||
* This lets us keep the same fast array iteration over closures, as we
|
||||
* found linked list iteration and iteration with skipping to be slower. */
|
||||
int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure));
|
||||
int num_closure = sd->num_closure;
|
||||
int num_closure_extra = sd->num_closure_extra + num_extra;
|
||||
int num_closure = ccl_fetch(sd, num_closure);
|
||||
int num_closure_extra = ccl_fetch(sd, num_closure_extra) + num_extra;
|
||||
|
||||
if(num_closure + num_closure_extra > MAX_CLOSURE) {
|
||||
/* Remove previous closure. */
|
||||
sd->num_closure--;
|
||||
sd->num_closure_extra++;
|
||||
ccl_fetch(sd, num_closure)--;
|
||||
ccl_fetch(sd, num_closure_extra)++;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sd->num_closure_extra = num_closure_extra;
|
||||
return (ccl_addr_space void*)(sd->closure + MAX_CLOSURE - num_closure_extra);
|
||||
ccl_fetch(sd, num_closure_extra) = num_closure_extra;
|
||||
return (ccl_addr_space void*)(ccl_fetch(sd, closure) + MAX_CLOSURE - num_closure_extra);
|
||||
}
|
||||
|
||||
ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight)
|
||||
{
|
||||
ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
|
||||
|
||||
if(sc == NULL)
|
||||
if(!sc)
|
||||
return NULL;
|
||||
|
||||
float sample_weight = fabsf(average(weight));
|
||||
|
@@ -51,89 +51,89 @@ ccl_device_forceinline int bsdf_sample(KernelGlobals *kg,
|
||||
switch(sc->type) {
|
||||
case CLOSURE_BSDF_DIFFUSE_ID:
|
||||
case CLOSURE_BSDF_BSSRDF_ID:
|
||||
label = bsdf_diffuse_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_diffuse_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
#ifdef __SVM__
|
||||
case CLOSURE_BSDF_OREN_NAYAR_ID:
|
||||
label = bsdf_oren_nayar_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_oren_nayar_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
#ifdef __OSL__
|
||||
case CLOSURE_BSDF_PHONG_RAMP_ID:
|
||||
label = bsdf_phong_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_phong_ramp_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
|
||||
label = bsdf_diffuse_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_diffuse_ramp_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
#endif
|
||||
case CLOSURE_BSDF_TRANSLUCENT_ID:
|
||||
label = bsdf_translucent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_translucent_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_REFLECTION_ID:
|
||||
label = bsdf_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_reflection_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_REFRACTION_ID:
|
||||
label = bsdf_refraction_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_refraction_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_TRANSPARENT_ID:
|
||||
label = bsdf_transparent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_transparent_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_MICROFACET_GGX_ID:
|
||||
case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
|
||||
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
|
||||
label = bsdf_microfacet_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_microfacet_ggx_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
|
||||
label = bsdf_microfacet_multi_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &sd->lcg_state);
|
||||
label = bsdf_microfacet_multi_ggx_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &ccl_fetch(sd, lcg_state));
|
||||
break;
|
||||
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
|
||||
label = bsdf_microfacet_multi_ggx_glass_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &sd->lcg_state);
|
||||
label = bsdf_microfacet_multi_ggx_glass_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &ccl_fetch(sd, lcg_state));
|
||||
break;
|
||||
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
|
||||
case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
|
||||
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
|
||||
label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_microfacet_beckmann_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
|
||||
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
|
||||
label = bsdf_ashikhmin_shirley_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_ashikhmin_shirley_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
|
||||
label = bsdf_ashikhmin_velvet_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_ashikhmin_velvet_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
|
||||
label = bsdf_diffuse_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_diffuse_toon_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_GLOSSY_TOON_ID:
|
||||
label = bsdf_glossy_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_glossy_toon_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
|
||||
label = bsdf_hair_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_hair_reflection_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
|
||||
label = bsdf_hair_transmission_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
|
||||
label = bsdf_hair_transmission_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
|
||||
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
#endif
|
||||
#ifdef __VOLUME__
|
||||
case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
|
||||
label = volume_henyey_greenstein_sample(sc, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
label = volume_henyey_greenstein_sample(sc, ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
@@ -157,75 +157,75 @@ float3 bsdf_eval(KernelGlobals *kg,
|
||||
{
|
||||
float3 eval;
|
||||
|
||||
if(dot(sd->Ng, omega_in) >= 0.0f) {
|
||||
if(dot(ccl_fetch(sd, Ng), omega_in) >= 0.0f) {
|
||||
switch(sc->type) {
|
||||
case CLOSURE_BSDF_DIFFUSE_ID:
|
||||
case CLOSURE_BSDF_BSSRDF_ID:
|
||||
eval = bsdf_diffuse_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_diffuse_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
#ifdef __SVM__
|
||||
case CLOSURE_BSDF_OREN_NAYAR_ID:
|
||||
eval = bsdf_oren_nayar_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_oren_nayar_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
#ifdef __OSL__
|
||||
case CLOSURE_BSDF_PHONG_RAMP_ID:
|
||||
eval = bsdf_phong_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_phong_ramp_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
|
||||
eval = bsdf_diffuse_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_diffuse_ramp_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
#endif
|
||||
case CLOSURE_BSDF_TRANSLUCENT_ID:
|
||||
eval = bsdf_translucent_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_translucent_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_REFLECTION_ID:
|
||||
eval = bsdf_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_reflection_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_REFRACTION_ID:
|
||||
eval = bsdf_refraction_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_refraction_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_TRANSPARENT_ID:
|
||||
eval = bsdf_transparent_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_transparent_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_MICROFACET_GGX_ID:
|
||||
case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
|
||||
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
|
||||
eval = bsdf_microfacet_ggx_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_microfacet_ggx_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
|
||||
eval = bsdf_microfacet_multi_ggx_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state);
|
||||
eval = bsdf_microfacet_multi_ggx_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state));
|
||||
break;
|
||||
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
|
||||
eval = bsdf_microfacet_multi_ggx_glass_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state);
|
||||
eval = bsdf_microfacet_multi_ggx_glass_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state));
|
||||
break;
|
||||
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
|
||||
case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
|
||||
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
|
||||
eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_microfacet_beckmann_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
|
||||
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
|
||||
eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_ashikhmin_shirley_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
|
||||
eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_ashikhmin_velvet_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
|
||||
eval = bsdf_diffuse_toon_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_diffuse_toon_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_GLOSSY_TOON_ID:
|
||||
eval = bsdf_glossy_toon_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_glossy_toon_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
|
||||
eval = bsdf_hair_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_hair_reflection_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
|
||||
eval = bsdf_hair_transmission_eval_reflect(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_hair_transmission_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
#endif
|
||||
#ifdef __VOLUME__
|
||||
case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
|
||||
eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
|
||||
eval = volume_henyey_greenstein_eval_phase(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
@@ -237,63 +237,63 @@ float3 bsdf_eval(KernelGlobals *kg,
|
||||
switch(sc->type) {
|
||||
case CLOSURE_BSDF_DIFFUSE_ID:
|
||||
case CLOSURE_BSDF_BSSRDF_ID:
|
||||
eval = bsdf_diffuse_eval_transmit(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_diffuse_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
#ifdef __SVM__
|
||||
case CLOSURE_BSDF_OREN_NAYAR_ID:
|
||||
eval = bsdf_oren_nayar_eval_transmit(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_oren_nayar_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_TRANSLUCENT_ID:
|
||||
eval = bsdf_translucent_eval_transmit(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_translucent_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_REFLECTION_ID:
|
||||
eval = bsdf_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_reflection_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_REFRACTION_ID:
|
||||
eval = bsdf_refraction_eval_transmit(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_refraction_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_TRANSPARENT_ID:
|
||||
eval = bsdf_transparent_eval_transmit(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_transparent_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_MICROFACET_GGX_ID:
|
||||
case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
|
||||
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
|
||||
eval = bsdf_microfacet_ggx_eval_transmit(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_microfacet_ggx_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
|
||||
eval = bsdf_microfacet_multi_ggx_eval_transmit(sc, sd->I, omega_in, pdf, &sd->lcg_state);
|
||||
eval = bsdf_microfacet_multi_ggx_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state));
|
||||
break;
|
||||
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
|
||||
eval = bsdf_microfacet_multi_ggx_glass_eval_transmit(sc, sd->I, omega_in, pdf, &sd->lcg_state);
|
||||
eval = bsdf_microfacet_multi_ggx_glass_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state));
|
||||
break;
|
||||
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
|
||||
case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
|
||||
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
|
||||
eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_microfacet_beckmann_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
|
||||
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
|
||||
eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_ashikhmin_shirley_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
|
||||
eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_ashikhmin_velvet_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
|
||||
eval = bsdf_diffuse_toon_eval_transmit(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_diffuse_toon_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_GLOSSY_TOON_ID:
|
||||
eval = bsdf_glossy_toon_eval_transmit(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_glossy_toon_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
|
||||
eval = bsdf_hair_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_hair_reflection_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
|
||||
eval = bsdf_hair_transmission_eval_transmit(sc, sd->I, omega_in, pdf);
|
||||
eval = bsdf_hair_transmission_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
#endif
|
||||
#ifdef __VOLUME__
|
||||
case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
|
||||
eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
|
||||
eval = volume_henyey_greenstein_eval_phase(sc, ccl_fetch(sd, I), omega_in, pdf);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
|
@@ -143,7 +143,6 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
|
||||
{
|
||||
const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
|
||||
float3 N = bsdf->N;
|
||||
int label = LABEL_REFLECT | LABEL_GLOSSY;
|
||||
|
||||
float NdotI = dot(N, I);
|
||||
if(NdotI > 0.0f) {
|
||||
@@ -212,7 +211,6 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
|
||||
/* Some high number for MIS. */
|
||||
*pdf = 1e6f;
|
||||
*eval = make_float3(1e6f, 1e6f, 1e6f);
|
||||
label = LABEL_REFLECT | LABEL_SINGULAR;
|
||||
}
|
||||
else {
|
||||
/* leave the rest to eval_reflect */
|
||||
@@ -226,7 +224,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
|
||||
#endif
|
||||
}
|
||||
|
||||
return label;
|
||||
return LABEL_REFLECT|LABEL_GLOSSY;
|
||||
}
|
||||
|
||||
|
||||
|
@@ -267,10 +267,7 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng,
|
||||
|
||||
*eval = make_float3(*pdf, *pdf, *pdf);
|
||||
|
||||
/* TODO(sergey): Should always be negative, but seems some precision issue
|
||||
* is involved here.
|
||||
*/
|
||||
kernel_assert(dot(locy, *omega_in) < 1e-4f);
|
||||
kernel_assert(dot(locy, *omega_in) < 0.0f);
|
||||
|
||||
return LABEL_TRANSMIT|LABEL_GLOSSY;
|
||||
}
|
||||
|
@@ -266,7 +266,7 @@ ccl_device bool bsdf_microfacet_merge(const ShaderClosure *a, const ShaderClosur
|
||||
(bsdf_a->alpha_y == bsdf_b->alpha_y) &&
|
||||
(isequal_float3(bsdf_a->T, bsdf_b->T)) &&
|
||||
(bsdf_a->ior == bsdf_b->ior) &&
|
||||
((bsdf_a->extra == NULL && bsdf_b->extra == NULL) ||
|
||||
((!bsdf_a->extra && !bsdf_b->extra) ||
|
||||
((bsdf_a->extra && bsdf_b->extra) &&
|
||||
(isequal_float3(bsdf_a->extra->color, bsdf_b->extra->color))));
|
||||
}
|
||||
@@ -452,7 +452,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
|
||||
float alpha_y = bsdf->alpha_y;
|
||||
bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
|
||||
float3 N = bsdf->N;
|
||||
int label;
|
||||
|
||||
float cosNO = dot(N, I);
|
||||
if(cosNO > 0) {
|
||||
@@ -478,7 +477,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
|
||||
/* reflection or refraction? */
|
||||
if(!m_refractive) {
|
||||
float cosMO = dot(m, I);
|
||||
label = LABEL_REFLECT | LABEL_GLOSSY;
|
||||
|
||||
if(cosMO > 0) {
|
||||
/* eq. 39 - compute actual reflected direction */
|
||||
@@ -489,7 +487,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
|
||||
/* some high number for MIS */
|
||||
*pdf = 1e6f;
|
||||
*eval = make_float3(1e6f, 1e6f, 1e6f);
|
||||
label = LABEL_REFLECT | LABEL_SINGULAR;
|
||||
}
|
||||
else {
|
||||
/* microfacet normal is visible to this ray */
|
||||
@@ -552,8 +549,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
|
||||
}
|
||||
}
|
||||
else {
|
||||
label = LABEL_TRANSMIT | LABEL_GLOSSY;
|
||||
|
||||
/* CAUTION: the i and o variables are inverted relative to the paper
|
||||
* eq. 39 - compute actual refractive direction */
|
||||
float3 R, T;
|
||||
@@ -581,7 +576,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
|
||||
/* some high number for MIS */
|
||||
*pdf = 1e6f;
|
||||
*eval = make_float3(1e6f, 1e6f, 1e6f);
|
||||
label = LABEL_TRANSMIT | LABEL_SINGULAR;
|
||||
}
|
||||
else {
|
||||
/* eq. 33 */
|
||||
@@ -613,10 +607,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
label = (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
|
||||
}
|
||||
return label;
|
||||
return (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
|
||||
}
|
||||
|
||||
/* Beckmann microfacet with Smith shadow-masking from:
|
||||
@@ -824,7 +815,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
|
||||
float alpha_y = bsdf->alpha_y;
|
||||
bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
|
||||
float3 N = bsdf->N;
|
||||
int label;
|
||||
|
||||
float cosNO = dot(N, I);
|
||||
if(cosNO > 0) {
|
||||
@@ -849,7 +839,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
|
||||
|
||||
/* reflection or refraction? */
|
||||
if(!m_refractive) {
|
||||
label = LABEL_REFLECT | LABEL_GLOSSY;
|
||||
float cosMO = dot(m, I);
|
||||
|
||||
if(cosMO > 0) {
|
||||
@@ -861,7 +850,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
|
||||
/* some high number for MIS */
|
||||
*pdf = 1e6f;
|
||||
*eval = make_float3(1e6f, 1e6f, 1e6f);
|
||||
label = LABEL_REFLECT | LABEL_SINGULAR;
|
||||
}
|
||||
else {
|
||||
/* microfacet normal is visible to this ray
|
||||
@@ -916,8 +904,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
|
||||
}
|
||||
}
|
||||
else {
|
||||
label = LABEL_TRANSMIT | LABEL_GLOSSY;
|
||||
|
||||
/* CAUTION: the i and o variables are inverted relative to the paper
|
||||
* eq. 39 - compute actual refractive direction */
|
||||
float3 R, T;
|
||||
@@ -945,7 +931,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
|
||||
/* some high number for MIS */
|
||||
*pdf = 1e6f;
|
||||
*eval = make_float3(1e6f, 1e6f, 1e6f);
|
||||
label = LABEL_TRANSMIT | LABEL_SINGULAR;
|
||||
}
|
||||
else {
|
||||
/* eq. 33 */
|
||||
@@ -978,10 +963,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
label = (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
|
||||
}
|
||||
return label;
|
||||
return (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -43,7 +43,7 @@ ccl_device_forceinline float D_ggx_aniso(const float3 wm, const float2 alpha)
|
||||
ccl_device_forceinline float2 mf_sampleP22_11(const float cosI, const float2 randU)
|
||||
{
|
||||
if(cosI > 0.9999f || cosI < 1e-6f) {
|
||||
const float r = sqrtf(randU.x / max(1.0f - randU.x, 1e-7f));
|
||||
const float r = sqrtf(randU.x / (1.0f - randU.x));
|
||||
const float phi = M_2PI_F * randU.y;
|
||||
return make_float2(r*cosf(phi), r*sinf(phi));
|
||||
}
|
||||
@@ -83,7 +83,7 @@ ccl_device_forceinline float3 mf_sample_vndf(const float3 wi, const float2 alpha
|
||||
const float3 wi_11 = normalize(make_float3(alpha.x*wi.x, alpha.y*wi.y, wi.z));
|
||||
const float2 slope_11 = mf_sampleP22_11(wi_11.z, randU);
|
||||
|
||||
const float2 cossin_phi = safe_normalize(make_float2(wi_11.x, wi_11.y));
|
||||
const float2 cossin_phi = normalize(make_float2(wi_11.x, wi_11.y));
|
||||
const float slope_x = alpha.x*(cossin_phi.x * slope_11.x - cossin_phi.y * slope_11.y);
|
||||
const float slope_y = alpha.y*(cossin_phi.y * slope_11.x + cossin_phi.x * slope_11.y);
|
||||
|
||||
|
@@ -23,8 +23,6 @@
|
||||
#include "geom_subd_triangle.h"
|
||||
#include "geom_triangle_intersect.h"
|
||||
#include "geom_motion_triangle.h"
|
||||
#include "geom_motion_triangle_intersect.h"
|
||||
#include "geom_motion_triangle_shader.h"
|
||||
#include "geom_motion_curve.h"
|
||||
#include "geom_curve.h"
|
||||
#include "geom_volume.h"
|
||||
|
@@ -30,7 +30,7 @@ ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *
|
||||
ccl_device_inline uint attribute_primitive_type(KernelGlobals *kg, const ShaderData *sd)
|
||||
{
|
||||
#ifdef __HAIR__
|
||||
if(sd->type & PRIMITIVE_ALL_CURVE) {
|
||||
if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
|
||||
return ATTR_PRIM_CURVE;
|
||||
}
|
||||
else
|
||||
@@ -53,12 +53,12 @@ ccl_device_inline AttributeDescriptor attribute_not_found()
|
||||
|
||||
ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id)
|
||||
{
|
||||
if(sd->object == PRIM_NONE) {
|
||||
if(ccl_fetch(sd, object) == PRIM_NONE) {
|
||||
return attribute_not_found();
|
||||
}
|
||||
|
||||
/* for SVM, find attribute by unique id */
|
||||
uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride;
|
||||
uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride;
|
||||
attr_offset += attribute_primitive_type(kg, sd);
|
||||
uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
|
||||
|
||||
@@ -73,7 +73,7 @@ ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, const Sh
|
||||
AttributeDescriptor desc;
|
||||
desc.element = (AttributeElement)attr_map.y;
|
||||
|
||||
if(sd->prim == PRIM_NONE &&
|
||||
if(ccl_fetch(sd, prim) == PRIM_NONE &&
|
||||
desc.element != ATTR_ELEMENT_MESH &&
|
||||
desc.element != ATTR_ELEMENT_VOXEL &&
|
||||
desc.element != ATTR_ELEMENT_OBJECT)
|
||||
|
@@ -32,22 +32,22 @@ ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd,
|
||||
if(dy) *dy = 0.0f;
|
||||
#endif
|
||||
|
||||
return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim);
|
||||
return kernel_tex_fetch(__attributes_float, desc.offset + ccl_fetch(sd, prim));
|
||||
}
|
||||
else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
|
||||
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
|
||||
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
|
||||
int k1 = k0 + 1;
|
||||
|
||||
float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0);
|
||||
float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1);
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if(dx) *dx = sd->du.dx*(f1 - f0);
|
||||
if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0);
|
||||
if(dy) *dy = 0.0f;
|
||||
#endif
|
||||
|
||||
return (1.0f - sd->u)*f0 + sd->u*f1;
|
||||
return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1;
|
||||
}
|
||||
else {
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
@@ -71,22 +71,22 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd
|
||||
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
|
||||
#endif
|
||||
|
||||
return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim));
|
||||
return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + ccl_fetch(sd, prim)));
|
||||
}
|
||||
else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
|
||||
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
|
||||
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
|
||||
int k1 = k0 + 1;
|
||||
|
||||
float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0));
|
||||
float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1));
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if(dx) *dx = sd->du.dx*(f1 - f0);
|
||||
if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0);
|
||||
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
|
||||
#endif
|
||||
|
||||
return (1.0f - sd->u)*f0 + sd->u*f1;
|
||||
return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1;
|
||||
}
|
||||
else {
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
@@ -104,22 +104,22 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
|
||||
{
|
||||
float r = 0.0f;
|
||||
|
||||
if(sd->type & PRIMITIVE_ALL_CURVE) {
|
||||
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
|
||||
float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
|
||||
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
|
||||
int k1 = k0 + 1;
|
||||
|
||||
float4 P_curve[2];
|
||||
|
||||
if(sd->type & PRIMITIVE_CURVE) {
|
||||
if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) {
|
||||
P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
|
||||
P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
|
||||
}
|
||||
else {
|
||||
motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
|
||||
motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve);
|
||||
}
|
||||
|
||||
r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w;
|
||||
r = (P_curve[1].w - P_curve[0].w) * ccl_fetch(sd, u) + P_curve[0].w;
|
||||
}
|
||||
|
||||
return r*2.0f;
|
||||
@@ -130,8 +130,8 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
|
||||
|
||||
ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd)
|
||||
{
|
||||
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
|
||||
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
|
||||
int k1 = k0 + 1;
|
||||
|
||||
float4 P_curve[2];
|
||||
@@ -139,7 +139,7 @@ ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd
|
||||
P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
|
||||
P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
|
||||
|
||||
return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u);
|
||||
return float4_to_float3(P_curve[1]) * ccl_fetch(sd, u) + float4_to_float3(P_curve[0]) * (1.0f - ccl_fetch(sd, u));
|
||||
}
|
||||
|
||||
/* Curve tangent normal */
|
||||
@@ -148,14 +148,14 @@ ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd)
|
||||
{
|
||||
float3 tgN = make_float3(0.0f,0.0f,0.0f);
|
||||
|
||||
if(sd->type & PRIMITIVE_ALL_CURVE) {
|
||||
if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
|
||||
|
||||
tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu)));
|
||||
tgN = -(-ccl_fetch(sd, I) - ccl_fetch(sd, dPdu) * (dot(ccl_fetch(sd, dPdu),-ccl_fetch(sd, I)) / len_squared(ccl_fetch(sd, dPdu))));
|
||||
tgN = normalize(tgN);
|
||||
|
||||
/* need to find suitable scaled gd for corrected normal */
|
||||
#if 0
|
||||
tgN = normalize(tgN - gd * sd->dPdu);
|
||||
tgN = normalize(tgN - gd * ccl_fetch(sd, dPdu));
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -229,15 +229,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
|
||||
float3 P, float3 dir, uint visibility, int object, int curveAddr, float time,int type, uint *lcg_state, float difl, float extmax)
|
||||
#endif
|
||||
{
|
||||
const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
|
||||
|
||||
if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
|
||||
const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
|
||||
if(time < prim_time.x || time > prim_time.y) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
int segment = PRIMITIVE_UNPACK_SEGMENT(type);
|
||||
float epsilon = 0.0f;
|
||||
float r_st, r_en;
|
||||
@@ -264,20 +255,9 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
|
||||
int ka = max(k0 - 1, v00.x);
|
||||
int kb = min(k1 + 1, v00.x + v00.y - 1);
|
||||
|
||||
#ifdef __KERNEL_AVX2__
|
||||
avxf P_curve_0_1, P_curve_2_3;
|
||||
if(is_curve_primitive) {
|
||||
P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
|
||||
P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x);
|
||||
}
|
||||
else {
|
||||
int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
|
||||
motion_cardinal_curve_keys_avx(kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1,&P_curve_2_3);
|
||||
}
|
||||
#else /* __KERNEL_AVX2__ */
|
||||
ssef P_curve[4];
|
||||
|
||||
if(is_curve_primitive) {
|
||||
if(type & PRIMITIVE_CURVE) {
|
||||
P_curve[0] = load4f(&kg->__curve_keys.data[ka].x);
|
||||
P_curve[1] = load4f(&kg->__curve_keys.data[k0].x);
|
||||
P_curve[2] = load4f(&kg->__curve_keys.data[k1].x);
|
||||
@@ -287,7 +267,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
|
||||
int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
|
||||
motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve);
|
||||
}
|
||||
#endif /* __KERNEL_AVX2__ */
|
||||
|
||||
ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
|
||||
ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
|
||||
@@ -299,33 +278,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
|
||||
ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
|
||||
ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
|
||||
|
||||
#ifdef __KERNEL_AVX2__
|
||||
const avxf vPP = _mm256_broadcast_ps(&P.m128);
|
||||
const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
|
||||
const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
|
||||
const avxf htfm22 = avxf(htfm2.m128, htfm2.m128);
|
||||
|
||||
const avxf p01 = madd(shuffle<0>(P_curve_0_1 - vPP),
|
||||
htfm00,
|
||||
madd(shuffle<1>(P_curve_0_1 - vPP),
|
||||
htfm11,
|
||||
shuffle<2>(P_curve_0_1 - vPP) * htfm22));
|
||||
const avxf p23 = madd(shuffle<0>(P_curve_2_3 - vPP),
|
||||
htfm00,
|
||||
madd(shuffle<1>(P_curve_2_3 - vPP),
|
||||
htfm11,
|
||||
shuffle<2>(P_curve_2_3 - vPP)*htfm22));
|
||||
|
||||
const ssef p0 = _mm256_castps256_ps128(p01);
|
||||
const ssef p1 = _mm256_extractf128_ps(p01, 1);
|
||||
const ssef p2 = _mm256_castps256_ps128(p23);
|
||||
const ssef p3 = _mm256_extractf128_ps(p23, 1);
|
||||
|
||||
const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1);
|
||||
r_st = ((float4 &)P_curve_1).w;
|
||||
const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3);
|
||||
r_en = ((float4 &)P_curve_2).w;
|
||||
#else /* __KERNEL_AVX2__ */
|
||||
ssef htfm[] = { htfm0, htfm1, htfm2 };
|
||||
ssef vP = load4f(P);
|
||||
ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
|
||||
@@ -333,10 +285,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
|
||||
ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
|
||||
ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);
|
||||
|
||||
r_st = ((float4 &)P_curve[1]).w;
|
||||
r_en = ((float4 &)P_curve[2]).w;
|
||||
#endif /* __KERNEL_AVX2__ */
|
||||
|
||||
float fc = 0.71f;
|
||||
ssef vfc = ssef(fc);
|
||||
ssef vfcxp3 = vfc * p3;
|
||||
@@ -346,6 +294,8 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
|
||||
vcurve_coef[2] = madd(ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
|
||||
vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));
|
||||
|
||||
r_st = ((float4 &)P_curve[1]).w;
|
||||
r_en = ((float4 &)P_curve[2]).w;
|
||||
}
|
||||
#else
|
||||
float3 curve_coef[4];
|
||||
@@ -372,7 +322,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
|
||||
|
||||
float4 P_curve[4];
|
||||
|
||||
if(is_curve_primitive) {
|
||||
if(type & PRIMITIVE_CURVE) {
|
||||
P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
|
||||
P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
|
||||
P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
|
||||
@@ -433,9 +383,8 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
|
||||
|
||||
/* begin loop */
|
||||
while(!(tree >> (depth))) {
|
||||
const float i_st = tree * resol;
|
||||
const float i_en = i_st + (level * resol);
|
||||
|
||||
float i_st = tree * resol;
|
||||
float i_en = i_st + (level * resol);
|
||||
#ifdef __KERNEL_SSE2__
|
||||
ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
|
||||
ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]);
|
||||
@@ -509,23 +458,13 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
|
||||
|
||||
if(flags & CURVE_KN_RIBBONS) {
|
||||
float3 tg = (p_en - p_st);
|
||||
#ifdef __KERNEL_SSE__
|
||||
const float3 tg_sq = tg * tg;
|
||||
float w = tg_sq.x + tg_sq.y;
|
||||
#else
|
||||
float w = tg.x * tg.x + tg.y * tg.y;
|
||||
#endif
|
||||
if(w == 0) {
|
||||
tree++;
|
||||
level = tree & -tree;
|
||||
continue;
|
||||
}
|
||||
#ifdef __KERNEL_SSE__
|
||||
const float3 p_sttg = p_st * tg;
|
||||
w = -(p_sttg.x + p_sttg.y) / w;
|
||||
#else
|
||||
w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
|
||||
#endif
|
||||
w = saturate(w);
|
||||
|
||||
/* compute u on the curve segment */
|
||||
@@ -557,13 +496,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
|
||||
if(difl != 0.0f) {
|
||||
mw_extension = min(difl * fabsf(bmaxz), extmax);
|
||||
r_ext = mw_extension + r_curr;
|
||||
#ifdef __KERNEL_SSE__
|
||||
const float3 p_curr_sq = p_curr * p_curr;
|
||||
const float3 dxxx = _mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128));
|
||||
float d = dxxx.x;
|
||||
#else
|
||||
float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
|
||||
#endif
|
||||
float d0 = d - r_curr;
|
||||
float d1 = d + r_curr;
|
||||
float inv_mw_extension = 1.0f/mw_extension;
|
||||
@@ -698,15 +631,6 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection
|
||||
# define dot3(x, y) dot(x, y)
|
||||
#endif
|
||||
|
||||
const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
|
||||
|
||||
if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
|
||||
const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
|
||||
if(time < prim_time.x || time > prim_time.y) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
int segment = PRIMITIVE_UNPACK_SEGMENT(type);
|
||||
/* curve Intersection check */
|
||||
int flags = kernel_data.curve.curveflags;
|
||||
@@ -721,7 +645,7 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection
|
||||
#ifndef __KERNEL_SSE2__
|
||||
float4 P_curve[2];
|
||||
|
||||
if(is_curve_primitive) {
|
||||
if(type & PRIMITIVE_CURVE) {
|
||||
P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
|
||||
P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
|
||||
}
|
||||
@@ -756,7 +680,7 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection
|
||||
#else
|
||||
ssef P_curve[2];
|
||||
|
||||
if(is_curve_primitive) {
|
||||
if(type & PRIMITIVE_CURVE) {
|
||||
P_curve[0] = load4f(&kg->__curve_keys.data[k0].x);
|
||||
P_curve[1] = load4f(&kg->__curve_keys.data[k1].x);
|
||||
}
|
||||
@@ -929,7 +853,7 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection
|
||||
# undef len3_squared
|
||||
# undef len3
|
||||
# undef dot3
|
||||
#endif
|
||||
# endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3)
|
||||
@@ -966,7 +890,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
|
||||
|
||||
if(isect->object != OBJECT_NONE) {
|
||||
#ifdef __OBJECT_MOTION__
|
||||
Transform tfm = sd->ob_itfm;
|
||||
Transform tfm = ccl_fetch(sd, ob_itfm);
|
||||
#else
|
||||
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
|
||||
#endif
|
||||
@@ -979,7 +903,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
|
||||
int prim = kernel_tex_fetch(__prim_index, isect->prim);
|
||||
float4 v00 = kernel_tex_fetch(__curves, prim);
|
||||
|
||||
int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
|
||||
int k1 = k0 + 1;
|
||||
|
||||
float3 tg;
|
||||
@@ -990,14 +914,14 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
|
||||
|
||||
float4 P_curve[4];
|
||||
|
||||
if(sd->type & PRIMITIVE_CURVE) {
|
||||
if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) {
|
||||
P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
|
||||
P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
|
||||
P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
|
||||
P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
|
||||
}
|
||||
else {
|
||||
motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve);
|
||||
motion_cardinal_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), ka, k0, k1, kb, P_curve);
|
||||
}
|
||||
|
||||
float3 p[4];
|
||||
@@ -1009,43 +933,43 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
|
||||
P = P + D*t;
|
||||
|
||||
#ifdef __UV__
|
||||
sd->u = isect->u;
|
||||
sd->v = 0.0f;
|
||||
ccl_fetch(sd, u) = isect->u;
|
||||
ccl_fetch(sd, v) = 0.0f;
|
||||
#endif
|
||||
|
||||
tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
|
||||
|
||||
if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) {
|
||||
sd->Ng = normalize(-(D - tg * (dot(tg, D))));
|
||||
ccl_fetch(sd, Ng) = normalize(-(D - tg * (dot(tg, D))));
|
||||
}
|
||||
else {
|
||||
/* direction from inside to surface of curve */
|
||||
float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
|
||||
sd->Ng = normalize(P - p_curr);
|
||||
ccl_fetch(sd, Ng) = normalize(P - p_curr);
|
||||
|
||||
/* adjustment for changing radius */
|
||||
float gd = isect->v;
|
||||
|
||||
if(gd != 0.0f) {
|
||||
sd->Ng = sd->Ng - gd * tg;
|
||||
sd->Ng = normalize(sd->Ng);
|
||||
ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg;
|
||||
ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng));
|
||||
}
|
||||
}
|
||||
|
||||
/* todo: sometimes the normal is still so that this is detected as
|
||||
* backfacing even if cull backfaces is enabled */
|
||||
|
||||
sd->N = sd->Ng;
|
||||
ccl_fetch(sd, N) = ccl_fetch(sd, Ng);
|
||||
}
|
||||
else {
|
||||
float4 P_curve[2];
|
||||
|
||||
if(sd->type & PRIMITIVE_CURVE) {
|
||||
if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) {
|
||||
P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
|
||||
P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
|
||||
}
|
||||
else {
|
||||
motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
|
||||
motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve);
|
||||
}
|
||||
|
||||
float l = 1.0f;
|
||||
@@ -1056,39 +980,39 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
|
||||
float3 dif = P - float4_to_float3(P_curve[0]);
|
||||
|
||||
#ifdef __UV__
|
||||
sd->u = dot(dif,tg)/l;
|
||||
sd->v = 0.0f;
|
||||
ccl_fetch(sd, u) = dot(dif,tg)/l;
|
||||
ccl_fetch(sd, v) = 0.0f;
|
||||
#endif
|
||||
|
||||
if(flag & CURVE_KN_TRUETANGENTGNORMAL) {
|
||||
sd->Ng = -(D - tg * dot(tg, D));
|
||||
sd->Ng = normalize(sd->Ng);
|
||||
ccl_fetch(sd, Ng) = -(D - tg * dot(tg, D));
|
||||
ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng));
|
||||
}
|
||||
else {
|
||||
float gd = isect->v;
|
||||
|
||||
/* direction from inside to surface of curve */
|
||||
sd->Ng = (dif - tg * sd->u * l) / (P_curve[0].w + sd->u * l * gd);
|
||||
ccl_fetch(sd, Ng) = (dif - tg * ccl_fetch(sd, u) * l) / (P_curve[0].w + ccl_fetch(sd, u) * l * gd);
|
||||
|
||||
/* adjustment for changing radius */
|
||||
if(gd != 0.0f) {
|
||||
sd->Ng = sd->Ng - gd * tg;
|
||||
sd->Ng = normalize(sd->Ng);
|
||||
ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg;
|
||||
ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng));
|
||||
}
|
||||
}
|
||||
|
||||
sd->N = sd->Ng;
|
||||
ccl_fetch(sd, N) = ccl_fetch(sd, Ng);
|
||||
}
|
||||
|
||||
#ifdef __DPDU__
|
||||
/* dPdu/dPdv */
|
||||
sd->dPdu = tg;
|
||||
sd->dPdv = cross(tg, sd->Ng);
|
||||
ccl_fetch(sd, dPdu) = tg;
|
||||
ccl_fetch(sd, dPdv) = cross(tg, ccl_fetch(sd, Ng));
|
||||
#endif
|
||||
|
||||
if(isect->object != OBJECT_NONE) {
|
||||
#ifdef __OBJECT_MOTION__
|
||||
Transform tfm = sd->ob_tfm;
|
||||
Transform tfm = ccl_fetch(sd, ob_tfm);
|
||||
#else
|
||||
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
|
||||
#endif
|
||||
|
@@ -50,12 +50,12 @@ ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg, int object,
|
||||
ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, float4 keys[2])
|
||||
{
|
||||
if(step == numsteps) {
|
||||
/* center step: regular key location */
|
||||
/* center step: regular vertex location */
|
||||
keys[0] = kernel_tex_fetch(__curve_keys, k0);
|
||||
keys[1] = kernel_tex_fetch(__curve_keys, k1);
|
||||
}
|
||||
else {
|
||||
/* center step is not stored in this array */
|
||||
/* center step not stored in this array */
|
||||
if(step > numsteps)
|
||||
step--;
|
||||
|
||||
@@ -97,14 +97,14 @@ ccl_device_inline void motion_curve_keys(KernelGlobals *kg, int object, int prim
|
||||
ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, int k2, int k3, float4 keys[4])
|
||||
{
|
||||
if(step == numsteps) {
|
||||
/* center step: regular key location */
|
||||
/* center step: regular vertex location */
|
||||
keys[0] = kernel_tex_fetch(__curve_keys, k0);
|
||||
keys[1] = kernel_tex_fetch(__curve_keys, k1);
|
||||
keys[2] = kernel_tex_fetch(__curve_keys, k2);
|
||||
keys[3] = kernel_tex_fetch(__curve_keys, k3);
|
||||
}
|
||||
else {
|
||||
/* center step is not stored in this array */
|
||||
/* center step not store in this array */
|
||||
if(step > numsteps)
|
||||
step--;
|
||||
|
||||
@@ -118,12 +118,7 @@ ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, in
|
||||
}
|
||||
|
||||
/* return 2 curve key locations */
|
||||
ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
|
||||
int object,
|
||||
int prim,
|
||||
float time,
|
||||
int k0, int k1, int k2, int k3,
|
||||
float4 keys[4])
|
||||
ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, int k2, int k3, float4 keys[4])
|
||||
{
|
||||
/* get motion info */
|
||||
int numsteps, numkeys;
|
||||
@@ -152,65 +147,6 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
|
||||
keys[3] = (1.0f - t)*keys[3] + t*next_keys[3];
|
||||
}
|
||||
|
||||
#ifdef __KERNEL_AVX2__
|
||||
/* Similar to above, but returns keys as pair of two AVX registers with each
|
||||
* holding two float4.
|
||||
*/
|
||||
ccl_device_inline void motion_cardinal_curve_keys_avx(KernelGlobals *kg,
|
||||
int object,
|
||||
int prim,
|
||||
float time,
|
||||
int k0, int k1,
|
||||
int k2, int k3,
|
||||
avxf *out_keys_0_1,
|
||||
avxf *out_keys_2_3)
|
||||
{
|
||||
/* Get motion info. */
|
||||
int numsteps, numkeys;
|
||||
object_motion_info(kg, object, &numsteps, NULL, &numkeys);
|
||||
|
||||
/* Figure out which steps we need to fetch and their interpolation factor. */
|
||||
int maxstep = numsteps * 2;
|
||||
int step = min((int)(time*maxstep), maxstep - 1);
|
||||
float t = time*maxstep - step;
|
||||
|
||||
/* Find attribute. */
|
||||
AttributeElement elem;
|
||||
int offset = find_attribute_curve_motion(kg,
|
||||
object,
|
||||
ATTR_STD_MOTION_VERTEX_POSITION,
|
||||
&elem);
|
||||
kernel_assert(offset != ATTR_STD_NOT_FOUND);
|
||||
|
||||
/* Fetch key coordinates. */
|
||||
float4 next_keys[4];
|
||||
float4 keys[4];
|
||||
motion_cardinal_curve_keys_for_step(kg,
|
||||
offset,
|
||||
numkeys,
|
||||
numsteps,
|
||||
step,
|
||||
k0, k1, k2, k3,
|
||||
keys);
|
||||
motion_cardinal_curve_keys_for_step(kg,
|
||||
offset,
|
||||
numkeys,
|
||||
numsteps,
|
||||
step + 1,
|
||||
k0, k1, k2, k3,
|
||||
next_keys);
|
||||
|
||||
const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128);
|
||||
const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128);
|
||||
const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128);
|
||||
const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128);
|
||||
|
||||
/* Interpolate between steps. */
|
||||
*out_keys_0_1 = (1.0f - t) * keys_0_1 + t*next_keys_0_1;
|
||||
*out_keys_2_3 = (1.0f - t) * keys_2_3 + t*next_keys_2_3;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -76,7 +76,7 @@ ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, uint4
|
||||
normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
|
||||
}
|
||||
else {
|
||||
/* center step is not stored in this array */
|
||||
/* center step not stored in this array */
|
||||
if(step > numsteps)
|
||||
step--;
|
||||
|
||||
@@ -117,4 +117,312 @@ ccl_device_inline void motion_triangle_vertices(KernelGlobals *kg, int object, i
|
||||
verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
|
||||
}
|
||||
|
||||
/* Refine triangle intersection to more precise hit point. For rays that travel
|
||||
* far the precision is often not so good, this reintersects the primitive from
|
||||
* a closer distance. */
|
||||
|
||||
ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3])
|
||||
{
|
||||
float3 P = ray->P;
|
||||
float3 D = ray->D;
|
||||
float t = isect->t;
|
||||
|
||||
#ifdef __INTERSECTION_REFINE__
|
||||
if(isect->object != OBJECT_NONE) {
|
||||
if(UNLIKELY(t == 0.0f)) {
|
||||
return P;
|
||||
}
|
||||
# ifdef __OBJECT_MOTION__
|
||||
Transform tfm = ccl_fetch(sd, ob_itfm);
|
||||
# else
|
||||
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
|
||||
# endif
|
||||
|
||||
P = transform_point(&tfm, P);
|
||||
D = transform_direction(&tfm, D*t);
|
||||
D = normalize_len(D, &t);
|
||||
}
|
||||
|
||||
P = P + D*t;
|
||||
|
||||
/* compute refined intersection distance */
|
||||
const float3 e1 = verts[0] - verts[2];
|
||||
const float3 e2 = verts[1] - verts[2];
|
||||
const float3 s1 = cross(D, e2);
|
||||
|
||||
const float invdivisor = 1.0f/dot(s1, e1);
|
||||
const float3 d = P - verts[2];
|
||||
const float3 s2 = cross(d, e1);
|
||||
float rt = dot(e2, s2)*invdivisor;
|
||||
|
||||
/* compute refined position */
|
||||
P = P + D*rt;
|
||||
|
||||
if(isect->object != OBJECT_NONE) {
|
||||
# ifdef __OBJECT_MOTION__
|
||||
Transform tfm = ccl_fetch(sd, ob_tfm);
|
||||
# else
|
||||
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
|
||||
# endif
|
||||
|
||||
P = transform_point(&tfm, P);
|
||||
}
|
||||
|
||||
return P;
|
||||
#else
|
||||
return P + D*t;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Same as above, except that isect->t is assumed to be in object space for instancing */
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
# if defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86))
|
||||
ccl_device_noinline
|
||||
# else
|
||||
ccl_device_inline
|
||||
# endif
|
||||
float3 motion_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3])
|
||||
{
|
||||
float3 P = ray->P;
|
||||
float3 D = ray->D;
|
||||
float t = isect->t;
|
||||
|
||||
# ifdef __INTERSECTION_REFINE__
|
||||
if(isect->object != OBJECT_NONE) {
|
||||
# ifdef __OBJECT_MOTION__
|
||||
Transform tfm = ccl_fetch(sd, ob_itfm);
|
||||
# else
|
||||
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
|
||||
# endif
|
||||
|
||||
P = transform_point(&tfm, P);
|
||||
D = transform_direction(&tfm, D);
|
||||
D = normalize(D);
|
||||
}
|
||||
|
||||
P = P + D*t;
|
||||
|
||||
/* compute refined intersection distance */
|
||||
const float3 e1 = verts[0] - verts[2];
|
||||
const float3 e2 = verts[1] - verts[2];
|
||||
const float3 s1 = cross(D, e2);
|
||||
|
||||
const float invdivisor = 1.0f/dot(s1, e1);
|
||||
const float3 d = P - verts[2];
|
||||
const float3 s2 = cross(d, e1);
|
||||
float rt = dot(e2, s2)*invdivisor;
|
||||
|
||||
P = P + D*rt;
|
||||
|
||||
if(isect->object != OBJECT_NONE) {
|
||||
# ifdef __OBJECT_MOTION__
|
||||
Transform tfm = ccl_fetch(sd, ob_tfm);
|
||||
# else
|
||||
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
|
||||
# endif
|
||||
|
||||
P = transform_point(&tfm, P);
|
||||
}
|
||||
|
||||
return P;
|
||||
# else
|
||||
return P + D*t;
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Setup of motion triangle specific parts of ShaderData, moved into this one
|
||||
* function to more easily share computation of interpolated positions and
|
||||
* normals */
|
||||
|
||||
/* return 3 triangle vertex normals */
|
||||
ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool subsurface)
|
||||
{
|
||||
/* get shader */
|
||||
ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
|
||||
|
||||
/* get motion info */
|
||||
int numsteps, numverts;
|
||||
object_motion_info(kg, ccl_fetch(sd, object), &numsteps, &numverts, NULL);
|
||||
|
||||
/* figure out which steps we need to fetch and their interpolation factor */
|
||||
int maxstep = numsteps*2;
|
||||
int step = min((int)(ccl_fetch(sd, time)*maxstep), maxstep-1);
|
||||
float t = ccl_fetch(sd, time)*maxstep - step;
|
||||
|
||||
/* find attribute */
|
||||
AttributeElement elem;
|
||||
int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_POSITION, &elem);
|
||||
kernel_assert(offset != ATTR_STD_NOT_FOUND);
|
||||
|
||||
/* fetch vertex coordinates */
|
||||
float3 verts[3], next_verts[3];
|
||||
uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
|
||||
|
||||
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
|
||||
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
|
||||
|
||||
/* interpolate between steps */
|
||||
verts[0] = (1.0f - t)*verts[0] + t*next_verts[0];
|
||||
verts[1] = (1.0f - t)*verts[1] + t*next_verts[1];
|
||||
verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
|
||||
|
||||
/* compute refined position */
|
||||
#ifdef __SUBSURFACE__
|
||||
if(!subsurface)
|
||||
#endif
|
||||
ccl_fetch(sd, P) = motion_triangle_refine(kg, sd, isect, ray, verts);
|
||||
#ifdef __SUBSURFACE__
|
||||
else
|
||||
ccl_fetch(sd, P) = motion_triangle_refine_subsurface(kg, sd, isect, ray, verts);
|
||||
#endif
|
||||
|
||||
/* compute face normal */
|
||||
float3 Ng;
|
||||
if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED)
|
||||
Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
|
||||
else
|
||||
Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
|
||||
|
||||
ccl_fetch(sd, Ng) = Ng;
|
||||
ccl_fetch(sd, N) = Ng;
|
||||
|
||||
/* compute derivatives of P w.r.t. uv */
|
||||
#ifdef __DPDU__
|
||||
ccl_fetch(sd, dPdu) = (verts[0] - verts[2]);
|
||||
ccl_fetch(sd, dPdv) = (verts[1] - verts[2]);
|
||||
#endif
|
||||
|
||||
/* compute smooth normal */
|
||||
if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
|
||||
/* find attribute */
|
||||
AttributeElement elem;
|
||||
int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
|
||||
kernel_assert(offset != ATTR_STD_NOT_FOUND);
|
||||
|
||||
/* fetch vertex coordinates */
|
||||
float3 normals[3], next_normals[3];
|
||||
motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
|
||||
motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals);
|
||||
|
||||
/* interpolate between steps */
|
||||
normals[0] = (1.0f - t)*normals[0] + t*next_normals[0];
|
||||
normals[1] = (1.0f - t)*normals[1] + t*next_normals[1];
|
||||
normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
|
||||
|
||||
/* interpolate between vertices */
|
||||
float u = ccl_fetch(sd, u);
|
||||
float v = ccl_fetch(sd, v);
|
||||
float w = 1.0f - u - v;
|
||||
ccl_fetch(sd, N) = (u*normals[0] + v*normals[1] + w*normals[2]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Ray intersection. We simply compute the vertex positions at the given ray
|
||||
* time and do a ray intersection with the resulting triangle */
|
||||
|
||||
ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection *isect,
|
||||
float3 P, float3 dir, float time, uint visibility, int object, int triAddr)
|
||||
{
|
||||
/* primitive index for vertex location lookup */
|
||||
int prim = kernel_tex_fetch(__prim_index, triAddr);
|
||||
int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, triAddr): object;
|
||||
|
||||
/* get vertex locations for intersection */
|
||||
float3 verts[3];
|
||||
motion_triangle_vertices(kg, fobject, prim, time, verts);
|
||||
|
||||
/* ray-triangle intersection, unoptimized */
|
||||
float t, u, v;
|
||||
|
||||
if(ray_triangle_intersect_uv(P, dir, isect->t, verts[2], verts[0], verts[1], &u, &v, &t)) {
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
/* visibility flag test. we do it here under the assumption
|
||||
* that most triangles are culled by node flags */
|
||||
if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
|
||||
#endif
|
||||
{
|
||||
isect->t = t;
|
||||
isect->u = u;
|
||||
isect->v = v;
|
||||
isect->prim = triAddr;
|
||||
isect->object = object;
|
||||
isect->type = PRIMITIVE_MOTION_TRIANGLE;
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Special ray intersection routines for subsurface scattering. In that case we
|
||||
* only want to intersect with primitives in the same object, and if case of
|
||||
* multiple hits we pick a single random primitive as the intersection point. */
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
ccl_device_inline void motion_triangle_intersect_subsurface(
|
||||
KernelGlobals *kg,
|
||||
SubsurfaceIntersection *ss_isect,
|
||||
float3 P,
|
||||
float3 dir,
|
||||
float time,
|
||||
int object,
|
||||
int triAddr,
|
||||
float tmax,
|
||||
uint *lcg_state,
|
||||
int max_hits)
|
||||
{
|
||||
/* primitive index for vertex location lookup */
|
||||
int prim = kernel_tex_fetch(__prim_index, triAddr);
|
||||
int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, triAddr): object;
|
||||
|
||||
/* get vertex locations for intersection */
|
||||
float3 verts[3];
|
||||
motion_triangle_vertices(kg, fobject, prim, time, verts);
|
||||
|
||||
/* ray-triangle intersection, unoptimized */
|
||||
float t, u, v;
|
||||
|
||||
if(ray_triangle_intersect_uv(P, dir, tmax, verts[2], verts[0], verts[1], &u, &v, &t)) {
|
||||
for(int i = min(max_hits, ss_isect->num_hits) - 1; i >= 0; --i) {
|
||||
if(ss_isect->hits[i].t == t) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
ss_isect->num_hits++;
|
||||
|
||||
int hit;
|
||||
|
||||
if(ss_isect->num_hits <= max_hits) {
|
||||
hit = ss_isect->num_hits - 1;
|
||||
}
|
||||
else {
|
||||
/* reservoir sampling: if we are at the maximum number of
|
||||
* hits, randomly replace element or skip it */
|
||||
hit = lcg_step_uint(lcg_state) % ss_isect->num_hits;
|
||||
|
||||
if(hit >= max_hits)
|
||||
return;
|
||||
}
|
||||
|
||||
/* record intersection */
|
||||
Intersection *isect = &ss_isect->hits[hit];
|
||||
isect->t = t;
|
||||
isect->u = u;
|
||||
isect->v = v;
|
||||
isect->prim = triAddr;
|
||||
isect->object = object;
|
||||
isect->type = PRIMITIVE_MOTION_TRIANGLE;
|
||||
|
||||
/* Record geometric normal. */
|
||||
ss_isect->Ng[hit] = normalize(cross(verts[1] - verts[0],
|
||||
verts[2] - verts[0]));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
|
@@ -1,280 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2016 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/* Motion Triangle Primitive
|
||||
*
|
||||
* These are stored as regular triangles, plus extra positions and normals at
|
||||
* times other than the frame center. Computing the triangle vertex positions
|
||||
* or normals at a given ray time is a matter of interpolation of the two steps
|
||||
* between which the ray time lies.
|
||||
*
|
||||
* The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION
|
||||
* and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes.
|
||||
*/
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Refine triangle intersection to more precise hit point. For rays that travel
|
||||
* far the precision is often not so good, this reintersects the primitive from
|
||||
* a closer distance.
|
||||
*/
|
||||
|
||||
ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const Intersection *isect,
|
||||
const Ray *ray,
|
||||
float3 verts[3])
|
||||
{
|
||||
float3 P = ray->P;
|
||||
float3 D = ray->D;
|
||||
float t = isect->t;
|
||||
|
||||
#ifdef __INTERSECTION_REFINE__
|
||||
if(isect->object != OBJECT_NONE) {
|
||||
if(UNLIKELY(t == 0.0f)) {
|
||||
return P;
|
||||
}
|
||||
# ifdef __OBJECT_MOTION__
|
||||
Transform tfm = sd->ob_itfm;
|
||||
# else
|
||||
Transform tfm = object_fetch_transform(kg,
|
||||
isect->object,
|
||||
OBJECT_INVERSE_TRANSFORM);
|
||||
# endif
|
||||
|
||||
P = transform_point(&tfm, P);
|
||||
D = transform_direction(&tfm, D*t);
|
||||
D = normalize_len(D, &t);
|
||||
}
|
||||
|
||||
P = P + D*t;
|
||||
|
||||
/* Compute refined intersection distance. */
|
||||
const float3 e1 = verts[0] - verts[2];
|
||||
const float3 e2 = verts[1] - verts[2];
|
||||
const float3 s1 = cross(D, e2);
|
||||
|
||||
const float invdivisor = 1.0f/dot(s1, e1);
|
||||
const float3 d = P - verts[2];
|
||||
const float3 s2 = cross(d, e1);
|
||||
float rt = dot(e2, s2)*invdivisor;
|
||||
|
||||
/* Compute refined position. */
|
||||
P = P + D*rt;
|
||||
|
||||
if(isect->object != OBJECT_NONE) {
|
||||
# ifdef __OBJECT_MOTION__
|
||||
Transform tfm = sd->ob_tfm;
|
||||
# else
|
||||
Transform tfm = object_fetch_transform(kg,
|
||||
isect->object,
|
||||
OBJECT_TRANSFORM);
|
||||
# endif
|
||||
|
||||
P = transform_point(&tfm, P);
|
||||
}
|
||||
|
||||
return P;
|
||||
#else
|
||||
return P + D*t;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Same as above, except that isect->t is assumed to be in object space
|
||||
* for instancing.
|
||||
*/
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
# if defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86))
|
||||
ccl_device_noinline
|
||||
# else
|
||||
ccl_device_inline
|
||||
# endif
|
||||
float3 motion_triangle_refine_subsurface(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const Intersection *isect,
|
||||
const Ray *ray,
|
||||
float3 verts[3])
|
||||
{
|
||||
float3 P = ray->P;
|
||||
float3 D = ray->D;
|
||||
float t = isect->t;
|
||||
|
||||
# ifdef __INTERSECTION_REFINE__
|
||||
if(isect->object != OBJECT_NONE) {
|
||||
# ifdef __OBJECT_MOTION__
|
||||
Transform tfm = sd->ob_itfm;
|
||||
# else
|
||||
Transform tfm = object_fetch_transform(kg,
|
||||
isect->object,
|
||||
OBJECT_INVERSE_TRANSFORM);
|
||||
# endif
|
||||
|
||||
P = transform_point(&tfm, P);
|
||||
D = transform_direction(&tfm, D);
|
||||
D = normalize(D);
|
||||
}
|
||||
|
||||
P = P + D*t;
|
||||
|
||||
/* compute refined intersection distance */
|
||||
const float3 e1 = verts[0] - verts[2];
|
||||
const float3 e2 = verts[1] - verts[2];
|
||||
const float3 s1 = cross(D, e2);
|
||||
|
||||
const float invdivisor = 1.0f/dot(s1, e1);
|
||||
const float3 d = P - verts[2];
|
||||
const float3 s2 = cross(d, e1);
|
||||
float rt = dot(e2, s2)*invdivisor;
|
||||
|
||||
P = P + D*rt;
|
||||
|
||||
if(isect->object != OBJECT_NONE) {
|
||||
# ifdef __OBJECT_MOTION__
|
||||
Transform tfm = sd->ob_tfm;
|
||||
# else
|
||||
Transform tfm = object_fetch_transform(kg,
|
||||
isect->object,
|
||||
OBJECT_TRANSFORM);
|
||||
# endif
|
||||
|
||||
P = transform_point(&tfm, P);
|
||||
}
|
||||
|
||||
return P;
|
||||
# else /* __INTERSECTION_REFINE__ */
|
||||
return P + D*t;
|
||||
# endif /* __INTERSECTION_REFINE__ */
|
||||
}
|
||||
#endif /* __SUBSURFACE__ */
|
||||
|
||||
|
||||
/* Ray intersection. We simply compute the vertex positions at the given ray
|
||||
* time and do a ray intersection with the resulting triangle.
|
||||
*/
|
||||
|
||||
ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
|
||||
Intersection *isect,
|
||||
float3 P,
|
||||
float3 dir,
|
||||
float time,
|
||||
uint visibility,
|
||||
int object,
|
||||
int prim_addr)
|
||||
{
|
||||
/* Primitive index for vertex location lookup. */
|
||||
int prim = kernel_tex_fetch(__prim_index, prim_addr);
|
||||
int fobject = (object == OBJECT_NONE)
|
||||
? kernel_tex_fetch(__prim_object, prim_addr)
|
||||
: object;
|
||||
/* Get vertex locations for intersection. */
|
||||
float3 verts[3];
|
||||
motion_triangle_vertices(kg, fobject, prim, time, verts);
|
||||
/* Ray-triangle intersection, unoptimized. */
|
||||
float t, u, v;
|
||||
if(ray_triangle_intersect_uv(P,
|
||||
dir,
|
||||
isect->t,
|
||||
verts[2], verts[0], verts[1],
|
||||
&u, &v, &t))
|
||||
{
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
/* Visibility flag test. we do it here under the assumption
|
||||
* that most triangles are culled by node flags.
|
||||
*/
|
||||
if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
|
||||
#endif
|
||||
{
|
||||
isect->t = t;
|
||||
isect->u = u;
|
||||
isect->v = v;
|
||||
isect->prim = prim_addr;
|
||||
isect->object = object;
|
||||
isect->type = PRIMITIVE_MOTION_TRIANGLE;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Special ray intersection routines for subsurface scattering. In that case we
|
||||
* only want to intersect with primitives in the same object, and if case of
|
||||
* multiple hits we pick a single random primitive as the intersection point.
|
||||
*/
|
||||
#ifdef __SUBSURFACE__
|
||||
ccl_device_inline void motion_triangle_intersect_subsurface(
|
||||
KernelGlobals *kg,
|
||||
SubsurfaceIntersection *ss_isect,
|
||||
float3 P,
|
||||
float3 dir,
|
||||
float time,
|
||||
int object,
|
||||
int prim_addr,
|
||||
float tmax,
|
||||
uint *lcg_state,
|
||||
int max_hits)
|
||||
{
|
||||
/* Primitive index for vertex location lookup. */
|
||||
int prim = kernel_tex_fetch(__prim_index, prim_addr);
|
||||
int fobject = (object == OBJECT_NONE)
|
||||
? kernel_tex_fetch(__prim_object, prim_addr)
|
||||
: object;
|
||||
/* Get vertex locations for intersection. */
|
||||
float3 verts[3];
|
||||
motion_triangle_vertices(kg, fobject, prim, time, verts);
|
||||
/* Ray-triangle intersection, unoptimized. */
|
||||
float t, u, v;
|
||||
if(ray_triangle_intersect_uv(P,
|
||||
dir,
|
||||
tmax,
|
||||
verts[2], verts[0], verts[1],
|
||||
&u, &v, &t))
|
||||
{
|
||||
for(int i = min(max_hits, ss_isect->num_hits) - 1; i >= 0; --i) {
|
||||
if(ss_isect->hits[i].t == t) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
ss_isect->num_hits++;
|
||||
int hit;
|
||||
if(ss_isect->num_hits <= max_hits) {
|
||||
hit = ss_isect->num_hits - 1;
|
||||
}
|
||||
else {
|
||||
/* Reservoir sampling: if we are at the maximum number of
|
||||
* hits, randomly replace element or skip it.
|
||||
*/
|
||||
hit = lcg_step_uint(lcg_state) % ss_isect->num_hits;
|
||||
|
||||
if(hit >= max_hits)
|
||||
return;
|
||||
}
|
||||
/* Record intersection. */
|
||||
Intersection *isect = &ss_isect->hits[hit];
|
||||
isect->t = t;
|
||||
isect->u = u;
|
||||
isect->v = v;
|
||||
isect->prim = prim_addr;
|
||||
isect->object = object;
|
||||
isect->type = PRIMITIVE_MOTION_TRIANGLE;
|
||||
/* Record geometric normal. */
|
||||
ss_isect->Ng[hit] = normalize(cross(verts[1] - verts[0],
|
||||
verts[2] - verts[0]));
|
||||
}
|
||||
}
|
||||
#endif /* __SUBSURFACE__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -1,123 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2016 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/* Motion Triangle Primitive
|
||||
*
|
||||
* These are stored as regular triangles, plus extra positions and normals at
|
||||
* times other than the frame center. Computing the triangle vertex positions
|
||||
* or normals at a given ray time is a matter of interpolation of the two steps
|
||||
* between which the ray time lies.
|
||||
*
|
||||
* The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION
|
||||
* and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes.
|
||||
*/
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Setup of motion triangle specific parts of ShaderData, moved into this one
|
||||
* function to more easily share computation of interpolated positions and
|
||||
* normals */
|
||||
|
||||
/* return 3 triangle vertex normals */
|
||||
ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg,
|
||||
ShaderData *sd, const
|
||||
Intersection *isect,
|
||||
const Ray *ray,
|
||||
bool subsurface)
|
||||
{
|
||||
/* Get shader. */
|
||||
sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
|
||||
/* Get motion info. */
|
||||
/* TODO(sergey): This logic is really similar to motion_triangle_vertices(),
|
||||
* can we de-duplicate something here?
|
||||
*/
|
||||
int numsteps, numverts;
|
||||
object_motion_info(kg, sd->object, &numsteps, &numverts, NULL);
|
||||
/* Figure out which steps we need to fetch and their interpolation factor. */
|
||||
int maxstep = numsteps*2;
|
||||
int step = min((int)(sd->time*maxstep), maxstep-1);
|
||||
float t = sd->time*maxstep - step;
|
||||
/* Find attribute. */
|
||||
AttributeElement elem;
|
||||
int offset = find_attribute_motion(kg, sd->object,
|
||||
ATTR_STD_MOTION_VERTEX_POSITION,
|
||||
&elem);
|
||||
kernel_assert(offset != ATTR_STD_NOT_FOUND);
|
||||
/* Fetch vertex coordinates. */
|
||||
float3 verts[3], next_verts[3];
|
||||
uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
|
||||
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
|
||||
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
|
||||
/* Interpolate between steps. */
|
||||
verts[0] = (1.0f - t)*verts[0] + t*next_verts[0];
|
||||
verts[1] = (1.0f - t)*verts[1] + t*next_verts[1];
|
||||
verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
|
||||
/* Compute refined position. */
|
||||
#ifdef __SUBSURFACE__
|
||||
if(subsurface) {
|
||||
sd->P = motion_triangle_refine_subsurface(kg,
|
||||
sd,
|
||||
isect,
|
||||
ray,
|
||||
verts);
|
||||
}
|
||||
else
|
||||
#endif /* __SUBSURFACE__*/
|
||||
{
|
||||
sd->P = motion_triangle_refine(kg, sd, isect, ray, verts);
|
||||
}
|
||||
/* Compute face normal. */
|
||||
float3 Ng;
|
||||
if(sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
|
||||
Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
|
||||
}
|
||||
else {
|
||||
Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
|
||||
}
|
||||
sd->Ng = Ng;
|
||||
sd->N = Ng;
|
||||
/* Compute derivatives of P w.r.t. uv. */
|
||||
#ifdef __DPDU__
|
||||
sd->dPdu = (verts[0] - verts[2]);
|
||||
sd->dPdv = (verts[1] - verts[2]);
|
||||
#endif
|
||||
/* Compute smooth normal. */
|
||||
if(sd->shader & SHADER_SMOOTH_NORMAL) {
|
||||
/* Find attribute. */
|
||||
AttributeElement elem;
|
||||
int offset = find_attribute_motion(kg,
|
||||
sd->object,
|
||||
ATTR_STD_MOTION_VERTEX_NORMAL,
|
||||
&elem);
|
||||
kernel_assert(offset != ATTR_STD_NOT_FOUND);
|
||||
/* Fetch vertex coordinates. */
|
||||
float3 normals[3], next_normals[3];
|
||||
motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
|
||||
motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals);
|
||||
/* Interpolate between steps. */
|
||||
normals[0] = (1.0f - t)*normals[0] + t*next_normals[0];
|
||||
normals[1] = (1.0f - t)*normals[1] + t*next_normals[1];
|
||||
normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
|
||||
/* Interpolate between vertices. */
|
||||
float u = sd->u;
|
||||
float v = sd->v;
|
||||
float w = 1.0f - u - v;
|
||||
sd->N = (u*normals[0] + v*normals[1] + w*normals[2]);
|
||||
}
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user