Compare commits

..

37 Commits

Author SHA1 Message Date
a2ed635a73 Fix typo and change .enabled to .active 2017-02-18 04:12:29 -02:00
b3aead8fd7 Use parenthesis for bit shifts 2017-02-14 18:13:08 -02:00
733b5b8c66 Remove unused weight_components 2017-02-14 18:13:08 -02:00
5a17cb4c08 Use poll callback to disable bind operator instead of Python 2017-02-14 18:13:08 -02:00
e5e44c01f2 icon 2017-02-06 01:23:18 -02:00
a3e32e2ab5 Review: Multithread deform code 2017-01-25 04:05:53 -02:00
e843f42e66 Review: Cleanup 2017-01-25 04:05:52 -02:00
96f6ec07fb Review: Add infinite weight flags enum 2017-01-25 04:05:52 -02:00
c38e19ca67 Review: Move stuff to helper func and more cleanup 2017-01-25 04:05:52 -02:00
96d66c7e4d Review: Optimize numpoly calculation 2017-01-25 04:05:52 -02:00
46821f072d Review: Join allocations and some bpoly refactor 2017-01-25 04:05:52 -02:00
f870343208 Review: Combine allocations and minor cleanup 2017-01-25 04:05:52 -02:00
cf1a7e3944 Review: Report errors in UI and some more cleanup 2017-01-25 04:05:52 -02:00
cf660b2a02 Review: Fix depsgraph relation 2017-01-25 04:05:52 -02:00
6f3957770d Review: Fix indentations and use MEM_SAFE_FREE 2017-01-25 04:05:52 -02:00
7608f366c7 Review: Replace weight_components with individual variables 2017-01-25 04:05:52 -02:00
8c220c57f9 Review: More cleanup... 2017-01-25 04:05:52 -02:00
a300f80043 Review: Inline loop indices
Also fixed endian switch sign, and UI Python thingy...
2017-01-25 04:05:52 -02:00
22ce298d73 General cleanup (unsigned stuff and loop counter inlining) 2017-01-15 16:54:46 -02:00
3469aa47c1 Remove warnings 2017-01-15 16:54:46 -02:00
097a560bc9 Fix silly mistake in nearestVert 2017-01-15 16:54:45 -02:00
1b7623fc06 Change angle function calls in sdef 2017-01-14 01:51:55 -02:00
c546256563 Change angle function call in 3d to 2d mapping function 2017-01-14 01:51:55 -02:00
5c263a9050 Split interp_weights_face_v3 into specific functions for tris and quads 2017-01-11 15:52:52 -02:00
8745cd825a Remove custom weight interp func in favor of Blender's built-in implementation 2017-01-11 15:52:32 -02:00
28622ae81e Fix VS 2015 issue (change isnanf to isnan) 2017-01-11 13:05:53 -02:00
d6c7163c06 Fix 2d mapping function's name 2017-01-11 03:59:30 -02:00
0bb57759ec Replace "cent" functions from math_geom with "mid" ones from math_vector 2017-01-10 20:29:22 -02:00
5e1d438d5e Constify some stuff (for clarity and correctness) 2017-01-10 16:42:34 -02:00
0721bc0ac4 Silly const mistake (missed in refactor...) 2017-01-03 20:02:49 -02:00
7ca0894a17 Implement target poly influence interpolation 2017-01-03 19:27:08 -02:00
751496437b Add 3d to 2d plane mapping functions to math lib 2017-01-03 19:26:03 -02:00
3014601f3b Fix out of bounds memory access in interp_weights_face_v3
interp_weights_face_v3 required a length four array for weights even when
calculating weights for a tri, otherwise, it would access unkown memory.
This fix allows a weight array of size three to be passed when only
calculating tri weights.
2017-01-03 19:22:08 -02:00
b80971ce10 Initial Surface Deform Modifier implementation 2016-11-29 23:04:40 -02:00
68f5ce194b Add cent_poly_v3 function 2016-11-27 00:44:48 -02:00
1e9003aea5 Add is_poly_convex_v3 function 2016-11-25 14:56:09 -02:00
95701b0b04 Fix (unreported) looptri array not being recalculated in ccgDM and emDM
In ccgDM and emDM, looptri array recalculation was being handled
directly by `*DM_getLoopTriArray` (`getLoopTriArray` callback), while
`*DM_recalcLoopTri` (`recalcLoopTri` callback) was doing nothing.

This results in the array not being recalculated when other functions
that depend on the array data are called. These functions, such as
`getNumLoopTris`, call `recalcLoopTri` to ensure the data is up to date,
but in the case of CCGDerivedMesh that was doing nothing.

This moves all the recalculation code to `ccgDM_recalcLoopTri` and makes
`ccgDM_getLoopTriArray` call that.

Reviewed By: mont29

Differential Revision: https://developer.blender.org/D2375
2016-11-25 14:49:58 -02:00
940 changed files with 16993 additions and 28389 deletions

View File

@@ -445,7 +445,6 @@ option(WITH_BOOST "Enable features depending on boost" ON)
# Unit testsing
option(WITH_GTESTS "Enable GTest unit testing" OFF)
option(WITH_OPENGL_TESTS "Enable OpenGL related unit testing (Experimental)" OFF)
# Documentation
@@ -519,20 +518,18 @@ endif()
option(WITH_LEGACY_DEPSGRAPH "Build Blender with legacy dependency graph" ON)
mark_as_advanced(WITH_LEGACY_DEPSGRAPH)
if(WIN32)
# Use hardcoded paths or find_package to find externals
option(WITH_WINDOWS_FIND_MODULES "Use find_package to locate libraries" OFF)
mark_as_advanced(WITH_WINDOWS_FIND_MODULES)
# Use hardcoded paths or find_package to find externals
option(WITH_WINDOWS_FIND_MODULES "Use find_package to locate libraries" OFF)
mark_as_advanced(WITH_WINDOWS_FIND_MODULES)
option(WITH_WINDOWS_CODESIGN "Use signtool to sign the final binary." OFF)
mark_as_advanced(WITH_WINDOWS_CODESIGN)
option(WITH_WINDOWS_CODESIGN "Use signtool to sign the final binary." OFF)
mark_as_advanced(WITH_WINDOWS_CODESIGN)
set(WINDOWS_CODESIGN_PFX CACHE FILEPATH "Path to pfx file to use for codesigning.")
mark_as_advanced(WINDOWS_CODESIGN_PFX)
set(WINDOWS_CODESIGN_PFX CACHE FILEPATH "Path to pfx file to use for codesigning.")
mark_as_advanced(WINDOWS_CODESIGN_PFX)
set(WINDOWS_CODESIGN_PFX_PASSWORD CACHE STRING "password for pfx file used for codesigning.")
mark_as_advanced(WINDOWS_CODESIGN_PFX_PASSWORD)
endif()
set(WINDOWS_CODESIGN_PFX_PASSWORD CACHE STRING "password for pfx file used for codesigning.")
mark_as_advanced(WINDOWS_CODESIGN_PFX_PASSWORD)
# avoid using again
option_defaults_clear()
@@ -631,12 +628,6 @@ if(APPLE)
# to silence sdk not found warning, just overrides CMAKE_OSX_SYSROOT
set(CMAKE_XCODE_ATTRIBUTE_SDKROOT macosx${OSX_SYSTEM})
endif()
# QuickTime framework is no longer available in SDK 10.12+
if(WITH_CODEC_QUICKTIME AND ${OSX_SYSTEM} VERSION_GREATER 10.11)
set(WITH_CODEC_QUICKTIME OFF)
message(STATUS "QuickTime not supported by SDK ${OSX_SYSTEM}, disabling WITH_CODEC_QUICKTIME")
endif()
endif()
if(OSX_SYSTEM MATCHES 10.9)
@@ -726,7 +717,7 @@ if(NOT WITH_BOOST)
macro(set_and_warn
_setting _val)
if(${${_setting}})
message(STATUS "'WITH_BOOST' is disabled: forcing 'set(${_setting} ${_val})'")
message(STATUS "'WITH_BOOST' is disabled: forceing 'set(${_setting} ${_val})'")
endif()
set(${_setting} ${_val})
endmacro()
@@ -870,7 +861,7 @@ endif()
# linux only, not cached
set(WITH_BINRELOC OFF)
# MACOSX only, set to avoid uninitialized
# MAXOSX only, set to avoid uninitialized
set(EXETYPE "")
# C/C++ flags
@@ -927,7 +918,7 @@ if(WITH_X11)
if(WITH_X11_ALPHA)
find_library(X11_Xrender_LIB Xrender ${X11_LIB_SEARCH_PATH})
mark_as_advanced(X11_Xrender_LIB)
if(X11_Xrender_LIB)
if (X11_Xrender_LIB)
list(APPEND PLATFORM_LINKLIBS ${X11_Xrender_LIB})
else()
set(WITH_X11_ALPHA OFF)
@@ -1576,7 +1567,7 @@ if(WITH_CXX11)
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
# TODO(sergey): Do we want c++11 or gnu-c++11 here?
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
elseif(MSVC)
elseif(MSVC12)
# Nothing special is needed, C++11 features are available by default.
else()
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER_ID} is not supported for C++11 build yet")

View File

@@ -1,4 +1,4 @@
# -*- mode: gnumakefile; tab-width: 4; indent-tabs-mode: t; -*-
# -*- mode: gnumakefile; tab-width: 8; indent-tabs-mode: t; -*-
# vim: tabstop=4
#
# ##### BEGIN GPL LICENSE BLOCK #####
@@ -113,7 +113,7 @@ CMAKE_CONFIG = cmake $(BUILD_CMAKE_ARGS) \
# X11 spesific
ifdef DISPLAY
CMAKE_CONFIG_TOOL = cmake-gui
else
else
CMAKE_CONFIG_TOOL = ccmake
endif
@@ -127,7 +127,7 @@ all: .FORCE
# # if test ! -f $(BUILD_DIR)/CMakeCache.txt ; then \
# # $(CMAKE_CONFIG); \
# # fi
# # do this always incase of failed initial build, could be smarter here...
@$(CMAKE_CONFIG)

View File

@@ -289,7 +289,7 @@ NO_BUILD=false
NO_CONFIRM=false
USE_CXX11=false
PYTHON_VERSION="3.5.2"
PYTHON_VERSION="3.5.1"
PYTHON_VERSION_MIN="3.5"
PYTHON_FORCE_BUILD=false
PYTHON_FORCE_REBUILD=false
@@ -322,7 +322,7 @@ OPENEXR_FORCE_REBUILD=false
OPENEXR_SKIP=false
_with_built_openexr=false
OIIO_VERSION="1.7.8"
OIIO_VERSION="1.6.9"
OIIO_VERSION_MIN="1.6.0"
OIIO_VERSION_MAX="1.9.0" # UNKNOWN currently # Not supported by current OSL...
OIIO_FORCE_BUILD=false
@@ -337,14 +337,14 @@ LLVM_FORCE_REBUILD=false
LLVM_SKIP=false
# OSL needs to be compiled for now!
OSL_VERSION="1.7.5"
OSL_VERSION="1.7.3"
OSL_VERSION_MIN=$OSL_VERSION
OSL_FORCE_BUILD=false
OSL_FORCE_REBUILD=false
OSL_SKIP=false
# OpenSubdiv needs to be compiled for now
OSD_VERSION="3.1.1"
OSD_VERSION="3.0.5"
OSD_VERSION_MIN=$OSD_VERSION
OSD_FORCE_BUILD=false
OSD_FORCE_REBUILD=false
@@ -372,7 +372,7 @@ OPENCOLLADA_FORCE_BUILD=false
OPENCOLLADA_FORCE_REBUILD=false
OPENCOLLADA_SKIP=false
FFMPEG_VERSION="3.2.1"
FFMPEG_VERSION="2.8.4"
FFMPEG_VERSION_MIN="2.8.4"
FFMPEG_FORCE_BUILD=false
FFMPEG_FORCE_REBUILD=false
@@ -795,7 +795,7 @@ CXXFLAGS_BACK=$CXXFLAGS
if [ "$USE_CXX11" = true ]; then
WARNING "You are trying to use c++11, this *should* go smoothely with any very recent distribution
However, if you are experiencing linking errors (also when building Blender itself), please try the following:
* Re-run this script with '--build-all --force-all' options.
* Re-run this script with `--build-all --force-all` options.
* Ensure your gcc version is at the very least 4.8, if possible you should really rather use gcc-5.1 or above.
Please note that until the transition to C++11-built libraries if completed in your distribution, situation will
@@ -2480,7 +2480,7 @@ compile_FFmpeg() {
--enable-avfilter --disable-vdpau \
--disable-bzlib --disable-libgsm --disable-libspeex \
--enable-pthreads --enable-zlib --enable-stripping --enable-runtime-cpudetect \
--disable-vaapi --disable-nonfree --enable-gpl \
--disable-vaapi --disable-libfaac --disable-nonfree --enable-gpl \
--disable-postproc --disable-librtmp --disable-libopencore-amrnb \
--disable-libopencore-amrwb --disable-libdc1394 --disable-version3 --disable-outdev=sdl \
--disable-libxcb \

View File

@@ -297,8 +297,8 @@ def generic_builder(id, libdir='', branch='', rsync=False):
# Builders
add_builder(c, 'mac_x86_64_10_6_cmake', 'darwin-9.x.universal', generic_builder, hour=5)
# add_builder(c, 'linux_glibc211_i686_cmake', '', generic_builder, hour=1)
# add_builder(c, 'linux_glibc211_x86_64_cmake', '', generic_builder, hour=2)
add_builder(c, 'linux_glibc211_i686_cmake', '', generic_builder, hour=1)
add_builder(c, 'linux_glibc211_x86_64_cmake', '', generic_builder, hour=2)
add_builder(c, 'linux_glibc219_i686_cmake', '', generic_builder, hour=3)
add_builder(c, 'linux_glibc219_x86_64_cmake', '', generic_builder, hour=4)
add_builder(c, 'win32_cmake_vc2013', 'windows_vc12', generic_builder, hour=3)

View File

@@ -72,8 +72,10 @@ if 'cmake' in builder:
# Set up OSX architecture
if builder.endswith('x86_64_10_6_cmake'):
cmake_extra_options.append('-DCMAKE_OSX_ARCHITECTURES:STRING=x86_64')
cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda8-hack/bin/nvcc')
cmake_extra_options.append('-DWITH_CODEC_QUICKTIME=OFF')
cmake_extra_options.append('-DCMAKE_OSX_DEPLOYMENT_TARGET=10.6')
build_cubins = False
elif builder.startswith('win'):

View File

@@ -45,7 +45,7 @@ macro(BLENDER_SRC_GTEST_EX NAME SRC EXTRA_LIBS DO_ADD_TEST)
RUNTIME_OUTPUT_DIRECTORY_DEBUG "${TESTS_OUTPUT_DIR}"
INCLUDE_DIRECTORIES "${TEST_INC}")
if(${DO_ADD_TEST})
add_test(NAME ${NAME}_test COMMAND ${TESTS_OUTPUT_DIR}/${NAME}_test WORKING_DIRECTORY $<TARGET_FILE_DIR:blender>)
add_test(${NAME}_test ${TESTS_OUTPUT_DIR}/${NAME}_test)
endif()
endif()
endmacro()

View File

@@ -56,7 +56,7 @@ if(EXISTS ${SOURCE_DIR}/.git)
string(REGEX REPLACE "[\r\n]+" ";" _git_contains_branches "${_git_contains_branches}")
string(REGEX REPLACE ";[ \t]+" ";" _git_contains_branches "${_git_contains_branches}")
foreach(_branch ${_git_contains_branches})
if(NOT "${_branch}" MATCHES "\\(HEAD.*")
if (NOT "${_branch}" MATCHES "\\(HEAD.*")
set(MY_WC_BRANCH "${_branch}")
break()
endif()

View File

@@ -416,7 +416,14 @@ function(setup_liblinks
target_link_libraries(${target} ${OPENCOLORIO_LIBRARIES})
endif()
if(WITH_OPENSUBDIV OR WITH_CYCLES_OPENSUBDIV)
if(WIN32 AND NOT UNIX)
file_list_suffix(OPENSUBDIV_LIBRARIES_DEBUG "${OPENSUBDIV_LIBRARIES}" "_d")
target_link_libraries_debug(${target} "${OPENSUBDIV_LIBRARIES_DEBUG}")
target_link_libraries_optimized(${target} "${OPENSUBDIV_LIBRARIES}")
unset(OPENSUBDIV_LIBRARIES_DEBUG)
else()
target_link_libraries(${target} ${OPENSUBDIV_LIBRARIES})
endif()
endif()
if(WITH_OPENVDB)
target_link_libraries(${target} ${OPENVDB_LIBRARIES} ${TBB_LIBRARIES})
@@ -1574,24 +1581,24 @@ macro(openmp_delayload
endmacro()
MACRO(WINDOWS_SIGN_TARGET target)
if(WITH_WINDOWS_CODESIGN)
if(!SIGNTOOL_EXE)
if (WITH_WINDOWS_CODESIGN)
if (!SIGNTOOL_EXE)
error("Codesigning is enabled, but signtool is not found")
else()
if(WINDOWS_CODESIGN_PFX_PASSWORD)
if (WINDOWS_CODESIGN_PFX_PASSWORD)
set(CODESIGNPASSWORD /p ${WINDOWS_CODESIGN_PFX_PASSWORD})
else()
if($ENV{PFXPASSWORD})
if ($ENV{PFXPASSWORD})
set(CODESIGNPASSWORD /p $ENV{PFXPASSWORD})
else()
message(FATAL_ERROR "WITH_WINDOWS_CODESIGN is on but WINDOWS_CODESIGN_PFX_PASSWORD not set, and environment variable PFXPASSWORD not found, unable to sign code.")
message( FATAL_ERROR "WITH_WINDOWS_CODESIGN is on but WINDOWS_CODESIGN_PFX_PASSWORD not set, and environment variable PFXPASSWORD not found, unable to sign code.")
endif()
endif()
add_custom_command(TARGET ${target}
POST_BUILD
COMMAND ${SIGNTOOL_EXE} sign /f ${WINDOWS_CODESIGN_PFX} ${CODESIGNPASSWORD} $<TARGET_FILE:${target}>
VERBATIM
)
POST_BUILD
COMMAND ${SIGNTOOL_EXE} sign /f ${WINDOWS_CODESIGN_PFX} ${CODESIGNPASSWORD} $<TARGET_FILE:${target}>
VERBATIM
)
endif()
endif()
ENDMACRO()

View File

@@ -1,7 +1,5 @@
string(TIMESTAMP CURRENT_YEAR "%Y")
set(PROJECT_DESCRIPTION "Blender is the free and open source 3D creation suite software.")
set(PROJECT_COPYRIGHT "Copyright (C) 2001-${CURRENT_YEAR} Blender Foundation")
set(PROJECT_DESCRIPTION "Blender is a very fast and versatile 3D modeller/renderer.")
set(PROJECT_COPYRIGHT "Copyright (C) 2001-2012 Blender Foundation")
set(PROJECT_CONTACT "foundation@blender.org")
set(PROJECT_VENDOR "Blender Foundation")
@@ -40,8 +38,8 @@ unset(MY_WC_HASH)
# Force Package Name
execute_process(COMMAND date "+%Y%m%d" OUTPUT_VARIABLE CPACK_DATE OUTPUT_STRIP_TRAILING_WHITESPACE)
string(TOLOWER ${PROJECT_NAME} PROJECT_NAME_LOWER)
if(MSVC)
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
if (MSVC)
if ("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
set(PACKAGE_ARCH windows64)
else()
set(PACKAGE_ARCH windows32)
@@ -50,7 +48,7 @@ else(MSVC)
set(PACKAGE_ARCH ${CMAKE_SYSTEM_PROCESSOR})
endif()
if(CPACK_OVERRIDE_PACKAGENAME)
if (CPACK_OVERRIDE_PACKAGENAME)
set(CPACK_PACKAGE_FILE_NAME ${CPACK_OVERRIDE_PACKAGENAME}-${PACKAGE_ARCH})
else()
set(CPACK_PACKAGE_FILE_NAME ${PROJECT_NAME_LOWER}-${MAJOR_VERSION}.${MINOR_VERSION}.${PATCH_VERSION}-git${CPACK_DATE}.${BUILD_REV}-${PACKAGE_ARCH})
@@ -137,3 +135,4 @@ unset(MINOR_VERSION)
unset(PATCH_VERSION)
unset(BUILD_REV)

View File

@@ -158,7 +158,7 @@ if(WITH_CODEC_FFMPEG)
mp3lame swscale x264 xvidcore theora theoradec theoraenc vorbis vorbisenc vorbisfile ogg
)
if(WITH_CXX11)
set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} schroedinger orc vpx webp swresample)
set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} schroedinger orc vpx)
endif()
set(FFMPEG_LIBPATH ${FFMPEG}/lib)
endif()
@@ -316,9 +316,6 @@ if(WITH_OPENIMAGEIO)
${OPENEXR_LIBRARIES}
${ZLIB_LIBRARIES}
)
if(WITH_CXX11)
set(OPENIMAGEIO_LIBRARIES ${OPENIMAGEIO_LIBRARIES} ${LIBDIR}/ffmpeg/lib/libwebp.a)
endif()
set(OPENIMAGEIO_LIBPATH
${OPENIMAGEIO}/lib
${JPEG_LIBPATH}

View File

@@ -33,7 +33,7 @@ endmacro()
macro(windows_find_package package_name
)
if(WITH_WINDOWS_FIND_MODULES)
find_package(${package_name})
find_package( ${package_name})
endif(WITH_WINDOWS_FIND_MODULES)
endmacro()
@@ -112,7 +112,7 @@ set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /ignore:4221")
# MSVC only, Mingw doesnt need
if(CMAKE_CL_64)
set(PLATFORM_LINKFLAGS "/MACHINE:X64 ${PLATFORM_LINKFLAGS}")
set(PLATFORM_LINKFLAGS "/MACHINE:X64 /OPT:NOREF ${PLATFORM_LINKFLAGS}")
else()
set(PLATFORM_LINKFLAGS "/MACHINE:IX86 /LARGEADDRESSAWARE ${PLATFORM_LINKFLAGS}")
endif()
@@ -238,14 +238,14 @@ if(WITH_CODEC_FFMPEG)
windows_find_package(FFMPEG)
if(NOT FFMPEG_FOUND)
warn_hardcoded_paths(ffmpeg)
set(FFMPEG_LIBRARY_VERSION 57)
set(FFMPEG_LIBRARY_VERSION_AVU 55)
set(FFMPEG_LIBRARY_VERSION 55)
set(FFMPEG_LIBRARY_VERSION_AVU 52)
set(FFMPEG_LIBRARIES
${LIBDIR}/ffmpeg/lib/avcodec.lib
${LIBDIR}/ffmpeg/lib/avformat.lib
${LIBDIR}/ffmpeg/lib/avdevice.lib
${LIBDIR}/ffmpeg/lib/avutil.lib
${LIBDIR}/ffmpeg/lib/swscale.lib
${LIBDIR}/ffmpeg/lib/avcodec-${FFMPEG_LIBRARY_VERSION}.lib
${LIBDIR}/ffmpeg/lib/avformat-${FFMPEG_LIBRARY_VERSION}.lib
${LIBDIR}/ffmpeg/lib/avdevice-${FFMPEG_LIBRARY_VERSION}.lib
${LIBDIR}/ffmpeg/lib/avutil-${FFMPEG_LIBRARY_VERSION_AVU}.lib
${LIBDIR}/ffmpeg/lib/swscale-2.lib
)
endif()
endif()
@@ -380,7 +380,6 @@ if(WITH_OPENIMAGEIO)
set(OPENCOLORIO_DEFINITIONS "-DOCIO_STATIC_BUILD")
set(OPENIMAGEIO_IDIFF "${OPENIMAGEIO}/bin/idiff.exe")
add_definitions(-DOIIO_STATIC_BUILD)
add_definitions(-DOIIO_NO_SSE=1)
endif()
if(WITH_LLVM)
@@ -446,20 +445,10 @@ if(WITH_MOD_CLOTH_ELTOPO)
endif()
if(WITH_OPENSUBDIV OR WITH_CYCLES_OPENSUBDIV)
set(OPENSUBDIV_INCLUDE_DIR ${LIBDIR}/opensubdiv/include)
set(OPENSUBDIV_LIBPATH ${LIBDIR}/opensubdiv/lib)
set(OPENSUBDIV_LIBRARIES optimized ${OPENSUBDIV_LIBPATH}/osdCPU.lib
optimized ${OPENSUBDIV_LIBPATH}/osdGPU.lib
debug ${OPENSUBDIV_LIBPATH}/osdCPU_d.lib
debug ${OPENSUBDIV_LIBPATH}/osdGPU_d.lib
)
set(OPENSUBDIV_HAS_OPENMP TRUE)
set(OPENSUBDIV_HAS_TBB FALSE)
set(OPENSUBDIV_HAS_OPENCL TRUE)
set(OPENSUBDIV_HAS_CUDA FALSE)
set(OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK TRUE)
set(OPENSUBDIV_HAS_GLSL_COMPUTE TRUE)
windows_find_package(OpenSubdiv)
set(OPENSUBDIV_INCLUDE_DIR ${LIBDIR}/opensubdiv/include)
set(OPENSUBDIV_LIBPATH ${LIBDIR}/opensubdiv/lib)
set(OPENSUBDIV_LIBRARIES ${OPENSUBDIV_LIBPATH}/osdCPU.lib ${OPENSUBDIV_LIBPATH}/osdGPU.lib)
find_package(OpenSubdiv)
endif()
if(WITH_SDL)

View File

@@ -681,7 +681,7 @@ Image classes
.. attribute:: zbuff
Use depth component of render as grayscale color - suitable for texture source.
Use depth component of render as grey scale color - suitable for texture source.
:type: bool
@@ -817,7 +817,7 @@ Image classes
.. attribute:: zbuff
Use depth component of viewport as grayscale color - suitable for texture source.
Use depth component of viewport as grey scale color - suitable for texture source.
:type: bool
@@ -1260,8 +1260,8 @@ Filter classes
.. class:: FilterGray
Filter for grayscale effect.
Proportions of R, G and B contributions in the output grayscale are 28:151:77.
Filter for gray scale effect.
Proportions of R, G and B contributions in the output gray scale are 28:151:77.
.. attribute:: previous

View File

@@ -405,7 +405,7 @@ base class --- :class:`SCA_IObject`
.. note::
This attribute is experimental and may be removed (but probably wont be).
This attribute is experemental and may be removed (but probably wont be).
.. note::
@@ -419,7 +419,7 @@ base class --- :class:`SCA_IObject`
.. note::
This attribute is experimental and may be removed (but probably wont be).
This attribute is experemental and may be removed (but probably wont be).
.. note::
@@ -453,7 +453,7 @@ base class --- :class:`SCA_IObject`
.. attribute:: childrenRecursive
all children of this object including children's children, (read-only).
all children of this object including childrens children, (read-only).
:type: :class:`CListValue` of :class:`KX_GameObject`'s
@@ -536,7 +536,7 @@ base class --- :class:`SCA_IObject`
.. method:: getAxisVect(vect)
Returns the axis vector rotates by the object's worldspace orientation.
Returns the axis vector rotates by the objects worldspace orientation.
This is the equivalent of multiplying the vector by the orientation matrix.
:arg vect: a vector to align the axis.
@@ -596,7 +596,7 @@ base class --- :class:`SCA_IObject`
Gets the game object's linear velocity.
This method returns the game object's velocity through it's center of mass, ie no angular velocity component.
This method returns the game object's velocity through it's centre of mass, ie no angular velocity component.
:arg local:
* False: you get the "global" velocity ie: relative to world orientation.
@@ -609,7 +609,7 @@ base class --- :class:`SCA_IObject`
Sets the game object's linear velocity.
This method sets game object's velocity through it's center of mass,
This method sets game object's velocity through it's centre of mass,
ie no angular velocity component.
This requires a dynamic object.
@@ -814,7 +814,7 @@ base class --- :class:`SCA_IObject`
# do something
pass
The face parameter determines the orientation of the normal.
The face paremeter determines the orientation of the normal.
* 0 => hit normal is always oriented towards the ray origin (as if you casted the ray from outside)
* 1 => hit normal is the real face normal (only for mesh object, otherwise face has no effect)
@@ -911,7 +911,7 @@ base class --- :class:`SCA_IObject`
.. note::
The gameObject argument has an advantage that it can convert from a mesh with modifiers applied (such as the Subdivision Surface modifier).
The gameObject argument has an advantage that it can convert from a mesh with modifiers applied (such as subsurf).
.. warning::
@@ -919,7 +919,7 @@ base class --- :class:`SCA_IObject`
.. warning::
If the object is a part of a compound object it will fail (parent or child)
If the object is a part of a combound object it will fail (parent or child)
.. warning::

View File

@@ -12,7 +12,7 @@ contents: dir(bgl). A simple search on the web can point to more
than enough material to teach OpenGL programming, from books to many
collections of tutorials.
Here is a comprehensive `list of books <https://www.khronos.org/developers/books/>`__ (non free).
Here is a comprehensive `list of books <https://www.opengl.org/documentation/books/>`__ (non free).
The `arcsynthesis tutorials <https://web.archive.org/web/20150225192611/http://www.arcsynthesis.org/gltut/index.html>`__
is one of the best resources to learn modern OpenGL and
`g-truc <http://www.g-truc.net/post-opengl-samples.html#menu>`__
@@ -2067,7 +2067,7 @@ offers a set of extensive examples, including advanced features.
:arg length: Returns the length of the string returned in source (excluding the null terminator).
:type source: :class:`bgl.Buffer` char.
:arg source: Specifies an array of characters that is used to return the source code string.
.. function:: glShaderSource(shader, shader_string):

View File

@@ -204,7 +204,7 @@ Lets say we want to access the texture of a brush via Python, to adjust its ``co
- Start in the default scene and enable 'Sculpt' mode from the 3D-View header.
- From the toolbar expand the **Texture** panel and add a new texture.
*Notice the texture button its self doesn't have very useful links (you can check the tooltips).*
*Notice the texture button its self doesn't have very useful links (you can check the tool-tips).*
- The contrast setting isn't exposed in the sculpt toolbar, so view the texture in the properties panel...
- In the properties button select the Texture context.

View File

@@ -19,7 +19,7 @@ This is a typical Python environment so tutorials on how to write Python scripts
will work running the scripts in Blender too.
Blender provides the :mod:`bpy` module to the Python interpreter.
This module can be imported in a script and gives access to Blender data, classes, and functions.
Scripts that deal with Blender data will need to import this module.
Scripts that deal with Blender data will need to import this module.
Here is a simple example of moving a vertex of the object named **Cube**:
@@ -80,7 +80,7 @@ To run as modules:
Add-ons
-------
------
Some of Blenders functionality is best kept optional,
alongside scripts loaded at startup we have add-ons which are kept in their own directory ``scripts/addons``,
@@ -213,7 +213,7 @@ A simple Blender/Python module can look like this:
bpy.utils.register_class(SimpleOperator)
def unregister():
bpy.utils.unregister_class(SimpleOperator)
bpy.utils.unregister_class(SimpleOperator)
if __name__ == "__main__":
register()
@@ -327,7 +327,7 @@ Say you want to store material settings for a custom engine.
.. note::
*The class must be registered before being used in a property, failing to do so will raise an error:*
``ValueError: bpy_struct "Material" registration error: my_custom_props could not register``
@@ -429,3 +429,4 @@ Calling these operators:
>>> bpy.ops.object.operator_2()
Hello World OBJECT_OT_operator_2
{'FINISHED'}

View File

@@ -427,9 +427,9 @@ if BLENDER_REVISION != "Unknown":
BLENDER_VERSION_DOTS += " " + BLENDER_REVISION # '2.62.1 SHA1'
BLENDER_VERSION_PATH = "_".join(blender_version_strings) # '2_62_1'
if bpy.app.version_cycle in {"rc", "release"}:
# '2_62a_release'
BLENDER_VERSION_PATH = "%s%s_release" % ("_".join(blender_version_strings[:2]), bpy.app.version_char)
if bpy.app.version_cycle == "release":
BLENDER_VERSION_PATH = "%s%s_release" % ("_".join(blender_version_strings[:2]),
bpy.app.version_char) # '2_62_release'
# --------------------------DOWNLOADABLE FILES----------------------------------
@@ -1565,9 +1565,9 @@ def pyrna2sphinx(basepath):
# operators
def write_ops():
API_BASEURL = "https://developer.blender.org/diffusion/B/browse/master/release/scripts "
API_BASEURL_ADDON = "https://developer.blender.org/diffusion/BA"
API_BASEURL_ADDON_CONTRIB = "https://developer.blender.org/diffusion/BAC"
API_BASEURL = "https://developer.blender.org/diffusion/B/browse/master/release/scripts/ "
API_BASEURL_ADDON = "https://developer.blender.org/diffusion/BA/"
API_BASEURL_ADDON_CONTRIB = "https://developer.blender.org/diffusion/BAC/"
op_modules = {}
for op in ops.values():
@@ -1632,9 +1632,13 @@ def write_sphinx_conf_py(basepath):
file = open(filepath, "w", encoding="utf-8")
fw = file.write
fw("import sys, os\n\n")
fw("extensions = ['sphinx.ext.intersphinx']\n\n")
fw("intersphinx_mapping = {'blender_manual': ('https://docs.blender.org/manual/en/dev/', None)}\n\n")
fw("import sys, os\n")
fw("\n")
fw("extensions = ['sphinx.ext.intersphinx']\n")
fw("\n")
fw("intersphinx_mapping = {'blender_manual': ('https://www.blender.org/manual/', None)}\n")
fw("\n")
fw("project = 'Blender'\n")
# fw("master_doc = 'index'\n")
fw("copyright = u'Blender Foundation'\n")
@@ -1651,16 +1655,12 @@ def write_sphinx_conf_py(basepath):
# not helpful since the source is generated, adds to upload size.
fw("html_copy_source = False\n")
fw("html_show_sphinx = False\n")
fw("html_split_index = True\n")
fw("\n")
# needed for latex, pdf gen
fw("latex_elements = {\n")
fw(" 'papersize': 'a4paper',\n")
fw("}\n\n")
fw("latex_documents = [ ('contents', 'contents.tex', 'Blender Index', 'Blender Foundation', 'manual'), ]\n")
fw("latex_paper_size = 'a4paper'\n")
file.close()

View File

@@ -41,9 +41,9 @@ import tempfile
import zipfile
DEFAULT_RSYNC_SERVER = "docs.blender.org"
DEFAULT_RSYNC_SERVER = "www.blender.org"
DEFAULT_RSYNC_ROOT = "/api/"
DEFAULT_SYMLINK_ROOT = "/data/www/vhosts/docs.blender.org/api"
DEFAULT_SYMLINK_ROOT = "/data/www/vhosts/www.blender.org/api"
def argparse_create():
@@ -96,11 +96,6 @@ def main():
rsync_base = "rsync://%s@%s:%s" % (args.user, args.rsync_server, args.rsync_root)
blenver = blenver_zip = ""
api_name = ""
branch = ""
is_release = False
# I) Update local mirror using rsync.
rsync_mirror_cmd = ("rsync", "--delete-after", "-avzz", rsync_base, args.mirror_dir)
subprocess.run(rsync_mirror_cmd, env=dict(os.environ, RSYNC_PASSWORD=args.password))
@@ -113,24 +108,19 @@ def main():
subprocess.run(doc_gen_cmd)
# III) Get Blender version info.
blenver = blenver_zip = ""
getver_file = os.path.join(tmp_dir, "blendver.txt")
getver_script = (""
"import sys, bpy\n"
"with open(sys.argv[-1], 'w') as f:\n"
" is_release = bpy.app.version_cycle in {'rc', 'release'}\n"
" branch = bpy.app.build_branch.split()[0].decode()\n"
" f.write('%d\\n' % is_release)\n"
" f.write('%s\\n' % branch)\n"
" f.write('%d.%d%s\\n' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
" if is_release else '%s\\n' % branch)\n"
" f.write('%d_%d%s_release' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
" if is_release else '%d_%d_%d' % bpy.app.version)\n")
" f.write('%d_%d%s_release\\n' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
" if bpy.app.version_cycle in {'rc', 'release'} else '%d_%d_%d\\n' % bpy.app.version)\n"
" f.write('%d_%d_%d' % bpy.app.version)\n")
get_ver_cmd = (args.blender, "--background", "-noaudio", "--factory-startup", "--python-exit-code", "1",
"--python-expr", getver_script, "--", getver_file)
subprocess.run(get_ver_cmd)
with open(getver_file) as f:
is_release, branch, blenver, blenver_zip = f.read().split("\n")
is_release = bool(int(is_release))
blenver, blenver_zip = f.read().split("\n")
os.remove(getver_file)
# IV) Build doc.
@@ -142,7 +132,7 @@ def main():
os.chdir(curr_dir)
# V) Cleanup existing matching dir in server mirror (if any), and copy new doc.
api_name = blenver
api_name = "blender_python_api_%s" % blenver
api_dir = os.path.join(args.mirror_dir, api_name)
if os.path.exists(api_dir):
shutil.rmtree(api_dir)
@@ -160,15 +150,19 @@ def main():
os.rename(zip_path, os.path.join(api_dir, "%s.zip" % zip_name))
# VII) Create symlinks and html redirects.
#~ os.symlink(os.path.join(DEFAULT_SYMLINK_ROOT, api_name, "contents.html"), os.path.join(api_dir, "index.html"))
os.symlink("./contents.html", os.path.join(api_dir, "index.html"))
if is_release:
symlink = os.path.join(args.mirror_dir, "current")
if blenver.endswith("release"):
symlink = os.path.join(args.mirror_dir, "blender_python_api_current")
os.remove(symlink)
os.symlink("./%s" % api_name, symlink)
with open(os.path.join(args.mirror_dir, "250PythonDoc/index.html"), 'w') as f:
f.write("<html><head><title>Redirecting...</title><meta http-equiv=\"REFRESH\""
"content=\"0;url=../%s/\"></head><body>Redirecting...</body></html>" % api_name)
elif branch == "master":
else:
symlink = os.path.join(args.mirror_dir, "blender_python_api_master")
os.remove(symlink)
os.symlink("./%s" % api_name, symlink)
with open(os.path.join(args.mirror_dir, "blender_python_api/index.html"), 'w') as f:
f.write("<html><head><title>Redirecting...</title><meta http-equiv=\"REFRESH\""
"content=\"0;url=../%s/\"></head><body>Redirecting...</body></html>" % api_name)

View File

@@ -77,7 +77,7 @@ namespace std {
void resize(size_type new_size)
{ resize(new_size, T()); }
#if defined(_VECTOR_) && (_MSC_VER<1910)
#if defined(_VECTOR_)
// workaround MSVC std::vector implementation
void resize(size_type new_size, const value_type& x)
{
@@ -110,7 +110,7 @@ namespace std {
vector_base::insert(vector_base::end(), new_size - vector_base::size(), x);
}
#else
// either GCC 4.1, MSVC2017 or non-GCC
// either GCC 4.1 or non-GCC
// default implementation which should always work.
void resize(size_type new_size, const value_type& x)
{

View File

@@ -114,7 +114,7 @@ extern "C" {
#define cuGLGetDevices cuGLGetDevices_v2
/* Types. */
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined (__aarch64__)
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned int CUdeviceptr;

View File

@@ -137,7 +137,7 @@ int curve_fit_cubic_to_points_refit_db(
const double error_threshold,
const unsigned int calc_flag,
const unsigned int *corners,
const unsigned int corners_len,
unsigned int corners_len,
const double corner_angle,
double **r_cubic_array, unsigned int *r_cubic_array_len,

View File

@@ -18,8 +18,6 @@ Local modifications:
- Applied some modifications from fork https://github.com/Nazg-Gul/gflags.git
(see https://github.com/gflags/gflags/pull/129)
- Avoid attempt of acquiring mutex lock in FlagRegistry::GlobalRegistry when
- Avoid attemot of acquiring mutex lock in FlagRegistry::GlobalRegistry when
doing static flags initialization. See d81dd2d in Blender repository.
- Made `google::{anonymous}::FlagValue::ValueSize() const` inlined, so it does
not trigger strict compiler warning.

View File

@@ -218,7 +218,7 @@ class FlagValue {
bool Equal(const FlagValue& x) const;
FlagValue* New() const; // creates a new one with default value
void CopyFrom(const FlagValue& x);
inline int ValueSize() const;
int ValueSize() const;
// Calls the given validate-fn on value_buffer_, and returns
// whatever it returns. But first casts validate_fn_proto to a
@@ -443,7 +443,7 @@ void FlagValue::CopyFrom(const FlagValue& x) {
}
}
inline int FlagValue::ValueSize() const {
int FlagValue::ValueSize() const {
if (type_ > FV_MAX_INDEX) {
assert(false); // unknown type
return 0;

View File

@@ -60,10 +60,6 @@
#include <string>
#include <vector>
#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
# include <type_traits>
#endif
#include "gtest/gtest-message.h"
#include "gtest/internal/gtest-string.h"
#include "gtest/internal/gtest-filepath.h"
@@ -858,7 +854,6 @@ struct AddReference<T&> { typedef T& type; }; // NOLINT
template <typename From, typename To>
class ImplicitlyConvertible {
private:
#if !((__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800))
// We need the following helper functions only for their types.
// They have no implementations.
@@ -879,7 +874,6 @@ class ImplicitlyConvertible {
// implicitly converted to type To.
static char Helper(To);
static char (&Helper(...))[2]; // NOLINT
#endif
// We have to put the 'public' section after the 'private' section,
// or MSVC refuses to compile the code.
@@ -889,8 +883,6 @@ class ImplicitlyConvertible {
// instantiation. The simplest workaround is to use its C++0x type traits
// functions (C++Builder 2009 and above only).
static const bool value = __is_convertible(From, To);
#elif (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
static const bool value = std::is_convertible<From, To>::value;
#else
// MSVC warns about implicitly converting from double to int for
// possible loss of data, so we need to temporarily disable the

View File

@@ -34,7 +34,7 @@ add_subdirectory(mikktspace)
add_subdirectory(glew-mx)
add_subdirectory(eigen)
if(WITH_GAMEENGINE_DECKLINK)
if (WITH_GAMEENGINE_DECKLINK)
add_subdirectory(decklink)
endif()
@@ -62,7 +62,7 @@ if(WITH_IK_ITASC)
add_subdirectory(itasc)
endif()
if(WITH_GAMEENGINE)
if(WITH_IK_SOLVER OR WITH_GAMEENGINE OR WITH_MOD_BOOLEAN)
add_subdirectory(moto)
endif()

View File

@@ -101,11 +101,11 @@ ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);
ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x);
ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x);
ATOMIC_INLINE unsigned int atomic_fetch_and_add_u(unsigned int *p, unsigned int x);
ATOMIC_INLINE unsigned int atomic_fetch_and_sub_u(unsigned int *p, unsigned int x);
ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsigned int _new);
ATOMIC_INLINE unsigned atomic_add_and_fetch_u(unsigned *p, unsigned x);
ATOMIC_INLINE unsigned atomic_sub_and_fetch_u(unsigned *p, unsigned x);
ATOMIC_INLINE unsigned atomic_fetch_and_add_u(unsigned *p, unsigned x);
ATOMIC_INLINE unsigned atomic_fetch_and_sub_u(unsigned *p, unsigned x);
ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new);
/* WARNING! Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation,
* which means they are only efficient if collisions are highly unlikely (i.e. if probability of two threads

View File

@@ -113,58 +113,58 @@ ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new)
/******************************************************************************/
/* unsigned operations. */
ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x)
ATOMIC_INLINE unsigned atomic_add_and_fetch_u(unsigned *p, unsigned x)
{
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
assert(sizeof(unsigned) == LG_SIZEOF_INT);
#if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
return (unsigned)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
return (unsigned)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
#endif
}
ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x)
ATOMIC_INLINE unsigned atomic_sub_and_fetch_u(unsigned *p, unsigned x)
{
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
assert(sizeof(unsigned) == LG_SIZEOF_INT);
#if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
return (unsigned)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
#elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
return (unsigned)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
#endif
}
ATOMIC_INLINE unsigned int atomic_fetch_and_add_u(unsigned int *p, unsigned int x)
ATOMIC_INLINE unsigned atomic_fetch_and_add_u(unsigned *p, unsigned x)
{
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
assert(sizeof(unsigned) == LG_SIZEOF_INT);
#if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x);
return (unsigned)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x);
return (unsigned)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x);
#endif
}
ATOMIC_INLINE unsigned int atomic_fetch_and_sub_u(unsigned int *p, unsigned int x)
ATOMIC_INLINE unsigned atomic_fetch_and_sub_u(unsigned *p, unsigned x)
{
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
assert(sizeof(unsigned) == LG_SIZEOF_INT);
#if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
return (unsigned)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
#elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
return (unsigned)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
#endif
}
ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsigned int _new)
ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new)
{
assert(sizeof(unsigned int) == LG_SIZEOF_INT);
assert(sizeof(unsigned) == LG_SIZEOF_INT);
#if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
return (unsigned)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
#elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
return (unsigned)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
#endif
}

View File

@@ -110,10 +110,10 @@ void AUD_LimiterReader::read(int& length, bool& eos, sample_t* buffer)
eos = true;
}
if(position < int(m_start * rate))
if(position < m_start * rate)
{
int len2 = length;
for(int len = int(m_start * rate) - position;
for(int len = m_start * rate - position;
len2 == length && !eos;
len -= length)
{

View File

@@ -365,7 +365,6 @@ bool AUD_SoftwareDevice::AUD_SoftwareHandle::seek(float position)
if(!m_status)
return false;
m_pitch->setPitch(m_user_pitch);
m_reader->seek((int)(position * m_reader->getSpecs().rate));
if(m_status == AUD_STATUS_STOPPED)

View File

@@ -74,7 +74,7 @@ elseif(CMAKE_COMPILER_IS_GNUCC)
if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c -mfpmath=sse")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -fno-finite-math-only")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
check_cxx_compiler_flag(-msse CXX_HAS_SSE)
check_cxx_compiler_flag(-mavx CXX_HAS_AVX)
@@ -90,7 +90,7 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -fno-finite-math-only")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
endif()
if(CXX_HAS_SSE)

View File

@@ -72,17 +72,20 @@ static void session_print(const string& str)
static void session_print_status()
{
int sample, tile;
double total_time, sample_time, render_time;
string status, substatus;
/* get status */
float progress = options.session->progress.get_progress();
sample = options.session->progress.get_sample();
options.session->progress.get_tile(tile, total_time, sample_time, render_time);
options.session->progress.get_status(status, substatus);
if(substatus != "")
status += ": " + substatus;
/* print status */
status = string_printf("Progress %05.2f %s", (double) progress*100, status.c_str());
status = string_printf("Sample %d %s", sample, status.c_str());
session_print(status);
}
@@ -164,12 +167,13 @@ static void display_info(Progress& progress)
latency = (elapsed - last);
last = elapsed;
double total_time, sample_time;
int sample, tile;
double total_time, sample_time, render_time;
string status, substatus;
progress.get_time(total_time, sample_time);
sample = progress.get_sample();
progress.get_tile(tile, total_time, sample_time, render_time);
progress.get_status(status, substatus);
float progress_val = progress.get_progress();
if(substatus != "")
status += ": " + substatus;
@@ -180,10 +184,10 @@ static void display_info(Progress& progress)
"%s"
" Time: %.2f"
" Latency: %.4f"
" Progress: %05.2f"
" Sample: %d"
" Average: %.4f"
" Interactive: %s",
status.c_str(), total_time, latency, (double) progress_val*100, sample_time, interactive.c_str());
status.c_str(), total_time, latency, sample, sample_time, interactive.c_str());
view_display_info(str.c_str());

View File

@@ -523,7 +523,7 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node)
/* we don't yet support arbitrary attributes, for now add vertex
* coordinates as generated coordinates if requested */
if(mesh->need_attribute(state.scene, ATTR_STD_GENERATED)) {
if (mesh->need_attribute(state.scene, ATTR_STD_GENERATED)) {
Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED);
memcpy(attr->data_float3(), mesh->verts.data(), sizeof(float3)*mesh->verts.size());
}

View File

@@ -25,7 +25,6 @@ set(SRC
blender_camera.cpp
blender_mesh.cpp
blender_object.cpp
blender_object_cull.cpp
blender_particles.cpp
blender_curves.cpp
blender_logging.cpp
@@ -36,7 +35,6 @@ set(SRC
blender_texture.cpp
CCL_api.h
blender_object_cull.h
blender_sync.h
blender_session.h
blender_texture.h

View File

@@ -23,25 +23,11 @@ bl_info = {
"location": "Info header, render engine menu",
"description": "Cycles Render Engine integration",
"warning": "",
"wiki_url": "https://docs.blender.org/manual/en/dev/render/cycles/",
"wiki_url": "https://www.blender.org/manual/render/cycles/index.html",
"tracker_url": "",
"support": 'OFFICIAL',
"category": "Render"}
# Support 'reload' case.
if "bpy" in locals():
import importlib
if "engine" in locals():
importlib.reload(engine)
if "version_update" in locals():
importlib.reload(version_update)
if "ui" in locals():
importlib.reload(ui)
if "properties" in locals():
importlib.reload(properties)
if "presets" in locals():
importlib.reload(presets)
import bpy
from . import (
@@ -107,13 +93,7 @@ def engine_exit():
engine.exit()
classes = (
CyclesRender,
)
def register():
from bpy.utils import register_class
from . import ui
from . import properties
from . import presets
@@ -128,15 +108,12 @@ def register():
properties.register()
ui.register()
presets.register()
for cls in classes:
register_class(cls)
bpy.utils.register_module(__name__)
bpy.app.handlers.version_update.append(version_update.do_versions)
def unregister():
from bpy.utils import unregister_class
from . import ui
from . import properties
from . import presets
@@ -147,6 +124,4 @@ def unregister():
ui.unregister()
properties.unregister()
presets.unregister()
for cls in classes:
unregister_class(cls)
bpy.utils.unregister_module(__name__)

View File

@@ -50,24 +50,6 @@ def _workaround_buggy_drivers():
_cycles.opencl_disable()
def _configure_argument_parser():
import argparse
parser = argparse.ArgumentParser(description="Cycles Addon argument parser")
parser.add_argument("--cycles-resumable-num-chunks",
help="Number of chunks to split sample range into",
default=None)
parser.add_argument("--cycles-resumable-current-chunk",
help="Current chunk of samples range to render",
default=None)
parser.add_argument("--cycles-resumable-start-chunk",
help="Start chunk to render",
default=None)
parser.add_argument("--cycles-resumable-end-chunk",
help="End chunk to render",
default=None)
return parser
def _parse_command_line():
import sys
@@ -75,22 +57,25 @@ def _parse_command_line():
if "--" not in argv:
return
parser = _configure_argument_parser()
args, unknown = parser.parse_known_args(argv[argv.index("--") + 1:])
argv = argv[argv.index("--") + 1:]
if args.cycles_resumable_num_chunks is not None:
if args.cycles_resumable_current_chunk is not None:
import _cycles
_cycles.set_resumable_chunk(
int(args.cycles_resumable_num_chunks),
int(args.cycles_resumable_current_chunk))
elif args.cycles_resumable_start_chunk is not None and \
args.cycles_resumable_end_chunk:
import _cycles
_cycles.set_resumable_chunk_range(
int(args.cycles_resumable_num_chunks),
int(args.cycles_resumable_start_chunk),
int(args.cycles_resumable_end_chunk))
num_resumable_chunks = None
current_resumable_chunk = None
# TODO(sergey): Add some nice error ptins if argument is not used properly.
idx = 0
while idx < len(argv) - 1:
arg = argv[idx]
if arg == '--cycles-resumable-num-chunks':
num_resumable_chunks = int(argv[idx + 1])
elif arg == '--cycles-resumable-current-chunk':
current_resumable_chunk = int(argv[idx + 1])
idx += 1
if num_resumable_chunks is not None and current_resumable_chunk is not None:
import _cycles
_cycles.set_resumable_chunks(num_resumable_chunks,
current_resumable_chunk)
def init():

View File

@@ -82,23 +82,12 @@ class AddPresetSampling(AddPresetBase, Operator):
preset_subdir = "cycles/sampling"
classes = (
AddPresetIntegrator,
AddPresetSampling,
)
def register():
from bpy.utils import register_class
for cls in classes:
register_class(cls)
pass
def unregister():
from bpy.utils import unregister_class
for cls in classes:
unregister_class(cls)
pass
if __name__ == "__main__":
register()

View File

@@ -288,7 +288,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
description="Probabilistically terminate light samples when the light contribution is below this threshold (more noise but faster rendering). "
"Zero disables the test and never ignores lights",
min=0.0, max=1.0,
default=0.01,
default=0.05,
)
cls.caustics_reflective = BoolProperty(
@@ -528,12 +528,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
description="Use special type BVH optimized for hair (uses more ram but renders faster)",
default=True,
)
cls.debug_bvh_time_steps = IntProperty(
name="BVH Time Steps",
description="Split BVH primitives by this number of time steps to speed up render time in cost of memory",
default=0,
min=0, max=16,
)
cls.tile_order = EnumProperty(
name="Tile Order",
description="Tile order for rendering",
@@ -638,20 +632,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
items=enum_texture_limit
)
cls.ao_bounces = IntProperty(
name="AO Bounces",
default=0,
description="Approximate indirect light with background tinted ambient occlusion at the specified bounce, 0 disables this feature",
min=0, max=1024,
)
cls.ao_bounces_render = IntProperty(
name="AO Bounces Render",
default=0,
description="Approximate indirect light with background tinted ambient occlusion at the specified bounce, 0 disables this feature",
min=0, max=1024,
)
# Various fine-tuning debug flags
def devices_update_callback(self, context):
@@ -665,10 +645,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
cls.debug_use_cpu_sse3 = BoolProperty(name="SSE3", default=True)
cls.debug_use_cpu_sse2 = BoolProperty(name="SSE2", default=True)
cls.debug_use_qbvh = BoolProperty(name="QBVH", default=True)
cls.debug_use_cpu_split_kernel = BoolProperty(name="Split Kernel", default=False)
cls.debug_use_cuda_adaptive_compile = BoolProperty(name="Adaptive Compile", default=False)
cls.debug_use_cuda_split_kernel = BoolProperty(name="Split Kernel", default=False)
cls.debug_opencl_kernel_type = EnumProperty(
name="OpenCL Kernel Type",
@@ -695,8 +673,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
update=devices_update_callback
)
cls.debug_opencl_kernel_single_program = BoolProperty(name="Single Program", default=False, update=devices_update_callback);
cls.debug_use_opencl_debug = BoolProperty(name="Debug OpenCL", default=False)
@classmethod

View File

@@ -86,10 +86,12 @@ def use_sample_all_lights(context):
return cscene.sample_all_lights_direct or cscene.sample_all_lights_indirect
def show_device_active(context):
cscene = context.scene.cycles
if cscene.device != 'GPU':
def show_device_selection(context):
type = get_device_type(context)
if type == 'NETWORK':
return True
if not type in {'CUDA', 'OPENCL'}:
return False
return context.user_preferences.addons[__package__].preferences.has_active_device()
@@ -215,7 +217,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
draw_samples_info(layout, context)
class CyclesRender_PT_geometry(CyclesButtonsPanel, Panel):
class CyclesRender_PT_geometery(CyclesButtonsPanel, Panel):
bl_label = "Geometry"
bl_options = {'DEFAULT_CLOSED'}
@@ -224,7 +226,6 @@ class CyclesRender_PT_geometry(CyclesButtonsPanel, Panel):
scene = context.scene
cscene = scene.cycles
ccscene = scene.cycles_curves
if cscene.feature_set == 'EXPERIMENTAL':
split = layout.split()
@@ -251,25 +252,6 @@ class CyclesRender_PT_geometry(CyclesButtonsPanel, Panel):
row.prop(cscene, "volume_step_size")
row.prop(cscene, "volume_max_steps")
layout.prop(ccscene, "use_curves", text="Use Hair")
col = layout.column()
col.active = ccscene.use_curves
col.prop(ccscene, "primitive", text="Primitive")
col.prop(ccscene, "shape", text="Shape")
if not (ccscene.primitive in {'CURVE_SEGMENTS', 'LINE_SEGMENTS'} and ccscene.shape == 'RIBBONS'):
col.prop(ccscene, "cull_backfacing", text="Cull back-faces")
if ccscene.primitive == 'TRIANGLES' and ccscene.shape == 'THICK':
col.prop(ccscene, "resolution", text="Resolution")
elif ccscene.primitive == 'CURVE_SEGMENTS':
col.prop(ccscene, "subdivisions", text="Curve subdivisions")
row = col.row()
row.prop(ccscene, "minimum_width", text="Min Pixels")
row.prop(ccscene, "maximum_width", text="Max Ext.")
class CyclesRender_PT_light_paths(CyclesButtonsPanel, Panel):
bl_label = "Light Paths"
@@ -430,10 +412,6 @@ class CyclesRender_PT_performance(CyclesButtonsPanel, Panel):
col.prop(cscene, "debug_use_spatial_splits")
col.prop(cscene, "debug_use_hair_bvh")
row = col.row()
row.active = not cscene.debug_use_spatial_splits
row.prop(cscene, "debug_bvh_time_steps")
class CyclesRender_PT_layer_options(CyclesButtonsPanel, Panel):
bl_label = "Layer"
@@ -789,13 +767,10 @@ class CyclesObject_PT_cycles_settings(CyclesButtonsPanel, Panel):
col = layout.column()
col.label(text="Performance:")
row = col.row()
sub = row.row()
sub.active = scene.render.use_simplify and cscene.use_camera_cull
sub.prop(cob, "use_camera_cull")
sub = row.row()
sub.active = scene.render.use_simplify and cscene.use_distance_cull
sub.prop(cob, "use_distance_cull")
row.active = scene.render.use_simplify and cscene.use_camera_cull
row.prop(cob, "use_camera_cull")
row.active = scene.render.use_simplify and cscene.use_distance_cull
row.prop(cob, "use_distance_cull")
class CYCLES_OT_use_shading_nodes(Operator):
@@ -1036,11 +1011,10 @@ class CyclesWorld_PT_ambient_occlusion(CyclesButtonsPanel, Panel):
layout = self.layout
light = context.world.light_settings
scene = context.scene
row = layout.row()
sub = row.row()
sub.active = light.use_ambient_occlusion or scene.render.use_simplify
sub.active = light.use_ambient_occlusion
sub.prop(light, "ao_factor", text="Factor")
row.prop(light, "distance", text="Distance")
@@ -1417,6 +1391,43 @@ class CyclesParticle_PT_textures(CyclesButtonsPanel, Panel):
layout.template_ID(slot, "texture", new="texture.new")
class CyclesRender_PT_CurveRendering(CyclesButtonsPanel, Panel):
bl_label = "Cycles Hair Rendering"
bl_context = "particle"
@classmethod
def poll(cls, context):
psys = context.particle_system
return CyclesButtonsPanel.poll(context) and psys and psys.settings.type == 'HAIR'
def draw_header(self, context):
ccscene = context.scene.cycles_curves
self.layout.prop(ccscene, "use_curves", text="")
def draw(self, context):
layout = self.layout
scene = context.scene
ccscene = scene.cycles_curves
layout.active = ccscene.use_curves
layout.prop(ccscene, "primitive", text="Primitive")
layout.prop(ccscene, "shape", text="Shape")
if not (ccscene.primitive in {'CURVE_SEGMENTS', 'LINE_SEGMENTS'} and ccscene.shape == 'RIBBONS'):
layout.prop(ccscene, "cull_backfacing", text="Cull back-faces")
if ccscene.primitive == 'TRIANGLES' and ccscene.shape == 'THICK':
layout.prop(ccscene, "resolution", text="Resolution")
elif ccscene.primitive == 'CURVE_SEGMENTS':
layout.prop(ccscene, "subdivisions", text="Curve subdivisions")
row = layout.row()
row.prop(ccscene, "minimum_width", text="Min Pixels")
row.prop(ccscene, "maximum_width", text="Max Ext.")
class CyclesRender_PT_bake(CyclesButtonsPanel, Panel):
bl_label = "Bake"
bl_context = "render"
@@ -1516,18 +1527,15 @@ class CyclesRender_PT_debug(CyclesButtonsPanel, Panel):
row.prop(cscene, "debug_use_cpu_avx", toggle=True)
row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
col.prop(cscene, "debug_use_qbvh")
col.prop(cscene, "debug_use_cpu_split_kernel")
col = layout.column()
col.label('CUDA Flags:')
col.prop(cscene, "debug_use_cuda_adaptive_compile")
col.prop(cscene, "debug_use_cuda_split_kernel")
col = layout.column()
col.label('OpenCL Flags:')
col.prop(cscene, "debug_opencl_kernel_type", text="Kernel")
col.prop(cscene, "debug_opencl_device_type", text="Device")
col.prop(cscene, "debug_opencl_kernel_single_program", text="Single Program")
col.prop(cscene, "debug_use_opencl_debug", text="Debug")
@@ -1614,13 +1622,6 @@ class CyclesScene_PT_simplify(CyclesButtonsPanel, Panel):
row.active = cscene.use_distance_cull
row.prop(cscene, "distance_cull_margin", text="Distance")
split = layout.split()
col = split.column()
col.prop(cscene, "ao_bounces")
col = split.column()
col.prop(cscene, "ao_bounces_render")
def draw_device(self, context):
scene = context.scene
layout = self.layout
@@ -1634,7 +1635,7 @@ def draw_device(self, context):
split = layout.split(percentage=1/3)
split.label("Device:")
row = split.row()
row.active = show_device_active(context)
row.active = show_device_selection(context)
row.prop(cscene, "device", text="")
if engine.with_osl() and use_cpu(context):
@@ -1713,75 +1714,17 @@ def get_panels():
return panels
classes = (
CYCLES_MT_sampling_presets,
CYCLES_MT_integrator_presets,
CyclesRender_PT_sampling,
CyclesRender_PT_geometry,
CyclesRender_PT_light_paths,
CyclesRender_PT_motion_blur,
CyclesRender_PT_film,
CyclesRender_PT_performance,
CyclesRender_PT_layer_options,
CyclesRender_PT_layer_passes,
CyclesRender_PT_views,
Cycles_PT_post_processing,
CyclesCamera_PT_dof,
Cycles_PT_context_material,
CyclesObject_PT_motion_blur,
CyclesObject_PT_cycles_settings,
CYCLES_OT_use_shading_nodes,
CyclesLamp_PT_preview,
CyclesLamp_PT_lamp,
CyclesLamp_PT_nodes,
CyclesLamp_PT_spot,
CyclesWorld_PT_preview,
CyclesWorld_PT_surface,
CyclesWorld_PT_volume,
CyclesWorld_PT_ambient_occlusion,
CyclesWorld_PT_mist,
CyclesWorld_PT_ray_visibility,
CyclesWorld_PT_settings,
CyclesMaterial_PT_preview,
CyclesMaterial_PT_surface,
CyclesMaterial_PT_volume,
CyclesMaterial_PT_displacement,
CyclesMaterial_PT_settings,
CyclesTexture_PT_context,
CyclesTexture_PT_node,
CyclesTexture_PT_mapping,
CyclesTexture_PT_colors,
CyclesParticle_PT_textures,
CyclesRender_PT_bake,
CyclesRender_PT_debug,
CyclesParticle_PT_CurveSettings,
CyclesScene_PT_simplify,
)
def register():
from bpy.utils import register_class
bpy.types.RENDER_PT_render.append(draw_device)
bpy.types.VIEW3D_HT_header.append(draw_pause)
for panel in get_panels():
panel.COMPAT_ENGINES.add('CYCLES')
for cls in classes:
register_class(cls)
def unregister():
from bpy.utils import unregister_class
bpy.types.RENDER_PT_render.remove(draw_device)
bpy.types.VIEW3D_HT_header.remove(draw_pause)
for panel in get_panels():
if 'CYCLES' in panel.COMPAT_ENGINES:
panel.COMPAT_ENGINES.remove('CYCLES')
for cls in classes:
unregister_class(cls)

View File

@@ -29,6 +29,24 @@
CCL_NAMESPACE_BEGIN
/* Utilities */
/* Hair curve functions */
void curveinterp_v3_v3v3v3v3(float3 *p, float3 *v1, float3 *v2, float3 *v3, float3 *v4, const float w[4]);
void interp_weights(float t, float data[4]);
float shaperadius(float shape, float root, float tip, float time);
void InterpolateKeySegments(int seg, int segno, int key, int curve, float3 *keyloc, float *time, ParticleCurveData *CData);
bool ObtainCacheParticleUV(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int uv_num);
bool ObtainCacheParticleVcol(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int vcol_num);
bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background);
void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData);
void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
float3 RotCam, bool is_ortho);
void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resolution);
void ExportCurveTriangleUV(ParticleCurveData *CData, int vert_offset, int resol, float3 *uvdata);
void ExportCurveTriangleVcol(ParticleCurveData *CData, int vert_offset, int resol, uchar4 *cdata);
ParticleCurveData::ParticleCurveData()
{
}
@@ -37,7 +55,7 @@ ParticleCurveData::~ParticleCurveData()
{
}
static void interp_weights(float t, float data[4])
void interp_weights(float t, float data[4])
{
/* Cardinal curve interpolation */
float t2 = t * t;
@@ -50,19 +68,17 @@ static void interp_weights(float t, float data[4])
data[3] = fc * t3 - fc * t2;
}
static void curveinterp_v3_v3v3v3v3(float3 *p,
float3 *v1, float3 *v2, float3 *v3, float3 *v4,
const float w[4])
void curveinterp_v3_v3v3v3v3(float3 *p, float3 *v1, float3 *v2, float3 *v3, float3 *v4, const float w[4])
{
p->x = v1->x * w[0] + v2->x * w[1] + v3->x * w[2] + v4->x * w[3];
p->y = v1->y * w[0] + v2->y * w[1] + v3->y * w[2] + v4->y * w[3];
p->z = v1->z * w[0] + v2->z * w[1] + v3->z * w[2] + v4->z * w[3];
}
static float shaperadius(float shape, float root, float tip, float time)
float shaperadius(float shape, float root, float tip, float time)
{
float radius = 1.0f - time;
if(shape != 0.0f) {
if(shape < 0.0f)
radius = powf(radius, 1.0f + shape);
@@ -74,13 +90,7 @@ static float shaperadius(float shape, float root, float tip, float time)
/* curve functions */
static void InterpolateKeySegments(int seg,
int segno,
int key,
int curve,
float3 *keyloc,
float *time,
ParticleCurveData *CData)
void InterpolateKeySegments(int seg, int segno, int key, int curve, float3 *keyloc, float *time, ParticleCurveData *CData)
{
float3 ckey_loc1 = CData->curvekey_co[key];
float3 ckey_loc2 = ckey_loc1;
@@ -109,11 +119,7 @@ static void InterpolateKeySegments(int seg,
curveinterp_v3_v3v3v3v3(keyloc, &ckey_loc1, &ckey_loc2, &ckey_loc3, &ckey_loc4, t);
}
static bool ObtainCacheParticleData(Mesh *mesh,
BL::Mesh *b_mesh,
BL::Object *b_ob,
ParticleCurveData *CData,
bool background)
bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background)
{
int curvenum = 0;
int keyno = 0;
@@ -137,7 +143,7 @@ static bool ObtainCacheParticleData(Mesh *mesh,
int totparts = b_psys.particles.length();
int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.draw_percentage() / 100.0f);
int totcurves = totchild;
if(b_part.child_type() == 0 || totchild == 0)
totcurves += totparts;
@@ -155,7 +161,7 @@ static bool ObtainCacheParticleData(Mesh *mesh,
CData->psys_shader.push_back_slow(shader);
float radius = get_float(cpsys, "radius_scale") * 0.5f;
CData->psys_rootradius.push_back_slow(radius * get_float(cpsys, "root_width"));
CData->psys_tipradius.push_back_slow(radius * get_float(cpsys, "tip_width"));
CData->psys_shape.push_back_slow(get_float(cpsys, "shape"));
@@ -175,7 +181,7 @@ static bool ObtainCacheParticleData(Mesh *mesh,
for(; pa_no < totparts+totchild; pa_no++) {
int keynum = 0;
CData->curve_firstkey.push_back_slow(keyno);
float curve_length = 0.0f;
float3 pcKey;
for(int step_no = 0; step_no < ren_step; step_no++) {
@@ -207,12 +213,7 @@ static bool ObtainCacheParticleData(Mesh *mesh,
return true;
}
static bool ObtainCacheParticleUV(Mesh *mesh,
BL::Mesh *b_mesh,
BL::Object *b_ob,
ParticleCurveData *CData,
bool background,
int uv_num)
bool ObtainCacheParticleUV(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int uv_num)
{
if(!(mesh && b_mesh && b_ob && CData))
return false;
@@ -230,7 +231,7 @@ static bool ObtainCacheParticleUV(Mesh *mesh,
int totparts = b_psys.particles.length();
int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.draw_percentage() / 100.0f);
int totcurves = totchild;
if(b_part.child_type() == 0 || totchild == 0)
totcurves += totparts;
@@ -266,12 +267,7 @@ static bool ObtainCacheParticleUV(Mesh *mesh,
return true;
}
static bool ObtainCacheParticleVcol(Mesh *mesh,
BL::Mesh *b_mesh,
BL::Object *b_ob,
ParticleCurveData *CData,
bool background,
int vcol_num)
bool ObtainCacheParticleVcol(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int vcol_num)
{
if(!(mesh && b_mesh && b_ob && CData))
return false;
@@ -289,7 +285,7 @@ static bool ObtainCacheParticleVcol(Mesh *mesh,
int totparts = b_psys.particles.length();
int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.draw_percentage() / 100.0f);
int totcurves = totchild;
if(b_part.child_type() == 0 || totchild == 0)
totcurves += totparts;
@@ -337,16 +333,16 @@ static void set_resolution(BL::Object *b_ob, BL::Scene *scene, bool render)
}
}
static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
float3 RotCam, bool is_ortho)
void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
float3 RotCam, bool is_ortho)
{
int vertexno = mesh->verts.size();
int vertexindex = vertexno;
int numverts = 0, numtris = 0;
/* compute and reserve size of arrays */
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -358,8 +354,8 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
mesh->reserve_mesh(mesh->verts.size() + numverts, mesh->num_triangles() + numtris);
/* actually export */
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -384,7 +380,7 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
if(curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)
v1 = CData->curvekey_co[curvekey] - CData->curvekey_co[max(curvekey - 1, CData->curve_firstkey[curve])];
else
else
v1 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey - 1];
time = CData->curvekey_time[curvekey]/CData->curve_length[curve];
@@ -411,7 +407,6 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
}
}
mesh->resize_mesh(mesh->verts.size(), mesh->triangles.size());
mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
mesh->add_face_normals();
@@ -421,30 +416,28 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
/* texture coords still needed */
}
static void ExportCurveTriangleGeometry(Mesh *mesh,
ParticleCurveData *CData,
int resolution)
void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resolution)
{
int vertexno = mesh->verts.size();
int vertexindex = vertexno;
int numverts = 0, numtris = 0;
/* compute and reserve size of arrays */
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
numverts += (CData->curve_keynum[curve] - 1)*resolution + resolution;
numtris += (CData->curve_keynum[curve] - 1)*2*resolution;
numverts += (CData->curve_keynum[curve] - 2)*2*resolution + resolution;
numtris += (CData->curve_keynum[curve] - 2)*resolution;
}
}
mesh->reserve_mesh(mesh->verts.size() + numverts, mesh->num_triangles() + numtris);
/* actually export */
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -546,7 +539,6 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
}
}
mesh->resize_mesh(mesh->verts.size(), mesh->triangles.size());
mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
mesh->add_face_normals();
@@ -556,7 +548,7 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
/* texture coords still needed */
}
static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
{
int num_keys = 0;
int num_curves = 0;
@@ -565,13 +557,13 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
return;
Attribute *attr_intercept = NULL;
if(mesh->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT))
attr_intercept = mesh->curve_attributes.add(ATTR_STD_CURVE_INTERCEPT);
/* compute and reserve size of arrays */
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -590,8 +582,8 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
num_curves = 0;
/* actually export */
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -685,13 +677,8 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
/* in case of new attribute, we verify if there really was any motion */
if(new_attribute) {
if(i != numkeys || !have_motion) {
/* No motion or hair "topology" changed, remove attributes again. */
if(i != numkeys) {
VLOG(1) << "Hair topology changed, removing attribute.";
}
else {
VLOG(1) << "No motion, removing attribute.";
}
/* no motion, remove attributes again */
VLOG(1) << "No motion, removing attribute";
mesh->curve_attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
}
else if(time_index > 0) {
@@ -711,10 +698,7 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
}
}
static void ExportCurveTriangleUV(ParticleCurveData *CData,
int vert_offset,
int resol,
float3 *uvdata)
void ExportCurveTriangleUV(ParticleCurveData *CData, int vert_offset, int resol, float3 *uvdata)
{
if(uvdata == NULL)
return;
@@ -724,8 +708,8 @@ static void ExportCurveTriangleUV(ParticleCurveData *CData,
int vertexindex = vert_offset;
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -759,18 +743,15 @@ static void ExportCurveTriangleUV(ParticleCurveData *CData,
}
}
static void ExportCurveTriangleVcol(ParticleCurveData *CData,
int vert_offset,
int resol,
uchar4 *cdata)
void ExportCurveTriangleVcol(ParticleCurveData *CData, int vert_offset, int resol, uchar4 *cdata)
{
if(cdata == NULL)
return;
int vertexindex = vert_offset;
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -892,7 +873,7 @@ void BlenderSync::sync_curves(Mesh *mesh,
}
/* obtain general settings */
const bool use_curves = scene->curve_system_manager->use_curves;
bool use_curves = scene->curve_system_manager->use_curves;
if(!(use_curves && b_ob.mode() != b_ob.mode_PARTICLE_EDIT)) {
if(!motion)
@@ -900,11 +881,11 @@ void BlenderSync::sync_curves(Mesh *mesh,
return;
}
const int primitive = scene->curve_system_manager->primitive;
const int triangle_method = scene->curve_system_manager->triangle_method;
const int resolution = scene->curve_system_manager->resolution;
const size_t vert_num = mesh->verts.size();
const size_t tri_num = mesh->num_triangles();
int primitive = scene->curve_system_manager->primitive;
int triangle_method = scene->curve_system_manager->triangle_method;
int resolution = scene->curve_system_manager->resolution;
size_t vert_num = mesh->verts.size();
size_t tri_num = mesh->num_triangles();
int used_res = 1;
/* extract particle hair data - should be combined with connecting to mesh later*/
@@ -1063,3 +1044,4 @@ void BlenderSync::sync_curves(Mesh *mesh,
}
CCL_NAMESPACE_END

View File

@@ -27,7 +27,6 @@
#include "subd_patch.h"
#include "subd_split.h"
#include "util_algorithm.h"
#include "util_foreach.h"
#include "util_logging.h"
#include "util_math.h"
@@ -526,177 +525,69 @@ static void attr_create_uv_map(Scene *scene,
}
/* Create vertex pointiness attributes. */
/* Compare vertices by sum of their coordinates. */
class VertexAverageComparator {
public:
VertexAverageComparator(const array<float3>& verts)
: verts_(verts) {
}
bool operator()(const int& vert_idx_a, const int& vert_idx_b)
{
const float3 &vert_a = verts_[vert_idx_a];
const float3 &vert_b = verts_[vert_idx_b];
if(vert_a == vert_b) {
/* Special case for doubles, so we ensure ordering. */
return vert_idx_a > vert_idx_b;
}
const float x1 = vert_a.x + vert_a.y + vert_a.z;
const float x2 = vert_b.x + vert_b.y + vert_b.z;
return x1 < x2;
}
protected:
const array<float3>& verts_;
};
static void attr_create_pointiness(Scene *scene,
Mesh *mesh,
BL::Mesh& b_mesh,
bool subdivision)
{
if(!mesh->need_attribute(scene, ATTR_STD_POINTINESS)) {
return;
}
const int num_verts = b_mesh.vertices.length();
/* STEP 1: Find out duplicated vertices and point duplicates to a single
* original vertex.
*/
vector<int> sorted_vert_indeices(num_verts);
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
sorted_vert_indeices[vert_index] = vert_index;
}
VertexAverageComparator compare(mesh->verts);
sort(sorted_vert_indeices.begin(), sorted_vert_indeices.end(), compare);
/* This array stores index of the original vertex for the given vertex
* index.
*/
vector<int> vert_orig_index(num_verts);
for(int sorted_vert_index = 0;
sorted_vert_index < num_verts;
++sorted_vert_index)
{
const int vert_index = sorted_vert_indeices[sorted_vert_index];
const float3 &vert_co = mesh->verts[vert_index];
bool found = false;
for(int other_sorted_vert_index = sorted_vert_index + 1;
other_sorted_vert_index < num_verts;
++other_sorted_vert_index)
{
const int other_vert_index =
sorted_vert_indeices[other_sorted_vert_index];
const float3 &other_vert_co = mesh->verts[other_vert_index];
/* We are too far away now, we wouldn't have duplicate. */
if((other_vert_co.x + other_vert_co.y + other_vert_co.z) -
(vert_co.x + vert_co.y + vert_co.z) > 3 * FLT_EPSILON)
{
break;
if(mesh->need_attribute(scene, ATTR_STD_POINTINESS)) {
const int numverts = b_mesh.vertices.length();
AttributeSet& attributes = (subdivision)? mesh->subd_attributes: mesh->attributes;
Attribute *attr = attributes.add(ATTR_STD_POINTINESS);
float *data = attr->data_float();
int *counter = new int[numverts];
float *raw_data = new float[numverts];
float3 *edge_accum = new float3[numverts];
/* Calculate pointiness using single ring neighborhood. */
memset(counter, 0, sizeof(int) * numverts);
memset(raw_data, 0, sizeof(float) * numverts);
memset(edge_accum, 0, sizeof(float3) * numverts);
BL::Mesh::edges_iterator e;
int i = 0;
for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++i) {
int v0 = b_mesh.edges[i].vertices()[0],
v1 = b_mesh.edges[i].vertices()[1];
float3 co0 = get_float3(b_mesh.vertices[v0].co()),
co1 = get_float3(b_mesh.vertices[v1].co());
float3 edge = normalize(co1 - co0);
edge_accum[v0] += edge;
edge_accum[v1] += -edge;
++counter[v0];
++counter[v1];
}
i = 0;
BL::Mesh::vertices_iterator v;
for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v, ++i) {
if(counter[i] > 0) {
float3 normal = get_float3(b_mesh.vertices[i].normal());
float angle = safe_acosf(dot(normal, edge_accum[i] / counter[i]));
raw_data[i] = angle * M_1_PI_F;
}
/* Found duplicate. */
if(len_squared(other_vert_co - vert_co) < FLT_EPSILON) {
found = true;
vert_orig_index[vert_index] = other_vert_index;
break;
else {
raw_data[i] = 0.0f;
}
}
if(!found) {
vert_orig_index[vert_index] = vert_index;
/* Blur vertices to approximate 2 ring neighborhood. */
memset(counter, 0, sizeof(int) * numverts);
memcpy(data, raw_data, sizeof(float) * numverts);
i = 0;
for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++i) {
int v0 = b_mesh.edges[i].vertices()[0],
v1 = b_mesh.edges[i].vertices()[1];
data[v0] += raw_data[v1];
data[v1] += raw_data[v0];
++counter[v0];
++counter[v1];
}
}
/* Make sure we always points to the very first orig vertex. */
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
int orig_index = vert_orig_index[vert_index];
while(orig_index != vert_orig_index[orig_index]) {
orig_index = vert_orig_index[orig_index];
for(i = 0; i < numverts; ++i) {
data[i] /= counter[i] + 1;
}
vert_orig_index[vert_index] = orig_index;
}
sorted_vert_indeices.free_memory();
/* STEP 2: Calculate vertex normals taking into account their possible
* duplicates which gets "welded" together.
*/
vector<float3> vert_normal(num_verts, make_float3(0.0f, 0.0f, 0.0f));
/* First we accumulate all vertex normals in the original index. */
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
const float3 normal = get_float3(b_mesh.vertices[vert_index].normal());
const int orig_index = vert_orig_index[vert_index];
vert_normal[orig_index] += normal;
}
/* Then we normalize the accumulated result and flush it to all duplicates
* as well.
*/
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
const int orig_index = vert_orig_index[vert_index];
vert_normal[vert_index] = normalize(vert_normal[orig_index]);
}
/* STEP 3: Calculate pointiness using single ring neighborhood. */
vector<int> counter(num_verts, 0);
vector<float> raw_data(num_verts, 0.0f);
vector<float3> edge_accum(num_verts, make_float3(0.0f, 0.0f, 0.0f));
BL::Mesh::edges_iterator e;
EdgeMap visited_edges;
int edge_index = 0;
memset(&counter[0], 0, sizeof(int) * counter.size());
for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++edge_index) {
const int v0 = vert_orig_index[b_mesh.edges[edge_index].vertices()[0]],
v1 = vert_orig_index[b_mesh.edges[edge_index].vertices()[1]];
if(visited_edges.exists(v0, v1)) {
continue;
}
visited_edges.insert(v0, v1);
float3 co0 = get_float3(b_mesh.vertices[v0].co()),
co1 = get_float3(b_mesh.vertices[v1].co());
float3 edge = normalize(co1 - co0);
edge_accum[v0] += edge;
edge_accum[v1] += -edge;
++counter[v0];
++counter[v1];
}
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
const int orig_index = vert_orig_index[vert_index];
if(orig_index != vert_index) {
/* Skip duplicates, they'll be overwritten later on. */
continue;
}
if(counter[vert_index] > 0) {
const float3 normal = vert_normal[vert_index];
const float angle =
safe_acosf(dot(normal,
edge_accum[vert_index] / counter[vert_index]));
raw_data[vert_index] = angle * M_1_PI_F;
}
else {
raw_data[vert_index] = 0.0f;
}
}
/* STEP 3: Blur vertices to approximate 2 ring neighborhood. */
AttributeSet& attributes = (subdivision)? mesh->subd_attributes: mesh->attributes;
Attribute *attr = attributes.add(ATTR_STD_POINTINESS);
float *data = attr->data_float();
memcpy(data, &raw_data[0], sizeof(float) * raw_data.size());
memset(&counter[0], 0, sizeof(int) * counter.size());
edge_index = 0;
visited_edges.clear();
for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++edge_index) {
const int v0 = vert_orig_index[b_mesh.edges[edge_index].vertices()[0]],
v1 = vert_orig_index[b_mesh.edges[edge_index].vertices()[1]];
if(visited_edges.exists(v0, v1)) {
continue;
}
visited_edges.insert(v0, v1);
data[v0] += raw_data[v1];
data[v1] += raw_data[v0];
++counter[v0];
++counter[v1];
}
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
data[vert_index] /= counter[vert_index] + 1;
}
/* STEP 4: Copy attribute to the duplicated vertices. */
for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
const int orig_index = vert_orig_index[vert_index];
data[vert_index] = data[orig_index];
delete [] counter;
delete [] raw_data;
delete [] edge_accum;
}
}
@@ -706,8 +597,8 @@ static void create_mesh(Scene *scene,
Mesh *mesh,
BL::Mesh& b_mesh,
const vector<Shader*>& used_shaders,
bool subdivision = false,
bool subdivide_uvs = true)
bool subdivision=false,
bool subdivide_uvs=true)
{
/* count vertices and faces */
int numverts = b_mesh.vertices.length();
@@ -765,6 +656,9 @@ static void create_mesh(Scene *scene,
generated[i++] = get_float3(v->undeformed_co())*size - loc;
}
/* Create needed vertex attributes. */
attr_create_pointiness(scene, mesh, b_mesh, subdivision);
/* create faces */
vector<int> nverts(numfaces);
vector<int> face_flags(numfaces, FACE_FLAG_NONE);
@@ -777,19 +671,28 @@ static void create_mesh(Scene *scene,
int shader = clamp(f->material_index(), 0, used_shaders.size()-1);
bool smooth = f->use_smooth() || use_loop_normals;
/* split vertices if normal is different
*
* note all vertex attributes must have been set here so we can split
* and copy attributes in split_vertex without remapping later */
if(use_loop_normals) {
BL::Array<float, 12> loop_normals = f->split_normals();
for(int i = 0; i < n; i++) {
N[vi[i]] = make_float3(loop_normals[i * 3],
loop_normals[i * 3 + 1],
loop_normals[i * 3 + 2]);
float3 loop_N = make_float3(loop_normals[i * 3], loop_normals[i * 3 + 1], loop_normals[i * 3 + 2]);
if(N[vi[i]] != loop_N) {
int new_vi = mesh->split_vertex(vi[i]);
/* set new normal and vertex index */
N = attr_N->data_float3();
N[new_vi] = loop_N;
vi[i] = new_vi;
}
}
}
/* Create triangles.
*
* NOTE: Autosmooth is already taken care about.
*/
/* create triangles */
if(n == 4) {
if(is_zero(cross(mesh->verts[vi[1]] - mesh->verts[vi[0]], mesh->verts[vi[2]] - mesh->verts[vi[0]])) ||
is_zero(cross(mesh->verts[vi[2]] - mesh->verts[vi[0]], mesh->verts[vi[3]] - mesh->verts[vi[0]])))
@@ -821,8 +724,24 @@ static void create_mesh(Scene *scene,
vi.reserve(n);
for(int i = 0; i < n; i++) {
/* NOTE: Autosmooth is already taken care about. */
vi[i] = b_mesh.loops[p->loop_start() + i].vertex_index();
/* split vertices if normal is different
*
* note all vertex attributes must have been set here so we can split
* and copy attributes in split_vertex without remapping later */
if(use_loop_normals) {
float3 loop_N = get_float3(b_mesh.loops[p->loop_start() + i].normal());
if(N[vi[i]] != loop_N) {
int new_vi = mesh->split_vertex(vi[i]);
/* set new normal and vertex index */
N = attr_N->data_float3();
N[new_vi] = loop_N;
vi[i] = new_vi;
}
}
}
/* create subd faces */
@@ -833,7 +752,6 @@ static void create_mesh(Scene *scene,
/* Create all needed attributes.
* The calculate functions will check whether they're needed or not.
*/
attr_create_pointiness(scene, mesh, b_mesh, subdivision);
attr_create_vertex_color(scene, mesh, b_mesh, nverts, face_flags, subdivision);
attr_create_uv_map(scene, mesh, b_mesh, nverts, face_flags, subdivision, subdivide_uvs);
@@ -1043,20 +961,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob,
mesh->subdivision_type = object_subdivision_type(b_ob, preview, experimental);
/* Disable adaptive subdivision while baking as the baking system
* currently doesnt support the topology and will crash.
*/
if(scene->bake_manager->get_baking()) {
mesh->subdivision_type = Mesh::SUBDIVISION_NONE;
}
BL::Mesh b_mesh = object_to_mesh(b_data,
b_ob,
b_scene,
true,
!preview,
need_undeformed,
mesh->subdivision_type);
BL::Mesh b_mesh = object_to_mesh(b_data, b_ob, b_scene, true, !preview, need_undeformed, mesh->subdivision_type);
if(b_mesh) {
if(render_layer.use_surfaces && !hide_tris) {
@@ -1181,13 +1086,7 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
if(ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) {
/* get derived mesh */
b_mesh = object_to_mesh(b_data,
b_ob,
b_scene,
true,
!preview,
false,
Mesh::SUBDIVISION_NONE);
b_mesh = object_to_mesh(b_data, b_ob, b_scene, true, !preview, false, false);
}
if(!b_mesh) {
@@ -1258,12 +1157,10 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
{
/* no motion, remove attributes again */
if(b_mesh.vertices.length() != numverts) {
VLOG(1) << "Topology differs, disabling motion blur for object "
<< b_ob.name();
VLOG(1) << "Topology differs, disabling motion blur.";
}
else {
VLOG(1) << "No actual deformation motion for object "
<< b_ob.name();
VLOG(1) << "No actual deformation motion for object " << b_ob.name();
}
mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr_mN)
@@ -1294,3 +1191,4 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
}
CCL_NAMESPACE_END

View File

@@ -25,7 +25,6 @@
#include "particles.h"
#include "shader.h"
#include "blender_object_cull.h"
#include "blender_sync.h"
#include "blender_util.h"
@@ -89,6 +88,143 @@ static uint object_ray_visibility(BL::Object& b_ob)
return flag;
}
/* Culling */
class BlenderObjectCulling
{
public:
BlenderObjectCulling(Scene *scene, BL::Scene& b_scene)
: use_scene_camera_cull(false),
use_camera_cull(false),
camera_cull_margin(0.0f),
use_scene_distance_cull(false),
use_distance_cull(false),
distance_cull_margin(0.0f)
{
if(b_scene.render().use_simplify()) {
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
use_scene_camera_cull = scene->camera->type != CAMERA_PANORAMA &&
!b_scene.render().use_multiview() &&
get_boolean(cscene, "use_camera_cull");
use_scene_distance_cull = scene->camera->type != CAMERA_PANORAMA &&
!b_scene.render().use_multiview() &&
get_boolean(cscene, "use_distance_cull");
camera_cull_margin = get_float(cscene, "camera_cull_margin");
distance_cull_margin = get_float(cscene, "distance_cull_margin");
if (distance_cull_margin == 0.0f) {
use_scene_distance_cull = false;
}
}
}
void init_object(Scene *scene, BL::Object& b_ob)
{
if(!use_scene_camera_cull && !use_scene_distance_cull) {
return;
}
PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
use_camera_cull = use_scene_camera_cull && get_boolean(cobject, "use_camera_cull");
use_distance_cull = use_scene_distance_cull && get_boolean(cobject, "use_distance_cull");
if(use_camera_cull || use_distance_cull) {
/* Need to have proper projection matrix. */
scene->camera->update();
}
}
bool test(Scene *scene, BL::Object& b_ob, Transform& tfm)
{
if(!use_camera_cull && !use_distance_cull) {
return false;
}
/* Compute world space bounding box corners. */
float3 bb[8];
BL::Array<float, 24> boundbox = b_ob.bound_box();
for(int i = 0; i < 8; ++i) {
float3 p = make_float3(boundbox[3 * i + 0],
boundbox[3 * i + 1],
boundbox[3 * i + 2]);
bb[i] = transform_point(&tfm, p);
}
bool camera_culled = use_camera_cull && test_camera(scene, bb);
bool distance_culled = use_distance_cull && test_distance(scene, bb);
return ((camera_culled && distance_culled) ||
(camera_culled && !use_distance_cull) ||
(distance_culled && !use_camera_cull));
}
private:
/* TODO(sergey): Not really optimal, consider approaches based on k-DOP in order
* to reduce number of objects which are wrongly considered visible.
*/
bool test_camera(Scene *scene, float3 bb[8])
{
Camera *cam = scene->camera;
Transform& worldtondc = cam->worldtondc;
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
bool all_behind = true;
for(int i = 0; i < 8; ++i) {
float3 p = bb[i];
float4 b = make_float4(p.x, p.y, p.z, 1.0f);
float4 c = make_float4(dot(worldtondc.x, b),
dot(worldtondc.y, b),
dot(worldtondc.z, b),
dot(worldtondc.w, b));
p = float4_to_float3(c / c.w);
if(c.z < 0.0f) {
p.x = 1.0f - p.x;
p.y = 1.0f - p.y;
}
if(c.z >= -camera_cull_margin) {
all_behind = false;
}
bb_min = min(bb_min, p);
bb_max = max(bb_max, p);
}
if(all_behind) {
return true;
}
return (bb_min.x >= 1.0f + camera_cull_margin ||
bb_min.y >= 1.0f + camera_cull_margin ||
bb_max.x <= -camera_cull_margin ||
bb_max.y <= -camera_cull_margin);
}
bool test_distance(Scene *scene, float3 bb[8])
{
float3 camera_position = transform_get_column(&scene->camera->matrix, 3);
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
/* Find min & max points for x & y & z on bounding box */
for(int i = 0; i < 8; ++i) {
float3 p = bb[i];
bb_min = min(bb_min, p);
bb_max = max(bb_max, p);
}
float3 closest_point = max(min(bb_max,camera_position),bb_min);
return (len_squared(camera_position - closest_point) >
distance_cull_margin * distance_cull_margin);
}
bool use_scene_camera_cull;
bool use_camera_cull;
float camera_cull_margin;
bool use_scene_distance_cull;
bool use_distance_cull;
float distance_cull_margin;
};
/* Light */
void BlenderSync::sync_light(BL::Object& b_parent,

View File

@@ -1,149 +0,0 @@
/*
* Copyright 2011-2016 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstdlib>
#include "camera.h"
#include "blender_object_cull.h"
CCL_NAMESPACE_BEGIN
BlenderObjectCulling::BlenderObjectCulling(Scene *scene, BL::Scene& b_scene)
: use_scene_camera_cull_(false),
use_camera_cull_(false),
camera_cull_margin_(0.0f),
use_scene_distance_cull_(false),
use_distance_cull_(false),
distance_cull_margin_(0.0f)
{
if(b_scene.render().use_simplify()) {
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
use_scene_camera_cull_ = scene->camera->type != CAMERA_PANORAMA &&
!b_scene.render().use_multiview() &&
get_boolean(cscene, "use_camera_cull");
use_scene_distance_cull_ = scene->camera->type != CAMERA_PANORAMA &&
!b_scene.render().use_multiview() &&
get_boolean(cscene, "use_distance_cull");
camera_cull_margin_ = get_float(cscene, "camera_cull_margin");
distance_cull_margin_ = get_float(cscene, "distance_cull_margin");
if(distance_cull_margin_ == 0.0f) {
use_scene_distance_cull_ = false;
}
}
}
void BlenderObjectCulling::init_object(Scene *scene, BL::Object& b_ob)
{
if(!use_scene_camera_cull_ && !use_scene_distance_cull_) {
return;
}
PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
use_camera_cull_ = use_scene_camera_cull_ && get_boolean(cobject, "use_camera_cull");
use_distance_cull_ = use_scene_distance_cull_ && get_boolean(cobject, "use_distance_cull");
if(use_camera_cull_ || use_distance_cull_) {
/* Need to have proper projection matrix. */
scene->camera->update();
}
}
bool BlenderObjectCulling::test(Scene *scene, BL::Object& b_ob, Transform& tfm)
{
if(!use_camera_cull_ && !use_distance_cull_) {
return false;
}
/* Compute world space bounding box corners. */
float3 bb[8];
BL::Array<float, 24> boundbox = b_ob.bound_box();
for(int i = 0; i < 8; ++i) {
float3 p = make_float3(boundbox[3 * i + 0],
boundbox[3 * i + 1],
boundbox[3 * i + 2]);
bb[i] = transform_point(&tfm, p);
}
bool camera_culled = use_camera_cull_ && test_camera(scene, bb);
bool distance_culled = use_distance_cull_ && test_distance(scene, bb);
return ((camera_culled && distance_culled) ||
(camera_culled && !use_distance_cull_) ||
(distance_culled && !use_camera_cull_));
}
/* TODO(sergey): Not really optimal, consider approaches based on k-DOP in order
* to reduce number of objects which are wrongly considered visible.
*/
bool BlenderObjectCulling::test_camera(Scene *scene, float3 bb[8])
{
Camera *cam = scene->camera;
Transform& worldtondc = cam->worldtondc;
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
bool all_behind = true;
for(int i = 0; i < 8; ++i) {
float3 p = bb[i];
float4 b = make_float4(p.x, p.y, p.z, 1.0f);
float4 c = make_float4(dot(worldtondc.x, b),
dot(worldtondc.y, b),
dot(worldtondc.z, b),
dot(worldtondc.w, b));
p = float4_to_float3(c / c.w);
if(c.z < 0.0f) {
p.x = 1.0f - p.x;
p.y = 1.0f - p.y;
}
if(c.z >= -camera_cull_margin_) {
all_behind = false;
}
bb_min = min(bb_min, p);
bb_max = max(bb_max, p);
}
if(all_behind) {
return true;
}
return (bb_min.x >= 1.0f + camera_cull_margin_ ||
bb_min.y >= 1.0f + camera_cull_margin_ ||
bb_max.x <= -camera_cull_margin_ ||
bb_max.y <= -camera_cull_margin_);
}
bool BlenderObjectCulling::test_distance(Scene *scene, float3 bb[8])
{
float3 camera_position = transform_get_column(&scene->camera->matrix, 3);
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
/* Find min & max points for x & y & z on bounding box */
for(int i = 0; i < 8; ++i) {
float3 p = bb[i];
bb_min = min(bb_min, p);
bb_max = max(bb_max, p);
}
float3 closest_point = max(min(bb_max,camera_position),bb_min);
return (len_squared(camera_position - closest_point) >
distance_cull_margin_ * distance_cull_margin_);
}
CCL_NAMESPACE_END

View File

@@ -1,49 +0,0 @@
/*
* Copyright 2011-2016 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __BLENDER_OBJECT_CULL_H__
#define __BLENDER_OBJECT_CULL_H__
#include "blender_sync.h"
#include "util_types.h"
CCL_NAMESPACE_BEGIN
class Scene;
class BlenderObjectCulling
{
public:
BlenderObjectCulling(Scene *scene, BL::Scene& b_scene);
void init_object(Scene *scene, BL::Object& b_ob);
bool test(Scene *scene, BL::Object& b_ob, Transform& tfm);
private:
bool test_camera(Scene *scene, float3 bb[8]);
bool test_distance(Scene *scene, float3 bb[8]);
bool use_scene_camera_cull_;
bool use_camera_cull_;
float camera_cull_margin_;
bool use_scene_distance_cull_;
bool use_distance_cull_;
float distance_cull_margin_;
};
CCL_NAMESPACE_END
#endif /* __BLENDER_OBJECT_CULL_H__ */

View File

@@ -67,10 +67,8 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3");
flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2");
flags.cpu.qbvh = get_boolean(cscene, "debug_use_qbvh");
flags.cpu.split_kernel = get_boolean(cscene, "debug_use_cpu_split_kernel");
/* Synchronize CUDA flags. */
flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile");
flags.cuda.split_kernel = get_boolean(cscene, "debug_use_cuda_split_kernel");
/* Synchronize OpenCL kernel type. */
switch(get_enum(cscene, "debug_opencl_kernel_type")) {
case 0:
@@ -106,7 +104,6 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
}
/* Synchronize other OpenCL flags. */
flags.opencl.debug = get_boolean(cscene, "debug_use_opencl_debug");
flags.opencl.single_program = get_boolean(cscene, "debug_opencl_kernel_single_program");
return flags.opencl.device_type != opencl_device_type ||
flags.opencl.kernel_type != opencl_kernel_type;
}
@@ -644,7 +641,7 @@ static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/
Py_RETURN_NONE;
}
static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
static PyObject *set_resumable_chunks_func(PyObject * /*self*/, PyObject *args)
{
int num_resumable_chunks, current_resumable_chunk;
if(!PyArg_ParseTuple(args, "ii",
@@ -679,53 +676,6 @@ static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
Py_RETURN_NONE;
}
static PyObject *set_resumable_chunk_range_func(PyObject * /*self*/, PyObject *args)
{
int num_chunks, start_chunk, end_chunk;
if(!PyArg_ParseTuple(args, "iii",
&num_chunks,
&start_chunk,
&end_chunk)) {
Py_RETURN_NONE;
}
if(num_chunks <= 0) {
fprintf(stderr, "Cycles: Bad value for number of resumable chunks.\n");
abort();
Py_RETURN_NONE;
}
if(start_chunk < 1 || start_chunk > num_chunks) {
fprintf(stderr, "Cycles: Bad value for start chunk number.\n");
abort();
Py_RETURN_NONE;
}
if(end_chunk < 1 || end_chunk > num_chunks) {
fprintf(stderr, "Cycles: Bad value for start chunk number.\n");
abort();
Py_RETURN_NONE;
}
if(start_chunk > end_chunk) {
fprintf(stderr, "Cycles: End chunk should be higher than start one.\n");
abort();
Py_RETURN_NONE;
}
VLOG(1) << "Initialized resumable render: "
<< "num_resumable_chunks=" << num_chunks << ", "
<< "start_resumable_chunk=" << start_chunk
<< "end_resumable_chunk=" << end_chunk;
BlenderSession::num_resumable_chunks = num_chunks;
BlenderSession::start_resumable_chunk = start_chunk;
BlenderSession::end_resumable_chunk = end_chunk;
printf("Cycles: Will render chunks %d to %d of %d\n",
start_chunk,
end_chunk,
num_chunks);
Py_RETURN_NONE;
}
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
{
vector<DeviceInfo>& devices = Device::available_devices();
@@ -765,8 +715,7 @@ static PyMethodDef methods[] = {
{"debug_flags_reset", debug_flags_reset_func, METH_NOARGS, ""},
/* Resumable render */
{"set_resumable_chunk", set_resumable_chunk_func, METH_VARARGS, ""},
{"set_resumable_chunk_range", set_resumable_chunk_range_func, METH_VARARGS, ""},
{"set_resumable_chunks", set_resumable_chunks_func, METH_VARARGS, ""},
/* Compute Device selection */
{"get_device_types", get_device_types_func, METH_VARARGS, ""},

View File

@@ -46,8 +46,6 @@ CCL_NAMESPACE_BEGIN
bool BlenderSession::headless = false;
int BlenderSession::num_resumable_chunks = 0;
int BlenderSession::current_resumable_chunk = 0;
int BlenderSession::start_resumable_chunk = 0;
int BlenderSession::end_resumable_chunk = 0;
BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
BL::UserPreferences& b_userpref,
@@ -70,7 +68,6 @@ BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
background = true;
last_redraw_time = 0.0;
start_resize_time = 0.0;
last_status_time = 0.0;
}
BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
@@ -96,7 +93,6 @@ BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
background = false;
last_redraw_time = 0.0;
start_resize_time = 0.0;
last_status_time = 0.0;
}
BlenderSession::~BlenderSession()
@@ -130,8 +126,8 @@ void BlenderSession::create_session()
/* setup callbacks for builtin image support */
scene->image_manager->builtin_image_info_cb = function_bind(&BlenderSession::builtin_image_info, this, _1, _2, _3, _4, _5, _6, _7);
scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3, _4);
scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3, _4);
scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3);
scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3);
/* create session */
session = new Session(session_params);
@@ -309,16 +305,12 @@ static PassType get_pass_type(BL::RenderPass& b_pass)
#ifdef WITH_CYCLES_DEBUG
case BL::RenderPass::type_DEBUG:
{
switch(b_pass.debug_type()) {
case BL::RenderPass::debug_type_BVH_TRAVERSED_NODES:
return PASS_BVH_TRAVERSED_NODES;
case BL::RenderPass::debug_type_BVH_TRAVERSED_INSTANCES:
return PASS_BVH_TRAVERSED_INSTANCES;
case BL::RenderPass::debug_type_BVH_INTERSECTIONS:
return PASS_BVH_INTERSECTIONS;
case BL::RenderPass::debug_type_RAY_BOUNCES:
return PASS_RAY_BOUNCES;
}
if(b_pass.debug_type() == BL::RenderPass::debug_type_BVH_TRAVERSAL_STEPS)
return PASS_BVH_TRAVERSAL_STEPS;
if(b_pass.debug_type() == BL::RenderPass::debug_type_BVH_TRAVERSED_INSTANCES)
return PASS_BVH_TRAVERSED_INSTANCES;
if(b_pass.debug_type() == BL::RenderPass::debug_type_RAY_BOUNCES)
return PASS_RAY_BOUNCES;
break;
}
#endif
@@ -588,7 +580,7 @@ static void populate_bake_data(BakeData *data, const
BL::BakePixel bp = pixel_array;
int i;
for(i = 0; i < num_pixels; i++) {
for(i=0; i < num_pixels; i++) {
if(bp.object_id() == object_id) {
data->set(i, bp.primitive_id(), bp.uv(), bp.du_dx(), bp.du_dy(), bp.dv_dx(), bp.dv_dy());
} else {
@@ -938,13 +930,38 @@ void BlenderSession::get_status(string& status, string& substatus)
void BlenderSession::get_progress(float& progress, double& total_time, double& render_time)
{
session->progress.get_time(total_time, render_time);
progress = session->progress.get_progress();
double tile_time;
int tile, sample, samples_per_tile;
int tile_total = session->tile_manager.state.num_tiles;
int samples = session->tile_manager.state.sample + 1;
int total_samples = session->tile_manager.get_num_effective_samples();
session->progress.get_tile(tile, total_time, render_time, tile_time);
sample = session->progress.get_sample();
samples_per_tile = session->tile_manager.get_num_effective_samples();
if(background && samples_per_tile && tile_total)
progress = ((float)sample / (float)(tile_total * samples_per_tile));
else if(!background && samples > 0 && total_samples != INT_MAX)
progress = ((float)samples) / total_samples;
else
progress = 0.0;
}
void BlenderSession::update_bake_progress()
{
float progress = session->progress.get_progress();
float progress;
int sample, samples_per_task, parts_total;
sample = session->progress.get_sample();
samples_per_task = scene->bake_manager->num_samples;
parts_total = scene->bake_manager->num_parts;
if(samples_per_task)
progress = ((float)sample / (float)(parts_total * samples_per_task));
else
progress = 0.0;
if(progress != last_progress) {
b_engine.update_progress(progress);
@@ -993,14 +1010,10 @@ void BlenderSession::update_status_progress()
if(substatus.size() > 0)
status += " | " + substatus;
double current_time = time_dt();
/* When rendering in a window, redraw the status at least once per second to keep the elapsed and remaining time up-to-date.
* For headless rendering, only report when something significant changes to keep the console output readable. */
if(status != last_status || (!headless && (current_time - last_status_time) > 1.0)) {
if(status != last_status) {
b_engine.update_stats("", (timestatus + scene + status).c_str());
b_engine.update_memory_stats(mem_used, mem_peak);
last_status = status;
last_status_time = current_time;
}
if(progress != last_progress) {
b_engine.update_progress(progress);
@@ -1067,13 +1080,7 @@ int BlenderSession::builtin_image_frame(const string &builtin_name)
return atoi(builtin_name.substr(last + 1, builtin_name.size() - last - 1).c_str());
}
void BlenderSession::builtin_image_info(const string &builtin_name,
void *builtin_data,
bool &is_float,
int &width,
int &height,
int &depth,
int &channels)
void BlenderSession::builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &depth, int &channels)
{
/* empty image */
is_float = false;
@@ -1151,67 +1158,60 @@ void BlenderSession::builtin_image_info(const string &builtin_name,
}
}
bool BlenderSession::builtin_image_pixels(const string &builtin_name,
void *builtin_data,
unsigned char *pixels,
const size_t pixels_size)
bool BlenderSession::builtin_image_pixels(const string &builtin_name, void *builtin_data, unsigned char *pixels)
{
if(!builtin_data) {
if(!builtin_data)
return false;
}
const int frame = builtin_image_frame(builtin_name);
int frame = builtin_image_frame(builtin_name);
PointerRNA ptr;
RNA_id_pointer_create((ID*)builtin_data, &ptr);
BL::Image b_image(ptr);
const int width = b_image.size()[0];
const int height = b_image.size()[1];
const int channels = b_image.channels();
int width = b_image.size()[0];
int height = b_image.size()[1];
int channels = b_image.channels();
unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame);
const size_t num_pixels = ((size_t)width) * height;
unsigned char *image_pixels;
image_pixels = image_get_pixels_for_frame(b_image, frame);
size_t num_pixels = ((size_t)width) * height;
if(image_pixels && num_pixels * channels == pixels_size) {
memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char));
if(image_pixels) {
memcpy(pixels, image_pixels, num_pixels * channels * sizeof(unsigned char));
MEM_freeN(image_pixels);
}
else {
if(channels == 1) {
memset(pixels, 0, pixels_size * sizeof(unsigned char));
memset(pixels, 0, num_pixels * sizeof(unsigned char));
}
else {
const size_t num_pixels_safe = pixels_size / channels;
unsigned char *cp = pixels;
for(size_t i = 0; i < num_pixels_safe; i++, cp += channels) {
for(size_t i = 0; i < num_pixels; i++, cp += channels) {
cp[0] = 255;
cp[1] = 0;
cp[2] = 255;
if(channels == 4) {
if(channels == 4)
cp[3] = 255;
}
}
}
}
/* Premultiply, byte images are always straight for Blender. */
/* premultiply, byte images are always straight for blender */
unsigned char *cp = pixels;
for(size_t i = 0; i < num_pixels; i++, cp += channels) {
cp[0] = (cp[0] * cp[3]) >> 8;
cp[1] = (cp[1] * cp[3]) >> 8;
cp[2] = (cp[2] * cp[3]) >> 8;
}
return true;
}
bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
void *builtin_data,
float *pixels,
const size_t pixels_size)
bool BlenderSession::builtin_image_float_pixels(const string &builtin_name, void *builtin_data, float *pixels)
{
if(!builtin_data) {
if(!builtin_data)
return false;
}
PointerRNA ptr;
RNA_id_pointer_create((ID*)builtin_data, &ptr);
@@ -1222,16 +1222,16 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
BL::Image b_image(b_id);
int frame = builtin_image_frame(builtin_name);
const int width = b_image.size()[0];
const int height = b_image.size()[1];
const int channels = b_image.channels();
int width = b_image.size()[0];
int height = b_image.size()[1];
int channels = b_image.channels();
float *image_pixels;
image_pixels = image_get_float_pixels_for_frame(b_image, frame);
const size_t num_pixels = ((size_t)width) * height;
size_t num_pixels = ((size_t)width) * height;
if(image_pixels && num_pixels * channels == pixels_size) {
memcpy(pixels, image_pixels, pixels_size * sizeof(float));
if(image_pixels) {
memcpy(pixels, image_pixels, num_pixels * channels * sizeof(float));
MEM_freeN(image_pixels);
}
else {
@@ -1239,15 +1239,13 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
memset(pixels, 0, num_pixels * sizeof(float));
}
else {
const size_t num_pixels_safe = pixels_size / channels;
float *fp = pixels;
for(int i = 0; i < num_pixels_safe; i++, fp += channels) {
for(int i = 0; i < num_pixels; i++, fp += channels) {
fp[0] = 1.0f;
fp[1] = 0.0f;
fp[2] = 1.0f;
if(channels == 4) {
if(channels == 4)
fp[3] = 1.0f;
}
}
}
}
@@ -1259,9 +1257,8 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
BL::Object b_ob(b_id);
BL::SmokeDomainSettings b_domain = object_smoke_domain_find(b_ob);
if(!b_domain) {
if(!b_domain)
return false;
}
int3 resolution = get_int3(b_domain.domain_resolution());
int length, amplify = (b_domain.use_high_resolution())? b_domain.amplify() + 1: 1;
@@ -1273,10 +1270,10 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
amplify = 1;
}
const int width = resolution.x * amplify;
const int height = resolution.y * amplify;
const int depth = resolution.z * amplify;
const size_t num_pixels = ((size_t)width) * height * depth;
int width = resolution.x * amplify;
int height = resolution.y * amplify;
int depth = resolution.z * amplify;
size_t num_pixels = ((size_t)width) * height * depth;
if(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) {
SmokeDomainSettings_density_grid_get_length(&b_domain.ptr, &length);
@@ -1350,21 +1347,9 @@ void BlenderSession::update_resumable_tile_manager(int num_samples)
return;
}
const int num_samples_per_chunk = (int)ceilf((float)num_samples / num_resumable_chunks);
int range_start_sample, range_num_samples;
if(current_resumable_chunk != 0) {
/* Single chunk rendering. */
range_start_sample = num_samples_per_chunk * (current_resumable_chunk - 1);
range_num_samples = num_samples_per_chunk;
}
else {
/* Ranged-chunks. */
const int num_chunks = end_resumable_chunk - start_resumable_chunk + 1;
range_start_sample = num_samples_per_chunk * (start_resumable_chunk - 1);
range_num_samples = num_chunks * num_samples_per_chunk;
}
/* Make sure we don't overshoot. */
int num_samples_per_chunk = (int)ceilf((float)num_samples / num_resumable_chunks);
int range_start_sample = num_samples_per_chunk * (current_resumable_chunk - 1);
int range_num_samples = num_samples_per_chunk;
if(range_start_sample + range_num_samples > num_samples) {
range_num_samples = num_samples - range_num_samples;
}
@@ -1372,9 +1357,6 @@ void BlenderSession::update_resumable_tile_manager(int num_samples)
VLOG(1) << "Samples range start is " << range_start_sample << ", "
<< "number of samples to render is " << range_num_samples;
scene->integrator->start_sample = range_start_sample;
scene->integrator->tag_update(scene);
session->tile_manager.range_start_sample = range_start_sample;
session->tile_manager.range_num_samples = range_num_samples;
}

View File

@@ -113,7 +113,6 @@ public:
string last_status;
string last_error;
float last_progress;
double last_status_time;
int width, height;
double start_resize_time;
@@ -138,10 +137,6 @@ public:
/* Current resumable chunk index to render. */
static int current_resumable_chunk;
/* Alternative to single-chunk rendering to render a range of chunks. */
static int start_resumable_chunk;
static int end_resumable_chunk;
protected:
void do_write_update_render_result(BL::RenderResult& b_rr,
BL::RenderLayer& b_rlay,
@@ -150,21 +145,9 @@ protected:
void do_write_update_render_tile(RenderTile& rtile, bool do_update_only);
int builtin_image_frame(const string &builtin_name);
void builtin_image_info(const string &builtin_name,
void *builtin_data,
bool &is_float,
int &width,
int &height,
int &depth,
int &channels);
bool builtin_image_pixels(const string &builtin_name,
void *builtin_data,
unsigned char *pixels,
const size_t pixels_size);
bool builtin_image_float_pixels(const string &builtin_name,
void *builtin_data,
float *pixels,
const size_t pixels_size);
void builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &depth, int &channels);
bool builtin_image_pixels(const string &builtin_name, void *builtin_data, unsigned char *pixels);
bool builtin_image_float_pixels(const string &builtin_name, void *builtin_data, float *pixels);
/* Update tile manager to reflect resumable render settings. */
void update_resumable_tile_manager(int num_samples);

View File

@@ -609,8 +609,7 @@ static ShaderNode *add_node(Scene *scene,
bool is_builtin = b_image.packed_file() ||
b_image.source() == BL::Image::source_GENERATED ||
b_image.source() == BL::Image::source_MOVIE ||
(b_engine.is_preview() &&
b_image.source() != BL::Image::source_SEQUENCE);
b_engine.is_preview();
if(is_builtin) {
/* for builtin images we're using image datablock name to find an image to
@@ -641,8 +640,7 @@ static ShaderNode *add_node(Scene *scene,
image->filename.string(),
image->builtin_data,
get_image_interpolation(b_image_node),
get_image_extension(b_image_node),
image->use_alpha);
get_image_extension(b_image_node));
}
}
image->color_space = (NodeImageColorSpace)b_image_node.color_space();
@@ -663,8 +661,7 @@ static ShaderNode *add_node(Scene *scene,
bool is_builtin = b_image.packed_file() ||
b_image.source() == BL::Image::source_GENERATED ||
b_image.source() == BL::Image::source_MOVIE ||
(b_engine.is_preview() &&
b_image.source() != BL::Image::source_SEQUENCE);
b_engine.is_preview();
if(is_builtin) {
int scene_frame = b_scene.frame_current();
@@ -689,8 +686,7 @@ static ShaderNode *add_node(Scene *scene,
env->filename.string(),
env->builtin_data,
get_image_interpolation(b_env_node),
EXTENSION_REPEAT,
env->use_alpha);
EXTENSION_REPEAT);
}
}
env->color_space = (NodeImageColorSpace)b_env_node.color_space();
@@ -827,8 +823,7 @@ static ShaderNode *add_node(Scene *scene,
point_density->filename.string(),
point_density->builtin_data,
point_density->interpolation,
EXTENSION_CLIP,
true);
EXTENSION_CLIP);
}
node = point_density;

View File

@@ -322,15 +322,6 @@ void BlenderSync::sync_integrator()
integrator->volume_samples = volume_samples;
}
if(b_scene.render().use_simplify()) {
if(preview) {
integrator->ao_bounces = get_int(cscene, "ao_bounces");
}
else {
integrator->ao_bounces = get_int(cscene, "ao_bounces_render");
}
}
if(integrator->modified(previntegrator))
integrator->tag_update(scene);
}
@@ -507,7 +498,6 @@ SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
params.use_bvh_spatial_split = RNA_boolean_get(&cscene, "debug_use_spatial_splits");
params.use_bvh_unaligned_nodes = RNA_boolean_get(&cscene, "debug_use_hair_bvh");
params.num_bvh_time_steps = RNA_int_get(&cscene, "debug_bvh_time_steps");
if(background && params.shadingsystem != SHADINGSYSTEM_OSL)
params.persistent_data = r.use_persistent_data();

View File

@@ -19,7 +19,6 @@
#include "mesh.h"
#include "util_algorithm.h"
#include "util_map.h"
#include "util_path.h"
#include "util_set.h"
@@ -49,12 +48,12 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
bool apply_modifiers,
bool render,
bool calc_undeformed,
Mesh::SubdivisionType subdivision_type)
bool subdivision)
{
bool subsurf_mod_show_render;
bool subsurf_mod_show_viewport;
if(subdivision_type != Mesh::SUBDIVISION_NONE) {
if(subdivision) {
BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length()-1];
subsurf_mod_show_render = subsurf_mod.show_render();
@@ -66,7 +65,7 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
BL::Mesh me = data.meshes.new_from_object(scene, object, apply_modifiers, (render)? 2: 1, false, calc_undeformed);
if(subdivision_type != Mesh::SUBDIVISION_NONE) {
if(subdivision) {
BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length()-1];
subsurf_mod.show_render(subsurf_mod_show_render);
@@ -75,14 +74,9 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
if((bool)me) {
if(me.use_auto_smooth()) {
if(subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK) {
me.calc_normals_split();
}
else {
me.split_faces(false);
}
me.calc_normals_split();
}
if(subdivision_type == Mesh::SUBDIVISION_NONE) {
if(!subdivision) {
me.calc_tessface(true);
}
}
@@ -787,35 +781,6 @@ struct ParticleSystemKey {
}
};
class EdgeMap {
public:
EdgeMap() {
}
void clear() {
edges_.clear();
}
void insert(int v0, int v1) {
get_sorted_verts(v0, v1);
edges_.insert(std::pair<int, int>(v0, v1));
}
bool exists(int v0, int v1) {
get_sorted_verts(v0, v1);
return edges_.find(std::pair<int, int>(v0, v1)) != edges_.end();
}
protected:
void get_sorted_verts(int& v0, int& v1) {
if(v0 > v1) {
swap(v0, v1);
}
}
set< std::pair<int, int> > edges_;
};
CCL_NAMESPACE_END
#endif /* __BLENDER_UTIL_H__ */

View File

@@ -81,7 +81,6 @@ void BVH::build(Progress& progress)
pack.prim_type,
pack.prim_index,
pack.prim_object,
pack.prim_time,
params,
progress);
BVHNode *root = bvh_build.run();
@@ -257,10 +256,6 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
pack.leaf_nodes.resize(leaf_nodes_size);
pack.object_node.resize(objects.size());
if(params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0) {
pack.prim_time.resize(prim_index_size);
}
int *pack_prim_index = (pack.prim_index.size())? &pack.prim_index[0]: NULL;
int *pack_prim_type = (pack.prim_type.size())? &pack.prim_type[0]: NULL;
int *pack_prim_object = (pack.prim_object.size())? &pack.prim_object[0]: NULL;
@@ -269,7 +264,6 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
uint *pack_prim_tri_index = (pack.prim_tri_index.size())? &pack.prim_tri_index[0]: NULL;
int4 *pack_nodes = (pack.nodes.size())? &pack.nodes[0]: NULL;
int4 *pack_leaf_nodes = (pack.leaf_nodes.size())? &pack.leaf_nodes[0]: NULL;
float2 *pack_prim_time = (pack.prim_time.size())? &pack.prim_time[0]: NULL;
/* merge */
foreach(Object *ob, objects) {
@@ -315,7 +309,6 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
int *bvh_prim_type = &bvh->pack.prim_type[0];
uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0];
uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0];
float2 *bvh_prim_time = bvh->pack.prim_time.size()? &bvh->pack.prim_time[0]: NULL;
for(size_t i = 0; i < bvh_prim_index_size; i++) {
if(bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
@@ -331,9 +324,6 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
pack_prim_object[pack_prim_index_offset] = 0; // unused for instances
if(bvh_prim_time != NULL) {
pack_prim_time[pack_prim_index_offset] = bvh_prim_time[i];
}
pack_prim_index_offset++;
}
}
@@ -855,8 +845,6 @@ void QBVH::pack_aligned_inner(const BVHStackEntry& e,
bounds,
child,
e.node->m_visibility,
e.node->m_time_from,
e.node->m_time_to,
num);
}
@@ -864,17 +852,12 @@ void QBVH::pack_aligned_node(int idx,
const BoundBox *bounds,
const int *child,
const uint visibility,
const float time_from,
const float time_to,
const int num)
{
float4 data[BVH_QNODE_SIZE];
memset(data, 0, sizeof(data));
data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
data[0].y = time_from;
data[0].z = time_to;
for(int i = 0; i < num; i++) {
float3 bb_min = bounds[i].min;
float3 bb_max = bounds[i].max;
@@ -925,8 +908,6 @@ void QBVH::pack_unaligned_inner(const BVHStackEntry& e,
bounds,
child,
e.node->m_visibility,
e.node->m_time_from,
e.node->m_time_to,
num);
}
@@ -935,16 +916,12 @@ void QBVH::pack_unaligned_node(int idx,
const BoundBox *bounds,
const int *child,
const uint visibility,
const float time_from,
const float time_to,
const int num)
{
float4 data[BVH_UNALIGNED_QNODE_SIZE];
memset(data, 0, sizeof(data));
data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
data[0].y = time_from;
data[0].z = time_to;
for(int i = 0; i < num; i++) {
Transform space = BVHUnaligned::compute_node_transform(
@@ -1230,8 +1207,6 @@ void QBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
child_bbox,
&c[0],
visibility,
0.0f,
1.0f,
4);
}
else {
@@ -1239,8 +1214,6 @@ void QBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
child_bbox,
&c[0],
visibility,
0.0f,
1.0f,
4);
}
}

View File

@@ -68,8 +68,6 @@ struct PackedBVH {
array<int> prim_index;
/* mapping from BVH primitive index, to the object id of that primitive. */
array<int> prim_object;
/* Time range of BVH primitive. */
array<float2> prim_time;
/* index of the root node. */
int root_index;
@@ -177,8 +175,6 @@ protected:
const BoundBox *bounds,
const int *child,
const uint visibility,
const float time_from,
const float time_to,
const int num);
void pack_unaligned_inner(const BVHStackEntry& e,
@@ -189,8 +185,6 @@ protected:
const BoundBox *bounds,
const int *child,
const uint visibility,
const float time_from,
const float time_to,
const int num);
/* refit */

View File

@@ -26,7 +26,6 @@
#include "scene.h"
#include "curves.h"
#include "util_algorithm.h"
#include "util_debug.h"
#include "util_foreach.h"
#include "util_logging.h"
@@ -93,14 +92,12 @@ BVHBuild::BVHBuild(const vector<Object*>& objects_,
array<int>& prim_type_,
array<int>& prim_index_,
array<int>& prim_object_,
array<float2>& prim_time_,
const BVHParams& params_,
Progress& progress_)
: objects(objects_),
prim_type(prim_type_),
prim_index(prim_index_),
prim_object(prim_object_),
prim_time(prim_time_),
params(params_),
progress(progress_),
progress_start_time(0.0),
@@ -115,237 +112,81 @@ BVHBuild::~BVHBuild()
/* Adding References */
void BVHBuild::add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
{
const Attribute *attr_mP = NULL;
if(mesh->has_motion_blur()) {
attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
}
const size_t num_triangles = mesh->num_triangles();
for(uint j = 0; j < num_triangles; j++) {
Mesh::Triangle t = mesh->get_triangle(j);
const float3 *verts = &mesh->verts[0];
if(attr_mP == NULL) {
BoundBox bounds = BoundBox::empty;
t.bounds_grow(verts, bounds);
if(bounds.valid()) {
references.push_back(BVHReference(bounds,
j,
i,
PRIMITIVE_TRIANGLE));
root.grow(bounds);
center.grow(bounds.center2());
}
}
else if(params.num_motion_triangle_steps == 0 || params.use_spatial_split) {
/* Motion triangles, simple case: single node for the whole
* primitive. Lowest memory footprint and faster BVH build but
* least optimal ray-tracing.
*/
/* TODO(sergey): Support motion steps for spatially split BVH. */
const size_t num_verts = mesh->verts.size();
const size_t num_steps = mesh->motion_steps;
const float3 *vert_steps = attr_mP->data_float3();
BoundBox bounds = BoundBox::empty;
t.bounds_grow(verts, bounds);
for(size_t step = 0; step < num_steps - 1; step++) {
t.bounds_grow(vert_steps + step*num_verts, bounds);
}
if(bounds.valid()) {
references.push_back(
BVHReference(bounds,
j,
i,
PRIMITIVE_MOTION_TRIANGLE));
root.grow(bounds);
center.grow(bounds.center2());
}
}
else {
/* Motion triangles, trace optimized case: we split triangle
* primitives into separate nodes for each of the time steps.
* This way we minimize overlap of neighbor curve primitives.
*/
const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
const size_t num_verts = mesh->verts.size();
const size_t num_steps = mesh->motion_steps;
const float3 *vert_steps = attr_mP->data_float3();
/* Calculate bounding box of the previous time step.
* Will be reused later to avoid duplicated work on
* calculating BVH time step boundbox.
*/
float3 prev_verts[3];
t.motion_verts(verts,
vert_steps,
num_verts,
num_steps,
0.0f,
prev_verts);
BoundBox prev_bounds = BoundBox::empty;
prev_bounds.grow(prev_verts[0]);
prev_bounds.grow(prev_verts[1]);
prev_bounds.grow(prev_verts[2]);
/* Create all primitive time steps, */
for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
float3 curr_verts[3];
t.motion_verts(verts,
vert_steps,
num_verts,
num_steps,
curr_time,
curr_verts);
BoundBox curr_bounds = BoundBox::empty;
curr_bounds.grow(curr_verts[0]);
curr_bounds.grow(curr_verts[1]);
curr_bounds.grow(curr_verts[2]);
BoundBox bounds = prev_bounds;
bounds.grow(curr_bounds);
if(bounds.valid()) {
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
references.push_back(
BVHReference(bounds,
j,
i,
PRIMITIVE_MOTION_TRIANGLE,
prev_time,
curr_time));
root.grow(bounds);
center.grow(bounds.center2());
}
/* Current time boundbox becomes previous one for the
* next time step.
*/
prev_bounds = curr_bounds;
}
}
}
}
if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
Attribute *attr_mP = NULL;
void BVHBuild::add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
{
const Attribute *curve_attr_mP = NULL;
if(mesh->has_motion_blur()) {
curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(mesh->has_motion_blur())
attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
size_t num_triangles = mesh->num_triangles();
for(uint j = 0; j < num_triangles; j++) {
Mesh::Triangle t = mesh->get_triangle(j);
BoundBox bounds = BoundBox::empty;
PrimitiveType type = PRIMITIVE_TRIANGLE;
t.bounds_grow(&mesh->verts[0], bounds);
/* motion triangles */
if(attr_mP) {
size_t mesh_size = mesh->verts.size();
size_t steps = mesh->motion_steps - 1;
float3 *vert_steps = attr_mP->data_float3();
for(size_t i = 0; i < steps; i++)
t.bounds_grow(vert_steps + i*mesh_size, bounds);
type = PRIMITIVE_MOTION_TRIANGLE;
}
if(bounds.valid()) {
references.push_back(BVHReference(bounds, j, i, type));
root.grow(bounds);
center.grow(bounds.center2());
}
}
}
const size_t num_curves = mesh->num_curves();
for(uint j = 0; j < num_curves; j++) {
const Mesh::Curve curve = mesh->get_curve(j);
const float *curve_radius = &mesh->curve_radius[0];
for(int k = 0; k < curve.num_keys - 1; k++) {
if(curve_attr_mP == NULL) {
/* Really simple logic for static hair. */
if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
Attribute *curve_attr_mP = NULL;
if(mesh->has_motion_blur())
curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
size_t num_curves = mesh->num_curves();
for(uint j = 0; j < num_curves; j++) {
Mesh::Curve curve = mesh->get_curve(j);
PrimitiveType type = PRIMITIVE_CURVE;
for(int k = 0; k < curve.num_keys - 1; k++) {
BoundBox bounds = BoundBox::empty;
curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bounds);
/* motion curve */
if(curve_attr_mP) {
size_t mesh_size = mesh->curve_keys.size();
size_t steps = mesh->motion_steps - 1;
float3 *key_steps = curve_attr_mP->data_float3();
for(size_t i = 0; i < steps; i++)
curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bounds);
type = PRIMITIVE_MOTION_CURVE;
}
if(bounds.valid()) {
int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE, k);
int packed_type = PRIMITIVE_PACK_SEGMENT(type, k);
references.push_back(BVHReference(bounds, j, i, packed_type));
root.grow(bounds);
center.grow(bounds.center2());
}
}
else if(params.num_motion_curve_steps == 0 || params.use_spatial_split) {
/* Simple case of motion curves: single node for the while
* shutter time. Lowest memory usage but less optimal
* rendering.
*/
/* TODO(sergey): Support motion steps for spatially split BVH. */
BoundBox bounds = BoundBox::empty;
curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
const size_t num_keys = mesh->curve_keys.size();
const size_t num_steps = mesh->motion_steps;
const float3 *key_steps = curve_attr_mP->data_float3();
for(size_t step = 0; step < num_steps - 1; step++) {
curve.bounds_grow(k,
key_steps + step*num_keys,
curve_radius,
bounds);
}
if(bounds.valid()) {
int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
references.push_back(BVHReference(bounds,
j,
i,
packed_type));
root.grow(bounds);
center.grow(bounds.center2());
}
}
else {
/* Motion curves, trace optimized case: we split curve keys
* primitives into separate nodes for each of the time steps.
* This way we minimize overlap of neighbor curve primitives.
*/
const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
const size_t num_steps = mesh->motion_steps;
const float3 *curve_keys = &mesh->curve_keys[0];
const float3 *key_steps = curve_attr_mP->data_float3();
const size_t num_keys = mesh->curve_keys.size();
/* Calculate bounding box of the previous time step.
* Will be reused later to avoid duplicated work on
* calculating BVH time step boundbox.
*/
float4 prev_keys[4];
curve.cardinal_motion_keys(curve_keys,
curve_radius,
key_steps,
num_keys,
num_steps,
0.0f,
k - 1, k, k + 1, k + 2,
prev_keys);
BoundBox prev_bounds = BoundBox::empty;
curve.bounds_grow(prev_keys, prev_bounds);
/* Create all primitive time steps, */
for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
float4 curr_keys[4];
curve.cardinal_motion_keys(curve_keys,
curve_radius,
key_steps,
num_keys,
num_steps,
curr_time,
k - 1, k, k + 1, k + 2,
curr_keys);
BoundBox curr_bounds = BoundBox::empty;
curve.bounds_grow(curr_keys, curr_bounds);
BoundBox bounds = prev_bounds;
bounds.grow(curr_bounds);
if(bounds.valid()) {
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
references.push_back(BVHReference(bounds,
j,
i,
packed_type,
prev_time,
curr_time));
root.grow(bounds);
center.grow(bounds.center2());
}
/* Current time boundbox becomes previous one for the
* next time step.
*/
prev_bounds = curr_bounds;
}
}
}
}
}
void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
{
if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
add_reference_triangles(root, center, mesh, i);
}
if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
add_reference_curves(root, center, mesh, i);
}
}
void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i)
{
references.push_back(BVHReference(ob->bounds, -1, i, 0));
@@ -359,7 +200,7 @@ static size_t count_curve_segments(Mesh *mesh)
for(size_t i = 0; i < num_curves; i++)
num += mesh->get_curve(i).num_keys - 1;
return num;
}
@@ -467,9 +308,6 @@ BVHNode* BVHBuild::run()
}
spatial_free_index = 0;
need_prim_time = params.num_motion_curve_steps > 0 ||
params.num_motion_triangle_steps > 0;
/* init progress updates */
double build_start_time;
build_start_time = progress_start_time = time_dt();
@@ -480,12 +318,6 @@ BVHNode* BVHBuild::run()
prim_type.resize(references.size());
prim_index.resize(references.size());
prim_object.resize(references.size());
if(need_prim_time) {
prim_time.resize(references.size());
}
else {
prim_time.resize(0);
}
/* build recursively */
BVHNode *rootnode;
@@ -512,7 +344,6 @@ BVHNode* BVHBuild::run()
else {
/*rotate(rootnode, 4, 5);*/
rootnode->update_visibility();
rootnode->update_time();
}
if(rootnode != NULL) {
VLOG(1) << "BVH build statistics:\n"
@@ -540,7 +371,7 @@ void BVHBuild::progress_update()
{
if(time_dt() - progress_start_time < 0.25)
return;
double progress_start = (double)progress_count/(double)progress_total;
double duplicates = (double)(progress_total - progress_original_total)/(double)progress_total;
@@ -548,7 +379,7 @@ void BVHBuild::progress_update()
progress_start * 100.0, duplicates * 100.0);
progress.set_substatus(msg);
progress_start_time = time_dt();
progress_start_time = time_dt();
}
void BVHBuild::thread_build_node(InnerNode *inner,
@@ -604,7 +435,6 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange& range,
return false;
size_t num_triangles = 0;
size_t num_motion_triangles = 0;
size_t num_curves = 0;
size_t num_motion_curves = 0;
@@ -615,16 +445,13 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange& range,
num_curves++;
if(ref.prim_type() & PRIMITIVE_MOTION_CURVE)
num_motion_curves++;
else if(ref.prim_type() & PRIMITIVE_TRIANGLE)
else if(ref.prim_type() & PRIMITIVE_ALL_TRIANGLE)
num_triangles++;
else if(ref.prim_type() & PRIMITIVE_MOTION_TRIANGLE)
num_motion_triangles++;
}
return (num_triangles <= params.max_triangle_leaf_size) &&
(num_motion_triangles <= params.max_motion_triangle_leaf_size) &&
(num_curves <= params.max_curve_leaf_size) &&
(num_motion_curves <= params.max_motion_curve_leaf_size);
return (num_triangles < params.max_triangle_leaf_size) &&
(num_curves < params.max_curve_leaf_size) &&
(num_motion_curves < params.max_curve_leaf_size);
}
/* multithreaded binning builder */
@@ -860,29 +687,20 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start,
prim_type[start] = ref->prim_type();
prim_index[start] = ref->prim_index();
prim_object[start] = ref->prim_object();
if(need_prim_time) {
prim_time[start] = make_float2(ref->time_from(), ref->time_to());
}
uint visibility = objects[ref->prim_object()]->visibility;
BVHNode *leaf_node = new LeafNode(ref->bounds(), visibility, start, start+1);
leaf_node->m_time_from = ref->time_from();
leaf_node->m_time_to = ref->time_to();
return leaf_node;
return new LeafNode(ref->bounds(), visibility, start, start+1);
}
else {
int mid = num/2;
BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid);
BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid);
BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid);
BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid);
BoundBox bounds = BoundBox::empty;
bounds.grow(leaf0->m_bounds);
bounds.grow(leaf1->m_bounds);
BVHNode *inner_node = new InnerNode(bounds, leaf0, leaf1);
inner_node->m_time_from = min(leaf0->m_time_from, leaf1->m_time_from);
inner_node->m_time_to = max(leaf0->m_time_to, leaf1->m_time_to);
return inner_node;
return new InnerNode(bounds, leaf0, leaf1);
}
}
@@ -905,13 +723,11 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
* can not control.
*/
typedef StackAllocator<256, int> LeafStackAllocator;
typedef StackAllocator<256, float2> LeafTimeStackAllocator;
typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;
vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL];
vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL];
vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL];
vector<float2, LeafTimeStackAllocator> p_time[PRIMITIVE_NUM_TOTAL];
vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM_TOTAL];
/* TODO(sergey): In theory we should be able to store references. */
@@ -934,8 +750,6 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
p_type[type_index].push_back(ref.prim_type());
p_index[type_index].push_back(ref.prim_index());
p_object[type_index].push_back(ref.prim_object());
p_time[type_index].push_back(make_float2(ref.time_from(),
ref.time_to()));
bounds[type_index].grow(ref.bounds());
visibility[type_index] |= objects[ref.prim_object()]->visibility;
@@ -965,13 +779,9 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
vector<int, LeafStackAllocator> local_prim_type,
local_prim_index,
local_prim_object;
vector<float2, LeafTimeStackAllocator> local_prim_time;
local_prim_type.resize(num_new_prims);
local_prim_index.resize(num_new_prims);
local_prim_object.resize(num_new_prims);
if(need_prim_time) {
local_prim_time.resize(num_new_prims);
}
for(int i = 0; i < PRIMITIVE_NUM_TOTAL; ++i) {
int num = (int)p_type[i].size();
if(num != 0) {
@@ -984,9 +794,6 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
local_prim_type[index] = p_type[i][j];
local_prim_index[index] = p_index[i][j];
local_prim_object[index] = p_object[i][j];
if(need_prim_time) {
local_prim_time[index] = p_time[i][j];
}
if(params.use_unaligned_nodes && !alignment_found) {
alignment_found =
unaligned_heuristic.compute_aligned_space(p_ref[i][j],
@@ -997,16 +804,6 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
visibility[i],
start_index,
start_index + num);
if(true) {
float time_from = 1.0f, time_to = 0.0f;
for(int j = 0; j < num; ++j) {
const BVHReference &ref = p_ref[i][j];
time_from = min(time_from, ref.time_from());
time_to = max(time_to, ref.time_to());
}
leaf_node->m_time_from = time_from;
leaf_node->m_time_to = time_to;
}
if(alignment_found) {
/* Need to recalculate leaf bounds with new alignment. */
leaf_node->m_bounds = BoundBox::empty;
@@ -1053,17 +850,11 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
prim_type.reserve(reserve);
prim_index.reserve(reserve);
prim_object.reserve(reserve);
if(need_prim_time) {
prim_time.reserve(reserve);
}
}
prim_type.resize(range_end);
prim_index.resize(range_end);
prim_object.resize(range_end);
if(need_prim_time) {
prim_time.resize(range_end);
}
}
spatial_spin_lock.unlock();
@@ -1072,9 +863,6 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
if(need_prim_time) {
memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data);
}
}
}
else {
@@ -1087,9 +875,6 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
if(need_prim_time) {
memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data);
}
}
}
@@ -1133,7 +918,7 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]);
return new InnerNode(range.bounds(), leaves[0], inner);
} else {
/* Should be doing more branches if more primitive types added. */
/* Shpuld be doing more branches if more primitive types added. */
assert(num_leaves <= 5);
BoundBox inner_bounds_a = merge(leaves[0]->m_bounds, leaves[1]->m_bounds);
BoundBox inner_bounds_b = merge(leaves[2]->m_bounds, leaves[3]->m_bounds);
@@ -1166,7 +951,7 @@ void BVHBuild::rotate(BVHNode *node, int max_depth)
/* nothing to rotate if we reached a leaf node. */
if(node->is_leaf() || max_depth < 0)
return;
InnerNode *parent = (InnerNode*)node;
/* rotate all children first */

View File

@@ -48,7 +48,6 @@ public:
array<int>& prim_type,
array<int>& prim_index,
array<int>& prim_object,
array<float2>& prim_time,
const BVHParams& params,
Progress& progress);
~BVHBuild();
@@ -64,8 +63,6 @@ protected:
friend class BVHObjectBinning;
/* Adding references. */
void add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
void add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
void add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i);
void add_references(BVHRange& root);
@@ -113,9 +110,6 @@ protected:
array<int>& prim_type;
array<int>& prim_index;
array<int>& prim_object;
array<float2>& prim_time;
bool need_prim_time;
/* Build parameters. */
BVHParams params;

View File

@@ -176,19 +176,6 @@ uint BVHNode::update_visibility()
return m_visibility;
}
void BVHNode::update_time()
{
if(!is_leaf()) {
InnerNode *inner = (InnerNode*)this;
BVHNode *child0 = inner->children[0];
BVHNode *child1 = inner->children[1];
child0->update_time();
child1->update_time();
m_time_from = min(child0->m_time_from, child1->m_time_from);
m_time_to = max(child0->m_time_to, child1->m_time_to);
}
}
/* Inner Node */
void InnerNode::print(int depth) const

View File

@@ -47,9 +47,7 @@ class BVHNode
{
public:
BVHNode() : m_is_unaligned(false),
m_aligned_space(NULL),
m_time_from(0.0f),
m_time_to(1.0f)
m_aligned_space(NULL)
{
}
@@ -93,15 +91,12 @@ public:
void deleteSubtree();
uint update_visibility();
void update_time();
bool m_is_unaligned;
// TODO(sergey): Can be stored as 3x3 matrix, but better to have some
// utilities and type defines in util_transform first.
Transform *m_aligned_space;
float m_time_from, m_time_to;
};
class InnerNode : public BVHNode

View File

@@ -43,9 +43,7 @@ public:
/* number of primitives in leaf */
int min_leaf_size;
int max_triangle_leaf_size;
int max_motion_triangle_leaf_size;
int max_curve_leaf_size;
int max_motion_curve_leaf_size;
/* object or mesh level bvh */
bool top_level;
@@ -61,17 +59,6 @@ public:
*/
bool use_unaligned_nodes;
/* Split time range to this number of steps and create leaf node for each
* of this time steps.
*
* Speeds up rendering of motion curve primitives in the cost of higher
* memory usage.
*/
int num_motion_curve_steps;
/* Same as above, but for triangle primitives. */
int num_motion_triangle_steps;
/* fixed parameters */
enum {
MAX_DEPTH = 64,
@@ -93,18 +80,13 @@ public:
min_leaf_size = 1;
max_triangle_leaf_size = 8;
max_motion_triangle_leaf_size = 8;
max_curve_leaf_size = 1;
max_motion_curve_leaf_size = 4;
max_curve_leaf_size = 2;
top_level = false;
use_qbvh = false;
use_unaligned_nodes = false;
primitive_mask = PRIMITIVE_ALL;
num_motion_curve_steps = 0;
num_motion_triangle_steps = 0;
}
/* SAH costs */
@@ -131,15 +113,8 @@ class BVHReference
public:
__forceinline BVHReference() {}
__forceinline BVHReference(const BoundBox& bounds_,
int prim_index_,
int prim_object_,
int prim_type,
float time_from = 0.0f,
float time_to = 1.0f)
: rbounds(bounds_),
time_from_(time_from),
time_to_(time_to)
__forceinline BVHReference(const BoundBox& bounds_, int prim_index_, int prim_object_, int prim_type)
: rbounds(bounds_)
{
rbounds.min.w = __int_as_float(prim_index_);
rbounds.max.w = __int_as_float(prim_object_);
@@ -150,9 +125,6 @@ public:
__forceinline int prim_index() const { return __float_as_int(rbounds.min.w); }
__forceinline int prim_object() const { return __float_as_int(rbounds.max.w); }
__forceinline int prim_type() const { return type; }
__forceinline float time_from() const { return time_from_; }
__forceinline float time_to() const { return time_to_; }
BVHReference& operator=(const BVHReference &arg) {
if(&arg != this) {
@@ -161,11 +133,9 @@ public:
return *this;
}
protected:
BoundBox rbounds;
uint type;
float time_from_, time_to_;
};
/* BVH Range

View File

@@ -3,7 +3,6 @@ set(INC
.
../graph
../kernel
../kernel/split
../kernel/svm
../kernel/osl
../util
@@ -34,7 +33,6 @@ set(SRC
device_cuda.cpp
device_multi.cpp
device_opencl.cpp
device_split_kernel.cpp
device_task.cpp
)
@@ -58,7 +56,6 @@ set(SRC_HEADERS
device_memory.h
device_intern.h
device_network.h
device_split_kernel.h
device_task.h
)

View File

@@ -64,8 +64,6 @@ std::ostream& operator <<(std::ostream &os,
<< string_from_bool(requested_features.use_integrator_branched) << std::endl;
os << "Use Patch Evaluation: "
<< string_from_bool(requested_features.use_patch_evaluation) << std::endl;
os << "Use Transparent Shadows: "
<< string_from_bool(requested_features.use_transparent) << std::endl;
return os;
}
@@ -80,7 +78,7 @@ Device::~Device()
void Device::pixels_alloc(device_memory& mem)
{
mem_alloc("pixels", mem, MEM_READ_WRITE);
mem_alloc(mem, MEM_READ_WRITE);
}
void Device::pixels_copy_from(device_memory& mem, int y, int w, int h)

View File

@@ -117,9 +117,6 @@ public:
/* Use OpenSubdiv patch evaluation */
bool use_patch_evaluation;
/* Use Transparent shadows */
bool use_transparent;
DeviceRequestedFeatures()
{
@@ -136,7 +133,6 @@ public:
use_volume = false;
use_integrator_branched = false;
use_patch_evaluation = false;
use_transparent = false;
}
bool modified(const DeviceRequestedFeatures& requested_features)
@@ -152,8 +148,7 @@ public:
use_subsurface == requested_features.use_subsurface &&
use_volume == requested_features.use_volume &&
use_integrator_branched == requested_features.use_integrator_branched &&
use_patch_evaluation == requested_features.use_patch_evaluation &&
use_transparent == requested_features.use_transparent);
use_patch_evaluation == requested_features.use_patch_evaluation);
}
/* Convert the requested features structure to a build options,
@@ -194,9 +189,6 @@ public:
if(!use_patch_evaluation) {
build_options += " -D__NO_PATCH_EVAL__";
}
if(!use_transparent && !use_volume) {
build_options += " -D__NO_TRANSPARENT__";
}
return build_options;
}
};
@@ -228,21 +220,12 @@ public:
DeviceInfo info;
virtual const string& error_message() { return error_msg; }
bool have_error() { return !error_message().empty(); }
virtual void set_error(const string& error)
{
if(!have_error()) {
error_msg = error;
}
fprintf(stderr, "%s\n", error.c_str());
fflush(stderr);
}
virtual bool show_samples() const { return false; }
/* statistics */
Stats &stats;
/* regular memory */
virtual void mem_alloc(const char *name, device_memory& mem, MemoryType type) = 0;
virtual void mem_alloc(device_memory& mem, MemoryType type) = 0;
virtual void mem_copy_to(device_memory& mem) = 0;
virtual void mem_copy_from(device_memory& mem,
int y, int w, int h, int elem) = 0;

View File

@@ -26,12 +26,10 @@
#include "device.h"
#include "device_intern.h"
#include "device_split_kernel.h"
#include "kernel.h"
#include "kernel_compat_cpu.h"
#include "kernel_types.h"
#include "split/kernel_split_data.h"
#include "kernel_globals.h"
#include "osl_shader.h"
@@ -43,7 +41,6 @@
#include "util_foreach.h"
#include "util_function.h"
#include "util_logging.h"
#include "util_map.h"
#include "util_opengl.h"
#include "util_progress.h"
#include "util_system.h"
@@ -51,93 +48,8 @@
CCL_NAMESPACE_BEGIN
class CPUDevice;
class CPUSplitKernel : public DeviceSplitKernel {
CPUDevice *device;
public:
explicit CPUSplitKernel(CPUDevice *device);
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
RenderTile& rtile,
int num_global_elements,
device_memory& kernel_globals,
device_memory& kernel_data_,
device_memory& split_data,
device_memory& ray_state,
device_memory& queue_index,
device_memory& use_queues_flag,
device_memory& work_pool_wgs);
virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
virtual int2 split_kernel_local_size();
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
};
class CPUDevice : public Device
{
static unordered_map<string, void*> kernel_functions;
static void register_kernel_function(const char* name, void* func)
{
kernel_functions[name] = func;
}
static const char* get_arch_name()
{
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(system_cpu_support_avx2()) {
return "cpu_avx2";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
if(system_cpu_support_avx()) {
return "cpu_avx";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
return "cpu_sse41";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
if(system_cpu_support_sse3()) {
return "cpu_sse3";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if(system_cpu_support_sse2()) {
return "cpu_sse2";
}
else
#endif
{
return "cpu";
}
}
template<typename F>
static F get_kernel_function(string name)
{
name = string("kernel_") + get_arch_name() + "_" + name;
unordered_map<string, void*>::iterator it = kernel_functions.find(name);
if(it == kernel_functions.end()) {
assert(!"kernel function not found");
return NULL;
}
return (F)it->second;
}
friend class CPUSplitKernel;
public:
TaskPool task_pool;
KernelGlobals kernel_globals;
@@ -145,15 +57,10 @@ public:
#ifdef WITH_OSL
OSLGlobals osl_globals;
#endif
bool use_split_kernel;
DeviceRequestedFeatures requested_features;
CPUDevice(DeviceInfo& info, Stats &stats, bool background)
: Device(info, stats, background)
{
#ifdef WITH_OSL
kernel_globals.osl = &osl_globals;
#endif
@@ -198,28 +105,6 @@ public:
{
VLOG(1) << "Will be using regular kernels.";
}
use_split_kernel = DebugFlags().cpu.split_kernel;
if(use_split_kernel) {
VLOG(1) << "Will be using split kernel.";
}
kernel_cpu_register_functions(register_kernel_function);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
kernel_cpu_sse2_register_functions(register_kernel_function);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
kernel_cpu_sse3_register_functions(register_kernel_function);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
kernel_cpu_sse41_register_functions(register_kernel_function);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
kernel_cpu_avx_register_functions(register_kernel_function);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
kernel_cpu_avx2_register_functions(register_kernel_function);
#endif
}
~CPUDevice()
@@ -227,25 +112,9 @@ public:
task_pool.stop();
}
virtual bool show_samples() const
void mem_alloc(device_memory& mem, MemoryType /*type*/)
{
return (TaskScheduler::num_threads() == 1);
}
void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
{
if(name) {
VLOG(1) << "Buffer allocate: " << name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
}
mem.device_pointer = mem.data_pointer;
if(!mem.device_pointer) {
mem.device_pointer = (device_ptr)malloc(mem.memory_size());
}
mem.device_size = mem.memory_size();
stats.mem_alloc(mem.device_size);
}
@@ -270,10 +139,6 @@ public:
void mem_free(device_memory& mem)
{
if(mem.device_pointer) {
if(!mem.data_pointer) {
free((void*)mem.device_pointer);
}
mem.device_pointer = 0;
stats.mem_free(mem.device_size);
mem.device_size = 0;
@@ -326,14 +191,8 @@ public:
void thread_run(DeviceTask *task)
{
if(task->type == DeviceTask::PATH_TRACE) {
if(!use_split_kernel) {
thread_path_trace(*task);
}
else {
thread_path_trace_split(*task);
}
}
if(task->type == DeviceTask::PATH_TRACE)
thread_path_trace(*task);
else if(task->type == DeviceTask::FILM_CONVERT)
thread_film_convert(*task);
else if(task->type == DeviceTask::SHADER)
@@ -394,7 +253,7 @@ public:
{
path_trace_kernel = kernel_cpu_path_trace;
}
while(task.acquire_tile(this, tile)) {
float *render_buffer = (float*)tile.buffer;
uint *rng_state = (uint*)tile.rng_state;
@@ -416,7 +275,7 @@ public:
tile.sample = sample + 1;
task.update_progress(&tile, tile.w*tile.h);
task.update_progress(&tile);
}
task.release_tile(tile);
@@ -430,49 +289,6 @@ public:
thread_kernel_globals_free(&kg);
}
void thread_path_trace_split(DeviceTask& task)
{
if(task_pool.canceled()) {
if(task.need_finish_queue == false)
return;
}
RenderTile tile;
CPUSplitKernel split_kernel(this);
/* allocate buffer for kernel globals */
device_memory kgbuffer;
kgbuffer.resize(sizeof(KernelGlobals));
mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);
KernelGlobals *kg = (KernelGlobals*)kgbuffer.device_pointer;
*kg = thread_kernel_globals_init();
requested_features.max_closure = MAX_CLOSURE;
if(!split_kernel.load_kernels(requested_features)) {
thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
mem_free(kgbuffer);
return;
}
while(task.acquire_tile(this, tile)) {
device_memory data;
split_kernel.path_trace(&task, tile, kgbuffer, data);
task.release_tile(tile);
if(task_pool.canceled()) {
if(task.need_finish_queue == false)
break;
}
}
thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
mem_free(kgbuffer);
}
void thread_film_convert(DeviceTask& task)
{
float sample_scale = 1.0f/(task.sample + 1);
@@ -680,10 +496,6 @@ protected:
inline void thread_kernel_globals_free(KernelGlobals *kg)
{
if(kg == NULL) {
return;
}
if(kg->transparent_shadow_intersections != NULL) {
free(kg->transparent_shadow_intersections);
}
@@ -698,175 +510,8 @@ protected:
OSLShader::thread_free(kg);
#endif
}
virtual bool load_kernels(DeviceRequestedFeatures& requested_features_) {
requested_features = requested_features_;
return true;
}
};
/* split kernel */
class CPUSplitKernelFunction : public SplitKernelFunction {
public:
CPUDevice* device;
void (*func)(KernelGlobals *kg, KernelData *data);
CPUSplitKernelFunction(CPUDevice* device) : device(device), func(NULL) {}
~CPUSplitKernelFunction() {}
virtual bool enqueue(const KernelDimensions& dim, device_memory& kernel_globals, device_memory& data)
{
if(!func) {
return false;
}
KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer;
kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]);
for(int y = 0; y < dim.global_size[1]; y++) {
for(int x = 0; x < dim.global_size[0]; x++) {
kg->global_id = make_int2(x, y);
func(kg, (KernelData*)data.device_pointer);
}
}
return true;
}
};
CPUSplitKernel::CPUSplitKernel(CPUDevice *device) : DeviceSplitKernel(device), device(device)
{
}
bool CPUSplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim,
RenderTile& rtile,
int num_global_elements,
device_memory& kernel_globals,
device_memory& data,
device_memory& split_data,
device_memory& ray_state,
device_memory& queue_index,
device_memory& use_queues_flags,
device_memory& work_pool_wgs)
{
typedef void(*data_init_t)(KernelGlobals *kg,
ccl_constant KernelData *data,
ccl_global void *split_data_buffer,
int num_elements,
ccl_global char *ray_state,
ccl_global uint *rng_state,
int start_sample,
int end_sample,
int sx, int sy, int sw, int sh, int offset, int stride,
ccl_global int *Queue_index,
int queuesize,
ccl_global char *use_queues_flag,
ccl_global unsigned int *work_pool_wgs,
unsigned int num_samples,
ccl_global float *buffer);
data_init_t data_init;
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(system_cpu_support_avx2()) {
data_init = kernel_cpu_avx2_data_init;
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
if(system_cpu_support_avx()) {
data_init = kernel_cpu_avx_data_init;
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
data_init = kernel_cpu_sse41_data_init;
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
if(system_cpu_support_sse3()) {
data_init = kernel_cpu_sse3_data_init;
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if(system_cpu_support_sse2()) {
data_init = kernel_cpu_sse2_data_init;
}
else
#endif
{
data_init = kernel_cpu_data_init;
}
KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer;
kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]);
for(int y = 0; y < dim.global_size[1]; y++) {
for(int x = 0; x < dim.global_size[0]; x++) {
kg->global_id = make_int2(x, y);
data_init((KernelGlobals*)kernel_globals.device_pointer,
(KernelData*)data.device_pointer,
(void*)split_data.device_pointer,
num_global_elements,
(char*)ray_state.device_pointer,
(uint*)rtile.rng_state,
rtile.start_sample,
rtile.start_sample + rtile.num_samples,
rtile.x,
rtile.y,
rtile.w,
rtile.h,
rtile.offset,
rtile.stride,
(int*)queue_index.device_pointer,
dim.global_size[0] * dim.global_size[1],
(char*)use_queues_flags.device_pointer,
(uint*)work_pool_wgs.device_pointer,
rtile.num_samples,
(float*)rtile.buffer);
}
}
return true;
}
SplitKernelFunction* CPUSplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&)
{
CPUSplitKernelFunction *kernel = new CPUSplitKernelFunction(device);
kernel->func = device->get_kernel_function<void(*)(KernelGlobals*, KernelData*)>(kernel_name);
if(!kernel->func) {
delete kernel;
return NULL;
}
return kernel;
}
int2 CPUSplitKernel::split_kernel_local_size()
{
return make_int2(1, 1);
}
int2 CPUSplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask * /*task*/) {
return make_int2(64, 1);
}
uint64_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) {
KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer;
return split_data_buffer_size(kg, num_threads);
}
unordered_map<string, void*> CPUDevice::kernel_functions;
Device *device_cpu_create(DeviceInfo& info, Stats &stats, bool background)
{
return new CPUDevice(info, stats, background);

View File

@@ -15,14 +15,12 @@
*/
#include <climits>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "device.h"
#include "device_intern.h"
#include "device_split_kernel.h"
#include "buffers.h"
@@ -44,8 +42,6 @@
#include "util_types.h"
#include "util_time.h"
#include "split/kernel_split_data_types.h"
CCL_NAMESPACE_BEGIN
#ifndef WITH_CUDA_DYNLOAD
@@ -82,31 +78,6 @@ int cuewCompilerVersion(void)
} /* namespace */
#endif /* WITH_CUDA_DYNLOAD */
class CUDADevice;
class CUDASplitKernel : public DeviceSplitKernel {
CUDADevice *device;
public:
explicit CUDASplitKernel(CUDADevice *device);
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
RenderTile& rtile,
int num_global_elements,
device_memory& kernel_globals,
device_memory& kernel_data_,
device_memory& split_data,
device_memory& ray_state,
device_memory& queue_index,
device_memory& use_queues_flag,
device_memory& work_pool_wgs);
virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
virtual int2 split_kernel_local_size();
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
};
class CUDADevice : public Device
{
public:
@@ -144,12 +115,6 @@ public:
return path_exists(cubins_path);
}
virtual bool show_samples() const
{
/* The CUDADevice only processes one tile at a time, so showing samples is fine. */
return true;
}
/*#ifdef NDEBUG
#define cuda_abort()
#else
@@ -159,7 +124,7 @@ public:
{
if(first_error) {
fprintf(stderr, "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
fprintf(stderr, "https://docs.blender.org/manual/en/dev/render/cycles/gpu_rendering.html\n\n");
fprintf(stderr, "http://www.blender.org/manual/render/cycles/gpu_rendering.html\n\n");
first_error = false;
}
}
@@ -287,16 +252,11 @@ public:
return DebugFlags().cuda.adaptive_compile;
}
bool use_split_kernel()
{
return DebugFlags().cuda.split_kernel;
}
/* Common NVCC flags which stays the same regardless of shading model,
* kernel sources md5 and only depends on compiler or compilation settings.
*/
string compile_kernel_get_common_cflags(
const DeviceRequestedFeatures& requested_features, bool split=false)
const DeviceRequestedFeatures& requested_features)
{
const int cuda_version = cuewCompilerVersion();
const int machine = system_cpu_bits();
@@ -321,11 +281,6 @@ public:
#ifdef WITH_CYCLES_DEBUG
cflags += " -D__KERNEL_DEBUG__";
#endif
if(split) {
cflags += " -D__SPLIT__";
}
return cflags;
}
@@ -359,7 +314,7 @@ public:
return true;
}
string compile_kernel(const DeviceRequestedFeatures& requested_features, bool split=false)
string compile_kernel(const DeviceRequestedFeatures& requested_features)
{
/* Compute cubin name. */
int major, minor;
@@ -368,8 +323,7 @@ public:
/* Attempt to use kernel provided with Blender. */
if(!use_adaptive_compilation()) {
const string cubin = path_get(string_printf(split ? "lib/kernel_split_sm_%d%d.cubin"
: "lib/kernel_sm_%d%d.cubin",
const string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin",
major, minor));
VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
if(path_exists(cubin)) {
@@ -379,7 +333,7 @@ public:
}
const string common_cflags =
compile_kernel_get_common_cflags(requested_features, split);
compile_kernel_get_common_cflags(requested_features);
/* Try to use locally compiled kernel. */
const string kernel_path = path_get("kernel");
@@ -390,8 +344,7 @@ public:
*/
const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags);
const string cubin_file = string_printf(split ? "cycles_kernel_split_sm%d%d_%s.cubin"
: "cycles_kernel_sm%d%d_%s.cubin",
const string cubin_file = string_printf("cycles_kernel_sm%d%d_%s.cubin",
major, minor,
cubin_md5.c_str());
const string cubin = path_cache_get(path_join("kernels", cubin_file));
@@ -426,7 +379,7 @@ public:
const char *nvcc = cuewCompilerPath();
const string kernel = path_join(kernel_path,
path_join("kernels",
path_join("cuda", split ? "kernel_split.cu" : "kernel.cu")));
path_join("cuda", "kernel.cu")));
double starttime = time_dt();
printf("Compiling CUDA kernel ...\n");
@@ -474,7 +427,7 @@ public:
return false;
/* get kernel */
string cubin = compile_kernel(requested_features, use_split_kernel());
string cubin = compile_kernel(requested_features);
if(cubin == "")
return false;
@@ -507,14 +460,8 @@ public:
}
}
void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
void mem_alloc(device_memory& mem, MemoryType /*type*/)
{
if(name) {
VLOG(1) << "Buffer allocate: " << name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
}
cuda_push_context();
CUdeviceptr device_pointer;
size_t size = mem.memory_size();
@@ -551,9 +498,7 @@ public:
void mem_zero(device_memory& mem)
{
if(mem.data_pointer) {
memset((void*)mem.data_pointer, 0, mem.memory_size());
}
memset((void*)mem.data_pointer, 0, mem.memory_size());
cuda_push_context();
if(mem.device_pointer)
@@ -666,7 +611,7 @@ public:
/* Data Storage */
if(interpolation == INTERPOLATION_NONE) {
if(has_bindless_textures) {
mem_alloc(NULL, mem, MEM_READ_ONLY);
mem_alloc(mem, MEM_READ_ONLY);
mem_copy_to(mem);
cuda_push_context();
@@ -690,7 +635,7 @@ public:
cuda_pop_context();
}
else {
mem_alloc(NULL, mem, MEM_READ_ONLY);
mem_alloc(mem, MEM_READ_ONLY);
mem_copy_to(mem);
cuda_push_context();
@@ -1307,48 +1252,25 @@ public:
/* Upload Bindless Mapping */
load_bindless_mapping();
if(!use_split_kernel()) {
/* keep rendering tiles until done */
while(task->acquire_tile(this, tile)) {
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
for(int sample = start_sample; sample < end_sample; sample++) {
if(task->get_cancel()) {
if(task->need_finish_queue == false)
break;
}
path_trace(tile, sample, branched);
tile.sample = sample + 1;
task->update_progress(&tile, tile.w*tile.h);
}
task->release_tile(tile);
}
}
else {
DeviceRequestedFeatures requested_features;
if(!use_adaptive_compilation()) {
requested_features.max_closure = 64;
}
CUDASplitKernel split_kernel(this);
split_kernel.load_kernels(requested_features);
while(task->acquire_tile(this, tile)) {
device_memory void_buffer;
split_kernel.path_trace(task, tile, void_buffer, void_buffer);
task->release_tile(tile);
/* keep rendering tiles until done */
while(task->acquire_tile(this, tile)) {
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
for(int sample = start_sample; sample < end_sample; sample++) {
if(task->get_cancel()) {
if(task->need_finish_queue == false)
break;
}
path_trace(tile, sample, branched);
tile.sample = sample + 1;
task->update_progress(&tile);
}
task->release_tile(tile);
}
}
else if(task->type == DeviceTask::SHADER) {
@@ -1401,223 +1323,8 @@ public:
{
task_pool.cancel();
}
friend class CUDASplitKernelFunction;
friend class CUDASplitKernel;
};
/* redefine the cuda_assert macro so it can be used outside of the CUDADevice class
* now that the definition of that class is complete
*/
#undef cuda_assert
#define cuda_assert(stmt) \
{ \
CUresult result = stmt; \
\
if(result != CUDA_SUCCESS) { \
string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \
if(device->error_msg == "") \
device->error_msg = message; \
fprintf(stderr, "%s\n", message.c_str()); \
/*cuda_abort();*/ \
device->cuda_error_documentation(); \
} \
} (void)0
/* split kernel */
class CUDASplitKernelFunction : public SplitKernelFunction{
CUDADevice* device;
CUfunction func;
public:
CUDASplitKernelFunction(CUDADevice *device, CUfunction func) : device(device), func(func) {}
/* enqueue the kernel, returns false if there is an error */
bool enqueue(const KernelDimensions &dim, device_memory &/*kg*/, device_memory &/*data*/)
{
return enqueue(dim, NULL);
}
/* enqueue the kernel, returns false if there is an error */
bool enqueue(const KernelDimensions &dim, void *args[])
{
device->cuda_push_context();
if(device->have_error())
return false;
/* we ignore dim.local_size for now, as this is faster */
int threads_per_block;
cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func));
int xthreads = (int)sqrt(threads_per_block);
int ythreads = (int)sqrt(threads_per_block);
int xblocks = (dim.global_size[0] + xthreads - 1)/xthreads;
int yblocks = (dim.global_size[1] + ythreads - 1)/ythreads;
cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1));
cuda_assert(cuLaunchKernel(func,
xblocks , yblocks, 1, /* blocks */
xthreads, ythreads, 1, /* threads */
0, 0, args, 0));
device->cuda_pop_context();
return !device->have_error();
}
};
CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device), device(device)
{
}
uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads)
{
device_vector<uint64_t> size_buffer;
size_buffer.resize(1);
device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
device->cuda_push_context();
uint threads = num_threads;
CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer);
struct args_t {
uint* num_threads;
CUdeviceptr* size;
};
args_t args = {
&threads,
&d_size
};
CUfunction state_buffer_size;
cuda_assert(cuModuleGetFunction(&state_buffer_size, device->cuModule, "kernel_cuda_state_buffer_size"));
cuda_assert(cuLaunchKernel(state_buffer_size,
1, 1, 1,
1, 1, 1,
0, 0, &args, 0));
device->cuda_pop_context();
device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
device->mem_free(size_buffer);
return *size_buffer.get_data();
}
bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim,
RenderTile& rtile,
int num_global_elements,
device_memory& /*kernel_globals*/,
device_memory& /*kernel_data*/,
device_memory& split_data,
device_memory& ray_state,
device_memory& queue_index,
device_memory& use_queues_flag,
device_memory& work_pool_wgs)
{
device->cuda_push_context();
CUdeviceptr d_split_data = device->cuda_device_ptr(split_data.device_pointer);
CUdeviceptr d_ray_state = device->cuda_device_ptr(ray_state.device_pointer);
CUdeviceptr d_queue_index = device->cuda_device_ptr(queue_index.device_pointer);
CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer);
CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer);
CUdeviceptr d_rng_state = device->cuda_device_ptr(rtile.rng_state);
CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer);
int end_sample = rtile.start_sample + rtile.num_samples;
int queue_size = dim.global_size[0] * dim.global_size[1];
struct args_t {
CUdeviceptr* split_data_buffer;
int* num_elements;
CUdeviceptr* ray_state;
CUdeviceptr* rng_state;
int* start_sample;
int* end_sample;
int* sx;
int* sy;
int* sw;
int* sh;
int* offset;
int* stride;
CUdeviceptr* queue_index;
int* queuesize;
CUdeviceptr* use_queues_flag;
CUdeviceptr* work_pool_wgs;
int* num_samples;
CUdeviceptr* buffer;
};
args_t args = {
&d_split_data,
&num_global_elements,
&d_ray_state,
&d_rng_state,
&rtile.start_sample,
&end_sample,
&rtile.x,
&rtile.y,
&rtile.w,
&rtile.h,
&rtile.offset,
&rtile.stride,
&d_queue_index,
&queue_size,
&d_use_queues_flag,
&d_work_pool_wgs,
&rtile.num_samples,
&d_buffer
};
CUfunction data_init;
cuda_assert(cuModuleGetFunction(&data_init, device->cuModule, "kernel_cuda_path_trace_data_init"));
if(device->have_error()) {
return false;
}
CUDASplitKernelFunction(device, data_init).enqueue(dim, (void**)&args);
device->cuda_pop_context();
return !device->have_error();
}
SplitKernelFunction* CUDASplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&)
{
CUfunction func;
device->cuda_push_context();
cuda_assert(cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data()));
if(device->have_error()) {
device->cuda_error_message(string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data()));
return NULL;
}
device->cuda_pop_context();
return new CUDASplitKernelFunction(device, func);
}
int2 CUDASplitKernel::split_kernel_local_size()
{
return make_int2(32, 1);
}
int2 CUDASplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask */*task*/)
{
/* TODO(mai): implement something here to detect ideal work size */
return make_int2(256, 256);
}
bool device_cuda_init(void)
{
#ifdef WITH_CUDA_DYNLOAD

View File

@@ -48,8 +48,7 @@ enum DataType {
TYPE_UINT,
TYPE_INT,
TYPE_FLOAT,
TYPE_HALF,
TYPE_UINT64,
TYPE_HALF
};
static inline size_t datatype_size(DataType datatype)
@@ -60,7 +59,6 @@ static inline size_t datatype_size(DataType datatype)
case TYPE_UINT: return sizeof(uint);
case TYPE_INT: return sizeof(int);
case TYPE_HALF: return sizeof(half);
case TYPE_UINT64: return sizeof(uint64_t);
default: return 0;
}
}
@@ -162,11 +160,6 @@ template<> struct device_type_traits<half4> {
static const int num_elements = 4;
};
template<> struct device_type_traits<uint64_t> {
static const DataType data_type = TYPE_UINT64;
static const int num_elements = 1;
};
/* Device Memory */
class device_memory
@@ -187,27 +180,10 @@ public:
/* device pointer */
device_ptr device_pointer;
device_memory()
{
data_type = device_type_traits<uchar>::data_type;
data_elements = device_type_traits<uchar>::num_elements;
data_pointer = 0;
data_size = 0;
device_size = 0;
data_width = 0;
data_height = 0;
data_depth = 0;
device_pointer = 0;
}
protected:
device_memory() {}
virtual ~device_memory() { assert(!device_pointer); }
void resize(size_t size)
{
data_size = size;
data_width = size;
}
protected:
/* no copying */
device_memory(const device_memory&);
device_memory& operator = (const device_memory&);
@@ -222,8 +198,16 @@ public:
{
data_type = device_type_traits<T>::data_type;
data_elements = device_type_traits<T>::num_elements;
data_pointer = 0;
data_size = 0;
device_size = 0;
data_width = 0;
data_height = 0;
data_depth = 0;
assert(data_elements > 0);
device_pointer = 0;
}
virtual ~device_vector() {}
@@ -282,7 +266,6 @@ public:
data_height = 0;
data_depth = 0;
data_size = 0;
device_pointer = 0;
}
size_t size()

View File

@@ -89,14 +89,6 @@ public:
return error_msg;
}
virtual bool show_samples() const
{
if(devices.size() > 1) {
return false;
}
return devices.front().device->show_samples();
}
bool load_kernels(const DeviceRequestedFeatures& requested_features)
{
foreach(SubDevice& sub, devices)
@@ -106,11 +98,11 @@ public:
return true;
}
void mem_alloc(const char *name, device_memory& mem, MemoryType type)
void mem_alloc(device_memory& mem, MemoryType type)
{
foreach(SubDevice& sub, devices) {
mem.device_pointer = 0;
sub.device->mem_alloc(name, mem, type);
sub.device->mem_alloc(mem, type);
sub.ptr_map[unique_ptr] = mem.device_pointer;
}
@@ -162,7 +154,6 @@ public:
void mem_free(device_memory& mem)
{
device_ptr tmp = mem.device_pointer;
stats.mem_free(mem.device_size);
foreach(SubDevice& sub, devices) {
mem.device_pointer = sub.ptr_map[tmp];
@@ -171,6 +162,7 @@ public:
}
mem.device_pointer = 0;
stats.mem_free(mem.device_size);
}
void const_copy_to(const char *name, void *host, size_t size)
@@ -202,7 +194,6 @@ public:
void tex_free(device_memory& mem)
{
device_ptr tmp = mem.device_pointer;
stats.mem_free(mem.device_size);
foreach(SubDevice& sub, devices) {
mem.device_pointer = sub.ptr_map[tmp];
@@ -211,6 +202,7 @@ public:
}
mem.device_pointer = 0;
stats.mem_free(mem.device_size);
}
void pixels_alloc(device_memory& mem)

View File

@@ -51,11 +51,6 @@ public:
thread_mutex rpc_lock;
virtual bool show_samples() const
{
return false;
}
NetworkDevice(DeviceInfo& info, Stats &stats, const char *address)
: Device(info, stats, true), socket(io_service)
{
@@ -87,14 +82,8 @@ public:
snd.write();
}
void mem_alloc(const char *name, device_memory& mem, MemoryType type)
void mem_alloc(device_memory& mem, MemoryType type)
{
if(name) {
VLOG(1) << "Buffer allocate: " << name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
}
thread_scoped_lock lock(rpc_lock);
mem.device_pointer = ++mem_counter;
@@ -487,7 +476,7 @@ protected:
mem.data_pointer = 0;
/* perform the allocation on the actual device */
device->mem_alloc(NULL, mem, type);
device->mem_alloc(mem, type);
/* store a mapping to/from client_pointer and real device pointer */
pointer_mapping_insert(client_pointer, mem.device_pointer);

View File

@@ -1,306 +0,0 @@
/*
* Copyright 2011-2016 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "device_split_kernel.h"
#include "kernel_types.h"
#include "kernel_split_data_types.h"
#include "util_time.h"
CCL_NAMESPACE_BEGIN
static const double alpha = 0.1; /* alpha for rolling average */
DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
{
current_max_closure = -1;
first_tile = true;
avg_time_per_sample = 0.0;
kernel_path_init = NULL;
kernel_scene_intersect = NULL;
kernel_lamp_emission = NULL;
kernel_do_volume = NULL;
kernel_queue_enqueue = NULL;
kernel_indirect_background = NULL;
kernel_shader_eval = NULL;
kernel_holdout_emission_blurring_pathtermination_ao = NULL;
kernel_subsurface_scatter = NULL;
kernel_direct_lighting = NULL;
kernel_shadow_blocked_ao = NULL;
kernel_shadow_blocked_dl = NULL;
kernel_next_iteration_setup = NULL;
kernel_indirect_subsurface = NULL;
kernel_buffer_update = NULL;
}
DeviceSplitKernel::~DeviceSplitKernel()
{
device->mem_free(split_data);
device->mem_free(ray_state);
device->mem_free(use_queues_flag);
device->mem_free(queue_index);
device->mem_free(work_pool_wgs);
delete kernel_path_init;
delete kernel_scene_intersect;
delete kernel_lamp_emission;
delete kernel_do_volume;
delete kernel_queue_enqueue;
delete kernel_indirect_background;
delete kernel_shader_eval;
delete kernel_holdout_emission_blurring_pathtermination_ao;
delete kernel_subsurface_scatter;
delete kernel_direct_lighting;
delete kernel_shadow_blocked_ao;
delete kernel_shadow_blocked_dl;
delete kernel_next_iteration_setup;
delete kernel_indirect_subsurface;
delete kernel_buffer_update;
}
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_features)
{
#define LOAD_KERNEL(name) \
kernel_##name = get_split_kernel_function(#name, requested_features); \
if(!kernel_##name) { \
return false; \
}
LOAD_KERNEL(path_init);
LOAD_KERNEL(scene_intersect);
LOAD_KERNEL(lamp_emission);
LOAD_KERNEL(do_volume);
LOAD_KERNEL(queue_enqueue);
LOAD_KERNEL(indirect_background);
LOAD_KERNEL(shader_eval);
LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
LOAD_KERNEL(subsurface_scatter);
LOAD_KERNEL(direct_lighting);
LOAD_KERNEL(shadow_blocked_ao);
LOAD_KERNEL(shadow_blocked_dl);
LOAD_KERNEL(next_iteration_setup);
LOAD_KERNEL(indirect_subsurface);
LOAD_KERNEL(buffer_update);
#undef LOAD_KERNEL
current_max_closure = requested_features.max_closure;
return true;
}
size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size)
{
uint64_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
return max_buffer_size / size_per_element;
}
bool DeviceSplitKernel::path_trace(DeviceTask *task,
RenderTile& tile,
device_memory& kgbuffer,
device_memory& kernel_data)
{
if(device->have_error()) {
return false;
}
/* Get local size */
size_t local_size[2];
{
int2 lsize = split_kernel_local_size();
local_size[0] = lsize[0];
local_size[1] = lsize[1];
}
/* Set gloabl size */
size_t global_size[2];
{
int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task);
/* Make sure that set work size is a multiple of local
* work size dimensions.
*/
global_size[0] = round_up(gsize[0], local_size[0]);
global_size[1] = round_up(gsize[1], local_size[1]);
}
/* Number of elements in the global state buffer */
int num_global_elements = global_size[0] * global_size[1];
assert(num_global_elements % WORK_POOL_SIZE == 0);
/* Allocate all required global memory once. */
if(first_tile) {
first_tile = false;
/* Calculate max groups */
/* Denotes the maximum work groups possible w.r.t. current requested tile size. */
unsigned int max_work_groups = num_global_elements / WORK_POOL_SIZE + 1;
/* Allocate work_pool_wgs memory. */
work_pool_wgs.resize(max_work_groups * sizeof(unsigned int));
device->mem_alloc("work_pool_wgs", work_pool_wgs, MEM_READ_WRITE);
queue_index.resize(NUM_QUEUES * sizeof(int));
device->mem_alloc("queue_index", queue_index, MEM_READ_WRITE);
use_queues_flag.resize(sizeof(char));
device->mem_alloc("use_queues_flag", use_queues_flag, MEM_READ_WRITE);
ray_state.resize(num_global_elements);
device->mem_alloc("ray_state", ray_state, MEM_READ_WRITE);
split_data.resize(state_buffer_size(kgbuffer, kernel_data, num_global_elements));
device->mem_alloc("split_data", split_data, MEM_READ_WRITE);
}
#define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \
if(device->have_error()) { \
return false; \
} \
if(!kernel_##name->enqueue(KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
return false; \
}
tile.sample = tile.start_sample;
/* for exponential increase between tile updates */
int time_multiplier = 1;
while(tile.sample < tile.start_sample + tile.num_samples) {
/* to keep track of how long it takes to run a number of samples */
double start_time = time_dt();
/* initial guess to start rolling average */
const int initial_num_samples = 1;
/* approx number of samples per second */
int samples_per_second = (avg_time_per_sample > 0.0) ?
int(double(time_multiplier) / avg_time_per_sample) + 1 : initial_num_samples;
RenderTile subtile = tile;
subtile.start_sample = tile.sample;
subtile.num_samples = min(samples_per_second, tile.start_sample + tile.num_samples - tile.sample);
if(device->have_error()) {
return false;
}
/* reset state memory here as global size for data_init
* kernel might not be large enough to do in kernel
*/
device->mem_zero(work_pool_wgs);
device->mem_zero(split_data);
device->mem_zero(ray_state);
if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
subtile,
num_global_elements,
kgbuffer,
kernel_data,
split_data,
ray_state,
queue_index,
use_queues_flag,
work_pool_wgs))
{
return false;
}
ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size);
bool activeRaysAvailable = true;
while(activeRaysAvailable) {
/* Do path-iteration in host [Enqueue Path-iteration kernels. */
for(int PathIter = 0; PathIter < 16; PathIter++) {
ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
if(task->get_cancel()) {
return true;
}
}
/* Decide if we should exit path-iteration in host. */
device->mem_copy_from(ray_state, 0, global_size[0] * global_size[1] * sizeof(char), 1, 1);
activeRaysAvailable = false;
for(int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
int8_t state = ray_state.get_data()[rayStateIter];
if(state != RAY_INACTIVE) {
if(state == RAY_INVALID) {
/* Something went wrong, abort to avoid looping endlessly. */
device->set_error("Split kernel error: invalid ray state");
return false;
}
/* Not all rays are RAY_INACTIVE. */
activeRaysAvailable = true;
break;
}
}
if(task->get_cancel()) {
return true;
}
}
double time_per_sample = ((time_dt()-start_time) / subtile.num_samples);
if(avg_time_per_sample == 0.0) {
/* start rolling average */
avg_time_per_sample = time_per_sample;
}
else {
avg_time_per_sample = alpha*time_per_sample + (1.0-alpha)*avg_time_per_sample;
}
#undef ENQUEUE_SPLIT_KERNEL
tile.sample += subtile.num_samples;
task->update_progress(&tile, tile.w*tile.h*subtile.num_samples);
time_multiplier = min(time_multiplier << 1, 10);
if(task->get_cancel()) {
return true;
}
}
return true;
}
CCL_NAMESPACE_END

View File

@@ -1,132 +0,0 @@
/*
* Copyright 2011-2016 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __DEVICE_SPLIT_KERNEL_H__
#define __DEVICE_SPLIT_KERNEL_H__
#include "device.h"
#include "buffers.h"
CCL_NAMESPACE_BEGIN
/* When allocate global memory in chunks. We may not be able to
* allocate exactly "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes in chunks;
* Since some bytes may be needed for aligning chunks of memory;
* This is the amount of memory that we dedicate for that purpose.
*/
#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
/* Types used for split kernel */
class KernelDimensions {
public:
size_t global_size[2];
size_t local_size[2];
KernelDimensions(size_t global_size_[2], size_t local_size_[2])
{
memcpy(global_size, global_size_, sizeof(global_size));
memcpy(local_size, local_size_, sizeof(local_size));
}
};
class SplitKernelFunction {
public:
virtual ~SplitKernelFunction() {}
/* enqueue the kernel, returns false if there is an error */
virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) = 0;
};
class DeviceSplitKernel {
private:
Device *device;
SplitKernelFunction *kernel_path_init;
SplitKernelFunction *kernel_scene_intersect;
SplitKernelFunction *kernel_lamp_emission;
SplitKernelFunction *kernel_do_volume;
SplitKernelFunction *kernel_queue_enqueue;
SplitKernelFunction *kernel_indirect_background;
SplitKernelFunction *kernel_shader_eval;
SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
SplitKernelFunction *kernel_subsurface_scatter;
SplitKernelFunction *kernel_direct_lighting;
SplitKernelFunction *kernel_shadow_blocked_ao;
SplitKernelFunction *kernel_shadow_blocked_dl;
SplitKernelFunction *kernel_next_iteration_setup;
SplitKernelFunction *kernel_indirect_subsurface;
SplitKernelFunction *kernel_buffer_update;
/* Global memory variables [porting]; These memory is used for
* co-operation between different kernels; Data written by one
* kernel will be available to another kernel via this global
* memory.
*/
device_memory split_data;
device_vector<uchar> ray_state;
device_memory queue_index; /* Array of size num_queues * sizeof(int) that tracks the size of each queue. */
/* Flag to make sceneintersect and lampemission kernel use queues. */
device_memory use_queues_flag;
/* Approximate time it takes to complete one sample */
double avg_time_per_sample;
/* Work pool with respect to each work group. */
device_memory work_pool_wgs;
/* clos_max value for which the kernels have been loaded currently. */
int current_max_closure;
/* Marked True in constructor and marked false at the end of path_trace(). */
bool first_tile;
public:
explicit DeviceSplitKernel(Device* device);
virtual ~DeviceSplitKernel();
bool load_kernels(const DeviceRequestedFeatures& requested_features);
bool path_trace(DeviceTask *task,
RenderTile& rtile,
device_memory& kgbuffer,
device_memory& kernel_data);
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size);
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
RenderTile& rtile,
int num_global_elements,
device_memory& kernel_globals,
device_memory& kernel_data_,
device_memory& split_data,
device_memory& ray_state,
device_memory& queue_index,
device_memory& use_queues_flag,
device_memory& work_pool_wgs) = 0;
virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&) = 0;
virtual int2 split_kernel_local_size() = 0;
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task) = 0;
};
CCL_NAMESPACE_END
#endif /* __DEVICE_SPLIT_KERNEL_H__ */

View File

@@ -19,8 +19,6 @@
#include "device_task.h"
#include "buffers.h"
#include "util_algorithm.h"
#include "util_time.h"
@@ -101,18 +99,14 @@ void DeviceTask::split(list<DeviceTask>& tasks, int num, int max_size)
}
}
void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
void DeviceTask::update_progress(RenderTile *rtile)
{
if((type != PATH_TRACE) &&
(type != SHADER))
return;
if(update_progress_sample) {
if(pixel_samples == -1) {
pixel_samples = shader_w;
}
update_progress_sample(pixel_samples, rtile? rtile->sample : 0);
}
if(update_progress_sample)
update_progress_sample();
if(update_tile_sample) {
double current_time = time_dt();

View File

@@ -51,17 +51,15 @@ public:
int shader_filter;
int shader_x, shader_w;
int passes_size;
explicit DeviceTask(Type type = PATH_TRACE);
int get_subtask_count(int num, int max_size = 0);
void split(list<DeviceTask>& tasks, int num, int max_size = 0);
void update_progress(RenderTile *rtile, int pixel_samples = -1);
void update_progress(RenderTile *rtile);
function<bool(Device *device, RenderTile&)> acquire_tile;
function<void(long, int)> update_progress_sample;
function<void(void)> update_progress_sample;
function<void(RenderTile&)> update_tile_sample;
function<void(RenderTile&)> release_tile;
function<bool(void)> get_cancel;

View File

@@ -26,30 +26,30 @@
CCL_NAMESPACE_BEGIN
/* Define CYCLES_DISABLE_DRIVER_WORKAROUNDS to disable workaounds for testing */
#ifndef CYCLES_DISABLE_DRIVER_WORKAROUNDS
/* Work around AMD driver hangs by ensuring each command is finished before doing anything else. */
# undef clEnqueueNDRangeKernel
# define clEnqueueNDRangeKernel(a, b, c, d, e, f, g, h, i) \
clFinish(a); \
CLEW_GET_FUN(__clewEnqueueNDRangeKernel)(a, b, c, d, e, f, g, h, i); \
clFinish(a);
# undef clEnqueueWriteBuffer
# define clEnqueueWriteBuffer(a, b, c, d, e, f, g, h, i) \
clFinish(a); \
CLEW_GET_FUN(__clewEnqueueWriteBuffer)(a, b, c, d, e, f, g, h, i); \
clFinish(a);
# undef clEnqueueReadBuffer
# define clEnqueueReadBuffer(a, b, c, d, e, f, g, h, i) \
clFinish(a); \
CLEW_GET_FUN(__clewEnqueueReadBuffer)(a, b, c, d, e, f, g, h, i); \
clFinish(a);
#endif /* CYCLES_DISABLE_DRIVER_WORKAROUNDS */
#define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
/* Macro declarations used with split kernel */
/* Macro to enable/disable work-stealing */
#define __WORK_STEALING__
#define SPLIT_KERNEL_LOCAL_SIZE_X 64
#define SPLIT_KERNEL_LOCAL_SIZE_Y 1
/* This value may be tuned according to the scene we are rendering.
*
* Modifying PATH_ITER_INC_FACTOR value proportional to number of expected
* ray-bounces will improve performance.
*/
#define PATH_ITER_INC_FACTOR 8
/* When allocate global memory in chunks. We may not be able to
* allocate exactly "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes in chunks;
* Since some bytes may be needed for aligning chunks of memory;
* This is the amount of memory that we dedicate for that purpose.
*/
#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
struct OpenCLPlatformDevice {
OpenCLPlatformDevice(cl_platform_id platform_id,
const string& platform_name,
@@ -90,7 +90,6 @@ public:
cl_device_id device_id);
static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
bool force_all = false);
static bool use_single_program();
};
/* Thread safe cache for contexts and programs.
@@ -249,7 +248,6 @@ public:
bool device_initialized;
string platform_name;
string device_name;
bool opencl_error(cl_int err);
void opencl_error(const string& message);
@@ -268,10 +266,10 @@ public:
/* Has to be implemented by the real device classes.
* The base device will then load all these programs. */
virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
virtual void load_kernels(const DeviceRequestedFeatures& requested_features,
vector<OpenCLProgram*> &programs) = 0;
void mem_alloc(const char *name, device_memory& mem, MemoryType type);
void mem_alloc(device_memory& mem, MemoryType type);
void mem_copy_to(device_memory& mem);
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem);
void mem_zero(device_memory& mem);
@@ -328,39 +326,16 @@ protected:
class ArgumentWrapper {
public:
ArgumentWrapper() : size(0), pointer(NULL)
{
}
ArgumentWrapper(device_memory& argument) : size(sizeof(void*)),
pointer((void*)(&argument.device_pointer))
{
}
template<typename T>
ArgumentWrapper(device_vector<T>& argument) : size(sizeof(void*)),
pointer((void*)(&argument.device_pointer))
{
}
template<typename T>
ArgumentWrapper() : size(0), pointer(NULL) {}
template <typename T>
ArgumentWrapper(T& argument) : size(sizeof(argument)),
pointer(&argument)
{
}
pointer(&argument) { }
ArgumentWrapper(int argument) : size(sizeof(int)),
int_value(argument),
pointer(&int_value)
{
}
pointer(&int_value) { }
ArgumentWrapper(float argument) : size(sizeof(float)),
float_value(argument),
pointer(&float_value)
{
}
pointer(&float_value) { }
size_t size;
int int_value;
float float_value;

View File

@@ -82,10 +82,9 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou
cpPlatform = platform_device.platform_id;
cdDevice = platform_device.device_id;
platform_name = platform_device.platform_name;
device_name = platform_device.device_name;
VLOG(2) << "Creating new Cycles device for OpenCL platform "
<< platform_name << ", device "
<< device_name << ".";
<< platform_device.device_name << ".";
{
/* try to use cached context */
@@ -114,16 +113,12 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou
}
cqCommandQueue = clCreateCommandQueue(cxContext, cdDevice, 0, &ciErr);
if(opencl_error(ciErr)) {
opencl_error("OpenCL: Error creating command queue");
if(opencl_error(ciErr))
return;
}
null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
if(opencl_error(ciErr)) {
opencl_error("OpenCL: Error creating memory buffer for NULL");
if(opencl_error(ciErr))
return;
}
fprintf(stderr, "Device init success\n");
device_initialized = true;
@@ -196,8 +191,6 @@ string OpenCLDeviceBase::device_md5_hash(string kernel_custom_build_options)
bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_features)
{
VLOG(2) << "Loading kernels for platform " << platform_name
<< ", device " << device_name << ".";
/* Verify if device was initialized. */
if(!device_initialized) {
fprintf(stderr, "OpenCL: failed to initialize device.\n");
@@ -213,14 +206,11 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
base_program.add_kernel(ustring("convert_to_half_float"));
base_program.add_kernel(ustring("shader"));
base_program.add_kernel(ustring("bake"));
base_program.add_kernel(ustring("zero_buffer"));
vector<OpenCLProgram*> programs;
programs.push_back(&base_program);
/* Call actual class to fill the vector with its programs. */
if(!load_kernels(requested_features, programs)) {
return false;
}
load_kernels(requested_features, programs);
/* Parallel compilation is supported by Cycles, but currently all OpenCL frameworks
* serialize the calls internally, so it's not much use right now.
@@ -252,14 +242,8 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
return true;
}
void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryType type)
void OpenCLDeviceBase::mem_alloc(device_memory& mem, MemoryType type)
{
if(name) {
VLOG(1) << "Buffer allocate: " << name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
}
size_t size = mem.memory_size();
cl_mem_flags mem_flag;
@@ -327,61 +311,8 @@ void OpenCLDeviceBase::mem_copy_from(device_memory& mem, int y, int w, int h, in
void OpenCLDeviceBase::mem_zero(device_memory& mem)
{
if(mem.device_pointer) {
if(base_program.is_loaded()) {
cl_kernel ckZeroBuffer = base_program(ustring("zero_buffer"));
size_t global_size[] = {1024, 1024};
size_t num_threads = global_size[0] * global_size[1];
cl_mem d_buffer = CL_MEM_PTR(mem.device_pointer);
cl_ulong d_offset = 0;
cl_ulong d_size = 0;
while(d_offset < mem.memory_size()) {
d_size = std::min<cl_ulong>(num_threads*sizeof(float4), mem.memory_size() - d_offset);
kernel_set_args(ckZeroBuffer, 0, d_buffer, d_size, d_offset);
ciErr = clEnqueueNDRangeKernel(cqCommandQueue,
ckZeroBuffer,
2,
NULL,
global_size,
NULL,
0,
NULL,
NULL);
opencl_assert_err(ciErr, "clEnqueueNDRangeKernel");
d_offset += d_size;
}
}
if(mem.data_pointer) {
memset((void*)mem.data_pointer, 0, mem.memory_size());
}
if(!base_program.is_loaded()) {
void* zero = (void*)mem.data_pointer;
if(!mem.data_pointer) {
zero = util_aligned_malloc(mem.memory_size(), 16);
memset(zero, 0, mem.memory_size());
}
opencl_assert(clEnqueueWriteBuffer(cqCommandQueue,
CL_MEM_PTR(mem.device_pointer),
CL_TRUE,
0,
mem.memory_size(),
zero,
0,
NULL, NULL));
if(!mem.data_pointer) {
util_aligned_free(zero);
}
}
memset((void*)mem.data_pointer, 0, mem.memory_size());
mem_copy_to(mem);
}
}
@@ -406,7 +337,7 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size)
device_vector<uchar> *data = new device_vector<uchar>();
data->copy((uchar*)host, size);
mem_alloc(name, *data, MEM_READ_ONLY);
mem_alloc(*data, MEM_READ_ONLY);
i = const_mem_map.insert(ConstMemMap::value_type(name, data)).first;
}
else {
@@ -425,7 +356,7 @@ void OpenCLDeviceBase::tex_alloc(const char *name,
VLOG(1) << "Texture allocate: " << name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
mem_alloc(NULL, mem, MEM_READ_ONLY);
mem_alloc(mem, MEM_READ_ONLY);
mem_copy_to(mem);
assert(mem_map.find(name) == mem_map.end());
mem_map.insert(MemMap::value_type(name, mem.device_pointer));

View File

@@ -39,16 +39,11 @@ public:
{
}
virtual bool show_samples() const {
return true;
}
virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
virtual void load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
vector<OpenCLProgram*> &programs)
{
path_trace_program.add_kernel(ustring("path_trace"));
programs.push_back(&path_trace_program);
return true;
}
~OpenCLDeviceMegaKernel()
@@ -125,7 +120,7 @@ public:
tile.sample = sample + 1;
task->update_progress(&tile, tile.w*tile.h);
task->update_progress(&tile);
}
/* Complete kernel execution before release tile */

File diff suppressed because it is too large Load Diff

View File

@@ -19,7 +19,6 @@
#include "opencl.h"
#include "util_logging.h"
#include "util_md5.h"
#include "util_path.h"
#include "util_time.h"
@@ -310,8 +309,6 @@ bool OpenCLDeviceBase::OpenCLProgram::build_kernel(const string *debug_src)
string build_options;
build_options = device->kernel_build_options(debug_src) + kernel_build_options;
VLOG(1) << "Build options passed to clBuildProgram: '"
<< build_options << "'.";
cl_int ciErr = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
/* show warnings even if build is successful */
@@ -339,13 +336,12 @@ bool OpenCLDeviceBase::OpenCLProgram::build_kernel(const string *debug_src)
bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
{
string source = "#include \"kernels/opencl/" + kernel_file + "\"\n";
string source = "#include \"kernels/opencl/" + kernel_file + "\" // " + OpenCLCache::get_kernel_md5() + "\n";
/* We compile kernels consisting of many files. unfortunately OpenCL
* kernel caches do not seem to recognize changes in included files.
* so we force recompile on changes by adding the md5 hash of all files.
*/
source = path_source_replace_includes(source, path_get("kernel"));
source += "\n// " + util_md5_string(source) + "\n";
if(debug_src) {
path_write_text(*debug_src, source);
@@ -356,10 +352,10 @@ bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
cl_int ciErr;
program = clCreateProgramWithSource(device->cxContext,
1,
&source_str,
&source_len,
&ciErr);
1,
&source_str,
&source_len,
&ciErr);
if(ciErr != CL_SUCCESS) {
add_error(string("OpenCL program creation failed: ") + clewErrorString(ciErr));
@@ -442,11 +438,7 @@ void OpenCLDeviceBase::OpenCLProgram::load()
if(!program) {
add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
/* need to create source to get md5 */
string source = "#include \"kernels/opencl/" + kernel_file + "\"\n";
source = path_source_replace_includes(source, path_get("kernel"));
string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + OpenCLCache::get_kernel_md5();
basename = path_cache_get(path_join("kernels", basename));
string clbin = basename + ".clbin";
@@ -552,11 +544,6 @@ bool OpenCLInfo::use_debug()
return DebugFlags().opencl.debug;
}
bool OpenCLInfo::use_single_program()
{
return DebugFlags().opencl.single_program;
}
bool OpenCLInfo::kernel_use_advanced_shading(const string& platform)
{
/* keep this in sync with kernel_types.h! */
@@ -605,19 +592,6 @@ bool OpenCLInfo::device_supported(const string& platform_name,
sizeof(cl_device_type),
&device_type,
NULL);
char device_name[1024] = "\0";
clGetDeviceInfo(device_id,
CL_DEVICE_NAME,
sizeof(device_name),
&device_name,
NULL);
/* It is possible tyo have Iris GPU on AMD/Apple OpenCL framework
* (aka, it will not be on Intel framework). This isn't supported
* and needs an explicit blacklist.
*/
if(strstr(device_name, "Iris")) {
return false;
}
if(platform_name == "AMD Accelerated Parallel Processing" &&
device_type == CL_DEVICE_TYPE_GPU)
{
@@ -774,10 +748,10 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
num_devices = 0;
cl_int ciErr;
if((ciErr = clGetDeviceIDs(platform_id,
device_type,
0,
NULL,
&num_devices)) != CL_SUCCESS || num_devices == 0)
device_type,
0,
NULL,
&num_devices)) != CL_SUCCESS || num_devices == 0)
{
FIRST_VLOG(2) << "Ignoring platform " << platform_name
<< ", failed to fetch number of devices: " << string(clewErrorString(ciErr));

View File

@@ -13,28 +13,19 @@ set(INC_SYS
set(SRC
kernels/cpu/kernel.cpp
kernels/cpu/kernel_split.cpp
kernels/opencl/kernel.cl
kernels/opencl/kernel_state_buffer_size.cl
kernels/opencl/kernel_split.cl
kernels/opencl/kernel_data_init.cl
kernels/opencl/kernel_path_init.cl
kernels/opencl/kernel_queue_enqueue.cl
kernels/opencl/kernel_scene_intersect.cl
kernels/opencl/kernel_lamp_emission.cl
kernels/opencl/kernel_do_volume.cl
kernels/opencl/kernel_indirect_background.cl
kernels/opencl/kernel_background_buffer_update.cl
kernels/opencl/kernel_shader_eval.cl
kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
kernels/opencl/kernel_subsurface_scatter.cl
kernels/opencl/kernel_direct_lighting.cl
kernels/opencl/kernel_shadow_blocked_ao.cl
kernels/opencl/kernel_shadow_blocked_dl.cl
kernels/opencl/kernel_shadow_blocked.cl
kernels/opencl/kernel_next_iteration_setup.cl
kernels/opencl/kernel_indirect_subsurface.cl
kernels/opencl/kernel_buffer_update.cl
kernels/opencl/kernel_sum_all_radiance.cl
kernels/cuda/kernel.cu
kernels/cuda/kernel_split.cu
)
set(SRC_BVH_HEADERS
@@ -77,7 +68,6 @@ set(SRC_HEADERS
kernel_path_common.h
kernel_path_state.h
kernel_path_surface.h
kernel_path_subsurface.h
kernel_path_volume.h
kernel_projection.h
kernel_queues.h
@@ -98,10 +88,6 @@ set(SRC_KERNELS_CPU_HEADERS
kernels/cpu/kernel_cpu_image.h
)
set(SRC_KERNELS_CUDA_HEADERS
kernels/cuda/kernel_config.h
)
set(SRC_CLOSURE_HEADERS
closure/alloc.h
closure/bsdf.h
@@ -178,8 +164,6 @@ set(SRC_GEOM_HEADERS
geom/geom_curve.h
geom/geom_motion_curve.h
geom/geom_motion_triangle.h
geom/geom_motion_triangle_intersect.h
geom/geom_motion_triangle_shader.h
geom/geom_object.h
geom/geom_patch.h
geom/geom_primitive.h
@@ -203,25 +187,17 @@ set(SRC_UTIL_HEADERS
)
set(SRC_SPLIT_HEADERS
split/kernel_buffer_update.h
split/kernel_background_buffer_update.h
split/kernel_data_init.h
split/kernel_direct_lighting.h
split/kernel_do_volume.h
split/kernel_holdout_emission_blurring_pathtermination_ao.h
split/kernel_indirect_background.h
split/kernel_indirect_subsurface.h
split/kernel_lamp_emission.h
split/kernel_next_iteration_setup.h
split/kernel_path_init.h
split/kernel_queue_enqueue.h
split/kernel_scene_intersect.h
split/kernel_shader_eval.h
split/kernel_shadow_blocked_ao.h
split/kernel_shadow_blocked_dl.h
split/kernel_shadow_blocked.h
split/kernel_split_common.h
split/kernel_split_data.h
split/kernel_split_data_types.h
split/kernel_subsurface_scatter.h
split/kernel_sum_all_radiance.h
)
# CUDA module
@@ -249,9 +225,8 @@ if(WITH_CYCLES_CUDA_BINARIES)
endif()
# build for each arch
set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu
set(cuda_sources kernels/cuda/kernel.cu
${SRC_HEADERS}
${SRC_KERNELS_CUDA_HEADERS}
${SRC_BVH_HEADERS}
${SRC_SVM_HEADERS}
${SRC_GEOM_HEADERS}
@@ -260,22 +235,15 @@ if(WITH_CYCLES_CUDA_BINARIES)
)
set(cuda_cubins)
macro(CYCLES_CUDA_KERNEL_ADD arch split experimental)
if(${split})
set(cuda_extra_flags "-D__SPLIT__")
set(cuda_cubin kernel_split)
macro(CYCLES_CUDA_KERNEL_ADD arch experimental)
if(${experimental})
set(cuda_extra_flags "-D__KERNEL_EXPERIMENTAL__")
set(cuda_cubin kernel_experimental_${arch}.cubin)
else()
set(cuda_extra_flags "")
set(cuda_cubin kernel)
set(cuda_cubin kernel_${arch}.cubin)
endif()
if(${experimental})
set(cuda_extra_flags ${cuda_extra_flags} -D__KERNEL_EXPERIMENTAL__)
set(cuda_cubin ${cuda_cubin}_experimental)
endif()
set(cuda_cubin ${cuda_cubin}_${arch}.cubin)
if(WITH_CYCLES_DEBUG)
set(cuda_debug_flags "-D__KERNEL_DEBUG__")
else()
@@ -288,19 +256,13 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${cuda_nvcc_version}")
set(cuda_math_flags "--use_fast_math")
if(split)
set(cuda_kernel_src "/kernels/cuda/kernel_split.cu")
else()
set(cuda_kernel_src "/kernels/cuda/kernel.cu")
endif()
add_custom_command(
OUTPUT ${cuda_cubin}
COMMAND ${cuda_nvcc_command}
-arch=${arch}
${CUDA_NVCC_FLAGS}
-m${CUDA_BITS}
--cubin ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
--cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda/kernel.cu
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
--ptxas-options="-v"
${cuda_arch_flags}
@@ -327,12 +289,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
# Compile regular kernel
CYCLES_CUDA_KERNEL_ADD(${arch} FALSE FALSE)
if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES)
# Compile split kernel
CYCLES_CUDA_KERNEL_ADD(${arch} TRUE FALSE)
endif()
CYCLES_CUDA_KERNEL_ADD(${arch} FALSE)
endforeach()
add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
@@ -355,42 +312,31 @@ if(CXX_HAS_SSE)
kernels/cpu/kernel_sse2.cpp
kernels/cpu/kernel_sse3.cpp
kernels/cpu/kernel_sse41.cpp
kernels/cpu/kernel_split_sse2.cpp
kernels/cpu/kernel_split_sse3.cpp
kernels/cpu/kernel_split_sse41.cpp
)
set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX)
list(APPEND SRC
kernels/cpu/kernel_avx.cpp
kernels/cpu/kernel_split_avx.cpp
)
set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX2)
list(APPEND SRC
kernels/cpu/kernel_avx2.cpp
kernels/cpu/kernel_split_avx2.cpp
)
set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
endif()
add_library(cycles_kernel
${SRC}
${SRC_HEADERS}
${SRC_KERNELS_CPU_HEADERS}
${SRC_KERNELS_CUDA_HEADERS}
${SRC_BVH_HEADERS}
${SRC_CLOSURE_HEADERS}
${SRC_SVM_HEADERS}
@@ -413,28 +359,19 @@ endif()
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_state_buffer_size.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_do_volume.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_background.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_background_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_scatter.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_dl.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_sum_all_radiance.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_CUDA_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/bvh)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/closure)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm)

View File

@@ -357,7 +357,7 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
#endif
}
#if defined(__VOLUME_RECORD_ALL__) || (defined(__SHADOW_RECORD_ALL__) && defined(__KERNEL_CPU__))
#if defined(__SHADOW_RECORD_ALL__) || defined (__VOLUME_RECORD_ALL__)
/* ToDo: Move to another file? */
ccl_device int intersections_compare(const void *a, const void *b)
{
@@ -373,28 +373,5 @@ ccl_device int intersections_compare(const void *a, const void *b)
}
#endif
#if defined(__SHADOW_RECORD_ALL__)
ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
{
#ifdef __KERNEL_GPU__
/* Use bubble sort which has more friendly memory pattern on GPU. */
bool swapped;
do {
swapped = false;
for(int j = 0; j < num_hits - 1; ++j) {
if(hits[j].t > hits[j + 1].t) {
struct Intersection tmp = hits[j];
hits[j] = hits[j + 1];
hits[j + 1] = tmp;
swapped = true;
}
}
--num_hits;
} while(swapped);
#else
qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
#endif
}
#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */
CCL_NAMESPACE_END

View File

@@ -454,7 +454,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
float3 aligned_dir0 = transform_direction(&space0, dir),
aligned_dir1 = transform_direction(&space1, dir);
aligned_dir1 = transform_direction(&space1, dir);;
float3 aligned_P0 = transform_point(&space0, P),
aligned_P1 = transform_point(&space1, P);
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
@@ -516,7 +516,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
float3 aligned_dir0 = transform_direction(&space0, dir),
aligned_dir1 = transform_direction(&space1, dir);
aligned_dir1 = transform_direction(&space1, dir);;
float3 aligned_P0 = transform_point(&space0, P),
aligned_P1 = transform_point(&space1, P);
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),

View File

@@ -187,7 +187,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* primitive intersection */
while(prim_addr < prim_addr2) {
kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
bool hit;
@@ -222,7 +222,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = bvh_cardinal_curve_intersect(kg,
isect_array,
@@ -232,7 +231,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
curve_type,
type,
NULL,
0, 0);
}
@@ -245,7 +244,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
curve_type,
type,
NULL,
0, 0);
}
@@ -309,9 +308,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
# if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
# else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
# endif
triangle_intersect_precalc(dir, &isect_precalc);
@@ -362,10 +361,12 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
}
else {
float ignore_t = FLT_MAX;
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
# endif
triangle_intersect_precalc(dir, &isect_precalc);
}

View File

@@ -72,19 +72,19 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
ss_isect->num_hits = 0;
const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object);
if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
if(!(object_flag & SD_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
isect_t = bvh_instance_motion_push(kg,
subsurface_object,
ray,
&P,
&dir,
&idir,
isect_t,
&ob_itfm);
bvh_instance_motion_push(kg,
subsurface_object,
ray,
&P,
&dir,
&idir,
&isect_t,
&ob_itfm);
#else
isect_t = bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, isect_t);
bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
#endif
object = subsurface_object;
}

View File

@@ -213,7 +213,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
--stack_ptr;
}
}
BVH_DEBUG_NEXT_NODE();
BVH_DEBUG_NEXT_STEP();
}
/* if node is leaf, fetch triangle list */
@@ -235,7 +235,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
BVH_DEBUG_NEXT_STEP();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect(kg,
&isect_precalc,
@@ -264,7 +264,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
BVH_DEBUG_NEXT_STEP();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
isect,
@@ -296,9 +296,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
BVH_DEBUG_NEXT_STEP();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
bool hit;
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = bvh_cardinal_curve_intersect(kg,
@@ -309,7 +308,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
curve_type,
type,
lcg_state,
difl,
extmax);
@@ -323,7 +322,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
curve_type,
type,
lcg_state,
difl,
extmax);
@@ -354,9 +353,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
# if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
# else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
# endif
triangle_intersect_precalc(dir, &isect_precalc);
@@ -391,9 +390,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* instance pop */
# if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
# else
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
# endif
triangle_intersect_precalc(dir, &isect_precalc);

View File

@@ -50,17 +50,12 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_DEBUG__
# define BVH_DEBUG_INIT() \
do { \
isect->num_traversed_nodes = 0; \
isect->num_traversal_steps = 0; \
isect->num_traversed_instances = 0; \
isect->num_intersections = 0; \
} while(0)
# define BVH_DEBUG_NEXT_NODE() \
# define BVH_DEBUG_NEXT_STEP() \
do { \
++isect->num_traversed_nodes; \
} while(0)
# define BVH_DEBUG_NEXT_INTERSECTION() \
do { \
++isect->num_intersections; \
++isect->num_traversal_steps; \
} while(0)
# define BVH_DEBUG_NEXT_INSTANCE() \
do { \
@@ -68,8 +63,7 @@ CCL_NAMESPACE_BEGIN
} while(0)
#else /* __KERNEL_DEBUG__ */
# define BVH_DEBUG_INIT()
# define BVH_DEBUG_NEXT_NODE()
# define BVH_DEBUG_NEXT_INTERSECTION()
# define BVH_DEBUG_NEXT_STEP()
# define BVH_DEBUG_NEXT_INSTANCE()
#endif /* __KERNEL_DEBUG__ */

View File

@@ -236,11 +236,13 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
# else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
# endif
triangle_intersect_precalc(dir, &isect_precalc);
@@ -281,9 +283,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* instance pop */
# if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
# else
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
# endif
triangle_intersect_precalc(dir, &isect_precalc);

View File

@@ -287,11 +287,13 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
# else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
# endif
triangle_intersect_precalc(dir, &isect_precalc);
@@ -347,10 +349,11 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
}
else {
float ignore_t = FLT_MAX;
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
# endif
triangle_intersect_precalc(dir, &isect_precalc);
}

View File

@@ -106,20 +106,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
if(false
#ifdef __VISIBILITY_FLAG__
|| ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
#endif
#if BVH_FEATURE(BVH_MOTION)
|| UNLIKELY(ray->time < inodes.y)
|| UNLIKELY(ray->time > inodes.z)
#endif
) {
if((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0) {
/* Pop. */
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
continue;
}
#endif
ssef dist;
int child_mask = NODE_INTERSECT(kg,
@@ -268,7 +262,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Primitive intersection. */
while(prim_addr < prim_addr2) {
kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
bool hit;
@@ -303,7 +297,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = bvh_cardinal_curve_intersect(kg,
isect_array,
@@ -313,7 +306,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
curve_type,
type,
NULL,
0, 0);
}
@@ -326,7 +319,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
curve_type,
type,
NULL,
0, 0);
}
@@ -390,9 +383,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
# if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
# else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
# endif
num_hits_in_instance = 0;
@@ -445,10 +438,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
}
}
else {
float ignore_t = FLT_MAX;
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
# endif
}

View File

@@ -61,19 +61,19 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
ss_isect->num_hits = 0;
const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object);
if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
if(!(object_flag & SD_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
isect_t = bvh_instance_motion_push(kg,
subsurface_object,
ray,
&P,
&dir,
&idir,
isect_t,
&ob_itfm);
bvh_instance_motion_push(kg,
subsurface_object,
ray,
&P,
&dir,
&idir,
&isect_t,
&ob_itfm);
#else
isect_t = bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, isect_t);
bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
#endif
object = subsurface_object;
}

View File

@@ -117,10 +117,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
if(UNLIKELY(node_dist > isect->t)
#if BVH_FEATURE(BVH_MOTION)
|| UNLIKELY(ray->time < inodes.y)
|| UNLIKELY(ray->time > inodes.z)
#endif
#ifdef __VISIBILITY_FLAG__
|| (__float_as_uint(inodes.x) & visibility) == 0)
#endif
@@ -135,7 +131,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int child_mask;
ssef dist;
BVH_DEBUG_NEXT_NODE();
BVH_DEBUG_NEXT_STEP();
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
@@ -330,7 +326,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
BVH_DEBUG_NEXT_STEP();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect(kg,
&isect_precalc,
@@ -351,7 +347,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
BVH_DEBUG_NEXT_STEP();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
isect,
@@ -375,9 +371,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
BVH_DEBUG_NEXT_STEP();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
bool hit;
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = bvh_cardinal_curve_intersect(kg,
@@ -388,7 +383,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
curve_type,
type,
lcg_state,
difl,
extmax);
@@ -402,7 +397,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
curve_type,
type,
lcg_state,
difl,
extmax);
@@ -468,9 +463,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Instance pop. */
# if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
# else
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
# endif
qbvh_near_far_idx_calc(idir,

View File

@@ -293,11 +293,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Instance push. */
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
# else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
# endif
qbvh_near_far_idx_calc(idir,
@@ -341,9 +343,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Instance pop. */
# if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
# else
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
# endif
qbvh_near_far_idx_calc(idir,

View File

@@ -344,11 +344,13 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Instance push. */
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
# else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
# endif
qbvh_near_far_idx_calc(idir,
@@ -406,10 +408,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
}
}
else {
float ignore_t = FLT_MAX;
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
# endif
}

View File

@@ -20,17 +20,17 @@ ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType ty
{
kernel_assert(size <= sizeof(ShaderClosure));
int num_closure = sd->num_closure;
int num_closure_extra = sd->num_closure_extra;
int num_closure = ccl_fetch(sd, num_closure);
int num_closure_extra = ccl_fetch(sd, num_closure_extra);
if(num_closure + num_closure_extra >= MAX_CLOSURE)
return NULL;
ShaderClosure *sc = &sd->closure[num_closure];
ShaderClosure *sc = &ccl_fetch(sd, closure)[num_closure];
sc->type = type;
sc->weight = weight;
sd->num_closure++;
ccl_fetch(sd, num_closure)++;
return sc;
}
@@ -44,25 +44,25 @@ ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
* This lets us keep the same fast array iteration over closures, as we
* found linked list iteration and iteration with skipping to be slower. */
int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure));
int num_closure = sd->num_closure;
int num_closure_extra = sd->num_closure_extra + num_extra;
int num_closure = ccl_fetch(sd, num_closure);
int num_closure_extra = ccl_fetch(sd, num_closure_extra) + num_extra;
if(num_closure + num_closure_extra > MAX_CLOSURE) {
/* Remove previous closure. */
sd->num_closure--;
sd->num_closure_extra++;
ccl_fetch(sd, num_closure)--;
ccl_fetch(sd, num_closure_extra)++;
return NULL;
}
sd->num_closure_extra = num_closure_extra;
return (ccl_addr_space void*)(sd->closure + MAX_CLOSURE - num_closure_extra);
ccl_fetch(sd, num_closure_extra) = num_closure_extra;
return (ccl_addr_space void*)(ccl_fetch(sd, closure) + MAX_CLOSURE - num_closure_extra);
}
ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight)
{
ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
if(sc == NULL)
if(!sc)
return NULL;
float sample_weight = fabsf(average(weight));

View File

@@ -51,89 +51,89 @@ ccl_device_forceinline int bsdf_sample(KernelGlobals *kg,
switch(sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_ID:
label = bsdf_diffuse_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_diffuse_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
#ifdef __SVM__
case CLOSURE_BSDF_OREN_NAYAR_ID:
label = bsdf_oren_nayar_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_oren_nayar_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
#ifdef __OSL__
case CLOSURE_BSDF_PHONG_RAMP_ID:
label = bsdf_phong_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_phong_ramp_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
label = bsdf_diffuse_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_diffuse_ramp_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
#endif
case CLOSURE_BSDF_TRANSLUCENT_ID:
label = bsdf_translucent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_translucent_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_REFLECTION_ID:
label = bsdf_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_reflection_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_REFRACTION_ID:
label = bsdf_refraction_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_refraction_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_TRANSPARENT_ID:
label = bsdf_transparent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_transparent_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
label = bsdf_microfacet_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_microfacet_ggx_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
label = bsdf_microfacet_multi_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &sd->lcg_state);
label = bsdf_microfacet_multi_ggx_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &ccl_fetch(sd, lcg_state));
break;
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
label = bsdf_microfacet_multi_ggx_glass_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &sd->lcg_state);
label = bsdf_microfacet_multi_ggx_glass_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &ccl_fetch(sd, lcg_state));
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_microfacet_beckmann_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
label = bsdf_ashikhmin_shirley_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_ashikhmin_shirley_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
label = bsdf_ashikhmin_velvet_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_ashikhmin_velvet_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
label = bsdf_diffuse_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_diffuse_toon_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_GLOSSY_TOON_ID:
label = bsdf_glossy_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_glossy_toon_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
label = bsdf_hair_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_hair_reflection_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
label = bsdf_hair_transmission_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
label = bsdf_hair_transmission_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
#endif
#ifdef __VOLUME__
case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
label = volume_henyey_greenstein_sample(sc, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
label = volume_henyey_greenstein_sample(sc, ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
#endif
default:
@@ -157,75 +157,75 @@ float3 bsdf_eval(KernelGlobals *kg,
{
float3 eval;
if(dot(sd->Ng, omega_in) >= 0.0f) {
if(dot(ccl_fetch(sd, Ng), omega_in) >= 0.0f) {
switch(sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_ID:
eval = bsdf_diffuse_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_diffuse_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
#ifdef __SVM__
case CLOSURE_BSDF_OREN_NAYAR_ID:
eval = bsdf_oren_nayar_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_oren_nayar_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
#ifdef __OSL__
case CLOSURE_BSDF_PHONG_RAMP_ID:
eval = bsdf_phong_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_phong_ramp_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
eval = bsdf_diffuse_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_diffuse_ramp_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
#endif
case CLOSURE_BSDF_TRANSLUCENT_ID:
eval = bsdf_translucent_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_translucent_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_REFLECTION_ID:
eval = bsdf_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_reflection_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_REFRACTION_ID:
eval = bsdf_refraction_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_refraction_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_TRANSPARENT_ID:
eval = bsdf_transparent_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_transparent_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
eval = bsdf_microfacet_ggx_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_microfacet_ggx_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
eval = bsdf_microfacet_multi_ggx_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state);
eval = bsdf_microfacet_multi_ggx_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state));
break;
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
eval = bsdf_microfacet_multi_ggx_glass_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state);
eval = bsdf_microfacet_multi_ggx_glass_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state));
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_microfacet_beckmann_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_ashikhmin_shirley_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_ashikhmin_velvet_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
eval = bsdf_diffuse_toon_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_diffuse_toon_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_GLOSSY_TOON_ID:
eval = bsdf_glossy_toon_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_glossy_toon_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
eval = bsdf_hair_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_hair_reflection_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
eval = bsdf_hair_transmission_eval_reflect(sc, sd->I, omega_in, pdf);
eval = bsdf_hair_transmission_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
#endif
#ifdef __VOLUME__
case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
eval = volume_henyey_greenstein_eval_phase(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
#endif
default:
@@ -237,63 +237,63 @@ float3 bsdf_eval(KernelGlobals *kg,
switch(sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_ID:
eval = bsdf_diffuse_eval_transmit(sc, sd->I, omega_in, pdf);
eval = bsdf_diffuse_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
#ifdef __SVM__
case CLOSURE_BSDF_OREN_NAYAR_ID:
eval = bsdf_oren_nayar_eval_transmit(sc, sd->I, omega_in, pdf);
eval = bsdf_oren_nayar_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_TRANSLUCENT_ID:
eval = bsdf_translucent_eval_transmit(sc, sd->I, omega_in, pdf);
eval = bsdf_translucent_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_REFLECTION_ID:
eval = bsdf_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
eval = bsdf_reflection_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_REFRACTION_ID:
eval = bsdf_refraction_eval_transmit(sc, sd->I, omega_in, pdf);
eval = bsdf_refraction_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_TRANSPARENT_ID:
eval = bsdf_transparent_eval_transmit(sc, sd->I, omega_in, pdf);
eval = bsdf_transparent_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
eval = bsdf_microfacet_ggx_eval_transmit(sc, sd->I, omega_in, pdf);
eval = bsdf_microfacet_ggx_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
eval = bsdf_microfacet_multi_ggx_eval_transmit(sc, sd->I, omega_in, pdf, &sd->lcg_state);
eval = bsdf_microfacet_multi_ggx_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state));
break;
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
eval = bsdf_microfacet_multi_ggx_glass_eval_transmit(sc, sd->I, omega_in, pdf, &sd->lcg_state);
eval = bsdf_microfacet_multi_ggx_glass_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state));
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf);
eval = bsdf_microfacet_beckmann_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf);
eval = bsdf_ashikhmin_shirley_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf);
eval = bsdf_ashikhmin_velvet_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
eval = bsdf_diffuse_toon_eval_transmit(sc, sd->I, omega_in, pdf);
eval = bsdf_diffuse_toon_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_GLOSSY_TOON_ID:
eval = bsdf_glossy_toon_eval_transmit(sc, sd->I, omega_in, pdf);
eval = bsdf_glossy_toon_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
eval = bsdf_hair_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
eval = bsdf_hair_reflection_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
eval = bsdf_hair_transmission_eval_transmit(sc, sd->I, omega_in, pdf);
eval = bsdf_hair_transmission_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
#endif
#ifdef __VOLUME__
case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
eval = volume_henyey_greenstein_eval_phase(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
#endif
default:

View File

@@ -143,7 +143,6 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
{
const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
float3 N = bsdf->N;
int label = LABEL_REFLECT | LABEL_GLOSSY;
float NdotI = dot(N, I);
if(NdotI > 0.0f) {
@@ -212,7 +211,6 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
/* Some high number for MIS. */
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
label = LABEL_REFLECT | LABEL_SINGULAR;
}
else {
/* leave the rest to eval_reflect */
@@ -226,7 +224,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
#endif
}
return label;
return LABEL_REFLECT|LABEL_GLOSSY;
}

View File

@@ -267,10 +267,7 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng,
*eval = make_float3(*pdf, *pdf, *pdf);
/* TODO(sergey): Should always be negative, but seems some precision issue
* is involved here.
*/
kernel_assert(dot(locy, *omega_in) < 1e-4f);
kernel_assert(dot(locy, *omega_in) < 0.0f);
return LABEL_TRANSMIT|LABEL_GLOSSY;
}

View File

@@ -266,7 +266,7 @@ ccl_device bool bsdf_microfacet_merge(const ShaderClosure *a, const ShaderClosur
(bsdf_a->alpha_y == bsdf_b->alpha_y) &&
(isequal_float3(bsdf_a->T, bsdf_b->T)) &&
(bsdf_a->ior == bsdf_b->ior) &&
((bsdf_a->extra == NULL && bsdf_b->extra == NULL) ||
((!bsdf_a->extra && !bsdf_b->extra) ||
((bsdf_a->extra && bsdf_b->extra) &&
(isequal_float3(bsdf_a->extra->color, bsdf_b->extra->color))));
}
@@ -452,7 +452,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
float alpha_y = bsdf->alpha_y;
bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
float3 N = bsdf->N;
int label;
float cosNO = dot(N, I);
if(cosNO > 0) {
@@ -478,7 +477,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
/* reflection or refraction? */
if(!m_refractive) {
float cosMO = dot(m, I);
label = LABEL_REFLECT | LABEL_GLOSSY;
if(cosMO > 0) {
/* eq. 39 - compute actual reflected direction */
@@ -489,7 +487,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
/* some high number for MIS */
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
label = LABEL_REFLECT | LABEL_SINGULAR;
}
else {
/* microfacet normal is visible to this ray */
@@ -552,8 +549,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
}
}
else {
label = LABEL_TRANSMIT | LABEL_GLOSSY;
/* CAUTION: the i and o variables are inverted relative to the paper
* eq. 39 - compute actual refractive direction */
float3 R, T;
@@ -581,7 +576,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
/* some high number for MIS */
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
label = LABEL_TRANSMIT | LABEL_SINGULAR;
}
else {
/* eq. 33 */
@@ -613,10 +607,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
}
}
}
else {
label = (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
}
return label;
return (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
}
/* Beckmann microfacet with Smith shadow-masking from:
@@ -824,7 +815,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
float alpha_y = bsdf->alpha_y;
bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
float3 N = bsdf->N;
int label;
float cosNO = dot(N, I);
if(cosNO > 0) {
@@ -849,7 +839,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
/* reflection or refraction? */
if(!m_refractive) {
label = LABEL_REFLECT | LABEL_GLOSSY;
float cosMO = dot(m, I);
if(cosMO > 0) {
@@ -861,7 +850,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
/* some high number for MIS */
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
label = LABEL_REFLECT | LABEL_SINGULAR;
}
else {
/* microfacet normal is visible to this ray
@@ -916,8 +904,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
}
}
else {
label = LABEL_TRANSMIT | LABEL_GLOSSY;
/* CAUTION: the i and o variables are inverted relative to the paper
* eq. 39 - compute actual refractive direction */
float3 R, T;
@@ -945,7 +931,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
/* some high number for MIS */
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
label = LABEL_TRANSMIT | LABEL_SINGULAR;
}
else {
/* eq. 33 */
@@ -978,10 +963,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
}
}
}
else {
label = (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
}
return label;
return (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
}
CCL_NAMESPACE_END

View File

@@ -43,7 +43,7 @@ ccl_device_forceinline float D_ggx_aniso(const float3 wm, const float2 alpha)
ccl_device_forceinline float2 mf_sampleP22_11(const float cosI, const float2 randU)
{
if(cosI > 0.9999f || cosI < 1e-6f) {
const float r = sqrtf(randU.x / max(1.0f - randU.x, 1e-7f));
const float r = sqrtf(randU.x / (1.0f - randU.x));
const float phi = M_2PI_F * randU.y;
return make_float2(r*cosf(phi), r*sinf(phi));
}
@@ -83,7 +83,7 @@ ccl_device_forceinline float3 mf_sample_vndf(const float3 wi, const float2 alpha
const float3 wi_11 = normalize(make_float3(alpha.x*wi.x, alpha.y*wi.y, wi.z));
const float2 slope_11 = mf_sampleP22_11(wi_11.z, randU);
const float2 cossin_phi = safe_normalize(make_float2(wi_11.x, wi_11.y));
const float2 cossin_phi = normalize(make_float2(wi_11.x, wi_11.y));
const float slope_x = alpha.x*(cossin_phi.x * slope_11.x - cossin_phi.y * slope_11.y);
const float slope_y = alpha.y*(cossin_phi.y * slope_11.x + cossin_phi.x * slope_11.y);

View File

@@ -23,8 +23,6 @@
#include "geom_subd_triangle.h"
#include "geom_triangle_intersect.h"
#include "geom_motion_triangle.h"
#include "geom_motion_triangle_intersect.h"
#include "geom_motion_triangle_shader.h"
#include "geom_motion_curve.h"
#include "geom_curve.h"
#include "geom_volume.h"

View File

@@ -30,7 +30,7 @@ ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *
ccl_device_inline uint attribute_primitive_type(KernelGlobals *kg, const ShaderData *sd)
{
#ifdef __HAIR__
if(sd->type & PRIMITIVE_ALL_CURVE) {
if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
return ATTR_PRIM_CURVE;
}
else
@@ -53,12 +53,12 @@ ccl_device_inline AttributeDescriptor attribute_not_found()
ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id)
{
if(sd->object == PRIM_NONE) {
if(ccl_fetch(sd, object) == PRIM_NONE) {
return attribute_not_found();
}
/* for SVM, find attribute by unique id */
uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride;
uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride;
attr_offset += attribute_primitive_type(kg, sd);
uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
@@ -73,7 +73,7 @@ ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, const Sh
AttributeDescriptor desc;
desc.element = (AttributeElement)attr_map.y;
if(sd->prim == PRIM_NONE &&
if(ccl_fetch(sd, prim) == PRIM_NONE &&
desc.element != ATTR_ELEMENT_MESH &&
desc.element != ATTR_ELEMENT_VOXEL &&
desc.element != ATTR_ELEMENT_OBJECT)

View File

@@ -32,22 +32,22 @@ ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd,
if(dy) *dy = 0.0f;
#endif
return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim);
return kernel_tex_fetch(__attributes_float, desc.offset + ccl_fetch(sd, prim));
}
else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0);
float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1);
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = sd->du.dx*(f1 - f0);
if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0);
if(dy) *dy = 0.0f;
#endif
return (1.0f - sd->u)*f0 + sd->u*f1;
return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1;
}
else {
#ifdef __RAY_DIFFERENTIALS__
@@ -71,22 +71,22 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
#endif
return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim));
return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + ccl_fetch(sd, prim)));
}
else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0));
float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1));
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = sd->du.dx*(f1 - f0);
if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
#endif
return (1.0f - sd->u)*f0 + sd->u*f1;
return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1;
}
else {
#ifdef __RAY_DIFFERENTIALS__
@@ -104,22 +104,22 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
{
float r = 0.0f;
if(sd->type & PRIMITIVE_ALL_CURVE) {
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float4 P_curve[2];
if(sd->type & PRIMITIVE_CURVE) {
if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) {
P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
}
else {
motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve);
}
r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w;
r = (P_curve[1].w - P_curve[0].w) * ccl_fetch(sd, u) + P_curve[0].w;
}
return r*2.0f;
@@ -130,8 +130,8 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd)
{
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float4 P_curve[2];
@@ -139,7 +139,7 @@ ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd
P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u);
return float4_to_float3(P_curve[1]) * ccl_fetch(sd, u) + float4_to_float3(P_curve[0]) * (1.0f - ccl_fetch(sd, u));
}
/* Curve tangent normal */
@@ -148,14 +148,14 @@ ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd)
{
float3 tgN = make_float3(0.0f,0.0f,0.0f);
if(sd->type & PRIMITIVE_ALL_CURVE) {
if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu)));
tgN = -(-ccl_fetch(sd, I) - ccl_fetch(sd, dPdu) * (dot(ccl_fetch(sd, dPdu),-ccl_fetch(sd, I)) / len_squared(ccl_fetch(sd, dPdu))));
tgN = normalize(tgN);
/* need to find suitable scaled gd for corrected normal */
#if 0
tgN = normalize(tgN - gd * sd->dPdu);
tgN = normalize(tgN - gd * ccl_fetch(sd, dPdu));
#endif
}
@@ -229,15 +229,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
float3 P, float3 dir, uint visibility, int object, int curveAddr, float time,int type, uint *lcg_state, float difl, float extmax)
#endif
{
const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
if(time < prim_time.x || time > prim_time.y) {
return false;
}
}
int segment = PRIMITIVE_UNPACK_SEGMENT(type);
float epsilon = 0.0f;
float r_st, r_en;
@@ -264,20 +255,9 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
int ka = max(k0 - 1, v00.x);
int kb = min(k1 + 1, v00.x + v00.y - 1);
#ifdef __KERNEL_AVX2__
avxf P_curve_0_1, P_curve_2_3;
if(is_curve_primitive) {
P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x);
}
else {
int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
motion_cardinal_curve_keys_avx(kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1,&P_curve_2_3);
}
#else /* __KERNEL_AVX2__ */
ssef P_curve[4];
if(is_curve_primitive) {
if(type & PRIMITIVE_CURVE) {
P_curve[0] = load4f(&kg->__curve_keys.data[ka].x);
P_curve[1] = load4f(&kg->__curve_keys.data[k0].x);
P_curve[2] = load4f(&kg->__curve_keys.data[k1].x);
@@ -287,7 +267,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve);
}
#endif /* __KERNEL_AVX2__ */
ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
@@ -299,33 +278,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
#ifdef __KERNEL_AVX2__
const avxf vPP = _mm256_broadcast_ps(&P.m128);
const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
const avxf htfm22 = avxf(htfm2.m128, htfm2.m128);
const avxf p01 = madd(shuffle<0>(P_curve_0_1 - vPP),
htfm00,
madd(shuffle<1>(P_curve_0_1 - vPP),
htfm11,
shuffle<2>(P_curve_0_1 - vPP) * htfm22));
const avxf p23 = madd(shuffle<0>(P_curve_2_3 - vPP),
htfm00,
madd(shuffle<1>(P_curve_2_3 - vPP),
htfm11,
shuffle<2>(P_curve_2_3 - vPP)*htfm22));
const ssef p0 = _mm256_castps256_ps128(p01);
const ssef p1 = _mm256_extractf128_ps(p01, 1);
const ssef p2 = _mm256_castps256_ps128(p23);
const ssef p3 = _mm256_extractf128_ps(p23, 1);
const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1);
r_st = ((float4 &)P_curve_1).w;
const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3);
r_en = ((float4 &)P_curve_2).w;
#else /* __KERNEL_AVX2__ */
ssef htfm[] = { htfm0, htfm1, htfm2 };
ssef vP = load4f(P);
ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
@@ -333,10 +285,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);
r_st = ((float4 &)P_curve[1]).w;
r_en = ((float4 &)P_curve[2]).w;
#endif /* __KERNEL_AVX2__ */
float fc = 0.71f;
ssef vfc = ssef(fc);
ssef vfcxp3 = vfc * p3;
@@ -346,6 +294,8 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
vcurve_coef[2] = madd(ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));
r_st = ((float4 &)P_curve[1]).w;
r_en = ((float4 &)P_curve[2]).w;
}
#else
float3 curve_coef[4];
@@ -372,7 +322,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
float4 P_curve[4];
if(is_curve_primitive) {
if(type & PRIMITIVE_CURVE) {
P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
@@ -433,9 +383,8 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
/* begin loop */
while(!(tree >> (depth))) {
const float i_st = tree * resol;
const float i_en = i_st + (level * resol);
float i_st = tree * resol;
float i_en = i_st + (level * resol);
#ifdef __KERNEL_SSE2__
ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]);
@@ -509,23 +458,13 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
if(flags & CURVE_KN_RIBBONS) {
float3 tg = (p_en - p_st);
#ifdef __KERNEL_SSE__
const float3 tg_sq = tg * tg;
float w = tg_sq.x + tg_sq.y;
#else
float w = tg.x * tg.x + tg.y * tg.y;
#endif
if(w == 0) {
tree++;
level = tree & -tree;
continue;
}
#ifdef __KERNEL_SSE__
const float3 p_sttg = p_st * tg;
w = -(p_sttg.x + p_sttg.y) / w;
#else
w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
#endif
w = saturate(w);
/* compute u on the curve segment */
@@ -557,13 +496,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
if(difl != 0.0f) {
mw_extension = min(difl * fabsf(bmaxz), extmax);
r_ext = mw_extension + r_curr;
#ifdef __KERNEL_SSE__
const float3 p_curr_sq = p_curr * p_curr;
const float3 dxxx = _mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128));
float d = dxxx.x;
#else
float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
#endif
float d0 = d - r_curr;
float d1 = d + r_curr;
float inv_mw_extension = 1.0f/mw_extension;
@@ -698,15 +631,6 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection
# define dot3(x, y) dot(x, y)
#endif
const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
if(time < prim_time.x || time > prim_time.y) {
return false;
}
}
int segment = PRIMITIVE_UNPACK_SEGMENT(type);
/* curve Intersection check */
int flags = kernel_data.curve.curveflags;
@@ -721,7 +645,7 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection
#ifndef __KERNEL_SSE2__
float4 P_curve[2];
if(is_curve_primitive) {
if(type & PRIMITIVE_CURVE) {
P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
}
@@ -756,7 +680,7 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection
#else
ssef P_curve[2];
if(is_curve_primitive) {
if(type & PRIMITIVE_CURVE) {
P_curve[0] = load4f(&kg->__curve_keys.data[k0].x);
P_curve[1] = load4f(&kg->__curve_keys.data[k1].x);
}
@@ -929,7 +853,7 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection
# undef len3_squared
# undef len3
# undef dot3
#endif
# endif
}
ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3)
@@ -966,7 +890,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_itfm;
Transform tfm = ccl_fetch(sd, ob_itfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
#endif
@@ -979,7 +903,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
int prim = kernel_tex_fetch(__prim_index, isect->prim);
float4 v00 = kernel_tex_fetch(__curves, prim);
int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float3 tg;
@@ -990,14 +914,14 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
float4 P_curve[4];
if(sd->type & PRIMITIVE_CURVE) {
if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) {
P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
}
else {
motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve);
motion_cardinal_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), ka, k0, k1, kb, P_curve);
}
float3 p[4];
@@ -1009,43 +933,43 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
P = P + D*t;
#ifdef __UV__
sd->u = isect->u;
sd->v = 0.0f;
ccl_fetch(sd, u) = isect->u;
ccl_fetch(sd, v) = 0.0f;
#endif
tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) {
sd->Ng = normalize(-(D - tg * (dot(tg, D))));
ccl_fetch(sd, Ng) = normalize(-(D - tg * (dot(tg, D))));
}
else {
/* direction from inside to surface of curve */
float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
sd->Ng = normalize(P - p_curr);
ccl_fetch(sd, Ng) = normalize(P - p_curr);
/* adjustment for changing radius */
float gd = isect->v;
if(gd != 0.0f) {
sd->Ng = sd->Ng - gd * tg;
sd->Ng = normalize(sd->Ng);
ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg;
ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng));
}
}
/* todo: sometimes the normal is still so that this is detected as
* backfacing even if cull backfaces is enabled */
sd->N = sd->Ng;
ccl_fetch(sd, N) = ccl_fetch(sd, Ng);
}
else {
float4 P_curve[2];
if(sd->type & PRIMITIVE_CURVE) {
if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) {
P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
}
else {
motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve);
}
float l = 1.0f;
@@ -1056,39 +980,39 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
float3 dif = P - float4_to_float3(P_curve[0]);
#ifdef __UV__
sd->u = dot(dif,tg)/l;
sd->v = 0.0f;
ccl_fetch(sd, u) = dot(dif,tg)/l;
ccl_fetch(sd, v) = 0.0f;
#endif
if(flag & CURVE_KN_TRUETANGENTGNORMAL) {
sd->Ng = -(D - tg * dot(tg, D));
sd->Ng = normalize(sd->Ng);
ccl_fetch(sd, Ng) = -(D - tg * dot(tg, D));
ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng));
}
else {
float gd = isect->v;
/* direction from inside to surface of curve */
sd->Ng = (dif - tg * sd->u * l) / (P_curve[0].w + sd->u * l * gd);
ccl_fetch(sd, Ng) = (dif - tg * ccl_fetch(sd, u) * l) / (P_curve[0].w + ccl_fetch(sd, u) * l * gd);
/* adjustment for changing radius */
if(gd != 0.0f) {
sd->Ng = sd->Ng - gd * tg;
sd->Ng = normalize(sd->Ng);
ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg;
ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng));
}
}
sd->N = sd->Ng;
ccl_fetch(sd, N) = ccl_fetch(sd, Ng);
}
#ifdef __DPDU__
/* dPdu/dPdv */
sd->dPdu = tg;
sd->dPdv = cross(tg, sd->Ng);
ccl_fetch(sd, dPdu) = tg;
ccl_fetch(sd, dPdv) = cross(tg, ccl_fetch(sd, Ng));
#endif
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_tfm;
Transform tfm = ccl_fetch(sd, ob_tfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
#endif

View File

@@ -50,12 +50,12 @@ ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg, int object,
ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, float4 keys[2])
{
if(step == numsteps) {
/* center step: regular key location */
/* center step: regular vertex location */
keys[0] = kernel_tex_fetch(__curve_keys, k0);
keys[1] = kernel_tex_fetch(__curve_keys, k1);
}
else {
/* center step is not stored in this array */
/* center step not stored in this array */
if(step > numsteps)
step--;
@@ -97,14 +97,14 @@ ccl_device_inline void motion_curve_keys(KernelGlobals *kg, int object, int prim
ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, int k2, int k3, float4 keys[4])
{
if(step == numsteps) {
/* center step: regular key location */
/* center step: regular vertex location */
keys[0] = kernel_tex_fetch(__curve_keys, k0);
keys[1] = kernel_tex_fetch(__curve_keys, k1);
keys[2] = kernel_tex_fetch(__curve_keys, k2);
keys[3] = kernel_tex_fetch(__curve_keys, k3);
}
else {
/* center step is not stored in this array */
/* center step not store in this array */
if(step > numsteps)
step--;
@@ -118,12 +118,7 @@ ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, in
}
/* return 2 curve key locations */
ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
int object,
int prim,
float time,
int k0, int k1, int k2, int k3,
float4 keys[4])
ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, int k2, int k3, float4 keys[4])
{
/* get motion info */
int numsteps, numkeys;
@@ -152,65 +147,6 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
keys[3] = (1.0f - t)*keys[3] + t*next_keys[3];
}
#ifdef __KERNEL_AVX2__
/* Similar to above, but returns keys as pair of two AVX registers with each
* holding two float4.
*/
ccl_device_inline void motion_cardinal_curve_keys_avx(KernelGlobals *kg,
int object,
int prim,
float time,
int k0, int k1,
int k2, int k3,
avxf *out_keys_0_1,
avxf *out_keys_2_3)
{
/* Get motion info. */
int numsteps, numkeys;
object_motion_info(kg, object, &numsteps, NULL, &numkeys);
/* Figure out which steps we need to fetch and their interpolation factor. */
int maxstep = numsteps * 2;
int step = min((int)(time*maxstep), maxstep - 1);
float t = time*maxstep - step;
/* Find attribute. */
AttributeElement elem;
int offset = find_attribute_curve_motion(kg,
object,
ATTR_STD_MOTION_VERTEX_POSITION,
&elem);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* Fetch key coordinates. */
float4 next_keys[4];
float4 keys[4];
motion_cardinal_curve_keys_for_step(kg,
offset,
numkeys,
numsteps,
step,
k0, k1, k2, k3,
keys);
motion_cardinal_curve_keys_for_step(kg,
offset,
numkeys,
numsteps,
step + 1,
k0, k1, k2, k3,
next_keys);
const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128);
const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128);
const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128);
const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128);
/* Interpolate between steps. */
*out_keys_0_1 = (1.0f - t) * keys_0_1 + t*next_keys_0_1;
*out_keys_2_3 = (1.0f - t) * keys_2_3 + t*next_keys_2_3;
}
#endif
#endif
CCL_NAMESPACE_END

View File

@@ -76,7 +76,7 @@ ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, uint4
normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
}
else {
/* center step is not stored in this array */
/* center step not stored in this array */
if(step > numsteps)
step--;
@@ -117,4 +117,312 @@ ccl_device_inline void motion_triangle_vertices(KernelGlobals *kg, int object, i
verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
}
/* Refine triangle intersection to more precise hit point. For rays that travel
* far the precision is often not so good, this reintersects the primitive from
* a closer distance. */
ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3])
{
float3 P = ray->P;
float3 D = ray->D;
float t = isect->t;
#ifdef __INTERSECTION_REFINE__
if(isect->object != OBJECT_NONE) {
if(UNLIKELY(t == 0.0f)) {
return P;
}
# ifdef __OBJECT_MOTION__
Transform tfm = ccl_fetch(sd, ob_itfm);
# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
# endif
P = transform_point(&tfm, P);
D = transform_direction(&tfm, D*t);
D = normalize_len(D, &t);
}
P = P + D*t;
/* compute refined intersection distance */
const float3 e1 = verts[0] - verts[2];
const float3 e2 = verts[1] - verts[2];
const float3 s1 = cross(D, e2);
const float invdivisor = 1.0f/dot(s1, e1);
const float3 d = P - verts[2];
const float3 s2 = cross(d, e1);
float rt = dot(e2, s2)*invdivisor;
/* compute refined position */
P = P + D*rt;
if(isect->object != OBJECT_NONE) {
# ifdef __OBJECT_MOTION__
Transform tfm = ccl_fetch(sd, ob_tfm);
# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
# endif
P = transform_point(&tfm, P);
}
return P;
#else
return P + D*t;
#endif
}
/* Same as above, except that isect->t is assumed to be in object space for instancing */
#ifdef __SUBSURFACE__
# if defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86))
ccl_device_noinline
# else
ccl_device_inline
# endif
float3 motion_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3])
{
float3 P = ray->P;
float3 D = ray->D;
float t = isect->t;
# ifdef __INTERSECTION_REFINE__
if(isect->object != OBJECT_NONE) {
# ifdef __OBJECT_MOTION__
Transform tfm = ccl_fetch(sd, ob_itfm);
# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
# endif
P = transform_point(&tfm, P);
D = transform_direction(&tfm, D);
D = normalize(D);
}
P = P + D*t;
/* compute refined intersection distance */
const float3 e1 = verts[0] - verts[2];
const float3 e2 = verts[1] - verts[2];
const float3 s1 = cross(D, e2);
const float invdivisor = 1.0f/dot(s1, e1);
const float3 d = P - verts[2];
const float3 s2 = cross(d, e1);
float rt = dot(e2, s2)*invdivisor;
P = P + D*rt;
if(isect->object != OBJECT_NONE) {
# ifdef __OBJECT_MOTION__
Transform tfm = ccl_fetch(sd, ob_tfm);
# else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
# endif
P = transform_point(&tfm, P);
}
return P;
# else
return P + D*t;
# endif
}
#endif
/* Setup of motion triangle specific parts of ShaderData, moved into this one
* function to more easily share computation of interpolated positions and
* normals */
/* return 3 triangle vertex normals */
ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool subsurface)
{
/* get shader */
ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
/* get motion info */
int numsteps, numverts;
object_motion_info(kg, ccl_fetch(sd, object), &numsteps, &numverts, NULL);
/* figure out which steps we need to fetch and their interpolation factor */
int maxstep = numsteps*2;
int step = min((int)(ccl_fetch(sd, time)*maxstep), maxstep-1);
float t = ccl_fetch(sd, time)*maxstep - step;
/* find attribute */
AttributeElement elem;
int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_POSITION, &elem);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* fetch vertex coordinates */
float3 verts[3], next_verts[3];
uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
/* interpolate between steps */
verts[0] = (1.0f - t)*verts[0] + t*next_verts[0];
verts[1] = (1.0f - t)*verts[1] + t*next_verts[1];
verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
/* compute refined position */
#ifdef __SUBSURFACE__
if(!subsurface)
#endif
ccl_fetch(sd, P) = motion_triangle_refine(kg, sd, isect, ray, verts);
#ifdef __SUBSURFACE__
else
ccl_fetch(sd, P) = motion_triangle_refine_subsurface(kg, sd, isect, ray, verts);
#endif
/* compute face normal */
float3 Ng;
if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED)
Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
else
Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
ccl_fetch(sd, Ng) = Ng;
ccl_fetch(sd, N) = Ng;
/* compute derivatives of P w.r.t. uv */
#ifdef __DPDU__
ccl_fetch(sd, dPdu) = (verts[0] - verts[2]);
ccl_fetch(sd, dPdv) = (verts[1] - verts[2]);
#endif
/* compute smooth normal */
if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
/* find attribute */
AttributeElement elem;
int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* fetch vertex coordinates */
float3 normals[3], next_normals[3];
motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals);
/* interpolate between steps */
normals[0] = (1.0f - t)*normals[0] + t*next_normals[0];
normals[1] = (1.0f - t)*normals[1] + t*next_normals[1];
normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
/* interpolate between vertices */
float u = ccl_fetch(sd, u);
float v = ccl_fetch(sd, v);
float w = 1.0f - u - v;
ccl_fetch(sd, N) = (u*normals[0] + v*normals[1] + w*normals[2]);
}
}
/* Ray intersection. We simply compute the vertex positions at the given ray
* time and do a ray intersection with the resulting triangle */
ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection *isect,
float3 P, float3 dir, float time, uint visibility, int object, int triAddr)
{
/* primitive index for vertex location lookup */
int prim = kernel_tex_fetch(__prim_index, triAddr);
int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, triAddr): object;
/* get vertex locations for intersection */
float3 verts[3];
motion_triangle_vertices(kg, fobject, prim, time, verts);
/* ray-triangle intersection, unoptimized */
float t, u, v;
if(ray_triangle_intersect_uv(P, dir, isect->t, verts[2], verts[0], verts[1], &u, &v, &t)) {
#ifdef __VISIBILITY_FLAG__
/* visibility flag test. we do it here under the assumption
* that most triangles are culled by node flags */
if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
#endif
{
isect->t = t;
isect->u = u;
isect->v = v;
isect->prim = triAddr;
isect->object = object;
isect->type = PRIMITIVE_MOTION_TRIANGLE;
return true;
}
}
return false;
}
/* Special ray intersection routines for subsurface scattering. In that case we
* only want to intersect with primitives in the same object, and if case of
* multiple hits we pick a single random primitive as the intersection point. */
#ifdef __SUBSURFACE__
ccl_device_inline void motion_triangle_intersect_subsurface(
KernelGlobals *kg,
SubsurfaceIntersection *ss_isect,
float3 P,
float3 dir,
float time,
int object,
int triAddr,
float tmax,
uint *lcg_state,
int max_hits)
{
/* primitive index for vertex location lookup */
int prim = kernel_tex_fetch(__prim_index, triAddr);
int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, triAddr): object;
/* get vertex locations for intersection */
float3 verts[3];
motion_triangle_vertices(kg, fobject, prim, time, verts);
/* ray-triangle intersection, unoptimized */
float t, u, v;
if(ray_triangle_intersect_uv(P, dir, tmax, verts[2], verts[0], verts[1], &u, &v, &t)) {
for(int i = min(max_hits, ss_isect->num_hits) - 1; i >= 0; --i) {
if(ss_isect->hits[i].t == t) {
return;
}
}
ss_isect->num_hits++;
int hit;
if(ss_isect->num_hits <= max_hits) {
hit = ss_isect->num_hits - 1;
}
else {
/* reservoir sampling: if we are at the maximum number of
* hits, randomly replace element or skip it */
hit = lcg_step_uint(lcg_state) % ss_isect->num_hits;
if(hit >= max_hits)
return;
}
/* record intersection */
Intersection *isect = &ss_isect->hits[hit];
isect->t = t;
isect->u = u;
isect->v = v;
isect->prim = triAddr;
isect->object = object;
isect->type = PRIMITIVE_MOTION_TRIANGLE;
/* Record geometric normal. */
ss_isect->Ng[hit] = normalize(cross(verts[1] - verts[0],
verts[2] - verts[0]));
}
}
#endif
CCL_NAMESPACE_END

View File

@@ -1,280 +0,0 @@
/*
* Copyright 2011-2016 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Motion Triangle Primitive
*
* These are stored as regular triangles, plus extra positions and normals at
* times other than the frame center. Computing the triangle vertex positions
* or normals at a given ray time is a matter of interpolation of the two steps
* between which the ray time lies.
*
* The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION
* and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes.
*/
CCL_NAMESPACE_BEGIN
/* Refine triangle intersection to more precise hit point. For rays that travel
* far the precision is often not so good, this reintersects the primitive from
* a closer distance.
*/
ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg,
ShaderData *sd,
const Intersection *isect,
const Ray *ray,
float3 verts[3])
{
float3 P = ray->P;
float3 D = ray->D;
float t = isect->t;
#ifdef __INTERSECTION_REFINE__
if(isect->object != OBJECT_NONE) {
if(UNLIKELY(t == 0.0f)) {
return P;
}
# ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_itfm;
# else
Transform tfm = object_fetch_transform(kg,
isect->object,
OBJECT_INVERSE_TRANSFORM);
# endif
P = transform_point(&tfm, P);
D = transform_direction(&tfm, D*t);
D = normalize_len(D, &t);
}
P = P + D*t;
/* Compute refined intersection distance. */
const float3 e1 = verts[0] - verts[2];
const float3 e2 = verts[1] - verts[2];
const float3 s1 = cross(D, e2);
const float invdivisor = 1.0f/dot(s1, e1);
const float3 d = P - verts[2];
const float3 s2 = cross(d, e1);
float rt = dot(e2, s2)*invdivisor;
/* Compute refined position. */
P = P + D*rt;
if(isect->object != OBJECT_NONE) {
# ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_tfm;
# else
Transform tfm = object_fetch_transform(kg,
isect->object,
OBJECT_TRANSFORM);
# endif
P = transform_point(&tfm, P);
}
return P;
#else
return P + D*t;
#endif
}
/* Same as above, except that isect->t is assumed to be in object space
* for instancing.
*/
#ifdef __SUBSURFACE__
# if defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86))
ccl_device_noinline
# else
ccl_device_inline
# endif
float3 motion_triangle_refine_subsurface(KernelGlobals *kg,
ShaderData *sd,
const Intersection *isect,
const Ray *ray,
float3 verts[3])
{
float3 P = ray->P;
float3 D = ray->D;
float t = isect->t;
# ifdef __INTERSECTION_REFINE__
if(isect->object != OBJECT_NONE) {
# ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_itfm;
# else
Transform tfm = object_fetch_transform(kg,
isect->object,
OBJECT_INVERSE_TRANSFORM);
# endif
P = transform_point(&tfm, P);
D = transform_direction(&tfm, D);
D = normalize(D);
}
P = P + D*t;
/* compute refined intersection distance */
const float3 e1 = verts[0] - verts[2];
const float3 e2 = verts[1] - verts[2];
const float3 s1 = cross(D, e2);
const float invdivisor = 1.0f/dot(s1, e1);
const float3 d = P - verts[2];
const float3 s2 = cross(d, e1);
float rt = dot(e2, s2)*invdivisor;
P = P + D*rt;
if(isect->object != OBJECT_NONE) {
# ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_tfm;
# else
Transform tfm = object_fetch_transform(kg,
isect->object,
OBJECT_TRANSFORM);
# endif
P = transform_point(&tfm, P);
}
return P;
# else /* __INTERSECTION_REFINE__ */
return P + D*t;
# endif /* __INTERSECTION_REFINE__ */
}
#endif /* __SUBSURFACE__ */
/* Ray intersection. We simply compute the vertex positions at the given ray
* time and do a ray intersection with the resulting triangle.
*/
ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
Intersection *isect,
float3 P,
float3 dir,
float time,
uint visibility,
int object,
int prim_addr)
{
/* Primitive index for vertex location lookup. */
int prim = kernel_tex_fetch(__prim_index, prim_addr);
int fobject = (object == OBJECT_NONE)
? kernel_tex_fetch(__prim_object, prim_addr)
: object;
/* Get vertex locations for intersection. */
float3 verts[3];
motion_triangle_vertices(kg, fobject, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
if(ray_triangle_intersect_uv(P,
dir,
isect->t,
verts[2], verts[0], verts[1],
&u, &v, &t))
{
#ifdef __VISIBILITY_FLAG__
/* Visibility flag test. we do it here under the assumption
* that most triangles are culled by node flags.
*/
if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
#endif
{
isect->t = t;
isect->u = u;
isect->v = v;
isect->prim = prim_addr;
isect->object = object;
isect->type = PRIMITIVE_MOTION_TRIANGLE;
return true;
}
}
return false;
}
/* Special ray intersection routines for subsurface scattering. In that case we
* only want to intersect with primitives in the same object, and if case of
* multiple hits we pick a single random primitive as the intersection point.
*/
#ifdef __SUBSURFACE__
ccl_device_inline void motion_triangle_intersect_subsurface(
KernelGlobals *kg,
SubsurfaceIntersection *ss_isect,
float3 P,
float3 dir,
float time,
int object,
int prim_addr,
float tmax,
uint *lcg_state,
int max_hits)
{
/* Primitive index for vertex location lookup. */
int prim = kernel_tex_fetch(__prim_index, prim_addr);
int fobject = (object == OBJECT_NONE)
? kernel_tex_fetch(__prim_object, prim_addr)
: object;
/* Get vertex locations for intersection. */
float3 verts[3];
motion_triangle_vertices(kg, fobject, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
if(ray_triangle_intersect_uv(P,
dir,
tmax,
verts[2], verts[0], verts[1],
&u, &v, &t))
{
for(int i = min(max_hits, ss_isect->num_hits) - 1; i >= 0; --i) {
if(ss_isect->hits[i].t == t) {
return;
}
}
ss_isect->num_hits++;
int hit;
if(ss_isect->num_hits <= max_hits) {
hit = ss_isect->num_hits - 1;
}
else {
/* Reservoir sampling: if we are at the maximum number of
* hits, randomly replace element or skip it.
*/
hit = lcg_step_uint(lcg_state) % ss_isect->num_hits;
if(hit >= max_hits)
return;
}
/* Record intersection. */
Intersection *isect = &ss_isect->hits[hit];
isect->t = t;
isect->u = u;
isect->v = v;
isect->prim = prim_addr;
isect->object = object;
isect->type = PRIMITIVE_MOTION_TRIANGLE;
/* Record geometric normal. */
ss_isect->Ng[hit] = normalize(cross(verts[1] - verts[0],
verts[2] - verts[0]));
}
}
#endif /* __SUBSURFACE__ */
CCL_NAMESPACE_END

View File

@@ -1,123 +0,0 @@
/*
* Copyright 2011-2016 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Motion Triangle Primitive
*
* These are stored as regular triangles, plus extra positions and normals at
* times other than the frame center. Computing the triangle vertex positions
* or normals at a given ray time is a matter of interpolation of the two steps
* between which the ray time lies.
*
* The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION
* and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes.
*/
CCL_NAMESPACE_BEGIN
/* Setup of motion triangle specific parts of ShaderData, moved into this one
* function to more easily share computation of interpolated positions and
* normals */
/* return 3 triangle vertex normals */
ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg,
ShaderData *sd, const
Intersection *isect,
const Ray *ray,
bool subsurface)
{
/* Get shader. */
sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
/* Get motion info. */
/* TODO(sergey): This logic is really similar to motion_triangle_vertices(),
* can we de-duplicate something here?
*/
int numsteps, numverts;
object_motion_info(kg, sd->object, &numsteps, &numverts, NULL);
/* Figure out which steps we need to fetch and their interpolation factor. */
int maxstep = numsteps*2;
int step = min((int)(sd->time*maxstep), maxstep-1);
float t = sd->time*maxstep - step;
/* Find attribute. */
AttributeElement elem;
int offset = find_attribute_motion(kg, sd->object,
ATTR_STD_MOTION_VERTEX_POSITION,
&elem);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* Fetch vertex coordinates. */
float3 verts[3], next_verts[3];
uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
/* Interpolate between steps. */
verts[0] = (1.0f - t)*verts[0] + t*next_verts[0];
verts[1] = (1.0f - t)*verts[1] + t*next_verts[1];
verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
/* Compute refined position. */
#ifdef __SUBSURFACE__
if(subsurface) {
sd->P = motion_triangle_refine_subsurface(kg,
sd,
isect,
ray,
verts);
}
else
#endif /* __SUBSURFACE__*/
{
sd->P = motion_triangle_refine(kg, sd, isect, ray, verts);
}
/* Compute face normal. */
float3 Ng;
if(sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
}
else {
Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
}
sd->Ng = Ng;
sd->N = Ng;
/* Compute derivatives of P w.r.t. uv. */
#ifdef __DPDU__
sd->dPdu = (verts[0] - verts[2]);
sd->dPdv = (verts[1] - verts[2]);
#endif
/* Compute smooth normal. */
if(sd->shader & SHADER_SMOOTH_NORMAL) {
/* Find attribute. */
AttributeElement elem;
int offset = find_attribute_motion(kg,
sd->object,
ATTR_STD_MOTION_VERTEX_NORMAL,
&elem);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* Fetch vertex coordinates. */
float3 normals[3], next_normals[3];
motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals);
/* Interpolate between steps. */
normals[0] = (1.0f - t)*normals[0] + t*next_normals[0];
normals[1] = (1.0f - t)*normals[1] + t*next_normals[1];
normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
/* Interpolate between vertices. */
float u = sd->u;
float v = sd->v;
float w = 1.0f - u - v;
sd->N = (u*normals[0] + v*normals[1] + w*normals[2]);
}
}
CCL_NAMESPACE_END

Some files were not shown because too many files have changed in this diff Show More