Merge branch 'master' into cycles_disney_brdf
This commit is contained in:
@@ -404,7 +404,7 @@ option(WITH_CYCLES_STANDALONE_GUI "Build Cycles standalone with GUI" OFF)
|
||||
option(WITH_CYCLES_OSL "Build Cycles with OSL support" ${_init_CYCLES_OSL})
|
||||
option(WITH_CYCLES_OPENSUBDIV "Build Cycles with OpenSubdiv support" ${_init_CYCLES_OPENSUBDIV})
|
||||
option(WITH_CYCLES_CUDA_BINARIES "Build Cycles CUDA binaries" OFF)
|
||||
set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 sm_35 sm_37 sm_50 sm_52 CACHE STRING "CUDA architectures to build binaries for")
|
||||
set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 CACHE STRING "CUDA architectures to build binaries for")
|
||||
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
|
||||
unset(PLATFORM_DEFAULT)
|
||||
option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON)
|
||||
@@ -629,9 +629,21 @@ if(APPLE)
|
||||
set(CMAKE_FIND_ROOT_PATH ${CMAKE_OSX_SYSROOT})
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_OSX_DEPLOYMENT_TARGET)
|
||||
# 10.6 is our min. target, if you use higher sdk, weak linking happens
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.6" CACHE STRING "" FORCE)
|
||||
if(WITH_CXX11)
|
||||
# 10.9 is our min. target, if you use higher sdk, weak linking happens
|
||||
if(CMAKE_OSX_DEPLOYMENT_TARGET)
|
||||
if(${CMAKE_OSX_DEPLOYMENT_TARGET} VERSION_LESS 10.9)
|
||||
message(STATUS "Setting deployment target to 10.9, lower versions are incompatible with WITH_CXX11")
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "" FORCE)
|
||||
endif()
|
||||
else()
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "" FORCE)
|
||||
endif()
|
||||
else()
|
||||
if(NOT CMAKE_OSX_DEPLOYMENT_TARGET)
|
||||
# 10.6 is our min. target, if you use higher sdk, weak linking happens
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.6" CACHE STRING "" FORCE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT ${CMAKE_GENERATOR} MATCHES "Xcode")
|
||||
@@ -974,7 +986,7 @@ if(SUPPORT_SSE_BUILD)
|
||||
add_definitions(-D__SSE__ -D__MMX__)
|
||||
endif()
|
||||
if(SUPPORT_SSE2_BUILD)
|
||||
set(PLATFORM_CFLAGS " ${COMPILER_SSE2_FLAG} ${PLATFORM_CFLAGS}")
|
||||
set(PLATFORM_CFLAGS " ${PLATFORM_CFLAGS} ${COMPILER_SSE2_FLAG}")
|
||||
add_definitions(-D__SSE2__)
|
||||
if(NOT SUPPORT_SSE_BUILD) # dont double up
|
||||
add_definitions(-D__MMX__)
|
||||
|
||||
@@ -25,7 +25,8 @@
|
||||
ARGS=$( \
|
||||
getopt \
|
||||
-o s:i:t:h \
|
||||
--long source:,install:,tmp:,info:,threads:,help,show-deps,no-sudo,no-build,no-confirm,with-all,with-opencollada,\
|
||||
--long source:,install:,tmp:,info:,threads:,help,show-deps,no-sudo,no-build,no-confirm,use-cxx11,\
|
||||
with-all,with-opencollada,\
|
||||
ver-ocio:,ver-oiio:,ver-llvm:,ver-osl:,ver-osd:,ver-openvdb:,\
|
||||
force-all,force-python,force-numpy,force-boost,\
|
||||
force-ocio,force-openexr,force-oiio,force-llvm,force-osl,force-osd,force-openvdb,\
|
||||
@@ -103,6 +104,11 @@ ARGUMENTS_INFO="\"COMMAND LINE ARGUMENTS:
|
||||
--no-confirm
|
||||
Disable any interaction with user (suitable for automated run).
|
||||
|
||||
--use-cxx11
|
||||
Build all libraries in cpp11 'mode' (will be mandatory soon in blender2.8 branch).
|
||||
NOTE: If your compiler is gcc-6.0 or above, you probably *want* to enable this option (since it's default
|
||||
standard starting from this version).
|
||||
|
||||
--with-all
|
||||
By default, a number of optional and not-so-often needed libraries are not installed.
|
||||
This option will try to install them, at the cost of potential conflicts (depending on
|
||||
@@ -281,6 +287,7 @@ SUDO="sudo"
|
||||
|
||||
NO_BUILD=false
|
||||
NO_CONFIRM=false
|
||||
USE_CXX11=false
|
||||
|
||||
PYTHON_VERSION="3.5.1"
|
||||
PYTHON_VERSION_MIN="3.5"
|
||||
@@ -492,6 +499,9 @@ while true; do
|
||||
--no-confirm)
|
||||
NO_CONFIRM=true; shift; continue
|
||||
;;
|
||||
--use-cxx11)
|
||||
USE_CXX11=true; shift; continue
|
||||
;;
|
||||
--with-all)
|
||||
WITH_ALL=true; shift; continue
|
||||
;;
|
||||
@@ -766,7 +776,18 @@ OPENCOLLADA_REPO_BRANCH="master"
|
||||
|
||||
FFMPEG_SOURCE=( "http://ffmpeg.org/releases/ffmpeg-$FFMPEG_VERSION.tar.bz2" )
|
||||
|
||||
CXXFLAGS_BACK=$CXXFLAGS
|
||||
if [ "$USE_CXX11" = true ]; then
|
||||
WARNING "You are trying to use c++11, this *should* go smoothely with any very recent distribution
|
||||
However, if you are experiencing linking errors (also when building Blender itself), please try the following:
|
||||
* Re-run this script with `--build-all --force-all` options.
|
||||
* Ensure your gcc version is at the very least 4.8, if possible you should really rather use gcc-5.1 or above.
|
||||
|
||||
Please note that until the transition to C++11-built libraries if completed in your distribution, situation will
|
||||
remain fuzzy and incompatibilities may happen..."
|
||||
CXXFLAGS="$CXXFLAGS -std=c++11"
|
||||
export CXXFLAGS
|
||||
fi
|
||||
|
||||
#### Show Dependencies ####
|
||||
|
||||
@@ -779,7 +800,7 @@ Those libraries should be available as packages in all recent distributions (opt
|
||||
* libjpeg, libpng, libtiff, [libopenjpeg], [libopenal].
|
||||
* libx11, libxcursor, libxi, libxrandr, libxinerama (and other libx... as needed).
|
||||
* libsqlite3, libbz2, libssl, libfftw3, libxml2, libtinyxml, yasm, libyaml-cpp.
|
||||
* libsdl1.2, libglew, libglewmx.\""
|
||||
* libsdl1.2, libglew, [libglewmx].\""
|
||||
|
||||
DEPS_SPECIFIC_INFO="\"BUILDABLE DEPENDENCIES:
|
||||
|
||||
@@ -953,7 +974,7 @@ prepare_opt() {
|
||||
|
||||
# Check whether the current package needs to be recompiled, based on a dummy file containing a magic number in its name...
|
||||
magic_compile_check() {
|
||||
if [ -f $INST/.$1-magiccheck-$2 ]; then
|
||||
if [ -f $INST/.$1-magiccheck-$2-$USE_CXX11 ]; then
|
||||
return 0
|
||||
else
|
||||
return 1
|
||||
@@ -962,7 +983,7 @@ magic_compile_check() {
|
||||
|
||||
magic_compile_set() {
|
||||
rm -f $INST/.$1-magiccheck-*
|
||||
touch $INST/.$1-magiccheck-$2
|
||||
touch $INST/.$1-magiccheck-$2-$USE_CXX11
|
||||
}
|
||||
|
||||
# Note: should clean nicely in $INST, but not in $SRC, when we switch to a new version of a lib...
|
||||
@@ -1622,6 +1643,10 @@ compile_OIIO() {
|
||||
# fi
|
||||
cmake_d="$cmake_d -D USE_OCIO=OFF"
|
||||
|
||||
if [ "$USE_CXX11" = true ]; then
|
||||
cmake_d="$cmake_d -D OIIO_BUILD_CPP11=ON"
|
||||
fi
|
||||
|
||||
if file /bin/cp | grep -q '32-bit'; then
|
||||
cflags="-fPIC -m32 -march=i686"
|
||||
else
|
||||
@@ -2562,8 +2587,9 @@ install_DEB() {
|
||||
git libfreetype6-dev libx11-dev flex bison libtbb-dev libxxf86vm-dev \
|
||||
libxcursor-dev libxi-dev wget libsqlite3-dev libxrandr-dev libxinerama-dev \
|
||||
libbz2-dev libncurses5-dev libssl-dev liblzma-dev libreadline-dev $OPENJPEG_DEV \
|
||||
libopenal-dev libglew-dev libglewmx-dev yasm $THEORA_DEV $VORBIS_DEV $OGG_DEV \
|
||||
libopenal-dev libglew-dev yasm $THEORA_DEV $VORBIS_DEV $OGG_DEV \
|
||||
libsdl1.2-dev libfftw3-dev patch bzip2 libxml2-dev libtinyxml-dev libjemalloc-dev"
|
||||
# libglewmx-dev (broken in deb testing currently...)
|
||||
|
||||
OPENJPEG_USE=true
|
||||
VORBIS_USE=true
|
||||
@@ -4164,6 +4190,12 @@ print_info() {
|
||||
_buildargs="$_buildargs -U *OPENCOLORIO* -U *OPENEXR* -U *OPENIMAGEIO* -U *LLVM* -U *CYCLES*"
|
||||
_buildargs="$_buildargs -U *OPENSUBDIV* -U *OPENVDB* -U *COLLADA* -U *FFMPEG* -U *ALEMBIC*"
|
||||
|
||||
if [ "$USE_CXX11" = true ]; then
|
||||
_1="-D WITH_CXX11=ON"
|
||||
PRINT " $_1"
|
||||
_buildargs="$_buildargs $_1"
|
||||
fi
|
||||
|
||||
_1="-D WITH_CODEC_SNDFILE=ON"
|
||||
PRINT " $_1"
|
||||
_buildargs="$_buildargs $_1"
|
||||
@@ -4327,3 +4359,6 @@ PRINT ""
|
||||
# Switch back to user language.
|
||||
LANG=$LANG_BACK  # expand the backup variable (same pattern as CXXFLAGS_BACK below), not the literal string "LANG_BACK"
|
||||
export LANG
|
||||
|
||||
CXXFLAGS=$CXXFLAGS_BACK
|
||||
export CXXFLAGS
|
||||
|
||||
@@ -72,8 +72,7 @@ if 'cmake' in builder:
|
||||
# Set up OSX architecture
|
||||
if builder.endswith('x86_64_10_6_cmake'):
|
||||
cmake_extra_options.append('-DCMAKE_OSX_ARCHITECTURES:STRING=x86_64')
|
||||
cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-hack/bin/nvcc')
|
||||
cmake_extra_options.append('-DCUDA_NVCC8_EXECUTABLE=/usr/local/cuda8-hack/bin/nvcc')
|
||||
cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda8-hack/bin/nvcc')
|
||||
cmake_extra_options.append('-DWITH_CODEC_QUICKTIME=OFF')
|
||||
cmake_extra_options.append('-DCMAKE_OSX_DEPLOYMENT_TARGET=10.6')
|
||||
build_cubins = False
|
||||
@@ -94,8 +93,7 @@ if 'cmake' in builder:
|
||||
elif builder.startswith('win32'):
|
||||
bits = 32
|
||||
cmake_options.extend(['-G', 'Visual Studio 12 2013'])
|
||||
cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v7.5/bin/nvcc.exe')
|
||||
cmake_extra_options.append('-DCUDA_NVCC8_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/bin/nvcc.exe')
|
||||
cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/bin/nvcc.exe')
|
||||
|
||||
elif builder.startswith('linux'):
|
||||
tokens = builder.split("_")
|
||||
@@ -115,8 +113,7 @@ if 'cmake' in builder:
|
||||
cuda_chroot_name = 'buildbot_' + deb_name + '_x86_64'
|
||||
targets = ['player', 'blender', 'cuda']
|
||||
|
||||
cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-7.5/bin/nvcc')
|
||||
cmake_extra_options.append('-DCUDA_NVCC8_EXECUTABLE=/usr/local/cuda-8.0/bin/nvcc')
|
||||
cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-8.0/bin/nvcc')
|
||||
|
||||
cmake_options.append("-C" + os.path.join(blender_dir, cmake_config_file))
|
||||
|
||||
|
||||
@@ -97,6 +97,8 @@ if(WIN32)
|
||||
endif()
|
||||
|
||||
set(CPACK_PACKAGE_EXECUTABLES "blender" "blender")
|
||||
set(CPACK_CREATE_DESKTOP_LINKS "blender" "blender")
|
||||
|
||||
include(CPack)
|
||||
|
||||
# Target for build_archive.py script, to automatically pass along
|
||||
|
||||
@@ -24,7 +24,11 @@
|
||||
# Libraries configuration for Apple.
|
||||
|
||||
if(NOT DEFINED LIBDIR)
|
||||
set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/darwin-9.x.universal)
|
||||
if(WITH_CXX11)
|
||||
set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/darwin)
|
||||
else()
|
||||
set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/darwin-9.x.universal)
|
||||
endif()
|
||||
else()
|
||||
message(STATUS "Using pre-compiled LIBDIR: ${LIBDIR}")
|
||||
endif()
|
||||
@@ -74,7 +78,7 @@ if(WITH_CODEC_SNDFILE)
|
||||
set(SNDFILE ${LIBDIR}/sndfile)
|
||||
set(SNDFILE_INCLUDE_DIRS ${SNDFILE}/include)
|
||||
set(SNDFILE_LIBRARIES sndfile FLAC ogg vorbis vorbisenc)
|
||||
set(SNDFILE_LIBPATH ${SNDFILE}/lib ${FFMPEG}/lib) # TODO, deprecate
|
||||
set(SNDFILE_LIBPATH ${SNDFILE}/lib ${LIBDIR}/ffmpeg/lib) # TODO, deprecate
|
||||
endif()
|
||||
|
||||
if(WITH_PYTHON)
|
||||
@@ -132,7 +136,17 @@ if(WITH_IMAGE_OPENEXR)
|
||||
set(OPENEXR ${LIBDIR}/openexr)
|
||||
set(OPENEXR_INCLUDE_DIR ${OPENEXR}/include)
|
||||
set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${OPENEXR}/include/OpenEXR)
|
||||
set(OPENEXR_LIBRARIES Iex Half IlmImf Imath IlmThread)
|
||||
if(WITH_CXX11)
|
||||
set(OPENEXR_POSTFIX -2_2)
|
||||
else()
|
||||
set(OPENEXR_POSTFIX)
|
||||
endif()
|
||||
set(OPENEXR_LIBRARIES
|
||||
Iex${OPENEXR_POSTFIX}
|
||||
Half
|
||||
IlmImf${OPENEXR_POSTFIX}
|
||||
Imath${OPENEXR_POSTFIX}
|
||||
IlmThread${OPENEXR_POSTFIX})
|
||||
set(OPENEXR_LIBPATH ${OPENEXR}/lib)
|
||||
endif()
|
||||
|
||||
@@ -143,9 +157,22 @@ if(WITH_CODEC_FFMPEG)
|
||||
avcodec avdevice avformat avutil
|
||||
mp3lame swscale x264 xvidcore theora theoradec theoraenc vorbis vorbisenc vorbisfile ogg
|
||||
)
|
||||
if(WITH_CXX11)
|
||||
set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} schroedinger orc vpx)
|
||||
endif()
|
||||
set(FFMPEG_LIBPATH ${FFMPEG}/lib)
|
||||
endif()
|
||||
|
||||
if(WITH_OPENJPEG OR WITH_CODEC_FFMPEG)
|
||||
# use openjpeg from libdir that is linked into ffmpeg
|
||||
if(WITH_CXX11)
|
||||
set(OPENJPEG ${LIBDIR}/openjpeg)
|
||||
set(WITH_SYSTEM_OPENJPEG ON)
|
||||
set(OPENJPEG_INCLUDE_DIRS ${OPENJPEG}/include)
|
||||
set(OPENJPEG_LIBRARIES ${OPENJPEG}/lib/libopenjpeg.a)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
find_library(SYSTEMSTUBS_LIBRARY
|
||||
NAMES
|
||||
SystemStubs
|
||||
@@ -223,7 +250,11 @@ if(WITH_SDL)
|
||||
set(SDL_INCLUDE_DIR ${SDL}/include)
|
||||
set(SDL_LIBRARY SDL2)
|
||||
set(SDL_LIBPATH ${SDL}/lib)
|
||||
set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -lazy_framework ForceFeedback")
|
||||
if(WITH_CXX11)
|
||||
set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -framework ForceFeedback")
|
||||
else()
|
||||
set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -lazy_framework ForceFeedback")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(PNG "${LIBDIR}/png")
|
||||
@@ -244,22 +275,27 @@ endif()
|
||||
if(WITH_BOOST)
|
||||
set(BOOST ${LIBDIR}/boost)
|
||||
set(BOOST_INCLUDE_DIR ${BOOST}/include)
|
||||
if(WITH_CXX11)
|
||||
set(BOOST_POSTFIX)
|
||||
else()
|
||||
set(BOOST_POSTFIX -mt)
|
||||
endif()
|
||||
set(BOOST_LIBRARIES
|
||||
boost_date_time-mt
|
||||
boost_filesystem-mt
|
||||
boost_regex-mt
|
||||
boost_system-mt
|
||||
boost_thread-mt
|
||||
boost_wave-mt
|
||||
boost_date_time${BOOST_POSTFIX}
|
||||
boost_filesystem${BOOST_POSTFIX}
|
||||
boost_regex${BOOST_POSTFIX}
|
||||
boost_system${BOOST_POSTFIX}
|
||||
boost_thread${BOOST_POSTFIX}
|
||||
boost_wave${BOOST_POSTFIX}
|
||||
)
|
||||
if(WITH_INTERNATIONAL)
|
||||
list(APPEND BOOST_LIBRARIES boost_locale-mt)
|
||||
list(APPEND BOOST_LIBRARIES boost_locale${BOOST_POSTFIX})
|
||||
endif()
|
||||
if(WITH_CYCLES_NETWORK)
|
||||
list(APPEND BOOST_LIBRARIES boost_serialization-mt)
|
||||
list(APPEND BOOST_LIBRARIES boost_serialization${BOOST_POSTFIX})
|
||||
endif()
|
||||
if(WITH_OPENVDB)
|
||||
list(APPEND BOOST_LIBRARIES boost_iostreams-mt)
|
||||
list(APPEND BOOST_LIBRARIES boost_iostreams${BOOST_POSTFIX})
|
||||
endif()
|
||||
set(BOOST_LIBPATH ${BOOST}/lib)
|
||||
set(BOOST_DEFINITIONS)
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
BASE_DIR="$PWD"
|
||||
|
||||
blender_srcdir=$(dirname -- $0)/../..
|
||||
blender_version=$(grep "BLENDER_VERSION\s" "$blender_srcdir/source/blender/blenkernel/BKE_blender.h" | awk '{print $3}')
|
||||
blender_version_char=$(grep "BLENDER_VERSION_CHAR\s" "$blender_srcdir/source/blender/blenkernel/BKE_blender.h" | awk '{print $3}')
|
||||
blender_version_cycle=$(grep "BLENDER_VERSION_CYCLE\s" "$blender_srcdir/source/blender/blenkernel/BKE_blender.h" | awk '{print $3}')
|
||||
blender_subversion=$(grep "BLENDER_SUBVERSION\s" "$blender_srcdir/source/blender/blenkernel/BKE_blender.h" | awk '{print $3}')
|
||||
blender_version=$(grep "BLENDER_VERSION\s" "$blender_srcdir/source/blender/blenkernel/BKE_blender_version.h" | awk '{print $3}')
|
||||
blender_version_char=$(grep "BLENDER_VERSION_CHAR\s" "$blender_srcdir/source/blender/blenkernel/BKE_blender_version.h" | awk '{print $3}')
|
||||
blender_version_cycle=$(grep "BLENDER_VERSION_CYCLE\s" "$blender_srcdir/source/blender/blenkernel/BKE_blender_version.h" | awk '{print $3}')
|
||||
blender_subversion=$(grep "BLENDER_SUBVERSION\s" "$blender_srcdir/source/blender/blenkernel/BKE_blender_version.h" | awk '{print $3}')
|
||||
|
||||
if [ "$blender_version_cycle" = "release" ] ; then
|
||||
VERSION=$(expr $blender_version / 100).$(expr $blender_version % 100)$blender_version_char
|
||||
|
||||
@@ -4,7 +4,7 @@ Persistent Handler Example
|
||||
|
||||
By default handlers are freed when loading new files, in some cases you may
|
||||
want the handler to stay running across multiple files (when the handler is
|
||||
part of an addon for example).
|
||||
part of an add-on for example).
|
||||
|
||||
For this the :data:`bpy.app.handlers.persistent` decorator needs to be used.
|
||||
"""
|
||||
|
||||
@@ -5,7 +5,7 @@ Intro
|
||||
.. warning::
|
||||
|
||||
Most of this object should only be useful if you actually manipulate i18n stuff from Python.
|
||||
If you are a regular addon, you should only bother about :const:`contexts` member,
|
||||
If you are a regular add-on, you should only bother about :const:`contexts` member,
|
||||
and the :func:`register`/:func:`unregister` functions! The :func:`pgettext` family of functions
|
||||
should only be used in rare, specific cases (like e.g. complex "composited" UI strings...).
|
||||
|
||||
@@ -21,7 +21,7 @@ Intro
|
||||
Then, call ``bpy.app.translations.register(__name__, your_dict)`` in your ``register()`` function, and
|
||||
``bpy.app.translations.unregister(__name__)`` in your ``unregister()`` one.
|
||||
|
||||
The ``Manage UI translations`` addon has several functions to help you collect strings to translate, and
|
||||
The ``Manage UI translations`` add-on has several functions to help you collect strings to translate, and
|
||||
generate the needed python code (the translation dictionary), as well as optional intermediary po files
|
||||
if you want some... See
|
||||
`How to Translate Blender <http://wiki.blender.org/index.php/Dev:Doc/Process/Translate_Blender>`_ and
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
bl_info = {
|
||||
"name": "Example Addon Preferences",
|
||||
"name": "Example Add-on Preferences",
|
||||
"author": "Your Name Here",
|
||||
"version": (1, 0),
|
||||
"blender": (2, 65, 0),
|
||||
"location": "SpaceBar Search -> Addon Preferences Example",
|
||||
"description": "Example Addon",
|
||||
"location": "SpaceBar Search -> Add-on Preferences Example",
|
||||
"description": "Example Add-on",
|
||||
"warning": "",
|
||||
"wiki_url": "",
|
||||
"tracker_url": "",
|
||||
@@ -18,7 +18,7 @@ from bpy.props import StringProperty, IntProperty, BoolProperty
|
||||
|
||||
|
||||
class ExampleAddonPreferences(AddonPreferences):
|
||||
# this must match the addon name, use '__package__'
|
||||
# this must match the add-on name, use '__package__'
|
||||
# when defining this in a submodule of a python package.
|
||||
bl_idname = __name__
|
||||
|
||||
@@ -37,7 +37,7 @@ class ExampleAddonPreferences(AddonPreferences):
|
||||
|
||||
def draw(self, context):
|
||||
layout = self.layout
|
||||
layout.label(text="This is a preferences view for our addon")
|
||||
layout.label(text="This is a preferences view for our add-on")
|
||||
layout.prop(self, "filepath")
|
||||
layout.prop(self, "number")
|
||||
layout.prop(self, "boolean")
|
||||
@@ -46,7 +46,7 @@ class ExampleAddonPreferences(AddonPreferences):
|
||||
class OBJECT_OT_addon_prefs_example(Operator):
|
||||
"""Display example preferences"""
|
||||
bl_idname = "object.addon_prefs_example"
|
||||
bl_label = "Addon Preferences Example"
|
||||
bl_label = "Add-on Preferences Example"
|
||||
bl_options = {'REGISTER', 'UNDO'}
|
||||
|
||||
def execute(self, context):
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
Extending Menus
|
||||
+++++++++++++++
|
||||
|
||||
When creating menus for addons you can't reference menus in Blender's default
|
||||
scripts.
|
||||
Instead, the addon can add menu items to existing menus.
|
||||
When creating menus for add-ons you can't reference menus
|
||||
in Blender's default scripts.
|
||||
Instead, the add-on can add menu items to existing menus.
|
||||
|
||||
The function menu_draw acts like :class:`Menu.draw`.
|
||||
"""
|
||||
|
||||
@@ -13,7 +13,7 @@ be animated, accessed from the user interface and from python.
|
||||
definitions are not, this means whenever you load blender the class needs
|
||||
to be registered too.
|
||||
|
||||
This is best done by creating an addon which loads on startup and registers
|
||||
This is best done by creating an add-on which loads on startup and registers
|
||||
your properties.
|
||||
|
||||
.. note::
|
||||
|
||||
@@ -77,22 +77,22 @@ To run as modules:
|
||||
- The obvious way, ``import some_module`` command from the text window or interactive console.
|
||||
- Open as a text block and tick "Register" option, this will load with the blend file.
|
||||
- copy into one of the directories ``scripts/startup``, where they will be automatically imported on startup.
|
||||
- define as an addon, enabling the addon will load it as a Python module.
|
||||
- define as an add-on, enabling the add-on will load it as a Python module.
|
||||
|
||||
|
||||
Addons
|
||||
Add-ons
|
||||
-------
|
||||
|
||||
Some of Blender's functionality is best kept optional,
|
||||
alongside scripts loaded at startup we have addons which are kept in their own directory ``scripts/addons``,
|
||||
alongside scripts loaded at startup we have add-ons which are kept in their own directory ``scripts/addons``,
|
||||
and only load on startup if selected from the user preferences.
|
||||
|
||||
The only difference between addons and built-in Python modules is that addons must contain a ``bl_info``
|
||||
The only difference between add-ons and built-in Python modules is that add-ons must contain a ``bl_info``
|
||||
variable which Blender uses to read metadata such as name, author, category and URL.
|
||||
|
||||
The user preferences addon listing uses **bl_info** to display information about each addon.
|
||||
The User Preferences add-on listing uses **bl_info** to display information about each add-on.
|
||||
|
||||
`See Addons <http://wiki.blender.org/index.php/Dev:2.5/Py/Scripts/Guidelines/Addons>`__
|
||||
`See Add-ons <http://wiki.blender.org/index.php/Dev:2.5/Py/Scripts/Guidelines/Addons>`__
|
||||
for details on the ``bl_info`` dictionary.
|
||||
|
||||
|
||||
@@ -223,7 +223,7 @@ These functions usually appear at the bottom of the script containing class regi
|
||||
You can also use them for internal purposes setting up data for your own tools but take care
|
||||
since register won't re-run when a new blend file is loaded.
|
||||
|
||||
The register/unregister calls are used so it's possible to toggle addons and reload scripts while Blender runs.
|
||||
The register/unregister calls are used so it's possible to toggle add-ons and reload scripts while Blender runs.
|
||||
If the register calls were placed in the body of the script, registration would be called on import,
|
||||
meaning there would be no distinction between importing a module or loading its classes into Blender.
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
|
||||
Addon Tutorial
|
||||
##############
|
||||
Add-on Tutorial
|
||||
###############
|
||||
|
||||
************
|
||||
Introduction
|
||||
@@ -36,6 +36,7 @@ Suggested reading before starting this tutorial.
|
||||
To best troubleshoot any error message Python prints while writing scripts, run Blender from a terminal,
|
||||
see :ref:`Use The Terminal <use_the_terminal>`.
|
||||
|
||||
|
||||
Documentation Links
|
||||
===================
|
||||
|
||||
@@ -46,51 +47,48 @@ While going through the tutorial you may want to look into our reference documen
|
||||
- :mod:`bpy.context` api reference. -
|
||||
*Handy to have a list of available items your script may operate on.*
|
||||
- :class:`bpy.types.Operator`. -
|
||||
*The following addons define operators, these docs give details and more examples of operators.*
|
||||
*The following add-ons define operators, these docs give details and more examples of operators.*
|
||||
|
||||
|
||||
******
|
||||
Addons
|
||||
******
|
||||
*******
|
||||
Add-ons
|
||||
*******
|
||||
|
||||
What is an Add-on?
|
||||
==================
|
||||
|
||||
What is an Addon?
|
||||
=================
|
||||
|
||||
An addon is simply a Python module with some additional requirements so Blender can display it in a list with useful
|
||||
An add-on is simply a Python module with some additional requirements so Blender can display it in a list with useful
|
||||
information.
|
||||
|
||||
To give an example, here is the simplest possible addon.
|
||||
|
||||
To give an example, here is the simplest possible add-on.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
bl_info = {"name": "My Test Addon", "category": "Object"}
|
||||
bl_info = {"name": "My Test Add-on", "category": "Object"}
|
||||
def register():
|
||||
print("Hello World")
|
||||
def unregister():
|
||||
print("Goodbye World")
|
||||
|
||||
|
||||
- ``bl_info`` is a dictionary containing addon meta-data such as the title, version and author to be displayed in the
|
||||
user preferences addon list.
|
||||
- ``register`` is a function which only runs when enabling the addon, this means the module can be loaded without
|
||||
activating the addon.
|
||||
- ``unregister`` is a function to unload anything setup by ``register``, this is called when the addon is disabled.
|
||||
- ``bl_info`` is a dictionary containing add-on metadata such as the title,
|
||||
version and author to be displayed in the user preferences add-on list.
|
||||
- ``register`` is a function which only runs when enabling the add-on,
|
||||
this means the module can be loaded without activating the add-on.
|
||||
- ``unregister`` is a function to unload anything setup by ``register``, this is called when the add-on is disabled.
|
||||
|
||||
|
||||
Notice this add-on does not do anything related to Blender, (the :mod:`bpy` module is not imported for example).
|
||||
|
||||
Notice this addon does not do anything related to Blender, (the :mod:`bpy` module is not imported for example).
|
||||
This is a contrived example of an add-on that serves to illustrate the point
|
||||
that the base requirements of an add-on are simple.
|
||||
|
||||
This is a contrived example of an addon that serves to illustrate the point
|
||||
that the base requirements of an addon are simple.
|
||||
|
||||
An addon will typically register operators, panels, menu items etc, but its worth noting that _any_ script can do this,
|
||||
An add-on will typically register operators, panels, menu items etc, but it's worth noting that *any* script can do this,
|
||||
when executed from the text editor or even the interactive console - there is nothing inherently different about an
|
||||
addon that allows it to integrate with Blender, such functionality is just provided by the :mod:`bpy` module for any
|
||||
add-on that allows it to integrate with Blender, such functionality is just provided by the :mod:`bpy` module for any
|
||||
script to access.
|
||||
|
||||
So an addon is just a way to encapsulate a Python module in a way a user can easily utilize.
|
||||
So an add-on is just a way to encapsulate a Python module in a way a user can easily utilize.
|
||||
|
||||
.. note::
|
||||
|
||||
@@ -99,14 +97,14 @@ So an addon is just a way to encapsulate a Python module in a way a user can eas
|
||||
Messages will be printed when enabling and disabling.
|
||||
|
||||
|
||||
Your First Addon
|
||||
================
|
||||
Your First Add-on
|
||||
=================
|
||||
|
||||
The simplest possible addon above was useful as an example but not much else.
|
||||
This next addon is simple but shows how to integrate a script into Blender using an ``Operator``
|
||||
The simplest possible add-on above is useful as an example but not much else.
|
||||
This next add-on is simple but shows how to integrate a script into Blender using an ``Operator``
|
||||
which is the typical way to define a tool accessed from menus, buttons and keyboard shortcuts.
|
||||
|
||||
For the first example we'll make a script that simply moves all objects in a scene.
|
||||
For the first example we will make a script that simply moves all objects in a scene.
|
||||
|
||||
|
||||
Write The Script
|
||||
@@ -130,13 +128,13 @@ Add the following script to the text editor in Blender.
|
||||
:alt: Run Script button
|
||||
|
||||
Click the Run Script button, all objects in the active scene are moved by 1.0 Blender unit.
|
||||
Next we'll make this script into an addon.
|
||||
Next we will make this script into an add-on.
|
||||
|
||||
|
||||
Write the Addon (Simple)
|
||||
------------------------
|
||||
Write the Add-on (Simple)
|
||||
-------------------------
|
||||
|
||||
This addon takes the body of the script above, and adds them to an operator's ``execute()`` function.
|
||||
This add-on takes the body of the script above, and adds it to an operator's ``execute()`` function.
|
||||
|
||||
|
||||
.. code-block:: python
|
||||
@@ -173,7 +171,7 @@ This addon takes the body of the script above, and adds them to an operator's ``
|
||||
|
||||
|
||||
# This allows you to run the script directly from Blender's text editor
|
||||
# to test the addon without having to install it.
|
||||
# to test the add-on without having to install it.
|
||||
if __name__ == "__main__":
|
||||
register()
|
||||
|
||||
@@ -206,33 +204,33 @@ Do this by pressing :kbd:`Spacebar` to bring up the operator search dialog and t
|
||||
|
||||
The objects should move as before.
|
||||
|
||||
*Keep this addon open in Blender for the next step - Installing.*
|
||||
*Keep this add-on open in Blender for the next step - Installing.*
|
||||
|
||||
|
||||
Install The Addon
|
||||
-----------------
|
||||
Install The Add-on
|
||||
------------------
|
||||
|
||||
Once you have your addon within in Blender's text editor,
|
||||
Once you have your add-on within Blender's text editor,
|
||||
you will want to be able to install it so it can be enabled in the user preferences to load on startup.
|
||||
|
||||
Even though the addon above is a test, lets go through the steps anyway so you know how to do it for later.
|
||||
Even though the add-on above is a test, let's go through the steps anyway so you know how to do it for later.
|
||||
|
||||
To install the Blender text as an addon you will first have to save it to disk, take care to obey the naming
|
||||
To install the Blender text as an add-on you will first have to save it to disk, take care to obey the naming
|
||||
restrictions that apply to Python modules and end with a ``.py`` extension.
|
||||
|
||||
Once the file is on disk, you can install it as you would for an addon downloaded online.
|
||||
Once the file is on disk, you can install it as you would for an add-on downloaded online.
|
||||
|
||||
Open the user :menuselection:`File -> User Preferences`,
|
||||
Select the *Addon* section, press *Install Addon...* and select the file.
|
||||
Open :menuselection:`File --> User Preferences`,
|
||||
Select the *Add-on* section, press *Install Add-on...* and select the file.
|
||||
|
||||
Now the addon will be listed and you can enable it by pressing the check-box,
|
||||
Now the add-on will be listed and you can enable it by pressing the check-box,
|
||||
if you want it to be enabled on restart, press *Save as Default*.
|
||||
|
||||
.. note::
|
||||
|
||||
The destination of the addon depends on your Blender configuration.
|
||||
When installing an addon the source and destination path are printed in the console.
|
||||
You can also find addon path locations by running this in the Python console.
|
||||
The destination of the add-on depends on your Blender configuration.
|
||||
When installing an add-on the source and destination path are printed in the console.
|
||||
You can also find add-on path locations by running this in the Python console.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@@ -243,17 +241,17 @@ if you want it to be enabled on restart, press *Save as Default*.
|
||||
`Directory Layout <https://www.blender.org/manual/getting_started/installing_blender/directorylayout.html>`_
|
||||
|
||||
|
||||
Your Second Addon
|
||||
=================
|
||||
Your Second Add-on
|
||||
==================
|
||||
|
||||
For our second addon, we will focus on object instancing - this is - to make linked copies of an object in a
|
||||
For our second add-on, we will focus on object instancing - this is - to make linked copies of an object in a
|
||||
similar way to what you may have seen with the array modifier.
|
||||
|
||||
|
||||
Write The Script
|
||||
----------------
|
||||
|
||||
As before, first we will start with a script, develop it, then convert into an addon.
|
||||
As before, first we will start with a script, develop it, then convert into an add-on.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@@ -324,17 +322,17 @@ allows vectors to be multiplied by numbers and matrices.
|
||||
|
||||
If you are interested in this area, read into :class:`mathutils.Vector` - there are many handy utility functions
|
||||
such as getting the angle between vectors, cross product, dot products
|
||||
as well as more advanced functions in :mod:`mathutils.geometry` such as bezier spline interpolation and
|
||||
as well as more advanced functions in :mod:`mathutils.geometry` such as Bézier Spline interpolation and
|
||||
ray-triangle intersection.
|
||||
|
||||
For now we'll focus on making this script an addon, but its good to know that this 3D math module is available and
|
||||
For now we will focus on making this script an add-on, but it's good to know that this 3D math module is available and
|
||||
can help you with more advanced functionality later on.
|
||||
|
||||
|
||||
Write the Addon
|
||||
---------------
|
||||
Write the Add-on
|
||||
----------------
|
||||
|
||||
The first step is to convert the script as-is into an addon.
|
||||
The first step is to convert the script as-is into an add-on.
|
||||
|
||||
|
||||
.. code-block:: python
|
||||
@@ -381,7 +379,7 @@ The first step is to convert the script as-is into an addon.
|
||||
register()
|
||||
|
||||
|
||||
Everything here has been covered in the previous steps, you may want to try run the addon still
|
||||
Everything here has been covered in the previous steps; you may still want to try running the add-on
|
||||
and consider what could be done to make it more useful.
|
||||
|
||||
|
||||
@@ -434,7 +432,7 @@ however the link above includes examples of more advanced property usage.
|
||||
Menu Item
|
||||
^^^^^^^^^
|
||||
|
||||
Addons can add to the user interface of existing panels, headers and menus defined in Python.
|
||||
Add-ons can add to the user interface of existing panels, headers and menus defined in Python.
|
||||
|
||||
For this example we'll add to an existing menu.
|
||||
|
||||
@@ -464,7 +462,7 @@ For docs on extending menus see: :doc:`bpy.types.Menu`.
|
||||
Keymap
|
||||
^^^^^^
|
||||
|
||||
In Blender addons have their own key-maps so as not to interfere with Blenders built in key-maps.
|
||||
In Blender, add-ons have their own keymaps so as not to interfere with Blender's built-in keymaps.
|
||||
|
||||
In the example below, a new object-mode :class:`bpy.types.KeyMap` is added,
|
||||
then a :class:`bpy.types.KeyMapItem` is added to the key-map which references our newly added operator,
|
||||
@@ -502,7 +500,7 @@ this allows you to have multiple keys accessing the same operator with different
|
||||
|
||||
.. note::
|
||||
|
||||
While :kbd:`Ctrl-Shift-Space` isn't a default Blender key shortcut, its hard to make sure addons won't
|
||||
While :kbd:`Ctrl-Shift-Space` isn't a default Blender key shortcut, it's hard to make sure add-ons won't
|
||||
overwrite each other's keymaps. At least take care when assigning keys that they don't
|
||||
conflict with important functionality within Blender.
|
||||
|
||||
@@ -606,14 +604,14 @@ After selecting it from the menu, you can choose how many instance of the cube y
|
||||
.. note::
|
||||
|
||||
Directly executing the script multiple times will add the menu each time too.
|
||||
While not useful behavior, theres nothing to worry about since addons won't register them selves multiple
|
||||
While not useful behavior, there's nothing to worry about since add-ons won't register themselves multiple
|
||||
times when enabled through the user preferences.
|
||||
|
||||
|
||||
Conclusions
|
||||
===========
|
||||
|
||||
Addons can encapsulate certain functionality neatly for writing tools to improve your work-flow or for writing utilities
|
||||
Add-ons can encapsulate certain functionality neatly for writing tools to improve your work-flow or for writing utilities
|
||||
for others to use.
|
||||
|
||||
While there are limits to what Python can do within Blender, there is certainly a lot that can be achieved without
|
||||
@@ -636,7 +634,7 @@ Here are some sites you might like to check on after completing this tutorial.
|
||||
*For more background details on Blender/Python integration.*
|
||||
- `How to Think Like a Computer Scientist <http://interactivepython.org/courselib/static/thinkcspy/index.html>`_ -
|
||||
*Great info for those who are still learning Python.*
|
||||
- `Blender Development (Wiki) <http://wiki.blender.org/index.php/Dev:Contents>`_ -
|
||||
- `Blender Development (Wiki) <https://wiki.blender.org/index.php/Dev:Contents>`_ -
|
||||
*Blender Development, general information and helpful links.*
|
||||
- `Blender Artists (Coding Section) <http://blenderartists.org/forum/forumdisplay.php?47-Coding>`_ -
|
||||
*forum where people ask Python development questions*
|
||||
|
||||
@@ -27,7 +27,7 @@ output from this tool should be added into "doc/python_api/rst/change_log.rst"
|
||||
blender --background --python doc/python_api/sphinx_changelog_gen.py -- --dump
|
||||
|
||||
# create changelog
|
||||
blender --background --python doc/python_api/sphinx_changelog_gen.py -- \
|
||||
blender --background --factory-startup --python doc/python_api/sphinx_changelog_gen.py -- \
|
||||
--api_from blender_2_63_0.py \
|
||||
--api_to blender_2_64_0.py \
|
||||
--api_out changes.rst
|
||||
@@ -331,7 +331,7 @@ def main():
|
||||
|
||||
# When --help or no args are given, print this help
|
||||
usage_text = "Run blender in background mode with this script: "
|
||||
"blender --background --python %s -- [options]" % os.path.basename(__file__)
|
||||
"blender --background --factory-startup --python %s -- [options]" % os.path.basename(__file__)
|
||||
|
||||
epilog = "Run this before releases"
|
||||
|
||||
|
||||
@@ -26,16 +26,16 @@ API dump in RST files
|
||||
---------------------
|
||||
Run this script from Blender's root path once you have compiled Blender
|
||||
|
||||
./blender.bin --background -noaudio --python doc/python_api/sphinx_doc_gen.py
|
||||
blender --background --factory-startup -noaudio --python doc/python_api/sphinx_doc_gen.py
|
||||
|
||||
This will generate python files in doc/python_api/sphinx-in/
|
||||
providing ./blender.bin is or links to the blender executable
|
||||
providing ./blender is or links to the blender executable
|
||||
|
||||
To choose sphinx-in directory:
|
||||
./blender.bin --background --python doc/python_api/sphinx_doc_gen.py -- --output ../python_api
|
||||
blender --background --factory-startup --python doc/python_api/sphinx_doc_gen.py -- --output ../python_api
|
||||
|
||||
For quick builds:
|
||||
./blender.bin --background --python doc/python_api/sphinx_doc_gen.py -- --partial bmesh.*
|
||||
blender --background --factory-startup --python doc/python_api/sphinx_doc_gen.py -- --partial bmesh.*
|
||||
|
||||
|
||||
Sphinx: HTML generation
|
||||
@@ -46,8 +46,6 @@ Sphinx: HTML generation
|
||||
cd doc/python_api
|
||||
sphinx-build sphinx-in sphinx-out
|
||||
|
||||
This requires sphinx 1.0.7 to be installed.
|
||||
|
||||
|
||||
Sphinx: PDF generation
|
||||
----------------------
|
||||
@@ -68,7 +66,7 @@ except ImportError:
|
||||
import sys
|
||||
sys.exit()
|
||||
|
||||
import rna_info # Blender module
|
||||
import rna_info # Blender module
|
||||
|
||||
|
||||
def rna_info_BuildRNAInfo_cache():
|
||||
@@ -86,7 +84,7 @@ import shutil
|
||||
import logging
|
||||
|
||||
from platform import platform
|
||||
PLATFORM = platform().split('-')[0].lower() # 'linux', 'darwin', 'windows'
|
||||
PLATFORM = platform().split('-')[0].lower() # 'linux', 'darwin', 'windows'
|
||||
|
||||
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
@@ -208,12 +206,12 @@ BPY_LOGGER.setLevel(logging.DEBUG)
|
||||
"""
|
||||
# for quick rebuilds
|
||||
rm -rf /b/doc/python_api/sphinx-* && \
|
||||
./blender.bin -b -noaudio --factory-startup -P doc/python_api/sphinx_doc_gen.py && \
|
||||
./blender -b -noaudio --factory-startup -P doc/python_api/sphinx_doc_gen.py && \
|
||||
sphinx-build doc/python_api/sphinx-in doc/python_api/sphinx-out
|
||||
|
||||
or
|
||||
|
||||
./blender.bin -b -noaudio --factory-startup -P doc/python_api/sphinx_doc_gen.py -- -f -B
|
||||
./blender -b -noaudio --factory-startup -P doc/python_api/sphinx_doc_gen.py -- -f -B
|
||||
"""
|
||||
|
||||
# Switch for quick testing so doc-builds don't take so long
|
||||
@@ -365,7 +363,7 @@ INFO_DOCS = (
|
||||
("info_overview.rst",
|
||||
"Blender/Python API Overview: a more complete explanation of Python integration"),
|
||||
("info_tutorial_addon.rst",
|
||||
"Blender/Python Addon Tutorial: a step by step guide on how to write an addon from scratch"),
|
||||
"Blender/Python Add-on Tutorial: a step by step guide on how to write an add-on from scratch"),
|
||||
("info_api_reference.rst",
|
||||
"Blender/Python API Reference Usage: examples of how to use the API reference docs"),
|
||||
("info_best_practice.rst",
|
||||
@@ -420,7 +418,7 @@ MODULE_GROUPING = {
|
||||
|
||||
blender_version_strings = [str(v) for v in bpy.app.version]
|
||||
|
||||
# converting bytes to strings, due to #30154
|
||||
# converting bytes to strings, due to T30154
|
||||
BLENDER_REVISION = str(bpy.app.build_hash, 'utf_8')
|
||||
BLENDER_DATE = str(bpy.app.build_date, 'utf_8')
|
||||
|
||||
@@ -1567,9 +1565,9 @@ def pyrna2sphinx(basepath):
|
||||
|
||||
# operators
|
||||
def write_ops():
|
||||
API_BASEURL = "http://svn.blender.org/svnroot/bf-blender/trunk/blender/release/scripts"
|
||||
API_BASEURL_ADDON = "http://svn.blender.org/svnroot/bf-extensions/trunk/py/scripts"
|
||||
API_BASEURL_ADDON_CONTRIB = "http://svn.blender.org/svnroot/bf-extensions/contrib/py/scripts"
|
||||
# Base URL used when linking operator sources in the generated docs.
# The literal must not carry trailing whitespace, otherwise every URL built
# from it contains an embedded space.
API_BASEURL = "https://developer.blender.org/diffusion/B/browse/master/release/scripts/"
|
||||
API_BASEURL_ADDON = "https://developer.blender.org/diffusion/BA/"
|
||||
API_BASEURL_ADDON_CONTRIB = "https://developer.blender.org/diffusion/BAC/"
|
||||
|
||||
op_modules = {}
|
||||
for op in ops.values():
|
||||
@@ -1645,7 +1643,7 @@ def write_sphinx_conf_py(basepath):
|
||||
|
||||
if ARGS.sphinx_theme == "blender-org":
|
||||
fw("html_theme_path = ['../']\n")
|
||||
# copied with the theme, exclude else we get an error [#28873]
|
||||
# copied with the theme, exclude else we get an error [T28873]
|
||||
fw("html_favicon = 'favicon.ico'\n") # in <theme>/static/
|
||||
|
||||
# not helpful since the source is generated, adds to upload size.
|
||||
|
||||
2
extern/cuew/README
vendored
2
extern/cuew/README
vendored
@@ -4,7 +4,7 @@ for determining which CUDA functions and extensions extensions are supported
|
||||
on the target platform.
|
||||
|
||||
CUDA core and extension functionality is exposed in a single header file.
|
||||
GUEW has been tested on a variety of operating systems, including Windows,
|
||||
CUEW has been tested on a variety of operating systems, including Windows,
|
||||
Linux, Mac OS X.
|
||||
|
||||
LICENSE
|
||||
|
||||
2
extern/cuew/README.blender
vendored
2
extern/cuew/README.blender
vendored
@@ -1,5 +1,5 @@
|
||||
Project: Cuda Wrangler
|
||||
URL: https://github.com/CudaWrangler/cuew
|
||||
License: Apache 2.0
|
||||
Upstream version: e2e0315
|
||||
Upstream version: 63d2a0f
|
||||
Local modifications: None
|
||||
|
||||
@@ -36,6 +36,15 @@ set(SRC
|
||||
device_task.cpp
|
||||
)
|
||||
|
||||
set(SRC_OPENCL
|
||||
opencl/opencl.h
|
||||
|
||||
opencl/opencl_base.cpp
|
||||
opencl/opencl_mega.cpp
|
||||
opencl/opencl_split.cpp
|
||||
opencl/opencl_util.cpp
|
||||
)
|
||||
|
||||
if(WITH_CYCLES_NETWORK)
|
||||
list(APPEND SRC
|
||||
device_network.cpp
|
||||
@@ -67,4 +76,4 @@ endif()
|
||||
include_directories(${INC})
|
||||
include_directories(SYSTEM ${INC_SYS})
|
||||
|
||||
add_library(cycles_device ${SRC} ${SRC_HEADERS})
|
||||
add_library(cycles_device ${SRC} ${SRC_OPENCL} ${SRC_HEADERS})
|
||||
|
||||
@@ -993,7 +993,7 @@ public:
|
||||
cuda_assert(cuCtxSynchronize());
|
||||
|
||||
if(task.get_cancel()) {
|
||||
canceled = false;
|
||||
canceled = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
403
intern/cycles/device/opencl/opencl.h
Normal file
403
intern/cycles/device/opencl/opencl.h
Normal file
@@ -0,0 +1,403 @@
|
||||
/*
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_OPENCL
|
||||
|
||||
#include "clew.h"
|
||||
|
||||
#include "device.h"
|
||||
|
||||
#include "util_map.h"
|
||||
#include "util_param.h"
|
||||
#include "util_string.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
|
||||
|
||||
/* Macro declarations used with split kernel */
|
||||
|
||||
/* Macro to enable/disable work-stealing */
|
||||
#define __WORK_STEALING__
|
||||
|
||||
#define SPLIT_KERNEL_LOCAL_SIZE_X 64
|
||||
#define SPLIT_KERNEL_LOCAL_SIZE_Y 1
|
||||
|
||||
/* This value may be tuned according to the scene we are rendering.
|
||||
*
|
||||
* Modifying PATH_ITER_INC_FACTOR value proportional to number of expected
|
||||
* ray-bounces will improve performance.
|
||||
*/
|
||||
#define PATH_ITER_INC_FACTOR 8
|
||||
|
||||
/* When allocating global memory in chunks, we may not be able to
|
||||
* allocate exactly "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes in chunks;
|
||||
* Since some bytes may be needed for aligning chunks of memory;
|
||||
* This is the amount of memory that we dedicate for that purpose.
|
||||
*/
|
||||
#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
|
||||
|
||||
struct OpenCLPlatformDevice {
|
||||
OpenCLPlatformDevice(cl_platform_id platform_id,
|
||||
const string& platform_name,
|
||||
cl_device_id device_id,
|
||||
cl_device_type device_type,
|
||||
const string& device_name)
|
||||
: platform_id(platform_id),
|
||||
platform_name(platform_name),
|
||||
device_id(device_id),
|
||||
device_type(device_type),
|
||||
device_name(device_name) {}
|
||||
cl_platform_id platform_id;
|
||||
string platform_name;
|
||||
cl_device_id device_id;
|
||||
cl_device_type device_type;
|
||||
string device_name;
|
||||
};
|
||||
|
||||
/* Contains all static OpenCL helper functions. */
|
||||
class OpenCLInfo
|
||||
{
|
||||
public:
|
||||
static cl_device_type device_type();
|
||||
static bool use_debug();
|
||||
static bool kernel_use_advanced_shading(const string& platform_name);
|
||||
static bool kernel_use_split(const string& platform_name,
|
||||
const cl_device_type device_type);
|
||||
static bool device_supported(const string& platform_name,
|
||||
const cl_device_id device_id);
|
||||
static bool platform_version_check(cl_platform_id platform,
|
||||
string *error = NULL);
|
||||
static bool device_version_check(cl_device_id device,
|
||||
string *error = NULL);
|
||||
static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
|
||||
bool force_all = false);
|
||||
};
|
||||
|
||||
/* Thread safe cache for contexts and programs.
|
||||
*/
|
||||
class OpenCLCache
|
||||
{
|
||||
struct Slot
|
||||
{
|
||||
struct ProgramEntry
|
||||
{
|
||||
ProgramEntry();
|
||||
ProgramEntry(const ProgramEntry& rhs);
|
||||
~ProgramEntry();
|
||||
cl_program program;
|
||||
thread_mutex *mutex;
|
||||
};
|
||||
|
||||
Slot();
|
||||
Slot(const Slot& rhs);
|
||||
~Slot();
|
||||
|
||||
thread_mutex *context_mutex;
|
||||
cl_context context;
|
||||
typedef map<ustring, ProgramEntry> EntryMap;
|
||||
EntryMap programs;
|
||||
|
||||
};
|
||||
|
||||
/* key is combination of platform ID and device ID */
|
||||
typedef pair<cl_platform_id, cl_device_id> PlatformDevicePair;
|
||||
|
||||
/* map of Slot objects */
|
||||
typedef map<PlatformDevicePair, Slot> CacheMap;
|
||||
CacheMap cache;
|
||||
|
||||
/* MD5 hash of the kernel source. */
|
||||
string kernel_md5;
|
||||
|
||||
thread_mutex cache_lock;
|
||||
thread_mutex kernel_md5_lock;
|
||||
|
||||
/* lazy instantiate */
|
||||
static OpenCLCache& global_instance();
|
||||
|
||||
public:
|
||||
|
||||
enum ProgramName {
|
||||
OCL_DEV_BASE_PROGRAM,
|
||||
OCL_DEV_MEGAKERNEL_PROGRAM,
|
||||
};
|
||||
|
||||
/* Lookup context in the cache. If this returns NULL, slot_locker
|
||||
* will be holding a lock for the cache. slot_locker should refer to a
|
||||
* default constructed thread_scoped_lock. */
|
||||
static cl_context get_context(cl_platform_id platform,
|
||||
cl_device_id device,
|
||||
thread_scoped_lock& slot_locker);
|
||||
/* Same as above. */
|
||||
static cl_program get_program(cl_platform_id platform,
|
||||
cl_device_id device,
|
||||
ustring key,
|
||||
thread_scoped_lock& slot_locker);
|
||||
|
||||
/* Store context in the cache. You MUST have tried to get the item before storing to it. */
|
||||
static void store_context(cl_platform_id platform,
|
||||
cl_device_id device,
|
||||
cl_context context,
|
||||
thread_scoped_lock& slot_locker);
|
||||
/* Same as above. */
|
||||
static void store_program(cl_platform_id platform,
|
||||
cl_device_id device,
|
||||
cl_program program,
|
||||
ustring key,
|
||||
thread_scoped_lock& slot_locker);
|
||||
|
||||
static string get_kernel_md5();
|
||||
};
|
||||
|
||||
/* Evaluate an OpenCL call and report a failure.
 *
 * `stmt` is evaluated exactly once. When it does not yield CL_SUCCESS the
 * error is formatted together with the stringified statement, stored in
 * `error_msg` (only if no earlier error is stored there) and printed to
 * stderr. Execution continues afterwards; this is not a hard assert.
 *
 * The macro expects a `string error_msg` to be in scope at the call site.
 *
 * The body is wrapped in do/while(0) so the macro behaves like a single
 * statement, e.g. in `if(x) opencl_assert(y); else ...`.
 */
#define opencl_assert(stmt) \
	do { \
		cl_int err = stmt; \
		\
		if(err != CL_SUCCESS) { \
			string message = string_printf("OpenCL error: %s in %s", clewErrorString(err), #stmt); \
			if(error_msg == "") \
				error_msg = message; \
			fprintf(stderr, "%s\n", message.c_str()); \
		} \
	} while(0)
|
||||
|
||||
class OpenCLDeviceBase : public Device
|
||||
{
|
||||
public:
|
||||
DedicatedTaskPool task_pool;
|
||||
cl_context cxContext;
|
||||
cl_command_queue cqCommandQueue;
|
||||
cl_platform_id cpPlatform;
|
||||
cl_device_id cdDevice;
|
||||
cl_int ciErr;
|
||||
|
||||
class OpenCLProgram {
|
||||
public:
|
||||
OpenCLProgram() : loaded(false), device(NULL) {}
|
||||
OpenCLProgram(OpenCLDeviceBase *device,
|
||||
string program_name,
|
||||
string kernel_name,
|
||||
string kernel_build_options,
|
||||
bool use_stdout = true);
|
||||
~OpenCLProgram();
|
||||
|
||||
void add_kernel(ustring name);
|
||||
void load();
|
||||
|
||||
bool is_loaded() { return loaded; }
|
||||
string get_log() { return log; }
|
||||
void report_error();
|
||||
|
||||
cl_kernel operator()();
|
||||
cl_kernel operator()(ustring name);
|
||||
|
||||
void release();
|
||||
|
||||
private:
|
||||
bool build_kernel(const string *debug_src);
|
||||
bool compile_kernel(const string *debug_src);
|
||||
bool load_binary(const string& clbin, const string *debug_src = NULL);
|
||||
bool save_binary(const string& clbin);
|
||||
|
||||
void add_log(string msg, bool is_debug);
|
||||
void add_error(string msg);
|
||||
|
||||
bool loaded;
|
||||
cl_program program;
|
||||
OpenCLDeviceBase *device;
|
||||
|
||||
/* Used for the OpenCLCache key. */
|
||||
string program_name;
|
||||
|
||||
string kernel_file, kernel_build_options, device_md5;
|
||||
|
||||
bool use_stdout;
|
||||
string log, error_msg;
|
||||
string compile_output;
|
||||
|
||||
map<ustring, cl_kernel> kernels;
|
||||
};
|
||||
|
||||
OpenCLProgram base_program;
|
||||
|
||||
typedef map<string, device_vector<uchar>*> ConstMemMap;
|
||||
typedef map<string, device_ptr> MemMap;
|
||||
|
||||
ConstMemMap const_mem_map;
|
||||
MemMap mem_map;
|
||||
device_ptr null_mem;
|
||||
|
||||
bool device_initialized;
|
||||
string platform_name;
|
||||
|
||||
bool opencl_error(cl_int err);
|
||||
void opencl_error(const string& message);
|
||||
void opencl_assert_err(cl_int err, const char* where);
|
||||
|
||||
OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_);
|
||||
~OpenCLDeviceBase();
|
||||
|
||||
static void CL_CALLBACK context_notify_callback(const char *err_info,
|
||||
const void * /*private_info*/, size_t /*cb*/, void *user_data);
|
||||
|
||||
bool opencl_version_check();
|
||||
|
||||
string device_md5_hash(string kernel_custom_build_options = "");
|
||||
bool load_kernels(const DeviceRequestedFeatures& requested_features);
|
||||
|
||||
/* Has to be implemented by the real device classes.
|
||||
* The base device will then load all these programs. */
|
||||
virtual void load_kernels(const DeviceRequestedFeatures& requested_features,
|
||||
vector<OpenCLProgram*> &programs) = 0;
|
||||
|
||||
void mem_alloc(device_memory& mem, MemoryType type);
|
||||
void mem_copy_to(device_memory& mem);
|
||||
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem);
|
||||
void mem_zero(device_memory& mem);
|
||||
void mem_free(device_memory& mem);
|
||||
void const_copy_to(const char *name, void *host, size_t size);
|
||||
void tex_alloc(const char *name,
|
||||
device_memory& mem,
|
||||
InterpolationType /*interpolation*/,
|
||||
ExtensionType /*extension*/);
|
||||
void tex_free(device_memory& mem);
|
||||
|
||||
size_t global_size_round_up(int group_size, int global_size);
|
||||
void enqueue_kernel(cl_kernel kernel, size_t w, size_t h);
|
||||
void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name);
|
||||
|
||||
void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half);
|
||||
void shader(DeviceTask& task);
|
||||
|
||||
class OpenCLDeviceTask : public DeviceTask {
|
||||
public:
|
||||
OpenCLDeviceTask(OpenCLDeviceBase *device, DeviceTask& task)
|
||||
: DeviceTask(task)
|
||||
{
|
||||
run = function_bind(&OpenCLDeviceBase::thread_run,
|
||||
device,
|
||||
this);
|
||||
}
|
||||
};
|
||||
|
||||
int get_split_task_count(DeviceTask& /*task*/)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
void task_add(DeviceTask& task)
|
||||
{
|
||||
task_pool.push(new OpenCLDeviceTask(this, task));
|
||||
}
|
||||
|
||||
void task_wait()
|
||||
{
|
||||
task_pool.wait();
|
||||
}
|
||||
|
||||
void task_cancel()
|
||||
{
|
||||
task_pool.cancel();
|
||||
}
|
||||
|
||||
virtual void thread_run(DeviceTask * /*task*/) = 0;
|
||||
|
||||
protected:
|
||||
string kernel_build_options(const string *debug_src = NULL);
|
||||
|
||||
class ArgumentWrapper {
|
||||
public:
|
||||
ArgumentWrapper() : size(0), pointer(NULL) {}
|
||||
template <typename T>
|
||||
ArgumentWrapper(T& argument) : size(sizeof(argument)),
|
||||
pointer(&argument) { }
|
||||
ArgumentWrapper(int argument) : size(sizeof(int)),
|
||||
int_value(argument),
|
||||
pointer(&int_value) { }
|
||||
ArgumentWrapper(float argument) : size(sizeof(float)),
|
||||
float_value(argument),
|
||||
pointer(&float_value) { }
|
||||
size_t size;
|
||||
int int_value;
|
||||
float float_value;
|
||||
void *pointer;
|
||||
};
|
||||
|
||||
/* TODO(sergey): In the future we can use variadic templates, once
|
||||
* C++0x is allowed. Should allow to clean this up a bit.
|
||||
*/
|
||||
int kernel_set_args(cl_kernel kernel,
|
||||
int start_argument_index,
|
||||
const ArgumentWrapper& arg1 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg2 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg3 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg4 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg5 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg6 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg7 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg8 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg9 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg10 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg11 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg12 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg13 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg14 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg15 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg16 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg17 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg18 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg19 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg20 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg21 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg22 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg23 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg24 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg25 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg26 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg27 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg28 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg29 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg30 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg31 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg32 = ArgumentWrapper(),
|
||||
const ArgumentWrapper& arg33 = ArgumentWrapper());
|
||||
|
||||
void release_kernel_safe(cl_kernel kernel);
|
||||
void release_mem_object_safe(cl_mem mem);
|
||||
void release_program_safe(cl_program program);
|
||||
|
||||
/* ** These are for working around some compiler-specific bugs ** */
|
||||
|
||||
virtual cl_program load_cached_kernel(
|
||||
ustring key,
|
||||
thread_scoped_lock& cache_locker);
|
||||
|
||||
virtual void store_cached_kernel(
|
||||
cl_program program,
|
||||
ustring key,
|
||||
thread_scoped_lock& cache_locker);
|
||||
|
||||
virtual string build_options_for_base_program(
|
||||
const DeviceRequestedFeatures& /*requested_features*/);
|
||||
};
|
||||
|
||||
Device *opencl_create_mega_device(DeviceInfo& info, Stats& stats, bool background);
|
||||
Device *opencl_create_split_device(DeviceInfo& info, Stats& stats, bool background);
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
738
intern/cycles/device/opencl/opencl_base.cpp
Normal file
738
intern/cycles/device/opencl/opencl_base.cpp
Normal file
@@ -0,0 +1,738 @@
|
||||
/*
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_OPENCL
|
||||
|
||||
#include "opencl.h"
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
#include "util_foreach.h"
|
||||
#include "util_logging.h"
|
||||
#include "util_md5.h"
|
||||
#include "util_path.h"
|
||||
#include "util_time.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Report a failed OpenCL call.
 *
 * Returns false when `err` is CL_SUCCESS. Otherwise the error code and its
 * clew description are formatted into a message, which is recorded and
 * printed through the string overload, and true is returned.
 */
bool OpenCLDeviceBase::opencl_error(cl_int err)
{
	if(err == CL_SUCCESS) {
		return false;
	}

	/* The string overload stores the message in error_msg only when no
	 * earlier error is stored, and always echoes it to stderr. */
	const string message = string_printf("OpenCL error (%d): %s", err, clewErrorString(err));
	opencl_error(message);
	return true;
}
|
||||
|
||||
void OpenCLDeviceBase::opencl_error(const string& message)
|
||||
{
|
||||
if(error_msg == "")
|
||||
error_msg = message;
|
||||
fprintf(stderr, "%s\n", message.c_str());
|
||||
}
|
||||
|
||||
/* Check the result of an OpenCL call made at location `where`.
 *
 * Does nothing for CL_SUCCESS. On failure the formatted error is recorded
 * (first error only) and printed to stderr; builds without NDEBUG then
 * abort().
 */
void OpenCLDeviceBase::opencl_assert_err(cl_int err, const char* where)
{
	if(err == CL_SUCCESS) {
		return;
	}

	const string message = string_printf("OpenCL error (%d): %s in %s", err, clewErrorString(err), where);
	/* Stores the message if it is the first one and prints it to stderr. */
	opencl_error(message);

#ifndef NDEBUG
	abort();
#endif
}
|
||||
|
||||
/* Construct the common part of an OpenCL device.
 *
 * Looks up the platform/device pair selected by `info.num` among the usable
 * devices, obtains a context (from OpenCLCache when one is cached, otherwise
 * a newly created one which is then stored in the cache), creates the command
 * queue and a 1-byte placeholder buffer (`null_mem`).
 *
 * On any failure the error is reported through opencl_error() and the
 * constructor returns early with `device_initialized` left false.
 */
OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_)
: Device(info, stats, background_)
{
	cpPlatform = NULL;
	cdDevice = NULL;
	cxContext = NULL;
	cqCommandQueue = NULL;
	ciErr = CL_SUCCESS;
	null_mem = 0;
	device_initialized = false;

	vector<OpenCLPlatformDevice> usable_devices;
	OpenCLInfo::get_usable_devices(&usable_devices);
	if(usable_devices.size() == 0) {
		opencl_error("OpenCL: no devices found.");
		return;
	}

	/* Validate the index at runtime instead of with assert(), which is
	 * compiled out when NDEBUG is defined. The cast to size_t also rejects
	 * negative values. */
	if(static_cast<size_t>(info.num) >= usable_devices.size()) {
		opencl_error("OpenCL: requested device index is out of range.");
		return;
	}

	OpenCLPlatformDevice& platform_device = usable_devices[info.num];
	cpPlatform = platform_device.platform_id;
	cdDevice = platform_device.device_id;
	platform_name = platform_device.platform_name;
	VLOG(2) << "Creating new Cycles device for OpenCL platform "
	        << platform_name << ", device "
	        << platform_device.device_name << ".";

	{
		/* try to use cached context */
		thread_scoped_lock cache_locker;
		cxContext = OpenCLCache::get_context(cpPlatform, cdDevice, cache_locker);

		if(cxContext == NULL) {
			/* create context properties array to specify platform */
			const cl_context_properties context_props[] = {
				CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform,
				0, 0
			};

			/* create context; cdDevice is handed to the notify callback
			 * as user data so it can print the device name */
			cxContext = clCreateContext(context_props, 1, &cdDevice,
				context_notify_callback, cdDevice, &ciErr);

			if(opencl_error(ciErr)) {
				opencl_error("OpenCL: clCreateContext failed");
				return;
			}

			/* cache it */
			OpenCLCache::store_context(cpPlatform, cdDevice, cxContext, cache_locker);
		}
	}

	cqCommandQueue = clCreateCommandQueue(cxContext, cdDevice, 0, &ciErr);
	if(opencl_error(ciErr))
		return;

	null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
	if(opencl_error(ciErr))
		return;

	fprintf(stderr, "Device init success\n");
	device_initialized = true;
}
|
||||
|
||||
/* Tear down the device.
 *
 * Stops the task pool first, then releases the placeholder buffer, frees
 * and deletes every entry in const_mem_map, releases the base program and
 * finally the command queue and the context.
 */
OpenCLDeviceBase::~OpenCLDeviceBase()
{
	task_pool.stop();

	if(null_mem) {
		clReleaseMemObject(CL_MEM_PTR(null_mem));
	}

	for(ConstMemMap::iterator entry = const_mem_map.begin();
	    entry != const_mem_map.end();
	    ++entry)
	{
		device_vector<uchar> *const_data = entry->second;
		/* Free the device side before deleting the host-side object. */
		mem_free(*const_data);
		delete const_data;
	}

	base_program.release();

	if(cqCommandQueue) {
		clReleaseCommandQueue(cqCommandQueue);
	}
	if(cxContext) {
		clReleaseContext(cxContext);
	}
}
|
||||
|
||||
/* Context error callback registered with clCreateContext().
 *
 * `user_data` is the cl_device_id passed at context creation; its name is
 * looked up so the message printed to stderr identifies the device.
 */
void CL_CALLBACK OpenCLDeviceBase::context_notify_callback(const char *err_info,
	const void * /*private_info*/, size_t /*cb*/, void *user_data)
{
	/* Zero-initialized so a failed name query prints an empty name rather
	 * than reading uninitialized stack memory. */
	char name[256] = "";
	clGetDeviceInfo((cl_device_id)user_data, CL_DEVICE_NAME, sizeof(name), &name, NULL);

	fprintf(stderr, "OpenCL error (%s): %s\n", name, err_info);
}
|
||||
|
||||
bool OpenCLDeviceBase::opencl_version_check()
|
||||
{
|
||||
string error;
|
||||
if(!OpenCLInfo::platform_version_check(cpPlatform, &error)) {
|
||||
opencl_error(error);
|
||||
return false;
|
||||
}
|
||||
if(!OpenCLInfo::device_version_check(cdDevice, &error)) {
|
||||
opencl_error(error);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Build an MD5 hex digest identifying this device's kernel configuration.
 *
 * The digest covers the platform vendor, the device version, name and
 * driver version strings, followed by kernel_build_options() with
 * `kernel_custom_build_options` appended.
 */
string OpenCLDeviceBase::device_md5_hash(string kernel_custom_build_options)
{
	MD5Hash md5;

	/* Zero-initialized: the return values of the info queries are not
	 * checked, so a failed query must leave an empty string behind instead
	 * of an uninitialized buffer for strlen() to walk. */
	char version[256] = "";
	char driver[256] = "";
	char name[256] = "";
	char vendor[256] = "";

	clGetPlatformInfo(cpPlatform, CL_PLATFORM_VENDOR, sizeof(vendor), &vendor, NULL);
	clGetDeviceInfo(cdDevice, CL_DEVICE_VERSION, sizeof(version), &version, NULL);
	clGetDeviceInfo(cdDevice, CL_DEVICE_NAME, sizeof(name), &name, NULL);
	clGetDeviceInfo(cdDevice, CL_DRIVER_VERSION, sizeof(driver), &driver, NULL);

	/* Order matters: it determines the resulting digest. */
	md5.append((uint8_t*)vendor, strlen(vendor));
	md5.append((uint8_t*)version, strlen(version));
	md5.append((uint8_t*)name, strlen(name));
	md5.append((uint8_t*)driver, strlen(driver));

	string options = kernel_build_options();
	options += kernel_custom_build_options;
	md5.append((uint8_t*)options.c_str(), options.size());

	return md5.get_hex();
}
|
||||
|
||||
bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_features)
|
||||
{
|
||||
/* Verify if device was initialized. */
|
||||
if(!device_initialized) {
|
||||
fprintf(stderr, "OpenCL: failed to initialize device.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Verify we have right opencl version. */
|
||||
if(!opencl_version_check())
|
||||
return false;
|
||||
|
||||
base_program = OpenCLProgram(this, "base", "kernel.cl", build_options_for_base_program(requested_features));
|
||||
base_program.add_kernel(ustring("convert_to_byte"));
|
||||
base_program.add_kernel(ustring("convert_to_half_float"));
|
||||
base_program.add_kernel(ustring("shader"));
|
||||
base_program.add_kernel(ustring("bake"));
|
||||
|
||||
vector<OpenCLProgram*> programs;
|
||||
programs.push_back(&base_program);
|
||||
/* Call actual class to fill the vector with its programs. */
|
||||
load_kernels(requested_features, programs);
|
||||
|
||||
/* Parallel compilation is supported by Cycles, but currently all OpenCL frameworks
|
||||
* serialize the calls internally, so it's not much use right now.
|
||||
* Note: When enabling parallel compilation, use_stdout in the OpenCLProgram constructor
|
||||
* should be set to false as well. */
|
||||
#if 0
|
||||
TaskPool task_pool;
|
||||
foreach(OpenCLProgram *program, programs) {
|
||||
task_pool.push(function_bind(&OpenCLProgram::load, program));
|
||||
}
|
||||
task_pool.wait_work();
|
||||
|
||||
foreach(OpenCLProgram *program, programs) {
|
||||
VLOG(2) << program->get_log();
|
||||
if(!program->is_loaded()) {
|
||||
program->report_error();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#else
|
||||
foreach(OpenCLProgram *program, programs) {
|
||||
program->load();
|
||||
}
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Allocate a device buffer for mem with access flags derived from type, and
 * record the allocation in the statistics. */
void OpenCLDeviceBase::mem_alloc(device_memory& mem, MemoryType type)
{
	const size_t size = mem.memory_size();

	/* Anything which is not explicitly read-only or write-only is
	 * allocated as read-write. */
	cl_mem_flags mem_flag = CL_MEM_READ_WRITE;
	if(type == MEM_READ_ONLY) {
		mem_flag = CL_MEM_READ_ONLY;
	}
	else if(type == MEM_WRITE_ONLY) {
		mem_flag = CL_MEM_WRITE_ONLY;
	}

	/* Zero-size allocation might be invoked by render, but not really
	 * supported by OpenCL. Using NULL as device pointer also doesn't really
	 * work for some reason, so for the time being we'll use special case
	 * with null_mem buffer.
	 */
	if(size == 0) {
		mem.device_pointer = null_mem;
	}
	else {
		mem.device_pointer = (device_ptr)clCreateBuffer(cxContext,
		                                                mem_flag,
		                                                size,
		                                                NULL,
		                                                &ciErr);
		opencl_assert_err(ciErr, "clCreateBuffer");
	}

	stats.mem_alloc(size);
	mem.device_size = size;
}
|
||||
|
||||
/* Upload the host data of mem to its device buffer using a blocking write.
 * Nothing is done for zero-sized memory. */
void OpenCLDeviceBase::mem_copy_to(device_memory& mem)
{
	const size_t num_bytes = mem.memory_size();
	if(num_bytes == 0) {
		return;
	}

	/* this is blocking */
	opencl_assert(clEnqueueWriteBuffer(cqCommandQueue,
	                                   CL_MEM_PTR(mem.device_pointer),
	                                   CL_TRUE,
	                                   0,
	                                   num_bytes,
	                                   (void*)mem.data_pointer,
	                                   0,
	                                   NULL, NULL));
}
|
||||
|
||||
/* Read back part of a device buffer into host memory using a blocking read.
 *
 * The region starts at byte offset elem*y*w and is elem*w*h bytes long; it is
 * written to the same offset inside the host data of mem.
 */
void OpenCLDeviceBase::mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
{
	/* Widen before multiplying so large buffers do not overflow int
	 * arithmetic prior to the conversion to size_t. */
	const size_t offset = (size_t)elem * y * w;
	const size_t size = (size_t)elem * w * h;
	assert(size != 0);
	opencl_assert(clEnqueueReadBuffer(cqCommandQueue,
	                                  CL_MEM_PTR(mem.device_pointer),
	                                  CL_TRUE,
	                                  offset,
	                                  size,
	                                  (uchar*)mem.data_pointer + offset,
	                                  0,
	                                  NULL, NULL));
}
|
||||
|
||||
/* Clear the host data of mem to zero and upload it to the device.
 * Memory without a device pointer is left untouched. */
void OpenCLDeviceBase::mem_zero(device_memory& mem)
{
	if(!mem.device_pointer) {
		return;
	}

	memset((void*)mem.data_pointer, 0, mem.memory_size());
	mem_copy_to(mem);
}
|
||||
|
||||
/* Release the device buffer of mem (if any) and update the statistics. */
void OpenCLDeviceBase::mem_free(device_memory& mem)
{
	if(!mem.device_pointer) {
		return;
	}

	/* The shared null_mem placeholder is owned by the device itself and
	 * must not be released here. */
	const bool is_placeholder = (mem.device_pointer == null_mem);
	if(!is_placeholder) {
		opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer)));
	}
	mem.device_pointer = 0;

	stats.mem_free(mem.device_size);
	mem.device_size = 0;
}
|
||||
|
||||
void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size)
|
||||
{
|
||||
ConstMemMap::iterator i = const_mem_map.find(name);
|
||||
|
||||
if(i == const_mem_map.end()) {
|
||||
device_vector<uchar> *data = new device_vector<uchar>();
|
||||
data->copy((uchar*)host, size);
|
||||
|
||||
mem_alloc(*data, MEM_READ_ONLY);
|
||||
i = const_mem_map.insert(ConstMemMap::value_type(name, data)).first;
|
||||
}
|
||||
else {
|
||||
device_vector<uchar> *data = i->second;
|
||||
data->copy((uchar*)host, size);
|
||||
}
|
||||
|
||||
mem_copy_to(*i->second);
|
||||
}
|
||||
|
||||
void OpenCLDeviceBase::tex_alloc(const char *name,
|
||||
device_memory& mem,
|
||||
InterpolationType /*interpolation*/,
|
||||
ExtensionType /*extension*/)
|
||||
{
|
||||
VLOG(1) << "Texture allocate: " << name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
mem_alloc(mem, MEM_READ_ONLY);
|
||||
mem_copy_to(mem);
|
||||
assert(mem_map.find(name) == mem_map.end());
|
||||
mem_map.insert(MemMap::value_type(name, mem.device_pointer));
|
||||
}
|
||||
|
||||
/* Unregister the texture whose device pointer matches mem and free its
 * device memory. */
void OpenCLDeviceBase::tex_free(device_memory& mem)
{
	if(!mem.device_pointer) {
		return;
	}

	/* Drop the first registered name which refers to this buffer. */
	for(MemMap::iterator it = mem_map.begin(); it != mem_map.end(); ++it) {
		if(it->second == mem.device_pointer) {
			mem_map.erase(it->first);
			break;
		}
	}

	mem_free(mem);
}
|
||||
|
||||
/* Round global_size up to the next multiple of group_size. */
size_t OpenCLDeviceBase::global_size_round_up(int group_size, int global_size)
{
	const int remainder = global_size % group_size;
	if(remainder == 0) {
		/* Already a multiple of the group size. */
		return global_size;
	}
	return global_size + (group_size - remainder);
}
|
||||
|
||||
/* Enqueue kernel over a 2D range covering w x h work items and flush the
 * command queue.
 *
 * The global size is rounded up to a multiple of a work-group shape derived
 * from the kernel's work-group size and the device's per-dimension limits.
 * The local size itself is not passed to OpenCL (NULL is used), it is only
 * used for the rounding.
 */
void OpenCLDeviceBase::enqueue_kernel(cl_kernel kernel, size_t w, size_t h)
{
	/* Defaults keep the calculations below well-defined in case one of the
	 * queries fails and leaves its output untouched. */
	size_t workgroup_size = 1;
	size_t max_work_items[3] = {1, 1, 1};

	clGetKernelWorkGroupInfo(kernel, cdDevice,
		CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
	clGetDeviceInfo(cdDevice,
		CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*3, max_work_items, NULL);

	/* Try to divide evenly over 2 dimensions. */
	size_t sqrt_workgroup_size = max((size_t)sqrt((double)workgroup_size), 1);
	size_t local_size[2] = {sqrt_workgroup_size, sqrt_workgroup_size};

	/* Some implementations have max size 1 on 2nd dimension. */
	if(local_size[1] > max_work_items[1]) {
		local_size[0] = workgroup_size/max_work_items[1];
		local_size[1] = max_work_items[1];
	}

	size_t global_size[2] = {global_size_round_up(local_size[0], w),
	                         global_size_round_up(local_size[1], h)};

	/* Vertical size of 1 is coming from bake/shade kernels where we should
	 * not round anything up because otherwise we'll either be doing too
	 * much work per pixel (if we don't check global ID on Y axis) or will
	 * be checking for global ID to always have Y of 0.
	 */
	if(h == 1) {
		/* Index the Y dimension explicitly instead of relying on the
		 * value of h happening to equal the index. */
		global_size[1] = 1;
	}

	/* run kernel */
	opencl_assert(clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL));
	opencl_assert(clFlush(cqCommandQueue));
}
|
||||
|
||||
/* Bind the buffer registered under name as kernel argument number *narg and
 * advance *narg. Unknown names are bound to the null_mem placeholder. */
void OpenCLDeviceBase::set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name)
{
	MemMap::iterator found = mem_map.find(name);

	/* work around NULL not working, even though the spec says otherwise */
	cl_mem ptr = (found != mem_map.end())? CL_MEM_PTR(found->second)
	                                     : CL_MEM_PTR(null_mem);

	opencl_assert(clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr));
}
|
||||
|
||||
/* Run the film conversion kernel for the region described by task.
 *
 * When rgba_byte is set the "convert_to_byte" kernel writes into it,
 * otherwise "convert_to_half_float" writes into rgba_half.
 */
void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
{
	/* Pick destination buffer and kernel based on the requested format. */
	const bool to_byte = (rgba_byte)? true: false;
	cl_kernel convert_kernel = base_program(ustring(to_byte? "convert_to_byte"
	                                                        : "convert_to_half_float"));

	/* cast arguments to cl types */
	cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
	cl_mem d_rgba = CL_MEM_PTR(to_byte? rgba_byte: rgba_half);
	cl_mem d_buffer = CL_MEM_PTR(buffer);
	cl_int d_x = task.x;
	cl_int d_y = task.y;
	cl_int d_w = task.w;
	cl_int d_h = task.h;
	cl_float d_sample_scale = 1.0f/(task.sample + 1);
	cl_int d_offset = task.offset;
	cl_int d_stride = task.stride;

	/* Leading buffer arguments. */
	cl_uint arg_index = kernel_set_args(convert_kernel,
	                                    0,
	                                    d_data,
	                                    d_rgba,
	                                    d_buffer);

	/* One argument per texture declared in kernel_textures.h. */
#define KERNEL_TEX(type, ttype, name) \
	set_kernel_arg_mem(convert_kernel, &arg_index, #name);
#include "kernel_textures.h"
#undef KERNEL_TEX

	/* Trailing scalar arguments. */
	arg_index += kernel_set_args(convert_kernel,
	                             arg_index,
	                             d_sample_scale,
	                             d_x,
	                             d_y,
	                             d_w,
	                             d_h,
	                             d_offset,
	                             d_stride);

	enqueue_kernel(convert_kernel, d_w, d_h);
}
|
||||
|
||||
/* Run the "shader" or "bake" kernel (selected by task.shader_eval_type) once
 * per sample, waiting for completion and updating progress after each one.
 */
void OpenCLDeviceBase::shader(DeviceTask& task)
{
	/* Evaluation types from SHADER_EVAL_BAKE upwards use the bake kernel,
	 * which has a slightly different argument list. */
	const bool is_bake = (task.shader_eval_type >= SHADER_EVAL_BAKE);

	/* cast arguments to cl types */
	cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
	cl_mem d_input = CL_MEM_PTR(task.shader_input);
	cl_mem d_output = CL_MEM_PTR(task.shader_output);
	cl_mem d_output_luma = CL_MEM_PTR(task.shader_output_luma);
	cl_int d_shader_eval_type = task.shader_eval_type;
	cl_int d_shader_filter = task.shader_filter;
	cl_int d_shader_x = task.shader_x;
	cl_int d_shader_w = task.shader_w;
	cl_int d_offset = task.offset;

	cl_kernel kernel = base_program(ustring(is_bake? "bake": "shader"));

	cl_uint arg_index = kernel_set_args(kernel,
	                                    0,
	                                    d_data,
	                                    d_input,
	                                    d_output);

	/* Luma output only exists for the shader kernel. */
	if(!is_bake) {
		arg_index += kernel_set_args(kernel,
		                             arg_index,
		                             d_output_luma);
	}

#define KERNEL_TEX(type, ttype, name) \
	set_kernel_arg_mem(kernel, &arg_index, #name);
#include "kernel_textures.h"
#undef KERNEL_TEX

	arg_index += kernel_set_args(kernel,
	                             arg_index,
	                             d_shader_eval_type);

	/* Filter argument only exists for the bake kernel. */
	if(is_bake) {
		arg_index += kernel_set_args(kernel,
		                             arg_index,
		                             d_shader_filter);
	}

	arg_index += kernel_set_args(kernel,
	                             arg_index,
	                             d_shader_x,
	                             d_shader_w,
	                             d_offset);

	for(int sample = 0; sample < task.num_samples; sample++) {
		if(task.get_cancel()) {
			break;
		}

		/* The sample index is the last kernel argument and is the only
		 * one which changes between iterations. */
		kernel_set_args(kernel, arg_index, sample);

		enqueue_kernel(kernel, task.shader_w, 1);
		clFinish(cqCommandQueue);

		task.update_progress(NULL);
	}
}
|
||||
|
||||
/* Assemble the compiler options passed to the OpenCL program build, based on
 * the platform name and the debug settings.
 *
 * debug_src, when given, is the source path used for source level debugging
 * on the Intel platform.
 */
string OpenCLDeviceBase::kernel_build_options(const string *debug_src)
{
	string options = "-cl-fast-relaxed-math ";

	if(platform_name == "NVIDIA CUDA") {
		options += "-D__KERNEL_OPENCL_NVIDIA__ "
		           "-cl-nv-maxrregcount=32 "
		           "-cl-nv-verbose ";

		/* Expose the compute capability to the kernel as major*100 + minor*10. */
		uint cc_major, cc_minor;
		clGetDeviceInfo(cdDevice, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,
		                sizeof(cl_uint), &cc_major, NULL);
		clGetDeviceInfo(cdDevice, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,
		                sizeof(cl_uint), &cc_minor, NULL);

		options += string_printf("-D__COMPUTE_CAPABILITY__=%u ",
		                         cc_major * 100 +
		                         cc_minor * 10);
	}
	else if(platform_name == "Apple") {
		options += "-D__KERNEL_OPENCL_APPLE__ ";
	}
	else if(platform_name == "AMD Accelerated Parallel Processing") {
		options += "-D__KERNEL_OPENCL_AMD__ ";
	}
	else if(platform_name == "Intel(R) OpenCL") {
		options += "-D__KERNEL_OPENCL_INTEL_CPU__ ";

		/* Options for gdb source level kernel debugging.
		 * this segfaults on linux currently.
		 */
		if(OpenCLInfo::use_debug() && debug_src) {
			options += "-g -s \"" + *debug_src + "\" ";
		}
	}

	if(OpenCLInfo::use_debug()) {
		options += "-D__KERNEL_OPENCL_DEBUG__ ";
	}

#ifdef WITH_CYCLES_DEBUG
	options += "-D__KERNEL_DEBUG__ ";
#endif

	return options;
}
|
||||
|
||||
/* TODO(sergey): In the future we can use variadic templates, once
|
||||
* C++0x is allowed. Should allow to clean this up a bit.
|
||||
*/
|
||||
int OpenCLDeviceBase::kernel_set_args(cl_kernel kernel,
|
||||
int start_argument_index,
|
||||
const ArgumentWrapper& arg1,
|
||||
const ArgumentWrapper& arg2,
|
||||
const ArgumentWrapper& arg3,
|
||||
const ArgumentWrapper& arg4,
|
||||
const ArgumentWrapper& arg5,
|
||||
const ArgumentWrapper& arg6,
|
||||
const ArgumentWrapper& arg7,
|
||||
const ArgumentWrapper& arg8,
|
||||
const ArgumentWrapper& arg9,
|
||||
const ArgumentWrapper& arg10,
|
||||
const ArgumentWrapper& arg11,
|
||||
const ArgumentWrapper& arg12,
|
||||
const ArgumentWrapper& arg13,
|
||||
const ArgumentWrapper& arg14,
|
||||
const ArgumentWrapper& arg15,
|
||||
const ArgumentWrapper& arg16,
|
||||
const ArgumentWrapper& arg17,
|
||||
const ArgumentWrapper& arg18,
|
||||
const ArgumentWrapper& arg19,
|
||||
const ArgumentWrapper& arg20,
|
||||
const ArgumentWrapper& arg21,
|
||||
const ArgumentWrapper& arg22,
|
||||
const ArgumentWrapper& arg23,
|
||||
const ArgumentWrapper& arg24,
|
||||
const ArgumentWrapper& arg25,
|
||||
const ArgumentWrapper& arg26,
|
||||
const ArgumentWrapper& arg27,
|
||||
const ArgumentWrapper& arg28,
|
||||
const ArgumentWrapper& arg29,
|
||||
const ArgumentWrapper& arg30,
|
||||
const ArgumentWrapper& arg31,
|
||||
const ArgumentWrapper& arg32,
|
||||
const ArgumentWrapper& arg33)
|
||||
{
|
||||
int current_arg_index = 0;
|
||||
#define FAKE_VARARG_HANDLE_ARG(arg) \
|
||||
do { \
|
||||
if(arg.pointer != NULL) { \
|
||||
opencl_assert(clSetKernelArg( \
|
||||
kernel, \
|
||||
start_argument_index + current_arg_index, \
|
||||
arg.size, arg.pointer)); \
|
||||
++current_arg_index; \
|
||||
} \
|
||||
else { \
|
||||
return current_arg_index; \
|
||||
} \
|
||||
} while(false)
|
||||
FAKE_VARARG_HANDLE_ARG(arg1);
|
||||
FAKE_VARARG_HANDLE_ARG(arg2);
|
||||
FAKE_VARARG_HANDLE_ARG(arg3);
|
||||
FAKE_VARARG_HANDLE_ARG(arg4);
|
||||
FAKE_VARARG_HANDLE_ARG(arg5);
|
||||
FAKE_VARARG_HANDLE_ARG(arg6);
|
||||
FAKE_VARARG_HANDLE_ARG(arg7);
|
||||
FAKE_VARARG_HANDLE_ARG(arg8);
|
||||
FAKE_VARARG_HANDLE_ARG(arg9);
|
||||
FAKE_VARARG_HANDLE_ARG(arg10);
|
||||
FAKE_VARARG_HANDLE_ARG(arg11);
|
||||
FAKE_VARARG_HANDLE_ARG(arg12);
|
||||
FAKE_VARARG_HANDLE_ARG(arg13);
|
||||
FAKE_VARARG_HANDLE_ARG(arg14);
|
||||
FAKE_VARARG_HANDLE_ARG(arg15);
|
||||
FAKE_VARARG_HANDLE_ARG(arg16);
|
||||
FAKE_VARARG_HANDLE_ARG(arg17);
|
||||
FAKE_VARARG_HANDLE_ARG(arg18);
|
||||
FAKE_VARARG_HANDLE_ARG(arg19);
|
||||
FAKE_VARARG_HANDLE_ARG(arg20);
|
||||
FAKE_VARARG_HANDLE_ARG(arg21);
|
||||
FAKE_VARARG_HANDLE_ARG(arg22);
|
||||
FAKE_VARARG_HANDLE_ARG(arg23);
|
||||
FAKE_VARARG_HANDLE_ARG(arg24);
|
||||
FAKE_VARARG_HANDLE_ARG(arg25);
|
||||
FAKE_VARARG_HANDLE_ARG(arg26);
|
||||
FAKE_VARARG_HANDLE_ARG(arg27);
|
||||
FAKE_VARARG_HANDLE_ARG(arg28);
|
||||
FAKE_VARARG_HANDLE_ARG(arg29);
|
||||
FAKE_VARARG_HANDLE_ARG(arg30);
|
||||
FAKE_VARARG_HANDLE_ARG(arg31);
|
||||
FAKE_VARARG_HANDLE_ARG(arg32);
|
||||
FAKE_VARARG_HANDLE_ARG(arg33);
|
||||
#undef FAKE_VARARG_HANDLE_ARG
|
||||
return current_arg_index;
|
||||
}
|
||||
|
||||
/* Release kernel, ignoring NULL handles. */
void OpenCLDeviceBase::release_kernel_safe(cl_kernel kernel)
{
	if(!kernel) {
		return;
	}
	clReleaseKernel(kernel);
}
|
||||
|
||||
/* Release mem, ignoring NULL handles. */
void OpenCLDeviceBase::release_mem_object_safe(cl_mem mem)
{
	if(mem == NULL) {
		return;
	}
	clReleaseMemObject(mem);
}
|
||||
|
||||
/* Release program, ignoring NULL handles. */
void OpenCLDeviceBase::release_program_safe(cl_program program)
{
	if(!program) {
		return;
	}
	clReleaseProgram(program);
}
|
||||
|
||||
/* ** These wrappers are for working around some compiler-specific bugs ** */
|
||||
|
||||
/* Look up the program stored under key for this device's platform/device
 * pair in the global OpenCL cache. Thin forwarder to
 * OpenCLCache::get_program(). */
cl_program OpenCLDeviceBase::load_cached_kernel(
        ustring key,
        thread_scoped_lock& cache_locker)
{
	cl_program cached_program = OpenCLCache::get_program(cpPlatform,
	                                                     cdDevice,
	                                                     key,
	                                                     cache_locker);
	return cached_program;
}
|
||||
|
||||
/* Store program under key for this device's platform/device pair in the
 * global OpenCL cache. Thin forwarder to OpenCLCache::store_program(). */
void OpenCLDeviceBase::store_cached_kernel(
        cl_program program,
        ustring key,
        thread_scoped_lock& cache_locker)
{
	OpenCLCache::store_program(cpPlatform, cdDevice, program, key, cache_locker);
}
|
||||
|
||||
/* Extra build options for the base program; currently always empty and
 * independent of the requested features. */
string OpenCLDeviceBase::build_options_for_base_program(
        const DeviceRequestedFeatures& /*requested_features*/)
{
	/* TODO(sergey): By default we compile all features, meaning
	 * mega kernel is not getting feature-based optimizations.
	 *
	 * Ideally we should always compile the kernel with as few features
	 * enabled as possible to keep performance at its max.
	 */
	return string("");
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
150
intern/cycles/device/opencl/opencl_mega.cpp
Normal file
150
intern/cycles/device/opencl/opencl_mega.cpp
Normal file
@@ -0,0 +1,150 @@
|
||||
/*
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_OPENCL
|
||||
|
||||
#include "opencl.h"
|
||||
|
||||
#include "buffers.h"
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
#include "util_md5.h"
|
||||
#include "util_path.h"
|
||||
#include "util_time.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* OpenCL device which renders tiles with a single "path_trace" kernel built
 * from the "megakernel" program. */
class OpenCLDeviceMegaKernel : public OpenCLDeviceBase
{
public:
	OpenCLProgram path_trace_program;

	OpenCLDeviceMegaKernel(DeviceInfo& info, Stats &stats, bool background_)
	: OpenCLDeviceBase(info, stats, background_),
	  path_trace_program(this, "megakernel", "kernel.cl", "-D__COMPILE_ONLY_MEGAKERNEL__ ")
	{
	}

	/* Register the path tracing kernel and add its program to the list of
	 * programs to be loaded. */
	virtual void load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
	                          vector<OpenCLProgram*> &programs)
	{
		path_trace_program.add_kernel(ustring("path_trace"));
		programs.push_back(&path_trace_program);
	}

	~OpenCLDeviceMegaKernel()
	{
		task_pool.stop();
		path_trace_program.release();
	}

	/* Enqueue the path tracing kernel for one sample of the given tile. */
	void path_trace(RenderTile& rtile, int sample)
	{
		cl_kernel trace_kernel = path_trace_program(ustring("path_trace"));

		/* Cast arguments to cl types. */
		cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
		cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
		cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state);
		cl_int d_x = rtile.x;
		cl_int d_y = rtile.y;
		cl_int d_w = rtile.w;
		cl_int d_h = rtile.h;
		cl_int d_offset = rtile.offset;
		cl_int d_stride = rtile.stride;

		/* Sample arguments. */
		cl_int d_sample = sample;

		/* Leading buffer arguments. */
		cl_uint arg_index = kernel_set_args(trace_kernel,
		                                    0,
		                                    d_data,
		                                    d_buffer,
		                                    d_rng_state);

		/* One argument per texture declared in kernel_textures.h. */
#define KERNEL_TEX(type, ttype, name) \
		set_kernel_arg_mem(trace_kernel, &arg_index, #name);
#include "kernel_textures.h"
#undef KERNEL_TEX

		/* Trailing scalar arguments. */
		arg_index += kernel_set_args(trace_kernel,
		                             arg_index,
		                             d_sample,
		                             d_x,
		                             d_y,
		                             d_w,
		                             d_h,
		                             d_offset,
		                             d_stride);

		enqueue_kernel(trace_kernel, d_w, d_h);
	}

	/* Execute a device task of type FILM_CONVERT, SHADER or PATH_TRACE;
	 * other task types are ignored. */
	void thread_run(DeviceTask *task)
	{
		if(task->type == DeviceTask::FILM_CONVERT) {
			film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
			return;
		}
		if(task->type == DeviceTask::SHADER) {
			shader(*task);
			return;
		}
		if(task->type != DeviceTask::PATH_TRACE) {
			return;
		}

		RenderTile tile;
		/* Keep rendering tiles until done. */
		while(task->acquire_tile(this, tile)) {
			const int first_sample = tile.start_sample;
			const int last_sample = tile.start_sample + tile.num_samples;

			for(int sample = first_sample; sample < last_sample; sample++) {
				/* On cancel only stop early when the queue does not
				 * need to be finished. */
				if(task->get_cancel() && task->need_finish_queue == false) {
					break;
				}

				path_trace(tile, sample);

				tile.sample = sample + 1;

				task->update_progress(&tile);
			}

			/* Complete kernel execution before release tile */
			/* This helps in multi-device render;
			 * The device that reaches the critical-section function
			 * release_tile waits (stalling other devices from entering
			 * release_tile) for all kernels to complete. If device1 (a
			 * slow-render device) reaches release_tile first then it would
			 * stall device2 (a fast-render device) from proceeding to render
			 * next tile.
			 */
			clFinish(cqCommandQueue);

			task->release_tile(tile);
		}
	}
};
|
||||
|
||||
/* Factory for the megakernel OpenCL device; the caller receives a newly
 * allocated OpenCLDeviceMegaKernel. */
Device *opencl_create_mega_device(DeviceInfo& info, Stats& stats, bool background)
{
	Device *mega_device = new OpenCLDeviceMegaKernel(info, stats, background);
	return mega_device;
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
1307
intern/cycles/device/opencl/opencl_split.cpp
Normal file
1307
intern/cycles/device/opencl/opencl_split.cpp
Normal file
File diff suppressed because it is too large
Load Diff
800
intern/cycles/device/opencl/opencl_util.cpp
Normal file
800
intern/cycles/device/opencl/opencl_util.cpp
Normal file
@@ -0,0 +1,800 @@
|
||||
/*
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_OPENCL
|
||||
|
||||
#include "opencl.h"
|
||||
|
||||
#include "util_logging.h"
|
||||
#include "util_path.h"
|
||||
#include "util_time.h"
|
||||
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* An empty entry: no program cached yet and no mutex allocated. */
OpenCLCache::Slot::ProgramEntry::ProgramEntry()
: program(NULL),
  mutex(NULL)
{
}
|
||||
|
||||
/* Copies the program handle only; the copy starts without a mutex so the
 * mutex is never shared between (or deleted by) two entries. */
OpenCLCache::Slot::ProgramEntry::ProgramEntry(const ProgramEntry& rhs)
: program(rhs.program),
  mutex(NULL)
{
}
|
||||
|
||||
/* Frees the mutex owned by this entry (deleting NULL is a no-op). */
OpenCLCache::Slot::ProgramEntry::~ProgramEntry()
{
	delete mutex;
}
|
||||
|
||||
/* An empty slot: no context cached yet and no context mutex allocated. */
OpenCLCache::Slot::Slot()
: context_mutex(NULL),
  context(NULL)
{
}
|
||||
|
||||
/* Copies only the program entries; the context and the context mutex of the
 * copy start out as NULL. */
OpenCLCache::Slot::Slot(const Slot& rhs)
: context_mutex(NULL),
  context(NULL),
  programs(rhs.programs)
{
}
|
||||
|
||||
/* Frees the context mutex owned by this slot (deleting NULL is a no-op). */
OpenCLCache::Slot::~Slot()
{
	delete context_mutex;
}
|
||||
|
||||
/* Access the process-wide cache, constructed on first use as a
 * function-local static. */
OpenCLCache& OpenCLCache::global_instance()
{
	static OpenCLCache cache_instance;
	return cache_instance;
}
|
||||
|
||||
/* Fetch the cached context for the platform/device pair.
 *
 * When a context is cached it is retained and returned, with slot_locker
 * released. When nothing is cached NULL is returned and slot_locker keeps
 * holding the slot lock, so the caller can create the context and hand it
 * over with store_context().
 */
cl_context OpenCLCache::get_context(cl_platform_id platform,
                                    cl_device_id device,
                                    thread_scoped_lock& slot_locker)
{
	assert(platform != NULL);

	OpenCLCache& self = global_instance();

	thread_scoped_lock cache_lock(self.cache_lock);

	/* Find the slot, creating an empty one if it does not exist yet. */
	pair<CacheMap::iterator,bool> inserted = self.cache.insert(
		CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
	Slot &slot = inserted.first->second;

	/* create slot lock only while holding cache lock */
	if(slot.context_mutex == NULL) {
		slot.context_mutex = new thread_mutex;
	}

	/* need to unlock cache before locking slot, to allow store to complete */
	cache_lock.unlock();

	/* lock the slot */
	slot_locker = thread_scoped_lock(*slot.context_mutex);

	if(slot.context != NULL) {
		/* the item was already cached, release the slot lock */
		slot_locker.unlock();

		cl_int ciErr = clRetainContext(slot.context);
		assert(ciErr == CL_SUCCESS);
		(void)ciErr;

		return slot.context;
	}

	/* Not cached: return with the caller's lock holder holding the slot lock */
	return NULL;
}
|
||||
|
||||
/* Fetch the cached program stored under key for the platform/device pair.
 *
 * When a program is cached it is retained and returned, with slot_locker
 * released. When nothing is cached NULL is returned and slot_locker keeps
 * holding the entry lock, so the caller can build the program and hand it
 * over with store_program().
 */
cl_program OpenCLCache::get_program(cl_platform_id platform,
                                    cl_device_id device,
                                    ustring key,
                                    thread_scoped_lock& slot_locker)
{
	assert(platform != NULL);

	OpenCLCache& self = global_instance();

	thread_scoped_lock cache_lock(self.cache_lock);

	/* Find the slot, creating an empty one if it does not exist yet. */
	pair<CacheMap::iterator,bool> slot_inserted = self.cache.insert(
		CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
	Slot &slot = slot_inserted.first->second;

	/* Same for the program entry inside of the slot. */
	pair<Slot::EntryMap::iterator,bool> entry_inserted = slot.programs.insert(
		Slot::EntryMap::value_type(key, Slot::ProgramEntry()));
	Slot::ProgramEntry &entry = entry_inserted.first->second;

	/* create slot lock only while holding cache lock */
	if(entry.mutex == NULL) {
		entry.mutex = new thread_mutex;
	}

	/* need to unlock cache before locking slot, to allow store to complete */
	cache_lock.unlock();

	/* lock the slot */
	slot_locker = thread_scoped_lock(*entry.mutex);

	if(entry.program != NULL) {
		/* the item was already cached, release the slot lock */
		slot_locker.unlock();

		cl_int ciErr = clRetainProgram(entry.program);
		assert(ciErr == CL_SUCCESS);
		(void)ciErr;

		return entry.program;
	}

	/* Not cached: return with the caller's lock holder holding the slot lock */
	return NULL;
}
|
||||
|
||||
/* Store a newly created context in the slot of the platform/device pair.
 *
 * The slot is expected to exist already and to have no context yet (it is
 * created by a preceding get_context() call which returned NULL).
 * slot_locker is the lock holder filled in by that call; it is released
 * here. The context is retained once more on behalf of the cache, since the
 * caller releases its own reference when done.
 */
void OpenCLCache::store_context(cl_platform_id platform,
                                cl_device_id device,
                                cl_context context,
                                thread_scoped_lock& slot_locker)
{
	assert(platform != NULL);
	assert(device != NULL);
	assert(context != NULL);

	OpenCLCache &self = global_instance();

	thread_scoped_lock cache_lock(self.cache_lock);
	CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));

	/* Sanity check the lookup before dereferencing the iterator, and while
	 * the cache lock still protects the map. */
	assert(i != self.cache.end());
	Slot &slot = i->second;

	cache_lock.unlock();

	/* sanity check */
	assert(slot.context == NULL);

	slot.context = context;

	/* unlock the slot */
	slot_locker.unlock();

	/* increment reference count in OpenCL.
	 * The caller is going to release the object when done with it. */
	cl_int ciErr = clRetainContext(context);
	assert(ciErr == CL_SUCCESS);
	(void)ciErr;
}
|
||||
|
||||
/* Store a newly built program under key in the slot of the platform/device
 * pair.
 *
 * Both the slot and the entry are expected to exist already with no program
 * set. slot_locker is released here. The program is retained once more on
 * behalf of the cache.
 */
void OpenCLCache::store_program(cl_platform_id platform,
                                cl_device_id device,
                                cl_program program,
                                ustring key,
                                thread_scoped_lock& slot_locker)
{
	assert(platform != NULL);
	assert(device != NULL);
	assert(program != NULL);

	OpenCLCache &self = global_instance();

	thread_scoped_lock cache_lock(self.cache_lock);

	/* Locate the slot of the device. */
	CacheMap::iterator slot_it = self.cache.find(PlatformDevicePair(platform, device));
	assert(slot_it != self.cache.end());
	Slot &slot = slot_it->second;

	/* Locate the program entry within the slot. */
	Slot::EntryMap::iterator entry_it = slot.programs.find(key);
	assert(entry_it != slot.programs.end());
	Slot::ProgramEntry &entry = entry_it->second;

	assert(entry.program == NULL);

	cache_lock.unlock();

	entry.program = program;

	/* unlock the slot */
	slot_locker.unlock();

	/* Increment reference count in OpenCL.
	 * The caller is going to release the object when done with it.
	 */
	cl_int ciErr = clRetainProgram(program);
	assert(ciErr == CL_SUCCESS);
	(void)ciErr;
}
|
||||
|
||||
/* Return the MD5 hash of the files under the "kernel" path. The hash is
 * computed on first use and kept in the global cache instance; access is
 * guarded by kernel_md5_lock. */
string OpenCLCache::get_kernel_md5()
{
	OpenCLCache &self = global_instance();
	thread_scoped_lock lock(self.kernel_md5_lock);

	const bool need_compute = self.kernel_md5.empty();
	if(need_compute) {
		self.kernel_md5 = path_files_md5_hash(path_get("kernel"));
	}
	return self.kernel_md5;
}
|
||||
|
||||
/* Describe a program to be built for device from kernel_file with the given
 * extra build options. Nothing is compiled here; the program starts out as
 * not loaded and without an OpenCL program handle. */
OpenCLDeviceBase::OpenCLProgram::OpenCLProgram(OpenCLDeviceBase *device,
                                               string program_name,
                                               string kernel_file,
                                               string kernel_build_options,
                                               bool use_stdout)
: device(device),
  program_name(program_name),
  kernel_file(kernel_file),
  kernel_build_options(kernel_build_options),
  use_stdout(use_stdout)
{
	program = NULL;
	loaded = false;
}
|
||||
|
||||
/* Releases all kernels and the program handle via release(). */
OpenCLDeviceBase::OpenCLProgram::~OpenCLProgram()
{
	release();
}
|
||||
|
||||
void OpenCLDeviceBase::OpenCLProgram::release()
|
||||
{
|
||||
for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
|
||||
if(kernel->second) {
|
||||
clReleaseKernel(kernel->second);
|
||||
kernel->second = NULL;
|
||||
}
|
||||
}
|
||||
if(program) {
|
||||
clReleaseProgram(program);
|
||||
program = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Record a log message.
 *
 * Without use_stdout the message is appended to the internal log. Otherwise
 * it is printed to stdout, or sent to VLOG(2) when debug is set.
 */
void OpenCLDeviceBase::OpenCLProgram::add_log(string msg, bool debug)
{
	if(!use_stdout) {
		log += msg + "\n";
		return;
	}

	if(debug) {
		VLOG(2) << msg;
	}
	else {
		printf("%s\n", msg.c_str());
	}
}
|
||||
|
||||
/* Record an error message.
 *
 * The message is always appended to error_msg; with use_stdout it is also
 * printed to stderr right away.
 */
void OpenCLDeviceBase::OpenCLProgram::add_error(string msg)
{
	if(use_stdout) {
		fprintf(stderr, "%s\n", msg.c_str());
	}

	/* Put each message on its own line. The separator is only needed when
	 * something has been recorded before; the previous check was inverted,
	 * which prefixed the first message with a newline and glued all later
	 * messages together.
	 */
	if(!error_msg.empty()) {
		error_msg += "\n";
	}
	error_msg += msg;
}
|
||||
|
||||
/* Register a kernel name with a NULL handle. An already registered name is
 * left untouched, so an existing handle is never overwritten.
 */
void OpenCLDeviceBase::OpenCLProgram::add_kernel(ustring name)
{
	if(kernels.find(name) != kernels.end()) {
		return;
	}
	kernels[name] = NULL;
}
|
||||
|
||||
bool OpenCLDeviceBase::OpenCLProgram::build_kernel(const string *debug_src)
|
||||
{
|
||||
string build_options;
|
||||
build_options = device->kernel_build_options(debug_src) + kernel_build_options;
|
||||
|
||||
cl_int ciErr = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
|
||||
|
||||
/* show warnings even if build is successful */
|
||||
size_t ret_val_size = 0;
|
||||
|
||||
clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
|
||||
|
||||
if(ret_val_size > 1) {
|
||||
vector<char> build_log(ret_val_size + 1);
|
||||
clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, &build_log[0], NULL);
|
||||
|
||||
build_log[ret_val_size] = '\0';
|
||||
/* Skip meaningless empty output from the NVidia compiler. */
|
||||
if(!(ret_val_size == 2 && build_log[0] == '\n')) {
|
||||
add_error("OpenCL build failed: errors in console");
|
||||
if(use_stdout) {
|
||||
fprintf(stderr, "OpenCL kernel build output:\n%s\n", &build_log[0]);
|
||||
}
|
||||
else {
|
||||
compile_output = string(&build_log[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(ciErr != CL_SUCCESS) {
|
||||
add_error(string("OpenCL build failed: ") + clewErrorString(ciErr));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
|
||||
{
|
||||
string source = "#include \"kernels/opencl/" + kernel_file + "\" // " + OpenCLCache::get_kernel_md5() + "\n";
|
||||
/* We compile kernels consisting of many files. unfortunately OpenCL
|
||||
* kernel caches do not seem to recognize changes in included files.
|
||||
* so we force recompile on changes by adding the md5 hash of all files.
|
||||
*/
|
||||
source = path_source_replace_includes(source, path_get("kernel"));
|
||||
|
||||
if(debug_src) {
|
||||
path_write_text(*debug_src, source);
|
||||
}
|
||||
|
||||
size_t source_len = source.size();
|
||||
const char *source_str = source.c_str();
|
||||
cl_int ciErr;
|
||||
|
||||
program = clCreateProgramWithSource(device->cxContext,
|
||||
1,
|
||||
&source_str,
|
||||
&source_len,
|
||||
&ciErr);
|
||||
|
||||
if(ciErr != CL_SUCCESS) {
|
||||
add_error(string("OpenCL program creation failed: ") + clewErrorString(ciErr));
|
||||
return false;
|
||||
}
|
||||
|
||||
double starttime = time_dt();
|
||||
add_log(string("Compiling OpenCL program ") + program_name.c_str(), false);
|
||||
add_log(string("Build flags: ") + kernel_build_options, true);
|
||||
|
||||
if(!build_kernel(debug_src))
|
||||
return false;
|
||||
|
||||
add_log(string("Kernel compilation of ") + program_name + " finished in " + string_printf("%.2lfs.\n", time_dt() - starttime), false);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OpenCLDeviceBase::OpenCLProgram::load_binary(const string& clbin,
|
||||
const string *debug_src)
|
||||
{
|
||||
/* read binary into memory */
|
||||
vector<uint8_t> binary;
|
||||
|
||||
if(!path_read_binary(clbin, binary)) {
|
||||
add_error(string_printf("OpenCL failed to read cached binary %s.", clbin.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
/* create program */
|
||||
cl_int status, ciErr;
|
||||
size_t size = binary.size();
|
||||
const uint8_t *bytes = &binary[0];
|
||||
|
||||
program = clCreateProgramWithBinary(device->cxContext, 1, &device->cdDevice,
|
||||
&size, &bytes, &status, &ciErr);
|
||||
|
||||
if(status != CL_SUCCESS || ciErr != CL_SUCCESS) {
|
||||
add_error(string("OpenCL failed create program from cached binary ") + clbin + ": "
|
||||
+ clewErrorString(status) + " " + clewErrorString(ciErr));
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!build_kernel(debug_src))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OpenCLDeviceBase::OpenCLProgram::save_binary(const string& clbin)
|
||||
{
|
||||
size_t size = 0;
|
||||
clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
|
||||
|
||||
if(!size)
|
||||
return false;
|
||||
|
||||
vector<uint8_t> binary(size);
|
||||
uint8_t *bytes = &binary[0];
|
||||
|
||||
clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
|
||||
|
||||
return path_write_binary(clbin, binary);
|
||||
}
|
||||
|
||||
void OpenCLDeviceBase::OpenCLProgram::load()
|
||||
{
|
||||
assert(device);
|
||||
|
||||
loaded = false;
|
||||
|
||||
string device_md5 = device->device_md5_hash(kernel_build_options);
|
||||
|
||||
/* Try to use cached kernel. */
|
||||
thread_scoped_lock cache_locker;
|
||||
ustring cache_key(program_name + device_md5);
|
||||
program = device->load_cached_kernel(cache_key,
|
||||
cache_locker);
|
||||
|
||||
if(!program) {
|
||||
add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
|
||||
|
||||
string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + OpenCLCache::get_kernel_md5();
|
||||
basename = path_cache_get(path_join("kernels", basename));
|
||||
string clbin = basename + ".clbin";
|
||||
|
||||
/* path to preprocessed source for debugging */
|
||||
string clsrc, *debug_src = NULL;
|
||||
|
||||
if(OpenCLInfo::use_debug()) {
|
||||
clsrc = basename + ".cl";
|
||||
debug_src = &clsrc;
|
||||
}
|
||||
|
||||
/* If binary kernel exists already, try use it. */
|
||||
if(path_exists(clbin) && load_binary(clbin)) {
|
||||
/* Kernel loaded from binary, nothing to do. */
|
||||
add_log(string("Loaded program from ") + clbin + ".", true);
|
||||
}
|
||||
else {
|
||||
add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true);
|
||||
|
||||
/* If does not exist or loading binary failed, compile kernel. */
|
||||
if(!compile_kernel(debug_src)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Save binary for reuse. */
|
||||
if(!save_binary(clbin)) {
|
||||
add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
|
||||
}
|
||||
}
|
||||
|
||||
/* Cache the program. */
|
||||
device->store_cached_kernel(program,
|
||||
cache_key,
|
||||
cache_locker);
|
||||
}
|
||||
else {
|
||||
add_log(string("Found cached OpenCL program ") + program_name + ".", true);
|
||||
}
|
||||
|
||||
for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
|
||||
assert(kernel->second == NULL);
|
||||
cl_int ciErr;
|
||||
string name = "kernel_ocl_" + kernel->first.string();
|
||||
kernel->second = clCreateKernel(program, name.c_str(), &ciErr);
|
||||
if(device->opencl_error(ciErr)) {
|
||||
add_error(string("Error getting kernel ") + name + " from program " + program_name + ": " + clewErrorString(ciErr));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
loaded = true;
|
||||
}
|
||||
|
||||
void OpenCLDeviceBase::OpenCLProgram::report_error()
|
||||
{
|
||||
/* If loaded is true, there was no error. */
|
||||
if(loaded) return;
|
||||
/* if use_stdout is true, the error was already reported. */
|
||||
if(use_stdout) return;
|
||||
|
||||
cerr << error_msg << endl;
|
||||
if(!compile_output.empty()) {
|
||||
cerr << "OpenCL kernel build output for " << program_name << ":" << endl;
|
||||
cerr << compile_output << endl;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the kernel of a program that has exactly one registered kernel. */
cl_kernel OpenCLDeviceBase::OpenCLProgram::operator()()
{
	assert(kernels.size() == 1);

	map<ustring, cl_kernel>::iterator only_kernel = kernels.begin();
	return only_kernel->second;
}
|
||||
|
||||
/* Return the kernel registered under the given name.
 *
 * The name is expected to have been registered before; this is only checked
 * by an assert.
 */
cl_kernel OpenCLDeviceBase::OpenCLProgram::operator()(ustring name)
{
	assert(kernels.count(name));

	cl_kernel &handle = kernels[name];
	return handle;
}
|
||||
|
||||
/* Translate the device type requested through the debug flags into the
 * matching OpenCL device type. Zero means that no device should be used;
 * unknown values select all device types.
 */
cl_device_type OpenCLInfo::device_type()
{
	switch(DebugFlags().opencl.device_type) {
		case DebugFlags::OpenCL::DEVICE_NONE:
			return 0;
		case DebugFlags::OpenCL::DEVICE_DEFAULT:
			return CL_DEVICE_TYPE_DEFAULT;
		case DebugFlags::OpenCL::DEVICE_CPU:
			return CL_DEVICE_TYPE_CPU;
		case DebugFlags::OpenCL::DEVICE_GPU:
			return CL_DEVICE_TYPE_GPU;
		case DebugFlags::OpenCL::DEVICE_ACCELERATOR:
			return CL_DEVICE_TYPE_ACCELERATOR;
		case DebugFlags::OpenCL::DEVICE_ALL:
		default:
			return CL_DEVICE_TYPE_ALL;
	}
}
|
||||
|
||||
bool OpenCLInfo::use_debug()
|
||||
{
|
||||
return DebugFlags().opencl.debug;
|
||||
}
|
||||
|
||||
bool OpenCLInfo::kernel_use_advanced_shading(const string& platform)
|
||||
{
|
||||
/* keep this in sync with kernel_types.h! */
|
||||
if(platform == "NVIDIA CUDA")
|
||||
return true;
|
||||
else if(platform == "Apple")
|
||||
return true;
|
||||
else if(platform == "AMD Accelerated Parallel Processing")
|
||||
return true;
|
||||
else if(platform == "Intel(R) OpenCL")
|
||||
return true;
|
||||
/* Make sure officially unsupported OpenCL platforms
|
||||
* does not set up to use advanced shading.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
bool OpenCLInfo::kernel_use_split(const string& platform_name,
|
||||
const cl_device_type device_type)
|
||||
{
|
||||
if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_SPLIT) {
|
||||
VLOG(1) << "Forcing split kernel to use.";
|
||||
return true;
|
||||
}
|
||||
if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_MEGA) {
|
||||
VLOG(1) << "Forcing mega kernel to use.";
|
||||
return false;
|
||||
}
|
||||
/* TODO(sergey): Replace string lookups with more enum-like API,
|
||||
* similar to device/vendor checks blender's gpu.
|
||||
*/
|
||||
if(platform_name == "AMD Accelerated Parallel Processing" &&
|
||||
device_type == CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool OpenCLInfo::device_supported(const string& platform_name,
|
||||
const cl_device_id device_id)
|
||||
{
|
||||
cl_device_type device_type;
|
||||
clGetDeviceInfo(device_id,
|
||||
CL_DEVICE_TYPE,
|
||||
sizeof(cl_device_type),
|
||||
&device_type,
|
||||
NULL);
|
||||
if(platform_name == "AMD Accelerated Parallel Processing" &&
|
||||
device_type == CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
if(platform_name == "Apple" && device_type == CL_DEVICE_TYPE_GPU) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Check that a platform provides at least OpenCL 1.1.
 *
 * platform: platform to check.
 * error:    optional; set to a description of the failure, or to an empty
 *           string when the check passes.
 *
 * Returns true when the platform version is 1.1 or later.
 */
bool OpenCLInfo::platform_version_check(cl_platform_id platform,
                                        string *error)
{
	const int req_major = 1, req_minor = 1;
	int major, minor;
	char version[256] = "";

	/* Without a successful query the buffer holds no version string, so it
	 * must not be parsed or printed.
	 */
	if(clGetPlatformInfo(platform,
	                     CL_PLATFORM_VERSION,
	                     sizeof(version),
	                     version,
	                     NULL) != CL_SUCCESS)
	{
		if(error != NULL) {
			*error = "OpenCL: failed to query platform version string.";
		}
		return false;
	}

	if(sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) {
		if(error != NULL) {
			*error = string_printf("OpenCL: failed to parse platform version string (%s).", version);
		}
		return false;
	}

	const bool recent_enough = (major > req_major) ||
	                           (major == req_major && minor >= req_minor);
	if(!recent_enough) {
		if(error != NULL) {
			*error = string_printf("OpenCL: platform version %d.%d or later required, found %d.%d",
			                       req_major, req_minor, major, minor);
		}
		return false;
	}

	if(error != NULL) {
		*error = "";
	}
	return true;
}
|
||||
|
||||
/* Check that a device supports at least OpenCL C 1.1.
 *
 * device: device to check.
 * error:  optional; set to a description of the failure, or to an empty
 *         string when the check passes.
 *
 * Returns true when the OpenCL C version is 1.1 or later.
 */
bool OpenCLInfo::device_version_check(cl_device_id device,
                                      string *error)
{
	const int req_major = 1, req_minor = 1;
	int major, minor;
	char version[256] = "";

	/* Without a successful query the buffer holds no version string, so it
	 * must not be parsed or printed.
	 */
	if(clGetDeviceInfo(device,
	                   CL_DEVICE_OPENCL_C_VERSION,
	                   sizeof(version),
	                   version,
	                   NULL) != CL_SUCCESS)
	{
		if(error != NULL) {
			*error = "OpenCL: failed to query OpenCL C version string.";
		}
		return false;
	}

	if(sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) {
		if(error != NULL) {
			*error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version);
		}
		return false;
	}

	const bool recent_enough = (major > req_major) ||
	                           (major == req_major && minor >= req_minor);
	if(!recent_enough) {
		if(error != NULL) {
			*error = string_printf("OpenCL: C version %d.%d or later required, found %d.%d",
			                       req_major, req_minor, major, minor);
		}
		return false;
	}

	if(error != NULL) {
		*error = "";
	}
	return true;
}
|
||||
|
||||
/* Fill usable_devices with every OpenCL device that can be used.
 *
 * usable_devices: output list; cleared first.
 * force_all:      when true, devices are accepted even if device_supported()
 *                 rejects them. The same happens when a kernel type is
 *                 forced through the debug flags.
 *
 * Platforms and devices that fail a query or a version check are skipped.
 * The reasons are only logged during the first call.
 */
void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
                                    bool force_all)
{
	const bool force_all_platforms = force_all ||
		(DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT);
	/* Device type filter requested through the debug flags. */
	const cl_device_type device_type = OpenCLInfo::device_type();
	static bool first_time = true;
#define FIRST_VLOG(severity) if(first_time) VLOG(severity)

	usable_devices->clear();

	if(device_type == 0) {
		FIRST_VLOG(2) << "OpenCL devices are forced to be disabled.";
		first_time = false;
		return;
	}

	vector<cl_device_id> device_ids;
	cl_uint num_devices = 0;
	vector<cl_platform_id> platform_ids;
	cl_uint num_platforms = 0;

	/* Get devices. */
	if(clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS ||
	   num_platforms == 0)
	{
		FIRST_VLOG(2) << "No OpenCL platforms were found.";
		first_time = false;
		return;
	}
	platform_ids.resize(num_platforms);
	if(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL) != CL_SUCCESS) {
		FIRST_VLOG(2) << "Failed to fetch platform IDs from the driver..";
		first_time = false;
		return;
	}
	/* Devices are numbered consecutively across platforms. */
	for(cl_uint platform = 0; platform < num_platforms; platform++) {
		cl_platform_id platform_id = platform_ids[platform];
		char pname[256];
		if(clGetPlatformInfo(platform_id,
		                     CL_PLATFORM_NAME,
		                     sizeof(pname),
		                     &pname,
		                     NULL) != CL_SUCCESS)
		{
			FIRST_VLOG(2) << "Failed to get platform name, ignoring.";
			continue;
		}
		string platform_name = pname;
		FIRST_VLOG(2) << "Enumerating devices for platform "
		              << platform_name << ".";
		if(!platform_version_check(platform_id)) {
			FIRST_VLOG(2) << "Ignoring platform " << platform_name
			              << " due to too old compiler version.";
			continue;
		}
		num_devices = 0;
		cl_int ciErr;
		if((ciErr = clGetDeviceIDs(platform_id,
		                           device_type,
		                           0,
		                           NULL,
		                           &num_devices)) != CL_SUCCESS || num_devices == 0)
		{
			FIRST_VLOG(2) << "Ignoring platform " << platform_name
			              << ", failed to fetch number of devices: " << string(clewErrorString(ciErr));
			continue;
		}
		device_ids.resize(num_devices);
		if(clGetDeviceIDs(platform_id,
		                  device_type,
		                  num_devices,
		                  &device_ids[0],
		                  NULL) != CL_SUCCESS)
		{
			FIRST_VLOG(2) << "Ignoring platform " << platform_name
			              << ", failed to fetch devices list.";
			continue;
		}
		for(cl_uint num = 0; num < num_devices; num++) {
			cl_device_id device_id = device_ids[num];
			char device_name[1024] = "\0";
			if(clGetDeviceInfo(device_id,
			                   CL_DEVICE_NAME,
			                   sizeof(device_name),
			                   &device_name,
			                   NULL) != CL_SUCCESS)
			{
				FIRST_VLOG(2) << "Failed to fetch device name, ignoring.";
				continue;
			}
			if(!device_version_check(device_id)) {
				FIRST_VLOG(2) << "Ignoring device " << device_name
				              << " due to old compiler version.";
				continue;
			}
			if(force_all_platforms ||
			   device_supported(platform_name, device_id))
			{
				/* Actual type of this device, which is stored with it. It is
				 * named differently from the filter above so that the two
				 * are not confused.
				 */
				cl_device_type current_device_type;
				if(clGetDeviceInfo(device_id,
				                   CL_DEVICE_TYPE,
				                   sizeof(cl_device_type),
				                   &current_device_type,
				                   NULL) != CL_SUCCESS)
				{
					FIRST_VLOG(2) << "Ignoring device " << device_name
					              << ", failed to fetch device type.";
					continue;
				}
				FIRST_VLOG(2) << "Adding new device " << device_name << ".";
				usable_devices->push_back(OpenCLPlatformDevice(platform_id,
				                                               platform_name,
				                                               device_id,
				                                               current_device_type,
				                                               device_name));
			}
			else {
				FIRST_VLOG(2) << "Ignoring device " << device_name
				              << ", not officially supported yet.";
			}
		}
	}
	first_time = false;
	/* The logging helper is only meant for this function. */
#undef FIRST_VLOG
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
@@ -217,11 +217,11 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
|
||||
|
||||
# warn for other versions
|
||||
if(CUDA_VERSION MATCHES "75")
|
||||
if(CUDA_VERSION MATCHES "80")
|
||||
else()
|
||||
message(WARNING
|
||||
"CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
|
||||
"build may succeed but only CUDA 7.5 is officially supported")
|
||||
"build may succeed but only CUDA 8.0 is officially supported")
|
||||
endif()
|
||||
|
||||
# build for each arch
|
||||
@@ -253,11 +253,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
set(cuda_nvcc_command ${CUDA_NVCC_EXECUTABLE})
|
||||
set(cuda_nvcc_version ${CUDA_VERSION})
|
||||
|
||||
if(DEFINED CUDA_NVCC8_EXECUTABLE AND ((${arch} STREQUAL "sm_60") OR (${arch} STREQUAL "sm_61")))
|
||||
set(cuda_nvcc_command ${CUDA_NVCC8_EXECUTABLE})
|
||||
set(cuda_nvcc_version "80")
|
||||
endif()
|
||||
|
||||
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${cuda_nvcc_version}")
|
||||
set(cuda_math_flags "--use_fast_math")
|
||||
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
/*
|
||||
* Adapted from code Copyright 2009-2010 NVIDIA Corporation
|
||||
* Modifications Copyright 2011, Blender Foundation.
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@@ -158,8 +157,9 @@ CCL_NAMESPACE_BEGIN
|
||||
#undef BVH_NAME_EVAL
|
||||
#undef BVH_FUNCTION_FULL_NAME
|
||||
|
||||
/* Note: ray is passed by value to work around a possible CUDA compiler bug. */
|
||||
ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
const Ray ray,
|
||||
const uint visibility,
|
||||
Intersection *isect,
|
||||
uint *lcg_state,
|
||||
@@ -170,32 +170,32 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
|
||||
if(kernel_data.bvh.have_motion) {
|
||||
# ifdef __HAIR__
|
||||
if(kernel_data.bvh.have_curves)
|
||||
return bvh_intersect_hair_motion(kg, ray, isect, visibility, lcg_state, difl, extmax);
|
||||
return bvh_intersect_hair_motion(kg, &ray, isect, visibility, lcg_state, difl, extmax);
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_motion(kg, ray, isect, visibility);
|
||||
return bvh_intersect_motion(kg, &ray, isect, visibility);
|
||||
}
|
||||
#endif /* __OBJECT_MOTION__ */
|
||||
|
||||
#ifdef __HAIR__
|
||||
if(kernel_data.bvh.have_curves)
|
||||
return bvh_intersect_hair(kg, ray, isect, visibility, lcg_state, difl, extmax);
|
||||
return bvh_intersect_hair(kg, &ray, isect, visibility, lcg_state, difl, extmax);
|
||||
#endif /* __HAIR__ */
|
||||
|
||||
#ifdef __KERNEL_CPU__
|
||||
|
||||
# ifdef __INSTANCING__
|
||||
if(kernel_data.bvh.have_instancing)
|
||||
return bvh_intersect_instancing(kg, ray, isect, visibility);
|
||||
return bvh_intersect_instancing(kg, &ray, isect, visibility);
|
||||
# endif /* __INSTANCING__ */
|
||||
|
||||
return bvh_intersect(kg, ray, isect, visibility);
|
||||
return bvh_intersect(kg, &ray, isect, visibility);
|
||||
#else /* __KERNEL_CPU__ */
|
||||
|
||||
# ifdef __INSTANCING__
|
||||
return bvh_intersect_instancing(kg, ray, isect, visibility);
|
||||
return bvh_intersect_instancing(kg, &ray, isect, visibility);
|
||||
# else
|
||||
return bvh_intersect(kg, ray, isect, visibility);
|
||||
return bvh_intersect(kg, &ray, isect, visibility);
|
||||
# endif /* __INSTANCING__ */
|
||||
|
||||
#endif /* __KERNEL_CPU__ */
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
// TODO(sergey): Look into avoid use of full Transform and use 3x3 matrix and
|
||||
// 3-vector which might be faster.
|
||||
ccl_device_inline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
|
||||
ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
|
||||
int node_addr,
|
||||
int child)
|
||||
{
|
||||
@@ -30,7 +30,7 @@ ccl_device_inline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
|
||||
}
|
||||
|
||||
#if !defined(__KERNEL_SSE2__)
|
||||
ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg,
|
||||
ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
@@ -77,7 +77,7 @@ ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg,
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
|
||||
ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 idir,
|
||||
const float t,
|
||||
@@ -139,7 +139,7 @@ ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline bool bvh_unaligned_node_intersect_child(
|
||||
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(
|
||||
KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
@@ -166,7 +166,7 @@ ccl_device_inline bool bvh_unaligned_node_intersect_child(
|
||||
return tnear <= tfar;
|
||||
}
|
||||
|
||||
ccl_device_inline bool bvh_unaligned_node_intersect_child_robust(
|
||||
ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(
|
||||
KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
@@ -202,7 +202,7 @@ ccl_device_inline bool bvh_unaligned_node_intersect_child_robust(
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_inline int bvh_unaligned_node_intersect(KernelGlobals *kg,
|
||||
ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
@@ -232,7 +232,7 @@ ccl_device_inline int bvh_unaligned_node_intersect(KernelGlobals *kg,
|
||||
return mask;
|
||||
}
|
||||
|
||||
ccl_device_inline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
|
||||
ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
@@ -264,7 +264,7 @@ ccl_device_inline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
|
||||
return mask;
|
||||
}
|
||||
|
||||
ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
|
||||
ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
@@ -295,7 +295,7 @@ ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
|
||||
ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float3 idir,
|
||||
@@ -333,7 +333,7 @@ ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
|
||||
}
|
||||
#else /* !defined(__KERNEL_SSE2__) */
|
||||
|
||||
int ccl_device_inline bvh_aligned_node_intersect(
|
||||
int ccl_device_forceinline bvh_aligned_node_intersect(
|
||||
KernelGlobals *kg,
|
||||
const float3& P,
|
||||
const float3& dir,
|
||||
@@ -377,7 +377,7 @@ int ccl_device_inline bvh_aligned_node_intersect(
|
||||
# endif
|
||||
}
|
||||
|
||||
int ccl_device_inline bvh_aligned_node_intersect_robust(
|
||||
ccl_device_forceinline int bvh_aligned_node_intersect_robust(
|
||||
KernelGlobals *kg,
|
||||
const float3& P,
|
||||
const float3& dir,
|
||||
@@ -441,7 +441,7 @@ int ccl_device_inline bvh_aligned_node_intersect_robust(
|
||||
# endif
|
||||
}
|
||||
|
||||
int ccl_device_inline bvh_unaligned_node_intersect(KernelGlobals *kg,
|
||||
ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const ssef& isect_near,
|
||||
@@ -502,7 +502,7 @@ int ccl_device_inline bvh_unaligned_node_intersect(KernelGlobals *kg,
|
||||
# endif
|
||||
}
|
||||
|
||||
int ccl_device_inline bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
|
||||
ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const ssef& isect_near,
|
||||
@@ -573,7 +573,7 @@ int ccl_device_inline bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
|
||||
# endif
|
||||
}
|
||||
|
||||
ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
|
||||
ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
|
||||
const float3& P,
|
||||
const float3& dir,
|
||||
const ssef& isect_near,
|
||||
@@ -611,7 +611,7 @@ ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
|
||||
ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
|
||||
const float3& P,
|
||||
const float3& dir,
|
||||
const ssef& isect_near,
|
||||
|
||||
@@ -40,21 +40,16 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
ccl_device
|
||||
#else
|
||||
ccl_device_inline
|
||||
#endif
|
||||
bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
Intersection *isect,
|
||||
const uint visibility
|
||||
ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
Intersection *isect,
|
||||
const uint visibility
|
||||
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
|
||||
, uint *lcg_state,
|
||||
float difl,
|
||||
float extmax
|
||||
, uint *lcg_state,
|
||||
float difl,
|
||||
float extmax
|
||||
#endif
|
||||
)
|
||||
)
|
||||
{
|
||||
/* todo:
|
||||
* - test if pushing distance on the stack helps (for non shadow rays)
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
/*
|
||||
* Adapted from code Copyright 2009-2010 NVIDIA Corporation
|
||||
* Modifications Copyright 2011, Blender Foundation.
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@@ -22,7 +21,7 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Don't inline intersect functions on GPU, this is faster */
|
||||
#ifdef __KERNEL_GPU__
|
||||
# define ccl_device_intersect ccl_device_noinline
|
||||
# define ccl_device_intersect ccl_device_forceinline
|
||||
#else
|
||||
# define ccl_device_intersect ccl_device_inline
|
||||
#endif
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Aligned nodes intersection SSE code is adopted from Embree,
|
||||
*/
|
||||
|
||||
struct QBVHStackItem {
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
/*
|
||||
* Adapted from code Copyright 2009-2010 NVIDIA Corporation,
|
||||
* and code copyright 2009-2012 Intel Corporation
|
||||
*
|
||||
* Modifications Copyright 2011-2014, Blender Foundation.
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
/*
|
||||
* Adapted from code Copyright 2009-2010 NVIDIA Corporation,
|
||||
* and code copyright 2009-2012 Intel Corporation
|
||||
*
|
||||
* Modifications Copyright 2011-2014, Blender Foundation.
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
/*
|
||||
* Adapted from code Copyright 2009-2010 NVIDIA Corporation,
|
||||
* and code copyright 2009-2012 Intel Corporation
|
||||
*
|
||||
* Modifications Copyright 2011-2014, Blender Foundation.
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
/*
|
||||
* Adapted from code Copyright 2009-2010 NVIDIA Corporation,
|
||||
* and code copyright 2009-2012 Intel Corporation
|
||||
*
|
||||
* Modifications Copyright 2011-2014, Blender Foundation.
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
/*
|
||||
* Adapted from code Copyright 2009-2010 NVIDIA Corporation,
|
||||
* and code copyright 2009-2012 Intel Corporation
|
||||
*
|
||||
* Modifications Copyright 2011-2014, Blender Foundation.
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
||||
@@ -38,7 +38,7 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
ccl_device_inline int bsdf_sample(KernelGlobals *kg,
|
||||
ccl_device_forceinline int bsdf_sample(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const ShaderClosure *sc,
|
||||
float randu,
|
||||
@@ -159,7 +159,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
|
||||
#ifndef __KERNEL_CUDA__
|
||||
ccl_device
|
||||
#else
|
||||
ccl_device_inline
|
||||
ccl_device_forceinline
|
||||
#endif
|
||||
float3 bsdf_eval(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
@@ -401,6 +401,8 @@ ccl_device bool bsdf_merge(ShaderClosure *a, ShaderClosure *b)
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float rough
|
||||
return 2.0f / (roughness*roughness) - 2.0f;
|
||||
}
|
||||
|
||||
ccl_device_inline float3 bsdf_ashikhmin_shirley_eval_reflect(
|
||||
ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(
|
||||
const ShaderClosure *sc,
|
||||
const float3 I,
|
||||
const float3 omega_in,
|
||||
|
||||
@@ -184,7 +184,7 @@ ccl_device_inline void microfacet_ggx_sample_slopes(
|
||||
*slope_y = S * z * safe_sqrtf(1.0f + (*slope_x)*(*slope_x));
|
||||
}
|
||||
|
||||
ccl_device_inline float3 microfacet_sample_stretched(
|
||||
ccl_device_forceinline float3 microfacet_sample_stretched(
|
||||
KernelGlobals *kg, const float3 omega_i,
|
||||
const float alpha_x, const float alpha_y,
|
||||
const float randu, const float randv,
|
||||
@@ -277,7 +277,7 @@ ccl_device bool bsdf_microfacet_merge(const ShaderClosure *a, const ShaderClosur
|
||||
(isequal_float3(bsdf_a->T, bsdf_b->T)) &&
|
||||
(bsdf_a->ior == bsdf_b->ior) &&
|
||||
((!bsdf_a->extra && !bsdf_b->extra) ||
|
||||
((bsdf_a->extra && bsdf_b->extra) &&
|
||||
((bsdf_a->extra && bsdf_b->extra) &&
|
||||
(isequal_float3(bsdf_a->extra->color, bsdf_b->extra->color))));
|
||||
}
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ CCL_NAMESPACE_BEGIN
|
||||
/* === GGX Microfacet distribution functions === */
|
||||
|
||||
/* Isotropic GGX microfacet distribution */
|
||||
ccl_device_inline float D_ggx(float3 wm, float alpha)
|
||||
ccl_device_forceinline float D_ggx(float3 wm, float alpha)
|
||||
{
|
||||
wm.z *= wm.z;
|
||||
alpha *= alpha;
|
||||
@@ -30,7 +30,7 @@ ccl_device_inline float D_ggx(float3 wm, float alpha)
|
||||
}
|
||||
|
||||
/* Anisotropic GGX microfacet distribution */
|
||||
ccl_device_inline float D_ggx_aniso(const float3 wm, const float2 alpha)
|
||||
ccl_device_forceinline float D_ggx_aniso(const float3 wm, const float2 alpha)
|
||||
{
|
||||
float slope_x = -wm.x/alpha.x;
|
||||
float slope_y = -wm.y/alpha.y;
|
||||
@@ -40,7 +40,7 @@ ccl_device_inline float D_ggx_aniso(const float3 wm, const float2 alpha)
|
||||
}
|
||||
|
||||
/* Sample slope distribution (based on page 14 of the supplemental implementation). */
|
||||
ccl_device_inline float2 mf_sampleP22_11(const float cosI, const float2 randU)
|
||||
ccl_device_forceinline float2 mf_sampleP22_11(const float cosI, const float2 randU)
|
||||
{
|
||||
if(cosI > 0.9999f || cosI < 1e-6f) {
|
||||
const float r = sqrtf(randU.x / (1.0f - randU.x));
|
||||
@@ -78,7 +78,7 @@ ccl_device_inline float2 mf_sampleP22_11(const float cosI, const float2 randU)
|
||||
}
|
||||
|
||||
/* Visible normal sampling for the GGX distribution (based on page 7 of the supplemental implementation). */
|
||||
ccl_device_inline float3 mf_sample_vndf(const float3 wi, const float2 alpha, const float2 randU)
|
||||
ccl_device_forceinline float3 mf_sample_vndf(const float3 wi, const float2 alpha, const float2 randU)
|
||||
{
|
||||
const float3 wi_11 = normalize(make_float3(alpha.x*wi.x, alpha.y*wi.y, wi.z));
|
||||
const float2 slope_11 = mf_sampleP22_11(wi_11.z, randU);
|
||||
@@ -94,7 +94,7 @@ ccl_device_inline float3 mf_sample_vndf(const float3 wi, const float2 alpha, con
|
||||
/* === Phase functions: Glossy, Diffuse and Glass === */
|
||||
|
||||
/* Phase function for reflective materials, either without a fresnel term (for compatibility) or with the conductive fresnel term. */
|
||||
ccl_device_inline float3 mf_sample_phase_glossy(const float3 wi, float3 *n, float3 *k, float3 *weight, const float3 wm)
|
||||
ccl_device_forceinline float3 mf_sample_phase_glossy(const float3 wi, float3 *n, float3 *k, float3 *weight, const float3 wm)
|
||||
{
|
||||
if(n && k)
|
||||
*weight *= fresnel_conductor(dot(wi, wm), *n, *k);
|
||||
@@ -102,7 +102,7 @@ ccl_device_inline float3 mf_sample_phase_glossy(const float3 wi, float3 *n, floa
|
||||
return -wi + 2.0f * wm * dot(wi, wm);
|
||||
}
|
||||
|
||||
ccl_device_inline float3 mf_eval_phase_glossy(const float3 w, const float lambda, const float3 wo, const float2 alpha, float3 *n, float3 *k)
|
||||
ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w, const float lambda, const float3 wo, const float2 alpha, float3 *n, float3 *k)
|
||||
{
|
||||
if(w.z > 0.9999f)
|
||||
return make_float3(0.0f, 0.0f, 0.0f);
|
||||
@@ -132,7 +132,7 @@ ccl_device_inline float3 mf_eval_phase_glossy(const float3 w, const float lambda
|
||||
}
|
||||
|
||||
/* Phase function for rough lambertian diffuse surfaces. */
|
||||
ccl_device_inline float3 mf_sample_phase_diffuse(const float3 wm, const float randu, const float randv)
|
||||
ccl_device_forceinline float3 mf_sample_phase_diffuse(const float3 wm, const float randu, const float randv)
|
||||
{
|
||||
float3 tm, bm;
|
||||
make_orthonormals(wm, &tm, &bm);
|
||||
@@ -141,14 +141,14 @@ ccl_device_inline float3 mf_sample_phase_diffuse(const float3 wm, const float ra
|
||||
return disk.x*tm + disk.y*bm + safe_sqrtf(1.0f - disk.x*disk.x - disk.y*disk.y)*wm;
|
||||
}
|
||||
|
||||
/* Evaluate the diffuse phase function for direction w against the microfacet normal wm.
 * Returns the cosine between w and wm, clamped to be non-negative and scaled by M_1_PI_F,
 * replicated into all three components of the result. */
ccl_device_inline float3 mf_eval_phase_diffuse(const float3 w, const float3 wm)
|
||||
ccl_device_forceinline float3 mf_eval_phase_diffuse(const float3 w, const float3 wm)
|
||||
{
|
||||
/* Negative dot products (w facing away from wm) contribute zero. */
const float v = max(0.0f, dot(w, wm)) * M_1_PI_F;
|
||||
return make_float3(v, v, v);
|
||||
}
|
||||
|
||||
/* Phase function for dielectric transmissive materials, including both reflection and refraction according to the dielectric fresnel term. */
|
||||
ccl_device_inline float3 mf_sample_phase_glass(const float3 wi, const float eta, const float3 wm, const float randV, bool *outside)
|
||||
ccl_device_forceinline float3 mf_sample_phase_glass(const float3 wi, const float eta, const float3 wm, const float randV, bool *outside)
|
||||
{
|
||||
float cosI = dot(wi, wm);
|
||||
float f = fresnel_dielectric_cos(cosI, eta);
|
||||
@@ -162,7 +162,7 @@ ccl_device_inline float3 mf_sample_phase_glass(const float3 wi, const float eta,
|
||||
return normalize(wm*(cosI*inv_eta + cosT) - wi*inv_eta);
|
||||
}
|
||||
|
||||
ccl_device_inline float3 mf_eval_phase_glass(const float3 w, const float lambda, const float3 wo, const bool wo_outside, const float2 alpha, const float eta)
|
||||
ccl_device_forceinline float3 mf_eval_phase_glass(const float3 w, const float lambda, const float3 wo, const bool wo_outside, const float2 alpha, const float eta)
|
||||
{
|
||||
if(w.z > 0.9999f)
|
||||
return make_float3(0.0f, 0.0f, 0.0f);
|
||||
@@ -195,7 +195,7 @@ ccl_device_inline float3 mf_eval_phase_glass(const float3 w, const float lambda,
|
||||
/* === Utility functions for the random walks === */
|
||||
|
||||
/* Smith Lambda function for GGX (based on page 12 of the supplemental implementation). */
|
||||
ccl_device_inline float mf_lambda(const float3 w, const float2 alpha)
|
||||
ccl_device_forceinline float mf_lambda(const float3 w, const float2 alpha)
|
||||
{
|
||||
if(w.z > 0.9999f)
|
||||
return 0.0f;
|
||||
@@ -212,18 +212,18 @@ ccl_device_inline float mf_lambda(const float3 w, const float2 alpha)
|
||||
}
|
||||
|
||||
/* Height distribution CDF (based on page 4 of the supplemental implementation). */
|
||||
/* Returns 2*saturate(h) - 1.
 * NOTE(review): presumably saturate() clamps to [0, 1], which would make the result lie in [-1, 1] - confirm. */
ccl_device_inline float mf_invC1(const float h)
|
||||
ccl_device_forceinline float mf_invC1(const float h)
|
||||
{
|
||||
return 2.0f * saturate(h) - 1.0f;
|
||||
}
|
||||
|
||||
/* Returns saturate(0.5*(h + 1)), i.e. the affine map h -> (h + 1)/2 passed through saturate().
 * Algebraically this is the inverse of the mapping in mf_invC1 wherever saturate() leaves its argument unchanged. */
ccl_device_inline float mf_C1(const float h)
|
||||
ccl_device_forceinline float mf_C1(const float h)
|
||||
{
|
||||
return saturate(0.5f * (h + 1.0f));
|
||||
}
|
||||
|
||||
/* Masking function (based on page 16 of the supplemental implementation). */
|
||||
ccl_device_inline float mf_G1(const float3 w, const float C1, const float lambda)
|
||||
ccl_device_forceinline float mf_G1(const float3 w, const float C1, const float lambda)
|
||||
{
|
||||
if(w.z > 0.9999f)
|
||||
return 1.0f;
|
||||
@@ -233,7 +233,7 @@ ccl_device_inline float mf_G1(const float3 w, const float C1, const float lambda
|
||||
}
|
||||
|
||||
/* Sampling from the visible height distribution (based on page 17 of the supplemental implementation). */
|
||||
ccl_device_inline bool mf_sample_height(const float3 w, float *h, float *C1, float *G1, float *lambda, const float U)
|
||||
ccl_device_forceinline bool mf_sample_height(const float3 w, float *h, float *C1, float *G1, float *lambda, const float U)
|
||||
{
|
||||
if(w.z > 0.9999f)
|
||||
return false;
|
||||
@@ -262,14 +262,14 @@ ccl_device_inline bool mf_sample_height(const float3 w, float *h, float *C1, flo
|
||||
|
||||
/* Approximation for the albedo of the single-scattering GGX distribution,
|
||||
* the missing energy is then approximated as a diffuse reflection for the PDF. */
|
||||
/* Fitted approximation evaluated at r: an exponential term plus a constant,
 * minus a degree-7 polynomial in r, with the result passed through saturate(). */
ccl_device_inline float mf_ggx_albedo(float r)
|
||||
ccl_device_forceinline float mf_ggx_albedo(float r)
|
||||
{
|
||||
/* Base term: Gaussian-shaped falloff in r plus a constant offset. */
float albedo = 0.806495f*expf(-1.98712f*r*r) + 0.199531f;
|
||||
/* Correction term: degree-7 polynomial in r, written in Horner form. */
albedo -= ((((((1.76741f*r - 8.43891f)*r + 15.784f)*r - 14.398f)*r + 6.45221f)*r - 1.19722f)*r + 0.027803f)*r + 0.00568739f;
|
||||
return saturate(albedo);
|
||||
}
|
||||
|
||||
ccl_device_inline float mf_ggx_pdf(const float3 wi, const float3 wo, const float alpha)
|
||||
ccl_device_forceinline float mf_ggx_pdf(const float3 wi, const float3 wo, const float alpha)
|
||||
{
|
||||
float D = D_ggx(normalize(wi+wo), alpha);
|
||||
float lambda = mf_lambda(wi, make_float2(alpha, alpha));
|
||||
@@ -277,17 +277,17 @@ ccl_device_inline float mf_ggx_pdf(const float3 wi, const float3 wo, const float
|
||||
return 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f) + (1.0f - albedo) * wo.z;
|
||||
}
|
||||
|
||||
/* PDF for the anisotropic case, computed as the sum of two terms:
 *  - 0.25 * D_ggx_aniso(half vector of wi and wo) / ((1 + mf_lambda(wi)) * wi.z)
 *  - (1 - mf_ggx_albedo(sqrt(alpha.x*alpha.y))) * wo.z
 * NOTE(review): the isotropic mf_ggx_pdf clamps its denominator with max(..., 1e-7f);
 * this variant divides by (1 + lambda) * wi.z without a clamp - confirm that wi.z
 * cannot reach zero here. */
ccl_device_inline float mf_ggx_aniso_pdf(const float3 wi, const float3 wo, const float2 alpha)
|
||||
ccl_device_forceinline float mf_ggx_aniso_pdf(const float3 wi, const float3 wo, const float2 alpha)
|
||||
{
|
||||
return 0.25f * D_ggx_aniso(normalize(wi+wo), alpha) / ((1.0f + mf_lambda(wi, alpha)) * wi.z) + (1.0f - mf_ggx_albedo(sqrtf(alpha.x*alpha.y))) * wo.z;
|
||||
}
|
||||
|
||||
/* PDF for the diffuse case: the z component of wo scaled by M_1_PI_F.
 * No clamping is applied, so a negative wo.z yields a negative value. */
ccl_device_inline float mf_diffuse_pdf(const float3 wo)
|
||||
ccl_device_forceinline float mf_diffuse_pdf(const float3 wo)
|
||||
{
|
||||
return M_1_PI_F * wo.z;
|
||||
}
|
||||
|
||||
ccl_device_inline float mf_glass_pdf(const float3 wi, const float3 wo, const float alpha, const float eta)
|
||||
ccl_device_forceinline float mf_glass_pdf(const float3 wi, const float3 wo, const float alpha, const float eta)
|
||||
{
|
||||
float3 wh;
|
||||
float fresnel;
|
||||
@@ -404,7 +404,7 @@ ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc
|
||||
*pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
|
||||
else
|
||||
*pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
|
||||
return mf_eval_glossy(localI, localO, true, bsdf->extra->color, bsdf->extra->cspec0, bsdf->alpha_x, bsdf->alpha_y, lcg_state, NULL, NULL, bsdf->ior, bsdf->extra->use_fresnel);
|
||||
return mf_eval_glossy(localI, localO, true, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, NULL, NULL, bsdf->ior, bsdf->extra->use_fresnel, bsdf->extra->cspec0);
|
||||
}
|
||||
|
||||
ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state)
|
||||
@@ -430,7 +430,7 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, const ShaderC
|
||||
float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
|
||||
float3 localO;
|
||||
|
||||
*eval = mf_sample_glossy(localI, &localO, bsdf->extra->color, bsdf->extra->cspec0, bsdf->alpha_x, bsdf->alpha_y, lcg_state, NULL, NULL, bsdf->ior, bsdf->extra->use_fresnel);
|
||||
*eval = mf_sample_glossy(localI, &localO, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, NULL, NULL, bsdf->ior, bsdf->extra->use_fresnel, bsdf->extra->cspec0);
|
||||
if(is_aniso)
|
||||
*pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
|
||||
else
|
||||
@@ -447,7 +447,7 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, const ShaderC
|
||||
|
||||
/* Multiscattering GGX Glass closure */
|
||||
|
||||
ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf, bool use_fresnel = false, bool initial_outside = true)
|
||||
ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf, bool use_fresnel = false)
|
||||
{
|
||||
bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
|
||||
bsdf->alpha_y = bsdf->alpha_x;
|
||||
@@ -459,7 +459,6 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf, bool
|
||||
bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
|
||||
bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
|
||||
bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
|
||||
bsdf->extra->initial_outside = initial_outside;
|
||||
|
||||
bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID;
|
||||
|
||||
@@ -481,7 +480,7 @@ ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClos
|
||||
float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
|
||||
|
||||
*pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
|
||||
return mf_eval_glass(localI, localO, false, bsdf->extra->color, bsdf->extra->cspec0, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior);
|
||||
return mf_eval_glass(localI, localO, false, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior);
|
||||
}
|
||||
|
||||
ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
|
||||
@@ -499,7 +498,7 @@ ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosu
|
||||
float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
|
||||
|
||||
*pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
|
||||
return mf_eval_glass(localI, localO, true, bsdf->extra->color, bsdf->extra->cspec0, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, bsdf->extra->use_fresnel, bsdf->extra->initial_outside);
|
||||
return mf_eval_glass(localI, localO, true, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, bsdf->extra->use_fresnel, bsdf->extra->cspec0);
|
||||
}
|
||||
|
||||
ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state)
|
||||
@@ -546,7 +545,7 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg, const S
|
||||
float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
|
||||
float3 localO;
|
||||
|
||||
*eval = mf_sample_glass(localI, &localO, bsdf->extra->color, bsdf->extra->cspec0, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, bsdf->extra->use_fresnel, bsdf->extra->initial_outside);
|
||||
*eval = mf_sample_glass(localI, &localO, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, bsdf->extra->use_fresnel, bsdf->extra->cspec0);
|
||||
*pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
|
||||
*eval *= *pdf;
|
||||
|
||||
|
||||
@@ -25,23 +25,23 @@
|
||||
* energy is used. In combination with MIS, that is enough to produce an unbiased result, although
|
||||
* the balance heuristic isn't necessarily optimal anymore.
|
||||
*/
|
||||
ccl_device_inline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
|
||||
ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
|
||||
float3 wi,
|
||||
float3 wo,
|
||||
const bool wo_outside,
|
||||
const float3 color,
|
||||
const float3 cspec0,
|
||||
const float alpha_x,
|
||||
const float alpha_y,
|
||||
ccl_addr_space uint *lcg_state
|
||||
#ifdef MF_MULTI_GLASS
|
||||
, const float eta
|
||||
, bool use_fresnel = false
|
||||
, bool initial_outside = true
|
||||
, const float3 cspec0 = make_float3(1.0f, 1.0f, 1.0f)
|
||||
#elif defined(MF_MULTI_GLOSSY)
|
||||
, float3 *n, float3 *k
|
||||
, const float eta = 1.0f
|
||||
, bool use_fresnel = false
|
||||
, const float3 cspec0 = make_float3(1.0f, 1.0f, 1.0f)
|
||||
#endif
|
||||
)
|
||||
{
|
||||
@@ -88,8 +88,8 @@ ccl_device_inline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
|
||||
float3 throughput2 = make_float3(1.0f, 1.0f, 1.0f);
|
||||
float F0 = fresnel_dielectric_cos(1.0f, eta);
|
||||
float F0_norm = 1.0f / (1.0f - F0);
|
||||
if (use_fresnel/* && initial_outside*/) {
|
||||
float FH = (fresnel_dielectric_cos(dot(wi, normalize(wi + wo)), eta) - F0) * F0_norm; //schlick_fresnel(dot(wi, normalize(wi + wo))); //
|
||||
if (use_fresnel) {
|
||||
float FH = (fresnel_dielectric_cos(dot(wi, normalize(wi + wo)), eta) - F0) * F0_norm;
|
||||
throughput2 = cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH;
|
||||
|
||||
eval2 = throughput2 * eval;
|
||||
@@ -118,7 +118,7 @@ ccl_device_inline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
|
||||
float F0 = fresnel_dielectric_cos(1.0f, eta);
|
||||
float F0_norm = 1.0f / (1.0f - F0);
|
||||
if (use_fresnel) {
|
||||
float FH = (fresnel_dielectric_cos(dot(wi, normalize(wi + wo)), eta) - F0) * F0_norm; //schlick_fresnel(dot(wi, normalize(wi + wo))); //
|
||||
float FH = (fresnel_dielectric_cos(dot(wi, normalize(wi + wo)), eta) - F0) * F0_norm;
|
||||
throughput2 = cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH;
|
||||
|
||||
eval2 = throughput2 * val;
|
||||
@@ -167,7 +167,7 @@ ccl_device_inline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
|
||||
else
|
||||
phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f/eta);
|
||||
|
||||
if (use_fresnel/* && initial_outside*/)
|
||||
if (use_fresnel)
|
||||
eval2 += throughput2 * phase * mf_G1(wo_outside ? wo : -wo, mf_C1((outside == wo_outside) ? hr : -hr), shadowing_lambda);
|
||||
#elif defined(MF_MULTI_DIFFUSE)
|
||||
phase = mf_eval_phase_diffuse(wo, wm);
|
||||
@@ -194,8 +194,8 @@ ccl_device_inline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
|
||||
if (use_fresnel && !next_outside) {
|
||||
throughput2 *= color;
|
||||
}
|
||||
else if (use_fresnel/* && initial_outside && outside && next_outside*/) {
|
||||
float FH = (fresnel_dielectric_cos(dot(wi_prev, wm), eta) - F0) * F0_norm; //schlick_fresnel(dot(wi_prev, wm)); //
|
||||
else if (use_fresnel) {
|
||||
float FH = (fresnel_dielectric_cos(dot(wi_prev, wm), eta) - F0) * F0_norm;
|
||||
t_color = cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH;
|
||||
|
||||
if (order > 0)
|
||||
@@ -207,7 +207,7 @@ ccl_device_inline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
|
||||
lcg_step_float_addrspace(lcg_state));
|
||||
#else /* MF_MULTI_GLOSSY */
|
||||
if (use_fresnel) {
|
||||
float FH = (fresnel_dielectric_cos(dot(-wr, wm), eta) - F0) * F0_norm; //schlick_fresnel(dot(-wr, wm)); //
|
||||
float FH = (fresnel_dielectric_cos(dot(-wr, wm), eta) - F0) * F0_norm;
|
||||
t_color = cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH;
|
||||
|
||||
if (order > 0)
|
||||
@@ -246,15 +246,16 @@ ccl_device_inline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
|
||||
* escaped the surface in wo. The function returns the throughput between wi and wo.
|
||||
* Without reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal.
|
||||
*/
|
||||
ccl_device float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3 *wo, const float3 color, const float3 cspec0, const float alpha_x, const float alpha_y, ccl_addr_space uint *lcg_state
|
||||
ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3 *wo, const float3 color, const float alpha_x, const float alpha_y, ccl_addr_space uint *lcg_state
|
||||
#ifdef MF_MULTI_GLASS
|
||||
, const float eta
|
||||
, bool use_fresnel = false
|
||||
, bool initial_outside = true
|
||||
, const float3 cspec0 = make_float3(1.0f, 1.0f, 1.0f)
|
||||
#elif defined(MF_MULTI_GLOSSY)
|
||||
, float3 *n, float3 *k
|
||||
, const float eta = 1.0f
|
||||
, bool use_fresnel = false
|
||||
, const float3 cspec0 = make_float3(1.0f, 1.0f, 1.0f)
|
||||
#endif
|
||||
)
|
||||
{
|
||||
@@ -272,8 +273,8 @@ ccl_device float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3 *wo, const
|
||||
float3 throughput2 = make_float3(1.0f, 1.0f, 1.0f);
|
||||
float F0 = fresnel_dielectric_cos(1.0f, eta);
|
||||
float F0_norm = 1.0f / (1.0f - F0);
|
||||
if (use_fresnel/* && initial_outside*/) {
|
||||
float FH = (fresnel_dielectric_cos(dot(wi, normalize(wi + wr)), eta) - F0) * F0_norm; //schlick_fresnel(dot(wi, normalize(wi + wr))); //
|
||||
if (use_fresnel) {
|
||||
float FH = (fresnel_dielectric_cos(dot(wi, normalize(wi + wr)), eta) - F0) * F0_norm;
|
||||
throughput2 = cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH;
|
||||
}
|
||||
#elif defined(MF_MULTI_GLOSSY)
|
||||
@@ -282,7 +283,7 @@ ccl_device float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3 *wo, const
|
||||
float F0 = fresnel_dielectric_cos(1.0f, eta);
|
||||
float F0_norm = 1.0f / (1.0f - F0);
|
||||
if (use_fresnel) {
|
||||
float FH = (fresnel_dielectric_cos(dot(wi, normalize(wi + wr)), eta) - F0) * F0_norm; //schlick_fresnel(dot(wi, normalize(wi + wr))); //
|
||||
float FH = (fresnel_dielectric_cos(dot(wi, normalize(wi + wr)), eta) - F0) * F0_norm;
|
||||
throughput2 = cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH;
|
||||
}
|
||||
#endif
|
||||
@@ -322,8 +323,8 @@ ccl_device float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3 *wo, const
|
||||
if (!next_outside) {
|
||||
throughput2 *= color;
|
||||
}
|
||||
else if (/*initial_outside && outside && next_outside*/true) {
|
||||
float FH = (fresnel_dielectric_cos(dot(wi_prev, wm), eta) - F0) * F0_norm; //schlick_fresnel(dot(wi_prev, wm)); //
|
||||
else {
|
||||
float FH = (fresnel_dielectric_cos(dot(wi_prev, wm), eta) - F0) * F0_norm;
|
||||
t_color = cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH;
|
||||
|
||||
if (order == 0)
|
||||
@@ -338,7 +339,7 @@ ccl_device float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3 *wo, const
|
||||
lcg_step_float_addrspace(lcg_state));
|
||||
#else /* MF_MULTI_GLOSSY */
|
||||
if (use_fresnel) {
|
||||
float FH = (fresnel_dielectric_cos(dot(-wr, wm), eta) - F0) * F0_norm; //schlick_fresnel(dot(-wr, wm)); //
|
||||
float FH = (fresnel_dielectric_cos(dot(-wr, wm), eta) - F0) * F0_norm;
|
||||
t_color = cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH;
|
||||
|
||||
if (order == 0)
|
||||
|
||||
@@ -143,7 +143,7 @@ ccl_device float bssrdf_cubic_pdf(const ShaderClosure *sc, float r)
|
||||
}
|
||||
|
||||
/* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */
|
||||
ccl_device_inline float bssrdf_cubic_quintic_root_find(float xi)
|
||||
ccl_device_forceinline float bssrdf_cubic_quintic_root_find(float xi)
|
||||
{
|
||||
/* newton-raphson iteration, usually succeeds in 2-4 iterations, except
|
||||
* outside 0.02 ... 0.98 where it can go up to 10, so overall performance
|
||||
@@ -257,7 +257,7 @@ ccl_device float bssrdf_burley_pdf(const ShaderClosure *sc, float r)
|
||||
* Returns scaled radius, meaning the result is to be scaled up by d.
|
||||
* Since there's no closed form solution we do Newton-Raphson method to find it.
|
||||
*/
|
||||
ccl_device_inline float bssrdf_burley_root_find(float xi)
|
||||
ccl_device_forceinline float bssrdf_burley_root_find(float xi)
|
||||
{
|
||||
const float tolerance = 1e-6f;
|
||||
const int max_iteration_count = 10;
|
||||
@@ -412,7 +412,7 @@ ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float
|
||||
bssrdf_burley_sample(sc, xi, r, h);
|
||||
}
|
||||
|
||||
ccl_device_inline float bssrdf_pdf(const ShaderClosure *sc, float r)
|
||||
ccl_device_forceinline float bssrdf_pdf(const ShaderClosure *sc, float r)
|
||||
{
|
||||
if(sc->type == CLOSURE_BSSRDF_CUBIC_ID)
|
||||
return bssrdf_cubic_pdf(sc, r);
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
/*
|
||||
* Adapted from code Copyright 2009-2010 NVIDIA Corporation
|
||||
* Modifications Copyright 2011, Blender Foundation.
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
||||
@@ -222,10 +222,10 @@ ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a)
|
||||
|
||||
#ifdef __KERNEL_SSE2__
|
||||
/* Pass P and dir by reference to aligned vector */
|
||||
ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect,
|
||||
ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect,
|
||||
const float3 &P, const float3 &dir, uint visibility, int object, int curveAddr, float time, int type, uint *lcg_state, float difl, float extmax)
|
||||
#else
|
||||
ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect,
|
||||
ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect,
|
||||
float3 P, float3 dir, uint visibility, int object, int curveAddr, float time,int type, uint *lcg_state, float difl, float extmax)
|
||||
#endif
|
||||
{
|
||||
@@ -621,7 +621,7 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
|
||||
return hit;
|
||||
}
|
||||
|
||||
ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect,
|
||||
ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect,
|
||||
float3 P, float3 direction, uint visibility, int object, int curveAddr, float time, int type, uint *lcg_state, float difl, float extmax)
|
||||
{
|
||||
/* define few macros to minimize code duplication for SSE */
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
/*
|
||||
* Adapted from code Copyright 2009-2010 NVIDIA Corporation
|
||||
* Modifications Copyright 2011, Blender Foundation.
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
/*
|
||||
* Adapted from code Copyright 2009-2010 NVIDIA Corporation
|
||||
* Modifications Copyright 2011, Blender Foundation.
|
||||
* Copyright 2011-2013 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
||||
@@ -107,6 +107,67 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
|
||||
|
||||
/* Calculate vertices relative to ray origin. */
|
||||
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr);
|
||||
|
||||
#if defined(__KERNEL_AVX2__)
|
||||
const avxf avxf_P(P.m128, P.m128);
|
||||
|
||||
const avxf tri_ab = kernel_tex_fetch_avxf(__prim_tri_verts, tri_vindex + 0);
|
||||
const avxf tri_bc = kernel_tex_fetch_avxf(__prim_tri_verts, tri_vindex + 1);
|
||||
|
||||
const avxf AB = tri_ab - avxf_P;
|
||||
const avxf BC = tri_bc - avxf_P;
|
||||
|
||||
const __m256i permuteMask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx);
|
||||
|
||||
const avxf AB_k = shuffle(AB, permuteMask);
|
||||
const avxf BC_k = shuffle(BC, permuteMask);
|
||||
|
||||
/* Akz, Akz, Bkz, Bkz, Bkz, Bkz, Ckz, Ckz */
|
||||
const avxf ABBC_kz = shuffle<2>(AB_k, BC_k);
|
||||
|
||||
/* Akx, Aky, Bkx, Bky, Bkx,Bky, Ckx, Cky */
|
||||
const avxf ABBC_kxy = shuffle<0,1,0,1>(AB_k, BC_k);
|
||||
|
||||
const avxf Sxy(Sy, Sx, Sy, Sx);
|
||||
|
||||
/* Ax, Ay, Bx, By, Bx, By, Cx, Cy */
|
||||
const avxf ABBC_xy = nmadd(ABBC_kz, Sxy, ABBC_kxy);
|
||||
|
||||
float ABBC_kz_array[8];
|
||||
_mm256_storeu_ps((float*)&ABBC_kz_array, ABBC_kz);
|
||||
|
||||
const float A_kz = ABBC_kz_array[0];
|
||||
const float B_kz = ABBC_kz_array[2];
|
||||
const float C_kz = ABBC_kz_array[6];
|
||||
|
||||
/* By, Bx, Cy, Cx, By, Bx, Ay, Ax */
|
||||
const avxf BCBA_yx = permute<3,2,7,6,3,2,1,0>(ABBC_xy);
|
||||
|
||||
const avxf negMask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000);
|
||||
|
||||
/* W U V
|
||||
* (AxBy-AyBx) (BxCy-ByCx) XX XX (BxBy-ByBx) (CxAy-CyAx) XX XX
|
||||
*/
|
||||
const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, negMask /* Dont care */);
|
||||
|
||||
const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ negMask;
|
||||
|
||||
/* Calculate scaled barycentric coordinates. */
|
||||
float WUVW_array[4];
|
||||
_mm_storeu_ps((float*)&WUVW_array, _mm256_castps256_ps128 (WUVWnegWUVW));
|
||||
|
||||
const float W = WUVW_array[0];
|
||||
const float U = WUVW_array[1];
|
||||
const float V = WUVW_array[2];
|
||||
|
||||
const int WUVW_mask = 0x7 & _mm256_movemask_ps(WUVWnegWUVW);
|
||||
const int WUVW_zero = 0x7 & _mm256_movemask_ps(_mm256_cmp_ps(WUVWnegWUVW,
|
||||
_mm256_setzero_ps(), 0));
|
||||
|
||||
if(!((WUVW_mask == 7) || (WUVW_mask == 0)) && ((WUVW_mask | WUVW_zero) != 7)) {
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
|
||||
tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
|
||||
tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
|
||||
@@ -135,6 +196,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Calculate determinant. */
|
||||
float det = U + V + W;
|
||||
|
||||
@@ -54,13 +54,7 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 v
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO(sergey): This is just a workaround for annoying 6.5 compiler bug. */
|
||||
#if !defined(__KERNEL_CUDA__) || __CUDA_ARCH__ < 500
|
||||
ccl_device_inline
|
||||
#else
|
||||
ccl_device_noinline
|
||||
#endif
|
||||
void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value)
|
||||
ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value)
|
||||
{
|
||||
#ifdef __PASSES__
|
||||
if(eval->use_light_pass) {
|
||||
|
||||
@@ -221,14 +221,6 @@ ccl_device_inline void camera_sample_panorama(KernelGlobals *kg,
|
||||
|
||||
/* create ray from raster position */
|
||||
ray->P = make_float3(0.0f, 0.0f, 0.0f);
|
||||
|
||||
#ifdef __CAMERA_CLIPPING__
|
||||
/* clipping */
|
||||
ray->t = kernel_data.cam.cliplength;
|
||||
#else
|
||||
ray->t = FLT_MAX;
|
||||
#endif
|
||||
|
||||
ray->D = panorama_to_direction(kg, Pcamera.x, Pcamera.y);
|
||||
|
||||
/* indicates ray should not receive any light, outside of the lens */
|
||||
@@ -302,6 +294,14 @@ ccl_device_inline void camera_sample_panorama(KernelGlobals *kg,
|
||||
ray->dD.dy = spherical_stereo_direction(kg, tD, tP, Pcamera) - Ddiff;
|
||||
/* dP.dy is zero, since the omnidirectional panorama only shifts the eyes horizontally */
|
||||
#endif
|
||||
|
||||
#ifdef __CAMERA_CLIPPING__
|
||||
/* clipping */
|
||||
ray->P += kernel_data.cam.nearclip*ray->D;
|
||||
ray->t = kernel_data.cam.cliplength;
|
||||
#else
|
||||
ray->t = FLT_MAX;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Common */
|
||||
|
||||
@@ -71,6 +71,20 @@ template<typename T> struct texture {
|
||||
return data[index];
|
||||
}
|
||||
|
||||
#ifdef __KERNEL_AVX__
|
||||
/* Reads 256 bits (8 floats) but indexes in ssef-sized blocks (128 bits, assuming ssef wraps __m128) to maintain
|
||||
* compatibility with existing indices and data structures.
|
||||
*/
|
||||
ccl_always_inline avxf fetch_avxf(const int index)
|
||||
{
|
||||
/* The load below starts at ssef element `index` and is 256 bits wide, so element index+1 must be in range too. */
kernel_assert(index >= 0 && (index+1) < width);
|
||||
/* Reinterpret the texture storage as an array of ssef so that `index` counts ssef elements. */
ssef *ssefData = (ssef*)data;
|
||||
ssef *ssefNodeData = &ssefData[index];
|
||||
/* Unaligned load: the address is not required to be 32-byte aligned. */
return _mm256_loadu_ps((float *)ssefNodeData);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_SSE2__
|
||||
ccl_always_inline ssef fetch_ssef(int index)
|
||||
{
|
||||
@@ -506,6 +520,7 @@ typedef texture_image<half4> texture_image_half4;
|
||||
/* Macros to handle different memory storage on different devices */
|
||||
|
||||
#define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
|
||||
#define kernel_tex_fetch_avxf(tex, index) (kg->tex.fetch_avxf(index))
|
||||
#define kernel_tex_fetch_ssef(tex, index) (kg->tex.fetch_ssef(index))
|
||||
#define kernel_tex_fetch_ssei(tex, index) (kg->tex.fetch_ssei(index))
|
||||
#define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
/* Qualifier wrappers for different names on different devices */
|
||||
|
||||
#define ccl_device __device__ __inline__
|
||||
# define ccl_device_forceinline __device__ __forceinline__
|
||||
#if (__KERNEL_CUDA_VERSION__ == 80) && (__CUDA_ARCH__ < 500)
|
||||
# define ccl_device_inline __device__ __forceinline__
|
||||
#else
|
||||
|
||||
@@ -33,6 +33,7 @@
|
||||
/* in opencl all functions are device functions, so leave this empty */
|
||||
#define ccl_device
|
||||
#define ccl_device_inline ccl_device
|
||||
#define ccl_device_forceinline ccl_device
|
||||
#define ccl_device_noinline ccl_device ccl_noinline
|
||||
#define ccl_may_alias
|
||||
#define ccl_constant __constant
|
||||
|
||||
@@ -69,7 +69,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
Intersection isect;
|
||||
uint visibility = path_state_ray_visibility(kg, state);
|
||||
bool hit = scene_intersect(kg,
|
||||
ray,
|
||||
*ray,
|
||||
visibility,
|
||||
&isect,
|
||||
NULL,
|
||||
@@ -655,9 +655,9 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
|
||||
lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
|
||||
}
|
||||
|
||||
bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
|
||||
bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
|
||||
#else
|
||||
bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f);
|
||||
bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_DEBUG__
|
||||
|
||||
@@ -282,9 +282,9 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
|
||||
lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
|
||||
}
|
||||
|
||||
bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
|
||||
bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
|
||||
#else
|
||||
bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f);
|
||||
bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_DEBUG__
|
||||
|
||||
@@ -98,7 +98,7 @@ ccl_device uint sobol_lookup(const uint m, const uint frame, const uint ex, cons
|
||||
return index;
|
||||
}
|
||||
|
||||
ccl_device_inline float path_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, int sample, int num_samples, int dimension)
|
||||
ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, int sample, int num_samples, int dimension)
|
||||
{
|
||||
#ifdef __CMJ__
|
||||
if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
|
||||
@@ -132,13 +132,7 @@ ccl_device_inline float path_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng,
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Temporary workaround for Pascal cards, otherwise AA does not work properly. */
|
||||
#if defined(__KERNEL_GPU__) && __CUDA_ARCH__ >= 600
|
||||
__device__ __forceinline__
|
||||
#else
|
||||
ccl_device_inline
|
||||
#endif
|
||||
void path_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, int sample, int num_samples, int dimension, float *fx, float *fy)
|
||||
ccl_device_forceinline void path_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, int sample, int num_samples, int dimension, float *fx, float *fy)
|
||||
{
|
||||
#ifdef __CMJ__
|
||||
if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
|
||||
@@ -199,7 +193,7 @@ ccl_device void path_rng_end(KernelGlobals *kg, ccl_global uint *rng_state, RNG
|
||||
|
||||
/* Linear Congruential Generator */
|
||||
|
||||
ccl_device_inline float path_rng_1D(KernelGlobals *kg, RNG& rng, int sample, int num_samples, int dimension)
|
||||
ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, RNG& rng, int sample, int num_samples, int dimension)
|
||||
{
|
||||
/* implicit mod 2^32 */
|
||||
rng = (1103515245*(rng) + 12345);
|
||||
|
||||
@@ -851,11 +851,11 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_
|
||||
#ifdef __SVM__
|
||||
svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
|
||||
#else
|
||||
ccl_fetch_array(sd, closure, 0)->weight = make_float3(0.8f, 0.8f, 0.8f);
|
||||
ccl_fetch_array(sd, closure, 0)->N = ccl_fetch(sd, N);
|
||||
ccl_fetch_array(sd, closure, 0)->data0 = 0.0f;
|
||||
ccl_fetch_array(sd, closure, 0)->data1 = 0.0f;
|
||||
ccl_fetch(sd, flag) |= bsdf_diffuse_setup(ccl_fetch_array(sd, closure, 0));
|
||||
DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd,
|
||||
sizeof(DiffuseBsdf),
|
||||
make_float3(0.8f, 0.8f, 0.8f));
|
||||
bsdf->N = ccl_fetch(sd, N);
|
||||
ccl_fetch(sd, flag) |= bsdf_diffuse_setup(bsdf);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -155,7 +155,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd,
|
||||
}
|
||||
else {
|
||||
Intersection isect;
|
||||
blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f);
|
||||
blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f);
|
||||
}
|
||||
|
||||
#ifdef __VOLUME__
|
||||
@@ -205,7 +205,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
|
||||
Intersection *isect = &isect_object;
|
||||
#endif
|
||||
|
||||
bool blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f);
|
||||
bool blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f);
|
||||
|
||||
#ifdef __TRANSPARENT_SHADOWS__
|
||||
if(blocked && kernel_data.integrator.transparent_shadows) {
|
||||
@@ -221,7 +221,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
|
||||
if(bounce >= kernel_data.integrator.transparent_max_bounce)
|
||||
return true;
|
||||
|
||||
if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f))
|
||||
if(!scene_intersect(kg, *ray, PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f))
|
||||
{
|
||||
#ifdef __VOLUME__
|
||||
/* attenuation for last line segment towards light */
|
||||
|
||||
@@ -85,16 +85,11 @@ ccl_device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Sha
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
ccl_device_noinline
|
||||
#else
|
||||
ccl_device_inline
|
||||
#endif
|
||||
float3 subsurface_scatter_eval(ShaderData *sd,
|
||||
ShaderClosure *sc,
|
||||
float disk_r,
|
||||
float r,
|
||||
bool all)
|
||||
ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd,
|
||||
ShaderClosure *sc,
|
||||
float disk_r,
|
||||
float r,
|
||||
bool all)
|
||||
{
|
||||
#ifdef BSSRDF_MULTI_EVAL
|
||||
/* this is the veach one-sample model with balance heuristic, some pdf
|
||||
@@ -240,14 +235,9 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
|
||||
/* Subsurface scattering step, from a point on the surface to other
|
||||
* nearby points on the same object.
|
||||
*/
|
||||
#ifndef __KERNEL_CUDA__
|
||||
ccl_device
|
||||
#else
|
||||
ccl_device_inline
|
||||
#endif
|
||||
int subsurface_scatter_multi_intersect(
|
||||
ccl_device_inline int subsurface_scatter_multi_intersect(
|
||||
KernelGlobals *kg,
|
||||
SubsurfaceIntersection* ss_isect,
|
||||
SubsurfaceIntersection *ss_isect,
|
||||
ShaderData *sd,
|
||||
ShaderClosure *sc,
|
||||
uint *lcg_state,
|
||||
@@ -347,6 +337,10 @@ int subsurface_scatter_multi_intersect(
|
||||
verts);
|
||||
}
|
||||
#endif /* __OBJECT_MOTION__ */
|
||||
else {
|
||||
ss_isect->weight[hit] = make_float3(0.0f, 0.0f, 0.0f);
|
||||
continue;
|
||||
}
|
||||
|
||||
float3 hit_Ng = ss_isect->Ng[hit];
|
||||
if(ss_isect->hits[hit].object != OBJECT_NONE) {
|
||||
|
||||
@@ -45,6 +45,7 @@
|
||||
# define __KERNEL_AVX__
|
||||
# endif
|
||||
# ifdef __AVX2__
|
||||
# define __KERNEL_SSE__
|
||||
# define __KERNEL_AVX2__
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
|
||||
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
|
||||
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
|
||||
# define __KERNEL_SSE__
|
||||
# define __KERNEL_SSE2__
|
||||
# define __KERNEL_SSE3__
|
||||
# define __KERNEL_SSSE3__
|
||||
|
||||
@@ -1153,7 +1153,7 @@ bool OSLRenderServices::trace(TraceOpt &options, OSL::ShaderGlobals *sg,
|
||||
tracedata->sd.osl_globals = sd->osl_globals;
|
||||
|
||||
/* raytrace */
|
||||
return scene_intersect(sd->osl_globals, &ray, PATH_RAY_ALL_VISIBILITY, &tracedata->isect, NULL, 0.0f, 0.0f);
|
||||
return scene_intersect(sd->osl_globals, ray, PATH_RAY_ALL_VISIBILITY, &tracedata->isect, NULL, 0.0f, 0.0f);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -109,9 +109,9 @@ ccl_device void kernel_scene_intersect(
|
||||
lcg_state = lcg_state_init(&rng, &state, 0x51633e2d);
|
||||
}
|
||||
|
||||
bool hit = scene_intersect(kg, &ray, visibility, isect, &lcg_state, difl, extmax);
|
||||
bool hit = scene_intersect(kg, ray, visibility, isect, &lcg_state, difl, extmax);
|
||||
#else
|
||||
bool hit = scene_intersect(kg, &ray, visibility, isect, NULL, 0.0f, 0.0f);
|
||||
bool hit = scene_intersect(kg, ray, visibility, isect, NULL, 0.0f, 0.0f);
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_DEBUG__
|
||||
|
||||
@@ -302,7 +302,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
|
||||
if (transp > CLOSURE_WEIGHT_CUTOFF) {
|
||||
float3 glass_weight = weight * transp;
|
||||
float3 cspec0 = baseColor * specularTint + make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specularTint);
|
||||
bool frontfacing = (ccl_fetch(sd, flag) & SD_BACKFACING) == 0;
|
||||
|
||||
if (roughness <= 5e-2f || distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) { /* use single-scatter GGX */
|
||||
float refl_roughness = roughness;
|
||||
@@ -382,7 +381,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
|
||||
bsdf->extra->cspec0 = cspec0;
|
||||
|
||||
/* setup bsdf */
|
||||
ccl_fetch(sd, flag) |= bsdf_microfacet_multi_ggx_glass_setup(bsdf, true, frontfacing);
|
||||
ccl_fetch(sd, flag) |= bsdf_microfacet_multi_ggx_glass_setup(bsdf, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,6 +164,9 @@ ccl_device float3 svm_math_blackbody_color(float t) {
|
||||
|
||||
ccl_device_inline float3 svm_math_gamma_color(float3 color, float gamma)
|
||||
{
|
||||
if(gamma == 0.0f)
|
||||
return make_float3(1.0f, 1.0f, 1.0f);
|
||||
|
||||
if(color.x > 0.0f)
|
||||
color.x = powf(color.x, gamma);
|
||||
if(color.y > 0.0f)
|
||||
|
||||
@@ -89,6 +89,19 @@ void ConstantFolder::make_zero() const
|
||||
}
|
||||
}
|
||||
|
||||
void ConstantFolder::make_one() const
|
||||
{
|
||||
if(output->type() == SocketType::FLOAT) {
|
||||
make_constant(1.0f);
|
||||
}
|
||||
else if(SocketType::is_float3(output->type())) {
|
||||
make_constant(make_float3(1.0f, 1.0f, 1.0f));
|
||||
}
|
||||
else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
void ConstantFolder::bypass(ShaderOutput *new_output) const
|
||||
{
|
||||
assert(new_output);
|
||||
@@ -321,6 +334,15 @@ void ConstantFolder::fold_math(NodeMath type, bool clamp) const
|
||||
make_zero();
|
||||
}
|
||||
break;
|
||||
case NODE_MATH_POWER:
|
||||
/* 1 ^ X == X ^ 0 == 1 */
|
||||
if(is_one(value1_in) || is_zero(value2_in)) {
|
||||
make_one();
|
||||
}
|
||||
/* X ^ 1 == X */
|
||||
else if(is_one(value2_in)) {
|
||||
try_bypass_or_make_constant(value1_in, clamp);
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -43,6 +43,7 @@ public:
|
||||
void make_constant_clamp(float value, bool clamp) const;
|
||||
void make_constant_clamp(float3 value, bool clamp) const;
|
||||
void make_zero() const;
|
||||
void make_one() const;
|
||||
|
||||
/* Bypass node, relinking to another output socket. */
|
||||
void bypass(ShaderOutput *output) const;
|
||||
|
||||
@@ -321,8 +321,8 @@ void ShaderGraph::finalize(Scene *scene,
|
||||
* modified afterwards. */
|
||||
|
||||
if(!finalized) {
|
||||
clean(scene);
|
||||
default_inputs(do_osl);
|
||||
clean(scene);
|
||||
refine_bump_nodes();
|
||||
|
||||
if(do_bump)
|
||||
|
||||
@@ -109,7 +109,7 @@ namespace Far {
|
||||
|
||||
template<>
|
||||
void TopologyRefinerFactory<ccl::Mesh>::reportInvalidTopology(TopologyError /*err_code*/,
|
||||
char const */*msg*/, ccl::Mesh const& /*mesh*/)
|
||||
char const * /*msg*/, ccl::Mesh const& /*mesh*/)
|
||||
{
|
||||
}
|
||||
} /* namespace Far */
|
||||
|
||||
@@ -4049,6 +4049,19 @@ void GammaNode::constant_fold(const ConstantFolder& folder)
|
||||
if(folder.all_inputs_constant()) {
|
||||
folder.make_constant(svm_math_gamma_color(color, gamma));
|
||||
}
|
||||
else {
|
||||
ShaderInput *color_in = input("Color");
|
||||
ShaderInput *gamma_in = input("Gamma");
|
||||
|
||||
/* 1 ^ X == X ^ 0 == 1 */
|
||||
if(folder.is_one(color_in) || folder.is_zero(gamma_in)) {
|
||||
folder.make_one();
|
||||
}
|
||||
/* X ^ 1 == X */
|
||||
else if(folder.is_one(gamma_in)) {
|
||||
folder.try_bypass_or_make_constant(color_in, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GammaNode::compile(SVMCompiler& compiler)
|
||||
|
||||
@@ -930,6 +930,72 @@ TEST(render_graph, constant_fold_gamma)
|
||||
graph.finalize(&scene);
|
||||
}
|
||||
|
||||
/*
|
||||
* Tests: Gamma with one constant 0 input.
|
||||
*/
|
||||
TEST(render_graph, constant_fold_gamma_part_0)
|
||||
{
|
||||
DEFINE_COMMON_VARIABLES(builder, log);
|
||||
|
||||
EXPECT_ANY_MESSAGE(log);
|
||||
INVALID_INFO_MESSAGE(log, "Folding Gamma_Cx::");
|
||||
CORRECT_INFO_MESSAGE(log, "Folding Gamma_xC::Color to constant (1, 1, 1).");
|
||||
|
||||
builder
|
||||
.add_attribute("Attribute")
|
||||
/* constant on the left */
|
||||
.add_node(ShaderNodeBuilder<GammaNode>("Gamma_Cx")
|
||||
.set("Color", make_float3(0.0f, 0.0f, 0.0f)))
|
||||
.add_connection("Attribute::Fac", "Gamma_Cx::Gamma")
|
||||
/* constant on the right */
|
||||
.add_node(ShaderNodeBuilder<GammaNode>("Gamma_xC")
|
||||
.set("Gamma", 0.0f))
|
||||
.add_connection("Attribute::Color", "Gamma_xC::Color")
|
||||
/* output sum */
|
||||
.add_node(ShaderNodeBuilder<MixNode>("Out")
|
||||
.set(&MixNode::type, NODE_MIX_ADD)
|
||||
.set(&MixNode::use_clamp, true)
|
||||
.set("Fac", 1.0f))
|
||||
.add_connection("Gamma_Cx::Color", "Out::Color1")
|
||||
.add_connection("Gamma_xC::Color", "Out::Color2")
|
||||
.output_color("Out::Color");
|
||||
|
||||
graph.finalize(&scene);
|
||||
}
|
||||
|
||||
/*
|
||||
* Tests: Gamma with one constant 1 input.
|
||||
*/
|
||||
TEST(render_graph, constant_fold_gamma_part_1)
|
||||
{
|
||||
DEFINE_COMMON_VARIABLES(builder, log);
|
||||
|
||||
EXPECT_ANY_MESSAGE(log);
|
||||
CORRECT_INFO_MESSAGE(log, "Folding Gamma_Cx::Color to constant (1, 1, 1).");
|
||||
CORRECT_INFO_MESSAGE(log, "Folding Gamma_xC::Color to socket Attribute::Color.");
|
||||
|
||||
builder
|
||||
.add_attribute("Attribute")
|
||||
/* constant on the left */
|
||||
.add_node(ShaderNodeBuilder<GammaNode>("Gamma_Cx")
|
||||
.set("Color", make_float3(1.0f, 1.0f, 1.0f)))
|
||||
.add_connection("Attribute::Fac", "Gamma_Cx::Gamma")
|
||||
/* constant on the right */
|
||||
.add_node(ShaderNodeBuilder<GammaNode>("Gamma_xC")
|
||||
.set("Gamma", 1.0f))
|
||||
.add_connection("Attribute::Color", "Gamma_xC::Color")
|
||||
/* output sum */
|
||||
.add_node(ShaderNodeBuilder<MixNode>("Out")
|
||||
.set(&MixNode::type, NODE_MIX_ADD)
|
||||
.set(&MixNode::use_clamp, true)
|
||||
.set("Fac", 1.0f))
|
||||
.add_connection("Gamma_Cx::Color", "Out::Color1")
|
||||
.add_connection("Gamma_xC::Color", "Out::Color2")
|
||||
.output_color("Out::Color");
|
||||
|
||||
graph.finalize(&scene);
|
||||
}
|
||||
|
||||
/*
|
||||
* Tests: BrightnessContrast with all constant inputs.
|
||||
*/
|
||||
@@ -1142,6 +1208,40 @@ TEST(render_graph, constant_fold_part_math_div_0)
|
||||
graph.finalize(&scene);
|
||||
}
|
||||
|
||||
/*
|
||||
* Tests: partial folding for Math Power with known 0.
|
||||
*/
|
||||
TEST(render_graph, constant_fold_part_math_pow_0)
|
||||
{
|
||||
DEFINE_COMMON_VARIABLES(builder, log);
|
||||
|
||||
EXPECT_ANY_MESSAGE(log);
|
||||
/* X ^ 0 == 1 */
|
||||
INVALID_INFO_MESSAGE(log, "Folding Math_Cx::");
|
||||
CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to constant (1).");
|
||||
INVALID_INFO_MESSAGE(log, "Folding Out::");
|
||||
|
||||
build_math_partial_test_graph(builder, NODE_MATH_POWER, 0.0f);
|
||||
graph.finalize(&scene);
|
||||
}
|
||||
|
||||
/*
|
||||
* Tests: partial folding for Math Power with known 1.
|
||||
*/
|
||||
TEST(render_graph, constant_fold_part_math_pow_1)
|
||||
{
|
||||
DEFINE_COMMON_VARIABLES(builder, log);
|
||||
|
||||
EXPECT_ANY_MESSAGE(log);
|
||||
/* 1 ^ X == 1; X ^ 1 == X */
|
||||
CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (1)");
|
||||
CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
|
||||
INVALID_INFO_MESSAGE(log, "Folding Out::");
|
||||
|
||||
build_math_partial_test_graph(builder, NODE_MATH_POWER, 1.0f);
|
||||
graph.finalize(&scene);
|
||||
}
|
||||
|
||||
/*
|
||||
* Tests: Vector Math with all constant inputs.
|
||||
*/
|
||||
|
||||
@@ -63,6 +63,7 @@ set(SRC_HEADERS
|
||||
util_sky_model.cpp
|
||||
util_sky_model.h
|
||||
util_sky_model_data.h
|
||||
util_avxf.h
|
||||
util_sseb.h
|
||||
util_ssef.h
|
||||
util_ssei.h
|
||||
|
||||
185
intern/cycles/util/util_avxf.h
Normal file
185
intern/cycles/util/util_avxf.h
Normal file
@@ -0,0 +1,185 @@
|
||||
/*
|
||||
* Copyright 2016 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef __UTIL_AVXF_H__
|
||||
#define __UTIL_AVXF_H__
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __KERNEL_AVX__
|
||||
struct avxf
|
||||
{
|
||||
typedef avxf Float;
|
||||
|
||||
enum { size = 8 }; /* Number of SIMD elements. */
|
||||
|
||||
union {
|
||||
__m256 m256;
|
||||
float f[8];
|
||||
int i[8];
|
||||
};
|
||||
|
||||
__forceinline avxf () {}
|
||||
__forceinline avxf (const avxf& other) { m256 = other.m256; }
|
||||
__forceinline avxf& operator=(const avxf& other) { m256 = other.m256; return *this; }
|
||||
|
||||
__forceinline avxf(const __m256 a) : m256(a) {}
|
||||
__forceinline avxf(const __m256i a) : m256(_mm256_castsi256_ps (a)) {}
|
||||
|
||||
__forceinline operator const __m256&(void) const { return m256; }
|
||||
__forceinline operator __m256&(void) { return m256; }
|
||||
|
||||
__forceinline avxf (float a) : m256(_mm256_set1_ps(a)) {}
|
||||
|
||||
__forceinline avxf(float high32x4, float low32x4) :
|
||||
m256(_mm256_set_ps(high32x4, high32x4, high32x4, high32x4, low32x4, low32x4, low32x4, low32x4)) {}
|
||||
|
||||
__forceinline avxf(float a3, float a2, float a1, float a0) :
|
||||
m256(_mm256_set_ps(a3, a2, a1, a0, a3, a2, a1, a0)) {}
|
||||
|
||||
__forceinline avxf(float a7, float a6, float a5, float a4, float a3, float a2, float a1, float a0) :
|
||||
m256(_mm256_set_ps(a7, a6, a5, a4, a3, a2, a1, a0)) {}
|
||||
|
||||
|
||||
__forceinline avxf(int a3, int a2, int a1, int a0)
|
||||
{
|
||||
const __m256i foo = _mm256_set_epi32(a3, a2, a1, a0, a3, a2, a1, a0);
|
||||
m256 = _mm256_castsi256_ps(foo);
|
||||
}
|
||||
|
||||
|
||||
__forceinline avxf(int a7, int a6, int a5, int a4, int a3, int a2, int a1, int a0)
|
||||
{
|
||||
const __m256i foo = _mm256_set_epi32(a7, a6, a5, a4, a3, a2, a1, a0);
|
||||
m256 = _mm256_castsi256_ps(foo);
|
||||
}
|
||||
|
||||
__forceinline avxf(__m128 a, __m128 b)
|
||||
{
|
||||
const __m256 foo = _mm256_castps128_ps256(a);
|
||||
m256 = _mm256_insertf128_ps(foo, b, 1);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Unary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Square root of each of the eight elements. */
__forceinline const avxf mm256_sqrt(const avxf& a)
{
	return _mm256_sqrt_ps(a.m256);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Binary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Element-wise addition. A scalar operand is first broadcast to all eight
 * elements through the avxf(float) constructor. */
__forceinline const avxf operator +(const avxf& a, const avxf& b)
{
	return _mm256_add_ps(a.m256, b.m256);
}
__forceinline const avxf operator +(const avxf& a, const float& b)
{
	return a + avxf(b);
}
__forceinline const avxf operator +(const float& a, const avxf& b)
{
	return avxf(a) + b;
}

/* Element-wise subtraction, with the same scalar broadcasting. */
__forceinline const avxf operator -(const avxf& a, const avxf& b)
{
	return _mm256_sub_ps(a.m256, b.m256);
}
__forceinline const avxf operator -(const avxf& a, const float& b)
{
	return a - avxf(b);
}
__forceinline const avxf operator -(const float& a, const avxf& b)
{
	return avxf(a) - b;
}

/* Element-wise multiplication, with the same scalar broadcasting. */
__forceinline const avxf operator *(const avxf& a, const avxf& b)
{
	return _mm256_mul_ps(a.m256, b.m256);
}
__forceinline const avxf operator *(const avxf& a, const float& b)
{
	return a * avxf(b);
}
__forceinline const avxf operator *(const float& a, const avxf& b)
{
	return avxf(a) * b;
}

/* Element-wise division, with the same scalar broadcasting. */
__forceinline const avxf operator /(const avxf& a, const avxf& b)
{
	return _mm256_div_ps(a.m256, b.m256);
}
__forceinline const avxf operator /(const avxf& a, const float& b)
{
	return a / avxf(b);
}
__forceinline const avxf operator /(const float& a, const avxf& b)
{
	return avxf(a) / b;
}

/* Bitwise OR / XOR / AND on the raw 256 bits of both operands. */
__forceinline const avxf operator|(const avxf& a, const avxf& b)
{
	return _mm256_or_ps(a.m256, b.m256);
}

__forceinline const avxf operator^(const avxf& a, const avxf& b)
{
	return _mm256_xor_ps(a.m256, b.m256);
}

__forceinline const avxf operator&(const avxf& a, const avxf& b)
{
	return _mm256_and_ps(a.m256, b.m256);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Movement/Shifting/Shuffling Functions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Shuffle the elements of a using a run-time index vector, forwarded to
 * _mm256_permutevar_ps(). */
__forceinline const avxf shuffle(const avxf& a, const __m256i &shuf)
{
	return _mm256_permutevar_ps(a.m256, shuf);
}

/* Same as above, with the eight indices given at compile time (i0 is the
 * index for element 0, i7 for element 7). */
template<int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
__forceinline const avxf shuffle(const avxf& a)
{
	const __m256i indices = _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0);
	return _mm256_permutevar_ps(a.m256, indices);
}

/* Two-source shuffle, forwarded to _mm256_shuffle_ps() with the selector
 * _MM_SHUFFLE(i3, i2, i1, i0). */
template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const avxf shuffle(const avxf& a, const avxf& b)
{
	return _mm256_shuffle_ps(a.m256, b.m256, _MM_SHUFFLE(i3, i2, i1, i0));
}

/* Single-source form of the four-index shuffle: a is used for both inputs. */
template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const avxf shuffle(const avxf& a)
{
	return shuffle<i0, i1, i2, i3>(a, a);
}

/* Two-source shuffle using the same index i0 for all four selectors. */
template<size_t i0>
__forceinline const avxf shuffle(const avxf& a, const avxf& b)
{
	return shuffle<i0, i0, i0, i0>(a, b);
}

/* Single-source form of the one-index shuffle. */
template<size_t i0>
__forceinline const avxf shuffle(const avxf& a)
{
	return shuffle<i0>(a, a);
}
|
||||
|
||||
/* Permute the eight elements of a: result element k is a's element ik.
 *
 * With __KERNEL_AVX2__ this is a single _mm256_permutevar8x32_ps(); otherwise
 * the register is stored to a temporary array and the result is rebuilt
 * element by element. */
template<int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
__forceinline const avxf permute(const avxf& a)
{
#ifdef __KERNEL_AVX2__
	const __m256i indices = _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0);
	return _mm256_permutevar8x32_ps(a.m256, indices);
#else
	float lanes[8];
	_mm256_storeu_ps(lanes, a.m256);
	return avxf(lanes[i7], lanes[i6], lanes[i5], lanes[i4],
	            lanes[i3], lanes[i2], lanes[i1], lanes[i0]);
#endif
}
|
||||
|
||||
template<int S0, int S1, int S2, int S3,int S4,int S5,int S6, int S7>
|
||||
ccl_device_inline const avxf set_sign_bit(const avxf &a)
|
||||
{
|
||||
return a ^ avxf(S7 << 31, S6 << 31, S5 << 31, S4 << 31, S3 << 31,S2 << 31,S1 << 31,S0 << 31);
|
||||
}
|
||||
|
||||
template<size_t S0, size_t S1, size_t S2, size_t S3,size_t S4,size_t S5,size_t S6, size_t S7>
|
||||
ccl_device_inline const avxf blend(const avxf &a, const avxf &b)
|
||||
{
|
||||
return _mm256_blend_ps(a,b,S7 << 0 | S6 << 1 | S5 << 2 | S4 << 3 | S3 << 4 | S2 << 5 | S1 << 6 | S0 << 7);
|
||||
}
|
||||
|
||||
template<size_t S0, size_t S1, size_t S2, size_t S3 >
|
||||
ccl_device_inline const avxf blend(const avxf &a, const avxf &b)
|
||||
{
|
||||
return blend<S0,S1,S2,S3,S0,S1,S2,S3>(a,b);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Ternary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/* Multiply-add: returns a*b + c for each element.
 *
 * With __KERNEL_AVX2__ the fused _mm256_fmadd_ps() instruction is used;
 * otherwise the product and the sum are computed as two separate operations. */
__forceinline const avxf madd(const avxf& a, const avxf& b, const avxf& c)
{
#ifdef __KERNEL_AVX2__
	return _mm256_fmadd_ps(a, b, c);
#else
	const avxf product = a * b;
	return c + product;
#endif
}
|
||||
|
||||
/* Negated multiply-add: returns c - a*b for each element.
 *
 * With __KERNEL_AVX2__ the fused _mm256_fnmadd_ps() instruction is used;
 * otherwise the product and the difference are computed as two separate
 * operations. */
__forceinline const avxf nmadd(const avxf& a, const avxf& b, const avxf& c)
{
#ifdef __KERNEL_AVX2__
	return _mm256_fnmadd_ps(a, b, c);
#else
	const avxf product = a * b;
	return c - product;
#endif
}
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
@@ -233,7 +233,7 @@ ccl_device_inline int mod(int x, int m)
|
||||
|
||||
#ifndef __KERNEL_OPENCL__
|
||||
|
||||
ccl_device_inline bool is_zero(const float2 a)
|
||||
ccl_device_inline bool is_zero(const float2& a)
|
||||
{
|
||||
return (a.x == 0.0f && a.y == 0.0f);
|
||||
}
|
||||
@@ -242,7 +242,7 @@ ccl_device_inline bool is_zero(const float2 a)
|
||||
|
||||
#ifndef __KERNEL_OPENCL__
|
||||
|
||||
ccl_device_inline float average(const float2 a)
|
||||
ccl_device_inline float average(const float2& a)
|
||||
{
|
||||
return (a.x + a.y)*(1.0f/2.0f);
|
||||
}
|
||||
@@ -251,58 +251,58 @@ ccl_device_inline float average(const float2 a)
|
||||
|
||||
#ifndef __KERNEL_OPENCL__
|
||||
|
||||
ccl_device_inline float2 operator-(const float2 a)
|
||||
ccl_device_inline float2 operator-(const float2& a)
|
||||
{
|
||||
return make_float2(-a.x, -a.y);
|
||||
}
|
||||
|
||||
ccl_device_inline float2 operator*(const float2 a, const float2 b)
|
||||
ccl_device_inline float2 operator*(const float2& a, const float2& b)
|
||||
{
|
||||
return make_float2(a.x*b.x, a.y*b.y);
|
||||
}
|
||||
|
||||
ccl_device_inline float2 operator*(const float2 a, float f)
|
||||
ccl_device_inline float2 operator*(const float2& a, float f)
|
||||
{
|
||||
return make_float2(a.x*f, a.y*f);
|
||||
}
|
||||
|
||||
ccl_device_inline float2 operator*(float f, const float2 a)
|
||||
ccl_device_inline float2 operator*(float f, const float2& a)
|
||||
{
|
||||
return make_float2(a.x*f, a.y*f);
|
||||
}
|
||||
|
||||
ccl_device_inline float2 operator/(float f, const float2 a)
|
||||
ccl_device_inline float2 operator/(float f, const float2& a)
|
||||
{
|
||||
return make_float2(f/a.x, f/a.y);
|
||||
}
|
||||
|
||||
ccl_device_inline float2 operator/(const float2 a, float f)
|
||||
ccl_device_inline float2 operator/(const float2& a, float f)
|
||||
{
|
||||
float invf = 1.0f/f;
|
||||
return make_float2(a.x*invf, a.y*invf);
|
||||
}
|
||||
|
||||
ccl_device_inline float2 operator/(const float2 a, const float2 b)
|
||||
ccl_device_inline float2 operator/(const float2& a, const float2& b)
|
||||
{
|
||||
return make_float2(a.x/b.x, a.y/b.y);
|
||||
}
|
||||
|
||||
ccl_device_inline float2 operator+(const float2 a, const float2 b)
|
||||
ccl_device_inline float2 operator+(const float2& a, const float2& b)
|
||||
{
|
||||
return make_float2(a.x+b.x, a.y+b.y);
|
||||
}
|
||||
|
||||
ccl_device_inline float2 operator-(const float2 a, const float2 b)
|
||||
ccl_device_inline float2 operator-(const float2& a, const float2& b)
|
||||
{
|
||||
return make_float2(a.x-b.x, a.y-b.y);
|
||||
}
|
||||
|
||||
ccl_device_inline float2 operator+=(float2& a, const float2 b)
|
||||
ccl_device_inline float2 operator+=(float2& a, const float2& b)
|
||||
{
|
||||
return a = a + b;
|
||||
}
|
||||
|
||||
ccl_device_inline float2 operator*=(float2& a, const float2 b)
|
||||
ccl_device_inline float2 operator*=(float2& a, const float2& b)
|
||||
{
|
||||
return a = a * b;
|
||||
}
|
||||
@@ -312,7 +312,7 @@ ccl_device_inline float2 operator*=(float2& a, float f)
|
||||
return a = a * f;
|
||||
}
|
||||
|
||||
ccl_device_inline float2 operator/=(float2& a, const float2 b)
|
||||
ccl_device_inline float2 operator/=(float2& a, const float2& b)
|
||||
{
|
||||
return a = a / b;
|
||||
}
|
||||
@@ -324,12 +324,12 @@ ccl_device_inline float2 operator/=(float2& a, float f)
|
||||
}
|
||||
|
||||
|
||||
ccl_device_inline float dot(const float2 a, const float2 b)
|
||||
ccl_device_inline float dot(const float2& a, const float2& b)
|
||||
{
|
||||
return a.x*b.x + a.y*b.y;
|
||||
}
|
||||
|
||||
ccl_device_inline float cross(const float2 a, const float2 b)
|
||||
ccl_device_inline float cross(const float2& a, const float2& b)
|
||||
{
|
||||
return (a.x*b.y - a.y*b.x);
|
||||
}
|
||||
@@ -343,59 +343,59 @@ ccl_device_inline bool operator==(const int2 a, const int2 b)
|
||||
return (a.x == b.x && a.y == b.y);
|
||||
}
|
||||
|
||||
ccl_device_inline float len(const float2 a)
|
||||
ccl_device_inline float len(const float2& a)
|
||||
{
|
||||
return sqrtf(dot(a, a));
|
||||
}
|
||||
|
||||
ccl_device_inline float2 normalize(const float2 a)
|
||||
ccl_device_inline float2 normalize(const float2& a)
|
||||
{
|
||||
return a/len(a);
|
||||
}
|
||||
|
||||
ccl_device_inline float2 normalize_len(const float2 a, float *t)
|
||||
ccl_device_inline float2 normalize_len(const float2& a, float *t)
|
||||
{
|
||||
*t = len(a);
|
||||
return a/(*t);
|
||||
}
|
||||
|
||||
ccl_device_inline float2 safe_normalize(const float2 a)
|
||||
ccl_device_inline float2 safe_normalize(const float2& a)
|
||||
{
|
||||
float t = len(a);
|
||||
return (t != 0.0f)? a/t: a;
|
||||
}
|
||||
|
||||
ccl_device_inline bool operator==(const float2 a, const float2 b)
|
||||
ccl_device_inline bool operator==(const float2& a, const float2& b)
|
||||
{
|
||||
return (a.x == b.x && a.y == b.y);
|
||||
}
|
||||
|
||||
ccl_device_inline bool operator!=(const float2 a, const float2 b)
|
||||
ccl_device_inline bool operator!=(const float2& a, const float2& b)
|
||||
{
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
ccl_device_inline float2 min(float2 a, float2 b)
|
||||
ccl_device_inline float2 min(const float2& a, const float2& b)
|
||||
{
|
||||
return make_float2(min(a.x, b.x), min(a.y, b.y));
|
||||
}
|
||||
|
||||
ccl_device_inline float2 max(float2 a, float2 b)
|
||||
ccl_device_inline float2 max(const float2& a, const float2& b)
|
||||
{
|
||||
return make_float2(max(a.x, b.x), max(a.y, b.y));
|
||||
}
|
||||
|
||||
ccl_device_inline float2 clamp(float2 a, float2 mn, float2 mx)
|
||||
ccl_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx)
|
||||
{
|
||||
return min(max(a, mn), mx);
|
||||
}
|
||||
|
||||
ccl_device_inline float2 fabs(float2 a)
|
||||
ccl_device_inline float2 fabs(const float2& a)
|
||||
{
|
||||
return make_float2(fabsf(a.x), fabsf(a.y));
|
||||
}
|
||||
|
||||
ccl_device_inline float2 as_float2(const float4 a)
|
||||
ccl_device_inline float2 as_float2(const float4& a)
|
||||
{
|
||||
return make_float2(a.x, a.y);
|
||||
}
|
||||
@@ -413,7 +413,7 @@ ccl_device_inline void print_float2(const char *label, const float2& a)
|
||||
|
||||
#ifndef __KERNEL_OPENCL__
|
||||
|
||||
ccl_device_inline float2 interp(float2 a, float2 b, float t)
|
||||
ccl_device_inline float2 interp(const float2& a, const float2& b, float t)
|
||||
{
|
||||
return a + t*(b - a);
|
||||
}
|
||||
@@ -424,58 +424,93 @@ ccl_device_inline float2 interp(float2 a, float2 b, float t)
|
||||
|
||||
#ifndef __KERNEL_OPENCL__
|
||||
|
||||
ccl_device_inline float3 operator-(const float3 a)
|
||||
ccl_device_inline float3 operator-(const float3& a)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
|
||||
#else
|
||||
return make_float3(-a.x, -a.y, -a.z);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 operator*(const float3 a, const float3 b)
|
||||
ccl_device_inline float3 operator*(const float3& a, const float3& b)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
return float3(_mm_mul_ps(a.m128,b.m128));
|
||||
#else
|
||||
return make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 operator*(const float3 a, float f)
|
||||
ccl_device_inline float3 operator*(const float3& a, const float f)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
return float3(_mm_mul_ps(a.m128,_mm_set1_ps(f)));
|
||||
#else
|
||||
return make_float3(a.x*f, a.y*f, a.z*f);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 operator*(float f, const float3 a)
|
||||
ccl_device_inline float3 operator*(const float f, const float3& a)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
return float3(_mm_mul_ps(a.m128, _mm_set1_ps(f)));
|
||||
#else
|
||||
return make_float3(a.x*f, a.y*f, a.z*f);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 operator/(float f, const float3 a)
|
||||
ccl_device_inline float3 operator/(const float f, const float3& a)
|
||||
{
|
||||
return make_float3(f/a.x, f/a.y, f/a.z);
|
||||
/* TODO(sergey): Currently disabled, gives speedup but makes intersection tests non-watertight. */
|
||||
// #ifdef __KERNEL_SSE__
|
||||
// __m128 rc = _mm_rcp_ps(a.m128);
|
||||
// return float3(_mm_mul_ps(_mm_set1_ps(f),rc));
|
||||
// #else
|
||||
return make_float3(f / a.x, f / a.y, f / a.z);
|
||||
// #endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 operator/(const float3 a, float f)
|
||||
ccl_device_inline float3 operator/(const float3& a, const float f)
|
||||
{
|
||||
float invf = 1.0f/f;
|
||||
return make_float3(a.x*invf, a.y*invf, a.z*invf);
|
||||
return a * invf;
|
||||
}
|
||||
|
||||
ccl_device_inline float3 operator/(const float3 a, const float3 b)
|
||||
ccl_device_inline float3 operator/(const float3& a, const float3& b)
|
||||
{
|
||||
return make_float3(a.x/b.x, a.y/b.y, a.z/b.z);
|
||||
#ifdef __KERNEL_SSE__
|
||||
__m128 rc = _mm_rcp_ps(b.m128);
|
||||
return float3(_mm_mul_ps(a, rc));
|
||||
#else
|
||||
return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 operator+(const float3 a, const float3 b)
|
||||
ccl_device_inline float3 operator+(const float3& a, const float3& b)
|
||||
{
|
||||
return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
|
||||
#ifdef __KERNEL_SSE__
|
||||
return float3(_mm_add_ps(a.m128, b.m128));
|
||||
#else
|
||||
return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 operator-(const float3 a, const float3 b)
|
||||
ccl_device_inline float3 operator-(const float3& a, const float3& b)
|
||||
{
|
||||
return make_float3(a.x-b.x, a.y-b.y, a.z-b.z);
|
||||
#ifdef __KERNEL_SSE__
|
||||
return float3(_mm_sub_ps(a.m128, b.m128));
|
||||
#else
|
||||
return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 operator+=(float3& a, const float3 b)
|
||||
ccl_device_inline float3 operator+=(float3& a, const float3& b)
|
||||
{
|
||||
return a = a + b;
|
||||
}
|
||||
|
||||
ccl_device_inline float3 operator*=(float3& a, const float3 b)
|
||||
ccl_device_inline float3 operator*=(float3& a, const float3& b)
|
||||
{
|
||||
return a = a * b;
|
||||
}
|
||||
@@ -485,7 +520,7 @@ ccl_device_inline float3 operator*=(float3& a, float f)
|
||||
return a = a * f;
|
||||
}
|
||||
|
||||
ccl_device_inline float3 operator/=(float3& a, const float3 b)
|
||||
ccl_device_inline float3 operator/=(float3& a, const float3& b)
|
||||
{
|
||||
return a = a / b;
|
||||
}
|
||||
@@ -496,7 +531,7 @@ ccl_device_inline float3 operator/=(float3& a, float f)
|
||||
return a = a * invf;
|
||||
}
|
||||
|
||||
ccl_device_inline float dot(const float3 a, const float3 b)
|
||||
ccl_device_inline float dot(const float3& a, const float3& b)
|
||||
{
|
||||
#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
|
||||
return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F));
|
||||
@@ -505,7 +540,16 @@ ccl_device_inline float dot(const float3 a, const float3 b)
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float dot(const float4 a, const float4 b)
|
||||
ccl_device_inline float dot_xy(const float3& a, const float3& b)
|
||||
{
|
||||
#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
|
||||
return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a,b),b));
|
||||
#else
|
||||
return a.x*b.x + a.y*b.y;
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float dot(const float4& a, const float4& b)
|
||||
{
|
||||
#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
|
||||
return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF));
|
||||
@@ -514,7 +558,7 @@ ccl_device_inline float dot(const float4 a, const float4 b)
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 cross(const float3 a, const float3 b)
|
||||
ccl_device_inline float3 cross(const float3& a, const float3& b)
|
||||
{
|
||||
float3 r = make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
|
||||
return r;
|
||||
@@ -538,12 +582,12 @@ ccl_device_inline float len_squared(const float3 a)
|
||||
|
||||
#ifndef __KERNEL_OPENCL__
|
||||
|
||||
ccl_device_inline float len_squared(const float4 a)
|
||||
ccl_device_inline float len_squared(const float4& a)
|
||||
{
|
||||
return dot(a, a);
|
||||
}
|
||||
|
||||
ccl_device_inline float3 normalize(const float3 a)
|
||||
ccl_device_inline float3 normalize(const float3& a)
|
||||
{
|
||||
#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
|
||||
__m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F));
|
||||
@@ -563,13 +607,14 @@ ccl_device_inline float3 saturate3(float3 a)
|
||||
ccl_device_inline float3 normalize_len(const float3 a, float *t)
|
||||
{
|
||||
*t = len(a);
|
||||
return a/(*t);
|
||||
float x = 1.0f / *t;
|
||||
return a*x;
|
||||
}
|
||||
|
||||
ccl_device_inline float3 safe_normalize(const float3 a)
|
||||
{
|
||||
float t = len(a);
|
||||
return (t != 0.0f)? a/t: a;
|
||||
return (t != 0.0f)? a * (1.0f/t) : a;
|
||||
}
|
||||
|
||||
ccl_device_inline float3 safe_normalize_len(const float3 a, float *t)
|
||||
@@ -580,7 +625,7 @@ ccl_device_inline float3 safe_normalize_len(const float3 a, float *t)
|
||||
|
||||
#ifndef __KERNEL_OPENCL__
|
||||
|
||||
ccl_device_inline bool operator==(const float3 a, const float3 b)
|
||||
ccl_device_inline bool operator==(const float3& a, const float3& b)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7;
|
||||
@@ -589,12 +634,12 @@ ccl_device_inline bool operator==(const float3 a, const float3 b)
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline bool operator!=(const float3 a, const float3 b)
|
||||
ccl_device_inline bool operator!=(const float3& a, const float3& b)
|
||||
{
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
ccl_device_inline float3 min(float3 a, float3 b)
|
||||
ccl_device_inline float3 min(const float3& a, const float3& b)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
return _mm_min_ps(a.m128, b.m128);
|
||||
@@ -603,7 +648,7 @@ ccl_device_inline float3 min(float3 a, float3 b)
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 max(float3 a, float3 b)
|
||||
ccl_device_inline float3 max(const float3& a, const float3& b)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
return _mm_max_ps(a.m128, b.m128);
|
||||
@@ -612,12 +657,12 @@ ccl_device_inline float3 max(float3 a, float3 b)
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 clamp(float3 a, float3 mn, float3 mx)
|
||||
ccl_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx)
|
||||
{
|
||||
return min(max(a, mn), mx);
|
||||
}
|
||||
|
||||
ccl_device_inline float3 fabs(float3 a)
|
||||
ccl_device_inline float3 fabs(const float3& a)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
__m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
|
||||
@@ -670,7 +715,7 @@ ccl_device_inline float3 interp(float3 a, float3 b, float t)
|
||||
|
||||
#ifndef __KERNEL_OPENCL__
|
||||
|
||||
ccl_device_inline float3 mix(float3 a, float3 b, float t)
|
||||
ccl_device_inline float3 mix(const float3& a, const float3& b, float t)
|
||||
{
|
||||
return a + t*(b - a);
|
||||
}
|
||||
@@ -833,7 +878,7 @@ ccl_device_inline int4 operator<(const float4& a, const float4& b)
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline int4 operator>=(float4 a, float4 b)
|
||||
ccl_device_inline int4 operator>=(const float4& a, const float4& b)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
return _mm_cvtps_epi32(_mm_cmpge_ps(a.m128, b.m128)); /* todo: avoid cvt */
|
||||
@@ -851,7 +896,7 @@ ccl_device_inline int4 operator<=(const float4& a, const float4& b)
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline bool operator==(const float4 a, const float4 b)
|
||||
ccl_device_inline bool operator==(const float4& a, const float4& b)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15;
|
||||
@@ -893,23 +938,23 @@ ccl_device_inline float average(const float4& a)
|
||||
return reduce_add(a) * 0.25f;
|
||||
}
|
||||
|
||||
ccl_device_inline float len(const float4 a)
|
||||
ccl_device_inline float len(const float4& a)
|
||||
{
|
||||
return sqrtf(dot(a, a));
|
||||
}
|
||||
|
||||
ccl_device_inline float4 normalize(const float4 a)
|
||||
ccl_device_inline float4 normalize(const float4& a)
|
||||
{
|
||||
return a/len(a);
|
||||
}
|
||||
|
||||
ccl_device_inline float4 safe_normalize(const float4 a)
|
||||
ccl_device_inline float4 safe_normalize(const float4& a)
|
||||
{
|
||||
float t = len(a);
|
||||
return (t != 0.0f)? a/t: a;
|
||||
}
|
||||
|
||||
ccl_device_inline float4 min(float4 a, float4 b)
|
||||
ccl_device_inline float4 min(const float4& a, const float4& b)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
return _mm_min_ps(a.m128, b.m128);
|
||||
@@ -918,7 +963,7 @@ ccl_device_inline float4 min(float4 a, float4 b)
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float4 max(float4 a, float4 b)
|
||||
ccl_device_inline float4 max(const float4& a, const float4& b)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
return _mm_max_ps(a.m128, b.m128);
|
||||
@@ -1190,7 +1235,7 @@ template<class A, class B> A lerp(const A& a, const A& b, const B& t)
|
||||
|
||||
/* Triangle */
|
||||
|
||||
ccl_device_inline float triangle_area(const float3 v1, const float3 v2, const float3 v3)
|
||||
ccl_device_inline float triangle_area(const float3& v1, const float3& v2, const float3& v3)
|
||||
{
|
||||
return len(cross(v3 - v2, v1 - v2))*0.5f;
|
||||
}
|
||||
|
||||
@@ -778,7 +778,9 @@ static string line_directive(const string& path, int line)
|
||||
}
|
||||
|
||||
|
||||
string path_source_replace_includes(const string& source, const string& path)
|
||||
string path_source_replace_includes(const string& source,
|
||||
const string& path,
|
||||
const string& source_filename)
|
||||
{
|
||||
/* Our own little c preprocessor that replaces #includes with the file
|
||||
* contents, to work around issue of opencl drivers not supporting
|
||||
@@ -807,12 +809,12 @@ string path_source_replace_includes(const string& source, const string& path)
|
||||
* and avoids having a list of include directories.
|
||||
*/
|
||||
text = path_source_replace_includes(
|
||||
text, path_dirname(filepath));
|
||||
text = path_source_replace_includes(text, path);
|
||||
text, path_dirname(filepath), filename);
|
||||
text = path_source_replace_includes(text, path, filename);
|
||||
/* Use line directives for better error messages. */
|
||||
line = line_directive(filepath, 1)
|
||||
+ token.replace(0, n_end + 1, "\n" + text + "\n")
|
||||
+ line_directive(path, i);
|
||||
+ line_directive(path_join(path, source_filename), i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -66,7 +66,9 @@ bool path_read_text(const string& path, string& text);
|
||||
bool path_remove(const string& path);
|
||||
|
||||
/* source code utility */
|
||||
string path_source_replace_includes(const string& source, const string& path);
|
||||
string path_source_replace_includes(const string& source,
|
||||
const string& path,
|
||||
const string& source_filename="");
|
||||
|
||||
/* cache utility */
|
||||
void path_cache_clear_except(const string& name, const set<string>& except);
|
||||
|
||||
@@ -71,7 +71,7 @@ __forceinline operator int ( ) const { return std::numeric_limits<
|
||||
#define _lzcnt_u64 __lzcnt64
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) && !defined(__MINGW32__)
|
||||
#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__clang__)
|
||||
|
||||
__forceinline int __popcnt(int in) {
|
||||
return _mm_popcnt_u32(in);
|
||||
@@ -455,6 +455,7 @@ CCL_NAMESPACE_END
|
||||
#include "util_sseb.h"
|
||||
#include "util_ssei.h"
|
||||
#include "util_ssef.h"
|
||||
#include "util_avxf.h"
|
||||
|
||||
#endif /* __UTIL_SIMD_TYPES_H__ */
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
|
||||
#if defined(_WIN32) && !defined(FREE_WINDOWS)
|
||||
#define ccl_device_inline static __forceinline
|
||||
#define ccl_device_forceinline static __forceinline
|
||||
#define ccl_align(...) __declspec(align(__VA_ARGS__))
|
||||
#ifdef __KERNEL_64_BIT__
|
||||
#define ccl_try_align(...) __declspec(align(__VA_ARGS__))
|
||||
@@ -56,6 +57,7 @@
|
||||
#else
|
||||
|
||||
#define ccl_device_inline static inline __attribute__((always_inline))
|
||||
#define ccl_device_forceinline static inline __attribute__((always_inline))
|
||||
#define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
|
||||
#ifndef FREE_WINDOWS64
|
||||
#define __forceinline inline __attribute__((always_inline))
|
||||
@@ -172,6 +174,9 @@ struct ccl_try_align(16) int3 {
|
||||
__forceinline int3(const __m128i a) : m128(a) {}
|
||||
__forceinline operator const __m128i&(void) const { return m128; }
|
||||
__forceinline operator __m128i&(void) { return m128; }
|
||||
|
||||
int3(const int3& a) { m128 = a.m128; }
|
||||
int3& operator =(const int3& a) { m128 = a.m128; return *this; }
|
||||
#else
|
||||
int x, y, z, w;
|
||||
#endif
|
||||
@@ -191,6 +196,9 @@ struct ccl_try_align(16) int4 {
|
||||
__forceinline int4(const __m128i a) : m128(a) {}
|
||||
__forceinline operator const __m128i&(void) const { return m128; }
|
||||
__forceinline operator __m128i&(void) { return m128; }
|
||||
|
||||
int4(const int4& a) : m128(a.m128) {}
|
||||
int4& operator=(const int4& a) { m128 = a.m128; return *this; }
|
||||
#else
|
||||
int x, y, z, w;
|
||||
#endif
|
||||
@@ -235,9 +243,12 @@ struct ccl_try_align(16) float3 {
|
||||
};
|
||||
|
||||
__forceinline float3() {}
|
||||
__forceinline float3(const __m128 a) : m128(a) {}
|
||||
__forceinline float3(const __m128& a) : m128(a) {}
|
||||
__forceinline operator const __m128&(void) const { return m128; }
|
||||
__forceinline operator __m128&(void) { return m128; }
|
||||
|
||||
__forceinline float3(const float3& a) : m128(a.m128) {}
|
||||
__forceinline float3& operator =(const float3& a) { m128 = a.m128; return *this; }
|
||||
#else
|
||||
float x, y, z, w;
|
||||
#endif
|
||||
@@ -257,6 +268,10 @@ struct ccl_try_align(16) float4 {
|
||||
__forceinline float4(const __m128 a) : m128(a) {}
|
||||
__forceinline operator const __m128&(void) const { return m128; }
|
||||
__forceinline operator __m128&(void) { return m128; }
|
||||
|
||||
__forceinline float4(const float4& a) : m128(a.m128) {}
|
||||
__forceinline float4& operator =(const float4& a) { m128 = a.m128; return *this; }
|
||||
|
||||
#else
|
||||
float x, y, z, w;
|
||||
#endif
|
||||
|
||||
@@ -332,7 +332,7 @@ class LbmFsgrSolver :
|
||||
void debugMarkCellCall(int level, int vi,int vj,int vk);
|
||||
|
||||
// loop over grid, stream&collide update
|
||||
void mainLoop(int lev);
|
||||
void mainLoop(const int lev);
|
||||
// change time step size
|
||||
void adaptTimestep();
|
||||
//! init mObjectSpeeds for current parametrization
|
||||
|
||||
@@ -355,7 +355,7 @@ void LbmFsgrSolver::fineAdvance()
|
||||
//! fine step function
|
||||
/*****************************************************************************/
|
||||
void
|
||||
LbmFsgrSolver::mainLoop(int lev)
|
||||
LbmFsgrSolver::mainLoop(const int lev)
|
||||
{
|
||||
// loops over _only inner_ cells -----------------------------------------------------------------------------------
|
||||
|
||||
@@ -376,13 +376,16 @@ LbmFsgrSolver::mainLoop(int lev)
|
||||
// main loop region
|
||||
const bool doReduce = true;
|
||||
const int gridLoopBound=1;
|
||||
int calcNumInvIfCells = 0;
|
||||
LbmFloat calcInitialMass = 0;
|
||||
GRID_REGION_INIT();
|
||||
#if PARALLEL==1
|
||||
#pragma omp parallel default(shared) num_threads(mNumOMPThreads) \
|
||||
const int gDebugLevel = ::gDebugLevel;
|
||||
#pragma omp parallel default(none) num_threads(mNumOMPThreads) \
|
||||
reduction(+: \
|
||||
calcCurrentMass,calcCurrentVolume, \
|
||||
calcCellsFilled,calcCellsEmptied, \
|
||||
calcNumUsedCells )
|
||||
calcNumUsedCells,calcNumInvIfCells,calcInitialMass)
|
||||
GRID_REGION_START();
|
||||
#else // PARALLEL==1
|
||||
GRID_REGION_START();
|
||||
@@ -468,7 +471,7 @@ LbmFsgrSolver::mainLoop(int lev)
|
||||
calcCurrentMass += iniRho;
|
||||
calcCurrentVolume += 1.0;
|
||||
calcNumUsedCells++;
|
||||
mInitialMass += iniRho;
|
||||
calcInitialMass += iniRho;
|
||||
// dont treat cell until next step
|
||||
continue;
|
||||
}
|
||||
@@ -479,7 +482,7 @@ LbmFsgrSolver::mainLoop(int lev)
|
||||
if(isnotValid) {
|
||||
// remove fluid cells, shouldnt be here anyway
|
||||
LbmFloat fluidRho = m[0]; FORDF1 { fluidRho += m[l]; }
|
||||
mInitialMass -= fluidRho;
|
||||
calcInitialMass -= fluidRho;
|
||||
const LbmFloat iniRho = 0.0;
|
||||
RAC(tcel, dMass) = RAC(tcel, dFfrac) = iniRho;
|
||||
RAC(tcel, dFlux) = FLUX_INIT;
|
||||
@@ -608,8 +611,8 @@ LbmFsgrSolver::mainLoop(int lev)
|
||||
// read distribution functions of adjacent cells = stream step
|
||||
DEFAULT_STREAM;
|
||||
|
||||
if((nbored & CFFluid)==0) { newFlag |= CFNoNbFluid; mNumInvIfCells++; }
|
||||
if((nbored & CFEmpty)==0) { newFlag |= CFNoNbEmpty; mNumInvIfCells++; }
|
||||
if((nbored & CFFluid)==0) { newFlag |= CFNoNbFluid; calcNumInvIfCells++; }
|
||||
if((nbored & CFEmpty)==0) { newFlag |= CFNoNbEmpty; calcNumInvIfCells++; }
|
||||
|
||||
// calculate mass exchange for interface cells
|
||||
LbmFloat myfrac = RAC(ccel,dFfrac);
|
||||
@@ -809,7 +812,7 @@ LbmFsgrSolver::mainLoop(int lev)
|
||||
// fill if cells in inflow region
|
||||
if(myfrac<0.5) {
|
||||
mass += 0.25;
|
||||
mInitialMass += 0.25;
|
||||
calcInitialMass += 0.25;
|
||||
}
|
||||
const int OId = oldFlag>>24;
|
||||
const LbmVec vel(mObjectSpeeds[OId]);
|
||||
@@ -865,10 +868,8 @@ LbmFsgrSolver::mainLoop(int lev)
|
||||
// physical drop model
|
||||
if(mPartUsePhysModel) {
|
||||
LbmFloat realWorldFac = (mLevel[lev].simCellSize / mLevel[lev].timestep);
|
||||
LbmFloat rux = (ux * realWorldFac);
|
||||
LbmFloat ruy = (uy * realWorldFac);
|
||||
LbmFloat ruz = (uz * realWorldFac);
|
||||
LbmFloat rl = norm(ntlVec3Gfx(rux,ruy,ruz));
|
||||
LbmVec ru(ux * realWorldFac, uy * realWorldFac, uz * realWorldFac);
|
||||
LbmFloat rl = norm(ru);
|
||||
basethresh *= rl;
|
||||
|
||||
// reduce probability in outer region?
|
||||
@@ -960,14 +961,15 @@ LbmFsgrSolver::mainLoop(int lev)
|
||||
// average normal & velocity
|
||||
// -> mostly along velocity dir, many into surface
|
||||
// fluid velocity (not normalized!)
|
||||
LbmVec flvelVel = LbmVec(ux,uy,uz);
|
||||
LbmVec flvelVel(ux,uy,uz);
|
||||
LbmFloat flvelLen = norm(flvelVel);
|
||||
// surface normal
|
||||
LbmVec normVel = LbmVec(surfaceNormal[0],surfaceNormal[1],surfaceNormal[2]);
|
||||
LbmVec normVel(surfaceNormal[0],surfaceNormal[1],surfaceNormal[2]);
|
||||
normalize(normVel);
|
||||
LbmFloat normScale = (0.01+flvelLen);
|
||||
// jitter vector, 0.2 * flvel
|
||||
LbmVec jittVel = LbmVec(jx,jy,jz)*(0.05+flvelLen)*0.1;
|
||||
LbmVec jittVel(jx,jy,jz);
|
||||
jittVel *= (0.05+flvelLen)*0.1;
|
||||
// weighten velocities
|
||||
const LbmFloat flvelWeight = 0.9;
|
||||
LbmVec newpartVel = normVel*normScale*(1.-flvelWeight) + flvelVel*(flvelWeight) + jittVel;
|
||||
@@ -1013,7 +1015,7 @@ LbmFsgrSolver::mainLoop(int lev)
|
||||
if( (mass) <= (rho * ( -FSGR_MAGICNR)) ) { ifemptied = 1; }
|
||||
|
||||
if(oldFlag & (CFMbndOutflow)) {
|
||||
mInitialMass -= mass;
|
||||
calcInitialMass -= mass;
|
||||
mass = myfrac = 0.0;
|
||||
iffilled = 0; ifemptied = 1;
|
||||
}
|
||||
@@ -1105,6 +1107,8 @@ LbmFsgrSolver::mainLoop(int lev)
|
||||
mNumFilledCells = calcCellsFilled;
|
||||
mNumEmptiedCells = calcCellsEmptied;
|
||||
mNumUsedCells = calcNumUsedCells;
|
||||
mNumInvIfCells += calcNumInvIfCells;
|
||||
mInitialMass += calcInitialMass;
|
||||
}
|
||||
|
||||
|
||||
@@ -1121,7 +1125,8 @@ LbmFsgrSolver::preinitGrids()
|
||||
|
||||
GRID_REGION_INIT();
|
||||
#if PARALLEL==1
|
||||
#pragma omp parallel default(shared) num_threads(mNumOMPThreads) \
|
||||
const int gDebugLevel = ::gDebugLevel;
|
||||
#pragma omp parallel default(none) num_threads(mNumOMPThreads) \
|
||||
reduction(+: \
|
||||
calcCurrentMass,calcCurrentVolume, \
|
||||
calcCellsFilled,calcCellsEmptied, \
|
||||
@@ -1158,7 +1163,8 @@ LbmFsgrSolver::standingFluidPreinit()
|
||||
|
||||
GRID_REGION_INIT();
|
||||
#if PARALLEL==1
|
||||
#pragma omp parallel default(shared) num_threads(mNumOMPThreads) \
|
||||
const int gDebugLevel = ::gDebugLevel;
|
||||
#pragma omp parallel default(none) num_threads(mNumOMPThreads) \
|
||||
reduction(+: \
|
||||
calcCurrentMass,calcCurrentVolume, \
|
||||
calcCellsFilled,calcCellsEmptied, \
|
||||
|
||||
@@ -324,14 +324,14 @@ bool GHOST_NDOFManager::setDevice(unsigned short vendor_id, unsigned short produ
|
||||
return m_deviceType != NDOF_UnknownDevice;
|
||||
}
|
||||
|
||||
void GHOST_NDOFManager::updateTranslation(const short t[3], GHOST_TUns64 time)
|
||||
void GHOST_NDOFManager::updateTranslation(const int t[3], GHOST_TUns64 time)
|
||||
{
|
||||
memcpy(m_translation, t, sizeof(m_translation));
|
||||
m_motionTime = time;
|
||||
m_motionEventPending = true;
|
||||
}
|
||||
|
||||
void GHOST_NDOFManager::updateRotation(const short r[3], GHOST_TUns64 time)
|
||||
void GHOST_NDOFManager::updateRotation(const int r[3], GHOST_TUns64 time)
|
||||
{
|
||||
memcpy(m_rotation, r, sizeof(m_rotation));
|
||||
m_motionTime = time;
|
||||
|
||||
@@ -138,8 +138,8 @@ public:
|
||||
// rotations are + when CCW, - when CW
|
||||
// each platform is responsible for getting axis data into this form
|
||||
// these values should not be scaled (just shuffled or flipped)
|
||||
void updateTranslation(const short t[3], GHOST_TUns64 time);
|
||||
void updateRotation(const short r[3], GHOST_TUns64 time);
|
||||
void updateTranslation(const int t[3], GHOST_TUns64 time);
|
||||
void updateRotation(const int r[3], GHOST_TUns64 time);
|
||||
|
||||
// the latest raw button data from the device
|
||||
// use HID button encoding (not NDOF_ButtonT)
|
||||
@@ -163,8 +163,8 @@ private:
|
||||
int m_buttonMask;
|
||||
const NDOF_ButtonT *m_hidMap;
|
||||
|
||||
short m_translation[3];
|
||||
short m_rotation[3];
|
||||
int m_translation[3];
|
||||
int m_rotation[3];
|
||||
int m_buttons; // bit field
|
||||
|
||||
GHOST_TUns64 m_motionTime; // in milliseconds
|
||||
|
||||
@@ -200,8 +200,8 @@ static void DeviceEvent(uint32_t unused, uint32_t msg_type, void* msg_arg)
|
||||
case kConnexionCmdHandleAxis:
|
||||
{
|
||||
// convert to blender view coordinates
|
||||
const short t[3] = {s->axis[0], -(s->axis[2]), s->axis[1]};
|
||||
const short r[3] = {-(s->axis[3]), s->axis[5], -(s->axis[4])};
|
||||
const int t[3] = {s->axis[0], -(s->axis[2]), s->axis[1]};
|
||||
const int r[3] = {-(s->axis[3]), s->axis[5], -(s->axis[4])};
|
||||
|
||||
ndof_manager->updateTranslation(t, now);
|
||||
ndof_manager->updateRotation(r, now);
|
||||
|
||||
@@ -107,8 +107,8 @@ bool GHOST_NDOFManagerUnix::processEvents()
|
||||
{
|
||||
/* convert to blender view coords */
|
||||
GHOST_TUns64 now = m_system.getMilliSeconds();
|
||||
const short t[3] = {(short)e.motion.x, (short)e.motion.y, (short)-e.motion.z};
|
||||
const short r[3] = {(short)-e.motion.rx, (short)-e.motion.ry, (short)e.motion.rz};
|
||||
const int t[3] = {(int)e.motion.x, (int)e.motion.y, (int)-e.motion.z};
|
||||
const int r[3] = {(int)-e.motion.rx, (int)-e.motion.ry, (int)e.motion.rz};
|
||||
|
||||
updateTranslation(t, now);
|
||||
updateRotation(r, now);
|
||||
@@ -128,7 +128,7 @@ bool GHOST_NDOFManagerUnix::processEvents()
|
||||
#ifdef USE_FINISH_GLITCH_WORKAROUND
|
||||
if (motion_test_prev == true && motion_test == false) {
|
||||
GHOST_TUns64 now = m_system.getMilliSeconds();
|
||||
const short v[3] = {0, 0, 0};
|
||||
const int v[3] = {0, 0, 0};
|
||||
|
||||
updateTranslation(v, now);
|
||||
updateRotation(v, now);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -712,18 +712,26 @@ GHOST_EventCursor *GHOST_SystemWin32::processCursorEvent(GHOST_TEventType type,
|
||||
}
|
||||
|
||||
|
||||
GHOST_EventWheel *GHOST_SystemWin32::processWheelEvent(GHOST_WindowWin32 *window, WPARAM wParam, LPARAM lParam)
|
||||
void GHOST_SystemWin32::processWheelEvent(GHOST_WindowWin32 *window, WPARAM wParam, LPARAM lParam)
|
||||
{
|
||||
// short fwKeys = LOWORD(wParam); // key flags
|
||||
int zDelta = (short) HIWORD(wParam); // wheel rotation
|
||||
|
||||
// zDelta /= WHEEL_DELTA;
|
||||
// temporary fix below: microsoft now has added more precision, making the above division not work
|
||||
zDelta = (zDelta <= 0) ? -1 : 1;
|
||||
GHOST_SystemWin32 *system = (GHOST_SystemWin32 *)getSystem();
|
||||
|
||||
// short xPos = (short) LOWORD(lParam); // horizontal position of pointer
|
||||
// short yPos = (short) HIWORD(lParam); // vertical position of pointer
|
||||
return new GHOST_EventWheel(getSystem()->getMilliSeconds(), window, zDelta);
|
||||
int acc = system->m_wheelDeltaAccum;
|
||||
int delta = GET_WHEEL_DELTA_WPARAM(wParam);
|
||||
|
||||
if (acc * delta < 0) {
|
||||
// scroll direction reversed.
|
||||
acc = 0;
|
||||
}
|
||||
acc += delta;
|
||||
int direction = (acc >= 0) ? 1 : -1;
|
||||
acc = abs(acc);
|
||||
|
||||
while (acc >= WHEEL_DELTA) {
|
||||
system->pushEvent(new GHOST_EventWheel(system->getMilliSeconds(), window, direction));
|
||||
acc -= WHEEL_DELTA;
|
||||
}
|
||||
system->m_wheelDeltaAccum = acc * direction;
|
||||
}
|
||||
|
||||
|
||||
@@ -871,12 +879,12 @@ bool GHOST_SystemWin32::processNDOF(RAWINPUT const &raw)
|
||||
{
|
||||
const short *axis = (short *)(data + 1);
|
||||
// massage into blender view coords (same goes for rotation)
|
||||
const short t[3] = {axis[0], -axis[2], axis[1]};
|
||||
const int t[3] = {axis[0], -axis[2], axis[1]};
|
||||
m_ndofManager->updateTranslation(t, now);
|
||||
|
||||
if (raw.data.hid.dwSizeHid == 13) {
|
||||
// this report also includes rotation
|
||||
const short r[3] = {-axis[3], axis[5], -axis[4]};
|
||||
const int r[3] = {-axis[3], axis[5], -axis[4]};
|
||||
m_ndofManager->updateRotation(r, now);
|
||||
|
||||
// I've never gotten one of these, has anyone else?
|
||||
@@ -887,7 +895,7 @@ bool GHOST_SystemWin32::processNDOF(RAWINPUT const &raw)
|
||||
case 2: // rotation
|
||||
{
|
||||
const short *axis = (short *)(data + 1);
|
||||
const short r[3] = {-axis[0], axis[2], -axis[1]};
|
||||
const int r[3] = {-axis[0], axis[2], -axis[1]};
|
||||
m_ndofManager->updateRotation(r, now);
|
||||
break;
|
||||
}
|
||||
@@ -1137,14 +1145,9 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam,
|
||||
POINT mouse_pos = {GET_X_LPARAM(lParam), GET_Y_LPARAM(lParam)};
|
||||
HWND mouse_hwnd = ChildWindowFromPoint(HWND_DESKTOP, mouse_pos);
|
||||
GHOST_WindowWin32 *mouse_window = (GHOST_WindowWin32 *)::GetWindowLongPtr(mouse_hwnd, GWLP_USERDATA);
|
||||
if (mouse_window != NULL) {
|
||||
event = processWheelEvent(mouse_window, wParam, lParam);
|
||||
}
|
||||
else {
|
||||
/* Happens when mouse is not over any of blender's windows. */
|
||||
event = processWheelEvent(window, wParam, lParam);
|
||||
}
|
||||
|
||||
|
||||
processWheelEvent(mouse_window ? mouse_window : window , wParam, lParam);
|
||||
eventHandled = true;
|
||||
#ifdef BROKEN_PEEK_TOUCHPAD
|
||||
PostMessage(hwnd, WM_USER, 0, 0);
|
||||
#endif
|
||||
@@ -1203,6 +1206,7 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam,
|
||||
GHOST_ModifierKeys modifiers;
|
||||
modifiers.clear();
|
||||
system->storeModifierKeys(modifiers);
|
||||
system->m_wheelDeltaAccum = 0;
|
||||
event = processWindowEvent(LOWORD(wParam) ? GHOST_kEventWindowActivate : GHOST_kEventWindowDeactivate, window);
|
||||
/* WARNING: Let DefWindowProc handle WM_ACTIVATE, otherwise WM_MOUSEWHEEL
|
||||
* will not be dispatched to OUR active window if we minimize one of OUR windows. */
|
||||
|
||||
@@ -264,12 +264,12 @@ protected:
|
||||
static GHOST_EventCursor *processCursorEvent(GHOST_TEventType type, GHOST_WindowWin32 *window);
|
||||
|
||||
/**
|
||||
* Creates a mouse wheel event.
|
||||
* Handles a mouse wheel event.
|
||||
* \param window The window receiving the event (the active window).
|
||||
* \param wParam The wParam from the wndproc
|
||||
* \param lParam The lParam from the wndproc
|
||||
*/
|
||||
static GHOST_EventWheel *processWheelEvent(GHOST_WindowWin32 *window, WPARAM wParam, LPARAM lParam);
|
||||
static void processWheelEvent(GHOST_WindowWin32 *window, WPARAM wParam, LPARAM lParam);
|
||||
|
||||
/**
|
||||
* Creates a key event and updates the key data stored locally (m_modifierKeys).
|
||||
@@ -376,6 +376,9 @@ protected:
|
||||
|
||||
/** Console status */
|
||||
int m_consoleStatus;
|
||||
|
||||
/** Wheel delta accumulator **/
|
||||
int m_wheelDeltaAccum;
|
||||
};
|
||||
|
||||
inline void GHOST_SystemWin32::retrieveModifierKeys(GHOST_ModifierKeys& keys) const
|
||||
|
||||
@@ -579,13 +579,16 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons
|
||||
{
|
||||
// make bbox
|
||||
int c=0, l=0, channel=0;
|
||||
float fvMin[3] = {INFINITY, INFINITY, INFINITY};
|
||||
float fvMax[3] = {-INFINITY, -INFINITY, -INFINITY};
|
||||
float fvMin[3], fvMax[3];
|
||||
float dx=0, dy=0, dz=0, fSep=0;
|
||||
for (l=iL_in; l<=iR_in; l++)
|
||||
for (c=0; c<3; c++)
|
||||
for (c=0; c<3; c++)
|
||||
{ fvMin[c]=pTmpVert[iL_in].vert[c]; fvMax[c]=fvMin[c]; }
|
||||
for (l=(iL_in+1); l<=iR_in; l++) {
|
||||
for (c=0; c<3; c++) {
|
||||
if (fvMin[c]>pTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c];
|
||||
else if (fvMax[c]<pTmpVert[l].vert[c]) fvMax[c]=pTmpVert[l].vert[c];
|
||||
if (fvMax[c]<pTmpVert[l].vert[c]) fvMax[c]=pTmpVert[l].vert[c];
|
||||
}
|
||||
}
|
||||
|
||||
dx = fvMax[0]-fvMin[0];
|
||||
dy = fvMax[1]-fvMin[1];
|
||||
|
||||
102
make.bat
102
make.bat
@@ -6,7 +6,17 @@ setlocal ENABLEEXTENSIONS
|
||||
set BLENDER_DIR=%~dp0
|
||||
set BUILD_DIR=%BLENDER_DIR%..\build_windows
|
||||
set BUILD_TYPE=Release
|
||||
rem reset all variables so they do not accidentally get carried over from previous builds
|
||||
set BUILD_CMAKE_ARGS=
|
||||
set BUILD_ARCH=
|
||||
set BUILD_VS_VER=
|
||||
set BUILD_VS_YEAR=
|
||||
set KEY_NAME=
|
||||
set MSBUILD_PLATFORM=
|
||||
set MUST_CLEAN=
|
||||
set NOBUILD=
|
||||
set TARGET=
|
||||
set WINDOWS_ARCH=
|
||||
|
||||
:argv_loop
|
||||
if NOT "%1" == "" (
|
||||
@@ -18,54 +28,34 @@ if NOT "%1" == "" (
|
||||
|
||||
REM Build Types
|
||||
if "%1" == "debug" (
|
||||
set BUILD_DIR=%BUILD_DIR%_debug
|
||||
set BUILD_TYPE=Debug
|
||||
|
||||
REM Build Configurations
|
||||
) else if "%1" == "full" (
|
||||
set TARGET_SET=1
|
||||
set BUILD_DIR=%BUILD_DIR%_full
|
||||
set TARGET=Full
|
||||
set BUILD_CMAKE_ARGS=%BUILD_CMAKE_ARGS% ^
|
||||
-C"%BLENDER_DIR%\build_files\cmake\config\blender_full.cmake"
|
||||
) else if "%1" == "lite" (
|
||||
set TARGET_SET=1
|
||||
set BUILD_DIR=%BUILD_DIR%_lite
|
||||
set TARGET=Lite
|
||||
set BUILD_CMAKE_ARGS=%BUILD_CMAKE_ARGS% ^
|
||||
-C"%BLENDER_DIR%\build_files\cmake\config\blender_lite.cmake"
|
||||
) else if "%1" == "cycles" (
|
||||
set TARGET_SET=1
|
||||
set BUILD_DIR=%BUILD_DIR%_cycles
|
||||
set TARGET=Cycles
|
||||
set BUILD_CMAKE_ARGS=%BUILD_CMAKE_ARGS% ^
|
||||
-C"%BLENDER_DIR%\build_files\cmake\config\cycles_standalone.cmake"
|
||||
) else if "%1" == "headless" (
|
||||
set TARGET_SET=1
|
||||
set BUILD_DIR=%BUILD_DIR%_headless
|
||||
set TARGET=Headless
|
||||
set BUILD_CMAKE_ARGS=%BUILD_CMAKE_ARGS% ^
|
||||
-C"%BLENDER_DIR%\build_files\cmake\config\blender_headless.cmake"
|
||||
) else if "%1" == "bpy" (
|
||||
set TARGET_SET=1
|
||||
set BUILD_DIR=%BUILD_DIR%_bpy
|
||||
set TARGET=Bpy
|
||||
set BUILD_CMAKE_ARGS=%BUILD_CMAKE_ARGS% ^
|
||||
-C"%BLENDER_DIR%\build_files\cmake\config\bpy_module.cmake"
|
||||
) else if "%1" == "release" (
|
||||
set TARGET_SET=1
|
||||
if "%CUDA_PATH_V7_5%"=="" (
|
||||
echo Cuda 7.5 Not found, aborting!
|
||||
goto EOF
|
||||
)
|
||||
if "%CUDA_PATH_V8_0%"=="" (
|
||||
echo Cuda 8.0 Not found, aborting!
|
||||
goto EOF
|
||||
)
|
||||
set BUILD_DIR=%BUILD_DIR%_Release
|
||||
set BUILD_CMAKE_ARGS=%BUILD_CMAKE_ARGS% ^
|
||||
-C"%BLENDER_DIR%\build_files\cmake\config\blender_release.cmake" -DCUDA_NVCC_EXECUTABLE:FILEPATH=%CUDA_PATH_V7_5%/bin/nvcc.exe -DCUDA_NVCC8_EXECUTABLE:FILEPATH=%CUDA_PATH_V8_0%/bin/nvcc.exe
|
||||
set TARGET=Release
|
||||
) else if "%1" == "x86" (
|
||||
set BUILD_ARCH=x86
|
||||
set BUILD_DIR=%BUILD_DIR%_x86
|
||||
) else if "%1" == "x64" (
|
||||
set BUILD_ARCH=x64
|
||||
set BUILD_DIR=%BUILD_DIR%_x64
|
||||
) else if "%1" == "2015" (
|
||||
set BUILD_VS_VER=14
|
||||
set BUILD_VS_YEAR=2015
|
||||
@@ -105,10 +95,13 @@ if NOT "%1" == "" (
|
||||
if "%BUILD_ARCH%"=="" (
|
||||
if "%PROCESSOR_ARCHITECTURE%" == "AMD64" (
|
||||
set WINDOWS_ARCH= Win64
|
||||
set BUILD_ARCH=x64
|
||||
) else if "%PROCESSOR_ARCHITEW6432%" == "AMD64" (
|
||||
set WINDOWS_ARCH= Win64
|
||||
set BUILD_ARCH=x64
|
||||
) else (
|
||||
set WINDOWS_ARCH=
|
||||
set BUILD_ARCH=x86
|
||||
)
|
||||
) else if "%BUILD_ARCH%"=="x64" (
|
||||
set WINDOWS_ARCH= Win64
|
||||
@@ -121,8 +114,27 @@ if "%BUILD_VS_VER%"=="" (
|
||||
set BUILD_VS_YEAR=2013
|
||||
)
|
||||
|
||||
set BUILD_DIR=%BUILD_DIR%_vc%BUILD_VS_VER%
|
||||
if "%BUILD_ARCH%"=="x64" (
|
||||
set MSBUILD_PLATFORM=x64
|
||||
) else if "%BUILD_ARCH%"=="x86" (
|
||||
set MSBUILD_PLATFORM=win32
|
||||
)
|
||||
|
||||
|
||||
set BUILD_DIR=%BUILD_DIR%_%TARGET%_%BUILD_ARCH%_vc%BUILD_VS_VER%_%BUILD_TYPE%
|
||||
|
||||
|
||||
if "%target%"=="Release" (
|
||||
rem for vc12 check for both cuda 7.5 and 8
|
||||
if "%CUDA_PATH%"=="" (
|
||||
echo Cuda Not found, aborting!
|
||||
goto EOF
|
||||
)
|
||||
set BUILD_CMAKE_ARGS=%BUILD_CMAKE_ARGS% ^
|
||||
-C"%BLENDER_DIR%\build_files\cmake\config\blender_release.cmake"
|
||||
)
|
||||
|
||||
:DetectMSVC
|
||||
REM Detect MSVC Installation
|
||||
if DEFINED VisualStudioVersion goto msvc_detect_finally
|
||||
set VALUE_NAME=ProductDir
|
||||
@@ -140,10 +152,18 @@ if DEFINED MSVC_VC_DIR call "%MSVC_VC_DIR%\vcvarsall.bat"
|
||||
REM Sanity Checks
|
||||
where /Q msbuild
|
||||
if %ERRORLEVEL% NEQ 0 (
|
||||
echo Error: "MSBuild" command not in the PATH.
|
||||
echo You must have MSVC installed and run this from the "Developer Command Prompt"
|
||||
echo ^(available from Visual Studio's Start menu entry^), aborting!
|
||||
goto EOF
|
||||
if "%BUILD_VS_VER%"=="12" (
|
||||
rem vs12 not found, try vs14
|
||||
echo Visual Studio 2012 not found, trying Visual Studio 2015.
|
||||
set BUILD_VS_VER=14
|
||||
set BUILD_VS_YEAR=2015
|
||||
goto DetectMSVC
|
||||
) else (
|
||||
echo Error: "MSBuild" command not in the PATH.
|
||||
echo You must have MSVC installed and run this from the "Developer Command Prompt"
|
||||
echo ^(available from Visual Studio's Start menu entry^), aborting!
|
||||
goto EOF
|
||||
)
|
||||
)
|
||||
where /Q cmake
|
||||
if %ERRORLEVEL% NEQ 0 (
|
||||
@@ -156,7 +176,7 @@ if NOT EXIST %BLENDER_DIR%..\lib\nul (
|
||||
echo This is needed for building, aborting!
|
||||
goto EOF
|
||||
)
|
||||
if NOT "%TARGET_SET%"=="1" (
|
||||
if "%TARGET%"=="" (
|
||||
echo Error: Convenience target not set
|
||||
echo This is required for building, aborting!
|
||||
echo .
|
||||
@@ -173,7 +193,9 @@ if "%MUST_CLEAN%"=="1" (
|
||||
%BUILD_DIR%\Blender.sln ^
|
||||
/target:clean ^
|
||||
/property:Configuration=%BUILD_TYPE% ^
|
||||
/verbosity:minimal
|
||||
/verbosity:minimal ^
|
||||
/p:platform=%MSBUILD_PLATFORM%
|
||||
|
||||
if %ERRORLEVEL% NEQ 0 (
|
||||
echo Cleaned "%BUILD_DIR%"
|
||||
)
|
||||
@@ -202,7 +224,8 @@ msbuild ^
|
||||
/target:build ^
|
||||
/property:Configuration=%BUILD_TYPE% ^
|
||||
/maxcpucount ^
|
||||
/verbosity:minimal
|
||||
/verbosity:minimal ^
|
||||
/p:platform=%MSBUILD_PLATFORM%
|
||||
|
||||
if %ERRORLEVEL% NEQ 0 (
|
||||
echo "Build Failed"
|
||||
@@ -212,7 +235,8 @@ if %ERRORLEVEL% NEQ 0 (
|
||||
msbuild ^
|
||||
%BUILD_DIR%\INSTALL.vcxproj ^
|
||||
/property:Configuration=%BUILD_TYPE% ^
|
||||
/verbosity:minimal
|
||||
/verbosity:minimal ^
|
||||
/p:platform=%MSBUILD_PLATFORM%
|
||||
|
||||
echo.
|
||||
echo At any point you can optionally modify your build configuration by editing:
|
||||
@@ -224,10 +248,9 @@ goto EOF
|
||||
:HELP
|
||||
echo.
|
||||
echo Convenience targets
|
||||
echo - release
|
||||
echo - debug
|
||||
echo - full
|
||||
echo - lite
|
||||
echo - release ^(identical to the offical blender.org builds^)
|
||||
echo - full ^(same as release minus the cuda kernels^)
|
||||
echo - lite
|
||||
echo - headless
|
||||
echo - cycles
|
||||
echo - bpy
|
||||
@@ -239,6 +262,7 @@ goto EOF
|
||||
echo - showhash ^(Show git hashes of source tree^)
|
||||
echo.
|
||||
echo Configuration options
|
||||
echo - debug ^(Build an unoptimized debuggable build^)
|
||||
echo - packagename [newname] ^(override default cpack package name^)
|
||||
echo - x86 ^(override host autodetect and build 32 bit code^)
|
||||
echo - x64 ^(override host autodetect and build 64 bit code^)
|
||||
|
||||
@@ -163,6 +163,7 @@ class SpellChecker:
|
||||
"runtime",
|
||||
"scanline",
|
||||
"screencast", "screenshot", "screenshots",
|
||||
"seekability",
|
||||
"selfcollision",
|
||||
"shadowbuffer", "shadowbuffers",
|
||||
"singletexture",
|
||||
@@ -184,6 +185,7 @@ class SpellChecker:
|
||||
"timestamp", "timestamps",
|
||||
"timestep", "timesteps",
|
||||
"todo",
|
||||
"tradeoff",
|
||||
"un",
|
||||
"unbake",
|
||||
"uncomment",
|
||||
|
||||
@@ -236,8 +236,8 @@ def do_previews(do_objects, do_groups, do_scenes, do_data_intern):
|
||||
return success
|
||||
|
||||
def objects_render_engine_guess(obs):
|
||||
for obname in obs:
|
||||
ob = bpy.data.objects[obname, None]
|
||||
for obname, libpath in obs:
|
||||
ob = bpy.data.objects[obname, libpath]
|
||||
for matslot in ob.material_slots:
|
||||
mat = matslot.material
|
||||
if mat and mat.use_nodes and mat.node_tree:
|
||||
@@ -247,10 +247,20 @@ def do_previews(do_objects, do_groups, do_scenes, do_data_intern):
|
||||
return 'BLENDER_RENDER'
|
||||
|
||||
def object_bbox_merge(bbox, ob, ob_space, offset_matrix):
|
||||
if ob.bound_box:
|
||||
# Take group instances into account (including linked one in this case).
|
||||
if ob.type == 'EMPTY' and ob.dupli_type == 'GROUP':
|
||||
grp_objects = tuple((ob.name, ob.library.filepath if ob.library else None) for ob in ob.dupli_group.objects)
|
||||
if (len(grp_objects) == 0):
|
||||
ob_bbox = ob.bound_box
|
||||
else:
|
||||
coords = objects_bbox_calc(ob_space, grp_objects,
|
||||
Matrix.Translation(ob.dupli_group.dupli_offset).inverted())
|
||||
ob_bbox = ((coords[0], coords[1], coords[2]), (coords[21], coords[22], coords[23]))
|
||||
elif ob.bound_box:
|
||||
ob_bbox = ob.bound_box
|
||||
else:
|
||||
ob_bbox = ((-ob.scale.x, -ob.scale.y, -ob.scale.z), (ob.scale.x, ob.scale.y, ob.scale.z))
|
||||
|
||||
for v in ob_bbox:
|
||||
v = offset_matrix * Vector(v) if offset_matrix is not None else Vector(v)
|
||||
v = ob_space.matrix_world.inverted() * ob.matrix_world * v
|
||||
@@ -269,8 +279,8 @@ def do_previews(do_objects, do_groups, do_scenes, do_data_intern):
|
||||
|
||||
def objects_bbox_calc(camera, objects, offset_matrix):
|
||||
bbox = (Vector((1e9, 1e9, 1e9)), Vector((-1e9, -1e9, -1e9)))
|
||||
for obname in objects:
|
||||
ob = bpy.data.objects[obname, None]
|
||||
for obname, libpath in objects:
|
||||
ob = bpy.data.objects[obname, libpath]
|
||||
object_bbox_merge(bbox, ob, camera, offset_matrix)
|
||||
# Our bbox has been generated in camera local space, bring it back in world one
|
||||
bbox[0][:] = camera.matrix_world * bbox[0]
|
||||
@@ -333,7 +343,7 @@ def do_previews(do_objects, do_groups, do_scenes, do_data_intern):
|
||||
continue
|
||||
if root.type not in OBJECT_TYPES_RENDER:
|
||||
continue
|
||||
objects = (root.name,)
|
||||
objects = ((root.name, None),)
|
||||
|
||||
render_engine = objects_render_engine_guess(objects)
|
||||
render_context = render_contexts.get(render_engine, None)
|
||||
@@ -344,8 +354,8 @@ def do_previews(do_objects, do_groups, do_scenes, do_data_intern):
|
||||
scene = bpy.data.scenes[render_context.scene, None]
|
||||
bpy.context.screen.scene = scene
|
||||
|
||||
for obname in objects:
|
||||
ob = bpy.data.objects[obname, None]
|
||||
for obname, libpath in objects:
|
||||
ob = bpy.data.objects[obname, libpath]
|
||||
if obname not in scene.objects:
|
||||
scene.objects.link(ob)
|
||||
ob.hide_render = False
|
||||
@@ -363,8 +373,8 @@ def do_previews(do_objects, do_groups, do_scenes, do_data_intern):
|
||||
# OverflowError: Python int too large to convert to C long
|
||||
# ... :(
|
||||
scene = bpy.data.scenes[render_context.scene, None]
|
||||
for obname in objects:
|
||||
ob = bpy.data.objects[obname, None]
|
||||
for obname, libpath in objects:
|
||||
ob = bpy.data.objects[obname, libpath]
|
||||
scene.objects.unlink(ob)
|
||||
ob.hide_render = True
|
||||
|
||||
@@ -377,7 +387,8 @@ def do_previews(do_objects, do_groups, do_scenes, do_data_intern):
|
||||
for grp in ids_nolib(bpy.data.groups):
|
||||
if grp.name in groups_ignored:
|
||||
continue
|
||||
objects = tuple(ob.name for ob in grp.objects)
|
||||
# Here too, we do want to keep linked objects members of local group...
|
||||
objects = tuple((ob.name, ob.library.filepath if ob.library else None) for ob in grp.objects)
|
||||
|
||||
render_engine = objects_render_engine_guess(objects)
|
||||
render_context = render_contexts.get(render_engine, None)
|
||||
@@ -415,7 +426,7 @@ def do_previews(do_objects, do_groups, do_scenes, do_data_intern):
|
||||
objects = None
|
||||
if not has_camera:
|
||||
# We had to add a temp camera, now we need to place it to see interesting objects!
|
||||
objects = tuple(ob.name for ob in scene.objects
|
||||
objects = tuple((ob.name, ob.library.filepath if ob.library else None) for ob in scene.objects
|
||||
if (not ob.hide_render) and (ob.type in OBJECT_TYPES_RENDER))
|
||||
|
||||
preview_render_do(render_context, 'scenes', scene.name, objects)
|
||||
|
||||
@@ -136,6 +136,7 @@ KM_HIERARCHY = [
|
||||
|
||||
('Standard Modal Map', 'EMPTY', 'WINDOW', []),
|
||||
('Transform Modal Map', 'EMPTY', 'WINDOW', []),
|
||||
('Eyedropper Modal Map', 'EMPTY', 'WINDOW', []),
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -145,8 +145,12 @@ def object_data_add(context, obdata, operator=None, use_active_layer=True, name=
|
||||
base.layers_from_view(context.space_data)
|
||||
base.layers[scene.active_layer] = True
|
||||
else:
|
||||
base.layers = [True if i == scene.active_layer
|
||||
else False for i in range(len(scene.layers))]
|
||||
if v3d and not v3d.lock_camera_and_layers:
|
||||
base.layers = [True if i == v3d.active_layer
|
||||
else False for i in range(len(v3d.layers))]
|
||||
else:
|
||||
base.layers = [True if i == scene.active_layer
|
||||
else False for i in range(len(scene.layers))]
|
||||
else:
|
||||
if v3d:
|
||||
base.layers_from_view(context.space_data)
|
||||
|
||||
@@ -398,11 +398,7 @@
|
||||
</ThemeGraphEditor>
|
||||
</graph_editor>
|
||||
<file_browser>
|
||||
<ThemeFileBrowser selected_file="#354d66"
|
||||
scrollbar="#a0a0a0"
|
||||
scroll_handle="#7f7070"
|
||||
active_file="#b1b1b1"
|
||||
active_file_text="#ffffff">
|
||||
<ThemeFileBrowser selected_file="#354d66">
|
||||
<space>
|
||||
<ThemeSpaceGeneric back="#000000"
|
||||
title="#5d5d5d"
|
||||
@@ -428,13 +424,6 @@
|
||||
</panelcolors>
|
||||
</ThemeSpaceGeneric>
|
||||
</space>
|
||||
<space_list>
|
||||
<ThemeSpaceListGeneric list="#181818"
|
||||
list_title="#9e9e9e"
|
||||
list_text="#5d5d5d"
|
||||
list_text_hi="#ffffff">
|
||||
</ThemeSpaceListGeneric>
|
||||
</space_list>
|
||||
</ThemeFileBrowser>
|
||||
</file_browser>
|
||||
<nla_editor>
|
||||
|
||||
@@ -398,11 +398,7 @@
|
||||
</ThemeGraphEditor>
|
||||
</graph_editor>
|
||||
<file_browser>
|
||||
<ThemeFileBrowser selected_file="#a0a0d0"
|
||||
scrollbar="#a0a0a0"
|
||||
scroll_handle="#7f7070"
|
||||
active_file="#828282"
|
||||
active_file_text="#ffffff">
|
||||
<ThemeFileBrowser selected_file="#a0a0d0">
|
||||
<space>
|
||||
<ThemeSpaceGeneric back="#808080"
|
||||
title="#000000"
|
||||
@@ -428,13 +424,6 @@
|
||||
</panelcolors>
|
||||
</ThemeSpaceGeneric>
|
||||
</space>
|
||||
<space_list>
|
||||
<ThemeSpaceListGeneric list="#b4b4b4"
|
||||
list_title="#000000"
|
||||
list_text="#000000"
|
||||
list_text_hi="#ffffff">
|
||||
</ThemeSpaceListGeneric>
|
||||
</space_list>
|
||||
</ThemeFileBrowser>
|
||||
</file_browser>
|
||||
<nla_editor>
|
||||
|
||||
@@ -398,11 +398,7 @@
|
||||
</ThemeGraphEditor>
|
||||
</graph_editor>
|
||||
<file_browser>
|
||||
<ThemeFileBrowser selected_file="#755129"
|
||||
scrollbar="#a0a0a0"
|
||||
scroll_handle="#7f7070"
|
||||
active_file="#828282"
|
||||
active_file_text="#fafafa">
|
||||
<ThemeFileBrowser selected_file="#755129">
|
||||
<space>
|
||||
<ThemeSpaceGeneric back="#4b4b4b"
|
||||
title="#8b8b8b"
|
||||
@@ -428,13 +424,6 @@
|
||||
</panelcolors>
|
||||
</ThemeSpaceGeneric>
|
||||
</space>
|
||||
<space_list>
|
||||
<ThemeSpaceListGeneric list="#3b3b3b"
|
||||
list_title="#8b8b8b"
|
||||
list_text="#8b8b8b"
|
||||
list_text_hi="#ffffff">
|
||||
</ThemeSpaceListGeneric>
|
||||
</space_list>
|
||||
</ThemeFileBrowser>
|
||||
</file_browser>
|
||||
<nla_editor>
|
||||
|
||||
@@ -398,11 +398,7 @@
|
||||
</ThemeGraphEditor>
|
||||
</graph_editor>
|
||||
<file_browser>
|
||||
<ThemeFileBrowser selected_file="#ff8c19"
|
||||
scrollbar="#a0a0a0"
|
||||
scroll_handle="#7f7070"
|
||||
active_file="#828282"
|
||||
active_file_text="#fafafa">
|
||||
<ThemeFileBrowser selected_file="#ff8c19">
|
||||
<space>
|
||||
<ThemeSpaceGeneric back="#404040"
|
||||
title="#000000"
|
||||
@@ -428,13 +424,6 @@
|
||||
</panelcolors>
|
||||
</ThemeSpaceGeneric>
|
||||
</space>
|
||||
<space_list>
|
||||
<ThemeSpaceListGeneric list="#808080"
|
||||
list_title="#000000"
|
||||
list_text="#000000"
|
||||
list_text_hi="#ffffff">
|
||||
</ThemeSpaceListGeneric>
|
||||
</space_list>
|
||||
</ThemeFileBrowser>
|
||||
</file_browser>
|
||||
<nla_editor>
|
||||
|
||||
@@ -398,11 +398,7 @@
|
||||
</ThemeGraphEditor>
|
||||
</graph_editor>
|
||||
<file_browser>
|
||||
<ThemeFileBrowser selected_file="#607f9e"
|
||||
scrollbar="#a0a0a0"
|
||||
scroll_handle="#7f7070"
|
||||
active_file="#ffffff"
|
||||
active_file_text="#ffffff">
|
||||
<ThemeFileBrowser selected_file="#607f9e">
|
||||
<space>
|
||||
<ThemeSpaceGeneric back="#4b4b4b"
|
||||
title="#e4e4e4"
|
||||
@@ -428,13 +424,6 @@
|
||||
</panelcolors>
|
||||
</ThemeSpaceGeneric>
|
||||
</space>
|
||||
<space_list>
|
||||
<ThemeSpaceListGeneric list="#3b3b3b"
|
||||
list_title="#8b8b8b"
|
||||
list_text="#8b8b8b"
|
||||
list_text_hi="#ffffff">
|
||||
</ThemeSpaceListGeneric>
|
||||
</space_list>
|
||||
</ThemeFileBrowser>
|
||||
</file_browser>
|
||||
<nla_editor>
|
||||
|
||||
@@ -398,11 +398,7 @@
|
||||
</ThemeGraphEditor>
|
||||
</graph_editor>
|
||||
<file_browser>
|
||||
<ThemeFileBrowser selected_file="#69a5be"
|
||||
scrollbar="#a0a0a0"
|
||||
scroll_handle="#7f7070"
|
||||
active_file="#859cb9"
|
||||
active_file_text="#fafafa">
|
||||
<ThemeFileBrowser selected_file="#69a5be">
|
||||
<space>
|
||||
<ThemeSpaceGeneric back="#646875"
|
||||
title="#000000"
|
||||
@@ -428,13 +424,6 @@
|
||||
</panelcolors>
|
||||
</ThemeSpaceGeneric>
|
||||
</space>
|
||||
<space_list>
|
||||
<ThemeSpaceListGeneric list="#6c717f"
|
||||
list_title="#d7d7d7"
|
||||
list_text="#d7d7d7"
|
||||
list_text_hi="#ffffff">
|
||||
</ThemeSpaceListGeneric>
|
||||
</space_list>
|
||||
</ThemeFileBrowser>
|
||||
</file_browser>
|
||||
<nla_editor>
|
||||
|
||||
@@ -398,11 +398,7 @@
|
||||
</ThemeGraphEditor>
|
||||
</graph_editor>
|
||||
<file_browser>
|
||||
<ThemeFileBrowser selected_file="#ff8c19"
|
||||
scrollbar="#a0a0a0"
|
||||
scroll_handle="#7f7070"
|
||||
active_file="#828282"
|
||||
active_file_text="#fafafa">
|
||||
<ThemeFileBrowser selected_file="#ff8c19">
|
||||
<space>
|
||||
<ThemeSpaceGeneric back="#9098a0"
|
||||
title="#000000"
|
||||
@@ -428,13 +424,6 @@
|
||||
</panelcolors>
|
||||
</ThemeSpaceGeneric>
|
||||
</space>
|
||||
<space_list>
|
||||
<ThemeSpaceListGeneric list="#484848"
|
||||
list_title="#000000"
|
||||
list_text="#000000"
|
||||
list_text_hi="#ffffff">
|
||||
</ThemeSpaceListGeneric>
|
||||
</space_list>
|
||||
</ThemeFileBrowser>
|
||||
</file_browser>
|
||||
<nla_editor>
|
||||
|
||||
@@ -398,11 +398,7 @@
|
||||
</ThemeGraphEditor>
|
||||
</graph_editor>
|
||||
<file_browser>
|
||||
<ThemeFileBrowser selected_file="#517da1"
|
||||
scrollbar="#a0a0a0"
|
||||
scroll_handle="#7f7070"
|
||||
active_file="#828282"
|
||||
active_file_text="#fafafa">
|
||||
<ThemeFileBrowser selected_file="#517da1">
|
||||
<space>
|
||||
<ThemeSpaceGeneric back="#363636"
|
||||
title="#646464"
|
||||
@@ -428,13 +424,6 @@
|
||||
</panelcolors>
|
||||
</ThemeSpaceGeneric>
|
||||
</space>
|
||||
<space_list>
|
||||
<ThemeSpaceListGeneric list="#444444"
|
||||
list_title="#c0c0c0"
|
||||
list_text="#999999"
|
||||
list_text_hi="#ffffff">
|
||||
</ThemeSpaceListGeneric>
|
||||
</space_list>
|
||||
</ThemeFileBrowser>
|
||||
</file_browser>
|
||||
<nla_editor>
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user