Fix typo and change .enabled to .active

Use parenthesis for bit shifts
Remove unused weight_components
2017-02-18 04:12:29 -02:00 · 2017-02-14 18:13:08 -02:00 · 2017-02-14 18:13:08 -02:00 · 2017-02-14 18:13:08 -02:00 · 2017-02-06 01:23:18 -02:00 · 2017-01-25 04:05:53 -02:00
886 changed files with 25106 additions and 44634 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -242,8 +242,6 @@ endif()
 option(WITH_PLAYER        "Build Player" OFF)
 option(WITH_OPENCOLORIO   "Enable OpenColorIO color management" ${_init_OPENCOLORIO})

-option(WITH_CLAY_ENGINE    "Enable New Clay engine (Breaks Mac and Intel compatibility)" ON)
-
 # Compositor
 option(WITH_COMPOSITOR         "Enable the tile based nodal compositor" ON)

@@ -499,10 +497,11 @@ endif()
 # We default options to whatever default standard in the current compiler.
 if(CMAKE_COMPILER_IS_GNUCC AND (NOT "${CMAKE_C_COMPILER_VERSION}" VERSION_LESS "6.0") AND (NOT WITH_CXX11))
 	set(_c11_init ON)
+	set(_cxx11_init ON)
 else()
 	set(_c11_init OFF)
+	set(_cxx11_init OFF)
 endif()
-set(_cxx11_init ON)

 option(WITH_C11 "Build with C11 standard enabled, for development use only!" ${_c11_init})
 mark_as_advanced(WITH_C11)
@@ -629,12 +628,6 @@ if(APPLE)
 			# to silence sdk not found warning, just overrides CMAKE_OSX_SYSROOT
 			set(CMAKE_XCODE_ATTRIBUTE_SDKROOT macosx${OSX_SYSTEM})
 		endif()
-
-		# QuickTime framework is no longer available in SDK 10.12+
-		if(WITH_CODEC_QUICKTIME AND ${OSX_SYSTEM} VERSION_GREATER 10.11)
-			set(WITH_CODEC_QUICKTIME OFF)
-			message(STATUS "QuickTime not supported by SDK ${OSX_SYSTEM}, disabling WITH_CODEC_QUICKTIME")
-		endif()
 	endif()

 	if(OSX_SYSTEM MATCHES 10.9)
@@ -724,7 +717,7 @@ if(NOT WITH_BOOST)
 	macro(set_and_warn
 		_setting _val)
 		if(${${_setting}})
-			message(STATUS "'WITH_BOOST' is disabled: forcing 'set(${_setting} ${_val})'")
+			message(STATUS "'WITH_BOOST' is disabled: forcing 'set(${_setting} ${_val})'")
 		endif()
 		set(${_setting} ${_val})
 	endmacro()
@@ -868,7 +861,7 @@ endif()
 # linux only, not cached
 set(WITH_BINRELOC OFF)

-# MACOSX only, set to avoid uninitialized
+# MACOSX only, set to avoid uninitialized
 set(EXETYPE "")

 # C/C++ flags
@@ -1574,7 +1567,7 @@ if(WITH_CXX11)
 	if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
 		# TODO(sergey): Do we want c++11 or gnu-c++11 here?
 		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
-	elseif(MSVC)
+	elseif(MSVC12)
 		# Nothing special is needed, C++11 features are available by default.
 	else()
 		message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER_ID} is not supported for C++11 build yet")
--- a/build_files/build_environment/install_deps.sh
+++ b/build_files/build_environment/install_deps.sh
@@ -25,7 +25,7 @@
 ARGS=$( \
 getopt \
 -o s:i:t:h \
--long source:,install:,tmp:,info:,threads:,help,show-deps,no-sudo,no-build,no-confirm,\
+--long source:,install:,tmp:,info:,threads:,help,show-deps,no-sudo,no-build,no-confirm,use-cxx11,\
 with-all,with-opencollada,\
 ver-ocio:,ver-oiio:,ver-llvm:,ver-osl:,ver-osd:,ver-openvdb:,\
 force-all,force-python,force-numpy,force-boost,\
@@ -104,6 +104,11 @@ ARGUMENTS_INFO="\"COMMAND LINE ARGUMENTS:
    --no-confirm
        Disable any interaction with user (suitable for automated run).

+    --use-cxx11
+        Build all libraries in cpp11 'mode' (will be mandatory soon in blender2.8 branch).
+        NOTE: If your compiler is gcc-6.0 or above, you probably *want* to enable this option (since it's default
+              standard starting from this version).
+
    --with-all
        By default, a number of optional and not-so-often needed libraries are not installed.
        This option will try to install them, at the cost of potential conflicts (depending on
@@ -282,9 +287,9 @@ SUDO="sudo"

 NO_BUILD=false
 NO_CONFIRM=false
-USE_CXX11=true  # Mandatory in blender2.8
+USE_CXX11=false

-PYTHON_VERSION="3.5.2"
+PYTHON_VERSION="3.5.1"
 PYTHON_VERSION_MIN="3.5"
 PYTHON_FORCE_BUILD=false
 PYTHON_FORCE_REBUILD=false
@@ -317,7 +322,7 @@ OPENEXR_FORCE_REBUILD=false
 OPENEXR_SKIP=false
 _with_built_openexr=false

-OIIO_VERSION="1.7.8"
+OIIO_VERSION="1.6.9"
 OIIO_VERSION_MIN="1.6.0"
 OIIO_VERSION_MAX="1.9.0"  # UNKNOWN currently # Not supported by current OSL...
 OIIO_FORCE_BUILD=false
@@ -332,14 +337,14 @@ LLVM_FORCE_REBUILD=false
 LLVM_SKIP=false

 # OSL needs to be compiled for now!
-OSL_VERSION="1.7.5"
+OSL_VERSION="1.7.3"
 OSL_VERSION_MIN=$OSL_VERSION
 OSL_FORCE_BUILD=false
 OSL_FORCE_REBUILD=false
 OSL_SKIP=false

 # OpenSubdiv needs to be compiled for now
-OSD_VERSION="3.1.1"
+OSD_VERSION="3.0.5"
 OSD_VERSION_MIN=$OSD_VERSION
 OSD_FORCE_BUILD=false
 OSD_FORCE_REBUILD=false
@@ -367,7 +372,7 @@ OPENCOLLADA_FORCE_BUILD=false
 OPENCOLLADA_FORCE_REBUILD=false
 OPENCOLLADA_SKIP=false

-FFMPEG_VERSION="3.2.1"
+FFMPEG_VERSION="2.8.4"
 FFMPEG_VERSION_MIN="2.8.4"
 FFMPEG_FORCE_BUILD=false
 FFMPEG_FORCE_REBUILD=false
@@ -494,6 +499,9 @@ while true; do
    --no-confirm)
      NO_CONFIRM=true; shift; continue
    ;;
+    --use-cxx11)
+      USE_CXX11=true; shift; continue
+    ;;
    --with-all)
      WITH_ALL=true; shift; continue
    ;;
@@ -785,9 +793,9 @@ FFMPEG_SOURCE=( "http://ffmpeg.org/releases/ffmpeg-$FFMPEG_VERSION.tar.bz2" )

 CXXFLAGS_BACK=$CXXFLAGS
 if [ "$USE_CXX11" = true ]; then
-  WARNING "C++11 is now mandatory for blender2.8, this *should* go smoothly with any very recent distribution.
+  WARNING "You are trying to use c++11, this *should* go smoothly with any very recent distribution.
 However, if you are experiencing linking errors (also when building Blender itself), please try the following:
-    * Re-run this script with '--build-all --force-all' options.
+    * Re-run this script with '--build-all --force-all' options.
    * Ensure your gcc version is at the very least 4.8, if possible you should really rather use gcc-5.1 or above.

 Please note that until the transition to C++11-built libraries if completed in your distribution, situation will
@@ -2472,7 +2480,7 @@ compile_FFmpeg() {
        --enable-avfilter --disable-vdpau \
        --disable-bzlib --disable-libgsm --disable-libspeex \
        --enable-pthreads --enable-zlib --enable-stripping --enable-runtime-cpudetect \
-        --disable-vaapi --disable-nonfree --enable-gpl \
+        --disable-vaapi --disable-libfaac --disable-nonfree --enable-gpl \
        --disable-postproc --disable-librtmp --disable-libopencore-amrnb \
        --disable-libopencore-amrwb --disable-libdc1394 --disable-version3 --disable-outdev=sdl \
        --disable-libxcb \
--- a/build_files/buildbot/master.cfg
+++ b/build_files/buildbot/master.cfg
@@ -297,8 +297,8 @@ def generic_builder(id, libdir='', branch='', rsync=False):
 # Builders

 add_builder(c, 'mac_x86_64_10_6_cmake', 'darwin-9.x.universal', generic_builder, hour=5)
-# add_builder(c, 'linux_glibc211_i686_cmake', '', generic_builder, hour=1)
-# add_builder(c, 'linux_glibc211_x86_64_cmake', '', generic_builder, hour=2)
+add_builder(c, 'linux_glibc211_i686_cmake', '', generic_builder, hour=1)
+add_builder(c, 'linux_glibc211_x86_64_cmake', '', generic_builder, hour=2)
 add_builder(c, 'linux_glibc219_i686_cmake', '', generic_builder, hour=3)
 add_builder(c, 'linux_glibc219_x86_64_cmake', '', generic_builder, hour=4)
 add_builder(c, 'win32_cmake_vc2013', 'windows_vc12', generic_builder, hour=3)
--- a/build_files/cmake/Modules/GTestTesting.cmake
+++ b/build_files/cmake/Modules/GTestTesting.cmake
@@ -45,7 +45,7 @@ macro(BLENDER_SRC_GTEST_EX NAME SRC EXTRA_LIBS DO_ADD_TEST)
 		                      RUNTIME_OUTPUT_DIRECTORY_DEBUG   "${TESTS_OUTPUT_DIR}"
 		                      INCLUDE_DIRECTORIES              "${TEST_INC}")
 		if(${DO_ADD_TEST})
-			add_test(NAME ${NAME}_test COMMAND ${TESTS_OUTPUT_DIR}/${NAME}_test WORKING_DIRECTORY $<TARGET_FILE_DIR:blender>)
+			add_test(${NAME}_test ${TESTS_OUTPUT_DIR}/${NAME}_test)
 		endif()
 	endif()
 endmacro()
--- a/build_files/cmake/macros.cmake
+++ b/build_files/cmake/macros.cmake
@@ -416,7 +416,14 @@ function(setup_liblinks
 		target_link_libraries(${target} ${OPENCOLORIO_LIBRARIES})
 	endif()
 	if(WITH_OPENSUBDIV OR WITH_CYCLES_OPENSUBDIV)
+		if(WIN32 AND NOT UNIX)
+			file_list_suffix(OPENSUBDIV_LIBRARIES_DEBUG "${OPENSUBDIV_LIBRARIES}" "_d")
+			target_link_libraries_debug(${target} "${OPENSUBDIV_LIBRARIES_DEBUG}")
+			target_link_libraries_optimized(${target} "${OPENSUBDIV_LIBRARIES}")
+			unset(OPENSUBDIV_LIBRARIES_DEBUG)
+		else()
 			target_link_libraries(${target} ${OPENSUBDIV_LIBRARIES})
+		endif()
 	endif()
 	if(WITH_OPENVDB)
 		target_link_libraries(${target} ${OPENVDB_LIBRARIES} ${TBB_LIBRARIES})
@@ -566,7 +573,6 @@ function(SETUP_BLENDER_SORTED_LIBS)
 		bf_editor_space_userpref
 		bf_editor_space_view3d
 		bf_editor_space_clip
-		bf_editor_space_collections

 		bf_editor_transform
 		bf_editor_util
@@ -598,7 +604,6 @@ function(SETUP_BLENDER_SORTED_LIBS)
 		bf_modifiers
 		bf_bmesh
 		bf_gpu
-		bf_draw
 		bf_blenloader
 		bf_blenkernel
 		bf_physics
--- a/build_files/cmake/platform/platform_apple.cmake
+++ b/build_files/cmake/platform/platform_apple.cmake
@@ -158,7 +158,7 @@ if(WITH_CODEC_FFMPEG)
 		mp3lame swscale x264 xvidcore theora theoradec theoraenc vorbis vorbisenc vorbisfile ogg
 	)
 	if(WITH_CXX11)
-		set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} schroedinger orc vpx webp swresample)
+		set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} schroedinger orc vpx)
 	endif()
 	set(FFMPEG_LIBPATH ${FFMPEG}/lib)
 endif()
@@ -316,9 +316,6 @@ if(WITH_OPENIMAGEIO)
 		${OPENEXR_LIBRARIES}
 		${ZLIB_LIBRARIES}
 	)
-	if(WITH_CXX11)
-		set(OPENIMAGEIO_LIBRARIES ${OPENIMAGEIO_LIBRARIES} ${LIBDIR}/ffmpeg/lib/libwebp.a)
-	endif()
 	set(OPENIMAGEIO_LIBPATH
 		${OPENIMAGEIO}/lib
 		${JPEG_LIBPATH}
--- a/build_files/cmake/platform/platform_win32_msvc.cmake
+++ b/build_files/cmake/platform/platform_win32_msvc.cmake
@@ -112,7 +112,7 @@ set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /ignore:4221")

 # MSVC only, Mingw doesnt need
 if(CMAKE_CL_64)
-	set(PLATFORM_LINKFLAGS "/MACHINE:X64 ${PLATFORM_LINKFLAGS}")
+	set(PLATFORM_LINKFLAGS "/MACHINE:X64 /OPT:NOREF ${PLATFORM_LINKFLAGS}")
 else()
 	set(PLATFORM_LINKFLAGS "/MACHINE:IX86 /LARGEADDRESSAWARE ${PLATFORM_LINKFLAGS}")
 endif()
@@ -238,14 +238,14 @@ if(WITH_CODEC_FFMPEG)
 	windows_find_package(FFMPEG)
 	if(NOT FFMPEG_FOUND)
 		warn_hardcoded_paths(ffmpeg)
-		set(FFMPEG_LIBRARY_VERSION 57)
-		set(FFMPEG_LIBRARY_VERSION_AVU 55)
+		set(FFMPEG_LIBRARY_VERSION 55)
+		set(FFMPEG_LIBRARY_VERSION_AVU 52)
 		set(FFMPEG_LIBRARIES
-			${LIBDIR}/ffmpeg/lib/avcodec.lib
-			${LIBDIR}/ffmpeg/lib/avformat.lib
-			${LIBDIR}/ffmpeg/lib/avdevice.lib
-			${LIBDIR}/ffmpeg/lib/avutil.lib
-			${LIBDIR}/ffmpeg/lib/swscale.lib
+			${LIBDIR}/ffmpeg/lib/avcodec-${FFMPEG_LIBRARY_VERSION}.lib
+			${LIBDIR}/ffmpeg/lib/avformat-${FFMPEG_LIBRARY_VERSION}.lib
+			${LIBDIR}/ffmpeg/lib/avdevice-${FFMPEG_LIBRARY_VERSION}.lib
+			${LIBDIR}/ffmpeg/lib/avutil-${FFMPEG_LIBRARY_VERSION_AVU}.lib
+			${LIBDIR}/ffmpeg/lib/swscale-2.lib
 			)
 	endif()
 endif()
@@ -380,7 +380,6 @@ if(WITH_OPENIMAGEIO)
 	set(OPENCOLORIO_DEFINITIONS "-DOCIO_STATIC_BUILD")
 	set(OPENIMAGEIO_IDIFF "${OPENIMAGEIO}/bin/idiff.exe")
 	add_definitions(-DOIIO_STATIC_BUILD)
-	add_definitions(-DOIIO_NO_SSE=1)
 endif()

 if(WITH_LLVM)
@@ -446,20 +445,10 @@ if(WITH_MOD_CLOTH_ELTOPO)
 endif()

 if(WITH_OPENSUBDIV OR WITH_CYCLES_OPENSUBDIV)
-    set(OPENSUBDIV_INCLUDE_DIR ${LIBDIR}/opensubdiv/include)
-    set(OPENSUBDIV_LIBPATH ${LIBDIR}/opensubdiv/lib)
-    set(OPENSUBDIV_LIBRARIES    optimized ${OPENSUBDIV_LIBPATH}/osdCPU.lib 
-                                optimized ${OPENSUBDIV_LIBPATH}/osdGPU.lib
-                                debug ${OPENSUBDIV_LIBPATH}/osdCPU_d.lib 
-                                debug ${OPENSUBDIV_LIBPATH}/osdGPU_d.lib
-                                )
-    set(OPENSUBDIV_HAS_OPENMP TRUE)
-	set(OPENSUBDIV_HAS_TBB FALSE)
-	set(OPENSUBDIV_HAS_OPENCL TRUE)
-	set(OPENSUBDIV_HAS_CUDA FALSE)
-	set(OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK TRUE)
-	set(OPENSUBDIV_HAS_GLSL_COMPUTE TRUE)
-    windows_find_package(OpenSubdiv)
+	set(OPENSUBDIV_INCLUDE_DIR ${LIBDIR}/opensubdiv/include)
+	set(OPENSUBDIV_LIBPATH ${LIBDIR}/opensubdiv/lib)
+	set(OPENSUBDIV_LIBRARIES ${OPENSUBDIV_LIBPATH}/osdCPU.lib ${OPENSUBDIV_LIBPATH}/osdGPU.lib)
+	find_package(OpenSubdiv)
 endif()

 if(WITH_SDL)
--- a/doc/python_api/rst/bge_types/bge.types.KX_GameObject.rst
+++ b/doc/python_api/rst/bge_types/bge.types.KX_GameObject.rst
@@ -405,7 +405,7 @@ base class --- :class:`SCA_IObject`

      .. note::

-         This attribute is experimental and may be removed (but probably wont be).
+         This attribute is experimental and may be removed (but probably wont be).

      .. note::

@@ -419,7 +419,7 @@ base class --- :class:`SCA_IObject`

      .. note::

-         This attribute is experimental and may be removed (but probably wont be).
+         This attribute is experimental and may be removed (but probably wont be).

      .. note::

@@ -453,7 +453,7 @@ base class --- :class:`SCA_IObject`

   .. attribute:: childrenRecursive

-      all children of this object including children's children, (read-only).
+      all children of this object including children's children, (read-only).

      :type: :class:`CListValue` of :class:`KX_GameObject`'s

@@ -536,7 +536,7 @@ base class --- :class:`SCA_IObject`

   .. method:: getAxisVect(vect)

-      Returns the axis vector rotates by the object's worldspace orientation.
+      Returns the axis vector rotated by the object's worldspace orientation.
      This is the equivalent of multiplying the vector by the orientation matrix.

      :arg vect: a vector to align the axis.
@@ -596,7 +596,7 @@ base class --- :class:`SCA_IObject`

      Gets the game object's linear velocity.

-      This method returns the game object's velocity through it's center of mass, ie no angular velocity component.
+      This method returns the game object's velocity through it's centre of mass, ie no angular velocity component.

      :arg local:
         * False: you get the "global" velocity ie: relative to world orientation.
@@ -609,7 +609,7 @@ base class --- :class:`SCA_IObject`

      Sets the game object's linear velocity.

-      This method sets game object's velocity through it's center of mass,
+      This method sets game object's velocity through it's centre of mass,
      ie no angular velocity component.

      This requires a dynamic object.
@@ -814,7 +814,7 @@ base class --- :class:`SCA_IObject`
            # do something
            pass

-      The face parameter determines the orientation of the normal.
+      The face parameter determines the orientation of the normal.

      * 0 => hit normal is always oriented towards the ray origin (as if you casted the ray from outside)
      * 1 => hit normal is the real face normal (only for mesh object, otherwise face has no effect)
@@ -911,7 +911,7 @@ base class --- :class:`SCA_IObject`

      .. note::

-         The gameObject argument has an advantage that it can convert from a mesh with modifiers applied (such as the Subdivision Surface modifier).
+         The gameObject argument has an advantage that it can convert from a mesh with modifiers applied (such as subsurf).

      .. warning::

@@ -919,7 +919,7 @@ base class --- :class:`SCA_IObject`

      .. warning::

-         If the object is a part of a compound object it will fail (parent or child)
+         If the object is a part of a compound object it will fail (parent or child)

      .. warning::

--- a/doc/python_api/rst/bgl.rst
+++ b/doc/python_api/rst/bgl.rst
@@ -12,7 +12,7 @@ contents: dir(bgl).  A simple search on the web can point to more
 than enough material to teach OpenGL programming, from books to many
 collections of tutorials.

-Here is a comprehensive `list of books <https://www.khronos.org/developers/books/>`__ (non free).
+Here is a comprehensive `list of books <https://www.opengl.org/documentation/books/>`__ (non free).
 The `arcsynthesis tutorials <https://web.archive.org/web/20150225192611/http://www.arcsynthesis.org/gltut/index.html>`__
 is one of the best resources to learn modern OpenGL and
 `g-truc <http://www.g-truc.net/post-opengl-samples.html#menu>`__
@@ -2067,7 +2067,7 @@ offers a set of extensive examples, including advanced features.
   :arg length: Returns the length of the string returned in source (excluding the null terminator).
   :type source: :class:`bgl.Buffer` char.
   :arg source: Specifies an array of characters that is used to return the source code string.
-
+   

 .. function:: glShaderSource(shader, shader_string):

--- a/doc/python_api/rst/info_api_reference.rst
+++ b/doc/python_api/rst/info_api_reference.rst
@@ -204,7 +204,7 @@ Lets say we want to access the texture of a brush via Python, to adjust its ``co

 - Start in the default scene and enable 'Sculpt' mode from the 3D-View header.
 - From the toolbar expand the **Texture** panel and add a new texture.
-  *Notice the texture button its self doesn't have very useful links (you can check the tooltips).*
+  *Notice the texture button its self doesn't have very useful links (you can check the tool-tips).*
 - The contrast setting isn't exposed in the sculpt toolbar, so view the texture in the properties panel...

  - In the properties button select the Texture context.
--- a/doc/python_api/rst/info_overview.rst
+++ b/doc/python_api/rst/info_overview.rst
@@ -19,7 +19,7 @@ This is a typical Python environment so tutorials on how to write Python scripts
 will work running the scripts in Blender too.
 Blender provides the :mod:`bpy` module to the Python interpreter.
 This module can be imported in a script and gives access to Blender data, classes, and functions.
-Scripts that deal with Blender data will need to import this module.
+Scripts that deal with Blender data will need to import this module. 

 Here is a simple example of moving a vertex of the object named **Cube**:

@@ -80,7 +80,7 @@ To run as modules:


 Add-ons
-------
+------

 Some of Blenders functionality is best kept optional,
 alongside scripts loaded at startup we have add-ons which are kept in their own directory ``scripts/addons``,
@@ -213,7 +213,7 @@ A simple Blender/Python module can look like this:
       bpy.utils.register_class(SimpleOperator)

   def unregister():
-       bpy.utils.unregister_class(SimpleOperator)
+       bpy.utils.unregister_class(SimpleOperator)    

   if __name__ == "__main__":
       register()
@@ -327,7 +327,7 @@ Say you want to store material settings for a custom engine.
 .. note::

   *The class must be registered before being used in a property, failing to do so will raise an error:*
-
+   
   ``ValueError: bpy_struct "Material" registration error: my_custom_props could not register``


@@ -429,3 +429,4 @@ Calling these operators:
   >>> bpy.ops.object.operator_2()
   Hello World OBJECT_OT_operator_2
   {'FINISHED'}
+
--- a/doc/python_api/sphinx_doc_gen.py
+++ b/doc/python_api/sphinx_doc_gen.py
@@ -1565,9 +1565,9 @@ def pyrna2sphinx(basepath):

    # operators
    def write_ops():
-        API_BASEURL = "https://developer.blender.org/diffusion/B/browse/master/release/scripts "
-        API_BASEURL_ADDON = "https://developer.blender.org/diffusion/BA"
-        API_BASEURL_ADDON_CONTRIB = "https://developer.blender.org/diffusion/BAC"
+        API_BASEURL = "https://developer.blender.org/diffusion/B/browse/master/release/scripts/ "
+        API_BASEURL_ADDON = "https://developer.blender.org/diffusion/BA/"
+        API_BASEURL_ADDON_CONTRIB = "https://developer.blender.org/diffusion/BAC/"

        op_modules = {}
        for op in ops.values():
@@ -1632,9 +1632,13 @@ def write_sphinx_conf_py(basepath):
    file = open(filepath, "w", encoding="utf-8")
    fw = file.write

-    fw("import sys, os\n\n")
-    fw("extensions = ['sphinx.ext.intersphinx']\n\n")
-    fw("intersphinx_mapping = {'blender_manual': ('https://docs.blender.org/manual/en/dev/', None)}\n\n")
+    fw("import sys, os\n")
+    fw("\n")
+    fw("extensions = ['sphinx.ext.intersphinx']\n")
+    fw("\n")
+    fw("intersphinx_mapping = {'blender_manual': ('https://www.blender.org/manual/', None)}\n")
+    fw("\n")
+
    fw("project = 'Blender'\n")
    # fw("master_doc = 'index'\n")
    fw("copyright = u'Blender Foundation'\n")
@@ -1651,16 +1655,12 @@ def write_sphinx_conf_py(basepath):

    # not helpful since the source is generated, adds to upload size.
    fw("html_copy_source = False\n")
-    fw("html_show_sphinx = False\n")
    fw("html_split_index = True\n")
    fw("\n")

    # needed for latex, pdf gen
-    fw("latex_elements = {\n")
-    fw("  'papersize': 'a4paper',\n")
-    fw("}\n\n")
-
    fw("latex_documents = [ ('contents', 'contents.tex', 'Blender Index', 'Blender Foundation', 'manual'), ]\n")
+    fw("latex_paper_size = 'a4paper'\n")
    file.close()


--- a/doc/python_api/sphinx_doc_update.py
+++ b/doc/python_api/sphinx_doc_update.py
@@ -41,9 +41,9 @@ import tempfile
 import zipfile


-DEFAULT_RSYNC_SERVER = "docs.blender.org"
+DEFAULT_RSYNC_SERVER = "www.blender.org"
 DEFAULT_RSYNC_ROOT = "/api/"
-DEFAULT_SYMLINK_ROOT = "/data/www/vhosts/docs.blender.org/api"
+DEFAULT_SYMLINK_ROOT = "/data/www/vhosts/www.blender.org/api"


 def argparse_create():
--- a/extern/Eigen3/Eigen/src/StlSupport/StdVector.h
+++ b/extern/Eigen3/Eigen/src/StlSupport/StdVector.h
@@ -77,7 +77,7 @@ namespace std {
  void resize(size_type new_size)
  { resize(new_size, T()); }

-#if defined(_VECTOR_) && (_MSC_VER<1910)
+#if defined(_VECTOR_)
  // workaround MSVC std::vector implementation
  void resize(size_type new_size, const value_type& x)
  {
@@ -110,7 +110,7 @@ namespace std {
      vector_base::insert(vector_base::end(), new_size - vector_base::size(), x);
  }
 #else
-  // either GCC 4.1, MSVC2017 or non-GCC
+  // either GCC 4.1 or non-GCC
  // default implementation which should always work.
  void resize(size_type new_size, const value_type& x)
  {
--- a/extern/curve_fit_nd/curve_fit_nd.h
+++ b/extern/curve_fit_nd/curve_fit_nd.h
@@ -137,7 +137,7 @@ int curve_fit_cubic_to_points_refit_db(
        const double          error_threshold,
        const unsigned int    calc_flag,
        const unsigned int   *corners,
-        const unsigned int    corners_len,
+        unsigned int          corners_len,
        const double          corner_angle,

        double **r_cubic_array, unsigned int *r_cubic_array_len,
--- a/extern/gflags/README.blender
+++ b/extern/gflags/README.blender
@@ -18,8 +18,6 @@ Local modifications:
 - Applied some modifications from fork https://github.com/Nazg-Gul/gflags.git
  (see https://github.com/gflags/gflags/pull/129)

- Avoid attempt of acquiring mutex lock in FlagRegistry::GlobalRegistry when
+- Avoid attempt of acquiring mutex lock in FlagRegistry::GlobalRegistry when
  doing static flags initialization. See d81dd2d in Blender repository.

- Made `google::{anonymous}::FlagValue::ValueSize() const` inlined, so it does
-  not trigger strict compiler warning.
--- a/extern/gflags/src/gflags.cc
+++ b/extern/gflags/src/gflags.cc
@@ -218,7 +218,7 @@ class FlagValue {
  bool Equal(const FlagValue& x) const;
  FlagValue* New() const;   // creates a new one with default value
  void CopyFrom(const FlagValue& x);
-  inline int ValueSize() const;
+  int ValueSize() const;

  // Calls the given validate-fn on value_buffer_, and returns
  // whatever it returns.  But first casts validate_fn_proto to a
@@ -443,7 +443,7 @@ void FlagValue::CopyFrom(const FlagValue& x) {
  }
 }

-inline int FlagValue::ValueSize() const {
+int FlagValue::ValueSize() const {
  if (type_ > FV_MAX_INDEX) {
    assert(false);  // unknown type
    return 0;
--- a/extern/gtest/include/gtest/internal/gtest-internal.h
+++ b/extern/gtest/include/gtest/internal/gtest-internal.h
@@ -60,10 +60,6 @@
 #include <string>
 #include <vector>

-#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
-# include <type_traits>
-#endif
-
 #include "gtest/gtest-message.h"
 #include "gtest/internal/gtest-string.h"
 #include "gtest/internal/gtest-filepath.h"
@@ -858,7 +854,6 @@ struct AddReference<T&> { typedef T& type; };  // NOLINT
 template <typename From, typename To>
 class ImplicitlyConvertible {
 private:
-#if !((__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800))
  // We need the following helper functions only for their types.
  // They have no implementations.

@@ -879,7 +874,6 @@ class ImplicitlyConvertible {
  // implicitly converted to type To.
  static char Helper(To);
  static char (&Helper(...))[2];  // NOLINT
-#endif

  // We have to put the 'public' section after the 'private' section,
  // or MSVC refuses to compile the code.
@@ -889,8 +883,6 @@ class ImplicitlyConvertible {
  // instantiation.  The simplest workaround is to use its C++0x type traits
  // functions (C++Builder 2009 and above only).
  static const bool value = __is_convertible(From, To);
-#elif (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
-  static const bool value = std::is_convertible<From, To>::value;
 #else
  // MSVC warns about implicitly converting from double to int for
  // possible loss of data, so we need to temporarily disable the
--- a/intern/audaspace/FX/AUD_LimiterReader.cpp
+++ b/intern/audaspace/FX/AUD_LimiterReader.cpp
@@ -110,10 +110,10 @@ void AUD_LimiterReader::read(int& length, bool& eos, sample_t* buffer)
 			eos = true;
 		}

-		if(position < int(m_start * rate))
+		if(position < m_start * rate)
 		{
 			int len2 = length;
-			for(int len = int(m_start * rate) - position;
+			for(int len = m_start * rate - position;
 				len2 == length && !eos;
 				len -= length)
 			{
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -74,6 +74,7 @@ elseif(CMAKE_COMPILER_IS_GNUCC)
 	if(CXX_HAS_AVX2)
 		set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c -mfpmath=sse")
 	endif()
+	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
 elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
 	check_cxx_compiler_flag(-msse CXX_HAS_SSE)
 	check_cxx_compiler_flag(-mavx CXX_HAS_AVX)
@@ -89,6 +90,7 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
 	if(CXX_HAS_AVX2)
 		set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c")
 	endif()
+	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
 endif()

 if(CXX_HAS_SSE)
--- a/intern/cycles/app/cycles_standalone.cpp
+++ b/intern/cycles/app/cycles_standalone.cpp
@@ -72,17 +72,20 @@ static void session_print(const string& str)

 static void session_print_status()
 {
+	int sample, tile;
+	double total_time, sample_time, render_time;
 	string status, substatus;

 	/* get status */
-	float progress = options.session->progress.get_progress();
+	sample = options.session->progress.get_sample();
+	options.session->progress.get_tile(tile, total_time, sample_time, render_time);
 	options.session->progress.get_status(status, substatus);

 	if(substatus != "")
 		status += ": " + substatus;

 	/* print status */
-	status = string_printf("Progress %05.2f   %s", (double) progress*100, status.c_str());
+	status = string_printf("Sample %d   %s", sample, status.c_str());
 	session_print(status);
 }

@@ -164,12 +167,13 @@ static void display_info(Progress& progress)
 	latency = (elapsed - last);
 	last = elapsed;

-	double total_time, sample_time;
+	int sample, tile;
+	double total_time, sample_time, render_time;
 	string status, substatus;

-	progress.get_time(total_time, sample_time);
+	sample = progress.get_sample();
+	progress.get_tile(tile, total_time, sample_time, render_time);
 	progress.get_status(status, substatus);
-	float progress_val = progress.get_progress();

 	if(substatus != "")
 		status += ": " + substatus;
@@ -180,10 +184,10 @@ static void display_info(Progress& progress)
 	        "%s"
 	        "        Time: %.2f"
 	        "        Latency: %.4f"
-	        "        Progress: %05.2f"
+	        "        Sample: %d"
 	        "        Average: %.4f"
 	        "        Interactive: %s",
-	        status.c_str(), total_time, latency, (double) progress_val*100, sample_time, interactive.c_str());
+	        status.c_str(), total_time, latency, sample, sample_time, interactive.c_str());

 	view_display_info(str.c_str());

--- a/intern/cycles/app/cycles_xml.cpp
+++ b/intern/cycles/app/cycles_xml.cpp
@@ -523,7 +523,7 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node)

 	/* we don't yet support arbitrary attributes, for now add vertex
 	 * coordinates as generated coordinates if requested */
-	if(mesh->need_attribute(state.scene, ATTR_STD_GENERATED)) {
+	if (mesh->need_attribute(state.scene, ATTR_STD_GENERATED)) {
 		Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED);
 		memcpy(attr->data_float3(), mesh->verts.data(), sizeof(float3)*mesh->verts.size());
 	}
--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -25,7 +25,6 @@ set(SRC
 	blender_camera.cpp
 	blender_mesh.cpp
 	blender_object.cpp
-	blender_object_cull.cpp
 	blender_particles.cpp
 	blender_curves.cpp
 	blender_logging.cpp
@@ -36,7 +35,6 @@ set(SRC
 	blender_texture.cpp

 	CCL_api.h
-	blender_object_cull.h
 	blender_sync.h
 	blender_session.h
 	blender_texture.h
--- a/intern/cycles/blender/addon/init.py
+++ b/intern/cycles/blender/addon/init.py
@@ -23,25 +23,11 @@ bl_info = {
    "location": "Info header, render engine menu",
    "description": "Cycles Render Engine integration",
    "warning": "",
-    "wiki_url": "https://docs.blender.org/manual/en/dev/render/cycles/",
+    "wiki_url": "https://www.blender.org/manual/render/cycles/index.html",
    "tracker_url": "",
    "support": 'OFFICIAL',
    "category": "Render"}

-# Support 'reload' case.
-if "bpy" in locals():
-    import importlib
-    if "engine" in locals():
-        importlib.reload(engine)
-    if "version_update" in locals():
-        importlib.reload(version_update)
-    if "ui" in locals():
-        importlib.reload(ui)
-    if "properties" in locals():
-        importlib.reload(properties)
-    if "presets" in locals():
-        importlib.reload(presets)
-
 import bpy

 from . import (
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -62,7 +62,7 @@ def _parse_command_line():
    num_resumable_chunks = None
    current_resumable_chunk = None

-    # TODO(sergey): Add some nice error prints if argument is not used properly.
+    # TODO(sergey): Add some nice error prints if argument is not used properly.
    idx = 0
    while idx < len(argv) - 1:
        arg = argv[idx]
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -288,7 +288,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
                description="Probabilistically terminate light samples when the light contribution is below this threshold (more noise but faster rendering). "
                            "Zero disables the test and never ignores lights",
                min=0.0, max=1.0,
-                default=0.01,
+                default=0.05,
                )

        cls.caustics_reflective = BoolProperty(
@@ -528,12 +528,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
                description="Use special type BVH optimized for hair (uses more ram but renders faster)",
                default=True,
                )
-        cls.debug_bvh_time_steps = IntProperty(
-                name="BVH Time Steps",
-                description="Split BVH primitives by this number of time steps to speed up render time in cost of memory",
-                default=0,
-                min=0, max=16,
-                )
        cls.tile_order = EnumProperty(
                name="Tile Order",
                description="Tile order for rendering",
@@ -638,20 +632,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
            items=enum_texture_limit
            )

-        cls.ao_bounces = IntProperty(
-            name="AO Bounces",
-            default=0,
-            description="Approximate indirect light with background tinted ambient occlusion at the specified bounce, 0 disables this feature",
-            min=0, max=1024,
-            )
-
-        cls.ao_bounces_render = IntProperty(
-            name="AO Bounces Render",
-            default=0,
-            description="Approximate indirect light with background tinted ambient occlusion at the specified bounce, 0 disables this feature",
-            min=0, max=1024,
-            )
-
        # Various fine-tuning debug flags

        def devices_update_callback(self, context):
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -217,7 +217,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
        draw_samples_info(layout, context)


-class CyclesRender_PT_geometry(CyclesButtonsPanel, Panel):
+class CyclesRender_PT_geometery(CyclesButtonsPanel, Panel):
    bl_label = "Geometry"
    bl_options = {'DEFAULT_CLOSED'}

@@ -226,7 +226,6 @@ class CyclesRender_PT_geometry(CyclesButtonsPanel, Panel):

        scene = context.scene
        cscene = scene.cycles
-        ccscene = scene.cycles_curves

        if cscene.feature_set == 'EXPERIMENTAL':
            split = layout.split()
@@ -253,25 +252,6 @@ class CyclesRender_PT_geometry(CyclesButtonsPanel, Panel):
            row.prop(cscene, "volume_step_size")
            row.prop(cscene, "volume_max_steps")

-        layout.prop(ccscene, "use_curves", text="Use Hair")
-        col = layout.column()
-        col.active = ccscene.use_curves
-
-        col.prop(ccscene, "primitive", text="Primitive")
-        col.prop(ccscene, "shape", text="Shape")
-
-        if not (ccscene.primitive in {'CURVE_SEGMENTS', 'LINE_SEGMENTS'} and ccscene.shape == 'RIBBONS'):
-            col.prop(ccscene, "cull_backfacing", text="Cull back-faces")
-
-        if ccscene.primitive == 'TRIANGLES' and ccscene.shape == 'THICK':
-            col.prop(ccscene, "resolution", text="Resolution")
-        elif ccscene.primitive == 'CURVE_SEGMENTS':
-            col.prop(ccscene, "subdivisions", text="Curve subdivisions")
-
-        row = col.row()
-        row.prop(ccscene, "minimum_width", text="Min Pixels")
-        row.prop(ccscene, "maximum_width", text="Max Ext.")
-

 class CyclesRender_PT_light_paths(CyclesButtonsPanel, Panel):
    bl_label = "Light Paths"
@@ -432,10 +412,6 @@ class CyclesRender_PT_performance(CyclesButtonsPanel, Panel):
        col.prop(cscene, "debug_use_spatial_splits")
        col.prop(cscene, "debug_use_hair_bvh")

-        row = col.row()
-        row.active = not cscene.debug_use_spatial_splits
-        row.prop(cscene, "debug_bvh_time_steps")
-

 class CyclesRender_PT_layer_options(CyclesButtonsPanel, Panel):
    bl_label = "Layer"
@@ -791,13 +767,10 @@ class CyclesObject_PT_cycles_settings(CyclesButtonsPanel, Panel):
        col = layout.column()
        col.label(text="Performance:")
        row = col.row()
-        sub = row.row()
-        sub.active = scene.render.use_simplify and cscene.use_camera_cull
-        sub.prop(cob, "use_camera_cull")
-
-        sub = row.row()
-        sub.active = scene.render.use_simplify and cscene.use_distance_cull
-        sub.prop(cob, "use_distance_cull")
+        row.active = scene.render.use_simplify and cscene.use_camera_cull
+        row.prop(cob, "use_camera_cull")
+        row.active = scene.render.use_simplify and cscene.use_distance_cull
+        row.prop(cob, "use_distance_cull")


 class CYCLES_OT_use_shading_nodes(Operator):
@@ -1038,11 +1011,10 @@ class CyclesWorld_PT_ambient_occlusion(CyclesButtonsPanel, Panel):
        layout = self.layout

        light = context.world.light_settings
-        scene = context.scene

        row = layout.row()
        sub = row.row()
-        sub.active = light.use_ambient_occlusion or scene.render.use_simplify
+        sub.active = light.use_ambient_occlusion
        sub.prop(light, "ao_factor", text="Factor")
        row.prop(light, "distance", text="Distance")

@@ -1419,6 +1391,43 @@ class CyclesParticle_PT_textures(CyclesButtonsPanel, Panel):
            layout.template_ID(slot, "texture", new="texture.new")


+class CyclesRender_PT_CurveRendering(CyclesButtonsPanel, Panel):  # panel for the scene.cycles_curves settings
+    bl_label = "Cycles Hair Rendering"
+    bl_context = "particle"
+
+    @classmethod
+    def poll(cls, context):  # needs a particle system whose settings type is 'HAIR'
+        psys = context.particle_system
+        return CyclesButtonsPanel.poll(context) and psys and psys.settings.type == 'HAIR'
+    # The header shows the "use_curves" toggle; draw() copies the same flag into layout.active.
+    def draw_header(self, context):
+        ccscene = context.scene.cycles_curves
+        self.layout.prop(ccscene, "use_curves", text="")
+
+    def draw(self, context):
+        layout = self.layout
+
+        scene = context.scene
+        ccscene = scene.cycles_curves
+
+        layout.active = ccscene.use_curves
+
+        layout.prop(ccscene, "primitive", text="Primitive")
+        layout.prop(ccscene, "shape", text="Shape")
+        # Back-face culling is not offered for ribbon-shaped curve/line segments.
+        if not (ccscene.primitive in {'CURVE_SEGMENTS', 'LINE_SEGMENTS'} and ccscene.shape == 'RIBBONS'):
+            layout.prop(ccscene, "cull_backfacing", text="Cull back-faces")
+        # Thick triangles expose "resolution"; curve segments expose "subdivisions" instead.
+        if ccscene.primitive == 'TRIANGLES' and ccscene.shape == 'THICK':
+            layout.prop(ccscene, "resolution", text="Resolution")
+        elif ccscene.primitive == 'CURVE_SEGMENTS':
+            layout.prop(ccscene, "subdivisions", text="Curve subdivisions")
+
+        row = layout.row()
+        row.prop(ccscene, "minimum_width", text="Min Pixels")
+        row.prop(ccscene, "maximum_width", text="Max Ext.")
+
+
 class CyclesRender_PT_bake(CyclesButtonsPanel, Panel):
    bl_label = "Bake"
    bl_context = "render"
@@ -1585,7 +1594,6 @@ class CyclesScene_PT_simplify(CyclesButtonsPanel, Panel):
        row.prop(rd, "simplify_subdivision", text="Viewport")
        row.prop(rd, "simplify_subdivision_render", text="Render")

-
        col = layout.column(align=True)
        col.label(text="Child Particles")
        row = col.row(align=True)
@@ -1614,13 +1622,6 @@ class CyclesScene_PT_simplify(CyclesButtonsPanel, Panel):
        row.active = cscene.use_distance_cull
        row.prop(cscene, "distance_cull_margin", text="Distance")

-        split = layout.split()
-        col = split.column()
-        col.prop(cscene, "ao_bounces")
-
-        col = split.column()
-        col.prop(cscene, "ao_bounces_render")
-
 def draw_device(self, context):
    scene = context.scene
    layout = self.layout
--- a/intern/cycles/blender/blender_curves.cpp
+++ b/intern/cycles/blender/blender_curves.cpp
@@ -29,6 +29,24 @@

 CCL_NAMESPACE_BEGIN

+/* Utilities */
+
+/* Hair curve functions: forward declarations, the definitions follow further down in this file. */
+
+void curveinterp_v3_v3v3v3v3(float3 *p, float3 *v1, float3 *v2, float3 *v3, float3 *v4, const float w[4]);
+void interp_weights(float t, float data[4]);
+float shaperadius(float shape, float root, float tip, float time);
+void InterpolateKeySegments(int seg, int segno, int key, int curve, float3 *keyloc, float *time, ParticleCurveData *CData);
+bool ObtainCacheParticleUV(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int uv_num);
+bool ObtainCacheParticleVcol(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int vcol_num);
+bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background);
+void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData);
+void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
+                               float3 RotCam, bool is_ortho);
+void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resolution);
+void ExportCurveTriangleUV(ParticleCurveData *CData, int vert_offset, int resol, float3 *uvdata);
+void ExportCurveTriangleVcol(ParticleCurveData *CData, int vert_offset, int resol, uchar4 *cdata);
+
 ParticleCurveData::ParticleCurveData()
 {
 }
@@ -37,7 +55,7 @@ ParticleCurveData::~ParticleCurveData()
 {
 }

-static void interp_weights(float t, float data[4])
+void interp_weights(float t, float data[4])
 {
 	/* Cardinal curve interpolation */
 	float t2 = t * t;
@@ -50,19 +68,17 @@ static void interp_weights(float t, float data[4])
 	data[3] =  fc          * t3  - fc * t2;
 }

-static void curveinterp_v3_v3v3v3v3(float3 *p,
-                                    float3 *v1, float3 *v2, float3 *v3, float3 *v4,
-                                    const float w[4])
+void curveinterp_v3_v3v3v3v3(float3 *p, float3 *v1, float3 *v2, float3 *v3, float3 *v4, const float w[4])
 {
 	p->x = v1->x * w[0] + v2->x * w[1] + v3->x * w[2] + v4->x * w[3];
 	p->y = v1->y * w[0] + v2->y * w[1] + v3->y * w[2] + v4->y * w[3];
 	p->z = v1->z * w[0] + v2->z * w[1] + v3->z * w[2] + v4->z * w[3];
 }

-static float shaperadius(float shape, float root, float tip, float time)
+float shaperadius(float shape, float root, float tip, float time)
 {
 	float radius = 1.0f - time;
-
+	
 	if(shape != 0.0f) {
 		if(shape < 0.0f)
 			radius = powf(radius, 1.0f + shape);
@@ -74,13 +90,7 @@ static float shaperadius(float shape, float root, float tip, float time)

 /* curve functions */

-static void InterpolateKeySegments(int seg,
-                                   int segno,
-                                   int key,
-                                   int curve,
-                                   float3 *keyloc,
-                                   float *time,
-                                   ParticleCurveData *CData)
+void InterpolateKeySegments(int seg, int segno, int key, int curve, float3 *keyloc, float *time, ParticleCurveData *CData)
 {
 	float3 ckey_loc1 = CData->curvekey_co[key];
 	float3 ckey_loc2 = ckey_loc1;
@@ -109,11 +119,7 @@ static void InterpolateKeySegments(int seg,
 		curveinterp_v3_v3v3v3v3(keyloc, &ckey_loc1, &ckey_loc2, &ckey_loc3, &ckey_loc4, t);
 }

-static bool ObtainCacheParticleData(Mesh *mesh,
-                                    BL::Mesh *b_mesh,
-                                    BL::Object *b_ob,
-                                    ParticleCurveData *CData,
-                                    bool background)
+bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background)
 {
 	int curvenum = 0;
 	int keyno = 0;
@@ -137,7 +143,7 @@ static bool ObtainCacheParticleData(Mesh *mesh,
 				int totparts = b_psys.particles.length();
 				int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.draw_percentage() / 100.0f);
 				int totcurves = totchild;
-
+				
 				if(b_part.child_type() == 0 || totchild == 0)
 					totcurves += totparts;

@@ -155,7 +161,7 @@ static bool ObtainCacheParticleData(Mesh *mesh,
 				CData->psys_shader.push_back_slow(shader);

 				float radius = get_float(cpsys, "radius_scale") * 0.5f;
-
+	
 				CData->psys_rootradius.push_back_slow(radius * get_float(cpsys, "root_width"));
 				CData->psys_tipradius.push_back_slow(radius * get_float(cpsys, "tip_width"));
 				CData->psys_shape.push_back_slow(get_float(cpsys, "shape"));
@@ -175,7 +181,7 @@ static bool ObtainCacheParticleData(Mesh *mesh,
 				for(; pa_no < totparts+totchild; pa_no++) {
 					int keynum = 0;
 					CData->curve_firstkey.push_back_slow(keyno);
-
+					
 					float curve_length = 0.0f;
 					float3 pcKey;
 					for(int step_no = 0; step_no < ren_step; step_no++) {
@@ -207,12 +213,7 @@ static bool ObtainCacheParticleData(Mesh *mesh,
 	return true;
 }

-static bool ObtainCacheParticleUV(Mesh *mesh,
-                                  BL::Mesh *b_mesh,
-                                  BL::Object *b_ob,
-                                  ParticleCurveData *CData,
-                                  bool background,
-                                  int uv_num)
+bool ObtainCacheParticleUV(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int uv_num)
 {
 	if(!(mesh && b_mesh && b_ob && CData))
 		return false;
@@ -230,7 +231,7 @@ static bool ObtainCacheParticleUV(Mesh *mesh,
 				int totparts = b_psys.particles.length();
 				int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.draw_percentage() / 100.0f);
 				int totcurves = totchild;
-
+				
 				if(b_part.child_type() == 0 || totchild == 0)
 					totcurves += totparts;

@@ -266,12 +267,7 @@ static bool ObtainCacheParticleUV(Mesh *mesh,
 	return true;
 }

-static bool ObtainCacheParticleVcol(Mesh *mesh,
-                                    BL::Mesh *b_mesh,
-                                    BL::Object *b_ob,
-                                    ParticleCurveData *CData,
-                                    bool background,
-                                    int vcol_num)
+bool ObtainCacheParticleVcol(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int vcol_num)
 {
 	if(!(mesh && b_mesh && b_ob && CData))
 		return false;
@@ -289,7 +285,7 @@ static bool ObtainCacheParticleVcol(Mesh *mesh,
 				int totparts = b_psys.particles.length();
 				int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.draw_percentage() / 100.0f);
 				int totcurves = totchild;
-
+				
 				if(b_part.child_type() == 0 || totchild == 0)
 					totcurves += totparts;

@@ -337,16 +333,16 @@ static void set_resolution(BL::Object *b_ob, BL::Scene *scene, bool render)
 	}
 }

-static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
-                                      float3 RotCam, bool is_ortho)
+void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
+                               float3 RotCam, bool is_ortho)
 {
 	int vertexno = mesh->verts.size();
 	int vertexindex = vertexno;
 	int numverts = 0, numtris = 0;

 	/* compute and reserve size of arrays */
-	for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
-		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
+	for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
+		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
 			if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
 				continue;

@@ -358,8 +354,8 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
 	mesh->reserve_mesh(mesh->verts.size() + numverts, mesh->num_triangles() + numtris);

 	/* actually export */
-	for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
-		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
+	for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
+		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
 			if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
 				continue;

@@ -384,7 +380,7 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,

 				if(curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)
 					v1 = CData->curvekey_co[curvekey] - CData->curvekey_co[max(curvekey - 1, CData->curve_firstkey[curve])];
-				else
+				else 
 					v1 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey - 1];

 				time = CData->curvekey_time[curvekey]/CData->curve_length[curve];
@@ -420,17 +416,15 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
 	/* texture coords still needed */
 }

-static void ExportCurveTriangleGeometry(Mesh *mesh,
-                                        ParticleCurveData *CData,
-                                        int resolution)
+void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resolution)
 {
 	int vertexno = mesh->verts.size();
 	int vertexindex = vertexno;
 	int numverts = 0, numtris = 0;

 	/* compute and reserve size of arrays */
-	for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
-		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
+	for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
+		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
 			if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
 				continue;

@@ -442,8 +436,8 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
 	mesh->reserve_mesh(mesh->verts.size() + numverts, mesh->num_triangles() + numtris);

 	/* actually export */
-	for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
-		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
+	for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
+		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
 			if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
 				continue;

@@ -554,7 +548,7 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
 	/* texture coords still needed */
 }

-static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
+void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
 {
 	int num_keys = 0;
 	int num_curves = 0;
@@ -563,13 +557,13 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
 		return;

 	Attribute *attr_intercept = NULL;
-
+	
 	if(mesh->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT))
 		attr_intercept = mesh->curve_attributes.add(ATTR_STD_CURVE_INTERCEPT);

 	/* compute and reserve size of arrays */
-	for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
-		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
+	for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
+		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
 			if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
 				continue;

@@ -588,8 +582,8 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
 	num_curves = 0;

 	/* actually export */
-	for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
-		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
+	for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
+		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
 			if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
 				continue;

@@ -683,13 +677,8 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
 	/* in case of new attribute, we verify if there really was any motion */
 	if(new_attribute) {
 		if(i != numkeys || !have_motion) {
-			/* No motion or hair "topology" changed, remove attributes again. */
-			if(i != numkeys) {
-				VLOG(1) << "Hair topology changed, removing attribute.";
-			}
-			else {
-				VLOG(1) << "No motion, removing attribute.";
-			}
+			/* no motion or key count mismatch, remove attributes again */
+			VLOG(1) << "No motion, removing attribute";
 			mesh->curve_attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
 		}
 		else if(time_index > 0) {
@@ -709,10 +698,7 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
 	}
 }

-static void ExportCurveTriangleUV(ParticleCurveData *CData,
-                                  int vert_offset,
-                                  int resol,
-                                  float3 *uvdata)
+void ExportCurveTriangleUV(ParticleCurveData *CData, int vert_offset, int resol, float3 *uvdata)
 {
 	if(uvdata == NULL)
 		return;
@@ -722,8 +708,8 @@ static void ExportCurveTriangleUV(ParticleCurveData *CData,

 	int vertexindex = vert_offset;

-	for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
-		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
+	for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
+		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
 			if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
 				continue;

@@ -757,18 +743,15 @@ static void ExportCurveTriangleUV(ParticleCurveData *CData,
 	}
 }

-static void ExportCurveTriangleVcol(ParticleCurveData *CData,
-                                    int vert_offset,
-                                    int resol,
-                                    uchar4 *cdata)
+void ExportCurveTriangleVcol(ParticleCurveData *CData, int vert_offset, int resol, uchar4 *cdata)
 {
 	if(cdata == NULL)
 		return;

 	int vertexindex = vert_offset;

-	for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
-		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
+	for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
+		for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
 			if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
 				continue;

@@ -1061,3 +1044,4 @@ void BlenderSync::sync_curves(Mesh *mesh,
 }

 CCL_NAMESPACE_END
+
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -597,8 +597,8 @@ static void create_mesh(Scene *scene,
                        Mesh *mesh,
                        BL::Mesh& b_mesh,
                        const vector<Shader*>& used_shaders,
-                        bool subdivision = false,
-                        bool subdivide_uvs = true)
+                        bool subdivision=false,
+                        bool subdivide_uvs=true)
 {
 	/* count vertices and faces */
 	int numverts = b_mesh.vertices.length();
@@ -671,10 +671,28 @@ static void create_mesh(Scene *scene,
 			int shader = clamp(f->material_index(), 0, used_shaders.size()-1);
 			bool smooth = f->use_smooth() || use_loop_normals;

-			/* Create triangles.
+			/* split vertices if normal is different
 			 *
-			 * NOTE: Autosmooth is already taken care about.
-			 */
+			 * note all vertex attributes must have been set here so we can split
+			 * and copy attributes in split_vertex without remapping later */
+			if(use_loop_normals) {
+				BL::Array<float, 12> loop_normals = f->split_normals();
+
+				for(int i = 0; i < n; i++) {
+					float3 loop_N = make_float3(loop_normals[i * 3], loop_normals[i * 3 + 1], loop_normals[i * 3 + 2]);
+
+					if(N[vi[i]] != loop_N) {
+						int new_vi = mesh->split_vertex(vi[i]);
+
+						/* set new normal and vertex index */
+						N = attr_N->data_float3();
+						N[new_vi] = loop_N;
+						vi[i] = new_vi;
+					}
+				}
+			}
+
+			/* create triangles */
 			if(n == 4) {
 				if(is_zero(cross(mesh->verts[vi[1]] - mesh->verts[vi[0]], mesh->verts[vi[2]] - mesh->verts[vi[0]])) ||
 				   is_zero(cross(mesh->verts[vi[2]] - mesh->verts[vi[0]], mesh->verts[vi[3]] - mesh->verts[vi[0]])))
@@ -706,8 +724,24 @@ static void create_mesh(Scene *scene,

 			vi.reserve(n);
 			for(int i = 0; i < n; i++) {
-				/* NOTE: Autosmooth is already taken care about. */
 				vi[i] = b_mesh.loops[p->loop_start() + i].vertex_index();
+
+				/* split vertices if normal is different
+				 *
+				 * note all vertex attributes must have been set here so we can split
+				 * and copy attributes in split_vertex without remapping later */
+				if(use_loop_normals) {
+					float3 loop_N = get_float3(b_mesh.loops[p->loop_start() + i].normal());
+
+					if(N[vi[i]] != loop_N) {
+						int new_vi = mesh->split_vertex(vi[i]);
+
+						/* set new normal and vertex index */
+						N = attr_N->data_float3();
+						N[new_vi] = loop_N;
+						vi[i] = new_vi;
+					}
+				}
 			}

 			/* create subd faces */
@@ -927,20 +961,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob,

 		mesh->subdivision_type = object_subdivision_type(b_ob, preview, experimental);

-		/* Disable adaptive subdivision while baking as the baking system
-		 * currently doesnt support the topology and will crash.
-		 */
-		if(scene->bake_manager->get_baking()) {
-			mesh->subdivision_type = Mesh::SUBDIVISION_NONE;
-		}
-
-		BL::Mesh b_mesh = object_to_mesh(b_data,
-		                                 b_ob,
-		                                 b_scene,
-		                                 true,
-		                                 !preview,
-		                                 need_undeformed,
-		                                 mesh->subdivision_type);
+		BL::Mesh b_mesh = object_to_mesh(b_data, b_ob, b_scene, true, !preview, need_undeformed, mesh->subdivision_type);

 		if(b_mesh) {
 			if(render_layer.use_surfaces && !hide_tris) {
@@ -1065,13 +1086,7 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,

 	if(ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) {
 		/* get derived mesh */
-		b_mesh = object_to_mesh(b_data,
-		                        b_ob,
-		                        b_scene,
-		                        true,
-		                        !preview,
-		                        false,
-		                        Mesh::SUBDIVISION_NONE);
+		b_mesh = object_to_mesh(b_data, b_ob, b_scene, true, !preview, false, false);
 	}

 	if(!b_mesh) {
@@ -1142,12 +1157,10 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
 			{
 				/* no motion, remove attributes again */
 				if(b_mesh.vertices.length() != numverts) {
-					VLOG(1) << "Topology differs, disabling motion blur for object "
-					        << b_ob.name();
+					VLOG(1) << "Topology differs, disabling motion blur.";
 				}
 				else {
-					VLOG(1) << "No actual deformation motion for object "
-					        << b_ob.name();
+					VLOG(1) << "No actual deformation motion for object " << b_ob.name();
 				}
 				mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
 				if(attr_mN)
--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -25,7 +25,6 @@
 #include "particles.h"
 #include "shader.h"

-#include "blender_object_cull.h"
 #include "blender_sync.h"
 #include "blender_util.h"

@@ -89,6 +88,143 @@ static uint object_ray_visibility(BL::Object& b_ob)
 	return flag;
 }

+/* Culling */
+/* Evaluates the camera and distance cull options read from the scene and object "cycles" settings. */
+class BlenderObjectCulling
+{
+public:
+	BlenderObjectCulling(Scene *scene, BL::Scene& b_scene)
+	: use_scene_camera_cull(false),
+	  use_camera_cull(false),
+	  camera_cull_margin(0.0f),
+	  use_scene_distance_cull(false),
+	  use_distance_cull(false),
+	  distance_cull_margin(0.0f)
+	{
+		if(b_scene.render().use_simplify()) {
+			PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+			/* Scene-level culling stays off for panorama cameras and for multiview renders. */
+			use_scene_camera_cull = scene->camera->type != CAMERA_PANORAMA &&
+									!b_scene.render().use_multiview() &&
+									get_boolean(cscene, "use_camera_cull");
+			use_scene_distance_cull = scene->camera->type != CAMERA_PANORAMA &&
+									  !b_scene.render().use_multiview() &&
+									  get_boolean(cscene, "use_distance_cull");
+
+			camera_cull_margin = get_float(cscene, "camera_cull_margin");
+			distance_cull_margin = get_float(cscene, "distance_cull_margin");
+			/* A distance margin of exactly zero switches distance culling off. */
+			if (distance_cull_margin == 0.0f) {
+				use_scene_distance_cull = false;
+			}
+		}
+	}
+	/* Combines the scene flags with the object's own "cycles" flags; test() reads the result. */
+	void init_object(Scene *scene, BL::Object& b_ob)
+	{
+		if(!use_scene_camera_cull && !use_scene_distance_cull) {
+			return;
+		}
+
+		PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
+
+		use_camera_cull = use_scene_camera_cull && get_boolean(cobject, "use_camera_cull");
+		use_distance_cull = use_scene_distance_cull && get_boolean(cobject, "use_distance_cull");
+
+		if(use_camera_cull || use_distance_cull) {
+			/* Need to have proper projection matrix; presumably refreshes worldtondc read by test_camera(). */
+			scene->camera->update();
+		}
+	}
+	/* Returns true when the enabled cull tests reject the object's transformed bounding box. */
+	bool test(Scene *scene, BL::Object& b_ob, Transform& tfm)
+	{
+		if(!use_camera_cull && !use_distance_cull) {
+			return false;
+		}
+
+		/* Compute world space bounding box corners. */
+		float3 bb[8];
+		BL::Array<float, 24> boundbox = b_ob.bound_box();
+		for(int i = 0; i < 8; ++i) {
+			float3 p = make_float3(boundbox[3 * i + 0],
+								   boundbox[3 * i + 1],
+								   boundbox[3 * i + 2]);
+			bb[i] = transform_point(&tfm, p);
+		}
+
+		bool camera_culled = use_camera_cull && test_camera(scene, bb);
+		bool distance_culled = use_distance_cull && test_distance(scene, bb);
+		/* With both tests enabled the object has to be rejected by both of them. */
+		return ((camera_culled && distance_culled) ||
+		        (camera_culled && !use_distance_cull) ||
+		        (distance_culled && !use_camera_cull));
+	}
+
+private:
+	/* TODO(sergey): Not really optimal, consider approaches based on k-DOP in order
+	 * to reduce number of objects which are wrongly considered visible.
+	 */
+	bool test_camera(Scene *scene, float3 bb[8])
+	{
+		Camera *cam = scene->camera;
+		Transform& worldtondc = cam->worldtondc;
+		float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
+			   bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
+		bool all_behind = true;
+		for(int i = 0; i < 8; ++i) {
+			float3 p = bb[i];
+			float4 b = make_float4(p.x, p.y, p.z, 1.0f);
+			float4 c = make_float4(dot(worldtondc.x, b),
+			                       dot(worldtondc.y, b),
+			                       dot(worldtondc.z, b),
+			                       dot(worldtondc.w, b));
+			p = float4_to_float3(c / c.w);  /* divide by w */
+			if(c.z < 0.0f) {  /* mirror x/y around 0.5 for corners with negative z */
+				p.x = 1.0f - p.x;
+				p.y = 1.0f - p.y;
+			}
+			if(c.z >= -camera_cull_margin) {
+				all_behind = false;
+			}
+			bb_min = min(bb_min, p);
+			bb_max = max(bb_max, p);
+		}
+		if(all_behind) {
+			return true;
+		}
+		return (bb_min.x >= 1.0f + camera_cull_margin ||  /* box entirely outside [0, 1] plus margin in x or y */
+		        bb_min.y >= 1.0f + camera_cull_margin ||
+		        bb_max.x <= -camera_cull_margin ||
+		        bb_max.y <= -camera_cull_margin);
+	}
+
+	bool test_distance(Scene *scene, float3 bb[8])
+	{
+		float3 camera_position = transform_get_column(&scene->camera->matrix, 3);
+		float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
+			   bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
+
+		/* Find min & max points for x & y & z on bounding box */
+		for(int i = 0; i < 8; ++i) {
+			float3 p = bb[i];
+			bb_min = min(bb_min, p);
+			bb_max = max(bb_max, p);
+		}
+		/* Clamping the camera position to the box gives the point of the box closest to the camera. */
+		float3 closest_point = max(min(bb_max,camera_position),bb_min);
+		return (len_squared(camera_position - closest_point) >
+		        distance_cull_margin * distance_cull_margin);
+	}
+	/* use_scene_* are set by the constructor, the two per-object flags by init_object(). */
+	bool use_scene_camera_cull;
+	bool use_camera_cull;
+	float camera_cull_margin;
+	bool use_scene_distance_cull;
+	bool use_distance_cull;
+	float distance_cull_margin;
+};
+
 /* Light */

 void BlenderSync::sync_light(BL::Object& b_parent,
--- a/intern/cycles/blender/blender_object_cull.cpp
+++ b/intern/cycles/blender/blender_object_cull.cpp
@@ -1,149 +0,0 @@
-/*
- * Copyright 2011-2016 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstdlib>
-
-#include "camera.h"
-
-#include "blender_object_cull.h"
-
-CCL_NAMESPACE_BEGIN
-
-BlenderObjectCulling::BlenderObjectCulling(Scene *scene, BL::Scene& b_scene)
-        : use_scene_camera_cull_(false),
-          use_camera_cull_(false),
-          camera_cull_margin_(0.0f),
-          use_scene_distance_cull_(false),
-          use_distance_cull_(false),
-          distance_cull_margin_(0.0f)
-{
-	if(b_scene.render().use_simplify()) {
-		PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
-
-		use_scene_camera_cull_ = scene->camera->type != CAMERA_PANORAMA &&
-		                         !b_scene.render().use_multiview() &&
-		                         get_boolean(cscene, "use_camera_cull");
-		use_scene_distance_cull_ = scene->camera->type != CAMERA_PANORAMA &&
-		                           !b_scene.render().use_multiview() &&
-		                           get_boolean(cscene, "use_distance_cull");
-
-		camera_cull_margin_ = get_float(cscene, "camera_cull_margin");
-		distance_cull_margin_ = get_float(cscene, "distance_cull_margin");
-
-		if(distance_cull_margin_ == 0.0f) {
-			use_scene_distance_cull_ = false;
-		}
-	}
-}
-
-void BlenderObjectCulling::init_object(Scene *scene, BL::Object& b_ob)
-{
-	if(!use_scene_camera_cull_ && !use_scene_distance_cull_) {
-		return;
-	}
-
-	PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
-
-	use_camera_cull_ = use_scene_camera_cull_ && get_boolean(cobject, "use_camera_cull");
-	use_distance_cull_ = use_scene_distance_cull_ && get_boolean(cobject, "use_distance_cull");
-
-	if(use_camera_cull_ || use_distance_cull_) {
-		/* Need to have proper projection matrix. */
-		scene->camera->update();
-	}
-}
-
-bool BlenderObjectCulling::test(Scene *scene, BL::Object& b_ob, Transform& tfm)
-{
-	if(!use_camera_cull_ && !use_distance_cull_) {
-		return false;
-	}
-
-	/* Compute world space bounding box corners. */
-	float3 bb[8];
-	BL::Array<float, 24> boundbox = b_ob.bound_box();
-	for(int i = 0; i < 8; ++i) {
-		float3 p = make_float3(boundbox[3 * i + 0],
-		                       boundbox[3 * i + 1],
-		                       boundbox[3 * i + 2]);
-		bb[i] = transform_point(&tfm, p);
-	}
-
-	bool camera_culled = use_camera_cull_ && test_camera(scene, bb);
-	bool distance_culled = use_distance_cull_ && test_distance(scene, bb);
-
-	return ((camera_culled && distance_culled) ||
-	        (camera_culled && !use_distance_cull_) ||
-	        (distance_culled && !use_camera_cull_));
-}
-
-/* TODO(sergey): Not really optimal, consider approaches based on k-DOP in order
- * to reduce number of objects which are wrongly considered visible.
- */
-bool BlenderObjectCulling::test_camera(Scene *scene, float3 bb[8])
-{
-	Camera *cam = scene->camera;
-	Transform& worldtondc = cam->worldtondc;
-	float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
-	       bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
-	bool all_behind = true;
-	for(int i = 0; i < 8; ++i) {
-		float3 p = bb[i];
-		float4 b = make_float4(p.x, p.y, p.z, 1.0f);
-		float4 c = make_float4(dot(worldtondc.x, b),
-		                       dot(worldtondc.y, b),
-		                       dot(worldtondc.z, b),
-		                       dot(worldtondc.w, b));
-		p = float4_to_float3(c / c.w);
-		if(c.z < 0.0f) {
-			p.x = 1.0f - p.x;
-			p.y = 1.0f - p.y;
-		}
-		if(c.z >= -camera_cull_margin_) {
-			all_behind = false;
-		}
-		bb_min = min(bb_min, p);
-		bb_max = max(bb_max, p);
-	}
-	if(all_behind) {
-		return true;
-	}
-	return (bb_min.x >= 1.0f + camera_cull_margin_ ||
-	        bb_min.y >= 1.0f + camera_cull_margin_ ||
-	        bb_max.x <= -camera_cull_margin_ ||
-	        bb_max.y <= -camera_cull_margin_);
-}
-
-bool BlenderObjectCulling::test_distance(Scene *scene, float3 bb[8])
-{
-	float3 camera_position = transform_get_column(&scene->camera->matrix, 3);
-	float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
-	       bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
-
-	/* Find min & max points for x & y & z on bounding box */
-	for(int i = 0; i < 8; ++i) {
-		float3 p = bb[i];
-		bb_min = min(bb_min, p);
-		bb_max = max(bb_max, p);
-	}
-
-	float3 closest_point = max(min(bb_max,camera_position),bb_min);
-	return (len_squared(camera_position - closest_point) >
-	        distance_cull_margin_ * distance_cull_margin_);
-}
-
-CCL_NAMESPACE_END
-
--- a/intern/cycles/blender/blender_object_cull.h
+++ b/intern/cycles/blender/blender_object_cull.h
@@ -1,49 +0,0 @@
-/*
- * Copyright 2011-2016 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __BLENDER_OBJECT_CULL_H__
-#define __BLENDER_OBJECT_CULL_H__
-
-#include "blender_sync.h"
-#include "util_types.h"
-
-CCL_NAMESPACE_BEGIN
-
-class Scene;
-
-class BlenderObjectCulling
-{
-public:
-	BlenderObjectCulling(Scene *scene, BL::Scene& b_scene);
-
-	void init_object(Scene *scene, BL::Object& b_ob);
-	bool test(Scene *scene, BL::Object& b_ob, Transform& tfm);
-
-private:
-	bool test_camera(Scene *scene, float3 bb[8]);
-	bool test_distance(Scene *scene, float3 bb[8]);
-
-	bool use_scene_camera_cull_;
-	bool use_camera_cull_;
-	float camera_cull_margin_;
-	bool use_scene_distance_cull_;
-	bool use_distance_cull_;
-	float distance_cull_margin_;
-};
-
-CCL_NAMESPACE_END
-
-#endif /* __BLENDER_OBJECT_CULL_H__ */
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -126,8 +126,8 @@ void BlenderSession::create_session()

 	/* setup callbacks for builtin image support */
 	scene->image_manager->builtin_image_info_cb = function_bind(&BlenderSession::builtin_image_info, this, _1, _2, _3, _4, _5, _6, _7);
-	scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3, _4);
-	scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3, _4);
+	scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3);
+	scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3);

 	/* create session */
 	session = new Session(session_params);
@@ -305,16 +305,12 @@ static PassType get_pass_type(BL::RenderPass& b_pass)
 #ifdef WITH_CYCLES_DEBUG
 		case BL::RenderPass::type_DEBUG:
 		{
-			switch(b_pass.debug_type()) {
-				case BL::RenderPass::debug_type_BVH_TRAVERSED_NODES:
-					return PASS_BVH_TRAVERSED_NODES;
-				case BL::RenderPass::debug_type_BVH_TRAVERSED_INSTANCES:
-					return PASS_BVH_TRAVERSED_INSTANCES;
-				case BL::RenderPass::debug_type_BVH_INTERSECTIONS:
-					return PASS_BVH_INTERSECTIONS;
-				case BL::RenderPass::debug_type_RAY_BOUNCES:
-					return PASS_RAY_BOUNCES;
-			}
+			if(b_pass.debug_type() == BL::RenderPass::debug_type_BVH_TRAVERSAL_STEPS)
+				return PASS_BVH_TRAVERSAL_STEPS;
+			if(b_pass.debug_type() == BL::RenderPass::debug_type_BVH_TRAVERSED_INSTANCES)
+				return PASS_BVH_TRAVERSED_INSTANCES;
+			if(b_pass.debug_type() == BL::RenderPass::debug_type_RAY_BOUNCES)
+				return PASS_RAY_BOUNCES;
 			break;
 		}
 #endif
@@ -584,7 +580,7 @@ static void populate_bake_data(BakeData *data, const
 	BL::BakePixel bp = pixel_array;

 	int i;
-	for(i = 0; i < num_pixels; i++) {
+	for(i=0; i < num_pixels; i++) {
 		if(bp.object_id() == object_id) {
 			data->set(i, bp.primitive_id(), bp.uv(), bp.du_dx(), bp.du_dy(), bp.dv_dx(), bp.dv_dy());
 		} else {
@@ -934,13 +930,38 @@ void BlenderSession::get_status(string& status, string& substatus)

 void BlenderSession::get_progress(float& progress, double& total_time, double& render_time)
 {
-	session->progress.get_time(total_time, render_time);
-	progress = session->progress.get_progress();
+	/* Report render progress as a fraction; 0 when it cannot be computed. */
+	double tile_time;
+	int tile, sample, samples_per_tile;
+	int tile_total = session->tile_manager.state.num_tiles;
+	int samples = session->tile_manager.state.sample + 1;
+	int total_samples = session->tile_manager.get_num_effective_samples();
+	/* Only the timings are used here; tile and tile_time are fetched but ignored. */
+	session->progress.get_tile(tile, total_time, render_time, tile_time);
+	sample = session->progress.get_sample();
+	samples_per_tile = session->tile_manager.get_num_effective_samples();
+	/* Multiply as floats: the int product can overflow (sample count may be INT_MAX, see below). */
+	if(background && samples_per_tile && tile_total)
+		progress = (float)sample / ((float)tile_total * (float)samples_per_tile);
+	else if(!background && samples > 0 && total_samples != INT_MAX)
+		progress = ((float)samples) / total_samples;
+	else
+		progress = 0.0f;
 }

 void BlenderSession::update_bake_progress()
 {
-	float progress = session->progress.get_progress();
+	float progress;
+	int sample, samples_per_task, parts_total;
+
+	sample = session->progress.get_sample();
+	samples_per_task = scene->bake_manager->num_samples;
+	parts_total = scene->bake_manager->num_parts;
+	/* Guard both factors: a zero divisor would turn the progress into inf/NaN. */
+	if(samples_per_task && parts_total)
+		progress = (float)sample / ((float)parts_total * (float)samples_per_task);
+	else
+		progress = 0.0f;

 	if(progress != last_progress) {
 		b_engine.update_progress(progress);
@@ -1059,13 +1080,7 @@ int BlenderSession::builtin_image_frame(const string &builtin_name)
 	return atoi(builtin_name.substr(last + 1, builtin_name.size() - last - 1).c_str());
 }

-void BlenderSession::builtin_image_info(const string &builtin_name,
-                                        void *builtin_data,
-                                        bool &is_float,
-                                        int &width,
-                                        int &height,
-                                        int &depth,
-                                        int &channels)
+void BlenderSession::builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &depth, int &channels)
 {
 	/* empty image */
 	is_float = false;
@@ -1143,67 +1158,60 @@ void BlenderSession::builtin_image_info(const string &builtin_name,
 	}
 }

-bool BlenderSession::builtin_image_pixels(const string &builtin_name,
-                                          void *builtin_data,
-                                          unsigned char *pixels,
-                                          const size_t pixels_size)
+bool BlenderSession::builtin_image_pixels(const string &builtin_name, void *builtin_data, unsigned char *pixels)
 {
-	if(!builtin_data) {
+	if(!builtin_data)
 		return false;
-	}

-	const int frame = builtin_image_frame(builtin_name);
+	int frame = builtin_image_frame(builtin_name);

 	PointerRNA ptr;
 	RNA_id_pointer_create((ID*)builtin_data, &ptr);
 	BL::Image b_image(ptr);

-	const int width = b_image.size()[0];
-	const int height = b_image.size()[1];
-	const int channels = b_image.channels();
+	int width = b_image.size()[0];
+	int height = b_image.size()[1];
+	int channels = b_image.channels();

-	unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame);
-	const size_t num_pixels = ((size_t)width) * height;
+	unsigned char *image_pixels;
+	image_pixels = image_get_pixels_for_frame(b_image, frame);
+	size_t num_pixels = ((size_t)width) * height;

-	if(image_pixels && num_pixels * channels == pixels_size) {
-		memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char));
+	if(image_pixels) {
+		memcpy(pixels, image_pixels, num_pixels * channels * sizeof(unsigned char));
 		MEM_freeN(image_pixels);
 	}
 	else {
 		if(channels == 1) {
-			memset(pixels, 0, pixels_size * sizeof(unsigned char));
+			memset(pixels, 0, num_pixels * sizeof(unsigned char));
 		}
 		else {
-			const size_t num_pixels_safe = pixels_size / channels;
 			unsigned char *cp = pixels;
-			for(size_t i = 0; i < num_pixels_safe; i++, cp += channels) {
+			for(size_t i = 0; i < num_pixels; i++, cp += channels) {
 				cp[0] = 255;
 				cp[1] = 0;
 				cp[2] = 255;
-				if(channels == 4) {
+				if(channels == 4)
 					cp[3] = 255;
-				}
 			}
 		}
 	}
-	/* Premultiply, byte images are always straight for Blender. */
+
+	/* premultiply, byte images are always straight for blender */
 	unsigned char *cp = pixels;
 	for(size_t i = 0; i < num_pixels; i++, cp += channels) {
 		cp[0] = (cp[0] * cp[3]) >> 8;
 		cp[1] = (cp[1] * cp[3]) >> 8;
 		cp[2] = (cp[2] * cp[3]) >> 8;
 	}
+
 	return true;
 }

-bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
-                                                void *builtin_data,
-                                                float *pixels,
-                                                const size_t pixels_size)
+bool BlenderSession::builtin_image_float_pixels(const string &builtin_name, void *builtin_data, float *pixels)
 {
-	if(!builtin_data) {
+	if(!builtin_data)
 		return false;
-	}

 	PointerRNA ptr;
 	RNA_id_pointer_create((ID*)builtin_data, &ptr);
@@ -1214,16 +1222,16 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
 		BL::Image b_image(b_id);
 		int frame = builtin_image_frame(builtin_name);

-		const int width = b_image.size()[0];
-		const int height = b_image.size()[1];
-		const int channels = b_image.channels();
+		int width = b_image.size()[0];
+		int height = b_image.size()[1];
+		int channels = b_image.channels();

 		float *image_pixels;
 		image_pixels = image_get_float_pixels_for_frame(b_image, frame);
-		const size_t num_pixels = ((size_t)width) * height;
+		size_t num_pixels = ((size_t)width) * height;

-		if(image_pixels && num_pixels * channels == pixels_size) {
-			memcpy(pixels, image_pixels, pixels_size * sizeof(float));
+		if(image_pixels) {
+			memcpy(pixels, image_pixels, num_pixels * channels * sizeof(float));
 			MEM_freeN(image_pixels);
 		}
 		else {
@@ -1231,15 +1239,13 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
 				memset(pixels, 0, num_pixels * sizeof(float));
 			}
 			else {
-				const size_t num_pixels_safe = pixels_size / channels;
 				float *fp = pixels;
-				for(int i = 0; i < num_pixels_safe; i++, fp += channels) {
+				for(size_t i = 0; i < num_pixels; i++, fp += channels) {  /* size_t index: same type as num_pixels */
 					fp[0] = 1.0f;
 					fp[1] = 0.0f;
 					fp[2] = 1.0f;
-					if(channels == 4) {
+					if(channels == 4)
 						fp[3] = 1.0f;
-					}
 				}
 			}
 		}
@@ -1251,9 +1257,8 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
 		BL::Object b_ob(b_id);
 		BL::SmokeDomainSettings b_domain = object_smoke_domain_find(b_ob);

-		if(!b_domain) {
+		if(!b_domain)
 			return false;
-		}

 		int3 resolution = get_int3(b_domain.domain_resolution());
 		int length, amplify = (b_domain.use_high_resolution())? b_domain.amplify() + 1: 1;
@@ -1265,10 +1270,10 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
 			amplify = 1;
 		}

-		const int width = resolution.x * amplify;
-		const int height = resolution.y * amplify;
-		const int depth = resolution.z * amplify;
-		const size_t num_pixels = ((size_t)width) * height * depth;
+		int width = resolution.x * amplify;
+		int height = resolution.y * amplify;
+		int depth = resolution.z * amplify;
+		size_t num_pixels = ((size_t)width) * height * depth;

 		if(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) {
 			SmokeDomainSettings_density_grid_get_length(&b_domain.ptr, &length);
@@ -1352,9 +1357,6 @@ void BlenderSession::update_resumable_tile_manager(int num_samples)
 	VLOG(1) << "Samples range start is " << range_start_sample << ", "
 	        << "number of samples to render is " << range_num_samples;

-	scene->integrator->start_sample = range_start_sample;
-	scene->integrator->tag_update(scene);
-
 	session->tile_manager.range_start_sample = range_start_sample;
 	session->tile_manager.range_num_samples = range_num_samples;
 }
--- a/intern/cycles/blender/blender_session.h
+++ b/intern/cycles/blender/blender_session.h
@@ -145,21 +145,9 @@ protected:
 	void do_write_update_render_tile(RenderTile& rtile, bool do_update_only);

 	int builtin_image_frame(const string &builtin_name);
-	void builtin_image_info(const string &builtin_name,
-	                        void *builtin_data,
-	                        bool &is_float,
-	                        int &width,
-	                        int &height,
-	                        int &depth,
-	                        int &channels);
-	bool builtin_image_pixels(const string &builtin_name,
-	                          void *builtin_data,
-	                          unsigned char *pixels,
-	                          const size_t pixels_size);
-	bool builtin_image_float_pixels(const string &builtin_name,
-	                                void *builtin_data,
-	                                float *pixels,
-	                                const size_t pixels_size);
+	void builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &depth, int &channels);
+	bool builtin_image_pixels(const string &builtin_name, void *builtin_data, unsigned char *pixels);
+	bool builtin_image_float_pixels(const string &builtin_name, void *builtin_data, float *pixels);

 	/* Update tile manager to reflect resumable render settings. */
 	void update_resumable_tile_manager(int num_samples);
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -640,8 +640,7 @@ static ShaderNode *add_node(Scene *scene,
 				        image->filename.string(),
 				        image->builtin_data,
 				        get_image_interpolation(b_image_node),
-				        get_image_extension(b_image_node),
-				        image->use_alpha);
+				        get_image_extension(b_image_node));
 			}
 		}
 		image->color_space = (NodeImageColorSpace)b_image_node.color_space();
@@ -687,8 +686,7 @@ static ShaderNode *add_node(Scene *scene,
 				        env->filename.string(),
 				        env->builtin_data,
 				        get_image_interpolation(b_env_node),
-				        EXTENSION_REPEAT,
-				        env->use_alpha);
+				        EXTENSION_REPEAT);
 			}
 		}
 		env->color_space = (NodeImageColorSpace)b_env_node.color_space();
@@ -825,8 +823,7 @@ static ShaderNode *add_node(Scene *scene,
 			        point_density->filename.string(),
 			        point_density->builtin_data,
 			        point_density->interpolation,
-			        EXTENSION_CLIP,
-			        true);
+			        EXTENSION_CLIP);
 		}
 		node = point_density;

--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -322,15 +322,6 @@ void BlenderSync::sync_integrator()
 		integrator->volume_samples = volume_samples;
 	}

-	if(b_scene.render().use_simplify()) {
-		if(preview) {
-			integrator->ao_bounces = get_int(cscene, "ao_bounces");
-		}
-		else {
-			integrator->ao_bounces = get_int(cscene, "ao_bounces_render");
-		}
-	}
-
 	if(integrator->modified(previntegrator))
 		integrator->tag_update(scene);
 }
@@ -507,7 +498,6 @@ SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,

 	params.use_bvh_spatial_split = RNA_boolean_get(&cscene, "debug_use_spatial_splits");
 	params.use_bvh_unaligned_nodes = RNA_boolean_get(&cscene, "debug_use_hair_bvh");
-	params.num_bvh_time_steps = RNA_int_get(&cscene, "debug_bvh_time_steps");

 	if(background && params.shadingsystem != SHADINGSYSTEM_OSL)
 		params.persistent_data = r.use_persistent_data();
--- a/intern/cycles/blender/blender_util.h
+++ b/intern/cycles/blender/blender_util.h
@@ -48,12 +48,12 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
                                      bool apply_modifiers,
                                      bool render,
                                      bool calc_undeformed,
-                                      Mesh::SubdivisionType subdivision_type)
+                                      bool subdivision)
 {
 	bool subsurf_mod_show_render;
 	bool subsurf_mod_show_viewport;

-	if(subdivision_type != Mesh::SUBDIVISION_NONE) {
+	if(subdivision) {
 		BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length()-1];

 		subsurf_mod_show_render = subsurf_mod.show_render();
@@ -65,7 +65,7 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,

 	BL::Mesh me = data.meshes.new_from_object(scene, object, apply_modifiers, (render)? 2: 1, false, calc_undeformed);

-	if(subdivision_type != Mesh::SUBDIVISION_NONE) {
+	if(subdivision) {
 		BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length()-1];

 		subsurf_mod.show_render(subsurf_mod_show_render);
@@ -74,14 +74,9 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,

 	if((bool)me) {
 		if(me.use_auto_smooth()) {
-			if(subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK) {
-				me.calc_normals_split();
-			}
-			else {
-				me.split_faces();
-			}
+			me.calc_normals_split();
 		}
-		if(subdivision_type == Mesh::SUBDIVISION_NONE) {
+		if(!subdivision) {
 			me.calc_tessface(true);
 		}
 	}
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -845,8 +845,6 @@ void QBVH::pack_aligned_inner(const BVHStackEntry& e,
 	                  bounds,
 	                  child,
 	                  e.node->m_visibility,
-	                  e.node->m_time_from,
-	                  e.node->m_time_to,
 	                  num);
 }

@@ -854,17 +852,12 @@ void QBVH::pack_aligned_node(int idx,
                             const BoundBox *bounds,
                             const int *child,
                             const uint visibility,
-                             const float time_from,
-                             const float time_to,
                             const int num)
 {
 	float4 data[BVH_QNODE_SIZE];
 	memset(data, 0, sizeof(data));

 	data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
-	data[0].y = time_from;
-	data[0].z = time_to;
-
 	for(int i = 0; i < num; i++) {
 		float3 bb_min = bounds[i].min;
 		float3 bb_max = bounds[i].max;
@@ -915,8 +908,6 @@ void QBVH::pack_unaligned_inner(const BVHStackEntry& e,
 	                    bounds,
 	                    child,
 	                    e.node->m_visibility,
-	                    e.node->m_time_from,
-	                    e.node->m_time_to,
 	                    num);
 }

@@ -925,16 +916,12 @@ void QBVH::pack_unaligned_node(int idx,
                               const BoundBox *bounds,
                               const int *child,
                               const uint visibility,
-                               const float time_from,
-                               const float time_to,
                               const int num)
 {
 	float4 data[BVH_UNALIGNED_QNODE_SIZE];
 	memset(data, 0, sizeof(data));

 	data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
-	data[0].y = time_from;
-	data[0].z = time_to;

 	for(int i = 0; i < num; i++) {
 		Transform space = BVHUnaligned::compute_node_transform(
@@ -1220,8 +1207,6 @@ void QBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
 			                    child_bbox,
 			                    &c[0],
 			                    visibility,
-			                    0.0f,
-			                    1.0f,
 			                    4);
 		}
 		else {
@@ -1229,8 +1214,6 @@ void QBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
 			                  child_bbox,
 			                  &c[0],
 			                  visibility,
-			                  0.0f,
-			                  1.0f,
 			                  4);
 		}
 	}
--- a/intern/cycles/bvh/bvh.h
+++ b/intern/cycles/bvh/bvh.h
@@ -175,8 +175,6 @@ protected:
 	                       const BoundBox *bounds,
 	                       const int *child,
 	                       const uint visibility,
-	                       const float time_from,
-	                       const float time_to,
 	                       const int num);

 	void pack_unaligned_inner(const BVHStackEntry& e,
@@ -187,8 +185,6 @@ protected:
 	                         const BoundBox *bounds,
 	                         const int *child,
 	                         const uint visibility,
-	                         const float time_from,
-	                         const float time_to,
 	                         const int num);

 	/* refit */
--- a/intern/cycles/bvh/bvh_build.cpp
+++ b/intern/cycles/bvh/bvh_build.cpp
@@ -26,7 +26,6 @@
 #include "scene.h"
 #include "curves.h"

-#include "util_algorithm.h"
 #include "util_debug.h"
 #include "util_foreach.h"
 #include "util_logging.h"
@@ -113,237 +112,81 @@ BVHBuild::~BVHBuild()

 /* Adding References */

-void BVHBuild::add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
+void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
 {
-	const Attribute *attr_mP = NULL;
-	if(mesh->has_motion_blur()) {
-		attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-	}
-	const size_t num_triangles = mesh->num_triangles();
-	for(uint j = 0; j < num_triangles; j++) {
-		Mesh::Triangle t = mesh->get_triangle(j);
-		const float3 *verts = &mesh->verts[0];
-		if(attr_mP == NULL) {
-			BoundBox bounds = BoundBox::empty;
-			t.bounds_grow(verts, bounds);
-			if(bounds.valid()) {
-				references.push_back(BVHReference(bounds,
-				                                  j,
-				                                  i,
-				                                  PRIMITIVE_TRIANGLE));
-				root.grow(bounds);
-				center.grow(bounds.center2());
-			}
-		}
-		else if(params.num_motion_triangle_steps == 0 || params.use_spatial_split) {
-			/* Motion triangles, simple case: single node for the whole
-			 * primitive. Lowest memory footprint and faster BVH build but
-			 * least optimal ray-tracing.
-			 */
-			/* TODO(sergey): Support motion steps for spatially split BVH. */
-			const size_t num_verts = mesh->verts.size();
-			const size_t num_steps = mesh->motion_steps;
-			const float3 *vert_steps = attr_mP->data_float3();
-			BoundBox bounds = BoundBox::empty;
-			t.bounds_grow(verts, bounds);
-			for(size_t step = 0; step < num_steps - 1; step++) {
-				t.bounds_grow(vert_steps + step*num_verts, bounds);
-			}
-			if(bounds.valid()) {
-				references.push_back(
-				        BVHReference(bounds,
-				                     j,
-				                     i,
-				                     PRIMITIVE_MOTION_TRIANGLE));
-				root.grow(bounds);
-				center.grow(bounds.center2());
-			}
-		}
-		else {
-			/* Motion triangles, trace optimized case:  we split triangle
-			 * primitives into separate nodes for each of the time steps.
-			 * This way we minimize overlap of neighbor curve primitives.
-			 */
-			const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
-			const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
-			const size_t num_verts = mesh->verts.size();
-			const size_t num_steps = mesh->motion_steps;
-			const float3 *vert_steps = attr_mP->data_float3();
-			/* Calculate bounding box of the previous time step.
-			 * Will be reused later to avoid duplicated work on
-			 * calculating BVH time step boundbox.
-			 */
-			float3 prev_verts[3];
-			t.motion_verts(verts,
-			               vert_steps,
-			               num_verts,
-			               num_steps,
-			               0.0f,
-			               prev_verts);
-			BoundBox prev_bounds = BoundBox::empty;
-			prev_bounds.grow(prev_verts[0]);
-			prev_bounds.grow(prev_verts[1]);
-			prev_bounds.grow(prev_verts[2]);
-			/* Create all primitive time steps, */
-			for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
-				const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
-				float3 curr_verts[3];
-				t.motion_verts(verts,
-				               vert_steps,
-				               num_verts,
-				               num_steps,
-				               curr_time,
-				               curr_verts);
-				BoundBox curr_bounds = BoundBox::empty;
-				curr_bounds.grow(curr_verts[0]);
-				curr_bounds.grow(curr_verts[1]);
-				curr_bounds.grow(curr_verts[2]);
-				BoundBox bounds = prev_bounds;
-				bounds.grow(curr_bounds);
-				if(bounds.valid()) {
-					const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
-					references.push_back(
-					        BVHReference(bounds,
-					                     j,
-					                     i,
-					                     PRIMITIVE_MOTION_TRIANGLE,
-					                     prev_time,
-					                     curr_time));
-					root.grow(bounds);
-					center.grow(bounds.center2());
-				}
-				/* Current time boundbox becomes previous one for the
-				 * next time step.
-				 */
-				prev_bounds = curr_bounds;
-			}
-		}
-	}
-}
+	if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
+		Attribute *attr_mP = NULL;

-void BVHBuild::add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
-{
-	const Attribute *curve_attr_mP = NULL;
-	if(mesh->has_motion_blur()) {
-		curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+		if(mesh->has_motion_blur())
+			attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+		size_t num_triangles = mesh->num_triangles();
+		for(uint j = 0; j < num_triangles; j++) {
+			Mesh::Triangle t = mesh->get_triangle(j);
+			BoundBox bounds = BoundBox::empty;
+			PrimitiveType type = PRIMITIVE_TRIANGLE;
+
+			t.bounds_grow(&mesh->verts[0], bounds);
+
+			/* motion triangles */
+			if(attr_mP) {
+				size_t mesh_size = mesh->verts.size();
+				size_t steps = mesh->motion_steps - 1;
+				float3 *vert_steps = attr_mP->data_float3();
+
+				for(size_t i = 0; i < steps; i++)
+					t.bounds_grow(vert_steps + i*mesh_size, bounds);
+
+				type = PRIMITIVE_MOTION_TRIANGLE;
+			}
+
+			if(bounds.valid()) {
+				references.push_back(BVHReference(bounds, j, i, type));
+				root.grow(bounds);
+				center.grow(bounds.center2());
+			}
+		}
 	}
-	const size_t num_curves = mesh->num_curves();
-	for(uint j = 0; j < num_curves; j++) {
-		const Mesh::Curve curve = mesh->get_curve(j);
-		const float *curve_radius = &mesh->curve_radius[0];
-		for(int k = 0; k < curve.num_keys - 1; k++) {
-			if(curve_attr_mP == NULL) {
-				/* Really simple logic for static hair. */
+
+	if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
+		Attribute *curve_attr_mP = NULL;
+
+		if(mesh->has_motion_blur())
+			curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+		size_t num_curves = mesh->num_curves();
+		for(uint j = 0; j < num_curves; j++) {
+			Mesh::Curve curve = mesh->get_curve(j);
+			PrimitiveType type = PRIMITIVE_CURVE;
+
+			for(int k = 0; k < curve.num_keys - 1; k++) {
 				BoundBox bounds = BoundBox::empty;
-				curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
+				curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bounds);
+
+				/* motion curve */
+				if(curve_attr_mP) {
+					size_t mesh_size = mesh->curve_keys.size();
+					size_t steps = mesh->motion_steps - 1;
+					float3 *key_steps = curve_attr_mP->data_float3();
+
+					for(size_t i = 0; i < steps; i++)
+						curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bounds);
+
+					type = PRIMITIVE_MOTION_CURVE;
+				}
+
 				if(bounds.valid()) {
-					int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE, k);
+					int packed_type = PRIMITIVE_PACK_SEGMENT(type, k);
+
 					references.push_back(BVHReference(bounds, j, i, packed_type));
 					root.grow(bounds);
 					center.grow(bounds.center2());
 				}
 			}
-			else if(params.num_motion_curve_steps == 0 || params.use_spatial_split) {
-				/* Simple case of motion curves: single node for the while
-				 * shutter time. Lowest memory usage but less optimal
-				 * rendering.
-				 */
-				/* TODO(sergey): Support motion steps for spatially split BVH. */
-				BoundBox bounds = BoundBox::empty;
-				curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
-				const size_t num_keys = mesh->curve_keys.size();
-				const size_t num_steps = mesh->motion_steps;
-				const float3 *key_steps = curve_attr_mP->data_float3();
-				for(size_t step = 0; step < num_steps - 1; step++) {
-					curve.bounds_grow(k,
-					                  key_steps + step*num_keys,
-					                  curve_radius,
-					                  bounds);
-				}
-				if(bounds.valid()) {
-					int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
-					references.push_back(BVHReference(bounds,
-					                                  j,
-					                                  i,
-					                                  packed_type));
-					root.grow(bounds);
-					center.grow(bounds.center2());
-				}
-			}
-			else {
-				/* Motion curves, trace optimized case:  we split curve keys
-				 * primitives into separate nodes for each of the time steps.
-				 * This way we minimize overlap of neighbor curve primitives.
-				 */
-				const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
-				const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
-				const size_t num_steps = mesh->motion_steps;
-				const float3 *curve_keys = &mesh->curve_keys[0];
-				const float3 *key_steps = curve_attr_mP->data_float3();
-				const size_t num_keys = mesh->curve_keys.size();
-				/* Calculate bounding box of the previous time step.
-				 * Will be reused later to avoid duplicated work on
-				 * calculating BVH time step boundbox.
-				 */
-				float4 prev_keys[4];
-				curve.cardinal_motion_keys(curve_keys,
-				                           curve_radius,
-				                           key_steps,
-				                           num_keys,
-				                           num_steps,
-				                           0.0f,
-				                           k - 1, k, k + 1, k + 2,
-				                           prev_keys);
-				BoundBox prev_bounds = BoundBox::empty;
-				curve.bounds_grow(prev_keys, prev_bounds);
-				/* Create all primitive time steps, */
-				for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
-					const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
-					float4 curr_keys[4];
-					curve.cardinal_motion_keys(curve_keys,
-					                           curve_radius,
-					                           key_steps,
-					                           num_keys,
-					                           num_steps,
-					                           curr_time,
-					                           k - 1, k, k + 1, k + 2,
-					                           curr_keys);
-					BoundBox curr_bounds = BoundBox::empty;
-					curve.bounds_grow(curr_keys, curr_bounds);
-					BoundBox bounds = prev_bounds;
-					bounds.grow(curr_bounds);
-					if(bounds.valid()) {
-						const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
-						int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
-						references.push_back(BVHReference(bounds,
-						                                  j,
-						                                  i,
-						                                  packed_type,
-						                                  prev_time,
-						                                  curr_time));
-						root.grow(bounds);
-						center.grow(bounds.center2());
-					}
-					/* Current time boundbox becomes previous one for the
-					 * next time step.
-					 */
-					prev_bounds = curr_bounds;
-				}
-			}
 		}
 	}
 }

-void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
-{
-	if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
-		add_reference_triangles(root, center, mesh, i);
-	}
-	if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
-		add_reference_curves(root, center, mesh, i);
-	}
-}
-
 void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i)
 {
 	references.push_back(BVHReference(ob->bounds, -1, i, 0));
@@ -357,7 +200,7 @@ static size_t count_curve_segments(Mesh *mesh)

 	for(size_t i = 0; i < num_curves; i++)
 		num += mesh->get_curve(i).num_keys - 1;
-
+	
 	return num;
 }

@@ -501,7 +344,6 @@ BVHNode* BVHBuild::run()
 		else {
 			/*rotate(rootnode, 4, 5);*/
 			rootnode->update_visibility();
-			rootnode->update_time();
 		}
 		if(rootnode != NULL) {
 			VLOG(1) << "BVH build statistics:\n"
@@ -529,7 +371,7 @@ void BVHBuild::progress_update()
 {
 	if(time_dt() - progress_start_time < 0.25)
 		return;
-
+	
 	double progress_start = (double)progress_count/(double)progress_total;
 	double duplicates = (double)(progress_total - progress_original_total)/(double)progress_total;

@@ -537,7 +379,7 @@ void BVHBuild::progress_update()
 	                           progress_start * 100.0, duplicates * 100.0);

 	progress.set_substatus(msg);
-	progress_start_time = time_dt();
+	progress_start_time = time_dt(); 
 }

 void BVHBuild::thread_build_node(InnerNode *inner,
@@ -593,7 +435,6 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange& range,
 		return false;

 	size_t num_triangles = 0;
-	size_t num_motion_triangles = 0;
 	size_t num_curves = 0;
 	size_t num_motion_curves = 0;

@@ -604,16 +445,13 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange& range,
 			num_curves++;
 		if(ref.prim_type() & PRIMITIVE_MOTION_CURVE)
 			num_motion_curves++;
-		else if(ref.prim_type() & PRIMITIVE_TRIANGLE)
+		else if(ref.prim_type() & PRIMITIVE_ALL_TRIANGLE)
 			num_triangles++;
-		else if(ref.prim_type() & PRIMITIVE_MOTION_TRIANGLE)
-			num_motion_triangles++;
 	}

-	return (num_triangles <= params.max_triangle_leaf_size) &&
-	       (num_motion_triangles <= params.max_motion_triangle_leaf_size) &&
-	       (num_curves <= params.max_curve_leaf_size) &&
-	       (num_motion_curves <= params.max_motion_curve_leaf_size);
+	return (num_triangles < params.max_triangle_leaf_size) &&
+	       (num_curves < params.max_curve_leaf_size) &&
+	       (num_motion_curves < params.max_curve_leaf_size);
 }

 /* multithreaded binning builder */
@@ -851,24 +689,18 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start,
 		prim_object[start] = ref->prim_object();

 		uint visibility = objects[ref->prim_object()]->visibility;
-		BVHNode *leaf_node =  new LeafNode(ref->bounds(), visibility, start, start+1);
-		leaf_node->m_time_from = ref->time_from();
-		leaf_node->m_time_to = ref->time_to();
-		return leaf_node;
+		return new LeafNode(ref->bounds(), visibility, start, start+1);
 	}
 	else {
 		int mid = num/2;
-		BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid);
-		BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid);
+		BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid); 
+		BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid); 

 		BoundBox bounds = BoundBox::empty;
 		bounds.grow(leaf0->m_bounds);
 		bounds.grow(leaf1->m_bounds);

-		BVHNode *inner_node = new InnerNode(bounds, leaf0, leaf1);
-		inner_node->m_time_from = min(leaf0->m_time_from, leaf1->m_time_from);
-		inner_node->m_time_to = max(leaf0->m_time_to, leaf1->m_time_to);
-		return inner_node;
+		return new InnerNode(bounds, leaf0, leaf1);
 	}
 }

@@ -972,16 +804,6 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
 			                                   visibility[i],
 			                                   start_index,
 			                                   start_index + num);
-			if(true) {
-				float time_from = 1.0f, time_to = 0.0f;
-				for(int j = 0; j < num; ++j) {
-					const BVHReference &ref = p_ref[i][j];
-					time_from = min(time_from, ref.time_from());
-					time_to = max(time_to, ref.time_to());
-				}
-				leaf_node->m_time_from = time_from;
-				leaf_node->m_time_to = time_to;
-			}
 			if(alignment_found) {
 				/* Need to recalculate leaf bounds with new alignment. */
 				leaf_node->m_bounds = BoundBox::empty;
@@ -1096,7 +918,7 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
 		BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]);
 		return new InnerNode(range.bounds(), leaves[0], inner);
 	} else {
-		/* Should be doing more branches if more primitive types added. */
+		/* Should be doing more branches if more primitive types added. */
 		assert(num_leaves <= 5);
 		BoundBox inner_bounds_a = merge(leaves[0]->m_bounds, leaves[1]->m_bounds);
 		BoundBox inner_bounds_b = merge(leaves[2]->m_bounds, leaves[3]->m_bounds);
@@ -1129,7 +951,7 @@ void BVHBuild::rotate(BVHNode *node, int max_depth)
 	/* nothing to rotate if we reached a leaf node. */
 	if(node->is_leaf() || max_depth < 0)
 		return;
-
+	
 	InnerNode *parent = (InnerNode*)node;

 	/* rotate all children first */
--- a/intern/cycles/bvh/bvh_build.h
+++ b/intern/cycles/bvh/bvh_build.h
@@ -63,8 +63,6 @@ protected:
 	friend class BVHObjectBinning;

 	/* Adding references. */
-	void add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
-	void add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
 	void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
 	void add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i);
 	void add_references(BVHRange& root);
--- a/intern/cycles/bvh/bvh_node.cpp
+++ b/intern/cycles/bvh/bvh_node.cpp
@@ -176,19 +176,6 @@ uint BVHNode::update_visibility()
 	return m_visibility;
 }

-void BVHNode::update_time()
-{
-	if(!is_leaf()) {
-		InnerNode *inner = (InnerNode*)this;
-		BVHNode *child0 = inner->children[0];
-		BVHNode *child1 = inner->children[1];
-		child0->update_time();
-		child1->update_time();
-		m_time_from = min(child0->m_time_from, child1->m_time_from);
-		m_time_to =  max(child0->m_time_to, child1->m_time_to);
-	}
-}
-
 /* Inner Node */

 void InnerNode::print(int depth) const
--- a/intern/cycles/bvh/bvh_node.h
+++ b/intern/cycles/bvh/bvh_node.h
@@ -47,9 +47,7 @@ class BVHNode
 {
 public:
 	BVHNode() : m_is_unaligned(false),
-	            m_aligned_space(NULL),
-	            m_time_from(0.0f),
-	            m_time_to(1.0f)
+	            m_aligned_space(NULL)
 	{
 	}

@@ -93,15 +91,12 @@ public:
 	void deleteSubtree();

 	uint update_visibility();
-	void update_time();

 	bool m_is_unaligned;

 	// TODO(sergey): Can be stored as 3x3 matrix, but better to have some
 	// utilities and type defines in util_transform first.
 	Transform *m_aligned_space;
-
-	float m_time_from, m_time_to;
 };

 class InnerNode : public BVHNode
--- a/intern/cycles/bvh/bvh_params.h
+++ b/intern/cycles/bvh/bvh_params.h
@@ -43,9 +43,7 @@ public:
 	/* number of primitives in leaf */
 	int min_leaf_size;
 	int max_triangle_leaf_size;
-	int max_motion_triangle_leaf_size;
 	int max_curve_leaf_size;
-	int max_motion_curve_leaf_size;

 	/* object or mesh level bvh */
 	bool top_level;
@@ -61,17 +59,6 @@ public:
 	 */
 	bool use_unaligned_nodes;

-	/* Split time range to this number of steps and create leaf node for each
-	 * of this time steps.
-	 *
-	 * Speeds up rendering of motion curve primitives in the cost of higher
-	 * memory usage.
-	 */
-	int num_motion_curve_steps;
-
-	/* Same as above, but for triangle primitives. */
-	int num_motion_triangle_steps;
-
 	/* fixed parameters */
 	enum {
 		MAX_DEPTH = 64,
@@ -93,17 +80,13 @@ public:

 		min_leaf_size = 1;
 		max_triangle_leaf_size = 8;
-		max_motion_triangle_leaf_size = 8;
-		max_curve_leaf_size = 1;
-		max_motion_curve_leaf_size = 4;
+		max_curve_leaf_size = 2;

 		top_level = false;
 		use_qbvh = false;
 		use_unaligned_nodes = false;

 		primitive_mask = PRIMITIVE_ALL;
-
-		num_motion_curve_steps = 0;
 	}

 	/* SAH costs */
@@ -130,15 +113,8 @@ class BVHReference
 public:
 	__forceinline BVHReference() {}

-	__forceinline BVHReference(const BoundBox& bounds_,
-	                           int prim_index_,
-	                           int prim_object_,
-	                           int prim_type,
-	                           float time_from = 0.0f,
-	                           float time_to = 1.0f)
-	        : rbounds(bounds_),
-	          time_from_(time_from),
-	          time_to_(time_to)
+	__forceinline BVHReference(const BoundBox& bounds_, int prim_index_, int prim_object_, int prim_type)
+	: rbounds(bounds_)
 	{
 		rbounds.min.w = __int_as_float(prim_index_);
 		rbounds.max.w = __int_as_float(prim_object_);
@@ -149,9 +125,6 @@ public:
 	__forceinline int prim_index() const { return __float_as_int(rbounds.min.w); }
 	__forceinline int prim_object() const { return __float_as_int(rbounds.max.w); }
 	__forceinline int prim_type() const { return type; }
-	__forceinline float time_from() const { return time_from_; }
-	__forceinline float time_to() const { return time_to_; }
-

 	BVHReference& operator=(const BVHReference &arg) {
 		if(&arg != this) {
@@ -160,11 +133,9 @@ public:
 		return *this;
 	}

-
 protected:
 	BoundBox rbounds;
 	uint type;
-	float time_from_, time_to_;
 };

 /* BVH Range
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -64,8 +64,6 @@ std::ostream& operator <<(std::ostream &os,
 	   << string_from_bool(requested_features.use_integrator_branched) << std::endl;
 	os << "Use Patch Evaluation: "
 	   << string_from_bool(requested_features.use_patch_evaluation) << std::endl;
-	os << "Use Transparent Shadows: "
-	   << string_from_bool(requested_features.use_transparent) << std::endl;
 	return os;
 }

--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -117,9 +117,6 @@ public:

 	/* Use OpenSubdiv patch evaluation */
 	bool use_patch_evaluation;
-	
-	/* Use Transparent shadows */
-	bool use_transparent;

 	DeviceRequestedFeatures()
 	{
@@ -136,7 +133,6 @@ public:
 		use_volume = false;
 		use_integrator_branched = false;
 		use_patch_evaluation = false;
-		use_transparent = false;
 	}

 	bool modified(const DeviceRequestedFeatures& requested_features)
@@ -152,8 +148,7 @@ public:
 		         use_subsurface == requested_features.use_subsurface &&
 		         use_volume == requested_features.use_volume &&
 		         use_integrator_branched == requested_features.use_integrator_branched &&
-		         use_patch_evaluation == requested_features.use_patch_evaluation &&
-		         use_transparent == requested_features.use_transparent);
+		         use_patch_evaluation == requested_features.use_patch_evaluation);
 	}

 	/* Convert the requested features structure to a build options,
@@ -194,9 +189,6 @@ public:
 		if(!use_patch_evaluation) {
 			build_options += " -D__NO_PATCH_EVAL__";
 		}
-		if(!use_transparent) {
-			build_options += " -D__NO_TRANSPARENT__";
-		}
 		return build_options;
 	}
 };
@@ -228,7 +220,6 @@ public:
 	DeviceInfo info;
 	virtual const string& error_message() { return error_msg; }
 	bool have_error() { return !error_message().empty(); }
-	virtual bool show_samples() const { return false; }

 	/* statistics */
 	Stats &stats;
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -112,11 +112,6 @@ public:
 		task_pool.stop();
 	}

-	virtual bool show_samples() const
-	{
-		return (TaskScheduler::num_threads() == 1);
-	}
-
 	void mem_alloc(device_memory& mem, MemoryType /*type*/)
 	{
 		mem.device_pointer = mem.data_pointer;
@@ -280,7 +275,7 @@ public:

 				tile.sample = sample + 1;

-				task.update_progress(&tile, tile.w*tile.h);
+				task.update_progress(&tile);
 			}

 			task.release_tile(tile);
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -115,12 +115,6 @@ public:
 		return path_exists(cubins_path);
 	}

-	virtual bool show_samples() const
-	{
-		/* The CUDADevice only processes one tile at a time, so showing samples is fine. */
-		return true;
-	}
-
 /*#ifdef NDEBUG
 #define cuda_abort()
 #else
@@ -130,7 +124,7 @@ public:
 	{
 		if(first_error) {
 			fprintf(stderr, "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
-			fprintf(stderr, "https://docs.blender.org/manual/en/dev/render/cycles/gpu_rendering.html\n\n");
+			fprintf(stderr, "http://www.blender.org/manual/render/cycles/gpu_rendering.html\n\n");
 			first_error = false;
 		}
 	}
@@ -1273,7 +1267,7 @@ public:

 					tile.sample = sample + 1;

-					task->update_progress(&tile, tile.w*tile.h);
+					task->update_progress(&tile);
 				}

 				task->release_tile(tile);
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -89,14 +89,6 @@ public:
 		return error_msg;
 	}

-	virtual bool show_samples() const
-	{
-		if(devices.size() > 1) {
-			return false;
-		}
-		return devices.front().device->show_samples();
-	}
-
 	bool load_kernels(const DeviceRequestedFeatures& requested_features)
 	{
 		foreach(SubDevice& sub, devices)
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -51,11 +51,6 @@ public:

 	thread_mutex rpc_lock;

-	virtual bool show_samples() const
-	{
-		return false;
-	}
-
 	NetworkDevice(DeviceInfo& info, Stats &stats, const char *address)
 	: Device(info, stats, true), socket(io_service)
 	{
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -19,8 +19,6 @@

 #include "device_task.h"

-#include "buffers.h"
-
 #include "util_algorithm.h"
 #include "util_time.h"

@@ -101,18 +99,14 @@ void DeviceTask::split(list<DeviceTask>& tasks, int num, int max_size)
 	}
 }

-void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
+void DeviceTask::update_progress(RenderTile *rtile)
 {
 	if((type != PATH_TRACE) &&
 	   (type != SHADER))
 		return;

-	if(update_progress_sample) {
-		if(pixel_samples == -1) {
-			pixel_samples = shader_w;
-		}
-		update_progress_sample(pixel_samples, rtile? rtile->sample : 0);
-	}
+	if(update_progress_sample)
+		update_progress_sample();

 	if(update_tile_sample) {
 		double current_time = time_dt();
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -56,10 +56,10 @@ public:
 	int get_subtask_count(int num, int max_size = 0);
 	void split(list<DeviceTask>& tasks, int num, int max_size = 0);

-	void update_progress(RenderTile *rtile, int pixel_samples = -1);
+	void update_progress(RenderTile *rtile);

 	function<bool(Device *device, RenderTile&)> acquire_tile;
-	function<void(long, int)> update_progress_sample;
+	function<void(void)> update_progress_sample;
 	function<void(RenderTile&)> update_tile_sample;
 	function<void(RenderTile&)> release_tile;
 	function<bool(void)> get_cancel;
--- a/intern/cycles/device/opencl/opencl_mega.cpp
+++ b/intern/cycles/device/opencl/opencl_mega.cpp
@@ -39,10 +39,6 @@ public:
 	{
 	}

-	virtual bool show_samples() const {
-		return true;
-	}
-
 	virtual void load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
 	                          vector<OpenCLProgram*> &programs)
 	{
@@ -124,7 +120,7 @@ public:

 					tile.sample = sample + 1;

-					task->update_progress(&tile, tile.w*tile.h);
+					task->update_progress(&tile);
 				}

 				/* Complete kernel execution before release tile */
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -247,10 +247,6 @@ public:
 		}
 	}

-	virtual bool show_samples() const {
-		return false;
-	}
-
 	/* Split kernel utility functions. */
 	size_t get_tex_size(const char *tex_name)
 	{
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -164,8 +164,6 @@ set(SRC_GEOM_HEADERS
 	geom/geom_curve.h
 	geom/geom_motion_curve.h
 	geom/geom_motion_triangle.h
-	geom/geom_motion_triangle_intersect.h
-	geom/geom_motion_triangle_shader.h
 	geom/geom_object.h
 	geom/geom_patch.h
 	geom/geom_primitive.h
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -187,7 +187,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,

 					/* primitive intersection */
 					while(prim_addr < prim_addr2) {
-						kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
+						kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);

 						bool hit;

@@ -222,7 +222,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 #if BVH_FEATURE(BVH_HAIR)
 							case PRIMITIVE_CURVE:
 							case PRIMITIVE_MOTION_CURVE: {
-								const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
 								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
 									hit = bvh_cardinal_curve_intersect(kg,
 									                                   isect_array,
@@ -232,7 +231,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 									                                   object,
 									                                   prim_addr,
 									                                   ray->time,
-									                                   curve_type,
+									                                   type,
 									                                   NULL,
 									                                   0, 0);
 								}
@@ -245,7 +244,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 									                          object,
 									                          prim_addr,
 									                          ray->time,
-									                          curve_type,
+									                          type,
 									                          NULL,
 									                          0, 0);
 								}
--- a/intern/cycles/kernel/bvh/bvh_subsurface.h
+++ b/intern/cycles/kernel/bvh/bvh_subsurface.h
@@ -72,7 +72,7 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 	ss_isect->num_hits = 0;

 	const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object);
-	if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+	if(!(object_flag & SD_TRANSFORM_APPLIED)) {
 #if BVH_FEATURE(BVH_MOTION)
 		Transform ob_itfm;
 		bvh_instance_motion_push(kg,
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -213,7 +213,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 						--stack_ptr;
 					}
 				}
-				BVH_DEBUG_NEXT_NODE();
+				BVH_DEBUG_NEXT_STEP();
 			}

 			/* if node is leaf, fetch triangle list */
@@ -235,7 +235,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 					switch(type & PRIMITIVE_ALL) {
 						case PRIMITIVE_TRIANGLE: {
 							for(; prim_addr < prim_addr2; prim_addr++) {
-								BVH_DEBUG_NEXT_INTERSECTION();
+								BVH_DEBUG_NEXT_STEP();
 								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
 								if(triangle_intersect(kg,
 								                      &isect_precalc,
@@ -264,7 +264,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 #if BVH_FEATURE(BVH_MOTION)
 						case PRIMITIVE_MOTION_TRIANGLE: {
 							for(; prim_addr < prim_addr2; prim_addr++) {
-								BVH_DEBUG_NEXT_INTERSECTION();
+								BVH_DEBUG_NEXT_STEP();
 								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
 								if(motion_triangle_intersect(kg,
 								                             isect,
@@ -296,9 +296,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 						case PRIMITIVE_CURVE:
 						case PRIMITIVE_MOTION_CURVE: {
 							for(; prim_addr < prim_addr2; prim_addr++) {
-								BVH_DEBUG_NEXT_INTERSECTION();
-								const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
-								kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
+								BVH_DEBUG_NEXT_STEP();
+								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
 								bool hit;
 								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
 									hit = bvh_cardinal_curve_intersect(kg,
@@ -309,7 +308,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 									                                   object,
 									                                   prim_addr,
 									                                   ray->time,
-									                                   curve_type,
+									                                   type,
 									                                   lcg_state,
 									                                   difl,
 									                                   extmax);
@@ -323,7 +322,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 									                          object,
 									                          prim_addr,
 									                          ray->time,
-									                          curve_type,
+									                          type,
 									                          lcg_state,
 									                          difl,
 									                          extmax);
--- a/intern/cycles/kernel/bvh/bvh_types.h
+++ b/intern/cycles/kernel/bvh/bvh_types.h
@@ -50,17 +50,12 @@ CCL_NAMESPACE_BEGIN
 #ifdef __KERNEL_DEBUG__
 #  define BVH_DEBUG_INIT() \
 	do { \
-		isect->num_traversed_nodes = 0; \
+		isect->num_traversal_steps = 0; \
 		isect->num_traversed_instances = 0; \
-		isect->num_intersections = 0; \
 	} while(0)
-#  define BVH_DEBUG_NEXT_NODE() \
+#  define BVH_DEBUG_NEXT_STEP() \
 	do { \
-		++isect->num_traversed_nodes; \
-	} while(0)
-#  define BVH_DEBUG_NEXT_INTERSECTION() \
-	do { \
-		++isect->num_intersections; \
+		++isect->num_traversal_steps; \
 	} while(0)
 #  define BVH_DEBUG_NEXT_INSTANCE() \
 	do { \
@@ -68,8 +63,7 @@ CCL_NAMESPACE_BEGIN
 	} while(0)
 #else  /* __KERNEL_DEBUG__ */
 #  define BVH_DEBUG_INIT()
-#  define BVH_DEBUG_NEXT_NODE()
-#  define BVH_DEBUG_NEXT_INTERSECTION()
+#  define BVH_DEBUG_NEXT_STEP()
 #  define BVH_DEBUG_NEXT_INSTANCE()
 #endif  /* __KERNEL_DEBUG__ */

--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -236,7 +236,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 					/* instance push */
 					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
 					int object_flag = kernel_tex_fetch(__object_flag, object);
+
 					if(object_flag & SD_OBJECT_HAS_VOLUME) {
+
 #  if BVH_FEATURE(BVH_MOTION)
 						bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
 #  else
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -287,6 +287,7 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 					/* instance push */
 					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
 					int object_flag = kernel_tex_fetch(__object_flag, object);
+
 					if(object_flag & SD_OBJECT_HAS_VOLUME) {

 #  if BVH_FEATURE(BVH_MOTION)
--- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
@@ -106,20 +106,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 			while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
 				float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);

-				if(false
 #ifdef __VISIBILITY_FLAG__
-				   || ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
-#endif
-#if BVH_FEATURE(BVH_MOTION)
-				   || UNLIKELY(ray->time < inodes.y)
-				   || UNLIKELY(ray->time > inodes.z)
-#endif
-				) {
+				if((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0) {
 					/* Pop. */
 					node_addr = traversal_stack[stack_ptr].addr;
 					--stack_ptr;
 					continue;
 				}
+#endif

 				ssef dist;
 				int child_mask = NODE_INTERSECT(kg,
@@ -268,7 +262,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,

 					/* Primitive intersection. */
 					while(prim_addr < prim_addr2) {
-						kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
+						kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);

 						bool hit;

@@ -303,7 +297,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 #if BVH_FEATURE(BVH_HAIR)
 							case PRIMITIVE_CURVE:
 							case PRIMITIVE_MOTION_CURVE: {
-								const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
 								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
 									hit = bvh_cardinal_curve_intersect(kg,
 									                                   isect_array,
@@ -313,7 +306,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 									                                   object,
 									                                   prim_addr,
 									                                   ray->time,
-									                                   curve_type,
+									                                   type,
 									                                   NULL,
 									                                   0, 0);
 								}
@@ -326,7 +319,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 									                          object,
 									                          prim_addr,
 									                          ray->time,
-									                          curve_type,
+									                          type,
 									                          NULL,
 									                          0, 0);
 								}
--- a/intern/cycles/kernel/bvh/qbvh_subsurface.h
+++ b/intern/cycles/kernel/bvh/qbvh_subsurface.h
@@ -61,7 +61,7 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 	ss_isect->num_hits = 0;

 	const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object);
-	if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+	if(!(object_flag & SD_TRANSFORM_APPLIED)) {
 #if BVH_FEATURE(BVH_MOTION)
 		Transform ob_itfm;
 		bvh_instance_motion_push(kg,
--- a/intern/cycles/kernel/bvh/qbvh_traversal.h
+++ b/intern/cycles/kernel/bvh/qbvh_traversal.h
@@ -117,10 +117,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 				float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);

 				if(UNLIKELY(node_dist > isect->t)
-#if BVH_FEATURE(BVH_MOTION)
-				   || UNLIKELY(ray->time < inodes.y)
-				   || UNLIKELY(ray->time > inodes.z)
-#endif
 #ifdef __VISIBILITY_FLAG__
 				   || (__float_as_uint(inodes.x) & visibility) == 0)
 #endif
@@ -135,7 +131,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 				int child_mask;
 				ssef dist;

-				BVH_DEBUG_NEXT_NODE();
+				BVH_DEBUG_NEXT_STEP();

 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
 				if(difl != 0.0f) {
@@ -330,7 +326,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 					switch(type & PRIMITIVE_ALL) {
 						case PRIMITIVE_TRIANGLE: {
 							for(; prim_addr < prim_addr2; prim_addr++) {
-								BVH_DEBUG_NEXT_INTERSECTION();
+								BVH_DEBUG_NEXT_STEP();
 								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
 								if(triangle_intersect(kg,
 								                      &isect_precalc,
@@ -351,7 +347,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 #if BVH_FEATURE(BVH_MOTION)
 						case PRIMITIVE_MOTION_TRIANGLE: {
 							for(; prim_addr < prim_addr2; prim_addr++) {
-								BVH_DEBUG_NEXT_INTERSECTION();
+								BVH_DEBUG_NEXT_STEP();
 								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
 								if(motion_triangle_intersect(kg,
 								                             isect,
@@ -375,9 +371,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 						case PRIMITIVE_CURVE:
 						case PRIMITIVE_MOTION_CURVE: {
 							for(; prim_addr < prim_addr2; prim_addr++) {
-								BVH_DEBUG_NEXT_INTERSECTION();
-								const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
-								kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
+								BVH_DEBUG_NEXT_STEP();
+								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
 								bool hit;
 								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
 									hit = bvh_cardinal_curve_intersect(kg,
@@ -388,7 +383,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 									                                   object,
 									                                   prim_addr,
 									                                   ray->time,
-									                                   curve_type,
+									                                   type,
 									                                   lcg_state,
 									                                   difl,
 									                                   extmax);
@@ -402,7 +397,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 									                          object,
 									                          prim_addr,
 									                          ray->time,
-									                          curve_type,
+									                          type,
 									                          lcg_state,
 									                          difl,
 									                          extmax);
--- a/intern/cycles/kernel/bvh/qbvh_volume.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume.h
@@ -293,7 +293,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 					/* Instance push. */
 					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
 					int object_flag = kernel_tex_fetch(__object_flag, object);
+
 					if(object_flag & SD_OBJECT_HAS_VOLUME) {
+
 #  if BVH_FEATURE(BVH_MOTION)
 						bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
 #  else
--- a/intern/cycles/kernel/bvh/qbvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h
@@ -344,7 +344,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 					/* Instance push. */
 					object = kernel_tex_fetch(__prim_object, -prim_addr-1);
 					int object_flag = kernel_tex_fetch(__object_flag, object);
+
 					if(object_flag & SD_OBJECT_HAS_VOLUME) {
+
 #  if BVH_FEATURE(BVH_MOTION)
 						bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
 #  else
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
@@ -143,7 +143,6 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
 {
 	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
 	float3 N = bsdf->N;
-	int label = LABEL_REFLECT | LABEL_GLOSSY;

 	float NdotI = dot(N, I);
 	if(NdotI > 0.0f) {
@@ -212,7 +211,6 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
 			/* Some high number for MIS. */
 			*pdf = 1e6f;
 			*eval = make_float3(1e6f, 1e6f, 1e6f);
-			label = LABEL_REFLECT | LABEL_SINGULAR;
 		}
 		else {
 			/* leave the rest to eval_reflect */
@@ -226,7 +224,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
 #endif
 	}

-	return label;
+	return LABEL_REFLECT|LABEL_GLOSSY;
 }


--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -267,10 +267,7 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng,

 	*eval = make_float3(*pdf, *pdf, *pdf);

-	/* TODO(sergey): Should always be negative, but seems some precision issue
-	 * is involved here.
-	 */
-	kernel_assert(dot(locy, *omega_in) < 1e-4f);
+	kernel_assert(dot(locy, *omega_in) < 0.0f);

 	return LABEL_TRANSMIT|LABEL_GLOSSY;
 }
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -452,7 +452,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
 	float alpha_y = bsdf->alpha_y;
 	bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
 	float3 N = bsdf->N;
-	int label;

 	float cosNO = dot(N, I);
 	if(cosNO > 0) {
@@ -478,7 +477,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
 		/* reflection or refraction? */
 		if(!m_refractive) {
 			float cosMO = dot(m, I);
-			label = LABEL_REFLECT | LABEL_GLOSSY;

 			if(cosMO > 0) {
 				/* eq. 39 - compute actual reflected direction */
@@ -489,7 +487,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
 						/* some high number for MIS */
 						*pdf = 1e6f;
 						*eval = make_float3(1e6f, 1e6f, 1e6f);
-						label = LABEL_REFLECT | LABEL_SINGULAR;
 					}
 					else {
 						/* microfacet normal is visible to this ray */
@@ -552,8 +549,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
 			}
 		}
 		else {
-			label = LABEL_TRANSMIT | LABEL_GLOSSY;
-
 			/* CAUTION: the i and o variables are inverted relative to the paper
 			 * eq. 39 - compute actual refractive direction */
 			float3 R, T;
@@ -581,7 +576,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
 					/* some high number for MIS */
 					*pdf = 1e6f;
 					*eval = make_float3(1e6f, 1e6f, 1e6f);
-					label = LABEL_TRANSMIT | LABEL_SINGULAR;
 				}
 				else {
 					/* eq. 33 */
@@ -613,10 +607,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
 			}
 		}
 	}
-	else {
-		label = (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
-	}
-	return label;
+	return (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
 }

 /* Beckmann microfacet with Smith shadow-masking from:
@@ -824,7 +815,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
 	float alpha_y = bsdf->alpha_y;
 	bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
 	float3 N = bsdf->N;
-	int label;

 	float cosNO = dot(N, I);
 	if(cosNO > 0) {
@@ -849,7 +839,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl

 		/* reflection or refraction? */
 		if(!m_refractive) {
-			label = LABEL_REFLECT | LABEL_GLOSSY;
 			float cosMO = dot(m, I);

 			if(cosMO > 0) {
@@ -861,7 +850,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
 						/* some high number for MIS */
 						*pdf = 1e6f;
 						*eval = make_float3(1e6f, 1e6f, 1e6f);
-						label = LABEL_REFLECT | LABEL_SINGULAR;
 					}
 					else {
 						/* microfacet normal is visible to this ray
@@ -916,8 +904,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
 			}
 		}
 		else {
-			label = LABEL_TRANSMIT | LABEL_GLOSSY;
-
 			/* CAUTION: the i and o variables are inverted relative to the paper
 			 * eq. 39 - compute actual refractive direction */
 			float3 R, T;
@@ -945,7 +931,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
 					/* some high number for MIS */
 					*pdf = 1e6f;
 					*eval = make_float3(1e6f, 1e6f, 1e6f);
-					label = LABEL_TRANSMIT | LABEL_SINGULAR;
 				}
 				else {
 					/* eq. 33 */
@@ -978,10 +963,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
 			}
 		}
 	}
-	else {
-		label = (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
-	}
-	return label;
+	return (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -23,8 +23,6 @@
 #include "geom_subd_triangle.h"
 #include "geom_triangle_intersect.h"
 #include "geom_motion_triangle.h"
-#include "geom_motion_triangle_intersect.h"
-#include "geom_motion_triangle_shader.h"
 #include "geom_motion_curve.h"
 #include "geom_curve.h"
 #include "geom_volume.h"
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -255,17 +255,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
 		int ka = max(k0 - 1, v00.x);
 		int kb = min(k1 + 1, v00.x + v00.y - 1);

-#ifdef __KERNEL_AVX2__
-		avxf P_curve_0_1, P_curve_2_3;
-		if(type & PRIMITIVE_CURVE) {
-			P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
-			P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x);
-		}
-		else {
-			int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
-			motion_cardinal_curve_keys_avx(kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1,&P_curve_2_3);
-		}
-#else  /* __KERNEL_AVX2__ */
 		ssef P_curve[4];

 		if(type & PRIMITIVE_CURVE) {
@@ -278,7 +267,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
 			int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
 			motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve);
 		}
-#endif  /* __KERNEL_AVX2__ */

 		ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
 		ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
@@ -290,33 +278,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
 		ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
 		ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);

-#ifdef __KERNEL_AVX2__
-		const avxf vPP = _mm256_broadcast_ps(&P.m128);
-		const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
-		const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
-		const avxf htfm22 = avxf(htfm2.m128, htfm2.m128);
-
-		const avxf p01 = madd(shuffle<0>(P_curve_0_1 - vPP),
-		                      htfm00,
-		                      madd(shuffle<1>(P_curve_0_1 - vPP),
-		                           htfm11,
-		                           shuffle<2>(P_curve_0_1 - vPP) * htfm22));
-		const avxf p23 = madd(shuffle<0>(P_curve_2_3 - vPP),
-		                      htfm00,
-		                      madd(shuffle<1>(P_curve_2_3 - vPP),
-		                           htfm11,
-		                           shuffle<2>(P_curve_2_3 - vPP)*htfm22));
-
-		const ssef p0 = _mm256_castps256_ps128(p01);
-		const ssef p1 = _mm256_extractf128_ps(p01, 1);
-		const ssef p2 = _mm256_castps256_ps128(p23);
-		const ssef p3 = _mm256_extractf128_ps(p23, 1);
-
-		const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1);
-		r_st = ((float4 &)P_curve_1).w;
-		const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3);
-		r_en = ((float4 &)P_curve_2).w;
-#else  /* __KERNEL_AVX2__ */
 		ssef htfm[] = { htfm0, htfm1, htfm2 };
 		ssef vP = load4f(P);
 		ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
@@ -324,10 +285,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
 		ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
 		ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);

-		r_st = ((float4 &)P_curve[1]).w;
-		r_en = ((float4 &)P_curve[2]).w;
-#endif  /* __KERNEL_AVX2__ */
-
 		float fc = 0.71f;
 		ssef vfc = ssef(fc);
 		ssef vfcxp3 = vfc * p3;
@@ -337,6 +294,8 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
 		vcurve_coef[2] = madd(ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
 		vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));

+		r_st = ((float4 &)P_curve[1]).w;
+		r_en = ((float4 &)P_curve[2]).w;
 	}
 #else
 	float3 curve_coef[4];
@@ -424,9 +383,8 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte

 	/* begin loop */
 	while(!(tree >> (depth))) {
-		const float i_st = tree * resol;
-		const float i_en = i_st + (level * resol);
-
+		float i_st = tree * resol;
+		float i_en = i_st + (level * resol);
 #ifdef __KERNEL_SSE2__
 		ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
 		ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]);
@@ -500,23 +458,13 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte

 			if(flags & CURVE_KN_RIBBONS) {
 				float3 tg = (p_en - p_st);
-#ifdef __KERNEL_SSE__
-				const float3 tg_sq = tg * tg;
-				float w = tg_sq.x + tg_sq.y;
-#else
 				float w = tg.x * tg.x + tg.y * tg.y;
-#endif
 				if(w == 0) {
 					tree++;
 					level = tree & -tree;
 					continue;
 				}
-#ifdef __KERNEL_SSE__
-				const float3 p_sttg = p_st * tg;
-				w = -(p_sttg.x + p_sttg.y) / w;
-#else
 				w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
-#endif
 				w = saturate(w);

 				/* compute u on the curve segment */
@@ -548,13 +496,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
 				if(difl != 0.0f) {
 					mw_extension = min(difl * fabsf(bmaxz), extmax);
 					r_ext = mw_extension + r_curr;
-#ifdef __KERNEL_SSE__
-					const float3 p_curr_sq = p_curr * p_curr;
-					const float3 dxxx = _mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128));
-					float d = dxxx.x;
-#else
 					float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
-#endif
 					float d0 = d - r_curr;
 					float d1 = d + r_curr;
 					float inv_mw_extension = 1.0f/mw_extension;
@@ -911,7 +853,7 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection
 #  undef len3_squared
 #  undef len3
 #  undef dot3
-#endif
+#  endif
 }

 ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3)
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -50,12 +50,12 @@ ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg, int object,
 ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, float4 keys[2])
 {
 	if(step == numsteps) {
-		/* center step: regular key location */
+		/* center step: regular vertex location */
 		keys[0] = kernel_tex_fetch(__curve_keys, k0);
 		keys[1] = kernel_tex_fetch(__curve_keys, k1);
 	}
 	else {
-		/* center step is not stored in this array */
+		/* center step not stored in this array */
 		if(step > numsteps)
 			step--;

@@ -97,14 +97,14 @@ ccl_device_inline void motion_curve_keys(KernelGlobals *kg, int object, int prim
 ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, int k2, int k3, float4 keys[4])
 {
 	if(step == numsteps) {
-		/* center step: regular key location */
+		/* center step: regular vertex location */
 		keys[0] = kernel_tex_fetch(__curve_keys, k0);
 		keys[1] = kernel_tex_fetch(__curve_keys, k1);
 		keys[2] = kernel_tex_fetch(__curve_keys, k2);
 		keys[3] = kernel_tex_fetch(__curve_keys, k3);
 	}
 	else {
-		/* center step is not stored in this array */
+		/* center step not stored in this array */
 		if(step > numsteps)
 			step--;

@@ -118,12 +118,7 @@ ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, in
 }

 /* return 2 curve key locations */
-ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
-                                                  int object,
-                                                  int prim,
-                                                  float time,
-                                                  int k0, int k1, int k2, int k3,
-                                                  float4 keys[4])
+ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, int k2, int k3, float4 keys[4])
 {
 	/* get motion info */
 	int numsteps, numkeys;
@@ -152,65 +147,6 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
 	keys[3] = (1.0f - t)*keys[3] + t*next_keys[3];
 }

-#ifdef __KERNEL_AVX2__
-/* Similar to above, but returns keys as pair of two AVX registers with each
- * holding two float4.
- */
-ccl_device_inline void motion_cardinal_curve_keys_avx(KernelGlobals *kg,
-                                                      int object,
-                                                      int prim,
-                                                      float time,
-                                                      int k0, int k1,
-                                                      int k2, int k3,
-                                                      avxf *out_keys_0_1,
-                                                      avxf *out_keys_2_3)
-{
-	/* Get motion info. */
-	int numsteps, numkeys;
-	object_motion_info(kg, object, &numsteps, NULL, &numkeys);
-
-	/* Figure out which steps we need to fetch and their interpolation factor. */
-	int maxstep = numsteps * 2;
-	int step = min((int)(time*maxstep), maxstep - 1);
-	float t = time*maxstep - step;
-
-	/* Find attribute. */
-	AttributeElement elem;
-	int offset = find_attribute_curve_motion(kg,
-	                                         object,
-	                                         ATTR_STD_MOTION_VERTEX_POSITION,
-	                                         &elem);
-	kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
-	/* Fetch key coordinates. */
-	float4 next_keys[4];
-	float4 keys[4];
-	motion_cardinal_curve_keys_for_step(kg,
-	                                    offset,
-	                                    numkeys,
-	                                    numsteps,
-	                                    step,
-	                                    k0, k1, k2, k3,
-	                                    keys);
-	motion_cardinal_curve_keys_for_step(kg,
-	                                    offset,
-	                                    numkeys,
-	                                    numsteps,
-	                                    step + 1,
-	                                    k0, k1, k2, k3,
-	                                    next_keys);
-
-	const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128);
-	const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128);
-	const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128);
-	const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128);
-
-	/* Interpolate between steps. */
-	*out_keys_0_1 = (1.0f - t) * keys_0_1 + t*next_keys_0_1;
-	*out_keys_2_3 = (1.0f - t) * keys_2_3 + t*next_keys_2_3;
-}
-#endif
-
 #endif

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -76,7 +76,7 @@ ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, uint4
 		normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
 	}
 	else {
-		/* center step is not stored in this array */
+		/* center step not stored in this array */
 		if(step > numsteps)
 			step--;

@@ -117,4 +117,312 @@ ccl_device_inline void motion_triangle_vertices(KernelGlobals *kg, int object, i
 	verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
 }

+/* Refine triangle intersection to more precise hit point. For rays that travel
+ * far the precision is often not so good, this reintersects the primitive from
+ * a closer distance. */
+
+ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3])
+{
+	float3 P = ray->P;
+	float3 D = ray->D;
+	float t = isect->t;
+
+#ifdef __INTERSECTION_REFINE__
+	if(isect->object != OBJECT_NONE) {
+		if(UNLIKELY(t == 0.0f)) {
+			return P;
+		}
+#  ifdef __OBJECT_MOTION__
+		Transform tfm = ccl_fetch(sd, ob_itfm);
+#  else
+		Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
+#  endif
+
+		P = transform_point(&tfm, P);
+		D = transform_direction(&tfm, D*t);
+		D = normalize_len(D, &t);
+	}
+
+	P = P + D*t;
+
+	/* compute refined intersection distance */
+	const float3 e1 = verts[0] - verts[2];
+	const float3 e2 = verts[1] - verts[2];
+	const float3 s1 = cross(D, e2);
+
+	const float invdivisor = 1.0f/dot(s1, e1);
+	const float3 d = P - verts[2];
+	const float3 s2 = cross(d, e1);
+	float rt = dot(e2, s2)*invdivisor;
+
+	/* compute refined position */
+	P = P + D*rt;
+
+	if(isect->object != OBJECT_NONE) {
+#  ifdef __OBJECT_MOTION__
+		Transform tfm = ccl_fetch(sd, ob_tfm);
+#  else
+		Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
+#  endif
+
+		P = transform_point(&tfm, P);
+	}
+
+	return P;
+#else
+	return P + D*t;
+#endif
+}
+
+/* Same as above, except that isect->t is assumed to be in object space for instancing */
+
+#ifdef __SUBSURFACE__
+#  if defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86))
+ccl_device_noinline
+#  else
+ccl_device_inline
+#  endif
+float3 motion_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3])
+{
+	float3 P = ray->P;
+	float3 D = ray->D;
+	float t = isect->t;
+
+#  ifdef __INTERSECTION_REFINE__
+	if(isect->object != OBJECT_NONE) {
+#    ifdef __OBJECT_MOTION__
+		Transform tfm = ccl_fetch(sd, ob_itfm);
+#    else
+		Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
+#    endif
+
+		P = transform_point(&tfm, P);
+		D = transform_direction(&tfm, D);
+		D = normalize(D);
+	}
+
+	P = P + D*t;
+
+	/* compute refined intersection distance */
+	const float3 e1 = verts[0] - verts[2];
+	const float3 e2 = verts[1] - verts[2];
+	const float3 s1 = cross(D, e2);
+
+	const float invdivisor = 1.0f/dot(s1, e1);
+	const float3 d = P - verts[2];
+	const float3 s2 = cross(d, e1);
+	float rt = dot(e2, s2)*invdivisor;
+
+	P = P + D*rt;
+
+	if(isect->object != OBJECT_NONE) {
+#    ifdef __OBJECT_MOTION__
+		Transform tfm = ccl_fetch(sd, ob_tfm);
+#    else
+		Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
+#    endif
+
+		P = transform_point(&tfm, P);
+	}
+
+	return P;
+#  else
+	return P + D*t;
+#  endif
+}
+#endif
+
+/* Setup of motion triangle specific parts of ShaderData, moved into this one
+ * function to more easily share computation of interpolated positions and
+ * normals */
+
+/* fills the shader, P, Ng, N and dPdu/dPdv fields of sd from vertices interpolated at the sd time */
+ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool subsurface)
+{
+	/* get shader */
+	ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
+
+	/* get motion info */
+	int numsteps, numverts;
+	object_motion_info(kg, ccl_fetch(sd, object), &numsteps, &numverts, NULL);
+
+	/* figure out which steps we need to fetch and their interpolation factor */
+	int maxstep = numsteps*2;
+	int step = min((int)(ccl_fetch(sd, time)*maxstep), maxstep-1);
+	float t = ccl_fetch(sd, time)*maxstep - step;
+
+	/* find attribute */
+	AttributeElement elem;
+	int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+	kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+	/* fetch vertex coordinates */
+	float3 verts[3], next_verts[3];
+	uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
+
+	motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
+	motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
+
+	/* interpolate between steps */
+	verts[0] = (1.0f - t)*verts[0] + t*next_verts[0];
+	verts[1] = (1.0f - t)*verts[1] + t*next_verts[1];
+	verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
+
+	/* compute refined position */
+#ifdef __SUBSURFACE__
+	if(!subsurface)
+#endif
+		ccl_fetch(sd, P) = motion_triangle_refine(kg, sd, isect, ray, verts);
+#ifdef __SUBSURFACE__
+	else
+		ccl_fetch(sd, P) = motion_triangle_refine_subsurface(kg, sd, isect, ray, verts);
+#endif
+
+	/* compute face normal */
+	float3 Ng;
+	if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED)
+		Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
+	else
+		Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
+
+	ccl_fetch(sd, Ng) = Ng;
+	ccl_fetch(sd, N) = Ng;
+
+	/* compute derivatives of P w.r.t. uv */
+#ifdef __DPDU__
+	ccl_fetch(sd, dPdu) = (verts[0] - verts[2]);
+	ccl_fetch(sd, dPdv) = (verts[1] - verts[2]);
+#endif
+
+	/* compute smooth normal */
+	if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
+		/* find attribute */
+		AttributeElement elem;
+		int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
+		kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+		/* fetch vertex normals */
+		float3 normals[3], next_normals[3];
+		motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
+		motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals);
+
+		/* interpolate between steps */
+		normals[0] = (1.0f - t)*normals[0] + t*next_normals[0];
+		normals[1] = (1.0f - t)*normals[1] + t*next_normals[1];
+		normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
+
+		/* interpolate between vertices */
+		float u = ccl_fetch(sd, u);
+		float v = ccl_fetch(sd, v);
+		float w = 1.0f - u - v;
+		ccl_fetch(sd, N) = (u*normals[0] + v*normals[1] + w*normals[2]);
+	}
+}
+
+/* Ray intersection. We simply compute the vertex positions at the given ray
+ * time and do a ray intersection with the resulting triangle */
+
+ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection *isect,
+	float3 P, float3 dir, float time, uint visibility, int object, int triAddr)
+{
+	/* primitive index for vertex location lookup */
+	int prim = kernel_tex_fetch(__prim_index, triAddr);
+	int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, triAddr): object;
+
+	/* get vertex locations for intersection */
+	float3 verts[3];
+	motion_triangle_vertices(kg, fobject, prim, time, verts);
+
+	/* ray-triangle intersection, unoptimized */
+	float t, u, v;
+
+	if(ray_triangle_intersect_uv(P, dir, isect->t, verts[2], verts[0], verts[1], &u, &v, &t)) {
+#ifdef __VISIBILITY_FLAG__
+		/* visibility flag test. we do it here under the assumption
+		 * that most triangles are culled by node flags */
+		if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
+#endif
+		{
+			isect->t = t;
+			isect->u = u;
+			isect->v = v;
+			isect->prim = triAddr;
+			isect->object = object;
+			isect->type = PRIMITIVE_MOTION_TRIANGLE;
+		
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/* Special ray intersection routines for subsurface scattering. In that case we
+ * only want to intersect with primitives in the same object, and in case of
+ * multiple hits we pick a single random primitive as the intersection point. */
+
+#ifdef __SUBSURFACE__
+ccl_device_inline void motion_triangle_intersect_subsurface(
+        KernelGlobals *kg,
+        SubsurfaceIntersection *ss_isect,
+        float3 P,
+        float3 dir,
+        float time,
+        int object,
+        int triAddr,
+        float tmax,
+        uint *lcg_state,
+        int max_hits)
+{
+	/* primitive index for vertex location lookup */
+	int prim = kernel_tex_fetch(__prim_index, triAddr);
+	int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, triAddr): object;
+
+	/* get vertex locations for intersection */
+	float3 verts[3];
+	motion_triangle_vertices(kg, fobject, prim, time, verts);
+
+	/* ray-triangle intersection, unoptimized */
+	float t, u, v;
+
+	if(ray_triangle_intersect_uv(P, dir, tmax, verts[2], verts[0], verts[1], &u, &v, &t)) {
+		for(int i = min(max_hits, ss_isect->num_hits) - 1; i >= 0; --i) {
+			if(ss_isect->hits[i].t == t) {
+				return;
+			}
+		}
+
+		ss_isect->num_hits++;
+
+		int hit;
+
+		if(ss_isect->num_hits <= max_hits) {
+			hit = ss_isect->num_hits - 1;
+		}
+		else {
+			/* reservoir sampling: if we are at the maximum number of
+			 * hits, randomly replace element or skip it */
+			hit = lcg_step_uint(lcg_state) % ss_isect->num_hits;
+
+			if(hit >= max_hits)
+				return;
+		}
+
+		/* record intersection */
+		Intersection *isect = &ss_isect->hits[hit];
+		isect->t = t;
+		isect->u = u;
+		isect->v = v;
+		isect->prim = triAddr;
+		isect->object = object;
+		isect->type = PRIMITIVE_MOTION_TRIANGLE;
+
+		/* Record geometric normal. */
+		ss_isect->Ng[hit] = normalize(cross(verts[1] - verts[0],
+		                                    verts[2] - verts[0]));
+	}
+}
+#endif
+
 CCL_NAMESPACE_END
+
--- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
@@ -1,280 +0,0 @@
-/*
- * Copyright 2011-2016 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Motion Triangle Primitive
- *
- * These are stored as regular triangles, plus extra positions and normals at
- * times other than the frame center. Computing the triangle vertex positions
- * or normals at a given ray time is a matter of interpolation of the two steps
- * between which the ray time lies.
- *
- * The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION
- * and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes.
- */
-
-CCL_NAMESPACE_BEGIN
-
-/* Refine triangle intersection to more precise hit point. For rays that travel
- * far the precision is often not so good, this reintersects the primitive from
- * a closer distance.
- */
-
-ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg,
-                                                ShaderData *sd,
-                                                const Intersection *isect,
-                                                const Ray *ray,
-                                                float3 verts[3])
-{
-	float3 P = ray->P;
-	float3 D = ray->D;
-	float t = isect->t;
-
-#ifdef __INTERSECTION_REFINE__
-	if(isect->object != OBJECT_NONE) {
-		if(UNLIKELY(t == 0.0f)) {
-			return P;
-		}
-#  ifdef __OBJECT_MOTION__
-		Transform tfm = ccl_fetch(sd, ob_itfm);
-#  else
-		Transform tfm = object_fetch_transform(kg,
-		                                       isect->object,
-		                                       OBJECT_INVERSE_TRANSFORM);
-#  endif
-
-		P = transform_point(&tfm, P);
-		D = transform_direction(&tfm, D*t);
-		D = normalize_len(D, &t);
-	}
-
-	P = P + D*t;
-
-	/* Compute refined intersection distance. */
-	const float3 e1 = verts[0] - verts[2];
-	const float3 e2 = verts[1] - verts[2];
-	const float3 s1 = cross(D, e2);
-
-	const float invdivisor = 1.0f/dot(s1, e1);
-	const float3 d = P - verts[2];
-	const float3 s2 = cross(d, e1);
-	float rt = dot(e2, s2)*invdivisor;
-
-	/* Compute refined position. */
-	P = P + D*rt;
-
-	if(isect->object != OBJECT_NONE) {
-#  ifdef __OBJECT_MOTION__
-		Transform tfm = ccl_fetch(sd, ob_tfm);
-#  else
-		Transform tfm = object_fetch_transform(kg,
-		                                       isect->object,
-		                                       OBJECT_TRANSFORM);
-#  endif
-
-		P = transform_point(&tfm, P);
-	}
-
-	return P;
-#else
-	return P + D*t;
-#endif
-}
-
-/* Same as above, except that isect->t is assumed to be in object space
- * for instancing.
- */
-
-#ifdef __SUBSURFACE__
-#  if defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86))
-ccl_device_noinline
-#  else
-ccl_device_inline
-#  endif
-float3 motion_triangle_refine_subsurface(KernelGlobals *kg,
-                                         ShaderData *sd,
-                                         const Intersection *isect,
-                                         const Ray *ray,
-                                         float3 verts[3])
-{
-	float3 P = ray->P;
-	float3 D = ray->D;
-	float t = isect->t;
-
-#  ifdef __INTERSECTION_REFINE__
-	if(isect->object != OBJECT_NONE) {
-#    ifdef __OBJECT_MOTION__
-		Transform tfm = ccl_fetch(sd, ob_itfm);
-#    else
-		Transform tfm = object_fetch_transform(kg,
-		                                       isect->object,
-		                                       OBJECT_INVERSE_TRANSFORM);
-#    endif
-
-		P = transform_point(&tfm, P);
-		D = transform_direction(&tfm, D);
-		D = normalize(D);
-	}
-
-	P = P + D*t;
-
-	/* compute refined intersection distance */
-	const float3 e1 = verts[0] - verts[2];
-	const float3 e2 = verts[1] - verts[2];
-	const float3 s1 = cross(D, e2);
-
-	const float invdivisor = 1.0f/dot(s1, e1);
-	const float3 d = P - verts[2];
-	const float3 s2 = cross(d, e1);
-	float rt = dot(e2, s2)*invdivisor;
-
-	P = P + D*rt;
-
-	if(isect->object != OBJECT_NONE) {
-#    ifdef __OBJECT_MOTION__
-		Transform tfm = ccl_fetch(sd, ob_tfm);
-#    else
-		Transform tfm = object_fetch_transform(kg,
-		                                       isect->object,
-		                                       OBJECT_TRANSFORM);
-#    endif
-
-		P = transform_point(&tfm, P);
-	}
-
-	return P;
-#  else  /* __INTERSECTION_REFINE__ */
-	return P + D*t;
-#  endif  /* __INTERSECTION_REFINE__ */
-}
-#endif  /* __SUBSURFACE__ */
-
-
-/* Ray intersection. We simply compute the vertex positions at the given ray
- * time and do a ray intersection with the resulting triangle.
- */
-
-ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
-                                                 Intersection *isect,
-                                                 float3 P,
-                                                 float3 dir,
-                                                 float time,
-                                                 uint visibility,
-                                                 int object,
-                                                 int prim_addr)
-{
-	/* Primitive index for vertex location lookup. */
-	int prim = kernel_tex_fetch(__prim_index, prim_addr);
-	int fobject = (object == OBJECT_NONE)
-	                  ? kernel_tex_fetch(__prim_object, prim_addr)
-	                  : object;
-	/* Get vertex locations for intersection. */
-	float3 verts[3];
-	motion_triangle_vertices(kg, fobject, prim, time, verts);
-	/* Ray-triangle intersection, unoptimized. */
-	float t, u, v;
-	if(ray_triangle_intersect_uv(P,
-	                             dir,
-	                             isect->t,
-	                             verts[2], verts[0], verts[1],
-	                             &u, &v, &t))
-	{
-#ifdef __VISIBILITY_FLAG__
-		/* Visibility flag test. we do it here under the assumption
-		 * that most triangles are culled by node flags.
-		 */
-		if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
-#endif
-		{
-			isect->t = t;
-			isect->u = u;
-			isect->v = v;
-			isect->prim = prim_addr;
-			isect->object = object;
-			isect->type = PRIMITIVE_MOTION_TRIANGLE;
-			return true;
-		}
-	}
-	return false;
-}
-
-/* Special ray intersection routines for subsurface scattering. In that case we
- * only want to intersect with primitives in the same object, and if case of
- * multiple hits we pick a single random primitive as the intersection point.
- */
-#ifdef __SUBSURFACE__
-ccl_device_inline void motion_triangle_intersect_subsurface(
-        KernelGlobals *kg,
-        SubsurfaceIntersection *ss_isect,
-        float3 P,
-        float3 dir,
-        float time,
-        int object,
-        int prim_addr,
-        float tmax,
-        uint *lcg_state,
-        int max_hits)
-{
-	/* Primitive index for vertex location lookup. */
-	int prim = kernel_tex_fetch(__prim_index, prim_addr);
-	int fobject = (object == OBJECT_NONE)
-	                  ? kernel_tex_fetch(__prim_object, prim_addr)
-	                  : object;
-	/* Get vertex locations for intersection. */
-	float3 verts[3];
-	motion_triangle_vertices(kg, fobject, prim, time, verts);
-	/* Ray-triangle intersection, unoptimized. */
-	float t, u, v;
-	if(ray_triangle_intersect_uv(P,
-	                             dir,
-	                             tmax,
-	                             verts[2], verts[0], verts[1],
-	                             &u, &v, &t))
-	{
-		for(int i = min(max_hits, ss_isect->num_hits) - 1; i >= 0; --i) {
-			if(ss_isect->hits[i].t == t) {
-				return;
-			}
-		}
-		ss_isect->num_hits++;
-		int hit;
-		if(ss_isect->num_hits <= max_hits) {
-			hit = ss_isect->num_hits - 1;
-		}
-		else {
-			/* Reservoir sampling: if we are at the maximum number of
-			 * hits, randomly replace element or skip it.
-			 */
-			hit = lcg_step_uint(lcg_state) % ss_isect->num_hits;
-
-			if(hit >= max_hits)
-				return;
-		}
-		/* Record intersection. */
-		Intersection *isect = &ss_isect->hits[hit];
-		isect->t = t;
-		isect->u = u;
-		isect->v = v;
-		isect->prim = prim_addr;
-		isect->object = object;
-		isect->type = PRIMITIVE_MOTION_TRIANGLE;
-		/* Record geometric normal. */
-		ss_isect->Ng[hit] = normalize(cross(verts[1] - verts[0],
-		                                    verts[2] - verts[0]));
-	}
-}
-#endif  /* __SUBSURFACE__ */
-
-CCL_NAMESPACE_END
--- a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
@@ -1,123 +0,0 @@
-/*
- * Copyright 2011-2016 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Motion Triangle Primitive
- *
- * These are stored as regular triangles, plus extra positions and normals at
- * times other than the frame center. Computing the triangle vertex positions
- * or normals at a given ray time is a matter of interpolation of the two steps
- * between which the ray time lies.
- *
- * The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION
- * and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes.
- */
-
-CCL_NAMESPACE_BEGIN
-
-/* Setup of motion triangle specific parts of ShaderData, moved into this one
- * function to more easily share computation of interpolated positions and
- * normals */
-
-/* return 3 triangle vertex normals */
-ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg,
-                                                      ShaderData *sd, const
-                                                      Intersection *isect,
-                                                      const Ray *ray,
-                                                      bool subsurface)
-{
-	/* Get shader. */
-	ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
-	/* Get motion info. */
-	/* TODO(sergey): This logic is really similar to motion_triangle_vertices(),
-	 * can we de-duplicate something here?
-	 */
-	int numsteps, numverts;
-	object_motion_info(kg, ccl_fetch(sd, object), &numsteps, &numverts, NULL);
-	/* Figure out which steps we need to fetch and their interpolation factor. */
-	int maxstep = numsteps*2;
-	int step = min((int)(ccl_fetch(sd, time)*maxstep), maxstep-1);
-	float t = ccl_fetch(sd, time)*maxstep - step;
-	/* Find attribute. */
-	AttributeElement elem;
-	int offset = find_attribute_motion(kg, ccl_fetch(sd, object),
-	                                   ATTR_STD_MOTION_VERTEX_POSITION,
-	                                   &elem);
-	kernel_assert(offset != ATTR_STD_NOT_FOUND);
-	/* Fetch vertex coordinates. */
-	float3 verts[3], next_verts[3];
-	uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
-	motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
-	motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
-	/* Interpolate between steps. */
-	verts[0] = (1.0f - t)*verts[0] + t*next_verts[0];
-	verts[1] = (1.0f - t)*verts[1] + t*next_verts[1];
-	verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
-	/* Compute refined position. */
-#ifdef __SUBSURFACE__
-	if(subsurface) {
-		ccl_fetch(sd, P) = motion_triangle_refine_subsurface(kg,
-		                                                     sd,
-		                                                     isect,
-		                                                     ray,
-		                                                     verts);
-	}
-	else
-#endif  /*  __SUBSURFACE__*/
-	{
-		ccl_fetch(sd, P) = motion_triangle_refine(kg, sd, isect, ray, verts);
-	}
-	/* Compute face normal. */
-	float3 Ng;
-	if(ccl_fetch(sd, object_flag) & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
-		Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
-	}
-	else {
-		Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
-	}
-	ccl_fetch(sd, Ng) = Ng;
-	ccl_fetch(sd, N) = Ng;
-	/* Compute derivatives of P w.r.t. uv. */
-#ifdef __DPDU__
-	ccl_fetch(sd, dPdu) = (verts[0] - verts[2]);
-	ccl_fetch(sd, dPdv) = (verts[1] - verts[2]);
-#endif
-	/* Compute smooth normal. */
-	if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
-		/* Find attribute. */
-		AttributeElement elem;
-		int offset = find_attribute_motion(kg,
-		                                   ccl_fetch(sd, object),
-		                                   ATTR_STD_MOTION_VERTEX_NORMAL,
-		                                   &elem);
-		kernel_assert(offset != ATTR_STD_NOT_FOUND);
-		/* Fetch vertex coordinates. */
-		float3 normals[3], next_normals[3];
-		motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
-		motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals);
-		/* Interpolate between steps. */
-		normals[0] = (1.0f - t)*normals[0] + t*next_normals[0];
-		normals[1] = (1.0f - t)*normals[1] + t*next_normals[1];
-		normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
-		/* Interpolate between vertices. */
-		float u = ccl_fetch(sd, u);
-		float v = ccl_fetch(sd, v);
-		float w = 1.0f - u - v;
-		ccl_fetch(sd, N) = (u*normals[0] + v*normals[1] + w*normals[2]);
-	}
-}
-
-CCL_NAMESPACE_END
-
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -113,6 +113,7 @@ ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int
 ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg, int object, float time, Transform *itfm)
 {
 	int object_flag = kernel_tex_fetch(__object_flag, object);
+
 	if(object_flag & SD_OBJECT_MOTION) {
 		/* if we do motion blur */
 		Transform tfm = object_fetch_transform_motion(kg, object, time);
--- a/intern/cycles/kernel/geom/geom_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -157,9 +157,8 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *
 	if(is_curve_primitive) {
 		center = curve_motion_center_location(kg, sd);

-		if(!(ccl_fetch(sd, object_flag) & SD_OBJECT_TRANSFORM_APPLIED)) {
+		if(!(ccl_fetch(sd, flag) & SD_TRANSFORM_APPLIED))
 			object_position_transform(kg, sd, &center);
-		}
 	}
 	else
 #endif
@@ -182,7 +181,7 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *
 		motion_post = primitive_attribute_float3(kg, sd, desc, NULL, NULL);

 #ifdef __HAIR__
-		if(is_curve_primitive && (ccl_fetch(sd, object_flag) & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {
+		if(is_curve_primitive && (ccl_fetch(sd, flag) & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {
 			object_position_transform(kg, sd, &motion_pre);
 			object_position_transform(kg, sd, &motion_post);
 		}
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -32,12 +32,10 @@ ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd)
 	const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));

 	/* return normal */
-	if(ccl_fetch(sd, object_flag) & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
+	if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED)
 		return normalize(cross(v2 - v0, v1 - v0));
-	}
-	else {
+	else
 		return normalize(cross(v1 - v0, v2 - v0));
-	}
 }

 /* point and normal on triangle  */
@@ -48,18 +46,20 @@ ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int object, int
 	float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0));
 	float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1));
 	float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2));
+
 	/* compute point */
 	float t = 1.0f - u - v;
 	*P = (u*v0 + v*v1 + t*v2);
+
 	/* get object flags */
 	int object_flag = kernel_tex_fetch(__object_flag, object);
+
 	/* compute normal */
-	if(object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
+	if(object_flag & SD_NEGATIVE_SCALE_APPLIED)
 		*Ng = normalize(cross(v2 - v0, v1 - v0));
-	}
-	else {
+	else
 		*Ng = normalize(cross(v1 - v0, v2 - v0));
-	}
+
 	/* shader`*/
 	*shader = kernel_tex_fetch(__tri_shader, prim);
 }
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -108,7 +108,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
                                          float3 P,
                                          uint visibility,
                                          int object,
-                                          int prim_addr)
+                                          int triAddr)
 {
 	const int kx = isect_precalc->kx;
 	const int ky = isect_precalc->ky;
@@ -118,7 +118,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 	const float Sz = isect_precalc->Sz;

 	/* Calculate vertices relative to ray origin. */
-	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
+	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr);

 #if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
 	const avxf avxf_P(P.m128, P.m128);
@@ -129,10 +129,10 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 	const avxf AB = tri_ab - avxf_P;
 	const avxf BC = tri_bc - avxf_P;

-	const __m256i permute_mask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx);
+	const __m256i permuteMask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx);

-	const avxf AB_k = shuffle(AB, permute_mask);
-	const avxf BC_k = shuffle(BC, permute_mask);
+	const avxf AB_k = shuffle(AB, permuteMask);
+	const avxf BC_k = shuffle(BC, permuteMask);

 	/* Akz, Akz, Bkz, Bkz, Bkz, Bkz, Ckz, Ckz */
 	const avxf ABBC_kz = shuffle<2>(AB_k, BC_k);
@@ -155,14 +155,14 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 	/* By, Bx, Cy, Cx, By, Bx, Ay, Ax */
 	const avxf BCBA_yx = permute<3,2,7,6,3,2,1,0>(ABBC_xy);

-	const avxf neg_mask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000);
+	const avxf negMask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000);

 	/* W           U                             V
 	 * (AxBy-AyBx) (BxCy-ByCx) XX XX (BxBy-ByBx) (CxAy-CyAx) XX XX
 	 */
-	const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, neg_mask /* Dont care */);
+	const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, negMask /* Dont care */);

-	const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ neg_mask;
+	const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ negMask;

 	/* Calculate scaled barycentric coordinates. */
 	float WUVW_array[4];
@@ -231,7 +231,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 #ifdef __VISIBILITY_FLAG__
 	/* visibility flag test. we do it here under the assumption
 	 * that most triangles are culled by node flags */
-	if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
+	if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
 #endif
 	{
 #ifdef __KERNEL_CUDA__
@@ -241,7 +241,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 #endif
 		/* Normalize U, V, W, and T. */
 		const float inv_det = 1.0f / det;
-		isect->prim = prim_addr;
+		isect->prim = triAddr;
 		isect->object = object;
 		isect->type = PRIMITIVE_TRIANGLE;
 		isect->u = U * inv_det;
@@ -264,7 +264,7 @@ ccl_device_inline void triangle_intersect_subsurface(
        SubsurfaceIntersection *ss_isect,
        float3 P,
        int object,
-        int prim_addr,
+        int triAddr,
        float tmax,
        uint *lcg_state,
        int max_hits)
@@ -277,7 +277,7 @@ ccl_device_inline void triangle_intersect_subsurface(
 	const float Sz = isect_precalc->Sz;

 	/* Calculate vertices relative to ray origin. */
-	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
+	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr);
 	const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
 	             tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
 	             tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
@@ -415,7 +415,7 @@ ccl_device_inline void triangle_intersect_subsurface(

 	/* record intersection */
 	Intersection *isect = &ss_isect->hits[hit];
-	isect->prim = prim_addr;
+	isect->prim = triAddr;
 	isect->object = object;
 	isect->type = PRIMITIVE_TRIANGLE;
 	isect->u = U * inv_det;
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -320,7 +320,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
 	                         P, Ng, Ng,
 	                         shader, object, prim,
 	                         u, v, 1.0f, 0.5f,
-	                         !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
+	                         !(kernel_tex_fetch(__object_flag, object) & SD_TRANSFORM_APPLIED),
 	                         LAMP_NONE);
 	sd.I = sd.N;

--- a/intern/cycles/kernel/kernel_debug.h
+++ b/intern/cycles/kernel/kernel_debug.h
@@ -18,9 +18,8 @@ CCL_NAMESPACE_BEGIN

 ccl_device_inline void debug_data_init(DebugData *debug_data)
 {
-	debug_data->num_bvh_traversed_nodes = 0;
+	debug_data->num_bvh_traversal_steps = 0;
 	debug_data->num_bvh_traversed_instances = 0;
-	debug_data->num_bvh_intersections = 0;
 	debug_data->num_ray_bounces = 0;
 }

@@ -31,21 +30,16 @@ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
                                                 int sample)
 {
 	int flag = kernel_data.film.pass_flag;
-	if(flag & PASS_BVH_TRAVERSED_NODES) {
-		kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes,
+	if(flag & PASS_BVH_TRAVERSAL_STEPS) {
+		kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversal_steps,
 		                        sample,
-		                        debug_data->num_bvh_traversed_nodes);
+		                        debug_data->num_bvh_traversal_steps);
 	}
 	if(flag & PASS_BVH_TRAVERSED_INSTANCES) {
 		kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances,
 		                        sample,
 		                        debug_data->num_bvh_traversed_instances);
 	}
-	if(flag & PASS_BVH_INTERSECTIONS) {
-		kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections,
-		                        sample,
-		                        debug_data->num_bvh_intersections);
-	}
 	if(flag & PASS_RAY_BOUNCES) {
 		kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces,
 		                        sample,
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -149,15 +149,6 @@ ccl_device_inline uint cmj_hash(uint i, uint p)
 	return i;
 }

-ccl_device_inline uint cmj_hash_simple(uint i, uint p)
-{
-	i = (i ^ 61) ^ p;
-	i += i << 3;
-	i ^= i >> 4;
-	i *= 0x27d4eb2d;
-	return i;
-}
-
 ccl_device_inline float cmj_randfloat(uint i, uint p)
 {
 	return cmj_hash(i, p) * (1.0f / 4294967808.0f);
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -767,7 +767,7 @@ ccl_device void object_transform_light_sample(KernelGlobals *kg, LightSample *ls
 {
 #ifdef __INSTANCING__
 	/* instance transform */
-	if(!(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED)) {
+	if(!(kernel_tex_fetch(__object_flag, object) & SD_TRANSFORM_APPLIED)) {
 #  ifdef __OBJECT_MOTION__
 		Transform itfm;
 		Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm);
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -109,10 +109,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 		/* intersect scene */
 		Intersection isect;
 		uint visibility = path_state_ray_visibility(kg, state);
-		if(state->bounce > kernel_data.integrator.ao_bounces) {
-			visibility = PATH_RAY_SHADOW;
-			ray->t = kernel_data.background.ao_distance;
-		}
 		bool hit = scene_intersect(kg,
 		                           *ray,
 		                           visibility,
@@ -145,10 +141,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 #endif  /* __LAMP_MIS__ */

 #ifdef __VOLUME__
-		/* Sanitize volume stack. */
-		if(!hit) {
-			kernel_volume_clean_stack(kg, state->volume_stack);
-		}
 		/* volume attenuation, emission, scatter */
 		if(state->volume_stack[0].shader != SHADER_NONE) {
 			Ray volume_ray = *ray;
@@ -296,9 +288,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,

 			break;
 		}
-		else if(state->bounce > kernel_data.integrator.ao_bounces) {
-			break;
-		}

 		/* setup shading */
 		shader_setup_from_ray(kg,
@@ -459,7 +448,7 @@ bool kernel_path_subsurface_scatter(
 #  ifdef __VOLUME__
 		ss_indirect->need_update_volume_stack =
 		        kernel_data.integrator.use_volumes &&
-		        ccl_fetch(sd, object_flag) & SD_OBJECT_INTERSECTS_VOLUME;
+		        ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
 #  endif  /* __VOLUME__ */

 		/* compute lighting with the BSDF closure */
@@ -634,11 +623,6 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 			lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
 		}

-		if(state.bounce > kernel_data.integrator.ao_bounces) {
-			visibility = PATH_RAY_SHADOW;
-			ray.t = kernel_data.background.ao_distance;
-		}
-
 		bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
 #else
 		bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
@@ -646,9 +630,8 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,

 #ifdef __KERNEL_DEBUG__
 		if(state.flag & PATH_RAY_CAMERA) {
-			debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
+			debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
 			debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
-			debug_data.num_bvh_intersections += isect.num_intersections;
 		}
 		debug_data.num_ray_bounces++;
 #endif  /* __KERNEL_DEBUG__ */
@@ -675,10 +658,6 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 #endif  /* __LAMP_MIS__ */

 #ifdef __VOLUME__
-		/* Sanitize volume stack. */
-		if(!hit) {
-			kernel_volume_clean_stack(kg, state.volume_stack);
-		}
 		/* volume attenuation, emission, scatter */
 		if(state.volume_stack[0].shader != SHADER_NONE) {
 			Ray volume_ray = ray;
@@ -781,9 +760,6 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,

 			break;
 		}
-		else if(state.bounce > kernel_data.integrator.ao_bounces) {
-			break;
-		}

 		/* setup shading */
 		shader_setup_from_ray(kg, &sd, &isect, &ray);
@@ -792,25 +768,21 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,

 		/* holdout */
 #ifdef __HOLDOUT__
-		if(((sd.flag & SD_HOLDOUT) ||
-		    (sd.object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
-		   (state.flag & PATH_RAY_CAMERA))
-		{
+		if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state.flag & PATH_RAY_CAMERA)) {
 			if(kernel_data.background.transparent) {
 				float3 holdout_weight;
-				if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
+				
+				if(sd.flag & SD_HOLDOUT_MASK)
 					holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
-				}
-				else {
+				else
 					holdout_weight = shader_holdout_eval(kg, &sd);
-				}
+
 				/* any throughput is ok, should all be identical here */
 				L_transparent += average(holdout_weight*throughput);
 			}

-			if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
+			if(sd.flag & SD_HOLDOUT_MASK)
 				break;
-			}
 		}
 #endif  /* __HOLDOUT__ */

--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -167,9 +167,8 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
 			                                                  true);
 #ifdef __VOLUME__
 			Ray volume_ray = *ray;
-			bool need_update_volume_stack =
-			        kernel_data.integrator.use_volumes &&
-			        ccl_fetch(sd, object_flag) & SD_OBJECT_INTERSECTS_VOLUME;
+			bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
+			                                ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
 #endif  /* __VOLUME__ */

 			/* compute lighting with the BSDF closure */
@@ -289,17 +288,12 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 #endif  /* __HAIR__ */

 #ifdef __KERNEL_DEBUG__
-		debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
+		debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
 		debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
-		debug_data.num_bvh_intersections += isect.num_intersections;
 		debug_data.num_ray_bounces++;
 #endif  /* __KERNEL_DEBUG__ */

 #ifdef __VOLUME__
-		/* Sanitize volume stack. */
-		if(!hit) {
-			kernel_volume_clean_stack(kg, state.volume_stack);
-		}
 		/* volume attenuation, emission, scatter */
 		if(state.volume_stack[0].shader != SHADER_NONE) {
 			Ray volume_ray = ray;
@@ -474,21 +468,21 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in

 		/* holdout */
 #ifdef __HOLDOUT__
-		if((sd.flag & SD_HOLDOUT) || (sd.object_flag & SD_OBJECT_HOLDOUT_MASK)) {
+		if(sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) {
 			if(kernel_data.background.transparent) {
 				float3 holdout_weight;
-				if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
+				
+				if(sd.flag & SD_HOLDOUT_MASK)
 					holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
-				}
-				else {
+				else
 					holdout_weight = shader_holdout_eval(kg, &sd);
-				}
+
 				/* any throughput is ok, should all be identical here */
 				L_transparent += average(holdout_weight*throughput);
 			}
-			if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
+
+			if(sd.flag & SD_HOLDOUT_MASK)
 				break;
-			}
 		}
 #endif  /* __HOLDOUT__ */

--- a/intern/cycles/kernel/kernel_path_common.h
+++ b/intern/cycles/kernel/kernel_path_common.h
@@ -30,7 +30,7 @@ ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg,

 	int num_samples = kernel_data.integrator.aa_samples;

-	if(sample == kernel_data.integrator.start_sample) {
+	if(sample == 0) {
 		*rng_state = hash_int_2d(x, y);
 	}

--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -120,11 +120,13 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *
 	/* Cranly-Patterson rotation using rng seed */
 	float shift;

-	/* Hash rng with dimension to solve correlation issues.
-	 * See T38710, T50116.
-	 */
-	RNG tmp_rng = cmj_hash_simple(dimension, *rng);
-	shift = tmp_rng * (1.0f/(float)0xFFFFFFFF);
+	/* Using the same *rng value to offset seems to give correlation issues.
+	 * We could hash it with the dimension, but this has a performance impact;
+	 * we need to find a solution for this. */
+	if(dimension & 1)
+		shift = (*rng >> 16) * (1.0f/(float)0xFFFF);
+	else
+		shift = (*rng & 0xFFFF) * (1.0f/(float)0xFFFF);

 	return r + shift - floorf(r + shift);
 #endif
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -38,7 +38,7 @@ CCL_NAMESPACE_BEGIN
 #ifdef __OBJECT_MOTION__
 ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
 {
-	if(ccl_fetch(sd, object_flag) & SD_OBJECT_MOTION) {
+	if(ccl_fetch(sd, flag) & SD_OBJECT_MOTION) {
 		ccl_fetch(sd, ob_tfm) = object_fetch_transform_motion(kg, ccl_fetch(sd, object), time);
 		ccl_fetch(sd, ob_itfm) = transform_quick_inverse(ccl_fetch(sd, ob_tfm));
 	}
@@ -59,9 +59,7 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
 #endif

 	ccl_fetch(sd, type) = isect->type;
-	ccl_fetch(sd, flag) = 0;
-	ccl_fetch(sd, object_flag) = kernel_tex_fetch(__object_flag,
-	                                              ccl_fetch(sd, object));
+	ccl_fetch(sd, flag) = kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));

 	/* matrices and time */
 #ifdef __OBJECT_MOTION__
@@ -162,11 +160,10 @@ void shader_setup_from_subsurface(
        const Intersection *isect,
        const Ray *ray)
 {
-	const bool backfacing = sd->flag & SD_BACKFACING;
+	bool backfacing = sd->flag & SD_BACKFACING;

 	/* object, matrices, time, ray_length stay the same */
-	sd->flag = 0;
-	sd->object_flag = kernel_tex_fetch(__object_flag, sd->object);
+	sd->flag = kernel_tex_fetch(__object_flag, sd->object);
 	sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
 	sd->type = isect->type;

@@ -274,10 +271,8 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
 	ccl_fetch(sd, ray_length) = t;

 	ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
-	ccl_fetch(sd, object_flag) = 0;
 	if(ccl_fetch(sd, object) != OBJECT_NONE) {
-		ccl_fetch(sd, object_flag) |= kernel_tex_fetch(__object_flag,
-		                                               ccl_fetch(sd, object));
+		ccl_fetch(sd, flag) |= kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));

 #ifdef __OBJECT_MOTION__
 		shader_setup_object_transforms(kg, sd, time);
@@ -303,7 +298,7 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
 			ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));

 #ifdef __INSTANCING__
-			if(!(ccl_fetch(sd, object_flag) & SD_OBJECT_TRANSFORM_APPLIED)) {
+			if(!(ccl_fetch(sd, flag) & SD_TRANSFORM_APPLIED)) {
 				object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
 			}
 #endif
@@ -314,7 +309,7 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
 		triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));

 #  ifdef __INSTANCING__
-		if(!(ccl_fetch(sd, object_flag) & SD_OBJECT_TRANSFORM_APPLIED)) {
+		if(!(ccl_fetch(sd, flag) & SD_TRANSFORM_APPLIED)) {
 			object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
 			object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
 		}
@@ -369,7 +364,7 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
 	                         P, Ng, I,
 	                         shader, object, prim,
 	                         u, v, 0.0f, 0.5f,
-	                         !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
+	                         !(kernel_tex_fetch(__object_flag, object) & SD_TRANSFORM_APPLIED),
 	                         LAMP_NONE);
 }

@@ -384,7 +379,6 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat
 	ccl_fetch(sd, I) = -ray->D;
 	ccl_fetch(sd, shader) = kernel_data.background.surface_shader;
 	ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
-	ccl_fetch(sd, object_flag) = 0;
 #ifdef __OBJECT_MOTION__
 	ccl_fetch(sd, time) = ray->time;
 #endif
@@ -426,7 +420,6 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s
 	sd->I = -ray->D;
 	sd->shader = SHADER_NONE;
 	sd->flag = 0;
-	sd->object_flag = 0;
 #ifdef __OBJECT_MOTION__
 	sd->time = ray->time;
 #endif
@@ -1034,7 +1027,6 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
 	sd->num_closure = 0;
 	sd->num_closure_extra = 0;
 	sd->flag = 0;
-	sd->object_flag = 0;

 	for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
 		/* setup shaderdata from stack. it's mostly setup already in
@@ -1042,12 +1034,11 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
 		sd->object = stack[i].object;
 		sd->shader = stack[i].shader;

-		sd->flag &= ~SD_SHADER_FLAGS;
+		sd->flag &= ~(SD_SHADER_FLAGS|SD_OBJECT_FLAGS);
 		sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
-		sd->object_flag &= ~SD_OBJECT_FLAGS;

 		if(sd->object != OBJECT_NONE) {
-			sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object);
+			sd->flag |= kernel_tex_fetch(__object_flag, sd->object);

 #ifdef __OBJECT_MOTION__
 			/* todo: this is inefficient for motion blur, we should be
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -192,9 +192,6 @@ CCL_NAMESPACE_BEGIN
 #ifdef __NO_PATCH_EVAL__
 #  undef __PATCH_EVAL__
 #endif
-#ifdef __NO_TRANSPARENT__
-#  undef __TRANSPARENT_SHADOWS__
-#endif

 /* Random Numbers */

@@ -345,10 +342,9 @@ typedef enum PassType {
 	PASS_SUBSURFACE_COLOR = (1 << 24),
 	PASS_LIGHT = (1 << 25), /* no real pass, used to force use_light_pass */
 #ifdef __KERNEL_DEBUG__
-	PASS_BVH_TRAVERSED_NODES = (1 << 26),
+	PASS_BVH_TRAVERSAL_STEPS = (1 << 26),
 	PASS_BVH_TRAVERSED_INSTANCES = (1 << 27),
-	PASS_BVH_INTERSECTIONS = (1 << 28),
-	PASS_RAY_BOUNCES = (1 << 29),
+	PASS_RAY_BOUNCES = (1 << 28),
 #endif
 } PassType;

@@ -543,38 +539,35 @@ typedef ccl_addr_space struct Intersection {
 	int type;

 #ifdef __KERNEL_DEBUG__
-	int num_traversed_nodes;
+	int num_traversal_steps;
 	int num_traversed_instances;
-	int num_intersections;
 #endif
 } Intersection;

 /* Primitives */

 typedef enum PrimitiveType {
-	PRIMITIVE_NONE            = 0,
-	PRIMITIVE_TRIANGLE        = (1 << 0),
-	PRIMITIVE_MOTION_TRIANGLE = (1 << 1),
-	PRIMITIVE_CURVE           = (1 << 2),
-	PRIMITIVE_MOTION_CURVE    = (1 << 3),
-	/* Lamp primitive is not included below on purpose,
-	 * since it is no real traceable primitive.
-	 */
-	PRIMITIVE_LAMP            = (1 << 4),
+	PRIMITIVE_NONE = 0,
+	PRIMITIVE_TRIANGLE = 1,
+	PRIMITIVE_MOTION_TRIANGLE = 2,
+	PRIMITIVE_CURVE = 4,
+	PRIMITIVE_MOTION_CURVE = 8,
+	/* Lamp primitive is not included below on purpose, since it is no real traceable primitive */
+	PRIMITIVE_LAMP = 16,

 	PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE|PRIMITIVE_MOTION_TRIANGLE),
 	PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE|PRIMITIVE_MOTION_CURVE),
 	PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE|PRIMITIVE_MOTION_CURVE),
 	PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE|PRIMITIVE_ALL_CURVE),

-	/* Total number of different traceable primitives.
+	/* Total number of different primitives.
 	 * NOTE: This is an actual value, not a bitflag.
 	 */
 	PRIMITIVE_NUM_TOTAL = 4,
 } PrimitiveType;

-#define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << PRIMITIVE_NUM_TOTAL) | (type))
-#define PRIMITIVE_UNPACK_SEGMENT(type) (type >> PRIMITIVE_NUM_TOTAL)
+#define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << 16) | type)
+#define PRIMITIVE_UNPACK_SEGMENT(type) (type >> 16)

 /* Attributes */

@@ -692,108 +685,56 @@ typedef enum ShaderContext {
 /* Shader Data
 *
 * Main shader state at a point on the surface or in a volume. All coordinates
- * are in world space.
- */
+ * are in world space. */

 enum ShaderDataFlag {
-	/* Runtime flags. */
-
-	/* Set when ray hits backside of surface. */
-	SD_BACKFACING      = (1 << 0),
-	/* Shader has emissive closure. */
-	SD_EMISSION        = (1 << 1),
-	/* Shader has BSDF closure. */
-	SD_BSDF            = (1 << 2),
-	/* Shader has non-singular BSDF closure. */
-	SD_BSDF_HAS_EVAL   = (1 << 3),
-	/* Shader has BSSRDF closure. */
-	SD_BSSRDF          = (1 << 4),
-	/* Shader has holdout closure. */
-	SD_HOLDOUT         = (1 << 5),
-	/* Shader has volume absorption closure. */
-	SD_ABSORPTION      = (1 << 6),
-	/* Shader has have volume phase (scatter) closure. */
-	SD_SCATTER         = (1 << 7),
-	/* Shader has AO closure. */
-	SD_AO              = (1 << 8),
-	/* Shader has transparent closure. */
-	SD_TRANSPARENT     = (1 << 9),
-	/* BSDF requires LCG for evaluation. */
+	/* runtime flags */
+	SD_BACKFACING      = (1 << 0),   /* backside of surface? */
+	SD_EMISSION        = (1 << 1),   /* have emissive closure? */
+	SD_BSDF            = (1 << 2),   /* have bsdf closure? */
+	SD_BSDF_HAS_EVAL   = (1 << 3),   /* have non-singular bsdf closure? */
+	SD_BSSRDF          = (1 << 4),   /* have bssrdf */
+	SD_HOLDOUT         = (1 << 5),   /* have holdout closure? */
+	SD_ABSORPTION      = (1 << 6),   /* have volume absorption closure? */
+	SD_SCATTER         = (1 << 7),   /* have volume phase closure? */
+	SD_AO              = (1 << 8),   /* have ao closure? */
+	SD_TRANSPARENT     = (1 << 9),  /* have transparent closure? */
 	SD_BSDF_NEEDS_LCG  = (1 << 10),

-	SD_CLOSURE_FLAGS = (SD_EMISSION |
-	                    SD_BSDF |
-	                    SD_BSDF_HAS_EVAL |
-	                    SD_BSSRDF |
-	                    SD_HOLDOUT |
-	                    SD_ABSORPTION |
-	                    SD_SCATTER |
-	                    SD_AO |
+	SD_CLOSURE_FLAGS = (SD_EMISSION|SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF|
+	                    SD_HOLDOUT|SD_ABSORPTION|SD_SCATTER|SD_AO|
 	                    SD_BSDF_NEEDS_LCG),

-	/* Shader flags. */
+	/* shader flags */
+	SD_USE_MIS                = (1 << 12),  /* direct light sample */
+	SD_HAS_TRANSPARENT_SHADOW = (1 << 13),  /* has transparent shadow */
+	SD_HAS_VOLUME             = (1 << 14),  /* has volume shader */
+	SD_HAS_ONLY_VOLUME        = (1 << 15),  /* has only volume shader, no surface */
+	SD_HETEROGENEOUS_VOLUME   = (1 << 16),  /* has heterogeneous volume */
+	SD_HAS_BSSRDF_BUMP        = (1 << 17),  /* bssrdf normal uses bump */
+	SD_VOLUME_EQUIANGULAR     = (1 << 18),  /* use equiangular sampling */
+	SD_VOLUME_MIS             = (1 << 19),  /* use multiple importance sampling */
+	SD_VOLUME_CUBIC           = (1 << 20),  /* use cubic interpolation for voxels */
+	SD_HAS_BUMP               = (1 << 21),  /* has data connected to the displacement input */
+	SD_HAS_DISPLACEMENT       = (1 << 22),  /* has true displacement */
+	SD_HAS_CONSTANT_EMISSION  = (1 << 23),  /* has constant emission (value stored in __shader_flag) */

-	/* direct light sample */
-	SD_USE_MIS                = (1 << 16),
-	/* Has transparent shadow. */
-	SD_HAS_TRANSPARENT_SHADOW = (1 << 17),
-	/* Has volume shader. */
-	SD_HAS_VOLUME             = (1 << 18),
-	/* Has only volume shader, no surface. */
-	SD_HAS_ONLY_VOLUME        = (1 << 19),
-	/* Has heterogeneous volume. */
-	SD_HETEROGENEOUS_VOLUME   = (1 << 20),
-	/* BSSRDF normal uses bump. */
-	SD_HAS_BSSRDF_BUMP        = (1 << 21),
-	/* Use equiangular volume sampling */
-	SD_VOLUME_EQUIANGULAR     = (1 << 22),
-	/* Use multiple importance volume sampling. */
-	SD_VOLUME_MIS             = (1 << 23),
-	/* Use cubic interpolation for voxels. */
-	SD_VOLUME_CUBIC           = (1 << 24),
-	/* Has data connected to the displacement input. */
-	SD_HAS_BUMP               = (1 << 25),
-	/* Has true displacement. */
-	SD_HAS_DISPLACEMENT       = (1 << 26),
-	/* Has constant emission (value stored in __shader_flag) */
-	SD_HAS_CONSTANT_EMISSION  = (1 << 27),
+	SD_SHADER_FLAGS = (SD_USE_MIS|SD_HAS_TRANSPARENT_SHADOW|SD_HAS_VOLUME|
+	                   SD_HAS_ONLY_VOLUME|SD_HETEROGENEOUS_VOLUME|
+	                   SD_HAS_BSSRDF_BUMP|SD_VOLUME_EQUIANGULAR|SD_VOLUME_MIS|
+	                   SD_VOLUME_CUBIC|SD_HAS_BUMP|SD_HAS_DISPLACEMENT|SD_HAS_CONSTANT_EMISSION),

-	SD_SHADER_FLAGS = (SD_USE_MIS |
-	                   SD_HAS_TRANSPARENT_SHADOW |
-	                   SD_HAS_VOLUME |
-	                   SD_HAS_ONLY_VOLUME |
-	                   SD_HETEROGENEOUS_VOLUME|
-	                   SD_HAS_BSSRDF_BUMP |
-	                   SD_VOLUME_EQUIANGULAR |
-	                   SD_VOLUME_MIS |
-	                   SD_VOLUME_CUBIC |
-	                   SD_HAS_BUMP |
-	                   SD_HAS_DISPLACEMENT |
-	                   SD_HAS_CONSTANT_EMISSION)
-};
+	/* object flags */
+	SD_HOLDOUT_MASK             = (1 << 24),  /* holdout for camera rays */
+	SD_OBJECT_MOTION            = (1 << 25),  /* has object motion blur */
+	SD_TRANSFORM_APPLIED        = (1 << 26),  /* vertices have transform applied */
+	SD_NEGATIVE_SCALE_APPLIED   = (1 << 27),  /* vertices have negative scale applied */
+	SD_OBJECT_HAS_VOLUME        = (1 << 28),  /* object has a volume shader */
+	SD_OBJECT_INTERSECTS_VOLUME = (1 << 29),  /* object intersects AABB of an object with volume shader */
+	SD_OBJECT_HAS_VERTEX_MOTION = (1 << 30),  /* has position for motion vertices */

-	/* Object flags. */
-enum ShaderDataObjectFlag {
-	/* Holdout for camera rays. */
-	SD_OBJECT_HOLDOUT_MASK           = (1 << 0),
-	/* Has object motion blur. */
-	SD_OBJECT_MOTION                 = (1 << 1),
-	/* Vertices have transform applied. */
-	SD_OBJECT_TRANSFORM_APPLIED      = (1 << 2),
-	/* Vertices have negative scale applied. */
-	SD_OBJECT_NEGATIVE_SCALE_APPLIED = (1 << 3),
-	/* Object has a volume shader. */
-	SD_OBJECT_HAS_VOLUME             = (1 << 4),
-	/* Object intersects AABB of an object with volume shader. */
-	SD_OBJECT_INTERSECTS_VOLUME      = (1 << 5),
-	/* Has position for motion vertices. */
-	SD_OBJECT_HAS_VERTEX_MOTION      = (1 << 6),
-
-	SD_OBJECT_FLAGS = (SD_OBJECT_HOLDOUT_MASK |
-	                   SD_OBJECT_MOTION |
-	                   SD_OBJECT_TRANSFORM_APPLIED |
-	                   SD_OBJECT_NEGATIVE_SCALE_APPLIED |
-	                   SD_OBJECT_HAS_VOLUME |
+	SD_OBJECT_FLAGS = (SD_HOLDOUT_MASK|SD_OBJECT_MOTION|SD_TRANSFORM_APPLIED|
+	                   SD_NEGATIVE_SCALE_APPLIED|SD_OBJECT_HAS_VOLUME|
 	                   SD_OBJECT_INTERSECTS_VOLUME)
 };

@@ -832,8 +773,6 @@ typedef ccl_addr_space struct ShaderData {
 	ccl_soa_member(int, shader);
 	/* booleans describing shader, see ShaderDataFlag */
 	ccl_soa_member(int, flag);
-	/* booleans describing object of the shader, see ShaderDataObjectFlag */
-	ccl_soa_member(int, object_flag);

 	/* primitive id if there is one, ~0 otherwise */
 	ccl_soa_member(int, prim);
@@ -1096,10 +1035,10 @@ typedef struct KernelFilm {
 	float mist_falloff;

 #ifdef __KERNEL_DEBUG__
-	int pass_bvh_traversed_nodes;
+	int pass_bvh_traversal_steps;
 	int pass_bvh_traversed_instances;
-	int pass_bvh_intersections;
 	int pass_ray_bounces;
+	int pass_pad3;
 #endif
 } KernelFilm;
 static_assert_align(KernelFilm, 16);
@@ -1143,8 +1082,6 @@ typedef struct KernelIntegrator {
 	int max_transmission_bounce;
 	int max_volume_bounce;

-	int ao_bounces;
-
 	/* transparent */
 	int transparent_min_bounce;
 	int transparent_max_bounce;
@@ -1188,8 +1125,7 @@ typedef struct KernelIntegrator {

 	float light_inv_rr_threshold;

-	int start_sample;
-	int pad1, pad2, pad3;
+	int pad1;
 } KernelIntegrator;
 static_assert_align(KernelIntegrator, 16);

@@ -1247,9 +1183,10 @@ static_assert_align(KernelData, 16);
 * really important here.
 */
 typedef ccl_addr_space struct DebugData {
-	int num_bvh_traversed_nodes;
+	// Total number of BVH node traversal steps and primitives intersections
+	// for the camera rays.
+	int num_bvh_traversal_steps;
 	int num_bvh_traversed_instances;
-	int num_bvh_intersections;
 	int num_ray_bounces;
 } DebugData;
 #endif
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -245,18 +245,11 @@ ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, floa
 	float t = ray->t;

 	float delta = dot((light_P - ray->P) , ray->D);
-	float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
-	if(UNLIKELY(D == 0.0f)) {
-		*pdf = 0.0f;
-		return 0.0f;
-	}
+	float D = sqrtf(len_squared(light_P - ray->P) - delta * delta);
 	float theta_a = -atan2f(delta, D);
 	float theta_b = atan2f(t - delta, D);
 	float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
-	if(UNLIKELY(theta_b == theta_a)) {
-		*pdf = 0.0f;
-		return 0.0f;
-	}
+
 	*pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));

 	return min(t, delta + t_); /* min is only for float precision errors */
@@ -265,19 +258,13 @@ ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, floa
 ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
 {
 	float delta = dot((light_P - ray->P) , ray->D);
-	float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
-	if(UNLIKELY(D == 0.0f)) {
-		return 0.0f;
-	}
+	float D = sqrtf(len_squared(light_P - ray->P) - delta * delta);

 	float t = ray->t;
 	float t_ = sample_t - delta;

 	float theta_a = -atan2f(delta, D);
 	float theta_b = atan2f(t - delta, D);
-	if(UNLIKELY(theta_b == theta_a)) {
-		return 0.0f;
-	}

 	float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));

@@ -582,12 +569,17 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
 ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
 	PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous)
 {
+	/* workaround to fix correlation bug in T38710, can find better solution
+	 * in random number generator later, for now this is done here to not impact
+	 * performance of rendering without volumes */
+	RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
+
 	shader_setup_from_volume(kg, sd, ray);

 	if(heterogeneous)
-		return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, rng);
+		return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, &tmp_rng);
 	else
-		return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, rng, true);
+		return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng, true);
 }

 /* Decoupled Volume Sampling
@@ -966,9 +958,6 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
 			mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
 		}
 	}
-	if(sample_t < 1e-6f) {
-		return VOLUME_PATH_SCATTERED;
-	}

 	/* compute transmittance up to this step */
 	if(step != segment->steps)
@@ -1262,30 +1251,4 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
 }
 #endif

-/* Clean stack after the last bounce.
- *
- * It is expected that all volumes are closed manifolds, so at the time when ray
- * hits nothing (for example, it is a last bounce which goes to environment) the
- * only expected volume in the stack is the world's one. All the rest volume
- * entries should have been exited already.
- *
- * This isn't always true because of ray intersection precision issues, which
- * could lead us to an infinite non-world volume in the stack, causing render
- * artifacts.
- *
- * Use this function after the last bounce to get rid of all volumes apart from
- * the world's one after the last bounce to avoid render artifacts.
- */
-ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
-                                                 VolumeStack *volume_stack)
-{
-	if(kernel_data.background.volume_shader != SHADER_NONE) {
-		/* Keep the world's volume in stack. */
-		volume_stack[1].shader = SHADER_NONE;
-	}
-	else {
-		volume_stack[0].shader = SHADER_NONE;
-	}
-}
-
 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -102,8 +102,6 @@ ustring OSLRenderServices::u_curve_tangent_normal("geom:curve_tangent_normal");
 #endif
 ustring OSLRenderServices::u_path_ray_length("path:ray_length");
 ustring OSLRenderServices::u_path_ray_depth("path:ray_depth");
-ustring OSLRenderServices::u_path_diffuse_depth("path:diffuse_depth");
-ustring OSLRenderServices::u_path_glossy_depth("path:glossy_depth");
 ustring OSLRenderServices::u_path_transparent_depth("path:transparent_depth");
 ustring OSLRenderServices::u_path_transmission_depth("path:transmission_depth");
 ustring OSLRenderServices::u_trace("trace");
@@ -712,7 +710,7 @@ bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderD
 		else
 			motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, P);

-		if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+		if(!(sd->flag & SD_TRANSFORM_APPLIED)) {
 			object_position_transform(kg, sd, &P[0]);
 			object_position_transform(kg, sd, &P[1]);
 			object_position_transform(kg, sd, &P[2]);
@@ -761,24 +759,6 @@ bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *
 		int f = state->bounce;
 		return set_attribute_int(f, type, derivatives, val);
 	}
-	else if(name == u_path_diffuse_depth) {
-		/* Diffuse Ray Depth */
-		PathState *state = sd->osl_path_state;
-		int f = state->diffuse_bounce;
-		return set_attribute_int(f, type, derivatives, val);
-	}
-	else if(name == u_path_glossy_depth) {
-		/* Glossy Ray Depth */
-		PathState *state = sd->osl_path_state;
-		int f = state->glossy_bounce;
-		return set_attribute_int(f, type, derivatives, val);
-	}
-	else if(name == u_path_transmission_depth) {
-		/* Transmission Ray Depth */
-		PathState *state = sd->osl_path_state;
-		int f = state->transmission_bounce;
-		return set_attribute_int(f, type, derivatives, val);
-	}
 	else if(name == u_path_transparent_depth) {
 		/* Transparent Ray Depth */
 		PathState *state = sd->osl_path_state;
--- a/intern/cycles/kernel/osl/osl_services.h
+++ b/intern/cycles/kernel/osl/osl_services.h
@@ -165,8 +165,6 @@ public:
 	static ustring u_curve_tangent_normal;
 	static ustring u_path_ray_length;
 	static ustring u_path_ray_depth;
-	static ustring u_path_diffuse_depth;
-	static ustring u_path_glossy_depth;
 	static ustring u_path_transparent_depth;
 	static ustring u_path_transmission_depth;
 	static ustring u_trace;
--- a/intern/cycles/kernel/shaders/node_light_path.osl
+++ b/intern/cycles/kernel/shaders/node_light_path.osl
@@ -27,8 +27,6 @@ shader node_light_path(
 	output float IsVolumeScatterRay = 0.0,
 	output float RayLength = 0.0,
 	output float RayDepth = 0.0,
-	output float DiffuseDepth = 0.0,
-	output float GlossyDepth = 0.0,
 	output float TransparentDepth = 0.0,
 	output float TransmissionDepth = 0.0)
 {
@@ -47,14 +45,6 @@ shader node_light_path(
 	getattribute("path:ray_depth", ray_depth);
 	RayDepth = (float)ray_depth;

-	int diffuse_depth;
-	getattribute("path:diffuse_depth", diffuse_depth);
-	DiffuseDepth = (float)diffuse_depth;
-
-	int glossy_depth;
-	getattribute("path:glossy_depth", glossy_depth);
-	GlossyDepth = (float)glossy_depth;
-
 	int transparent_depth;
 	getattribute("path:transparent_depth", transparent_depth);
 	TransparentDepth = (float)transparent_depth;
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -137,22 +137,22 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(

 		/* holdout */
 #ifdef __HOLDOUT__
-		if(((ccl_fetch(sd, flag) & SD_HOLDOUT) ||
-		    (ccl_fetch(sd, object_flag) & SD_OBJECT_HOLDOUT_MASK)) &&
+		if((ccl_fetch(sd, flag) & (SD_HOLDOUT|SD_HOLDOUT_MASK)) &&
 		   (state->flag & PATH_RAY_CAMERA))
 		{
 			if(kernel_data.background.transparent) {
 				float3 holdout_weight;
-				if(ccl_fetch(sd, object_flag) & SD_OBJECT_HOLDOUT_MASK) {
+
+				if(ccl_fetch(sd, flag) & SD_HOLDOUT_MASK)
 					holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
-				}
-				else {
+				else
 					holdout_weight = shader_holdout_eval(kg, sd);
-				}
+
 				/* any throughput is ok, should all be identical here */
 				L_transparent_coop[ray_index] += average(holdout_weight*throughput);
 			}
-			if(ccl_fetch(sd, object_flag) & SD_OBJECT_HOLDOUT_MASK) {
+
+			if(ccl_fetch(sd, flag) & SD_HOLDOUT_MASK) {
 				ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
 				*enqueue_flag = 1;
 			}
--- a/intern/cycles/kernel/split/kernel_scene_intersect.h
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -116,9 +116,8 @@ ccl_device void kernel_scene_intersect(

 #ifdef __KERNEL_DEBUG__
 	if(state.flag & PATH_RAY_CAMERA) {
-		debug_data->num_bvh_traversed_nodes += isect->num_traversed_nodes;
+		debug_data->num_bvh_traversal_steps += isect->num_traversal_steps;
 		debug_data->num_bvh_traversed_instances += isect->num_traversed_instances;
-		debug_data->num_bvh_intersections += isect->num_intersections;
 	}
 	debug_data->num_ray_bounces++;
 #endif
--- a/intern/cycles/kernel/svm/svm_light_path.h
+++ b/intern/cycles/kernel/svm/svm_light_path.h
@@ -34,8 +34,6 @@ ccl_device void svm_node_light_path(ShaderData *sd, ccl_addr_space PathState *st
 		case NODE_LP_backfacing: info = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f: 0.0f; break;
 		case NODE_LP_ray_length: info = ccl_fetch(sd, ray_length); break;
 		case NODE_LP_ray_depth: info = (float)state->bounce; break;
-		case NODE_LP_ray_diffuse: info = (float)state->diffuse_bounce; break;
-		case NODE_LP_ray_glossy: info = (float)state->glossy_bounce; break;
 		case NODE_LP_ray_transparent: info = (float)state->transparent_bounce; break;
 		case NODE_LP_ray_transmission: info = (float)state->transmission_bounce; break;
 	}
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -188,8 +188,6 @@ typedef enum NodeLightPath {
 	NODE_LP_backfacing,
 	NODE_LP_ray_length,
 	NODE_LP_ray_depth,
-	NODE_LP_ray_diffuse,
-	NODE_LP_ray_glossy,
 	NODE_LP_ray_transparent,
 	NODE_LP_ray_transmission,
 } NodeLightPath;
--- a/intern/cycles/kernel/svm/svm_wireframe.h
+++ b/intern/cycles/kernel/svm/svm_wireframe.h
@@ -57,7 +57,7 @@ ccl_device_inline float wireframe(KernelGlobals *kg,
 		else
 			motion_triangle_vertices(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), Co);

-		if(!(ccl_fetch(sd, object_flag) & SD_OBJECT_TRANSFORM_APPLIED)) {
+		if(!(ccl_fetch(sd, flag) & SD_TRANSFORM_APPLIED)) {
 			object_position_transform(kg, sd, &Co[0]);
 			object_position_transform(kg, sd, &Co[1]);
 			object_position_transform(kg, sd, &Co[2]);
--- a/intern/cycles/render/bake.cpp
+++ b/intern/cycles/render/bake.cpp
@@ -135,16 +135,20 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
 {
 	size_t num_pixels = bake_data->size();

-	int num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1;
+	progress.reset_sample();
+	this->num_parts = 0;

-	/* calculate the total pixel samples for the progress bar */
-	total_pixel_samples = 0;
+	/* calculate the total parts for the progress bar */
 	for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
 		size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
-		total_pixel_samples += shader_size * num_samples;
+
+		DeviceTask task(DeviceTask::SHADER);
+		task.shader_w = shader_size;
+
+		this->num_parts += device->get_split_task_count(task);
 	}
-	progress.reset_sample();
-	progress.set_total_pixel_samples(total_pixel_samples);
+
+	this->num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1;

 	for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
 		size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
@@ -183,9 +187,9 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
 		task.shader_x = 0;
 		task.offset = shader_offset;
 		task.shader_w = d_output.size();
-		task.num_samples = num_samples;
+		task.num_samples = this->num_samples;
 		task.get_cancel = function_bind(&Progress::get_cancel, &progress);
-		task.update_progress_sample = function_bind(&Progress::add_samples_update, &progress, _1, _2);
+		task.update_progress_sample = function_bind(&Progress::increment_sample_update, &progress);

 		device->task_add(task);
 		device->task_wait();
--- a/intern/cycles/render/bake.h
+++ b/intern/cycles/render/bake.h
@@ -73,7 +73,8 @@ public:

 	bool need_update;

-	int total_pixel_samples;
+	int num_samples;
+	int num_parts;

 private:
 	BakeData *m_bake_data;
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Luna Rood	a2ed635a73	Fix typo and change `.enabled` to `.active`	2017-02-18 04:12:29 -02:00
Luna Rood	b3aead8fd7	Use parenthesis for bit shifts	2017-02-14 18:13:08 -02:00
Luna Rood	733b5b8c66	Remove unused `weight_components`	2017-02-14 18:13:08 -02:00
Luna Rood	5a17cb4c08	Use poll callback to disable bind operator instead of Python	2017-02-14 18:13:08 -02:00
Luna Rood	e5e44c01f2	icon	2017-02-06 01:23:18 -02:00
Luna Rood	a3e32e2ab5	Review: Multithread deform code	2017-01-25 04:05:53 -02:00
Luna Rood	e843f42e66	Review: Cleanup	2017-01-25 04:05:52 -02:00
Luna Rood	96f6ec07fb	Review: Add infinite weight flags enum	2017-01-25 04:05:52 -02:00
Luna Rood	c38e19ca67	Review: Move stuff to helper func and more cleanup	2017-01-25 04:05:52 -02:00
Luna Rood	96d66c7e4d	Review: Optimize numpoly calculation	2017-01-25 04:05:52 -02:00
Luna Rood	46821f072d	Review: Join allocations and some bpoly refactor	2017-01-25 04:05:52 -02:00
Luna Rood	f870343208	Review: Combine allocations and minor cleanup	2017-01-25 04:05:52 -02:00
Luna Rood	cf1a7e3944	Review: Report errors in UI and some more cleanup	2017-01-25 04:05:52 -02:00
Luna Rood	cf660b2a02	Review: Fix depsgraph relation	2017-01-25 04:05:52 -02:00
Luna Rood	6f3957770d	Review: Fix indentations and use MEM_SAFE_FREE	2017-01-25 04:05:52 -02:00
Luna Rood	7608f366c7	Review: Replace weight_components with individual variables	2017-01-25 04:05:52 -02:00
Luna Rood	8c220c57f9	Review: More cleanup...	2017-01-25 04:05:52 -02:00
Luna Rood	a300f80043	Review: Inline loop indices Also fixed endian switch sign, and UI Python thingy...	2017-01-25 04:05:52 -02:00
Luna Rood	22ce298d73	General cleanup (unsigned stuff and loop counter inlining)	2017-01-15 16:54:46 -02:00
Luna Rood	3469aa47c1	Remove warnings	2017-01-15 16:54:46 -02:00
Luna Rood	097a560bc9	Fix silly mistake in nearestVert	2017-01-15 16:54:45 -02:00
Luna Rood	1b7623fc06	Change angle function calls in sdef	2017-01-14 01:51:55 -02:00
Luna Rood	c546256563	Change angle function call in 3d to 2d mapping function	2017-01-14 01:51:55 -02:00
Luna Rood	5c263a9050	Split interp_weights_face_v3 into specific functions for tris and quads	2017-01-11 15:52:52 -02:00
Luna Rood	8745cd825a	Remove custom weight interp func in favor of Blender's built-in implementation	2017-01-11 15:52:32 -02:00
Luna Rood	28622ae81e	Fix VS 2015 issue (change isnanf to isnan)	2017-01-11 13:05:53 -02:00
Luna Rood	d6c7163c06	Fix 2d mapping function's name	2017-01-11 03:59:30 -02:00
Luna Rood	0bb57759ec	Replace "cent" functions from math_geom with "mid" ones from math_vector	2017-01-10 20:29:22 -02:00
Luna Rood	5e1d438d5e	Constify some stuff (for clarity and correctness)	2017-01-10 16:42:34 -02:00
Luna Rood	0721bc0ac4	Silly const mistake (missed in refactor...)	2017-01-03 20:02:49 -02:00
Luna Rood	7ca0894a17	Implement target poly influence interpolation	2017-01-03 19:27:08 -02:00
Luna Rood	751496437b	Add 3d to 2d plane mapping functions to math lib	2017-01-03 19:26:03 -02:00
Luna Rood	3014601f3b	Fix out of bounds memory access in interp_weights_face_v3 interp_weights_face_v3 required a length four array for weights even when calculating weights for a tri, otherwise, it would access unkown memory. This fix allows a weight array of size three to be passed when only calculating tri weights.	2017-01-03 19:22:08 -02:00
Luna Rood	b80971ce10	Initial Surface Deform Modifier implementation	2016-11-29 23:04:40 -02:00
Luna Rood	68f5ce194b	Add cent_poly_v3 function	2016-11-27 00:44:48 -02:00
Luna Rood	1e9003aea5	Add is_poly_convex_v3 function	2016-11-25 14:56:09 -02:00
Luna Rood	95701b0b04	Fix (unreported) looptri array not being recalculated in ccgDM and emDM In ccgDM and emDM, looptri array recalculation was being handled directly by `DM_getLoopTriArray` (`getLoopTriArray` callback), while `DM_recalcLoopTri` (`recalcLoopTri` callback) was doing nothing. This results in the array not being recalculated when other functions that depend on the array data are called. These functions, such as `getNumLoopTris`, call `recalcLoopTri` to ensure the data is up to date, but in the case of CCGDerivedMesh that was doing nothing. This moves all the recalculation code to `ccgDM_recalcLoopTri` and makes `ccgDM_getLoopTriArray` call that. Reviewed By: mont29 Differential Revision: https://developer.blender.org/D2375	2016-11-25 14:49:58 -02:00