Cycles Denoising: Fix reading of the new properties

Merge remote-tracking branch 'origin/master' into temp-cycles-denoising
Cycles Denoising: Add option to store feature passes in the render result
2017-05-04 14:57:20 +02:00 · 2017-05-04 14:20:27 +02:00 · 2017-05-04 14:19:50 +02:00 · 2017-05-04 14:19:50 +02:00 · 2017-05-04 12:27:28 +02:00 · 2017-05-04 12:18:39 +02:00
1116 changed files with 48594 additions and 23862 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -2,15 +2,19 @@
 	path = release/scripts/addons
 	url = ../blender-addons.git
 	ignore = all
+	branch = master
 [submodule "release/scripts/addons_contrib"]
 	path = release/scripts/addons_contrib
 	url = ../blender-addons-contrib.git
 	ignore = all
+	branch = master
 [submodule "release/datafiles/locale"]
 	path = release/datafiles/locale
 	url = ../blender-translations.git
 	ignore = all
+	branch = master
 [submodule "source/tools"]
 	path = source/tools
 	url = ../blender-dev-tools.git
 	ignore = all
+	branch = master
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -192,7 +192,7 @@ endif()
 #-----------------------------------------------------------------------------
 # Options

-# First platform spesific non-cached vars
+# First platform specific non-cached vars
 if(UNIX AND NOT APPLE)
 	set(WITH_X11 ON)
 endif()
@@ -363,6 +363,12 @@ option(WITH_LIBMV         "Enable Libmv structure from motion library" ON)
 option(WITH_LIBMV_SCHUR_SPECIALIZATIONS "Enable fixed-size schur specializations." OFF)
 mark_as_advanced(WITH_LIBMV_SCHUR_SPECIALIZATIONS)

+# Logging/unit test libraries.
+option(WITH_SYSTEM_GFLAGS   "Use system-wide Gflags instead of a bundled one" OFF)
+option(WITH_SYSTEM_GLOG     "Use system-wide Glog instead of a bundled one" OFF)
+mark_as_advanced(WITH_SYSTEM_GFLAGS)
+mark_as_advanced(WITH_SYSTEM_GLOG)
+
 # Freestyle
 option(WITH_FREESTYLE     "Enable Freestyle (advanced edges rendering)" ON)

@@ -445,6 +451,7 @@ option(WITH_BOOST					"Enable features depending on boost" ON)

 # Unit testsing
 option(WITH_GTESTS "Enable GTest unit testing" OFF)
+option(WITH_OPENGL_TESTS "Enable OpenGL related unit testing (Experimental)" OFF)


 # Documentation
@@ -518,18 +525,20 @@ endif()
 option(WITH_LEGACY_DEPSGRAPH "Build Blender with legacy dependency graph" ON)
 mark_as_advanced(WITH_LEGACY_DEPSGRAPH)

-# Use hardcoded paths or find_package to find externals
-option(WITH_WINDOWS_FIND_MODULES "Use find_package to locate libraries" OFF)
-mark_as_advanced(WITH_WINDOWS_FIND_MODULES)
+if(WIN32)
+	# Use hardcoded paths or find_package to find externals
+	option(WITH_WINDOWS_FIND_MODULES "Use find_package to locate libraries" OFF)
+	mark_as_advanced(WITH_WINDOWS_FIND_MODULES)

-option(WITH_WINDOWS_CODESIGN "Use signtool to sign the final binary." OFF)
-mark_as_advanced(WITH_WINDOWS_CODESIGN)
+	option(WITH_WINDOWS_CODESIGN "Use signtool to sign the final binary." OFF)
+	mark_as_advanced(WITH_WINDOWS_CODESIGN)

-set(WINDOWS_CODESIGN_PFX CACHE FILEPATH  "Path to pfx file to use for codesigning.")
-mark_as_advanced(WINDOWS_CODESIGN_PFX)
+	set(WINDOWS_CODESIGN_PFX CACHE FILEPATH  "Path to pfx file to use for codesigning.")
+	mark_as_advanced(WINDOWS_CODESIGN_PFX)

-set(WINDOWS_CODESIGN_PFX_PASSWORD CACHE STRING  "password for pfx file used for codesigning.")
-mark_as_advanced(WINDOWS_CODESIGN_PFX_PASSWORD)
+	set(WINDOWS_CODESIGN_PFX_PASSWORD CACHE STRING  "password for pfx file used for codesigning.")
+	mark_as_advanced(WINDOWS_CODESIGN_PFX_PASSWORD)
+endif()

 # avoid using again
 option_defaults_clear()
@@ -557,113 +566,7 @@ endif()
 # Apple

 if(APPLE)
-	# require newer cmake on osx because of version handling,
-	# older cmake cannot handle 2 digit subversion!
-	cmake_minimum_required(VERSION 3.0.0)
-
-	if(NOT CMAKE_OSX_ARCHITECTURES)
-		set(CMAKE_OSX_ARCHITECTURES x86_64 CACHE STRING
-			"Choose the architecture you want to build Blender for: i386, x86_64 or ppc"
-			FORCE)
-	endif()
-
-	if(NOT DEFINED OSX_SYSTEM)
-		execute_process(
-		        COMMAND xcodebuild -version -sdk macosx SDKVersion
-		        OUTPUT_VARIABLE OSX_SYSTEM
-		        OUTPUT_STRIP_TRAILING_WHITESPACE)
-	endif()
-
-	# workaround for incorrect cmake xcode lookup for developer previews - XCODE_VERSION does not
-	# take xcode-select path into account but would always look  into /Applications/Xcode.app
-	# while dev versions are named Xcode<version>-DP<preview_number>
-	execute_process(
-	        COMMAND xcode-select --print-path
-	        OUTPUT_VARIABLE XCODE_CHECK OUTPUT_STRIP_TRAILING_WHITESPACE)
-	string(REPLACE "/Contents/Developer" "" XCODE_BUNDLE ${XCODE_CHECK}) # truncate to bundlepath in any case
-	
-	if(${CMAKE_GENERATOR} MATCHES "Xcode")
-	
-		# earlier xcode has no bundled developer dir, no sense in getting xcode path from
-		if(${XCODE_VERSION} VERSION_GREATER 4.2) 
-			# reduce to XCode name without dp extension
-			string(SUBSTRING "${XCODE_CHECK}" 14 6 DP_NAME) 
-			if(${DP_NAME} MATCHES Xcode5)
-				set(XCODE_VERSION 5)
-			endif()
-		endif()
-
-		##### cmake incompatibility with xcode  4.3 and higher #####
-		if(${XCODE_VERSION} MATCHES '') # cmake fails due looking for xcode in the wrong path, thus will be empty var
-			message(FATAL_ERROR "Xcode 4.3 and higher must be used with cmake 2.8-8 or higher")
-		endif()
-		### end cmake incompatibility with xcode 4.3 and higher ###
-		
-		if(${XCODE_VERSION} VERSION_EQUAL 4 OR ${XCODE_VERSION} VERSION_GREATER 4 AND ${XCODE_VERSION} VERSION_LESS 4.3)
-			# Xcode 4 defaults to the Apple LLVM Compiler.
-			# Override the default compiler selection because Blender only compiles with gcc up to xcode 4.2
-			set(CMAKE_XCODE_ATTRIBUTE_GCC_VERSION "com.apple.compilers.llvmgcc42")
-			message(STATUS "Setting compiler to: " ${CMAKE_XCODE_ATTRIBUTE_GCC_VERSION})
-		endif()
-	else() # unix makefile generator does not fill XCODE_VERSION var, so we get it with a command
-		execute_process(COMMAND xcodebuild -version OUTPUT_VARIABLE XCODE_VERS_BUILD_NR)
-		string(SUBSTRING "${XCODE_VERS_BUILD_NR}" 6 3 XCODE_VERSION) # truncate away build-nr
-		unset(XCODE_VERS_BUILD_NR)
-	endif()
-
-	message(STATUS "Detected OS X ${OSX_SYSTEM} and Xcode ${XCODE_VERSION} at ${XCODE_BUNDLE}")
-
-	if(${XCODE_VERSION} VERSION_LESS 4.3)
-		# use guaranteed existing sdk
-		set(CMAKE_OSX_SYSROOT /Developer/SDKs/MacOSX${OSX_SYSTEM}.sdk CACHE PATH "" FORCE)
-	else()
-		# note: xcode-select path could be ambigous,
-		# cause /Applications/Xcode.app/Contents/Developer or /Applications/Xcode.app would be allowed
-		# so i use a selfcomposed bundlepath here  
-		set(OSX_SYSROOT_PREFIX ${XCODE_BUNDLE}/Contents/Developer/Platforms/MacOSX.platform)
-		message(STATUS "OSX_SYSROOT_PREFIX: " ${OSX_SYSROOT_PREFIX})
-		set(OSX_DEVELOPER_PREFIX /Developer/SDKs/MacOSX${OSX_SYSTEM}.sdk) # use guaranteed existing sdk
-		set(CMAKE_OSX_SYSROOT ${OSX_SYSROOT_PREFIX}/${OSX_DEVELOPER_PREFIX} CACHE PATH "" FORCE)
-		if(${CMAKE_GENERATOR} MATCHES "Xcode")
-			# to silence sdk not found warning, just overrides CMAKE_OSX_SYSROOT
-			set(CMAKE_XCODE_ATTRIBUTE_SDKROOT macosx${OSX_SYSTEM})
-		endif()
-
-		# QuickTime framework is no longer available in SDK 10.12+
-		if(WITH_CODEC_QUICKTIME AND ${OSX_SYSTEM} VERSION_GREATER 10.11)
-			set(WITH_CODEC_QUICKTIME OFF)
-			message(STATUS "QuickTime not supported by SDK ${OSX_SYSTEM}, disabling WITH_CODEC_QUICKTIME")
-		endif()
-	endif()
-
-	if(OSX_SYSTEM MATCHES 10.9)
-		# make sure syslibs and headers are looked up in sdk ( expecially for 10.9 openGL atm. )
-		set(CMAKE_FIND_ROOT_PATH ${CMAKE_OSX_SYSROOT})
-	endif()
-
-	if(WITH_CXX11)
-		# 10.9 is our min. target, if you use higher sdk, weak linking happens
-		if(CMAKE_OSX_DEPLOYMENT_TARGET)
-			if(${CMAKE_OSX_DEPLOYMENT_TARGET} VERSION_LESS 10.9)
-				message(STATUS "Setting deployment target to 10.9, lower versions are incompatible with WITH_CXX11")
-				set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "" FORCE)
-			endif()
-		else()
-			set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "" FORCE)
-		endif()
-	else()
-		if(NOT CMAKE_OSX_DEPLOYMENT_TARGET)
-			# 10.6 is our min. target, if you use higher sdk, weak linking happens
-			set(CMAKE_OSX_DEPLOYMENT_TARGET "10.6" CACHE STRING "" FORCE)
-		endif()
-	endif()
-	
-	if(NOT ${CMAKE_GENERATOR} MATCHES "Xcode")
-		# force CMAKE_OSX_DEPLOYMENT_TARGET for makefiles, will not work else ( cmake bug ? )
-		set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
-		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
-		add_definitions("-DMACOSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET}")
-	endif()
+	include(platform_apple_xcode)
 endif()


@@ -723,7 +626,7 @@ if(NOT WITH_BOOST)
 	macro(set_and_warn
 		_setting _val)
 		if(${${_setting}})
-			message(STATUS "'WITH_BOOST' is disabled: forceing 'set(${_setting} ${_val})'")
+			message(STATUS "'WITH_BOOST' is disabled: forcing 'set(${_setting} ${_val})'")
 		endif()
 		set(${_setting} ${_val})
 	endmacro()
@@ -833,6 +736,10 @@ if(WITH_AUDASPACE)
 	endif()
 endif()

+if(APPLE)
+	apple_check_quicktime()
+endif()
+
 #-----------------------------------------------------------------------------
 # Check for valid directories
 # ... a partial checkout may cause this.
@@ -924,7 +831,7 @@ if(WITH_X11)
 	if(WITH_X11_ALPHA)
 		find_library(X11_Xrender_LIB Xrender  ${X11_LIB_SEARCH_PATH})
 		mark_as_advanced(X11_Xrender_LIB)
-		if (X11_Xrender_LIB)
+		if(X11_Xrender_LIB)
 			list(APPEND PLATFORM_LINKLIBS ${X11_Xrender_LIB})
 		else()
 			set(WITH_X11_ALPHA OFF)
@@ -1010,16 +917,16 @@ endif()
 if(MSVC)
 	# for some reason this fails on msvc
 	add_definitions(-D__LITTLE_ENDIAN__)
-	
-# OSX-Note: as we do crosscompiling with specific set architecture,
-# endianess-detection and autosetting is counterproductive
+
+# OSX-Note: as we do cross-compiling with specific set architecture,
+# endianess-detection and auto-setting is counterproductive
 # so we just set endianess according CMAKE_OSX_ARCHITECTURES

 elseif(CMAKE_OSX_ARCHITECTURES MATCHES i386 OR CMAKE_OSX_ARCHITECTURES MATCHES x86_64)
 	add_definitions(-D__LITTLE_ENDIAN__)
-elseif(CMAKE_OSX_ARCHITECTURES MATCHES ppc OR CMAKE_OSX_ARCHITECTURES MATCHES ppc64)		
+elseif(CMAKE_OSX_ARCHITECTURES MATCHES ppc OR CMAKE_OSX_ARCHITECTURES MATCHES ppc64)
 	add_definitions(-D__BIG_ENDIAN__)
-	
+
 else()
 	include(TestBigEndian)
 	test_big_endian(_SYSTEM_BIG_ENDIAN)
@@ -1314,15 +1221,42 @@ endif()
 # Configure GLog/GFlags

 if(WITH_LIBMV OR WITH_GTESTS OR (WITH_CYCLES AND WITH_CYCLES_LOGGING))
-	set(GLOG_DEFINES
-		-DGOOGLE_GLOG_DLL_DECL=
-	)
+	if(WITH_SYSTEM_GFLAGS)
+		find_package(Gflags)
+		if(NOT GFLAGS_FOUND)
+			message(FATAL_ERROR "System wide Gflags is requested but was not found")
+		endif()
+		# FindGflags does not define this, and we are not even sure what to use here.
+		set(GFLAGS_DEFINES)
+	else()
+		set(GFLAGS_DEFINES
+			-DGFLAGS_DLL_DEFINE_FLAG=
+			-DGFLAGS_DLL_DECLARE_FLAG=
+			-DGFLAGS_DLL_DECL=
+		)
+		set(GFLAGS_NAMESPACE "gflags")
+		set(GFLAGS_LIBRARIES extern_gflags)
+		set(GFLAGS_INCLUDE_DIRS "${PROJECT_SOURCE_DIR}/extern/gflags/src")
+	endif()

-	set(GFLAGS_DEFINES
-		-DGFLAGS_DLL_DEFINE_FLAG=
-		-DGFLAGS_DLL_DECLARE_FLAG=
-		-DGFLAGS_DLL_DECL=
-	)
+	if(WITH_SYSTEM_GLOG)
+		find_package(Glog)
+		if(NOT GLOG_FOUND)
+			message(FATAL_ERROR "System wide Glog is requested but was not found")
+		endif()
+		# FindGlog does not define this, and we are not even sure what to use here.
+		set(GLOG_DEFINES)
+	else()
+		set(GLOG_DEFINES
+			-DGOOGLE_GLOG_DLL_DECL=
+		)
+		set(GLOG_LIBRARIES extern_glog)
+		if(WIN32)
+			set(GLOG_INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/extern/glog/src/windows)
+		else()
+			set(GLOG_INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/extern/glog/src)
+		endif()
+	endif()
 endif()

 #-----------------------------------------------------------------------------
@@ -1604,7 +1538,7 @@ endif()
 set(CMAKE_C_FLAGS "${C_WARNINGS} ${CMAKE_C_FLAGS} ${PLATFORM_CFLAGS}")
 set(CMAKE_CXX_FLAGS "${CXX_WARNINGS} ${CMAKE_CXX_FLAGS} ${PLATFORM_CFLAGS}")

-# defined above, platform spesific but shared names
+# defined above, platform specific but shared names
 mark_as_advanced(
 	CYCLES_OSL
 	OSL_LIB_EXEC
--- a/6
+++ b/6
@@ -1,4 +1,4 @@
-# -*- mode: gnumakefile; tab-width: 8; indent-tabs-mode: t; -*-
+# -*- mode: gnumakefile; tab-width: 4; indent-tabs-mode: t; -*-
 # vim: tabstop=4
 #
 # ##### BEGIN GPL LICENSE BLOCK #####
@@ -113,7 +113,7 @@ CMAKE_CONFIG = cmake $(BUILD_CMAKE_ARGS) \
 # X11 spesific
 ifdef DISPLAY
 	CMAKE_CONFIG_TOOL = cmake-gui
-else 
+else
 	CMAKE_CONFIG_TOOL = ccmake
 endif

@@ -127,7 +127,7 @@ all: .FORCE
 #	# if test ! -f $(BUILD_DIR)/CMakeCache.txt ; then \
 #	# 	$(CMAKE_CONFIG); \
 #	# fi
-	
+
 #	# do this always incase of failed initial build, could be smarter here...
 	@$(CMAKE_CONFIG)

--- a/build_files/build_environment/install_deps.sh
+++ b/build_files/build_environment/install_deps.sh
@@ -322,8 +322,8 @@ OPENEXR_FORCE_REBUILD=false
 OPENEXR_SKIP=false
 _with_built_openexr=false

-OIIO_VERSION="1.7.8"
-OIIO_VERSION_MIN="1.6.0"
+OIIO_VERSION="1.7.13"
+OIIO_VERSION_MIN="1.7.13"
 OIIO_VERSION_MAX="1.9.0"  # UNKNOWN currently # Not supported by current OSL...
 OIIO_FORCE_BUILD=false
 OIIO_FORCE_REBUILD=false
@@ -360,7 +360,7 @@ OPENVDB_FORCE_REBUILD=false
 OPENVDB_SKIP=false

 # Alembic needs to be compiled for now
-ALEMBIC_VERSION="1.6.0"
+ALEMBIC_VERSION="1.7.1"
 ALEMBIC_VERSION_MIN=$ALEMBIC_VERSION
 ALEMBIC_FORCE_BUILD=false
 ALEMBIC_FORCE_REBUILD=false
@@ -795,7 +795,7 @@ CXXFLAGS_BACK=$CXXFLAGS
 if [ "$USE_CXX11" = true ]; then
  WARNING "You are trying to use c++11, this *should* go smoothely with any very recent distribution
 However, if you are experiencing linking errors (also when building Blender itself), please try the following:
-    * Re-run this script with `--build-all --force-all` options.
+    * Re-run this script with '--build-all --force-all' options.
    * Ensure your gcc version is at the very least 4.8, if possible you should really rather use gcc-5.1 or above.

 Please note that until the transition to C++11-built libraries if completed in your distribution, situation will
@@ -2236,9 +2236,6 @@ compile_ALEMBIC() {
    return
  fi

-  compile_HDF5
-  PRINT ""
-
  # To be changed each time we make edits that would modify the compiled result!
  alembic_magic=2
  _init_alembic
@@ -2266,8 +2263,16 @@ compile_ALEMBIC() {

    cmake_d="-D CMAKE_INSTALL_PREFIX=$_inst"

+    # Without Boost or TR1, Alembic requires C++11.
+    if [ "$USE_CXX11" != true ]; then
+      cmake_d="$cmake_d -D ALEMBIC_LIB_USES_BOOST=ON"
+      cmake_d="$cmake_d -D ALEMBIC_LIB_USES_TR1=OFF"
+    fi
+
    if [ -d $INST/boost ]; then
-      cmake_d="$cmake_d -D BOOST_ROOT=$INST/boost"
+      if [ -d $INST/boost ]; then
+        cmake_d="$cmake_d -D BOOST_ROOT=$INST/boost"
+      fi
      cmake_d="$cmake_d -D USE_STATIC_BOOST=ON"
    else
      cmake_d="$cmake_d -D USE_STATIC_BOOST=OFF"
@@ -2285,8 +2290,6 @@ compile_ALEMBIC() {
      cmake_d="$cmake_d -D USE_STATIC_HDF5=OFF"
      cmake_d="$cmake_d -D ALEMBIC_ILMBASE_LINK_STATIC=OFF"
      cmake_d="$cmake_d -D ALEMBIC_SHARED_LIBS=OFF"
-      cmake_d="$cmake_d -D ALEMBIC_LIB_USES_BOOST=ON"
-      cmake_d="$cmake_d -D ALEMBIC_LIB_USES_TR1=OFF"
      INFO "ILMBASE_ROOT=$INST/openexr"
    fi

@@ -2598,7 +2601,6 @@ install_DEB() {
  fi

  # These libs should always be available in debian/ubuntu official repository...
-  OPENJPEG_DEV="libopenjpeg-dev"
  VORBIS_DEV="libvorbis-dev"
  OGG_DEV="libogg-dev"
  THEORA_DEV="libtheora-dev"
@@ -2606,16 +2608,24 @@ install_DEB() {
  _packages="gawk cmake cmake-curses-gui build-essential libjpeg-dev libpng-dev libtiff-dev \
             git libfreetype6-dev libx11-dev flex bison libtbb-dev libxxf86vm-dev \
             libxcursor-dev libxi-dev wget libsqlite3-dev libxrandr-dev libxinerama-dev \
-             libbz2-dev libncurses5-dev libssl-dev liblzma-dev libreadline-dev $OPENJPEG_DEV \
+             libbz2-dev libncurses5-dev libssl-dev liblzma-dev libreadline-dev \
             libopenal-dev libglew-dev yasm $THEORA_DEV $VORBIS_DEV $OGG_DEV \
             libsdl1.2-dev libfftw3-dev patch bzip2 libxml2-dev libtinyxml-dev libjemalloc-dev"
             # libglewmx-dev  (broken in deb testing currently...)

-  OPENJPEG_USE=true
  VORBIS_USE=true
  OGG_USE=true
  THEORA_USE=true

+  PRINT ""
+  # Newer Ubuntu releases (17.04 and later) have no openjpeg lib, so only use it when available!
+  OPENJPEG_DEV="libopenjpeg-dev"
+  check_package_DEB $OPENJPEG_DEV
+  if [ $? -eq 0 ]; then
+    _packages="$_packages $OPENJPEG_DEV"
+    OPENJPEG_USE=true
+  fi
+
  PRINT ""
  # Some not-so-old distro (ubuntu 12.4) do not have it, do not fail in this case, just warn.
  YAMLCPP_DEV="libyaml-cpp-dev"
@@ -2777,7 +2787,7 @@ install_DEB() {

      boost_version=$(echo `get_package_version_DEB libboost-dev` | sed -r 's/^([0-9]+\.[0-9]+).*/\1/')

-      install_packages_DEB libboost-{filesystem,iostreams,locale,regex,system,thread,wave}$boost_version-dev
+      install_packages_DEB libboost-{filesystem,iostreams,locale,regex,system,thread,wave,program-options}$boost_version-dev
      clean_Boost
    else
      compile_Boost
@@ -4252,7 +4262,7 @@ print_info() {
    PRINT "  $_3"
    _buildargs="$_buildargs $_1 $_2 $_3"
    if [ -d $INST/osl ]; then
-      _1="-D CYCLES_OSL=$INST/osl"
+      _1="-D OSL_ROOT_DIR=$INST/osl"
      PRINT "  $_1"
      _buildargs="$_buildargs $_1"
    fi
--- a/build_files/buildbot/config/blender_linux.cmake
+++ b/build_files/buildbot/config/blender_linux.cmake
@@ -94,6 +94,10 @@ set(OPENCOLORIO_OPENCOLORIO_LIBRARY "${OPENCOLORIO_ROOT_DIR}/lib/libOpenColorIO.
 set(OPENCOLORIO_TINYXML_LIBRARY "${OPENCOLORIO_ROOT_DIR}/lib/libtinyxml.a"         CACHE STRING "" FORCE)
 set(OPENCOLORIO_YAML-CPP_LIBRARY "${OPENCOLORIO_ROOT_DIR}/lib/libyaml-cpp.a"       CACHE STRING "" FORCE)

+# Freetype
+set(FREETYPE_INCLUDE_DIRS "/usr/include/freetype2"       CACHE STRING "" FORCE)
+set(FREETYPE_LIBRARY "/usr/lib${MULTILIB}/libfreetype.a" CACHE STRING "" FORCE)
+
 # OpenImageIO
 if(GLIBC EQUAL "2.19")
 	set(OPENIMAGEIO_LIBRARY
@@ -102,6 +106,7 @@ if(GLIBC EQUAL "2.19")
 		/usr/lib${MULTILIB}/libwebp.a
 		/usr/lib${MULTILIB}/liblzma.a
 		/usr/lib${MULTILIB}/libjbig.a
+		${FREETYPE_LIBRARY}
 		CACHE STRING "" FORCE
 	)
 endif()
--- a/build_files/buildbot/master.cfg
+++ b/build_files/buildbot/master.cfg
@@ -4,10 +4,10 @@
 # <pep8 compliant>

 # List of the branches being built automatically overnight
-NIGHT_SCHEDULE_BRANCHES = [None]
+NIGHT_SCHEDULE_BRANCHES = [None, "blender2.8"]

 # List of the branches available for force build
-FORCE_SCHEDULE_BRANCHES = ["master", "gooseberry", "experimental-build"]
+FORCE_SCHEDULE_BRANCHES = ["master", "blender2.8", "experimental-build"]

 """
 Stock Twisted directory lister doesn't provide any information about last file
@@ -127,7 +127,14 @@ def schedule_force_build(name):
                project=forcesched.FixedParameter(name="project", default="", hide=True)),
            # For now, hide other codebases.
            forcesched.CodebaseParameter(hide=True, codebase="blender-translations"),
-            forcesched.CodebaseParameter(hide=True, codebase="blender-addons"),
+            forcesched.CodebaseParameter(
+                codebase="blender-addons",
+                branch=forcesched.ChoiceStringParameter(
+                    name="branch", choices=["master", "blender2.8"], default="master"),
+                repository=forcesched.FixedParameter(name="repository", default="", hide=True),
+                project=forcesched.FixedParameter(name="project", default="", hide=True),
+                revision=forcesched.FixedParameter(name="revision", default="", hide=True),
+            ),
            forcesched.CodebaseParameter(hide=True, codebase="blender-addons-contrib"),
            forcesched.CodebaseParameter(hide=True, codebase="blender-dev-tools"),
            forcesched.CodebaseParameter(hide=True, codebase="lib svn")],
@@ -139,11 +146,15 @@ def schedule_build(name, hour, minute=0):
        scheduler_name = "nightly " + name
        if current_branch:
            scheduler_name += ' ' + current_branch
+        # Use special addons submodule branch when building blender2.8 branch.
+        addons_branch = "master"
+        if current_branch == "blender2.8":
+            addons_branch = "blender2.8"
        c['schedulers'].append(timed.Nightly(name=scheduler_name,
            codebases={
                "blender": {"repository": ""},
                "blender-translations": {"repository": "", "branch": "master"},
-                "blender-addons": {"repository": "", "branch": "master"},
+                "blender-addons": {"repository": "", "branch": addons_branch},
                "blender-addons-contrib": {"repository": "", "branch": "master"},
                "blender-dev-tools": {"repository": "", "branch": "master"},
                "lib svn": {"repository": "", "branch": "trunk"}},
@@ -225,8 +236,7 @@ def git_step(branch=''):


 def git_submodules_update():
-    command = ['git', 'submodule', 'foreach', '--recursive',
-               'git', 'pull', 'origin', 'master']
+    command = ['git', 'submodule', 'update', '--remote']
    return ShellCommand(name='Submodules Update',
                        command=command,
                        description='updating',
@@ -235,7 +245,10 @@ def git_submodules_update():


 def lib_svn_step(dir):
-    return SVN(name='lib svn',
+    name = "lib svn"
+    if dir == "darwin":
+        name = "C++11 lib svn"
+    return SVN(name=name,
               baseURL='https://svn.blender.org/svnroot/bf-blender/%%BRANCH%%/lib/' + dir,
               codebase='lib svn',
               mode='update',
@@ -264,6 +277,9 @@ def generic_builder(id, libdir='', branch='', rsync=False):
    f = BuildFactory()
    if libdir != '':
        f.addStep(lib_svn_step(libdir))
+        # Special trick to make sure we always have all the libs.
+        if libdir.startswith("darwin"):
+            f.addStep(lib_svn_step("darwin"))

    for submodule in ('blender-translations',
                      'blender-addons',
@@ -286,7 +302,7 @@ def generic_builder(id, libdir='', branch='', rsync=False):
        f.addStep(FileUpload(name='upload',
                             slavesrc='buildbot_upload.zip',
                             masterdest=filename,
-                             maxsize=150 * 1024 * 1024,
+                             maxsize=180 * 1024 * 1024,
                             workdir='install'))
    f.addStep(MasterShellCommand(name='unpack',
                                 command=['python2.7', unpack_script, filename],
--- a/build_files/buildbot/master_unpack.py
+++ b/build_files/buildbot/master_unpack.py
@@ -67,6 +67,9 @@ def get_platform(filename):


 def get_branch(filename):
+    if filename.startswith("blender-2.8"):
+        return "blender2.8"
+
    tokens = filename.split("-")
    branch = ""

--- a/build_files/buildbot/slave_compile.py
+++ b/build_files/buildbot/slave_compile.py
@@ -72,10 +72,11 @@ if 'cmake' in builder:
        # Set up OSX architecture
        if builder.endswith('x86_64_10_6_cmake'):
            cmake_extra_options.append('-DCMAKE_OSX_ARCHITECTURES:STRING=x86_64')
-        cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda8-hack/bin/nvcc')
        cmake_extra_options.append('-DWITH_CODEC_QUICKTIME=OFF')
        cmake_extra_options.append('-DCMAKE_OSX_DEPLOYMENT_TARGET=10.6')
-        build_cubins = False
+        cmake_extra_options.append('-DCUDA_HOST_COMPILER=/usr/local/cuda-hack/clang')
+        cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-hack/nvcc')
+


    elif builder.startswith('win'):
@@ -93,7 +94,6 @@ if 'cmake' in builder:
            elif builder.startswith('win32'):
                bits = 32
                cmake_options.extend(['-G', 'Visual Studio 12 2013'])
-        cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/bin/nvcc.exe')

    elif builder.startswith('linux'):
        tokens = builder.split("_")
@@ -112,8 +112,8 @@ if 'cmake' in builder:
            chroot_name = 'buildbot_' + deb_name + '_i686'
            cuda_chroot_name = 'buildbot_' + deb_name + '_x86_64'
            targets = ['player', 'blender', 'cuda']
-
-        cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-8.0/bin/nvcc')
+        cmake_extra_options.extend(["-DCMAKE_C_COMPILER=/usr/bin/gcc-6",
+                                    "-DCMAKE_CXX_COMPILER=/usr/bin/g++-6"])

    cmake_options.append("-C" + os.path.join(blender_dir, cmake_config_file))

@@ -180,7 +180,7 @@ if 'cmake' in builder:
            os.remove('CMakeCache.txt')
        retcode = subprocess.call(target_chroot_prefix + ['cmake', blender_dir] + target_cmake_options)
        if retcode != 0:
-            print('Condifuration FAILED!')
+            print('Configuration FAILED!')
            sys.exit(retcode)

        if 'win32' in builder or 'win64' in builder:
--- a/build_files/buildbot/slave_pack.py
+++ b/build_files/buildbot/slave_pack.py
@@ -111,7 +111,8 @@ if builder.find('cmake') != -1:
        if builder.endswith('vc2015'):
            platform += "-vc14"
        builderified_name = 'blender-{}-{}-{}'.format(blender_full_version, git_hash, platform)
-        if branch != '':
+        # NOTE: Blender 2.8 is already reflected in blender_full_version.
+        if branch != '' and branch != 'blender2.8':
            builderified_name = branch + "-" + builderified_name

        os.rename(result_file, "{}.zip".format(builderified_name))
@@ -177,7 +178,8 @@ if builder.find('cmake') != -1:
                                                      blender_hash,
                                                      blender_glibc,
                                                      blender_arch)
-        if branch != '':
+        # NOTE: Blender 2.8 is already reflected in blender_full_version.
+        if branch != '' and branch != 'blender2.8':
            package_name = branch + "-" + package_name

        upload_filename = package_name + ".tar.bz2"
--- a/build_files/cmake/Modules/FindGflags.cmake
+++ b/build_files/cmake/Modules/FindGflags.cmake
@@ -0,0 +1,603 @@
+# Ceres Solver - A fast non-linear least squares minimizer
+# Copyright 2015 Google Inc. All rights reserved.
+# http://ceres-solver.org/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# * Neither the name of Google Inc. nor the names of its contributors may be
+#   used to endorse or promote products derived from this software without
+#   specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Author: alexs.mac@gmail.com (Alex Stewart)
+#
+
+# FindGflags.cmake - Find Google gflags command-line flags library.
+#
+# This module will attempt to find gflags, either via an exported CMake
+# configuration (generated by gflags >= 2.1 which are built with CMake), or
+# by performing a standard search for all gflags components.  The order of
+# precedence for these two methods of finding gflags is controlled by:
+# GFLAGS_PREFER_EXPORTED_GFLAGS_CMAKE_CONFIGURATION.
+#
+# This module defines the following variables:
+#
+# GFLAGS_FOUND: TRUE iff gflags is found.
+# GFLAGS_INCLUDE_DIRS: Include directories for gflags.
+# GFLAGS_LIBRARIES: Libraries required to link gflags.
+# GFLAGS_NAMESPACE: The namespace in which gflags is defined.  In versions of
+#                   gflags < 2.1, this was google, for versions >= 2.1 it is
+#                   by default gflags, although can be configured when building
+#                   gflags to be something else (i.e. google for legacy
+#                   compatibility).
+#
+# The following variables control the behaviour of this module when an exported
+# gflags CMake configuration is not found.
+#
+# GFLAGS_PREFER_EXPORTED_GFLAGS_CMAKE_CONFIGURATION: TRUE/FALSE, iff TRUE
+#                           then prefer using an exported CMake configuration
+#                           generated by gflags >= 2.1 over searching for the
+#                           gflags components manually.  Otherwise (FALSE)
+#                           ignore any exported gflags CMake configurations and
+#                           always perform a manual search for the components.
+#                           Default: TRUE iff user does not define this variable
+#                           before we are called, and does NOT specify either
+#                           GFLAGS_INCLUDE_DIR_HINTS or GFLAGS_LIBRARY_DIR_HINTS
+#                           otherwise FALSE.
+# GFLAGS_INCLUDE_DIR_HINTS: List of additional directories in which to
+#                           search for gflags includes, e.g: /timbuktu/include.
+# GFLAGS_LIBRARY_DIR_HINTS: List of additional directories in which to
+#                           search for gflags libraries, e.g: /timbuktu/lib.
+# GFLAGS_ROOT_DIR,          The base directory to search for Gflags.
+#                           This can also be an environment variable.
+#
+# The following variables are also defined by this module, but in line with
+# CMake recommended FindPackage() module style should NOT be referenced directly
+# by callers (use the plural variables detailed above instead).  These variables
+# do however affect the behaviour of the module via FIND_[PATH/LIBRARY]() which
+# are NOT re-called (i.e. search for library is not repeated) if these variables
+# are set with valid values _in the CMake cache_. This means that if these
+# variables are set directly in the cache, either by the user in the CMake GUI,
+# or by the user passing -DVAR=VALUE directives to CMake when called (which
+# explicitly defines a cache variable), then they will be used verbatim,
+# bypassing the HINTS variables and other hard-coded search locations.
+#
+# GFLAGS_INCLUDE_DIR: Include directory for gflags, not including the
+#                     include directory of any dependencies.
+# GFLAGS_LIBRARY: gflags library, not including the libraries of any
+#                 dependencies.
+
+# Fall back to the environment for GFLAGS_ROOT_DIR; quoted so unset/empty is safe.
+if(NOT GFLAGS_ROOT_DIR AND NOT "$ENV{GFLAGS_ROOT_DIR}" STREQUAL "")
+  set(GFLAGS_ROOT_DIR "$ENV{GFLAGS_ROOT_DIR}")
+endif()
+
+if(DEFINED GFLAGS_ROOT_DIR)
+  set(GFLAGS_ROOT_DIR_INCLUDE "${GFLAGS_ROOT_DIR}/include")
+  set(GFLAGS_ROOT_DIR_LIB "${GFLAGS_ROOT_DIR}/lib")
+endif()
+
+# Restore CMAKE_FIND_LIBRARY_PREFIXES to the value saved when the manual search
+# began (MSVC only); a no-op if nothing was saved, so it is never clobbered.
+macro(GFLAGS_RESET_FIND_LIBRARY_PREFIX)
+  if(MSVC AND DEFINED CALLERS_CMAKE_FIND_LIBRARY_PREFIXES)
+    set(CMAKE_FIND_LIBRARY_PREFIXES "${CALLERS_CMAKE_FIND_LIBRARY_PREFIXES}")
+  endif()
+endmacro()
+
+# Called if we failed to find gflags or any of its required dependencies:
+# unsets all public (designed to be used externally) variables, reports the
+# error at a [REQUIRED/QUIET/<NONE>] priority, then return()s from the caller.
+macro(GFLAGS_REPORT_NOT_FOUND REASON_MSG)
+  unset(GFLAGS_FOUND)
+  unset(GFLAGS_INCLUDE_DIRS)
+  unset(GFLAGS_LIBRARIES)
+  # Do not use unset, as we want to keep GFLAGS_NAMESPACE in the cache,
+  # but simply clear its value.
+  set(GFLAGS_NAMESPACE "" CACHE STRING
+    "gflags namespace (google or gflags)" FORCE)
+
+  # Make results of search visible in the CMake GUI if gflags has not
+  # been found so that user does not have to toggle to advanced view.
+  mark_as_advanced(CLEAR GFLAGS_INCLUDE_DIR
+                         GFLAGS_LIBRARY
+                         GFLAGS_NAMESPACE)
+
+  gflags_reset_find_library_prefix()
+
+  # Note <package>_FIND_[REQUIRED/QUIETLY] use the camelcase library name, not
+  # uppercase.  REQUIRED is tested first so QUIET + REQUIRED still fails hard.
+  if(Gflags_FIND_REQUIRED)
+    message(FATAL_ERROR "Failed to find gflags - " ${REASON_MSG} ${ARGN})
+  elseif(Gflags_FIND_QUIETLY)
+    message(STATUS "Failed to find gflags - " ${REASON_MSG} ${ARGN})
+  else()
+    # Neither QUIETLY nor REQUIRED, use no priority which emits a message
+    # but continues configuration and allows generation.
+    message("-- Failed to find gflags - " ${REASON_MSG} ${ARGN})
+  endif()
+  return()
+endmacro()
+
+# Verify that every variable name passed as an argument is defined (it may be
+# empty, but must be defined); otherwise abort configuration with FATAL_ERROR.
+macro(GFLAGS_CHECK_VARS_DEFINED)
+  foreach(CHECK_VAR ${ARGN})
+    if(NOT DEFINED ${CHECK_VAR})
+      message(FATAL_ERROR "Ceres Bug: ${CHECK_VAR} is not defined.")
+    endif()
+  endforeach()
+endmacro()
+
+# Use check_cxx_source_compiles() to compile trivial test programs to determine
+# the gflags namespace.  This works on all OSs except Windows.  If using Visual
+# Studio, it fails because msbuild forces check_cxx_source_compiles() to use
+# CMAKE_BUILD_TYPE=Debug for the test project, which usually breaks detection
+# because MSVC requires that the test project use the same build type as gflags,
+# which would normally be built in Release.
+#
+# Defines: GFLAGS_NAMESPACE in the caller's scope with the detected namespace,
+#          which is blank (empty string, will test FALSE in CMake conditionals)
+#          if detection failed.
+function(GFLAGS_CHECK_GFLAGS_NAMESPACE_USING_TRY_COMPILE)
+  # Verify that all required variables are defined.
+  gflags_check_vars_defined(
+    GFLAGS_INCLUDE_DIR GFLAGS_LIBRARY)
+  # Ensure that GFLAGS_NAMESPACE is always empty in the caller on completion
+  # unless we explicitly set it after detecting the correct namespace.
+  set(GFLAGS_NAMESPACE "" PARENT_SCOPE)
+
+  include(CheckCXXSourceCompiles)
+  # Setup include path & link library for gflags for CHECK_CXX_SOURCE_COMPILES.
+  set(CMAKE_REQUIRED_INCLUDES ${GFLAGS_INCLUDE_DIR})
+  set(CMAKE_REQUIRED_LIBRARIES ${GFLAGS_LIBRARY} ${GFLAGS_LINK_LIBRARIES})
+  # First try the (older) google namespace.  Note that the output variable
+  # MUST be unique to the build type as otherwise the test is not repeated as
+  # it is assumed to have already been performed.
+  check_cxx_source_compiles(
+    "#include <gflags/gflags.h>
+     int main(int argc, char * argv[]) {
+       google::ParseCommandLineFlags(&argc, &argv, true);
+       return 0;
+     }"
+     GFLAGS_IN_GOOGLE_NAMESPACE)
+  if(GFLAGS_IN_GOOGLE_NAMESPACE)
+    set(GFLAGS_NAMESPACE google PARENT_SCOPE)
+    return()
+  endif()
+
+  # Try (newer) gflags namespace instead, again with its own output variable
+  # as otherwise the test is not repeated.  The CMAKE_REQUIRED_* settings
+  # below simply repeat the values already assigned above.
+  set(CMAKE_REQUIRED_INCLUDES ${GFLAGS_INCLUDE_DIR})
+  set(CMAKE_REQUIRED_LIBRARIES ${GFLAGS_LIBRARY} ${GFLAGS_LINK_LIBRARIES})
+  check_cxx_source_compiles(
+    "#include <gflags/gflags.h>
+     int main(int argc, char * argv[]) {
+        gflags::ParseCommandLineFlags(&argc, &argv, true);
+        return 0;
+     }"
+     GFLAGS_IN_GFLAGS_NAMESPACE)
+  if(GFLAGS_IN_GFLAGS_NAMESPACE)
+    set(GFLAGS_NAMESPACE gflags PARENT_SCOPE)
+    return()
+  endif()
+endfunction()
+
+# Use regex on the gflags headers to attempt to determine the gflags namespace.
+# Checks both gflags.h (contained namespace on versions < 2.1.2) and
+# gflags_declare.h, which contains the namespace on versions >= 2.1.2.
+# In general, this method should only be used when
+# GFLAGS_CHECK_GFLAGS_NAMESPACE_USING_TRY_COMPILE() cannot be used, or has
+# failed.
+#
+# Defines: GFLAGS_NAMESPACE in the caller's scope with the detected namespace,
+#          which is blank (empty string, will test FALSE in CMake conditionals)
+#          if detection failed.
+function(GFLAGS_CHECK_GFLAGS_NAMESPACE_USING_REGEX)
+  # Verify that all required variables are defined.
+  gflags_check_vars_defined(GFLAGS_INCLUDE_DIR)
+  # Ensure that GFLAGS_NAMESPACE is always empty in the caller on completion
+  # unless we explicitly set it after detecting the correct namespace.
+  set(GFLAGS_NAMESPACE "" PARENT_SCOPE)
+
+  # Scan gflags.h to identify what namespace gflags was built with.  On
+  # versions of gflags < 2.1.2, gflags.h was configured with the namespace
+  # directly, on >= 2.1.2, gflags.h uses the GFLAGS_NAMESPACE #define which
+  # is defined in gflags_declare.h, we try each location in turn.
+  set(GFLAGS_HEADER_FILE ${GFLAGS_INCLUDE_DIR}/gflags/gflags.h)
+  if(NOT EXISTS ${GFLAGS_HEADER_FILE})
+    gflags_report_not_found(
+      "Could not find file: ${GFLAGS_HEADER_FILE} "
+      "containing namespace information in gflags install located at: "
+      "${GFLAGS_INCLUDE_DIR}.")
+  endif()
+  file(READ ${GFLAGS_HEADER_FILE} GFLAGS_HEADER_FILE_CONTENTS)
+
+  string(REGEX MATCH "namespace [A-Za-z]+"
+    GFLAGS_NAMESPACE "${GFLAGS_HEADER_FILE_CONTENTS}")
+  string(REGEX REPLACE "namespace ([A-Za-z]+)" "\\1"
+    GFLAGS_NAMESPACE "${GFLAGS_NAMESPACE}")
+
+  if(NOT GFLAGS_NAMESPACE)
+    gflags_report_not_found(
+      "Failed to extract gflags namespace from header file: "
+      "${GFLAGS_HEADER_FILE}.")
+  endif()
+
+  if(GFLAGS_NAMESPACE STREQUAL "google" OR
+     GFLAGS_NAMESPACE STREQUAL "gflags")
+    # Found valid gflags namespace from gflags.h.
+    set(GFLAGS_NAMESPACE "${GFLAGS_NAMESPACE}" PARENT_SCOPE)
+    return()
+  endif()
+
+  # Failed to find gflags namespace from gflags.h, gflags is likely a new
+  # version, check gflags_declare.h, which in newer versions (>= 2.1.2) contains
+  # the GFLAGS_NAMESPACE #define, which is then referenced in gflags.h.
+  set(GFLAGS_DECLARE_FILE ${GFLAGS_INCLUDE_DIR}/gflags/gflags_declare.h)
+  if(NOT EXISTS ${GFLAGS_DECLARE_FILE})
+    gflags_report_not_found(
+      "Could not find file: ${GFLAGS_DECLARE_FILE} "
+      "containing namespace information in gflags install located at: "
+      "${GFLAGS_INCLUDE_DIR}.")
+  endif()
+  file(READ ${GFLAGS_DECLARE_FILE} GFLAGS_DECLARE_FILE_CONTENTS)
+
+  string(REGEX MATCH "#define GFLAGS_NAMESPACE [A-Za-z]+"
+    GFLAGS_NAMESPACE "${GFLAGS_DECLARE_FILE_CONTENTS}")
+  string(REGEX REPLACE "#define GFLAGS_NAMESPACE ([A-Za-z]+)" "\\1"
+    GFLAGS_NAMESPACE "${GFLAGS_NAMESPACE}")
+
+  if(NOT GFLAGS_NAMESPACE)
+    gflags_report_not_found(
+      "Failed to extract gflags namespace from declare file: "
+      "${GFLAGS_DECLARE_FILE}.")
+  endif()
+
+  if(GFLAGS_NAMESPACE STREQUAL "google" OR
+     GFLAGS_NAMESPACE STREQUAL "gflags")
+    # Found valid gflags namespace from gflags_declare.h.
+    set(GFLAGS_NAMESPACE "${GFLAGS_NAMESPACE}" PARENT_SCOPE)
+    return()
+  endif()
+endfunction()
+
+# -----------------------------------------------------------------
+# Pick the default search strategy.  When the caller has neither stated a
+# preference about exported gflags CMake configurations nor supplied any
+# include/library directory hints, favour an exported configuration (if one
+# is available) over a manual search for the installed components.
+if(NOT DEFINED GFLAGS_PREFER_EXPORTED_GFLAGS_CMAKE_CONFIGURATION AND
+   NOT (GFLAGS_INCLUDE_DIR_HINTS OR GFLAGS_LIBRARY_DIR_HINTS))
+  set(GFLAGS_PREFER_EXPORTED_GFLAGS_CMAKE_CONFIGURATION TRUE)
+  # Tell the user which default was chosen and why.
+  message(STATUS "No preference for use of exported gflags CMake configuration "
+    "set, and no hints for include/library directories provided. "
+    "Defaulting to preferring an installed/exported gflags CMake configuration "
+    "if available.")
+endif()
+
+if(GFLAGS_PREFER_EXPORTED_GFLAGS_CMAKE_CONFIGURATION)
+  # Try to find an exported CMake configuration for gflags, as generated by
+  # gflags versions >= 2.1.
+  #
+  # We search twice, so that we can invert the ordering of precedence used by
+  # find_package() for exported package build directories, and installed
+  # packages (found via CMAKE_SYSTEM_PREFIX_PATH), listed as items 6) and 7)
+  # respectively in [1].
+  #
+  # By default, exported build directories are (in theory) detected first, and
+  # this is usually the case on Windows.  However, on OS X & Linux, the install
+  # path (/usr/local) is typically present in the PATH environment variable
+  # which is checked in item 4) in [1] (i.e. before both of the above, unless
+  # NO_SYSTEM_ENVIRONMENT_PATH is passed).  As such on those OSs installed
+  # packages are usually detected in preference to exported package build
+  # directories.
+  #
+  # To ensure a more consistent response across all OSs, and as users usually
+  # want to prefer an installed version of a package over a locally built one
+  # where both exist (esp. as the exported build directory might be removed
+  # after installation), we first search with NO_CMAKE_PACKAGE_REGISTRY which
+  # means any build directories exported by the user are ignored, and thus
+  # installed directories are preferred.  If this fails to find the package
+  # we then search again, but without NO_CMAKE_PACKAGE_REGISTRY, so any
+  # exported build directories will now be detected.
+  #
+  # To prevent confusion on Windows, we also pass NO_CMAKE_BUILDS_PATH (which
+  # is item 5) in [1]), to not preferentially use projects that were built
+  # recently with the CMake GUI to ensure that we always prefer an installed
+  # version if available.
+  #
+  # [1] http://www.cmake.org/cmake/help/v2.8.11/cmake.html#command:find_package
+  find_package(gflags QUIET
+                      NO_MODULE
+                      NO_CMAKE_PACKAGE_REGISTRY
+                      NO_CMAKE_BUILDS_PATH)
+  if(gflags_FOUND)
+    message(STATUS "Found installed version of gflags: ${gflags_DIR}")
+  else()
+    # Failed to find an installed version of gflags, repeat search allowing
+    # exported build directories.
+    message(STATUS "Failed to find installed gflags CMake configuration, "
+      "searching for gflags build directories exported with CMake.")
+    # Again pass NO_CMAKE_BUILDS_PATH, as we know that gflags is exported and
+    # do not want to treat projects built with the CMake GUI preferentially.
+    find_package(gflags QUIET
+                        NO_MODULE
+                        NO_CMAKE_BUILDS_PATH)
+    if(gflags_FOUND)
+      message(STATUS "Found exported gflags build directory: ${gflags_DIR}")
+    endif()
+  endif()
+
+  set(FOUND_INSTALLED_GFLAGS_CMAKE_CONFIGURATION ${gflags_FOUND})
+
+  # gflags v2.1 - 2.1.2 shipped with a bug in their gflags-config.cmake [1]
+  # whereby gflags_LIBRARIES = "gflags", but there was no imported target
+  # called "gflags", they were called: gflags[_nothreads]-[static/shared].
+  # As this causes linker errors when gflags is not installed in a location
+  # on the current library paths, detect if this problem is present and
+  # fix it.
+  #
+  # [1] https://github.com/gflags/gflags/issues/110
+  if(gflags_FOUND)
+    # NOTE: Nested (rather than merged into the outer condition) because
+    #       NOT TARGET "${gflags_LIBRARIES}" causes problems if gflags is not
+    #       found.  gflags_VERSION is quoted as it may expand to nothing.
+    if("${gflags_VERSION}" VERSION_LESS 2.1.3 AND
+        NOT TARGET "${gflags_LIBRARIES}")
+      message(STATUS "Detected broken gflags install in: ${gflags_DIR}, "
+        "version: ${gflags_VERSION} <= 2.1.2 which defines gflags_LIBRARIES = "
+        "${gflags_LIBRARIES} which is not an imported CMake target, see: "
+        "https://github.com/gflags/gflags/issues/110.  Attempting to fix by "
+        "detecting correct gflags target.")
+      # Ordering here expresses preference for detection: the _nothreads
+      # variants come last.  Assigned with set() so repeated runs do not append.
+      set(CHECK_GFLAGS_IMPORTED_TARGET_NAMES
+        gflags-shared gflags-static
+        gflags_nothreads-shared gflags_nothreads-static)
+      foreach(CHECK_GFLAGS_TARGET ${CHECK_GFLAGS_IMPORTED_TARGET_NAMES})
+        if(TARGET ${CHECK_GFLAGS_TARGET})
+          message(STATUS "Found valid gflags target: ${CHECK_GFLAGS_TARGET}, "
+            "updating gflags_LIBRARIES.")
+          set(gflags_LIBRARIES ${CHECK_GFLAGS_TARGET})
+          break()
+        endif()
+      endforeach()
+      if(NOT TARGET "${gflags_LIBRARIES}")
+        message(STATUS "Failed to fix detected broken gflags install in: "
+          "${gflags_DIR}, version: ${gflags_VERSION} <= 2.1.2, none of the "
+          "imported targets for gflags: ${CHECK_GFLAGS_IMPORTED_TARGET_NAMES} "
+          "are defined.  Will continue with a manual search for gflags "
+          "components.  We recommend you build/install a version of gflags > "
+          "2.1.2 (or master).")
+        set(FOUND_INSTALLED_GFLAGS_CMAKE_CONFIGURATION FALSE)
+      endif()
+    endif()
+  endif()
+
+  if(FOUND_INSTALLED_GFLAGS_CMAKE_CONFIGURATION)
+    message(STATUS "Detected gflags version: ${gflags_VERSION}")
+    set(GFLAGS_FOUND ${gflags_FOUND})
+    set(GFLAGS_INCLUDE_DIR ${gflags_INCLUDE_DIR})
+    set(GFLAGS_LIBRARY ${gflags_LIBRARIES})
+
+    # gflags does not export the namespace in their CMake configuration, so
+    # use our function to determine what it should be, as it can be either
+    # gflags or google dependent upon version & configuration.
+    #
+    # NOTE: We use the regex method to determine the namespace here, as
+    #       check_cxx_source_compiles() will not use imported targets, which
+    #       is what gflags will be in this case.
+    gflags_check_gflags_namespace_using_regex()
+
+    if(NOT GFLAGS_NAMESPACE)
+      gflags_report_not_found(
+        "Failed to determine gflags namespace using regex for gflags "
+        "version: ${gflags_VERSION} exported here: ${gflags_DIR} using CMake.")
+    endif()
+  else()
+    message(STATUS "Failed to find an installed/exported CMake configuration "
+      "for gflags, will perform search for installed gflags components.")
+  endif()
+endif()
+
+if(NOT GFLAGS_FOUND)
+  # Either failed to find an exported gflags CMake configuration, or user
+  # told us not to use one.  Perform a manual search for all gflags components.
+
+  # Handle possible presence of lib prefix for libraries on MSVC, see
+  # also GFLAGS_RESET_FIND_LIBRARY_PREFIX().
+  if(MSVC)
+    # Preserve the caller's original values for CMAKE_FIND_LIBRARY_PREFIXES
+    # so that we can set it back before returning.
+    set(CALLERS_CMAKE_FIND_LIBRARY_PREFIXES "${CMAKE_FIND_LIBRARY_PREFIXES}")
+    # The empty string in this list is important, it represents the case when
+    # the libraries have no prefix (shared libraries / DLLs).
+    set(CMAKE_FIND_LIBRARY_PREFIXES "lib" "" "${CMAKE_FIND_LIBRARY_PREFIXES}")
+  endif()
+
+  # Search user-installed locations first, so that we prefer user installs
+  # to system installs where both exist.
+  list(APPEND GFLAGS_CHECK_INCLUDE_DIRS
+    ${GFLAGS_ROOT_DIR_INCLUDE}
+    /usr/local/include
+    /usr/local/homebrew/include # Mac OS X
+    /opt/local/var/macports/software # Mac OS X.
+    /opt/local/include
+    /usr/include
+    /sw/include # Fink
+    /opt/csw/include # Blastwave
+    /opt/lib/gflags/include)
+
+  list(APPEND GFLAGS_CHECK_PATH_SUFFIXES
+    gflags/include # Windows (for C:/Program Files prefix).
+    gflags/Include) # Windows (for C:/Program Files prefix).
+
+  list(APPEND GFLAGS_CHECK_LIBRARY_DIRS
+    ${GFLAGS_ROOT_DIR_LIB}
+    /usr/local/lib
+    /usr/local/homebrew/lib # Mac OS X.
+    /opt/local/lib
+    /usr/lib
+    /sw/lib # Fink
+    /opt/csw/lib # Blastwave
+    /opt/lib/gflags/lib)
+  list(APPEND GFLAGS_CHECK_LIBRARY_SUFFIXES
+    gflags/lib # Windows (for C:/Program Files prefix).
+    gflags/Lib) # Windows (for C:/Program Files prefix).
+
+  # Search supplied hint directories first if supplied.  A GFLAGS_INCLUDE_DIR
+  # already present in the cache is used as-is and no search takes place.
+  find_path(GFLAGS_INCLUDE_DIR
+    NAMES gflags/gflags.h
+    PATHS ${GFLAGS_INCLUDE_DIR_HINTS}
+    ${GFLAGS_CHECK_INCLUDE_DIRS}
+    PATH_SUFFIXES ${GFLAGS_CHECK_PATH_SUFFIXES})
+  # gflags_report_not_found() ends in return(), so nothing below runs on failure.
+  if(NOT GFLAGS_INCLUDE_DIR OR
+      NOT EXISTS "${GFLAGS_INCLUDE_DIR}")
+    gflags_report_not_found(
+      "Could not find gflags include directory, set GFLAGS_INCLUDE_DIR "
+      "to directory containing gflags/gflags.h")
+  endif()
+
+  # Locate the gflags library itself in the same manner.
+  find_library(GFLAGS_LIBRARY NAMES gflags
+    PATHS ${GFLAGS_LIBRARY_DIR_HINTS}
+    ${GFLAGS_CHECK_LIBRARY_DIRS}
+    PATH_SUFFIXES ${GFLAGS_CHECK_LIBRARY_SUFFIXES})
+  if(NOT GFLAGS_LIBRARY OR
+      NOT EXISTS "${GFLAGS_LIBRARY}")
+    gflags_report_not_found(
+      "Could not find gflags library, set GFLAGS_LIBRARY "
+      "to full path to libgflags.")
+  endif()
+
+  # gflags typically requires a threading library (which is OS dependent), note
+  # that this defines the CMAKE_THREAD_LIBS_INIT variable.  If we are able to
+  # detect threads, we assume that gflags requires it.
+  find_package(Threads QUIET)
+  set(GFLAGS_LINK_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
+  # On Windows (including MinGW), the Shlwapi library is used by gflags if
+  # available.
+  if(WIN32)
+    include(CheckIncludeFileCXX)
+    check_include_file_cxx("shlwapi.h" HAVE_SHLWAPI)
+    if(HAVE_SHLWAPI)
+      list(APPEND GFLAGS_LINK_LIBRARIES shlwapi.lib)
+    endif()
+  endif()
+
+  # Mark internally as found, then verify. GFLAGS_REPORT_NOT_FOUND() unsets
+  # if called.
+  set(GFLAGS_FOUND TRUE)
+
+  # Identify what namespace gflags was built with.
+  if(GFLAGS_INCLUDE_DIR AND NOT GFLAGS_NAMESPACE)
+    # To handle Windows peculiarities / CMake bugs on MSVC we try two approaches
+    # to detect the gflags namespace:
+    #
+    # 1) Try to use check_cxx_source_compiles() to compile a trivial program
+    #    with the two choices for the gflags namespace.
+    #
+    # 2) [In the event 1) fails] Use regex on the gflags headers to try to
+    #    determine the gflags namespace.  Whilst this is less robust than 1),
+    #    it does avoid any interaction with msbuild.
+    gflags_check_gflags_namespace_using_try_compile()
+
+    if(NOT GFLAGS_NAMESPACE)
+      # Failed to determine gflags namespace using check_cxx_source_compiles()
+      # method, try and obtain it using regex on the gflags headers instead.
+      message(STATUS "Failed to find gflags namespace using "
+        "check_cxx_source_compiles(), trying namespace regex instead, "
+        "this is expected on Windows.")
+      gflags_check_gflags_namespace_using_regex()
+
+      if(NOT GFLAGS_NAMESPACE)
+        gflags_report_not_found(
+          "Failed to determine gflags namespace either by "
+          "check_cxx_source_compiles(), or namespace regex.")
+      endif()
+    endif()
+  endif()
+
+  # Make the GFLAGS_NAMESPACE a cache variable s/t the user can view it, and could
+  # overwrite it in the CMake GUI.
+  set(GFLAGS_NAMESPACE "${GFLAGS_NAMESPACE}" CACHE STRING
+    "gflags namespace (google or gflags)" FORCE)
+
+  # gflags does not seem to provide any record of the version in its
+  # source tree, thus cannot extract version.
+
+  # Catch case when caller has set GFLAGS_NAMESPACE in the cache / GUI
+  # with an invalid value.
+  if(GFLAGS_NAMESPACE AND
+      NOT GFLAGS_NAMESPACE STREQUAL "google" AND
+      NOT GFLAGS_NAMESPACE STREQUAL "gflags")
+    gflags_report_not_found(
+      "Caller defined GFLAGS_NAMESPACE:"
+      " ${GFLAGS_NAMESPACE} is not valid, not google or gflags.")
+  endif()
+  # Catch case when caller has set GFLAGS_INCLUDE_DIR in the cache / GUI and
+  # thus FIND_[PATH/LIBRARY] are not called, but specified locations are
+  # invalid, otherwise we would report the library as found.
+  if(GFLAGS_INCLUDE_DIR AND
+      NOT EXISTS "${GFLAGS_INCLUDE_DIR}/gflags/gflags.h")
+    gflags_report_not_found(
+      "Caller defined GFLAGS_INCLUDE_DIR:"
+      " ${GFLAGS_INCLUDE_DIR} does not contain gflags/gflags.h header.")
+  endif()
+  # TODO: This regex for gflags library is pretty primitive, we use lowercase
+  #       for comparison to handle Windows using CamelCase library names, could
+  #       this check be better?
+  string(TOLOWER "${GFLAGS_LIBRARY}" LOWERCASE_GFLAGS_LIBRARY)
+  if(GFLAGS_LIBRARY AND
+      NOT "${LOWERCASE_GFLAGS_LIBRARY}" MATCHES ".*gflags[^/]*")
+    gflags_report_not_found(
+      "Caller defined GFLAGS_LIBRARY: "
+      "${GFLAGS_LIBRARY} does not match gflags.")
+  endif()
+
+  # All checks passed: put back the caller's library prefixes (MSVC only).
+  gflags_reset_find_library_prefix()
+
+endif()
+
+# Set standard CMake FindPackage variables if found.
+if(GFLAGS_FOUND)
+  set(GFLAGS_INCLUDE_DIRS ${GFLAGS_INCLUDE_DIR})
+  set(GFLAGS_LIBRARIES ${GFLAGS_LIBRARY} ${GFLAGS_LINK_LIBRARIES})
+endif()
+
+# Handle REQUIRED / QUIET; the name must match the find_package(Gflags) casing.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(Gflags DEFAULT_MSG
+  GFLAGS_INCLUDE_DIRS GFLAGS_LIBRARIES GFLAGS_NAMESPACE)
+
+# Only mark internal variables as advanced if we found gflags, otherwise
+# leave them visible in the standard GUI for the user to set manually.
+if(GFLAGS_FOUND)
+  mark_as_advanced(FORCE GFLAGS_INCLUDE_DIR
+    GFLAGS_LIBRARY
+    GFLAGS_NAMESPACE
+    gflags_DIR) # Autogenerated by find_package(gflags)
+endif()
--- a/build_files/cmake/Modules/FindGlog.cmake
+++ b/build_files/cmake/Modules/FindGlog.cmake
@@ -0,0 +1,226 @@
+# Ceres Solver - A fast non-linear least squares minimizer
+# Copyright 2015 Google Inc. All rights reserved.
+# http://ceres-solver.org/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# * Neither the name of Google Inc. nor the names of its contributors may be
+#   used to endorse or promote products derived from this software without
+#   specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Author: alexs.mac@gmail.com (Alex Stewart)
+#
+
+# FindGlog.cmake - Find Google glog logging library.
+#
+# This module defines the following variables:
+#
+# GLOG_FOUND: TRUE iff glog is found.
+# GLOG_INCLUDE_DIRS: Include directories for glog.
+# GLOG_LIBRARIES: Libraries required to link glog.
+#
+# The following variables control the behaviour of this module:
+#
+# GLOG_INCLUDE_DIR_HINTS: List of additional directories in which to
+#                         search for glog includes, e.g: /timbuktu/include.
+# GLOG_LIBRARY_DIR_HINTS: List of additional directories in which to
+#                         search for glog libraries, e.g: /timbuktu/lib.
+# GLOG_ROOT_DIR:          The base directory to search for Glog.
+#                         This can also be an environment variable.
+#
+# The following variables are also defined by this module, but in line with
+# CMake recommended FindPackage() module style should NOT be referenced directly
+# by callers (use the plural variables detailed above instead).  These variables
+# do however affect the behaviour of the module via FIND_[PATH/LIBRARY]() which
+# are NOT re-called (i.e. search for library is not repeated) if these variables
+# are set with valid values _in the CMake cache_. This means that if these
+# variables are set directly in the cache, either by the user in the CMake GUI,
+# or by the user passing -DVAR=VALUE directives to CMake when called (which
+# explicitly defines a cache variable), then they will be used verbatim,
+# bypassing the HINTS variables and other hard-coded search locations.
+#
+# GLOG_INCLUDE_DIR: Include directory for glog, not including the
+#                   include directory of any dependencies.
+# GLOG_LIBRARY: glog library, not including the libraries of any
+#               dependencies.
+
+# Fall back to the GLOG_ROOT_DIR environment variable when it is set and non-empty.
+if(NOT GLOG_ROOT_DIR AND NOT "$ENV{GLOG_ROOT_DIR}" STREQUAL "")
+  set(GLOG_ROOT_DIR "$ENV{GLOG_ROOT_DIR}")
+endif()
+
+if(DEFINED GLOG_ROOT_DIR)
+  set(GLOG_ROOT_DIR_INCLUDE "${GLOG_ROOT_DIR}/include")
+  set(GLOG_ROOT_DIR_LIB "${GLOG_ROOT_DIR}/lib")
+endif()
+
+# Restore CMAKE_FIND_LIBRARY_PREFIXES from the copy kept in
+# CALLERS_CMAKE_FIND_LIBRARY_PREFIXES; does nothing unless MSVC is set.
+macro(GLOG_RESET_FIND_LIBRARY_PREFIX)
+  if(MSVC)
+    set(CMAKE_FIND_LIBRARY_PREFIXES "${CALLERS_CMAKE_FIND_LIBRARY_PREFIXES}")
+  endif()
+endmacro()
+
+# Called if we failed to find glog or any of its required dependencies:
+# unsets the public result variables, reports REASON_MSG (plus any extra
+# arguments) at a priority chosen by [REQUIRED/QUIET/<NONE>], then returns.
+macro(GLOG_REPORT_NOT_FOUND REASON_MSG)
+  unset(GLOG_FOUND)
+  unset(GLOG_INCLUDE_DIRS)
+  unset(GLOG_LIBRARIES)
+  # Make results of search visible in the CMake GUI if glog has not
+  # been found so that user does not have to toggle to advanced view.
+  mark_as_advanced(CLEAR GLOG_INCLUDE_DIR
+                         GLOG_LIBRARY)
+
+  glog_reset_find_library_prefix()
+
+  # Note <package>_FIND_[REQUIRED/QUIETLY] variables defined by FindPackage()
+  # use the camelcase library name, not uppercase.
+  if(Glog_FIND_QUIETLY)
+    message(STATUS "Failed to find glog - " ${REASON_MSG} ${ARGN})
+  elseif(Glog_FIND_REQUIRED)
+    message(FATAL_ERROR "Failed to find glog - " ${REASON_MSG} ${ARGN})
+  else()
+    # Neither QUIETLY nor REQUIRED, use no priority which emits a message
+    # but continues configuration and allows generation.
+    message("-- Failed to find glog - " ${REASON_MSG} ${ARGN})
+  endif()
+  return() # a macro is expanded in its caller, so this leaves the file that invoked it
+endmacro()
+
+# Handle possible presence of lib prefix for libraries on MSVC, see
+# also GLOG_RESET_FIND_LIBRARY_PREFIX().
+if(MSVC)
+  # Preserve the caller's original values for CMAKE_FIND_LIBRARY_PREFIXES
+  # s/t we can set it back before returning.
+  set(CALLERS_CMAKE_FIND_LIBRARY_PREFIXES "${CMAKE_FIND_LIBRARY_PREFIXES}")
+  # The empty string in this list is important, it represents the case when
+  # the libraries have no prefix (shared libraries / DLLs).
+  set(CMAKE_FIND_LIBRARY_PREFIXES "lib" "" "${CMAKE_FIND_LIBRARY_PREFIXES}")
+endif()
+
+# Search user-installed locations first, so that we prefer user installs
+# to system installs where both exist.
+list(APPEND GLOG_CHECK_INCLUDE_DIRS
+  ${GLOG_ROOT_DIR_INCLUDE}
+  /usr/local/include
+  /usr/local/homebrew/include # Mac OS X
+  /opt/local/var/macports/software # Mac OS X.
+  /opt/local/include
+  /usr/include
+  /sw/include # Fink
+  /opt/csw/include # Blastwave
+  /opt/lib/glog/include)
+# Windows (for C:/Program Files prefix).
+list(APPEND GLOG_CHECK_PATH_SUFFIXES
+  glog/include
+  glog/Include
+  Glog/include
+  Glog/Include)
+
+list(APPEND GLOG_CHECK_LIBRARY_DIRS
+  ${GLOG_ROOT_DIR_LIB}
+  /usr/local/lib
+  /usr/local/homebrew/lib # Mac OS X.
+  /opt/local/lib
+  /usr/lib
+  /sw/lib # Fink
+  /opt/csw/lib # Blastwave
+  /opt/lib/glog/lib) # counterpart of /opt/lib/glog/include in the include list
+# Windows (for C:/Program Files prefix).
+list(APPEND GLOG_CHECK_LIBRARY_SUFFIXES
+  glog/lib
+  glog/Lib
+  Glog/lib
+  Glog/Lib)
+
+# Search supplied hint directories first if supplied.
+find_path(GLOG_INCLUDE_DIR
+  NAMES glog/logging.h
+  PATHS ${GLOG_INCLUDE_DIR_HINTS}
+  ${GLOG_CHECK_INCLUDE_DIRS}
+  PATH_SUFFIXES ${GLOG_CHECK_PATH_SUFFIXES})
+if(NOT GLOG_INCLUDE_DIR OR
+   NOT EXISTS ${GLOG_INCLUDE_DIR})
+  glog_report_not_found(
+    "Could not find glog include directory, set GLOG_INCLUDE_DIR "
+    "to directory containing glog/logging.h")
+endif()
+
+find_library(GLOG_LIBRARY NAMES glog
+  PATHS ${GLOG_LIBRARY_DIR_HINTS}
+  ${GLOG_CHECK_LIBRARY_DIRS}
+  PATH_SUFFIXES ${GLOG_CHECK_LIBRARY_SUFFIXES})
+if(NOT GLOG_LIBRARY OR
+   NOT EXISTS ${GLOG_LIBRARY})
+  glog_report_not_found(
+    "Could not find glog library, set GLOG_LIBRARY "
+    "to full path to libglog.")
+endif()
+
+# Mark internally as found, then verify. GLOG_REPORT_NOT_FOUND() unsets
+# if called.
+set(GLOG_FOUND TRUE)
+
+# Glog does not seem to provide any record of the version in its
+# source tree, thus cannot extract version.
+
+# Catch case when caller has set GLOG_INCLUDE_DIR in the cache / GUI and
+# thus FIND_[PATH/LIBRARY] are not called, but specified locations are
+# invalid, otherwise we would report the library as found.
+if(GLOG_INCLUDE_DIR AND
+   NOT EXISTS ${GLOG_INCLUDE_DIR}/glog/logging.h)
+  glog_report_not_found(
+    "Caller defined GLOG_INCLUDE_DIR:"
+    " ${GLOG_INCLUDE_DIR} does not contain glog/logging.h header.")
+endif()
+# TODO: This regex for glog library is pretty primitive, we use lowercase
+#       for comparison to handle Windows using CamelCase library names, could
+#       this check be better?
+string(TOLOWER "${GLOG_LIBRARY}" LOWERCASE_GLOG_LIBRARY)
+if(GLOG_LIBRARY AND
+   NOT "${LOWERCASE_GLOG_LIBRARY}" MATCHES ".*glog[^/]*")
+  glog_report_not_found(
+    "Caller defined GLOG_LIBRARY: "
+    "${GLOG_LIBRARY} does not match glog.")
+endif()
+
+# Set standard CMake FindPackage variables if found.
+if(GLOG_FOUND)
+  set(GLOG_INCLUDE_DIRS ${GLOG_INCLUDE_DIR})
+  set(GLOG_LIBRARIES ${GLOG_LIBRARY})
+endif()
+
+glog_reset_find_library_prefix()
+
+# Handle REQUIRED / QUIET optional arguments.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(GLOG DEFAULT_MSG
+  GLOG_INCLUDE_DIRS GLOG_LIBRARIES)
+
+# Only mark internal variables as advanced if we found glog, otherwise
+# leave them visible in the standard GUI for the user to set manually.
+if(GLOG_FOUND)
+  mark_as_advanced(FORCE GLOG_INCLUDE_DIR
+                         GLOG_LIBRARY)
+endif()
--- a/build_files/cmake/Modules/FindOpenImageIO.cmake
+++ b/build_files/cmake/Modules/FindOpenImageIO.cmake
@@ -60,7 +60,7 @@ FIND_FILE(OPENIMAGEIO_IDIFF
  NAMES
    idiff
  HINTS
-    ${OPENIMAGEIO_ROOT_DIR}
+    ${_openimageio_SEARCH_DIRS}
  PATH_SUFFIXES
    bin
 )
--- a/build_files/cmake/Modules/GTestTesting.cmake
+++ b/build_files/cmake/Modules/GTestTesting.cmake
@@ -20,8 +20,8 @@ macro(BLENDER_SRC_GTEST_EX NAME SRC EXTRA_LIBS DO_ADD_TEST)
 		set(TEST_INC
 			${_current_include_directories}
 			${CMAKE_SOURCE_DIR}/tests/gtests
-			${CMAKE_SOURCE_DIR}/extern/glog/src
-			${CMAKE_SOURCE_DIR}/extern/gflags/src
+			${GLOG_INCLUDE_DIRS}
+			${GFLAGS_INCLUDE_DIRS}
 			${CMAKE_SOURCE_DIR}/extern/gtest/include
 			${CMAKE_SOURCE_DIR}/extern/gmock/include
 		)
@@ -37,15 +37,18 @@ macro(BLENDER_SRC_GTEST_EX NAME SRC EXTRA_LIBS DO_ADD_TEST)
 		                      extern_gmock
 		                      # needed for glog
 		                      ${PTHREADS_LIBRARIES}
-		                      extern_glog
-		                      extern_gflags)
+		                      ${GLOG_LIBRARIES}
+		                      ${GFLAGS_LIBRARIES})
+		if(WITH_OPENMP_STATIC)
+			target_link_libraries(${NAME}_test ${OpenMP_LIBRARIES})
+		endif()
 		set_target_properties(${NAME}_test PROPERTIES
 		                      RUNTIME_OUTPUT_DIRECTORY         "${TESTS_OUTPUT_DIR}"
 		                      RUNTIME_OUTPUT_DIRECTORY_RELEASE "${TESTS_OUTPUT_DIR}"
 		                      RUNTIME_OUTPUT_DIRECTORY_DEBUG   "${TESTS_OUTPUT_DIR}"
 		                      INCLUDE_DIRECTORIES              "${TEST_INC}")
 		if(${DO_ADD_TEST})
-			add_test(${NAME}_test ${TESTS_OUTPUT_DIR}/${NAME}_test)
+			add_test(NAME ${NAME}_test COMMAND ${TESTS_OUTPUT_DIR}/${NAME}_test WORKING_DIRECTORY $<TARGET_FILE_DIR:blender>)
 		endif()
 	endif()
 endmacro()
--- a/build_files/cmake/buildinfo.cmake
+++ b/build_files/cmake/buildinfo.cmake
@@ -56,7 +56,7 @@ if(EXISTS ${SOURCE_DIR}/.git)
 				string(REGEX REPLACE "[\r\n]+" ";" _git_contains_branches "${_git_contains_branches}")
 				string(REGEX REPLACE ";[ \t]+" ";" _git_contains_branches "${_git_contains_branches}")
 				foreach(_branch ${_git_contains_branches})
-					if (NOT "${_branch}" MATCHES "\\(HEAD.*")
+					if(NOT "${_branch}" MATCHES "\\(HEAD.*")
 						set(MY_WC_BRANCH "${_branch}")
 						break()
 					endif()
--- a/build_files/cmake/cmake_netbeans_project.py
+++ b/build_files/cmake/cmake_netbeans_project.py
@@ -84,7 +84,7 @@ def create_nb_project_main():
        make_exe = cmake_cache_var("CMAKE_MAKE_PROGRAM")
        make_exe_basename = os.path.basename(make_exe)

-        # --------------- NB spesific
+        # --------------- NB specific
        defines = [("%s=%s" % cdef) if cdef[1] else cdef[0] for cdef in defines]
        defines += [cdef.replace("#define", "").strip() for cdef in cmake_compiler_defines()]

--- a/build_files/cmake/config/blender_full.cmake
+++ b/build_files/cmake/config/blender_full.cmake
@@ -73,6 +73,9 @@ elseif(WIN32)
 	endif()
 elseif(APPLE)
 	set(WITH_JACK                ON  CACHE BOOL "" FORCE)
-	set(WITH_CODEC_QUICKTIME     ON  CACHE BOOL "" FORCE)
+	set(WITH_CODEC_QUICKTIME     OFF  CACHE BOOL "" FORCE)
 	set(WITH_OPENSUBDIV          OFF CACHE BOOL "" FORCE)
+
+#	include("${CMAKE_CURRENT_SOURCE_DIR}/../platform/platform_apple_xcode.cmake")
+#	apple_check_quicktime()
 endif()
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -74,6 +74,9 @@ elseif(WIN32)
 	endif()
 elseif(APPLE)
 	set(WITH_JACK                ON  CACHE BOOL "" FORCE)
-	set(WITH_CODEC_QUICKTIME     ON  CACHE BOOL "" FORCE)
+	set(WITH_CODEC_QUICKTIME     OFF  CACHE BOOL "" FORCE)
 	set(WITH_OPENSUBDIV          OFF CACHE BOOL "" FORCE)
+
+#	include("${CMAKE_CURRENT_SOURCE_DIR}/../platform/platform_apple_xcode.cmake")
+#	apple_check_quicktime()
 endif()
--- a/build_files/cmake/macros.cmake
+++ b/build_files/cmake/macros.cmake
@@ -416,14 +416,7 @@ function(setup_liblinks
 		target_link_libraries(${target} ${OPENCOLORIO_LIBRARIES})
 	endif()
 	if(WITH_OPENSUBDIV OR WITH_CYCLES_OPENSUBDIV)
-		if(WIN32 AND NOT UNIX)
-			file_list_suffix(OPENSUBDIV_LIBRARIES_DEBUG "${OPENSUBDIV_LIBRARIES}" "_d")
-			target_link_libraries_debug(${target} "${OPENSUBDIV_LIBRARIES_DEBUG}")
-			target_link_libraries_optimized(${target} "${OPENSUBDIV_LIBRARIES}")
-			unset(OPENSUBDIV_LIBRARIES_DEBUG)
-		else()
 			target_link_libraries(${target} ${OPENSUBDIV_LIBRARIES})
-		endif()
 	endif()
 	if(WITH_OPENVDB)
 		target_link_libraries(${target} ${OPENVDB_LIBRARIES} ${TBB_LIBRARIES})
@@ -498,6 +491,12 @@ function(setup_liblinks
 			target_link_libraries(${target} ${NDOF_LIBRARIES})
 		endif()
 	endif()
+	if(WITH_SYSTEM_GLOG)
+		target_link_libraries(${target} ${GLOG_LIBRARIES})
+	endif()
+	if(WITH_SYSTEM_GFLAGS)
+		target_link_libraries(${target} ${GFLAGS_LIBRARIES})
+	endif()

 	# We put CLEW and CUEW here because OPENSUBDIV_LIBRARIES dpeends on them..
 	if(WITH_CYCLES OR WITH_COMPOSITOR OR WITH_OPENSUBDIV)
@@ -602,6 +601,7 @@ function(SETUP_BLENDER_SORTED_LIBS)
 		bf_freestyle
 		bf_ikplugin
 		bf_modifiers
+		bf_alembic
 		bf_bmesh
 		bf_gpu
 		bf_blenloader
@@ -620,7 +620,6 @@ function(SETUP_BLENDER_SORTED_LIBS)
 		bf_imbuf_openimageio
 		bf_imbuf_dds
 		bf_collada
-		bf_alembic
 		bf_intern_elbeem
 		bf_intern_memutil
 		bf_intern_guardedalloc
@@ -665,13 +664,19 @@ function(SETUP_BLENDER_SORTED_LIBS)
 		extern_rangetree
 		extern_wcwidth
 		bf_intern_libmv
-		extern_glog
-		extern_gflags
 		extern_sdlew

 		bf_intern_glew_mx
 	)

+	if(NOT WITH_SYSTEM_GLOG)
+		list(APPEND BLENDER_SORTED_LIBS extern_glog)
+	endif()
+
+	if(NOT WITH_SYSTEM_GFLAGS)
+		list(APPEND BLENDER_SORTED_LIBS extern_gflags)
+	endif()
+
 	if(WITH_COMPOSITOR)
 		# added for opencl compositor
 		list_insert_before(BLENDER_SORTED_LIBS "bf_blenkernel" "bf_compositor")
@@ -1581,24 +1586,24 @@ macro(openmp_delayload
 endmacro()

 MACRO(WINDOWS_SIGN_TARGET target)
-	if (WITH_WINDOWS_CODESIGN)
-		if (!SIGNTOOL_EXE)
-			error("Codesigning is enabled, but signtool is not found")
+	if(WITH_WINDOWS_CODESIGN)
+		if(NOT SIGNTOOL_EXE) # "!" is not a CMake operator, the old test was never true
+			message(FATAL_ERROR "Codesigning is enabled, but signtool is not found")
 		else()
-			if (WINDOWS_CODESIGN_PFX_PASSWORD)
+			if(WINDOWS_CODESIGN_PFX_PASSWORD)
 				set(CODESIGNPASSWORD /p ${WINDOWS_CODESIGN_PFX_PASSWORD})
 			else()
-				if ($ENV{PFXPASSWORD})
+				if(NOT "$ENV{PFXPASSWORD}" STREQUAL "") # test the value itself, not a variable named after it
 					set(CODESIGNPASSWORD /p $ENV{PFXPASSWORD})
 				else()
-					message( FATAL_ERROR "WITH_WINDOWS_CODESIGN is on but WINDOWS_CODESIGN_PFX_PASSWORD not set, and environment variable PFXPASSWORD not found, unable to sign code.")
+					message(FATAL_ERROR "WITH_WINDOWS_CODESIGN is on but WINDOWS_CODESIGN_PFX_PASSWORD not set, and environment variable PFXPASSWORD not found, unable to sign code.")
 				endif()
 			endif()
 			add_custom_command(TARGET ${target}
-						POST_BUILD
-						COMMAND ${SIGNTOOL_EXE} sign /f ${WINDOWS_CODESIGN_PFX} ${CODESIGNPASSWORD} $<TARGET_FILE:${target}>
-						VERBATIM
-				)
+				POST_BUILD
+				COMMAND ${SIGNTOOL_EXE} sign /f ${WINDOWS_CODESIGN_PFX} ${CODESIGNPASSWORD} $<TARGET_FILE:${target}>
+				VERBATIM
+			)
 		endif()
 	endif()
 ENDMACRO()
--- a/build_files/cmake/packaging.cmake
+++ b/build_files/cmake/packaging.cmake
@@ -1,5 +1,7 @@
-set(PROJECT_DESCRIPTION  "Blender is a very fast and versatile 3D modeller/renderer.")
-set(PROJECT_COPYRIGHT    "Copyright (C) 2001-2012 Blender Foundation")
+string(TIMESTAMP CURRENT_YEAR "%Y")
+
+set(PROJECT_DESCRIPTION  "Blender is the free and open source 3D creation suite software.")
+set(PROJECT_COPYRIGHT    "Copyright (C) 2001-${CURRENT_YEAR} Blender Foundation")
 set(PROJECT_CONTACT      "foundation@blender.org")
 set(PROJECT_VENDOR       "Blender Foundation")

@@ -38,8 +40,8 @@ unset(MY_WC_HASH)
 # Force Package Name
 execute_process(COMMAND date "+%Y%m%d" OUTPUT_VARIABLE CPACK_DATE OUTPUT_STRIP_TRAILING_WHITESPACE)
 string(TOLOWER ${PROJECT_NAME} PROJECT_NAME_LOWER)
-if (MSVC)
-	if ("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
+if(MSVC)
+	if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
 		set(PACKAGE_ARCH windows64)
 	else()
 		set(PACKAGE_ARCH windows32)
@@ -48,7 +50,7 @@ else(MSVC)
 	set(PACKAGE_ARCH ${CMAKE_SYSTEM_PROCESSOR})
 endif()

-if (CPACK_OVERRIDE_PACKAGENAME)
+if(CPACK_OVERRIDE_PACKAGENAME)
 	set(CPACK_PACKAGE_FILE_NAME ${CPACK_OVERRIDE_PACKAGENAME}-${PACKAGE_ARCH})
 else()
 	set(CPACK_PACKAGE_FILE_NAME ${PROJECT_NAME_LOWER}-${MAJOR_VERSION}.${MINOR_VERSION}.${PATCH_VERSION}-git${CPACK_DATE}.${BUILD_REV}-${PACKAGE_ARCH})
@@ -135,4 +137,3 @@ unset(MINOR_VERSION)
 unset(PATCH_VERSION)

 unset(BUILD_REV)
-
--- a/build_files/cmake/platform/platform_apple.cmake
+++ b/build_files/cmake/platform/platform_apple.cmake
@@ -23,6 +23,10 @@

 # Libraries configuration for Apple.

+macro(find_package_wrapper)
+	# Intentionally a no-op on this platform: any arguments are accepted and ignored.
+endmacro()
+
 if(NOT DEFINED LIBDIR)
 	if(WITH_CXX11)
 		set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/darwin)
@@ -52,6 +56,7 @@ if(WITH_ALEMBIC)
 	set(ALEMBIC_INCLUDE_DIRS ${ALEMBIC_INCLUDE_DIR})
 	set(ALEMBIC_LIBPATH ${ALEMBIC}/lib)
 	set(ALEMBIC_LIBRARIES Alembic)
+	set(ALEMBIC_FOUND ON)
 endif()

 if(WITH_OPENSUBDIV OR WITH_CYCLES_OPENSUBDIV)
--- a/build_files/cmake/platform/platform_apple_xcode.cmake
+++ b/build_files/cmake/platform/platform_apple_xcode.cmake
@@ -0,0 +1,135 @@
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# The Original Code is Copyright (C) 2016, Blender Foundation
+# All rights reserved.
+#
+# Contributor(s): Jacques Beaurain.
+#
+# ***** END GPL LICENSE BLOCK *****
+
+# Xcode and system configuration for Apple.
+
+# require newer cmake on osx because of version handling,
+# older cmake cannot handle 2 digit subversion!
+cmake_minimum_required(VERSION 3.0.0)
+
+if(NOT CMAKE_OSX_ARCHITECTURES)
+	set(CMAKE_OSX_ARCHITECTURES x86_64 CACHE STRING
+		"Choose the architecture you want to build Blender for: i386, x86_64 or ppc"
+		FORCE)
+endif()
+
+if(NOT DEFINED OSX_SYSTEM)
+	execute_process(
+			COMMAND xcodebuild -version -sdk macosx SDKVersion
+			OUTPUT_VARIABLE OSX_SYSTEM
+			OUTPUT_STRIP_TRAILING_WHITESPACE)
+endif()
+
+# workaround for incorrect cmake xcode lookup for developer previews - XCODE_VERSION does not
+# take xcode-select path into account but would always look into /Applications/Xcode.app
+# while dev versions are named Xcode<version>-DP<preview_number>
+execute_process(
+		COMMAND xcode-select --print-path
+		OUTPUT_VARIABLE XCODE_CHECK OUTPUT_STRIP_TRAILING_WHITESPACE)
+string(REPLACE "/Contents/Developer" "" XCODE_BUNDLE "${XCODE_CHECK}") # quoted: stays one argument even if empty; truncate to bundlepath in any case
+
+if(${CMAKE_GENERATOR} MATCHES "Xcode")
+
+	# cmake incompatibility with xcode 4.3 and higher: cmake looks for xcode in the wrong path and
+	# leaves XCODE_VERSION empty. Checked first, the comparisons below need a non-empty version.
+	if("${XCODE_VERSION}" STREQUAL "")
+		message(FATAL_ERROR "Xcode 4.3 and higher must be used with cmake 2.8-8 or higher")
+	endif()
+
+	# earlier xcode has no bundled developer dir, so its path tells us nothing
+	if("${XCODE_VERSION}" VERSION_GREATER 4.2)
+		# reduce to Xcode name without dp extension
+		string(SUBSTRING "${XCODE_CHECK}" 14 6 DP_NAME)
+		if("${DP_NAME}" MATCHES Xcode5)
+			set(XCODE_VERSION 5)
+		endif()
+	endif()
+
+	if("${XCODE_VERSION}" VERSION_EQUAL 4 OR "${XCODE_VERSION}" VERSION_GREATER 4 AND "${XCODE_VERSION}" VERSION_LESS 4.3)
+		# Xcode 4 defaults to the Apple LLVM Compiler.
+		# Override the default compiler selection because Blender only compiles with gcc up to xcode 4.2
+		set(CMAKE_XCODE_ATTRIBUTE_GCC_VERSION "com.apple.compilers.llvmgcc42")
+		message(STATUS "Setting compiler to: " ${CMAKE_XCODE_ATTRIBUTE_GCC_VERSION})
+	endif()
+else() # unix makefile generator does not fill XCODE_VERSION var, so we get it with a command
+	execute_process(COMMAND xcodebuild -version OUTPUT_VARIABLE XCODE_VERS_BUILD_NR)
+	string(REGEX MATCH "[0-9]+(\\.[0-9]+)?" XCODE_VERSION "${XCODE_VERS_BUILD_NR}") # first major[.minor], drops patch and build-nr
+	unset(XCODE_VERS_BUILD_NR)
+endif()
+
+message(STATUS "Detected OS X ${OSX_SYSTEM} and Xcode ${XCODE_VERSION} at ${XCODE_BUNDLE}")
+
+if(${XCODE_VERSION} VERSION_LESS 4.3)
+	# use guaranteed existing sdk
+	set(CMAKE_OSX_SYSROOT /Developer/SDKs/MacOSX${OSX_SYSTEM}.sdk CACHE PATH "" FORCE)
+else()
+	# note: the xcode-select path could be ambiguous,
+	# because both /Applications/Xcode.app/Contents/Developer and /Applications/Xcode.app are allowed,
+	# so a self-composed bundle path is used here
+	set(OSX_SYSROOT_PREFIX ${XCODE_BUNDLE}/Contents/Developer/Platforms/MacOSX.platform)
+	message(STATUS "OSX_SYSROOT_PREFIX: " ${OSX_SYSROOT_PREFIX})
+	set(OSX_DEVELOPER_PREFIX /Developer/SDKs/MacOSX${OSX_SYSTEM}.sdk) # use guaranteed existing sdk
+	set(CMAKE_OSX_SYSROOT ${OSX_SYSROOT_PREFIX}/${OSX_DEVELOPER_PREFIX} CACHE PATH "" FORCE)
+	if(${CMAKE_GENERATOR} MATCHES "Xcode")
+		# to silence sdk not found warning, just overrides CMAKE_OSX_SYSROOT
+		set(CMAKE_XCODE_ATTRIBUTE_SDKROOT macosx${OSX_SYSTEM})
+	endif()
+endif()
+
+if(OSX_SYSTEM MATCHES 10.9)
+	# make sure syslibs and headers are looked up in sdk ( especially for 10.9 openGL atm. )
+	set(CMAKE_FIND_ROOT_PATH ${CMAKE_OSX_SYSROOT})
+endif()
+
+if(WITH_CXX11)
+	# 10.9 is our min. target, if you use higher sdk, weak linking happens
+	if(CMAKE_OSX_DEPLOYMENT_TARGET)
+		if(${CMAKE_OSX_DEPLOYMENT_TARGET} VERSION_LESS 10.9)
+			message(STATUS "Setting deployment target to 10.9, lower versions are incompatible with WITH_CXX11")
+			set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "" FORCE)
+		endif()
+	else()
+		set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "" FORCE)
+	endif()
+else()
+	if(NOT CMAKE_OSX_DEPLOYMENT_TARGET)
+		# 10.6 is our min. target, if you use higher sdk, weak linking happens
+		set(CMAKE_OSX_DEPLOYMENT_TARGET "10.6" CACHE STRING "" FORCE)
+	endif()
+endif()
+
+if(NOT ${CMAKE_GENERATOR} MATCHES "Xcode")
+	# force CMAKE_OSX_DEPLOYMENT_TARGET for makefiles, will not work else ( cmake bug ? )
+	set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
+	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
+	add_definitions("-DMACOSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET}")
+endif()
+
+macro(apple_check_quicktime)
+	# QuickTime framework is no longer available in SDK 10.12+
+	if(WITH_CODEC_QUICKTIME AND ${OSX_SYSTEM} VERSION_GREATER 10.11)
+		set(WITH_CODEC_QUICKTIME OFF CACHE BOOL "" FORCE)
+		message(STATUS "QuickTime not supported by SDK ${OSX_SYSTEM}, disabling WITH_CODEC_QUICKTIME")
+	endif()
+endmacro()
+
--- a/build_files/cmake/platform/platform_win32_msvc.cmake
+++ b/build_files/cmake/platform/platform_win32_msvc.cmake
@@ -33,10 +33,16 @@ endmacro()
 macro(windows_find_package package_name
 	)
 	if(WITH_WINDOWS_FIND_MODULES)
-		find_package( ${package_name})
+		find_package(${package_name})
 	endif(WITH_WINDOWS_FIND_MODULES)
 endmacro()

+macro(find_package_wrapper)
+	if(WITH_WINDOWS_FIND_MODULES)
+		find_package(${ARGV})
+	endif()
+endmacro()
+
 add_definitions(-DWIN32)
 # Minimum MSVC Version
 if(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
@@ -432,6 +438,7 @@ if(WITH_ALEMBIC)
 	set(ALEMBIC_INCLUDE_DIRS ${ALEMBIC_INCLUDE_DIR})
 	set(ALEMBIC_LIBPATH ${ALEMBIC}/lib)
 	set(ALEMBIC_LIBRARIES optimized alembic debug alembic_d)
+	set(ALEMBIC_FOUND 1)
 endif()

 if(WITH_MOD_CLOTH_ELTOPO)
@@ -446,10 +453,20 @@ if(WITH_MOD_CLOTH_ELTOPO)
 endif()

 if(WITH_OPENSUBDIV OR WITH_CYCLES_OPENSUBDIV)
-	set(OPENSUBDIV_INCLUDE_DIR ${LIBDIR}/opensubdiv/include)
-	set(OPENSUBDIV_LIBPATH ${LIBDIR}/opensubdiv/lib)
-	set(OPENSUBDIV_LIBRARIES ${OPENSUBDIV_LIBPATH}/osdCPU.lib ${OPENSUBDIV_LIBPATH}/osdGPU.lib)
-	find_package(OpenSubdiv)
+	set(OPENSUBDIV_INCLUDE_DIR ${LIBDIR}/opensubdiv/include)
+	set(OPENSUBDIV_LIBPATH ${LIBDIR}/opensubdiv/lib)
+	set(OPENSUBDIV_LIBRARIES
+		optimized ${OPENSUBDIV_LIBPATH}/osdCPU.lib
+		optimized ${OPENSUBDIV_LIBPATH}/osdGPU.lib
+		debug ${OPENSUBDIV_LIBPATH}/osdCPU_d.lib
+		debug ${OPENSUBDIV_LIBPATH}/osdGPU_d.lib)
+	set(OPENSUBDIV_HAS_OPENMP TRUE)
+	set(OPENSUBDIV_HAS_TBB FALSE)
+	set(OPENSUBDIV_HAS_OPENCL TRUE)
+	set(OPENSUBDIV_HAS_CUDA FALSE)
+	set(OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK TRUE)
+	set(OPENSUBDIV_HAS_GLSL_COMPUTE TRUE)
+	windows_find_package(OpenSubdiv)
 endif()

 if(WITH_SDL)
--- a/doc/python_api/examples/bpy.types.Menu.4.py
+++ b/doc/python_api/examples/bpy.types.Menu.4.py
@@ -0,0 +1,81 @@
+"""
+Extending the Button Context Menu
+++++++++++++++++++++++++++++++++
+
+This example enables you to insert your own menu entry into the common
+right click menu that you get while hovering over a value field,
+color, string, etc.
+
+To make the example work, you have to first select an object
+then right click on a user interface element (maybe a color in the
+material properties) and choose *Execute Custom Action*.
+
+Executing the operator will then print all values.
+"""
+
+import bpy
+from bpy.types import Menu
+
+
+def dump(obj, text):
+    for attr in dir(obj):
+        print("%r.%s = %s" % (obj, attr, getattr(obj, attr)))
+
+
+class WM_OT_button_context_test(bpy.types.Operator):
+    """Right click entry test"""
+    bl_idname = "wm.button_context_test"
+    bl_label = "Run Context Test"
+
+    @classmethod
+    def poll(cls, context):
+        return context.active_object is not None
+
+    def execute(self, context):
+        value = getattr(context, "button_pointer", None)
+        if value is not None:
+            dump(value, "button_pointer")
+
+        value = getattr(context, "button_prop", None)
+        if value is not None:
+            dump(value, "button_prop")
+
+        value = getattr(context, "button_operator", None)
+        if value is not None:
+            dump(value, "button_operator")
+
+        return {'FINISHED'}
+
+
+# This class must be named exactly WM_MT_button_context to insert an entry in the right click menu
+class WM_MT_button_context(Menu):
+    bl_label = "Unused"
+
+    def draw(self, context):
+        pass
+
+
+def menu_func(self, context):
+    layout = self.layout
+    layout.separator()
+    layout.operator(WM_OT_button_context_test.bl_idname)
+
+classes = (
+    WM_OT_button_context_test,
+    WM_MT_button_context,
+)
+
+
+def register():
+    for cls in classes:
+        bpy.utils.register_class(cls)
+    bpy.types.WM_MT_button_context.append(menu_func)
+
+
+def unregister():
+    for cls in classes:
+        bpy.utils.unregister_class(cls)
+    bpy.types.WM_MT_button_context.remove(menu_func)
+
+if __name__ == "__main__":
+    register()
--- a/doc/python_api/rst/bge.texture.rst
+++ b/doc/python_api/rst/bge.texture.rst
@@ -681,7 +681,7 @@ Image classes

   .. attribute:: zbuff

-      Use depth component of render as grey scale color -  suitable for texture source.
+      Use depth component of render as grayscale color - suitable for texture source.

      :type: bool

@@ -817,7 +817,7 @@ Image classes

   .. attribute:: zbuff

-      Use depth component of viewport as grey scale color - suitable for texture source.
+      Use depth component of viewport as grayscale color - suitable for texture source.

      :type: bool

@@ -1260,8 +1260,8 @@ Filter classes

 .. class:: FilterGray

-   Filter for gray scale effect.
-   Proportions of R, G and B contributions in the output gray scale are 28:151:77.
+   Filter for grayscale effect.
+   Proportions of R, G and B contributions in the output grayscale are 28:151:77.

   .. attribute:: previous

--- a/doc/python_api/sphinx_doc_gen.py
+++ b/doc/python_api/sphinx_doc_gen.py
@@ -427,9 +427,9 @@ if BLENDER_REVISION != "Unknown":
    BLENDER_VERSION_DOTS += " " + BLENDER_REVISION          # '2.62.1 SHA1'

 BLENDER_VERSION_PATH = "_".join(blender_version_strings)    # '2_62_1'
-if bpy.app.version_cycle == "release":
-    BLENDER_VERSION_PATH = "%s%s_release" % ("_".join(blender_version_strings[:2]),
-                                             bpy.app.version_char)   # '2_62_release'
+if bpy.app.version_cycle in {"rc", "release"}:
+    # '2_62a_release'
+    BLENDER_VERSION_PATH = "%s%s_release" % ("_".join(blender_version_strings[:2]), bpy.app.version_char)

 # --------------------------DOWNLOADABLE FILES----------------------------------

--- a/doc/python_api/sphinx_doc_update.py
+++ b/doc/python_api/sphinx_doc_update.py
@@ -96,6 +96,11 @@ def main():

    rsync_base = "rsync://%s@%s:%s" % (args.user, args.rsync_server, args.rsync_root)

+    blenver = blenver_zip = ""
+    api_name = ""
+    branch = ""
+    is_release = False
+
    # I) Update local mirror using rsync.
    rsync_mirror_cmd = ("rsync", "--delete-after", "-avzz", rsync_base, args.mirror_dir)
    subprocess.run(rsync_mirror_cmd, env=dict(os.environ, RSYNC_PASSWORD=args.password))
@@ -108,19 +113,24 @@ def main():
        subprocess.run(doc_gen_cmd)

        # III) Get Blender version info.
-        blenver = blenver_zip = ""
        getver_file = os.path.join(tmp_dir, "blendver.txt")
        getver_script = (""
            "import sys, bpy\n"
            "with open(sys.argv[-1], 'w') as f:\n"
-            "    f.write('%d_%d%s_release\\n' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
-            "            if bpy.app.version_cycle in {'rc', 'release'} else '%d_%d_%d\\n' % bpy.app.version)\n"
-            "    f.write('%d_%d_%d' % bpy.app.version)\n")
+            "    is_release = bpy.app.version_cycle in {'rc', 'release'}\n"
+            "    branch = bpy.app.build_branch.split()[0].decode()\n"
+            "    f.write('%d\\n' % is_release)\n"
+            "    f.write('%s\\n' % branch)\n"
+            "    f.write('%d.%d%s\\n' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
+            "            if is_release else '%s\\n' % branch)\n"
+            "    f.write('%d_%d%s_release' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n"
+            "            if is_release else '%d_%d_%d' % bpy.app.version)\n")
        get_ver_cmd = (args.blender, "--background", "-noaudio", "--factory-startup", "--python-exit-code", "1",
                       "--python-expr", getver_script, "--", getver_file)
        subprocess.run(get_ver_cmd)
        with open(getver_file) as f:
-            blenver, blenver_zip = f.read().split("\n")
+            is_release, branch, blenver, blenver_zip = f.read().split("\n")
+            is_release = bool(int(is_release))
        os.remove(getver_file)

        # IV) Build doc.
@@ -132,7 +142,7 @@ def main():
        os.chdir(curr_dir)

        # V) Cleanup existing matching dir in server mirror (if any), and copy new doc.
-        api_name = "blender_python_api_%s" % blenver
+        api_name = blenver
        api_dir = os.path.join(args.mirror_dir, api_name)
        if os.path.exists(api_dir):
            shutil.rmtree(api_dir)
@@ -150,19 +160,15 @@ def main():
    os.rename(zip_path, os.path.join(api_dir, "%s.zip" % zip_name))

    # VII) Create symlinks and html redirects.
-    #~ os.symlink(os.path.join(DEFAULT_SYMLINK_ROOT, api_name, "contents.html"), os.path.join(api_dir, "index.html"))
    os.symlink("./contents.html", os.path.join(api_dir, "index.html"))
-    if blenver.endswith("release"):
-        symlink = os.path.join(args.mirror_dir, "blender_python_api_current")
+    if is_release:
+        symlink = os.path.join(args.mirror_dir, "current")
        os.remove(symlink)
        os.symlink("./%s" % api_name, symlink)
        with open(os.path.join(args.mirror_dir, "250PythonDoc/index.html"), 'w') as f:
            f.write("<html><head><title>Redirecting...</title><meta http-equiv=\"REFRESH\""
                    "content=\"0;url=../%s/\"></head><body>Redirecting...</body></html>" % api_name)
-    else:
-        symlink = os.path.join(args.mirror_dir, "blender_python_api_master")
-        os.remove(symlink)
-        os.symlink("./%s" % api_name, symlink)
+    elif branch == "master":
        with open(os.path.join(args.mirror_dir, "blender_python_api/index.html"), 'w') as f:
            f.write("<html><head><title>Redirecting...</title><meta http-equiv=\"REFRESH\""
                    "content=\"0;url=../%s/\"></head><body>Redirecting...</body></html>" % api_name)
--- a/extern/CMakeLists.txt
+++ b/extern/CMakeLists.txt
@@ -99,7 +99,9 @@ if(WITH_LIBMV)
 endif()

 if(WITH_LIBMV OR WITH_GTESTS OR (WITH_CYCLES AND WITH_CYCLES_LOGGING))
-	add_subdirectory(gflags)
+	if(NOT WITH_SYSTEM_GFLAGS)  # Bundled gflags is skipped when the system-wide one is requested.
+		add_subdirectory(gflags)
+	endif()
 	add_subdirectory(glog)
 endif()

--- a/extern/clew/README.blender
+++ b/extern/clew/README.blender
@@ -1,5 +1,5 @@
 Project: OpenCL Wrangler
 URL: https://github.com/OpenCLWrangler/clew
 License: Apache 2.0
-Upstream version: 309a653
+Upstream version: 27a6867
 Local modifications: None
--- a/extern/clew/include/clew.h
+++ b/extern/clew/include/clew.h
@@ -369,7 +369,7 @@ typedef unsigned int cl_GLenum;
 #endif

 /* Define basic vector types */
-/* WOrkaround for ppc64el platform: conflicts with bool from C++. */
+/* Workaround for ppc64el platform: conflicts with bool from C++. */
 #if defined( __VEC__ ) && !(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
   #include <altivec.h>   /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */
   typedef vector unsigned char     __cl_uchar16;
@@ -2765,11 +2765,40 @@ CLEW_FUN_EXPORT     PFNCLGETGLCONTEXTINFOKHR            __clewGetGLContextInfoKH
 #define CL_DEVICE_GPU_OVERLAP_NV                    0x4004
 #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV            0x4005
 #define CL_DEVICE_INTEGRATED_MEMORY_NV              0x4006
+#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV   0x4007
+#define CL_DEVICE_PCI_BUS_ID_NV                     0x4008
+#define CL_DEVICE_PCI_SLOT_ID_NV                    0x4009

 /*********************************
 * cl_amd_device_attribute_query *
 *********************************/
 #define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD        0x4036
+#define CL_DEVICE_TOPOLOGY_AMD                      0x4037
+#define CL_DEVICE_BOARD_NAME_AMD                    0x4038
+#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD            0x4039
+#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD         0x4040
+#define CL_DEVICE_SIMD_WIDTH_AMD                    0x4041
+#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD        0x4042
+#define CL_DEVICE_WAVEFRONT_WIDTH_AMD               0x4043
+#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD           0x4044
+#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD      0x4045
+#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD    0x4046
+#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD  0x4047
+#define CL_DEVICE_LOCAL_MEM_BANKS_AMD               0x4048
+#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD        0x4049
+#define CL_DEVICE_GFXIP_MAJOR_AMD                   0x404A
+#define CL_DEVICE_GFXIP_MINOR_AMD                   0x404B
+#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD        0x404C
+
+#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD
+#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD            1
+
+typedef union
+{
+    struct { cl_uint type; cl_uint data[5]; } raw;
+    struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
+} cl_device_topology_amd;
+#endif

 /*********************************
 * cl_arm_printf extension
--- a/extern/clew/src/clew.c
+++ b/extern/clew/src/clew.c
@@ -15,7 +15,7 @@

    typedef HMODULE             CLEW_DYNLIB_HANDLE;

-    #define CLEW_DYNLIB_OPEN    LoadLibrary
+    #define CLEW_DYNLIB_OPEN    LoadLibraryA
    #define CLEW_DYNLIB_CLOSE   FreeLibrary
    #define CLEW_DYNLIB_IMPORT  GetProcAddress
 #else
@@ -223,7 +223,7 @@ int clewInit()
    __clewSetCommandQueueProperty       = (PFNCLSETCOMMANDQUEUEPROPERTY     )CLEW_DYNLIB_IMPORT(module, "clSetCommandQueueProperty");
 #endif
    __clewCreateBuffer                  = (PFNCLCREATEBUFFER                )CLEW_DYNLIB_IMPORT(module, "clCreateBuffer");
-    __clewCreateSubBuffer               = (PFNCLCREATESUBBUFFER             )CLEW_DYNLIB_IMPORT(module, "clCreateBuffer");
+    __clewCreateSubBuffer               = (PFNCLCREATESUBBUFFER             )CLEW_DYNLIB_IMPORT(module, "clCreateSubBuffer");
    __clewCreateImage                   = (PFNCLCREATEIMAGE                 )CLEW_DYNLIB_IMPORT(module, "clCreateImage");
    __clewRetainMemObject               = (PFNCLRETAINMEMOBJECT             )CLEW_DYNLIB_IMPORT(module, "clRetainMemObject");
    __clewReleaseMemObject              = (PFNCLRELEASEMEMOBJECT            )CLEW_DYNLIB_IMPORT(module, "clReleaseMemObject");
--- a/extern/cuew/include/cuew.h
+++ b/extern/cuew/include/cuew.h
@@ -114,7 +114,7 @@ extern "C" {
 #define cuGLGetDevices cuGLGetDevices_v2

 /* Types. */
-#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
+#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined (__aarch64__)
 typedef unsigned long long CUdeviceptr;
 #else
 typedef unsigned int CUdeviceptr;
--- a/extern/curve_fit_nd/curve_fit_nd.h
+++ b/extern/curve_fit_nd/curve_fit_nd.h
@@ -36,7 +36,7 @@
 /* curve_fit_cubic.c */

 /**
- * Takes a flat array of points and evalues that to calculate a bezier spline.
+ * Takes a flat array of points and evaluates that to calculate a bezier spline.
 *
 * \param points, points_len: The array of points to calculate a cubics from.
 * \param dims: The number of dimensions for for each element in \a points.
@@ -82,7 +82,7 @@ int curve_fit_cubic_to_points_fl(
        unsigned int **r_corners_index_array, unsigned int *r_corners_index_len);

 /**
- * Takes a flat array of points and evalues that to calculate handle lengths.
+ * Takes a flat array of points and evaluates that to calculate handle lengths.
 *
 * \param points, points_len: The array of points to calculate a cubics from.
 * \param dims: The number of dimensions for for each element in \a points.
@@ -107,7 +107,8 @@ int curve_fit_cubic_to_points_single_db(

        double  r_handle_l[],
        double  r_handle_r[],
-        double *r_error_sq);
+        double *r_error_sq,
+        unsigned int *r_error_index);

 int curve_fit_cubic_to_points_single_fl(
        const float       *points,
@@ -120,7 +121,8 @@ int curve_fit_cubic_to_points_single_fl(

        float   r_handle_l[],
        float   r_handle_r[],
-        float  *r_error_sq);
+        float  *r_error_sq,
+        unsigned int *r_error_index);

 enum {
 	CURVE_FIT_CALC_HIGH_QUALIY          = (1 << 0),
--- a/extern/curve_fit_nd/intern/curve_fit_cubic.c
+++ b/extern/curve_fit_nd/intern/curve_fit_cubic.c
@@ -554,8 +554,8 @@ static void cubic_from_points_fallback(
 	r_cubic->orig_span = (points_offset_len - 1);
 #endif

-	/* p1 = p0 - (tan_l * alpha_l);
-	 * p2 = p3 + (tan_r * alpha_r);
+	/* p1 = p0 - (tan_l * alpha);
+	 * p2 = p3 + (tan_r * alpha);
 	 */
 	msub_vn_vnvn_fl(p1, p0, tan_l, alpha, dims);
 	madd_vn_vnvn_fl(p2, p3, tan_r, alpha, dims);
@@ -1436,12 +1436,11 @@ int curve_fit_cubic_to_points_single_db(

        double  r_handle_l[],
        double  r_handle_r[],
-        double  *r_error_max_sq)
+        double *r_error_max_sq,
+        uint   *r_error_index)
 {
 	Cubic *cubic = alloca(cubic_alloc_size(dims));

-	uint split_index;
-
 	/* in this instance theres no advantage in using length cache,
 	 * since we're not recursively calculating values. */
 #ifdef USE_LENGTH_CACHE
@@ -1462,7 +1461,7 @@ int curve_fit_cubic_to_points_single_db(
 #endif
 	        tan_l, tan_r, error_threshold, dims,

-	        cubic, r_error_max_sq, &split_index);
+	        cubic, r_error_max_sq, r_error_index);

 #ifdef USE_LENGTH_CACHE
 	if (points_length_cache_alloc) {
@@ -1487,7 +1486,8 @@ int curve_fit_cubic_to_points_single_fl(

        float   r_handle_l[],
        float   r_handle_r[],
-        float  *r_error_sq)
+        float  *r_error_sq,
+        uint   *r_error_index)
 {
 	const uint points_flat_len = points_len * dims;
 	double *points_db = malloc(sizeof(double) * points_flat_len);
@@ -1521,7 +1521,8 @@ int curve_fit_cubic_to_points_single_fl(
 	        (double)error_threshold,
 	        tan_l_db, tan_r_db,
 	        r_handle_l_db, r_handle_r_db,
-	        &r_error_sq_db);
+	        &r_error_sq_db,
+	        r_error_index);

 	free(points_db);

--- a/extern/curve_fit_nd/intern/curve_fit_cubic_refit.c
+++ b/extern/curve_fit_nd/intern/curve_fit_cubic_refit.c
@@ -207,7 +207,7 @@ struct KnotCornerState {

 /* Utility functions */

-#ifdef USE_KNOT_REFIT
+#if defined(USE_KNOT_REFIT) && !defined(USE_KNOT_REFIT_REMOVE)
 /**
 * Find the most distant point between the 2 knots.
 */
@@ -269,7 +269,7 @@ static uint knot_find_split_point(

 	return split_point;
 }
-#endif  /* USE_KNOT_REFIT */
+#endif  /* USE_KNOT_REFIT && !USE_KNOT_REFIT_REMOVE */


 #ifdef USE_CORNER_DETECT
@@ -322,7 +322,7 @@ static double knot_remove_error_value(
        const double *points_offset_length_cache,
        const uint dims,
        /* Avoid having to re-calculate again */
-        double r_handle_factors[2])
+        double r_handle_factors[2], uint *r_error_index)
 {
 	double error_sq = FLT_MAX;

@@ -338,7 +338,7 @@ static double knot_remove_error_value(
 	        points_offset, points_offset_len, points_offset_length_cache, dims, 0.0,
 	        tan_l, tan_r,
 	        handle_factor_l, handle_factor_r,
-	        &error_sq);
+	        &error_sq, r_error_index);

 	assert(error_sq != FLT_MAX);

@@ -363,6 +363,7 @@ static double knot_calc_curve_error_value(
 	        ((knot_r->index + pd->points_len) - knot_l->index)) + 1;

 	if (points_offset_len != 2) {
+		uint error_index_dummy;
 		return knot_remove_error_value(
 		        tan_l, tan_r,
 		        &pd->points[knot_l->index * dims], points_offset_len,
@@ -372,7 +373,7 @@ static double knot_calc_curve_error_value(
 		        NULL,
 #endif
 		        dims,
-		        r_handle_factors);
+		        r_handle_factors, &error_index_dummy);
 	}
 	else {
 		/* No points between, use 1/3 handle length with no error as a fallback. */
@@ -388,6 +389,56 @@ static double knot_calc_curve_error_value(
 	}
 }

+#ifdef USE_KNOT_REFIT_REMOVE
+/* Same job as knot_calc_curve_error_value(), but also outputs r_error_index as an index into pd->points. */
+static double knot_calc_curve_error_value_and_index(
+        const struct PointData *pd,
+        const struct Knot *knot_l, const struct Knot *knot_r,
+        const double *tan_l, const double *tan_r,
+        const uint dims,
+        double r_handle_factors[2],
+        uint *r_error_index)
+{
+	const uint points_offset_len = ((knot_l->index < knot_r->index) ?
+	        (knot_r->index - knot_l->index) :
+	        ((knot_r->index + pd->points_len) - knot_l->index)) + 1;
+
+	if (points_offset_len != 2) {
+		const double error_sq = knot_remove_error_value(
+		        tan_l, tan_r,
+		        &pd->points[knot_l->index * dims], points_offset_len,
+#ifdef USE_LENGTH_CACHE
+		        &pd->points_length_cache[knot_l->index],
+#else
+		        NULL,
+#endif
+		        dims,
+		        r_handle_factors, r_error_index);
+
+		/* Adjust the offset index to the global index & wrap if needed. */
+		*r_error_index += knot_l->index;
+		if (*r_error_index >= pd->points_len) {
+			*r_error_index -= pd->points_len;
+		}
+
+		return error_sq;
+	}
+	else {
+		/* No points between, use 1/3 handle length with no error as a fallback. */
+		assert(points_offset_len == 2);
+#ifdef USE_LENGTH_CACHE
+		r_handle_factors[0] = r_handle_factors[1] = pd->points_length_cache[knot_l->index] / 3.0;
+#else
+		r_handle_factors[0] = r_handle_factors[1] = len_vnvn(
+		        &pd->points[(knot_l->index + 0) * dims],
+		        &pd->points[(knot_l->index + 1) * dims], dims) / 3.0;
+#endif
+		*r_error_index = 0;
+		return 0.0;
+	}
+}
+#endif  /* USE_KNOT_REFIT_REMOVE */
+
 struct KnotRemove_Params {
 	Heap *heap;
 	const struct PointData *pd;
@@ -556,15 +607,18 @@ static void knot_refit_error_recalculate(
 	assert(k->can_remove);

 #ifdef USE_KNOT_REFIT_REMOVE
+	(void)knots_len;
+
+	uint refit_index = SPLIT_POINT_INVALID;
 	{
 		double handles[2];

 		/* First check if we can remove, this allows to refit and remove as we go. */
-		const double cost_sq = knot_calc_curve_error_value(
+		const double cost_sq = knot_calc_curve_error_value_and_index(
 		        p->pd, k->prev, k->next,
 		        k->prev->tan[1], k->next->tan[0],
 		        dims,
-		        handles);
+		        handles, &refit_index);

 		if (cost_sq < error_sq_max) {
 			struct KnotRefitState *r;
@@ -598,13 +652,14 @@ static void knot_refit_error_recalculate(
 	}
 #else
 	(void)error_sq_max;
-#endif  /* USE_KNOT_REFIT_REMOVE */

 	const uint refit_index = knot_find_split_point(
 	         p->pd, k->prev, k->next,
 	         knots_len,
 	         dims);

+#endif  /* USE_KNOT_REFIT_REMOVE */
+
 	if ((refit_index == SPLIT_POINT_INVALID) ||
 	    (refit_index == k->index))
 	{
--- a/intern/CMakeLists.txt
+++ b/intern/CMakeLists.txt
@@ -34,7 +34,7 @@ add_subdirectory(mikktspace)
 add_subdirectory(glew-mx)
 add_subdirectory(eigen)

-if (WITH_GAMEENGINE_DECKLINK)
+if(WITH_GAMEENGINE_DECKLINK)
 	add_subdirectory(decklink)
 endif()

@@ -62,7 +62,7 @@ if(WITH_IK_ITASC)
 	add_subdirectory(itasc)
 endif()

-if(WITH_IK_SOLVER OR WITH_GAMEENGINE OR WITH_MOD_BOOLEAN)
+if(WITH_GAMEENGINE)
 	add_subdirectory(moto)
 endif()

--- a/intern/atomic/atomic_ops.h
+++ b/intern/atomic/atomic_ops.h
@@ -101,11 +101,11 @@ ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x);
 ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x);
 ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);

-ATOMIC_INLINE unsigned atomic_add_and_fetch_u(unsigned *p, unsigned x);
-ATOMIC_INLINE unsigned atomic_sub_and_fetch_u(unsigned *p, unsigned x);
-ATOMIC_INLINE unsigned atomic_fetch_and_add_u(unsigned *p, unsigned x);
-ATOMIC_INLINE unsigned atomic_fetch_and_sub_u(unsigned *p, unsigned x);
-ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new);
+ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x);
+ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x);
+ATOMIC_INLINE unsigned int atomic_fetch_and_add_u(unsigned int *p, unsigned int x);
+ATOMIC_INLINE unsigned int atomic_fetch_and_sub_u(unsigned int *p, unsigned int x);
+ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsigned int _new);

 /* WARNING! Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation,
 *          which means they are only efficient if collisions are highly unlikely (i.e. if probability of two threads
--- a/intern/atomic/intern/atomic_ops_ext.h
+++ b/intern/atomic/intern/atomic_ops_ext.h
@@ -113,58 +113,58 @@ ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new)

 /******************************************************************************/
 /* unsigned operations. */
-ATOMIC_INLINE unsigned atomic_add_and_fetch_u(unsigned *p, unsigned x)
+ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x)
 {
-	assert(sizeof(unsigned) == LG_SIZEOF_INT);
+	assert(sizeof(unsigned int) == LG_SIZEOF_INT);

 #if (LG_SIZEOF_INT == 8)
-	return (unsigned)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
+	return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
 #elif (LG_SIZEOF_INT == 4)
-	return (unsigned)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
+	return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
 #endif
 }

-ATOMIC_INLINE unsigned atomic_sub_and_fetch_u(unsigned *p, unsigned x)
+ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x)
 {
-	assert(sizeof(unsigned) == LG_SIZEOF_INT);
+	assert(sizeof(unsigned int) == LG_SIZEOF_INT);

 #if (LG_SIZEOF_INT == 8)
-	return (unsigned)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
+	return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
 #elif (LG_SIZEOF_INT == 4)
-	return (unsigned)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
+	return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
 #endif
 }

-ATOMIC_INLINE unsigned atomic_fetch_and_add_u(unsigned *p, unsigned x)
+ATOMIC_INLINE unsigned int atomic_fetch_and_add_u(unsigned int *p, unsigned int x)
 {
-	assert(sizeof(unsigned) == LG_SIZEOF_INT);
+	assert(sizeof(unsigned int) == LG_SIZEOF_INT);

 #if (LG_SIZEOF_INT == 8)
-	return (unsigned)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x);
+	return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x);
 #elif (LG_SIZEOF_INT == 4)
-	return (unsigned)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x);
+	return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x);
 #endif
 }

-ATOMIC_INLINE unsigned atomic_fetch_and_sub_u(unsigned *p, unsigned x)
+ATOMIC_INLINE unsigned int atomic_fetch_and_sub_u(unsigned int *p, unsigned int x)
 {
-	assert(sizeof(unsigned) == LG_SIZEOF_INT);
+	assert(sizeof(unsigned int) == LG_SIZEOF_INT);

 #if (LG_SIZEOF_INT == 8)
-	return (unsigned)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
+	return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
 #elif (LG_SIZEOF_INT == 4)
-	return (unsigned)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
+	return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
 #endif
 }

-ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new)
+ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsigned int _new)
 {
-	assert(sizeof(unsigned) == LG_SIZEOF_INT);
+	assert(sizeof(unsigned int) == LG_SIZEOF_INT);

 #if (LG_SIZEOF_INT == 8)
-	return (unsigned)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
+	return (unsigned int)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
 #elif (LG_SIZEOF_INT == 4)
-	return (unsigned)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
+	return (unsigned int)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
 #endif
 }

--- a/intern/audaspace/intern/AUD_SoftwareDevice.cpp
+++ b/intern/audaspace/intern/AUD_SoftwareDevice.cpp
@@ -365,6 +365,7 @@ bool AUD_SoftwareDevice::AUD_SoftwareHandle::seek(float position)
 	if(!m_status)
 		return false;

+	m_pitch->setPitch(m_user_pitch);
 	m_reader->seek((int)(position * m_reader->getSpecs().rate));

 	if(m_status == AUD_STATUS_STOPPED)
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -22,6 +22,7 @@ if(WITH_CYCLES_NATIVE_ONLY)
 		-DWITH_KERNEL_NATIVE
 	)
 	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
+	set(CYCLES_KERNEL_FLAGS "-march=native")
 elseif(NOT WITH_CPU_SSE)
 	set(CXX_HAS_SSE FALSE)
 	set(CXX_HAS_AVX FALSE)
@@ -59,10 +60,13 @@ elseif(WIN32 AND MSVC)
 	set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox")
 	set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Ox")
 	set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Ox")
+
+	set(CYCLES_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-")
 elseif(CMAKE_COMPILER_IS_GNUCC)
 	check_cxx_compiler_flag(-msse CXX_HAS_SSE)
 	check_cxx_compiler_flag(-mavx CXX_HAS_AVX)
 	check_cxx_compiler_flag(-mavx2 CXX_HAS_AVX2)
+	set(CYCLES_KERNEL_FLAGS "-ffast-math")
 	if(CXX_HAS_SSE)
 		set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2 -mfpmath=sse")
 		set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse")
@@ -74,10 +78,12 @@ elseif(CMAKE_COMPILER_IS_GNUCC)
 	if(CXX_HAS_AVX2)
 		set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c -mfpmath=sse")
 	endif()
+	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -fno-finite-math-only")
 elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
 	check_cxx_compiler_flag(-msse CXX_HAS_SSE)
 	check_cxx_compiler_flag(-mavx CXX_HAS_AVX)
 	check_cxx_compiler_flag(-mavx2 CXX_HAS_AVX2)
+	set(CYCLES_KERNEL_FLAGS "-ffast-math")
 	if(CXX_HAS_SSE)
 		set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2")
 		set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3")
@@ -89,6 +95,7 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
 	if(CXX_HAS_AVX2)
 		set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c")
 	endif()
+	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -fno-finite-math-only")
 endif()

 if(CXX_HAS_SSE)
@@ -184,7 +191,7 @@ endif()
 # Logging capabilities using GLog library.
 if(WITH_CYCLES_LOGGING)
 	add_definitions(-DWITH_CYCLES_LOGGING)
-	add_definitions(-DGOOGLE_GLOG_DLL_DECL=)
+	add_definitions(${GLOG_DEFINES})
 	add_definitions(-DCYCLES_GFLAGS_NAMESPACE=${GFLAGS_NAMESPACE})
 	include_directories(
 		SYSTEM
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -1,14 +1,6 @@

 set(INC
-	.
-	../bvh
-	../device
-	../graph
-	../kernel
-	../kernel/svm
-	../render
-	../subd
-	../util
+	..
 )
 set(INC_SYS
 )
@@ -43,18 +35,15 @@ if(WITH_CYCLES_OSL)
 	list(APPEND LIBRARIES cycles_kernel_osl)
 endif()

-if(CYCLES_STANDALONE_REPOSITORY)
-	if(WITH_CYCLES_LOGGING)
-		list(APPEND LIBRARIES
-			${GLOG_LIBRARIES}
-			${GFLAGS_LIBRARIES}
-		)
-	endif()
-else()
+if(NOT CYCLES_STANDALONE_REPOSITORY)
 	list(APPEND LIBRARIES bf_intern_glew_mx bf_intern_guardedalloc)
-	if(WITH_CYCLES_LOGGING)
-		list(APPEND LIBRARIES extern_glog extern_gflags)
-	endif()
+endif()
+
+if(WITH_CYCLES_LOGGING)
+	list(APPEND LIBRARIES
+		${GLOG_LIBRARIES}
+		${GFLAGS_LIBRARIES}
+	)
 endif()

 if(WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI)
--- a/intern/cycles/app/cycles_server.cpp
+++ b/intern/cycles/app/cycles_server.cpp
@@ -16,15 +16,15 @@

 #include <stdio.h>

-#include "device.h"
+#include "device/device.h"

-#include "util_args.h"
-#include "util_foreach.h"
-#include "util_path.h"
-#include "util_stats.h"
-#include "util_string.h"
-#include "util_task.h"
-#include "util_logging.h"
+#include "util/util_args.h"
+#include "util/util_foreach.h"
+#include "util/util_path.h"
+#include "util/util_stats.h"
+#include "util/util_string.h"
+#include "util/util_task.h"
+#include "util/util_logging.h"

 using namespace ccl;

--- a/intern/cycles/app/cycles_standalone.cpp
+++ b/intern/cycles/app/cycles_standalone.cpp
@@ -16,29 +16,29 @@

 #include <stdio.h>

-#include "buffers.h"
-#include "camera.h"
-#include "device.h"
-#include "scene.h"
-#include "session.h"
-#include "integrator.h"
+#include "render/buffers.h"
+#include "render/camera.h"
+#include "device/device.h"
+#include "render/scene.h"
+#include "render/session.h"
+#include "render/integrator.h"

-#include "util_args.h"
-#include "util_foreach.h"
-#include "util_function.h"
-#include "util_logging.h"
-#include "util_path.h"
-#include "util_progress.h"
-#include "util_string.h"
-#include "util_time.h"
-#include "util_transform.h"
-#include "util_version.h"
+#include "util/util_args.h"
+#include "util/util_foreach.h"
+#include "util/util_function.h"
+#include "util/util_logging.h"
+#include "util/util_path.h"
+#include "util/util_progress.h"
+#include "util/util_string.h"
+#include "util/util_time.h"
+#include "util/util_transform.h"
+#include "util/util_version.h"

 #ifdef WITH_CYCLES_STANDALONE_GUI
-#include "util_view.h"
+#include "util/util_view.h"
 #endif

-#include "cycles_xml.h"
+#include "app/cycles_xml.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/app/cycles_xml.cpp
+++ b/intern/cycles/app/cycles_xml.cpp
@@ -20,31 +20,31 @@
 #include <algorithm>
 #include <iterator>

-#include "node_xml.h"
+#include "graph/node_xml.h"

-#include "background.h"
-#include "camera.h"
-#include "film.h"
-#include "graph.h"
-#include "integrator.h"
-#include "light.h"
-#include "mesh.h"
-#include "nodes.h"
-#include "object.h"
-#include "osl.h"
-#include "shader.h"
-#include "scene.h"
+#include "render/background.h"
+#include "render/camera.h"
+#include "render/film.h"
+#include "render/graph.h"
+#include "render/integrator.h"
+#include "render/light.h"
+#include "render/mesh.h"
+#include "render/nodes.h"
+#include "render/object.h"
+#include "render/osl.h"
+#include "render/shader.h"
+#include "render/scene.h"

-#include "subd_patch.h"
-#include "subd_split.h"
+#include "subd/subd_patch.h"
+#include "subd/subd_split.h"

-#include "util_debug.h"
-#include "util_foreach.h"
-#include "util_path.h"
-#include "util_transform.h"
-#include "util_xml.h"
+#include "util/util_debug.h"
+#include "util/util_foreach.h"
+#include "util/util_path.h"
+#include "util/util_transform.h"
+#include "util/util_xml.h"

-#include "cycles_xml.h"
+#include "app/cycles_xml.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -1,12 +1,6 @@

 set(INC
-	../graph
-	../render
-	../device
-	../kernel
-	../kernel/svm
-	../util
-	../subd
+	..
 	../../glew-mx
 	../../guardedalloc
 	../../mikktspace
--- a/intern/cycles/blender/addon/init.py
+++ b/intern/cycles/blender/addon/init.py
@@ -102,12 +102,21 @@ class CyclesRender(bpy.types.RenderEngine):
        else:
            self.report({'ERROR'}, "OSL support disabled in this build.")

+    def update_render_passes(self, scene, srl):
+        engine.register_passes(self, scene, srl)
+

 def engine_exit():
    engine.exit()


+classes = (
+    CyclesRender,
+)
+
+
 def register():
+    from bpy.utils import register_class
    from . import ui
    from . import properties
    from . import presets
@@ -122,12 +131,15 @@ def register():
    properties.register()
    ui.register()
    presets.register()
-    bpy.utils.register_module(__name__)
+
+    for cls in classes:
+        register_class(cls)

    bpy.app.handlers.version_update.append(version_update.do_versions)


 def unregister():
+    from bpy.utils import unregister_class
    from . import ui
    from . import properties
    from . import presets
@@ -138,4 +150,6 @@ def unregister():
    ui.unregister()
    properties.unregister()
    presets.unregister()
-    bpy.utils.unregister_module(__name__)
+
+    for cls in classes:
+        unregister_class(cls)
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -50,6 +50,24 @@ def _workaround_buggy_drivers():
            _cycles.opencl_disable()


+def _configure_argument_parser():
+    """Build the argparse parser for the Cycles-specific command line options."""
+    import argparse
+    # (flag, help text) pairs, registered in this order.
+    # Each option takes a value and is left as None when it is absent
+    # from the command line.
+    resumable_options = (
+        ("--cycles-resumable-num-chunks", "Number of chunks to split sample range into"),
+        ("--cycles-resumable-current-chunk", "Current chunk of samples range to render"),
+        ("--cycles-resumable-start-chunk", "Start chunk to render"),
+        ("--cycles-resumable-end-chunk", "End chunk to render"),
+    )
+    parser = argparse.ArgumentParser(description="Cycles Addon argument parser")
+    for flag, help_text in resumable_options:
+        parser.add_argument(flag, help=help_text, default=None)
+    return parser
+
+
 def _parse_command_line():
    import sys

@@ -57,25 +75,22 @@ def _parse_command_line():
    if "--" not in argv:
        return

-    argv = argv[argv.index("--") + 1:]
+    parser = _configure_argument_parser()
+    args, unknown = parser.parse_known_args(argv[argv.index("--") + 1:])

-    num_resumable_chunks = None
-    current_resumable_chunk = None
-
-    # TODO(sergey): Add some nice error ptins if argument is not used properly.
-    idx = 0
-    while idx < len(argv) - 1:
-        arg = argv[idx]
-        if arg == '--cycles-resumable-num-chunks':
-            num_resumable_chunks = int(argv[idx + 1])
-        elif arg == '--cycles-resumable-current-chunk':
-            current_resumable_chunk = int(argv[idx + 1])
-        idx += 1
-
-    if num_resumable_chunks is not None and current_resumable_chunk is not None:
-        import _cycles
-        _cycles.set_resumable_chunks(num_resumable_chunks,
-                                     current_resumable_chunk)
+    if args.cycles_resumable_num_chunks is not None:
+        # A single current chunk takes precedence over a start/end chunk range.
+        if args.cycles_resumable_current_chunk is not None:
+            import _cycles
+            _cycles.set_resumable_chunk(int(args.cycles_resumable_num_chunks),
+                                        int(args.cycles_resumable_current_chunk))
+        elif (args.cycles_resumable_start_chunk is not None and
+              args.cycles_resumable_end_chunk is not None):
+            import _cycles
+            _cycles.set_resumable_chunk_range(
+                    int(args.cycles_resumable_num_chunks),
+                    int(args.cycles_resumable_start_chunk),
+                    int(args.cycles_resumable_end_chunk))


 def init():
@@ -190,3 +205,48 @@ def with_network():
 def system_info():
    import _cycles
    return _cycles.system_info()
+
+def register_passes(engine, scene, srl):
+    """Register the render passes enabled on render layer `srl`.
+
+    Calls `engine.register_pass()` once per pass: "Combined" is always
+    registered, the regular passes follow the `srl.use_pass_*` flags, and the
+    debug and denoising passes follow the flags stored in `srl.cycles`.
+
+    The arguments after `srl` are the pass name, a number that presumably is
+    the channel count (it matches the length of the channel string in every
+    call), the channel identifier string and the pass type.
+    """
+    engine.register_pass(scene, srl, "Combined", 4, "RGBA", 'COLOR')
+
+    if srl.use_pass_z:                     engine.register_pass(scene, srl, "Depth",         1, "Z",    'VALUE')
+    if srl.use_pass_mist:                  engine.register_pass(scene, srl, "Mist",          1, "Z",    'VALUE')
+    if srl.use_pass_normal:                engine.register_pass(scene, srl, "Normal",        3, "XYZ",  'VECTOR')
+    if srl.use_pass_vector:                engine.register_pass(scene, srl, "Vector",        4, "XYZW", 'VECTOR')
+    if srl.use_pass_uv:                    engine.register_pass(scene, srl, "UV",            3, "UVA",  'VECTOR')
+    if srl.use_pass_object_index:          engine.register_pass(scene, srl, "IndexOB",       1, "X",    'VALUE')
+    if srl.use_pass_material_index:        engine.register_pass(scene, srl, "IndexMA",       1, "X",    'VALUE')
+    if srl.use_pass_shadow:                engine.register_pass(scene, srl, "Shadow",        3, "RGB",  'COLOR')
+    if srl.use_pass_ambient_occlusion:     engine.register_pass(scene, srl, "AO",            3, "RGB",  'COLOR')
+    if srl.use_pass_diffuse_direct:        engine.register_pass(scene, srl, "DiffDir",       3, "RGB",  'COLOR')
+    if srl.use_pass_diffuse_indirect:      engine.register_pass(scene, srl, "DiffInd",       3, "RGB",  'COLOR')
+    if srl.use_pass_diffuse_color:         engine.register_pass(scene, srl, "DiffCol",       3, "RGB",  'COLOR')
+    if srl.use_pass_glossy_direct:         engine.register_pass(scene, srl, "GlossDir",      3, "RGB",  'COLOR')
+    if srl.use_pass_glossy_indirect:       engine.register_pass(scene, srl, "GlossInd",      3, "RGB",  'COLOR')
+    if srl.use_pass_glossy_color:          engine.register_pass(scene, srl, "GlossCol",      3, "RGB",  'COLOR')
+    if srl.use_pass_transmission_direct:   engine.register_pass(scene, srl, "TransDir",      3, "RGB",  'COLOR')
+    if srl.use_pass_transmission_indirect: engine.register_pass(scene, srl, "TransInd",      3, "RGB",  'COLOR')
+    if srl.use_pass_transmission_color:    engine.register_pass(scene, srl, "TransCol",      3, "RGB",  'COLOR')
+    if srl.use_pass_subsurface_direct:     engine.register_pass(scene, srl, "SubsurfaceDir", 3, "RGB",  'COLOR')
+    if srl.use_pass_subsurface_indirect:   engine.register_pass(scene, srl, "SubsurfaceInd", 3, "RGB",  'COLOR')
+    if srl.use_pass_subsurface_color:      engine.register_pass(scene, srl, "SubsurfaceCol", 3, "RGB",  'COLOR')
+    if srl.use_pass_emit:                  engine.register_pass(scene, srl, "Emit",          3, "RGB",  'COLOR')
+    if srl.use_pass_environment:           engine.register_pass(scene, srl, "Env",           3, "RGB",  'COLOR')
+
+    # Cycles-specific per-render-layer settings.
+    crl = srl.cycles
+    if crl.pass_debug_bvh_traversed_nodes:     engine.register_pass(scene, srl, "Debug BVH Traversed Nodes",     1, "X", 'VALUE')
+    if crl.pass_debug_bvh_traversed_instances: engine.register_pass(scene, srl, "Debug BVH Traversed Instances", 1, "X", 'VALUE')
+    if crl.pass_debug_bvh_intersections:       engine.register_pass(scene, srl, "Debug BVH Intersections",       1, "X", 'VALUE')
+    if crl.pass_debug_ray_bounces:             engine.register_pass(scene, srl, "Debug Ray Bounces",             1, "X", 'VALUE')
+
+    # The denoising passes are only registered when denoising is enabled AND
+    # the user asked for them to be stored.
+    if crl.use_denoising and crl.denoising_store_passes:
+        engine.register_pass(scene, srl, "Denoising Normal",          3, "XYZ", 'VECTOR')
+        engine.register_pass(scene, srl, "Denoising Normal Variance", 3, "XYZ", 'VECTOR')
+        engine.register_pass(scene, srl, "Denoising Albedo",          3, "RGB", 'COLOR')
+        engine.register_pass(scene, srl, "Denoising Albedo Variance", 3, "RGB", 'COLOR')
+        engine.register_pass(scene, srl, "Denoising Depth",           1, "Z",   'VALUE')
+        engine.register_pass(scene, srl, "Denoising Depth Variance",  1, "Z",   'VALUE')
+        engine.register_pass(scene, srl, "Denoising Shadow A",        3, "XYV", 'VECTOR')
+        engine.register_pass(scene, srl, "Denoising Shadow B",        3, "XYV", 'VECTOR')
+        engine.register_pass(scene, srl, "Denoising Image",           3, "RGB", 'COLOR')
+        engine.register_pass(scene, srl, "Denoising Image Variance",  3, "RGB", 'COLOR')
--- a/intern/cycles/blender/addon/presets.py
+++ b/intern/cycles/blender/addon/presets.py
@@ -82,12 +82,23 @@ class AddPresetSampling(AddPresetBase, Operator):
    preset_subdir = "cycles/sampling"


+classes = (
+    AddPresetIntegrator,
+    AddPresetSampling,
+)
+
+
 def register():
-    pass
+    from bpy.utils import register_class
+    for cls in classes:
+        register_class(cls)


 def unregister():
-    pass
+    from bpy.utils import unregister_class
+    for cls in classes:
+        unregister_class(cls)
+

 if __name__ == "__main__":
    register()
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -638,6 +638,20 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
            items=enum_texture_limit
            )

+        cls.ao_bounces = IntProperty(
+            name="AO Bounces",
+            default=0,
+            description="Approximate indirect light with background tinted ambient occlusion at the specified bounce, 0 disables this feature",
+            min=0, max=1024,
+            )
+
+        cls.ao_bounces_render = IntProperty(
+            name="AO Bounces Render",
+            default=0,
+            description="Approximate indirect light with background tinted ambient occlusion at the specified bounce, 0 disables this feature",
+            min=0, max=1024,
+            )
+
        # Various fine-tuning debug flags

        def devices_update_callback(self, context):
@@ -651,8 +665,10 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
        cls.debug_use_cpu_sse3 = BoolProperty(name="SSE3", default=True)
        cls.debug_use_cpu_sse2 = BoolProperty(name="SSE2", default=True)
        cls.debug_use_qbvh = BoolProperty(name="QBVH", default=True)
+        cls.debug_use_cpu_split_kernel = BoolProperty(name="Split Kernel", default=False)

        cls.debug_use_cuda_adaptive_compile = BoolProperty(name="Adaptive Compile", default=False)
+        cls.debug_use_cuda_split_kernel = BoolProperty(name="Split Kernel", default=False)

        cls.debug_opencl_kernel_type = EnumProperty(
            name="OpenCL Kernel Type",
@@ -679,6 +695,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
            update=devices_update_callback
            )

+        # Changing this flag runs devices_update_callback.
+        cls.debug_opencl_kernel_single_program = BoolProperty(name="Single Program", default=True, update=devices_update_callback)
+
        cls.debug_use_opencl_debug = BoolProperty(name="Debug OpenCL", default=False)

    @classmethod
@@ -1078,6 +1096,12 @@ class CyclesObjectSettings(bpy.types.PropertyGroup):
                default=1.0,
                )

+        cls.is_shadow_catcher = BoolProperty(
+                name="Shadow Catcher",
+                description="Only render shadows on this object, for compositing renders into real footage",
+                default=False,
+                )
+
    @classmethod
    def unregister(cls):
        del bpy.types.Object.cycles
@@ -1142,6 +1166,113 @@ class CyclesCurveRenderSettings(bpy.types.PropertyGroup):
    def unregister(cls):
        del bpy.types.Scene.cycles_curves

+class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
+    """Cycles settings stored per render layer.
+
+    Registering the class attaches it to `bpy.types.SceneRenderLayer` as the
+    `cycles` pointer property and defines the debug pass toggles and the
+    denoising options on it; unregistering removes that pointer again.
+    """
+
+    @classmethod
+    def register(cls):
+        bpy.types.SceneRenderLayer.cycles = PointerProperty(
+                name="Cycles SceneRenderLayer Settings",
+                description="Cycles SceneRenderLayer Settings",
+                type=cls,
+                )
+
+        # Debug passes.
+        cls.pass_debug_bvh_traversed_nodes = BoolProperty(
+                name="Debug BVH Traversed Nodes",
+                description="Store Debug BVH Traversed Nodes pass",
+                default=False,
+                )
+        cls.pass_debug_bvh_traversed_instances = BoolProperty(
+                name="Debug BVH Traversed Instances",
+                description="Store Debug BVH Traversed Instances pass",
+                default=False,
+                )
+        cls.pass_debug_bvh_intersections = BoolProperty(
+                name="Debug BVH Intersections",
+                description="Store Debug BVH Intersections",
+                default=False,
+                )
+        cls.pass_debug_ray_bounces = BoolProperty(
+                name="Debug Ray Bounces",
+                description="Store Debug Ray Bounces pass",
+                default=False,
+                )
+
+        # Denoising: master switch and per-component toggles.
+        cls.use_denoising = BoolProperty(
+                name="Use Denoising",
+                description="Denoise the rendered image",
+                default=False,
+                )
+        cls.denoising_diffuse_direct = BoolProperty(
+                name="Diffuse Direct",
+                description="Denoise the direct diffuse lighting",
+                default=True,
+                )
+        cls.denoising_diffuse_indirect = BoolProperty(
+                name="Diffuse Indirect",
+                description="Denoise the indirect diffuse lighting",
+                default=True,
+                )
+        cls.denoising_glossy_direct = BoolProperty(
+                name="Glossy Direct",
+                description="Denoise the direct glossy lighting",
+                default=True,
+                )
+        cls.denoising_glossy_indirect = BoolProperty(
+                name="Glossy Indirect",
+                description="Denoise the indirect glossy lighting",
+                default=True,
+                )
+        cls.denoising_transmission_direct = BoolProperty(
+                name="Transmission Direct",
+                description="Denoise the direct transmission lighting",
+                default=True,
+                )
+        cls.denoising_transmission_indirect = BoolProperty(
+                name="Transmission Indirect",
+                description="Denoise the indirect transmission lighting",
+                default=True,
+                )
+        cls.denoising_subsurface_direct = BoolProperty(
+                name="Subsurface Direct",
+                description="Denoise the direct subsurface lighting",
+                default=True,
+                )
+        cls.denoising_subsurface_indirect = BoolProperty(
+                name="Subsurface Indirect",
+                description="Denoise the indirect subsurface lighting",
+                default=True,
+                )
+
+        # Denoising: filter parameters.
+        cls.denoising_strength = FloatProperty(
+                name="Denoising Strength",
+                description="Controls neighbor pixel weighting for the denoising filter (lower values preserve more detail, but aren't as smooth)",
+                min=0.0, max=1.0,
+                default=0.5,
+                )
+        cls.denoising_feature_strength = FloatProperty(
+                name="Denoising Feature Strength",
+                description="Controls removal of noisy image feature passes (lower values preserve more detail, but aren't as smooth)",
+                min=0.0, max=1.0,
+                default=0.5,
+                )
+        cls.denoising_radius = IntProperty(
+                name="Denoising Radius",
+                description="Size of the image area that's used to denoise a pixel (higher values are smoother, but might lose detail and are slower)",
+                min=1, max=50,
+                default=8,
+                )
+        # NOTE(review): the original tooltip read "When removing that don't
+        # carry information"; "features" is the assumed missing word - confirm.
+        cls.denoising_relative_pca = BoolProperty(
+                name="Relative filter",
+                description="When removing features that don't carry information, use a relative threshold instead of an absolute one (can help to reduce artifacts, but might cause detail loss around edges)",
+                default=False,
+                )
+        cls.denoising_store_passes = BoolProperty(
+                name="Store denoising passes",
+                description="Store the denoising feature passes and the noisy image",
+                default=False,
+                )
+
+    @classmethod
+    def unregister(cls):
+        del bpy.types.SceneRenderLayer.cycles
+

 class CyclesCurveSettings(bpy.types.PropertyGroup):
    @classmethod
@@ -1273,14 +1404,14 @@ class CyclesPreferences(bpy.types.AddonPreferences):
        row = layout.row()

        if self.compute_device_type == 'CUDA' and cuda_devices:
-            col = row.column(align=True)
+            box = row.box()
            for device in cuda_devices:
-                col.prop(device, "use", text=device.name, toggle=True)
+                box.prop(device, "use", text=device.name)

        if self.compute_device_type == 'OPENCL' and opencl_devices:
-            col = row.column(align=True)
+            box = row.box()
            for device in opencl_devices:
-                col.prop(device, "use", text=device.name, toggle=True)
+                box.prop(device, "use", text=device.name)


    def draw(self, context):
@@ -1300,6 +1431,7 @@ def register():
    bpy.utils.register_class(CyclesCurveSettings)
    bpy.utils.register_class(CyclesDeviceSettings)
    bpy.utils.register_class(CyclesPreferences)
+    bpy.utils.register_class(CyclesRenderLayerSettings)


 def unregister():
@@ -1315,3 +1447,4 @@ def unregister():
    bpy.utils.unregister_class(CyclesCurveSettings)
    bpy.utils.unregister_class(CyclesDeviceSettings)
    bpy.utils.unregister_class(CyclesPreferences)
+    bpy.utils.unregister_class(CyclesRenderLayerSettings)
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -78,7 +78,7 @@ def use_cuda(context):
 def use_branched_path(context):
    cscene = context.scene.cycles

-    return (cscene.progressive == 'BRANCHED_PATH' and not use_opencl(context))
+    return (cscene.progressive == 'BRANCHED_PATH')


 def use_sample_all_lights(context):
@@ -86,12 +86,10 @@ def use_sample_all_lights(context):

    return cscene.sample_all_lights_direct or cscene.sample_all_lights_indirect

-def show_device_selection(context):
-    type = get_device_type(context)
-    if type == 'NETWORK':
+def show_device_active(context):
+    cscene = context.scene.cycles
+    if cscene.device != 'GPU':
        return True
-    if not type in {'CUDA', 'OPENCL'}:
-        return False
    return context.user_preferences.addons[__package__].preferences.has_active_device()


@@ -158,7 +156,6 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):

        row = layout.row()
        sub = row.row()
-        sub.active = get_device_type(context) != 'OPENCL' or use_cpu(context)
        sub.prop(cscene, "progressive", text="")
        row.prop(cscene, "use_square_samples")

@@ -186,9 +183,6 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
            sub.label(text="AA Samples:")
            sub.prop(cscene, "aa_samples", text="Render")
            sub.prop(cscene, "preview_aa_samples", text="Preview")
-            sub.separator()
-            sub.prop(cscene, "sample_all_lights_direct")
-            sub.prop(cscene, "sample_all_lights_indirect")

            col = split.column()
            sub = col.column(align=True)
@@ -205,8 +199,11 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
            sub.prop(cscene, "subsurface_samples", text="Subsurface")
            sub.prop(cscene, "volume_samples", text="Volume")

-        if not (use_opencl(context) and cscene.feature_set != 'EXPERIMENTAL'):
-            layout.row().prop(cscene, "sampling_pattern", text="Pattern")
+            col = layout.column(align=True)
+            col.prop(cscene, "sample_all_lights_direct")
+            col.prop(cscene, "sample_all_lights_indirect")
+
+        layout.row().prop(cscene, "sampling_pattern", text="Pattern")

        for rl in scene.render.layers:
            if rl.samples > 0:
@@ -270,7 +267,7 @@ class CyclesRender_PT_geometry(CyclesButtonsPanel, Panel):

        row = col.row()
        row.prop(ccscene, "minimum_width", text="Min Pixels")
-        row.prop(ccscene, "maximum_width", text="Max Ext.")
+        row.prop(ccscene, "maximum_width", text="Max Extension")


 class CyclesRender_PT_light_paths(CyclesButtonsPanel, Panel):
@@ -479,11 +476,14 @@ class CyclesRender_PT_layer_passes(CyclesButtonsPanel, Panel):
    bl_options = {'DEFAULT_CLOSED'}

    def draw(self, context):
+        import _cycles
+
        layout = self.layout

        scene = context.scene
        rd = scene.render
        rl = rd.layers.active
+        crl = rl.cycles

        split = layout.split()

@@ -530,8 +530,18 @@ class CyclesRender_PT_layer_passes(CyclesButtonsPanel, Panel):
        col.prop(rl, "use_pass_emit", text="Emission")
        col.prop(rl, "use_pass_environment")

-        if hasattr(rd, "debug_pass_type"):
-            layout.prop(rd, "debug_pass_type")
+        # Storing the denoising passes is only offered with the experimental
+        # feature set, and is greyed out while denoising itself is disabled.
+        if context.scene.cycles.feature_set == 'EXPERIMENTAL':
+            col.separator()
+            sub = col.column()
+            sub.active = crl.use_denoising
+            sub.prop(crl, "denoising_store_passes", text="Denoising")
+
+        # Debug pass toggles are only shown when _cycles reports debug support.
+        if _cycles.with_cycles_debug:
+            col = layout.column()
+            col.prop(crl, "pass_debug_bvh_traversed_nodes")
+            col.prop(crl, "pass_debug_bvh_traversed_instances")
+            col.prop(crl, "pass_debug_bvh_intersections")
+            col.prop(crl, "pass_debug_ray_bounces")


 class CyclesRender_PT_views(CyclesButtonsPanel, Panel):
@@ -577,6 +587,64 @@ class CyclesRender_PT_views(CyclesButtonsPanel, Panel):
            row.prop(rv, "camera_suffix", text="")


+class CyclesRender_PT_denoising(CyclesButtonsPanel, Panel):
+    """Render layer panel exposing the denoising settings of the active layer."""
+    bl_label = "Denoising"
+    bl_context = "render_layer"
+    bl_options = {'DEFAULT_CLOSED'}
+
+    def draw_header(self, context):
+        """Draw the checkbox that switches denoising on for the active layer."""
+        layer_settings = context.scene.render.layers.active.cycles
+        self.layout.prop(layer_settings, "use_denoising", text="")
+
+    def draw(self, context):
+        """Draw the filter parameters followed by the per-component toggles."""
+        layout = self.layout
+        layer_settings = context.scene.render.layers.active.cycles
+
+        columns = layout.split()
+
+        # Left column: radius and strength.
+        left = columns.column().column(align=True)
+        left.prop(layer_settings, "denoising_radius", text="Radius")
+        left.prop(layer_settings, "denoising_strength", slider=True, text="Strength")
+
+        # Right column: feature strength and relative filter.
+        right = columns.column().column(align=True)
+        right.prop(layer_settings, "denoising_feature_strength", slider=True, text="Feature Strength")
+        right.prop(layer_settings, "denoising_relative_pca")
+
+        layout.separator()
+
+        # One row per lighting component, each with a Direct/Indirect pair
+        # of toggle buttons.
+        components = (
+            ("Diffuse:", "diffuse"),
+            ("Glossy:", "glossy"),
+            ("Transmission:", "transmission"),
+            ("Subsurface:", "subsurface"),
+        )
+        for label, component in components:
+            row = layout.row()
+            row.label(text=label)
+            buttons = row.row(align=True)
+            buttons.prop(layer_settings, "denoising_" + component + "_direct", text="Direct", toggle=True)
+            buttons.prop(layer_settings, "denoising_" + component + "_indirect", text="Indirect", toggle=True)
+
+
+
+
 class Cycles_PT_post_processing(CyclesButtonsPanel, Panel):
    bl_label = "Post Processing"
    bl_options = {'DEFAULT_CLOSED'}
@@ -788,6 +856,8 @@ class CyclesObject_PT_cycles_settings(CyclesButtonsPanel, Panel):
        if ob.type != 'LAMP':
            flow.prop(visibility, "shadow")

+        layout.prop(cob, "is_shadow_catcher")
+
        col = layout.column()
        col.label(text="Performance:")
        row = col.row()
@@ -1038,10 +1108,11 @@ class CyclesWorld_PT_ambient_occlusion(CyclesButtonsPanel, Panel):
        layout = self.layout

        light = context.world.light_settings
+        scene = context.scene

        row = layout.row()
        sub = row.row()
-        sub.active = light.use_ambient_occlusion
+        sub.active = light.use_ambient_occlusion or scene.render.use_simplify
        sub.prop(light, "ao_factor", text="Factor")
        row.prop(light, "distance", text="Distance")

@@ -1517,15 +1588,18 @@ class CyclesRender_PT_debug(CyclesButtonsPanel, Panel):
        row.prop(cscene, "debug_use_cpu_avx", toggle=True)
        row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
        col.prop(cscene, "debug_use_qbvh")
+        col.prop(cscene, "debug_use_cpu_split_kernel")

        col = layout.column()
        col.label('CUDA Flags:')
        col.prop(cscene, "debug_use_cuda_adaptive_compile")
+        col.prop(cscene, "debug_use_cuda_split_kernel")

        col = layout.column()
        col.label('OpenCL Flags:')
        col.prop(cscene, "debug_opencl_kernel_type", text="Kernel")
        col.prop(cscene, "debug_opencl_device_type", text="Device")
+        col.prop(cscene, "debug_opencl_kernel_single_program", text="Single Program")
        col.prop(cscene, "debug_use_opencl_debug", text="Debug")


@@ -1612,6 +1686,13 @@ class CyclesScene_PT_simplify(CyclesButtonsPanel, Panel):
        row.active = cscene.use_distance_cull
        row.prop(cscene, "distance_cull_margin", text="Distance")

+        split = layout.split()
+        col = split.column()
+        col.prop(cscene, "ao_bounces")
+
+        col = split.column()
+        col.prop(cscene, "ao_bounces_render")
+
 def draw_device(self, context):
    scene = context.scene
    layout = self.layout
@@ -1625,7 +1706,7 @@ def draw_device(self, context):
        split = layout.split(percentage=1/3)
        split.label("Device:")
        row = split.row()
-        row.active = show_device_selection(context)
+        row.active = show_device_active(context)
        row.prop(cscene, "device", text="")

        if engine.with_osl() and use_cpu(context):
@@ -1704,17 +1785,76 @@ def get_panels():

    return panels

+
+classes = (
+    CYCLES_MT_sampling_presets,
+    CYCLES_MT_integrator_presets,
+    CyclesRender_PT_sampling,
+    CyclesRender_PT_geometry,
+    CyclesRender_PT_light_paths,
+    CyclesRender_PT_motion_blur,
+    CyclesRender_PT_film,
+    CyclesRender_PT_performance,
+    CyclesRender_PT_layer_options,
+    CyclesRender_PT_layer_passes,
+    CyclesRender_PT_views,
+    CyclesRender_PT_denoising,
+    Cycles_PT_post_processing,
+    CyclesCamera_PT_dof,
+    Cycles_PT_context_material,
+    CyclesObject_PT_motion_blur,
+    CyclesObject_PT_cycles_settings,
+    CYCLES_OT_use_shading_nodes,
+    CyclesLamp_PT_preview,
+    CyclesLamp_PT_lamp,
+    CyclesLamp_PT_nodes,
+    CyclesLamp_PT_spot,
+    CyclesWorld_PT_preview,
+    CyclesWorld_PT_surface,
+    CyclesWorld_PT_volume,
+    CyclesWorld_PT_ambient_occlusion,
+    CyclesWorld_PT_mist,
+    CyclesWorld_PT_ray_visibility,
+    CyclesWorld_PT_settings,
+    CyclesMaterial_PT_preview,
+    CyclesMaterial_PT_surface,
+    CyclesMaterial_PT_volume,
+    CyclesMaterial_PT_displacement,
+    CyclesMaterial_PT_settings,
+    CyclesTexture_PT_context,
+    CyclesTexture_PT_node,
+    CyclesTexture_PT_mapping,
+    CyclesTexture_PT_colors,
+    CyclesParticle_PT_textures,
+    CyclesRender_PT_bake,
+    CyclesRender_PT_debug,
+    CyclesParticle_PT_CurveSettings,
+    CyclesScene_PT_simplify,
+)
+
+
 def register():
+    from bpy.utils import register_class
+
    bpy.types.RENDER_PT_render.append(draw_device)
    bpy.types.VIEW3D_HT_header.append(draw_pause)

    for panel in get_panels():
        panel.COMPAT_ENGINES.add('CYCLES')

+    for cls in classes:
+        register_class(cls)
+
+
 def unregister():
+    from bpy.utils import unregister_class
+
    bpy.types.RENDER_PT_render.remove(draw_device)
    bpy.types.VIEW3D_HT_header.remove(draw_pause)

    for panel in get_panels():
        if 'CYCLES' in panel.COMPAT_ENGINES:
            panel.COMPAT_ENGINES.remove('CYCLES')
+
+    for cls in classes:
+        unregister_class(cls)
--- a/intern/cycles/blender/blender_camera.cpp
+++ b/intern/cycles/blender/blender_camera.cpp
@@ -14,13 +14,13 @@
 * limitations under the License.
 */

-#include "camera.h"
-#include "scene.h"
+#include "render/camera.h"
+#include "render/scene.h"

-#include "blender_sync.h"
-#include "blender_util.h"
+#include "blender/blender_sync.h"
+#include "blender/blender_util.h"

-#include "util_logging.h"
+#include "util/util_logging.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/blender/blender_curves.cpp
+++ b/intern/cycles/blender/blender_curves.cpp
@@ -14,18 +14,18 @@
 * limitations under the License.
 */

-#include "attribute.h"
-#include "camera.h"
-#include "curves.h"
-#include "mesh.h"
-#include "object.h"
-#include "scene.h"
+#include "render/attribute.h"
+#include "render/camera.h"
+#include "render/curves.h"
+#include "render/mesh.h"
+#include "render/object.h"
+#include "render/scene.h"

-#include "blender_sync.h"
-#include "blender_util.h"
+#include "blender/blender_sync.h"
+#include "blender/blender_util.h"

-#include "util_foreach.h"
-#include "util_logging.h"
+#include "util/util_foreach.h"
+#include "util/util_logging.h"

 CCL_NAMESPACE_BEGIN

@@ -411,6 +411,7 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
 		}
 	}

+	mesh->resize_mesh(mesh->verts.size(), mesh->num_triangles());
 	mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
 	mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
 	mesh->add_face_normals();
@@ -434,8 +435,8 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
 			if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
 				continue;

-			numverts += (CData->curve_keynum[curve] - 2)*2*resolution + resolution;
-			numtris += (CData->curve_keynum[curve] - 2)*resolution;
+			numverts += (CData->curve_keynum[curve] - 1)*resolution + resolution;
+			numtris += (CData->curve_keynum[curve] - 1)*2*resolution;
 		}
 	}

@@ -545,6 +546,7 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
 		}
 	}

+	mesh->resize_mesh(mesh->verts.size(), mesh->num_triangles());
 	mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
 	mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
 	mesh->add_face_normals();
@@ -774,17 +776,17 @@ static void ExportCurveTriangleVcol(ParticleCurveData *CData,

 			for(int curvekey = CData->curve_firstkey[curve]; curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1; curvekey++) {
 				for(int section = 0; section < resol; section++) {
-					cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear(CData->curve_vcol[curve]));
+					cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear_v3(CData->curve_vcol[curve]));
 					vertexindex++;
-					cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear(CData->curve_vcol[curve]));
+					cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear_v3(CData->curve_vcol[curve]));
 					vertexindex++;
-					cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear(CData->curve_vcol[curve]));
+					cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear_v3(CData->curve_vcol[curve]));
 					vertexindex++;
-					cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear(CData->curve_vcol[curve]));
+					cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear_v3(CData->curve_vcol[curve]));
 					vertexindex++;
-					cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear(CData->curve_vcol[curve]));
+					cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear_v3(CData->curve_vcol[curve]));
 					vertexindex++;
-					cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear(CData->curve_vcol[curve]));
+					cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear_v3(CData->curve_vcol[curve]));
 					vertexindex++;
 				}
 			}
@@ -890,7 +892,7 @@ void BlenderSync::sync_curves(Mesh *mesh,
 	}

 	/* obtain general settings */
-	bool use_curves = scene->curve_system_manager->use_curves;
+	const bool use_curves = scene->curve_system_manager->use_curves;

 	if(!(use_curves && b_ob.mode() != b_ob.mode_PARTICLE_EDIT)) {
 		if(!motion)
@@ -898,11 +900,11 @@ void BlenderSync::sync_curves(Mesh *mesh,
 		return;
 	}

-	int primitive = scene->curve_system_manager->primitive;
-	int triangle_method = scene->curve_system_manager->triangle_method;
-	int resolution = scene->curve_system_manager->resolution;
-	size_t vert_num = mesh->verts.size();
-	size_t tri_num = mesh->num_triangles();
+	const int primitive = scene->curve_system_manager->primitive;
+	const int triangle_method = scene->curve_system_manager->triangle_method;
+	const int resolution = scene->curve_system_manager->resolution;
+	const size_t vert_num = mesh->verts.size();
+	const size_t tri_num = mesh->num_triangles();
 	int used_res = 1;

 	/* extract particle hair data - should be combined with connecting to mesh later*/
@@ -1002,7 +1004,7 @@ void BlenderSync::sync_curves(Mesh *mesh,

 					for(size_t curve = 0; curve < CData.curve_vcol.size(); curve++)
 						if(!(CData.curve_keynum[curve] <= 1 || CData.curve_length[curve] == 0.0f))
-							fdata[i++] = color_srgb_to_scene_linear(CData.curve_vcol[curve]);
+							fdata[i++] = color_srgb_to_scene_linear_v3(CData.curve_vcol[curve]);
 				}
 			}
 		}
--- a/intern/cycles/blender/blender_logging.cpp
+++ b/intern/cycles/blender/blender_logging.cpp
@@ -14,8 +14,8 @@
 * limitations under the License.
 */

-#include "CCL_api.h"
-#include "util_logging.h"
+#include "blender/CCL_api.h"
+#include "util/util_logging.h"

 void CCL_init_logging(const char *argv0)
 {
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -15,21 +15,22 @@
 */

 
-#include "mesh.h"
-#include "object.h"
-#include "scene.h"
-#include "camera.h"
+#include "render/mesh.h"
+#include "render/object.h"
+#include "render/scene.h"
+#include "render/camera.h"

-#include "blender_sync.h"
-#include "blender_session.h"
-#include "blender_util.h"
+#include "blender/blender_sync.h"
+#include "blender/blender_session.h"
+#include "blender/blender_util.h"

-#include "subd_patch.h"
-#include "subd_split.h"
+#include "subd/subd_patch.h"
+#include "subd/subd_split.h"

-#include "util_foreach.h"
-#include "util_logging.h"
-#include "util_math.h"
+#include "util/util_algorithm.h"
+#include "util/util_foreach.h"
+#include "util/util_logging.h"
+#include "util/util_math.h"

 #include "mikktspace.h"

@@ -355,7 +356,7 @@ static void attr_create_vertex_color(Scene *scene,
 				int n = p->loop_total();
 				for(int i = 0; i < n; i++) {
 					float3 color = get_float3(l->data[p->loop_start() + i].color());
-					*(cdata++) = color_float_to_byte(color_srgb_to_scene_linear(color));
+					*(cdata++) = color_float_to_byte(color_srgb_to_scene_linear_v3(color));
 				}
 			}
 		}
@@ -379,11 +380,11 @@ static void attr_create_vertex_color(Scene *scene,
 				face_split_tri_indices(nverts[i], face_flags[i], tri_a, tri_b);

 				uchar4 colors[4];
-				colors[0] = color_float_to_byte(color_srgb_to_scene_linear(get_float3(c->color1())));
-				colors[1] = color_float_to_byte(color_srgb_to_scene_linear(get_float3(c->color2())));
-				colors[2] = color_float_to_byte(color_srgb_to_scene_linear(get_float3(c->color3())));
+				colors[0] = color_float_to_byte(color_srgb_to_scene_linear_v3(get_float3(c->color1())));
+				colors[1] = color_float_to_byte(color_srgb_to_scene_linear_v3(get_float3(c->color2())));
+				colors[2] = color_float_to_byte(color_srgb_to_scene_linear_v3(get_float3(c->color3())));
 				if(nverts[i] == 4) {
-					colors[3] = color_float_to_byte(color_srgb_to_scene_linear(get_float3(c->color4())));
+					colors[3] = color_float_to_byte(color_srgb_to_scene_linear_v3(get_float3(c->color4())));
 				}

 				cdata[0] = colors[tri_a[0]];
@@ -525,69 +526,180 @@ static void attr_create_uv_map(Scene *scene,
 }

 /* Create vertex pointiness attributes. */
+
+/* Order vertex indices by the sum of their coordinates, ties by index. */
+class VertexAverageComparator {
+public:
+	VertexAverageComparator(const array<float3>& verts)
+	        : verts_(verts) {
+	}
+
+	bool operator()(const int& vert_idx_a, const int& vert_idx_b)
+	{
+		const float3 &vert_a = verts_[vert_idx_a];
+		const float3 &vert_b = verts_[vert_idx_b];
+		const float x1 = vert_a.x + vert_a.y + vert_a.z;
+		const float x2 = vert_b.x + vert_b.y + vert_b.z;
+		if(x1 != x2) {
+			return x1 < x2;
+		}
+		/* Equal sums (doubles included): index tie-break keeps ordering strict. */
+		return vert_idx_a > vert_idx_b;
+	}
+
+protected:
+	const array<float3>& verts_;
+};
+
 static void attr_create_pointiness(Scene *scene,
                                    Mesh *mesh,
                                    BL::Mesh& b_mesh,
                                    bool subdivision)
 {
-	if(mesh->need_attribute(scene, ATTR_STD_POINTINESS)) {
-		const int numverts = b_mesh.vertices.length();
-		AttributeSet& attributes = (subdivision)? mesh->subd_attributes: mesh->attributes;
-		Attribute *attr = attributes.add(ATTR_STD_POINTINESS);
-		float *data = attr->data_float();
-		int *counter = new int[numverts];
-		float *raw_data = new float[numverts];
-		float3 *edge_accum = new float3[numverts];
-
-		/* Calculate pointiness using single ring neighborhood. */
-		memset(counter, 0, sizeof(int) * numverts);
-		memset(raw_data, 0, sizeof(float) * numverts);
-		memset(edge_accum, 0, sizeof(float3) * numverts);
-		BL::Mesh::edges_iterator e;
-		int i = 0;
-		for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++i) {
-			int v0 = b_mesh.edges[i].vertices()[0],
-			    v1 = b_mesh.edges[i].vertices()[1];
-			float3 co0 = get_float3(b_mesh.vertices[v0].co()),
-			       co1 = get_float3(b_mesh.vertices[v1].co());
-			float3 edge = normalize(co1 - co0);
-			edge_accum[v0] += edge;
-			edge_accum[v1] += -edge;
-			++counter[v0];
-			++counter[v1];
-		}
-		i = 0;
-		BL::Mesh::vertices_iterator v;
-		for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v, ++i) {
-			if(counter[i] > 0) {
-				float3 normal = get_float3(b_mesh.vertices[i].normal());
-				float angle = safe_acosf(dot(normal, edge_accum[i] / counter[i]));
-				raw_data[i] = angle * M_1_PI_F;
+	if(!mesh->need_attribute(scene, ATTR_STD_POINTINESS)) {
+		return;
+	}
+	const int num_verts = b_mesh.vertices.length();
+	if(num_verts == 0) {
+		return;
+	}
+	/* STEP 1: Find out duplicated vertices and point duplicates to a single
+	 *         original vertex.
+	 */
+	vector<int> sorted_vert_indeices(num_verts);
+	for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
+		sorted_vert_indeices[vert_index] = vert_index;
+	}
+	VertexAverageComparator compare(mesh->verts);
+	sort(sorted_vert_indeices.begin(), sorted_vert_indeices.end(), compare);
+	/* This array stores index of the original vertex for the given vertex
+	 * index.
+	 */
+	vector<int> vert_orig_index(num_verts);
+	for(int sorted_vert_index = 0;
+	    sorted_vert_index < num_verts;
+	    ++sorted_vert_index)
+	{
+		const int vert_index = sorted_vert_indeices[sorted_vert_index];
+		const float3 &vert_co = mesh->verts[vert_index];
+		bool found = false;
+		for(int other_sorted_vert_index = sorted_vert_index + 1;
+		    other_sorted_vert_index < num_verts;
+		    ++other_sorted_vert_index)
+		{
+			const int other_vert_index =
+			        sorted_vert_indeices[other_sorted_vert_index];
+			const float3 &other_vert_co = mesh->verts[other_vert_index];
+			/* We are too far away now, we wouldn't have duplicate. */
+			if((other_vert_co.x + other_vert_co.y + other_vert_co.z) -
+			   (vert_co.x + vert_co.y + vert_co.z) > 3 * FLT_EPSILON)
+			{
+				break;
 			}
-			else {
-				raw_data[i] = 0.0f;
+			/* Found duplicate. */
+			if(len_squared(other_vert_co - vert_co) < FLT_EPSILON) {
+				found = true;
+				vert_orig_index[vert_index] = other_vert_index;
+				break;
 			}
 		}
-
-		/* Blur vertices to approximate 2 ring neighborhood. */
-		memset(counter, 0, sizeof(int) * numverts);
-		memcpy(data, raw_data, sizeof(float) * numverts);
-		i = 0;
-		for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++i) {
-			int v0 = b_mesh.edges[i].vertices()[0],
-			    v1 = b_mesh.edges[i].vertices()[1];
-			data[v0] += raw_data[v1];
-			data[v1] += raw_data[v0];
-			++counter[v0];
-			++counter[v1];
+		if(!found) {
+			vert_orig_index[vert_index] = vert_index;
 		}
-		for(i = 0; i < numverts; ++i) {
-			data[i] /= counter[i] + 1;
+	}
+	/* Make sure we always point to the very first orig vertex. */
+	for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
+		int orig_index = vert_orig_index[vert_index];
+		while(orig_index != vert_orig_index[orig_index]) {
+			orig_index = vert_orig_index[orig_index];
 		}
-
-		delete [] counter;
-		delete [] raw_data;
-		delete [] edge_accum;
+		vert_orig_index[vert_index] = orig_index;
+	}
+	sorted_vert_indeices.free_memory();
+	/* STEP 2: Calculate vertex normals taking into account their possible
+	 *         duplicates which gets "welded" together.
+	 */
+	vector<float3> vert_normal(num_verts, make_float3(0.0f, 0.0f, 0.0f));
+	/* First we accumulate all vertex normals in the original index. */
+	for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
+		const float3 normal = get_float3(b_mesh.vertices[vert_index].normal());
+		const int orig_index = vert_orig_index[vert_index];
+		vert_normal[orig_index] += normal;
+	}
+	/* Then we normalize the accumulated result and flush it to all duplicates
+	 * as well.
+	 */
+	for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
+		const int orig_index = vert_orig_index[vert_index];
+		vert_normal[vert_index] = normalize(vert_normal[orig_index]);
+	}
+	/* STEP 3: Calculate pointiness using single ring neighborhood. */
+	vector<int> counter(num_verts, 0);
+	vector<float> raw_data(num_verts, 0.0f);
+	vector<float3> edge_accum(num_verts, make_float3(0.0f, 0.0f, 0.0f));
+	BL::Mesh::edges_iterator e;
+	EdgeMap visited_edges;
+	int edge_index = 0;
+	memset(&counter[0], 0, sizeof(int) * counter.size());
+	for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++edge_index) {
+		const int v0 = vert_orig_index[b_mesh.edges[edge_index].vertices()[0]],
+		          v1 = vert_orig_index[b_mesh.edges[edge_index].vertices()[1]];
+		if(visited_edges.exists(v0, v1)) {
+			continue;
+		}
+		visited_edges.insert(v0, v1);
+		float3 co0 = get_float3(b_mesh.vertices[v0].co()),
+		       co1 = get_float3(b_mesh.vertices[v1].co());
+		float3 edge = normalize(co1 - co0);
+		edge_accum[v0] += edge;
+		edge_accum[v1] += -edge;
+		++counter[v0];
+		++counter[v1];
+	}
+	for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
+		const int orig_index = vert_orig_index[vert_index];
+		if(orig_index != vert_index) {
+			/* Skip duplicates, they'll be overwritten later on. */
+			continue;
+		}
+		if(counter[vert_index] > 0) {
+			const float3 normal = vert_normal[vert_index];
+			const float angle =
+			        safe_acosf(dot(normal,
+			                       edge_accum[vert_index] / counter[vert_index]));
+			raw_data[vert_index] = angle * M_1_PI_F;
+		}
+		else {
+			raw_data[vert_index] = 0.0f;
+		}
+	}
+	/* STEP 4: Blur vertices to approximate 2 ring neighborhood. */
+	AttributeSet& attributes = (subdivision)? mesh->subd_attributes: mesh->attributes;
+	Attribute *attr = attributes.add(ATTR_STD_POINTINESS);
+	float *data = attr->data_float();
+	memcpy(data, &raw_data[0], sizeof(float) * raw_data.size());
+	memset(&counter[0], 0, sizeof(int) * counter.size());
+	edge_index = 0;
+	visited_edges.clear();
+	for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++edge_index) {
+		const int v0 = vert_orig_index[b_mesh.edges[edge_index].vertices()[0]],
+		          v1 = vert_orig_index[b_mesh.edges[edge_index].vertices()[1]];
+		if(visited_edges.exists(v0, v1)) {
+			continue;
+		}
+		visited_edges.insert(v0, v1);
+		data[v0] += raw_data[v1];
+		data[v1] += raw_data[v0];
+		++counter[v0];
+		++counter[v1];
+	}
+	for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
+		data[vert_index] /= counter[vert_index] + 1;
+	}
+	/* STEP 5: Copy attribute to the duplicated vertices. */
+	for(int vert_index = 0; vert_index < num_verts; ++vert_index) {
+		const int orig_index = vert_orig_index[vert_index];
+		data[vert_index] = data[orig_index];
 	}
 }

@@ -656,9 +768,6 @@ static void create_mesh(Scene *scene,
 			generated[i++] = get_float3(v->undeformed_co())*size - loc;
 	}

-	/* Create needed vertex attributes. */
-	attr_create_pointiness(scene, mesh, b_mesh, subdivision);
-
 	/* create faces */
 	vector<int> nverts(numfaces);
 	vector<int> face_flags(numfaces, FACE_FLAG_NONE);
@@ -671,6 +780,15 @@ static void create_mesh(Scene *scene,
 			int shader = clamp(f->material_index(), 0, used_shaders.size()-1);
 			bool smooth = f->use_smooth() || use_loop_normals;

+			if(use_loop_normals) {
+				BL::Array<float, 12> loop_normals = f->split_normals();
+				for(int i = 0; i < n; i++) {
+					N[vi[i]] = make_float3(loop_normals[i * 3],
+					                       loop_normals[i * 3 + 1],
+					                       loop_normals[i * 3 + 2]);
+				}
+			}
+
 			/* Create triangles.
 			 *
 			 * NOTE: Autosmooth is already taken care about.
@@ -704,7 +822,7 @@ static void create_mesh(Scene *scene,
 			int shader = clamp(p->material_index(), 0, used_shaders.size()-1);
 			bool smooth = p->use_smooth() || use_loop_normals;

-			vi.reserve(n);
+			vi.resize(n);
 			for(int i = 0; i < n; i++) {
 				/* NOTE: Autosmooth is already taken care about. */
 				vi[i] = b_mesh.loops[p->loop_start() + i].vertex_index();
@@ -718,6 +836,7 @@ static void create_mesh(Scene *scene,
 	/* Create all needed attributes.
 	 * The calculate functions will check whether they're needed or not.
 	 */
+	attr_create_pointiness(scene, mesh, b_mesh, subdivision);
 	attr_create_vertex_color(scene, mesh, b_mesh, nverts, face_flags, subdivision);
 	attr_create_uv_map(scene, mesh, b_mesh, nverts, face_flags, subdivision, subdivide_uvs);

@@ -927,6 +1046,13 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob,

 		mesh->subdivision_type = object_subdivision_type(b_ob, preview, experimental);

+		/* Disable adaptive subdivision while baking as the baking system
+		 * currently doesn't support the topology and will crash.
+		 */
+		if(scene->bake_manager->get_baking()) {
+			mesh->subdivision_type = Mesh::SUBDIVISION_NONE;
+		}
+
 		BL::Mesh b_mesh = object_to_mesh(b_data,
 		                                 b_ob,
 		                                 b_scene,
@@ -1041,8 +1167,8 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
 	}

 	/* skip empty meshes */
-	size_t numverts = mesh->verts.size();
-	size_t numkeys = mesh->curve_keys.size();
+	const size_t numverts = mesh->verts.size();
+	const size_t numkeys = mesh->curve_keys.size();

 	if(!numverts && !numkeys)
 		return;
@@ -1100,13 +1226,12 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,

 	/* TODO(sergey): Perform preliminary check for number of verticies. */
 	if(numverts) {
-		/* find attributes */
+		/* Find attributes. */
 		Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
 		Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
 		Attribute *attr_N = mesh->attributes.find(ATTR_STD_VERTEX_NORMAL);
 		bool new_attribute = false;
-
-		/* add new attributes if they don't exist already */
+		/* Add new attributes if they don't exist already. */
 		if(!attr_mP) {
 			attr_mP = mesh->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
 			if(attr_N)
@@ -1114,22 +1239,21 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,

 			new_attribute = true;
 		}
-
-		/* load vertex data from mesh */
+		/* Load vertex data from mesh. */
 		float3 *mP = attr_mP->data_float3() + time_index*numverts;
 		float3 *mN = (attr_mN)? attr_mN->data_float3() + time_index*numverts: NULL;
-
+		/* NOTE: We don't copy more than the existing number of vertices to prevent
+		 * possible memory corruption.
+		 */
 		BL::Mesh::vertices_iterator v;
 		int i = 0;
-
 		for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end() && i < numverts; ++v, ++i) {
 			mP[i] = get_float3(v->co());
 			if(mN)
 				mN[i] = get_float3(v->normal());
 		}
-
-		/* in case of new attribute, we verify if there really was any motion */
 		if(new_attribute) {
+			/* In case of new attribute, we verify if there really was any motion. */
 			if(b_mesh.vertices.length() != numverts ||
 			   memcmp(mP, &mesh->verts[0], sizeof(float3)*numverts) == 0)
 			{
@@ -1152,7 +1276,6 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
 				 * they had no motion, but we need them anyway now */
 				float3 *P = &mesh->verts[0];
 				float3 *N = (attr_N)? attr_N->data_float3(): NULL;
-
 				for(int step = 0; step < time_index; step++) {
 					memcpy(attr_mP->data_float3() + step*numverts, P, sizeof(float3)*numverts);
 					if(attr_mN)
@@ -1160,6 +1283,16 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
 				}
 			}
 		}
+		else {
+			if(b_mesh.vertices.length() != numverts) {
+				VLOG(1) << "Topology differs, discarding motion blur for object "
+				        << b_ob.name() << " at time " << time_index;
+				memcpy(mP, &mesh->verts[0], sizeof(float3)*numverts);
+				if(mN != NULL) {
+					memcpy(mN, attr_N->data_float3(), sizeof(float3)*numverts);
+				}
+			}
+		}
 	}

 	/* hair motion */
@@ -1171,4 +1304,3 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
 }

 CCL_NAMESPACE_END
-
--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -14,24 +14,24 @@
 * limitations under the License.
 */

-#include "camera.h"
-#include "integrator.h"
-#include "graph.h"
-#include "light.h"
-#include "mesh.h"
-#include "object.h"
-#include "scene.h"
-#include "nodes.h"
-#include "particles.h"
-#include "shader.h"
+#include "render/camera.h"
+#include "render/integrator.h"
+#include "render/graph.h"
+#include "render/light.h"
+#include "render/mesh.h"
+#include "render/object.h"
+#include "render/scene.h"
+#include "render/nodes.h"
+#include "render/particles.h"
+#include "render/shader.h"

-#include "blender_object_cull.h"
-#include "blender_sync.h"
-#include "blender_util.h"
+#include "blender/blender_object_cull.h"
+#include "blender/blender_sync.h"
+#include "blender/blender_util.h"

-#include "util_foreach.h"
-#include "util_hash.h"
-#include "util_logging.h"
+#include "util/util_foreach.h"
+#include "util/util_hash.h"
+#include "util/util_logging.h"

 CCL_NAMESPACE_BEGIN

@@ -343,6 +343,13 @@ Object *BlenderSync::sync_object(BL::Object& b_parent,
 		object_updated = true;
 	}

+	PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
+	bool is_shadow_catcher = get_boolean(cobject, "is_shadow_catcher");
+	if(is_shadow_catcher != object->is_shadow_catcher) {
+		object->is_shadow_catcher = is_shadow_catcher;
+		object_updated = true;
+	}
+
 	/* object sync
 	 * transform comparison should not be needed, but duplis don't work perfect
 	 * in the depsgraph and may not signal changes, so this is a workaround */
@@ -372,27 +379,16 @@ Object *BlenderSync::sync_object(BL::Object& b_parent,
 			}
 		}

-		/* random number */
-		object->random_id = hash_string(object->name.c_str());
-
-		if(persistent_id) {
-			for(int i = 0; i < OBJECT_PERSISTENT_ID_SIZE; i++)
-				object->random_id = hash_int_2d(object->random_id, persistent_id[i]);
-		}
-		else
-			object->random_id = hash_int_2d(object->random_id, 0);
-
-		if(b_parent.ptr.data != b_ob.ptr.data)
-			object->random_id ^= hash_int(hash_string(b_parent.name().c_str()));
-
-		/* dupli texture coordinates */
+		/* dupli texture coordinates and random_id */
 		if(b_dupli_ob) {
 			object->dupli_generated = 0.5f*get_float3(b_dupli_ob.orco()) - make_float3(0.5f, 0.5f, 0.5f);
 			object->dupli_uv = get_float2(b_dupli_ob.uv());
+			object->random_id = b_dupli_ob.random_id();
 		}
 		else {
 			object->dupli_generated = make_float3(0.0f, 0.0f, 0.0f);
 			object->dupli_uv = make_float2(0.0f, 0.0f);
+			object->random_id =  hash_int_2d(hash_string(object->name.c_str()), 0);
 		}

 		object->tag_update(scene);
@@ -482,7 +478,7 @@ static bool object_render_hide_duplis(BL::Object& b_ob)

 /* Object Loop */

-void BlenderSync::sync_objects(BL::SpaceView3D& b_v3d, float motion_time)
+void BlenderSync::sync_objects(float motion_time)
 {
 	/* layer data */
 	uint scene_layer = render_layer.scene_layer;
@@ -510,7 +506,7 @@ void BlenderSync::sync_objects(BL::SpaceView3D& b_v3d, float motion_time)
 	 * 1 : DAG_EVAL_PREVIEW
 	 * 2 : DAG_EVAL_RENDER
 	 */
-	int dupli_settings = preview ? 1 : 2;
+	int dupli_settings = (render_layer.use_viewport_visibility) ? 1 : 2;

 	bool cancel = false;
 	bool use_portal = false;
@@ -545,7 +541,7 @@ void BlenderSync::sync_objects(BL::SpaceView3D& b_v3d, float motion_time)
 					for(b_ob.dupli_list.begin(b_dup); b_dup != b_ob.dupli_list.end(); ++b_dup) {
 						Transform tfm = get_transform(b_dup->matrix());
 						BL::Object b_dup_ob = b_dup->object();
-						bool dup_hide = (b_v3d)? b_dup_ob.hide(): b_dup_ob.hide_render();
+						bool dup_hide = (render_layer.use_viewport_visibility)? b_dup_ob.hide(): b_dup_ob.hide_render();
 						bool in_dupli_group = (b_dup->type() == BL::DupliObject::type_GROUP);
 						bool hide_tris;

@@ -621,7 +617,6 @@ void BlenderSync::sync_objects(BL::SpaceView3D& b_v3d, float motion_time)
 }

 void BlenderSync::sync_motion(BL::RenderSettings& b_render,
-                              BL::SpaceView3D& b_v3d,
                              BL::Object& b_override,
                              int width, int height,
                              void **python_thread_state)
@@ -658,7 +653,7 @@ void BlenderSync::sync_motion(BL::RenderSettings& b_render,
 		b_engine.frame_set(frame, subframe);
 		python_thread_state_save(python_thread_state);
 		sync_camera_motion(b_render, b_cam, width, height, 0.0f);
-		sync_objects(b_v3d, 0.0f);
+		sync_objects(0.0f);
 	}

 	/* always sample these times for camera motion */
@@ -692,7 +687,7 @@ void BlenderSync::sync_motion(BL::RenderSettings& b_render,
 		}

 		/* sync object */
-		sync_objects(b_v3d, relative_time);
+		sync_objects(relative_time);
 	}

 	/* we need to set the python thread state again because this
--- a/intern/cycles/blender/blender_object_cull.cpp
+++ b/intern/cycles/blender/blender_object_cull.cpp
@@ -16,9 +16,9 @@

 #include <cstdlib>

-#include "camera.h"
+#include "render/camera.h"

-#include "blender_object_cull.h"
+#include "blender/blender_object_cull.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/blender/blender_object_cull.h
+++ b/intern/cycles/blender/blender_object_cull.h
@@ -17,8 +17,8 @@
 #ifndef __BLENDER_OBJECT_CULL_H__
 #define __BLENDER_OBJECT_CULL_H__

-#include "blender_sync.h"
-#include "util_types.h"
+#include "blender/blender_sync.h"
+#include "util/util_types.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/blender/blender_particles.cpp
+++ b/intern/cycles/blender/blender_particles.cpp
@@ -14,14 +14,14 @@
 * limitations under the License.
 */

-#include "mesh.h"
-#include "object.h"
-#include "particles.h"
+#include "render/mesh.h"
+#include "render/object.h"
+#include "render/particles.h"

-#include "blender_sync.h"
-#include "blender_util.h"
+#include "blender/blender_sync.h"
+#include "blender/blender_util.h"

-#include "util_foreach.h"
+#include "util/util_foreach.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -16,21 +16,21 @@

 #include <Python.h>

-#include "CCL_api.h"
+#include "blender/CCL_api.h"

-#include "blender_sync.h"
-#include "blender_session.h"
+#include "blender/blender_sync.h"
+#include "blender/blender_session.h"

-#include "util_foreach.h"
-#include "util_logging.h"
-#include "util_md5.h"
-#include "util_opengl.h"
-#include "util_path.h"
-#include "util_string.h"
-#include "util_types.h"
+#include "util/util_foreach.h"
+#include "util/util_logging.h"
+#include "util/util_md5.h"
+#include "util/util_opengl.h"
+#include "util/util_path.h"
+#include "util/util_string.h"
+#include "util/util_types.h"

 #ifdef WITH_OSL
-#include "osl.h"
+#include "render/osl.h"

 #include <OSL/oslquery.h>
 #include <OSL/oslconfig.h>
@@ -67,8 +67,10 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
 	flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3");
 	flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2");
 	flags.cpu.qbvh = get_boolean(cscene, "debug_use_qbvh");
+	flags.cpu.split_kernel = get_boolean(cscene, "debug_use_cpu_split_kernel");
 	/* Synchronize CUDA flags. */
 	flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile");
+	flags.cuda.split_kernel = get_boolean(cscene, "debug_use_cuda_split_kernel");
 	/* Synchronize OpenCL kernel type. */
 	switch(get_enum(cscene, "debug_opencl_kernel_type")) {
 		case 0:
@@ -104,6 +106,7 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
 	}
 	/* Synchronize other OpenCL flags. */
 	flags.opencl.debug = get_boolean(cscene, "debug_use_opencl_debug");
+	flags.opencl.single_program = get_boolean(cscene, "debug_opencl_kernel_single_program");
 	return flags.opencl.device_type != opencl_device_type ||
 	       flags.opencl.kernel_type != opencl_kernel_type;
 }
@@ -641,7 +644,7 @@ static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/
 	Py_RETURN_NONE;
 }

-static PyObject *set_resumable_chunks_func(PyObject * /*self*/, PyObject *args)
+static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
 {
 	int num_resumable_chunks, current_resumable_chunk;
 	if(!PyArg_ParseTuple(args, "ii",
@@ -676,6 +679,53 @@ static PyObject *set_resumable_chunks_func(PyObject * /*self*/, PyObject *args)
 	Py_RETURN_NONE;
 }

+static PyObject *set_resumable_chunk_range_func(PyObject * /*self*/, PyObject *args)
+{
+	int num_chunks, start_chunk, end_chunk;
+	if(!PyArg_ParseTuple(args, "iii",
+	                     &num_chunks,
+	                     &start_chunk,
+	                     &end_chunk)) {
+		Py_RETURN_NONE;
+	}
+	/* Valid chunk numbers are 1..num_chunks, with start_chunk <= end_chunk. */
+	if(num_chunks <= 0) {
+		fprintf(stderr, "Cycles: Bad value for number of resumable chunks.\n");
+		abort();
+		Py_RETURN_NONE;
+	}
+	if(start_chunk < 1 || start_chunk > num_chunks) {
+		fprintf(stderr, "Cycles: Bad value for start chunk number.\n");
+		abort();
+		Py_RETURN_NONE;
+	}
+	if(end_chunk < 1 || end_chunk > num_chunks) {
+		fprintf(stderr, "Cycles: Bad value for end chunk number.\n");
+		abort();
+		Py_RETURN_NONE;
+	}
+	if(start_chunk > end_chunk) {
+		fprintf(stderr, "Cycles: End chunk should be higher than start one.\n");
+		abort();
+		Py_RETURN_NONE;
+	}
+	/* Arguments passed validation: log them and store in BlenderSession statics. */
+	VLOG(1) << "Initialized resumable render: "
+	        << "num_resumable_chunks=" << num_chunks << ", "
+	        << "start_resumable_chunk=" << start_chunk << ", "
+	        << "end_resumable_chunk=" << end_chunk;
+	BlenderSession::num_resumable_chunks = num_chunks;
+	BlenderSession::start_resumable_chunk = start_chunk;
+	BlenderSession::end_resumable_chunk = end_chunk;
+
+	printf("Cycles: Will render chunks %d to %d of %d\n",
+	       start_chunk,
+	       end_chunk,
+	       num_chunks);
+
+	Py_RETURN_NONE;
+}
+
 static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
 {
 	vector<DeviceInfo>& devices = Device::available_devices();
@@ -715,7 +765,8 @@ static PyMethodDef methods[] = {
 	{"debug_flags_reset", debug_flags_reset_func, METH_NOARGS, ""},

 	/* Resumable render */
-	{"set_resumable_chunks", set_resumable_chunks_func, METH_VARARGS, ""},
+	{"set_resumable_chunk", set_resumable_chunk_func, METH_VARARGS, ""},
+	{"set_resumable_chunk_range", set_resumable_chunk_range_func, METH_VARARGS, ""},

 	/* Compute Device selection */
 	{"get_device_types", get_device_types_func, METH_VARARGS, ""},
@@ -760,6 +811,14 @@ void *CCL_python_module_init()
 	PyModule_AddStringConstant(mod, "osl_version_string", "unknown");
 #endif

+#ifdef WITH_CYCLES_DEBUG
+	PyModule_AddObject(mod, "with_cycles_debug", Py_True);
+	Py_INCREF(Py_True);
+#else
+	PyModule_AddObject(mod, "with_cycles_debug", Py_False);
+	Py_INCREF(Py_False);
+#endif
+
 #ifdef WITH_NETWORK
 	PyModule_AddObject(mod, "with_network", Py_True);
 	Py_INCREF(Py_True);
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -16,36 +16,38 @@

 #include <stdlib.h>

-#include "background.h"
-#include "buffers.h"
-#include "camera.h"
-#include "device.h"
-#include "integrator.h"
-#include "film.h"
-#include "light.h"
-#include "mesh.h"
-#include "object.h"
-#include "scene.h"
-#include "session.h"
-#include "shader.h"
+#include "render/background.h"
+#include "render/buffers.h"
+#include "render/camera.h"
+#include "device/device.h"
+#include "render/integrator.h"
+#include "render/film.h"
+#include "render/light.h"
+#include "render/mesh.h"
+#include "render/object.h"
+#include "render/scene.h"
+#include "render/session.h"
+#include "render/shader.h"

-#include "util_color.h"
-#include "util_foreach.h"
-#include "util_function.h"
-#include "util_hash.h"
-#include "util_logging.h"
-#include "util_progress.h"
-#include "util_time.h"
+#include "util/util_color.h"
+#include "util/util_foreach.h"
+#include "util/util_function.h"
+#include "util/util_hash.h"
+#include "util/util_logging.h"
+#include "util/util_progress.h"
+#include "util/util_time.h"

-#include "blender_sync.h"
-#include "blender_session.h"
-#include "blender_util.h"
+#include "blender/blender_sync.h"
+#include "blender/blender_session.h"
+#include "blender/blender_util.h"

 CCL_NAMESPACE_BEGIN

 bool BlenderSession::headless = false;
 int BlenderSession::num_resumable_chunks = 0;
 int BlenderSession::current_resumable_chunk = 0;
+int BlenderSession::start_resumable_chunk = 0;
+int BlenderSession::end_resumable_chunk = 0;

 BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
                               BL::UserPreferences& b_userpref,
@@ -68,6 +70,7 @@ BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
 	background = true;
 	last_redraw_time = 0.0;
 	start_resize_time = 0.0;
+	last_status_time = 0.0;
 }

 BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
@@ -93,6 +96,7 @@ BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
 	background = false;
 	last_redraw_time = 0.0;
 	start_resize_time = 0.0;
+	last_status_time = 0.0;
 }

 BlenderSession::~BlenderSession()
@@ -239,90 +243,6 @@ void BlenderSession::free_session()
 	delete session;
 }

-static PassType get_pass_type(BL::RenderPass& b_pass)
-{
-	switch(b_pass.type()) {
-		case BL::RenderPass::type_COMBINED:
-			return PASS_COMBINED;
-
-		case BL::RenderPass::type_Z:
-			return PASS_DEPTH;
-		case BL::RenderPass::type_MIST:
-			return PASS_MIST;
-		case BL::RenderPass::type_NORMAL:
-			return PASS_NORMAL;
-		case BL::RenderPass::type_OBJECT_INDEX:
-			return PASS_OBJECT_ID;
-		case BL::RenderPass::type_UV:
-			return PASS_UV;
-		case BL::RenderPass::type_VECTOR:
-			return PASS_MOTION;
-		case BL::RenderPass::type_MATERIAL_INDEX:
-			return PASS_MATERIAL_ID;
-
-		case BL::RenderPass::type_DIFFUSE_DIRECT:
-			return PASS_DIFFUSE_DIRECT;
-		case BL::RenderPass::type_GLOSSY_DIRECT:
-			return PASS_GLOSSY_DIRECT;
-		case BL::RenderPass::type_TRANSMISSION_DIRECT:
-			return PASS_TRANSMISSION_DIRECT;
-		case BL::RenderPass::type_SUBSURFACE_DIRECT:
-			return PASS_SUBSURFACE_DIRECT;
-
-		case BL::RenderPass::type_DIFFUSE_INDIRECT:
-			return PASS_DIFFUSE_INDIRECT;
-		case BL::RenderPass::type_GLOSSY_INDIRECT:
-			return PASS_GLOSSY_INDIRECT;
-		case BL::RenderPass::type_TRANSMISSION_INDIRECT:
-			return PASS_TRANSMISSION_INDIRECT;
-		case BL::RenderPass::type_SUBSURFACE_INDIRECT:
-			return PASS_SUBSURFACE_INDIRECT;
-
-		case BL::RenderPass::type_DIFFUSE_COLOR:
-			return PASS_DIFFUSE_COLOR;
-		case BL::RenderPass::type_GLOSSY_COLOR:
-			return PASS_GLOSSY_COLOR;
-		case BL::RenderPass::type_TRANSMISSION_COLOR:
-			return PASS_TRANSMISSION_COLOR;
-		case BL::RenderPass::type_SUBSURFACE_COLOR:
-			return PASS_SUBSURFACE_COLOR;
-
-		case BL::RenderPass::type_EMIT:
-			return PASS_EMISSION;
-		case BL::RenderPass::type_ENVIRONMENT:
-			return PASS_BACKGROUND;
-		case BL::RenderPass::type_AO:
-			return PASS_AO;
-		case BL::RenderPass::type_SHADOW:
-			return PASS_SHADOW;
-
-		case BL::RenderPass::type_DIFFUSE:
-		case BL::RenderPass::type_COLOR:
-		case BL::RenderPass::type_REFRACTION:
-		case BL::RenderPass::type_SPECULAR:
-		case BL::RenderPass::type_REFLECTION:
-			return PASS_NONE;
-#ifdef WITH_CYCLES_DEBUG
-		case BL::RenderPass::type_DEBUG:
-		{
-			switch(b_pass.debug_type()) {
-				case BL::RenderPass::debug_type_BVH_TRAVERSED_NODES:
-					return PASS_BVH_TRAVERSED_NODES;
-				case BL::RenderPass::debug_type_BVH_TRAVERSED_INSTANCES:
-					return PASS_BVH_TRAVERSED_INSTANCES;
-				case BL::RenderPass::debug_type_BVH_INTERSECTIONS:
-					return PASS_BVH_INTERSECTIONS;
-				case BL::RenderPass::debug_type_RAY_BOUNCES:
-					return PASS_RAY_BOUNCES;
-			}
-			break;
-		}
-#endif
-	}
-	
-	return PASS_NONE;
-}
-
 static ShaderEvalType get_shader_type(const string& pass_type)
 {
 	const char *shader_type = pass_type.c_str();
@@ -379,12 +299,13 @@ static BL::RenderResult begin_render_result(BL::RenderEngine& b_engine,
 static void end_render_result(BL::RenderEngine& b_engine,
                              BL::RenderResult& b_rr,
                              bool cancel,
+                              bool highlight,
                              bool do_merge_results)
 {
-	b_engine.end_result(b_rr, (int)cancel, (int)do_merge_results);
+	b_engine.end_result(b_rr, (int)cancel, (int) highlight, (int)do_merge_results);
 }

-void BlenderSession::do_write_update_render_tile(RenderTile& rtile, bool do_update_only)
+void BlenderSession::do_write_update_render_tile(RenderTile& rtile, bool do_update_only, bool highlight)
 {
 	BufferParams& params = rtile.buffers->params;
 	int x = params.full_x - session->tile_manager.params.full_x;
@@ -420,37 +341,37 @@ void BlenderSession::do_write_update_render_tile(RenderTile& rtile, bool do_upda
 			update_render_result(b_rr, b_rlay, rtile);
 		}

-		end_render_result(b_engine, b_rr, true, true);
+		end_render_result(b_engine, b_rr, true, highlight, true);
 	}
 	else {
 		/* write result */
 		write_render_result(b_rr, b_rlay, rtile);
-		end_render_result(b_engine, b_rr, false, true);
+		end_render_result(b_engine, b_rr, false, false, true);
 	}
 }

 void BlenderSession::write_render_tile(RenderTile& rtile)
 {
-	do_write_update_render_tile(rtile, false);
+	do_write_update_render_tile(rtile, false, false);
 }

-void BlenderSession::update_render_tile(RenderTile& rtile)
+void BlenderSession::update_render_tile(RenderTile& rtile, bool highlight)
 {
 	/* use final write for preview renders, otherwise render result wouldn't be
 	 * be updated in blender side
 	 * would need to be investigated a bit further, but for now shall be fine
 	 */
 	if(!b_engine.is_preview())
-		do_write_update_render_tile(rtile, true);
+		do_write_update_render_tile(rtile, true, highlight);
 	else
-		do_write_update_render_tile(rtile, false);
+		do_write_update_render_tile(rtile, false, false);
 }

 void BlenderSession::render()
 {
 	/* set callback to write out render results */
 	session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1);
-	session->update_render_tile_cb = function_bind(&BlenderSession::update_render_tile, this, _1);
+	session->update_render_tile_cb = function_bind(&BlenderSession::update_render_tile, this, _1, _2);

 	/* get buffer parameters */
 	SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
@@ -471,7 +392,7 @@ void BlenderSession::render()

 		/* layer will be missing if it was disabled in the UI */
 		if(b_single_rlay == b_rr.layers.end()) {
-			end_render_result(b_engine, b_rr, true, false);
+			end_render_result(b_engine, b_rr, true, true, false);
 			continue;
 		}

@@ -479,25 +400,36 @@ void BlenderSession::render()

 		/* add passes */
 		array<Pass> passes;
-		Pass::add(PASS_COMBINED, passes);
-
 		if(session_params.device.advanced_shading) {
-
-			/* loop over passes */
-			BL::RenderLayer::passes_iterator b_pass_iter;
-
-			for(b_rlay.passes.begin(b_pass_iter); b_pass_iter != b_rlay.passes.end(); ++b_pass_iter) {
-				BL::RenderPass b_pass(*b_pass_iter);
-				PassType pass_type = get_pass_type(b_pass);
-
-				if(pass_type == PASS_MOTION && scene->integrator->motion_blur)
-					continue;
-				if(pass_type != PASS_NONE)
-					Pass::add(pass_type, passes);
-			}
+			passes = sync->sync_render_passes(b_rlay, *b_layer_iter);
+		}
+		else {
+			Pass::add(PASS_COMBINED, passes);
 		}

 		buffer_params.passes = passes;
+
+		PointerRNA crl = RNA_pointer_get(&b_layer_iter->ptr, "cycles");
+		buffer_params.denoising_data_pass = get_boolean(crl, "use_denoising");
+		session->tile_manager.schedule_denoising = get_boolean(crl, "use_denoising");
+		session->params.use_denoising = get_boolean(crl, "use_denoising");
+		scene->film->denoising_data_pass = buffer_params.denoising_data_pass;
+		scene->film->denoising_flags = 0;
+		if(!get_boolean(crl, "denoising_diffuse_direct"))        scene->film->denoising_flags |= DENOISING_CLEAN_DIFFUSE_DIR;
+		if(!get_boolean(crl, "denoising_diffuse_indirect"))      scene->film->denoising_flags |= DENOISING_CLEAN_DIFFUSE_IND;
+		if(!get_boolean(crl, "denoising_glossy_direct"))         scene->film->denoising_flags |= DENOISING_CLEAN_GLOSSY_DIR;
+		if(!get_boolean(crl, "denoising_glossy_indirect"))       scene->film->denoising_flags |= DENOISING_CLEAN_GLOSSY_IND;
+		if(!get_boolean(crl, "denoising_transmission_direct"))   scene->film->denoising_flags |= DENOISING_CLEAN_TRANSMISSION_DIR;
+		if(!get_boolean(crl, "denoising_transmission_indirect")) scene->film->denoising_flags |= DENOISING_CLEAN_TRANSMISSION_IND;
+		if(!get_boolean(crl, "denoising_subsurface_direct"))     scene->film->denoising_flags |= DENOISING_CLEAN_SUBSURFACE_DIR;
+		if(!get_boolean(crl, "denoising_subsurface_indirect"))   scene->film->denoising_flags |= DENOISING_CLEAN_SUBSURFACE_IND;
+		scene->film->denoising_clean_pass = (scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES);
+		buffer_params.denoising_clean_pass = scene->film->denoising_clean_pass;
+		session->params.denoising_radius = get_int(crl, "denoising_radius");
+		session->params.denoising_strength = get_float(crl, "denoising_strength");
+		session->params.denoising_feature_strength = get_float(crl, "denoising_feature_strength");
+		session->params.denoising_relative_pca = get_boolean(crl, "denoising_relative_pca");
+
 		scene->film->pass_alpha_threshold = b_layer_iter->pass_alpha_threshold();
 		scene->film->tag_passes_update(scene, passes);
 		scene->film->tag_update(scene);
@@ -551,7 +483,7 @@ void BlenderSession::render()
 		}

 		/* free result without merging */
-		end_render_result(b_engine, b_rr, true, false);
+		end_render_result(b_engine, b_rr, true, true, false);

 		if(session->progress.get_cancel())
 			break;
@@ -749,19 +681,31 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult& b_rr,
 			BL::RenderPass b_pass(*b_iter);

 			/* find matching pass type */
-			PassType pass_type = get_pass_type(b_pass);
+			PassType pass_type = BlenderSync::get_pass_type(b_pass);
 			int components = b_pass.channels();

-			/* copy pixels */
-			if(!buffers->get_pass_rect(pass_type, exposure, sample, components, &pixels[0]))
+			bool read = false;
+			if(pass_type != PASS_NONE) {
+				/* copy pixels */
+				read = buffers->get_pass_rect(pass_type, exposure, sample, components, &pixels[0]);
+			}
+			else {
+				int denoising_offset = BlenderSync::get_denoising_pass(b_pass);
+				if(denoising_offset >= 0) {
+					read = buffers->get_denoising_pass_rect(denoising_offset, exposure, sample, components, &pixels[0]);
+				}
+			}
+
+			if(!read) {
 				memset(&pixels[0], 0, pixels.size()*sizeof(float));
+			}

 			b_pass.rect(&pixels[0]);
 		}
 	}
 	else {
 		/* copy combined pass */
-		BL::RenderPass b_combined_pass(b_rlay.passes.find_by_type(BL::RenderPass::type_COMBINED, b_rview_name.c_str()));
+		BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str()));
 		if(buffers->get_pass_rect(PASS_COMBINED, exposure, sample, 4, &pixels[0]))
 			b_combined_pass.rect(&pixels[0]);
 	}
@@ -989,10 +933,14 @@ void BlenderSession::update_status_progress()
 	if(substatus.size() > 0)
 		status += " | " + substatus;

-	if(status != last_status) {
+	double current_time = time_dt();
+	/* When rendering in a window, redraw the status at least once per second to keep the elapsed and remaining time up-to-date.
+	 * For headless rendering, only report when something significant changes to keep the console output readable. */
+	if(status != last_status || (!headless && (current_time - last_status_time) > 1.0)) {
 		b_engine.update_stats("", (timestatus + scene + status).c_str());
 		b_engine.update_memory_stats(mem_used, mem_peak);
 		last_status = status;
+		last_status_time = current_time;
 	}
 	if(progress != last_progress) {
 		b_engine.update_progress(progress);
@@ -1342,9 +1290,21 @@ void BlenderSession::update_resumable_tile_manager(int num_samples)
 		return;
 	}

-	int num_samples_per_chunk = (int)ceilf((float)num_samples / num_resumable_chunks);
-	int range_start_sample = num_samples_per_chunk * (current_resumable_chunk - 1);
-	int range_num_samples = num_samples_per_chunk;
+	const int num_samples_per_chunk = (int)ceilf((float)num_samples / num_resumable_chunks);
+
+	int range_start_sample, range_num_samples;
+	if(current_resumable_chunk != 0) {
+		/* Single chunk rendering. */
+		range_start_sample = num_samples_per_chunk * (current_resumable_chunk - 1);
+		range_num_samples = num_samples_per_chunk;
+	}
+	else {
+		/* Ranged-chunks. */
+		const int num_chunks = end_resumable_chunk - start_resumable_chunk + 1;
+		range_start_sample = num_samples_per_chunk * (start_resumable_chunk - 1);
+		range_num_samples = num_chunks * num_samples_per_chunk;
+	}
+	/* Make sure we don't overshoot: clamp to the samples remaining after range_start_sample. */
 	if(range_start_sample + range_num_samples > num_samples) {
-		range_num_samples = num_samples - range_num_samples;
+		range_num_samples = num_samples - range_start_sample;
 	}
@@ -1352,6 +1312,9 @@ void BlenderSession::update_resumable_tile_manager(int num_samples)
 	VLOG(1) << "Samples range start is " << range_start_sample << ", "
 	        << "number of samples to render is " << range_num_samples;

+	scene->integrator->start_sample = range_start_sample;
+	scene->integrator->tag_update(scene);
+
 	session->tile_manager.range_start_sample = range_start_sample;
 	session->tile_manager.range_num_samples = range_num_samples;
 }
--- a/intern/cycles/blender/blender_session.h
+++ b/intern/cycles/blender/blender_session.h
@@ -17,12 +17,12 @@
 #ifndef __BLENDER_SESSION_H__
 #define __BLENDER_SESSION_H__

-#include "device.h"
-#include "scene.h"
-#include "session.h"
-#include "bake.h"
+#include "device/device.h"
+#include "render/scene.h"
+#include "render/session.h"
+#include "render/bake.h"

-#include "util_vector.h"
+#include "util/util_vector.h"

 CCL_NAMESPACE_BEGIN

@@ -79,7 +79,7 @@ public:
 	void update_render_result(BL::RenderResult& b_rr,
 	                          BL::RenderLayer& b_rlay,
 	                          RenderTile& rtile);
-	void update_render_tile(RenderTile& rtile);
+	void update_render_tile(RenderTile& rtile, bool highlight);

 	/* interactive updates */
 	void synchronize();
@@ -113,6 +113,7 @@ public:
 	string last_status;
 	string last_error;
 	float last_progress;
+	double last_status_time;

 	int width, height;
 	double start_resize_time;
@@ -137,12 +138,16 @@ public:
 	/* Current resumable chunk index to render. */
 	static int current_resumable_chunk;

+	/* Alternative to single-chunk rendering to render a range of chunks. */
+	static int start_resumable_chunk;
+	static int end_resumable_chunk;
+
 protected:
 	void do_write_update_render_result(BL::RenderResult& b_rr,
 	                                   BL::RenderLayer& b_rlay,
 	                                   RenderTile& rtile,
 	                                   bool do_update_only);
-	void do_write_update_render_tile(RenderTile& rtile, bool do_update_only);
+	void do_write_update_render_tile(RenderTile& rtile, bool do_update_only, bool highlight);

 	int builtin_image_frame(const string &builtin_name);
 	void builtin_image_info(const string &builtin_name,
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -14,20 +14,23 @@
 * limitations under the License.
 */

-#include "background.h"
-#include "graph.h"
-#include "light.h"
-#include "nodes.h"
-#include "osl.h"
-#include "scene.h"
-#include "shader.h"
+#include "render/background.h"
+#include "render/graph.h"
+#include "render/light.h"
+#include "render/nodes.h"
+#include "render/osl.h"
+#include "render/scene.h"
+#include "render/shader.h"

-#include "blender_texture.h"
-#include "blender_sync.h"
-#include "blender_util.h"
+#include "blender/blender_texture.h"
+#include "blender/blender_sync.h"
+#include "blender/blender_util.h"

-#include "util_debug.h"
-#include "util_string.h"
+#include "util/util_debug.h"
+#include "util/util_foreach.h"
+#include "util/util_string.h"
+#include "util/util_set.h"
+#include "util/util_task.h"

 CCL_NAMESPACE_BEGIN

@@ -518,6 +521,19 @@ static ShaderNode *add_node(Scene *scene,
 		}
 		node = hair;
 	}
+	else if(b_node.is_a(&RNA_ShaderNodeBsdfPrincipled)) {
+		BL::ShaderNodeBsdfPrincipled b_principled_node(b_node);
+		PrincipledBsdfNode *principled = new PrincipledBsdfNode();
+		switch (b_principled_node.distribution()) {
+			case BL::ShaderNodeBsdfPrincipled::distribution_GGX:
+				principled->distribution = CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID;
+				break;
+			case BL::ShaderNodeBsdfPrincipled::distribution_MULTI_GGX:
+				principled->distribution = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID;
+				break;
+		}
+		node = principled;
+	}
 	else if(b_node.is_a(&RNA_ShaderNodeBsdfTranslucent)) {
 		node = new TranslucentBsdfNode();
 	}
@@ -609,7 +625,8 @@ static ShaderNode *add_node(Scene *scene,
 			bool is_builtin = b_image.packed_file() ||
 			                  b_image.source() == BL::Image::source_GENERATED ||
 			                  b_image.source() == BL::Image::source_MOVIE ||
-			                  b_engine.is_preview();
+			                  (b_engine.is_preview() &&
+			                   b_image.source() != BL::Image::source_SEQUENCE);

 			if(is_builtin) {
 				/* for builtin images we're using image datablock name to find an image to
@@ -640,7 +657,8 @@ static ShaderNode *add_node(Scene *scene,
 				        image->filename.string(),
 				        image->builtin_data,
 				        get_image_interpolation(b_image_node),
-				        get_image_extension(b_image_node));
+				        get_image_extension(b_image_node),
+				        image->use_alpha);
 			}
 		}
 		image->color_space = (NodeImageColorSpace)b_image_node.color_space();
@@ -661,7 +679,8 @@ static ShaderNode *add_node(Scene *scene,
 			bool is_builtin = b_image.packed_file() ||
 			                  b_image.source() == BL::Image::source_GENERATED ||
 			                  b_image.source() == BL::Image::source_MOVIE ||
-			                  b_engine.is_preview();
+			                  (b_engine.is_preview() &&
+			                   b_image.source() != BL::Image::source_SEQUENCE);

 			if(is_builtin) {
 				int scene_frame = b_scene.frame_current();
@@ -686,7 +705,8 @@ static ShaderNode *add_node(Scene *scene,
 				        env->filename.string(),
 				        env->builtin_data,
 				        get_image_interpolation(b_env_node),
-				        EXTENSION_REPEAT);
+				        EXTENSION_REPEAT,
+				        env->use_alpha);
 			}
 		}
 		env->color_space = (NodeImageColorSpace)b_env_node.color_space();
@@ -823,7 +843,8 @@ static ShaderNode *add_node(Scene *scene,
 			        point_density->filename.string(),
 			        point_density->builtin_data,
 			        point_density->interpolation,
-			        EXTENSION_CLIP);
+			        EXTENSION_CLIP,
+			        true);
 		}
 		node = point_density;

@@ -1159,6 +1180,9 @@ void BlenderSync::sync_materials(bool update_all)
 	/* material loop */
 	BL::BlendData::materials_iterator b_mat;

+	TaskPool pool;
+	set<Shader*> updated_shaders;
+
 	for(b_data.materials.begin(b_mat); b_mat != b_data.materials.end(); ++b_mat) {
 		Shader *shader;

@@ -1194,9 +1218,37 @@ void BlenderSync::sync_materials(bool update_all)
 			shader->displacement_method = (experimental) ? get_displacement_method(cmat) : DISPLACE_BUMP;

 			shader->set_graph(graph);
-			shader->tag_update(scene);
+
+			/* By simplifying the shader graph as soon as possible, some
+			 * redundant shader nodes might be removed which prevents loading
+			 * unnecessary attributes later.
+			 *
+			 * However, since graph simplification also accounts for e.g. mix
+			 * weight, this would cause frequent expensive resyncs in interactive
+			 * sessions, so for those sessions optimization is only performed
+			 * right before compiling.
+			 */
+			if(!preview) {
+				pool.push(function_bind(&ShaderGraph::simplify, graph, scene));
+				/* NOTE: Update shaders out of the threads since those routines
+				 * are accessing and writing to a global context.
+				 */
+				updated_shaders.insert(shader);
+			}
+			else {
+				/* NOTE: Update tagging can access links which are being
+				 * optimized out.
+				 */
+				shader->tag_update(scene);
+			}
 		}
 	}
+
+	pool.wait_work();
+
+	foreach(Shader *shader, updated_shaders) {
+		shader->tag_update(scene);
+	}
 }

 /* Sync World */
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -14,29 +14,29 @@
 * limitations under the License.
 */

-#include "background.h"
-#include "camera.h"
-#include "film.h"
-#include "graph.h"
-#include "integrator.h"
-#include "light.h"
-#include "mesh.h"
-#include "nodes.h"
-#include "object.h"
-#include "scene.h"
-#include "shader.h"
-#include "curves.h"
+#include "render/background.h"
+#include "render/camera.h"
+#include "render/film.h"
+#include "render/graph.h"
+#include "render/integrator.h"
+#include "render/light.h"
+#include "render/mesh.h"
+#include "render/nodes.h"
+#include "render/object.h"
+#include "render/scene.h"
+#include "render/shader.h"
+#include "render/curves.h"

-#include "device.h"
+#include "device/device.h"

-#include "blender_sync.h"
-#include "blender_session.h"
-#include "blender_util.h"
+#include "blender/blender_sync.h"
+#include "blender/blender_session.h"
+#include "blender/blender_util.h"

-#include "util_debug.h"
-#include "util_foreach.h"
-#include "util_opengl.h"
-#include "util_hash.h"
+#include "util/util_debug.h"
+#include "util/util_foreach.h"
+#include "util/util_opengl.h"
+#include "util/util_hash.h"

 CCL_NAMESPACE_BEGIN

@@ -210,10 +210,9 @@ void BlenderSync::sync_data(BL::RenderSettings& b_render,
 	   scene->need_motion() == Scene::MOTION_NONE ||
 	   scene->camera->motion_position == Camera::MOTION_POSITION_CENTER)
 	{
-		sync_objects(b_v3d);
+		sync_objects();
 	}
 	sync_motion(b_render,
-	            b_v3d,
 	            b_override,
 	            width, height,
 	            python_thread_state);
@@ -322,6 +321,15 @@ void BlenderSync::sync_integrator()
 		integrator->volume_samples = volume_samples;
 	}

+	if(b_scene.render().use_simplify()) {
+		if(preview) {
+			integrator->ao_bounces = get_int(cscene, "ao_bounces");
+		}
+		else {
+			integrator->ao_bounces = get_int(cscene, "ao_bounces_render");
+		}
+	}
+
 	if(integrator->modified(previntegrator))
 		integrator->tag_update(scene);
 }
@@ -471,6 +479,132 @@ void BlenderSync::sync_images()
 	}
 }

+/* Passes */
+PassType BlenderSync::get_pass_type(BL::RenderPass& b_pass)
+{
+	string name = b_pass.name();
+#define MAP_PASS(passname, passtype) if(name == passname) return passtype;
+	/* NOTE: Keep in sync with defined names from DNA_scene_types.h */
+	MAP_PASS("Combined", PASS_COMBINED);
+	MAP_PASS("Depth", PASS_DEPTH);
+	MAP_PASS("Mist", PASS_MIST);
+	MAP_PASS("Normal", PASS_NORMAL);
+	MAP_PASS("IndexOB", PASS_OBJECT_ID);
+	MAP_PASS("UV", PASS_UV);
+	MAP_PASS("Vector", PASS_MOTION);
+	MAP_PASS("IndexMA", PASS_MATERIAL_ID);
+
+	MAP_PASS("DiffDir", PASS_DIFFUSE_DIRECT);
+	MAP_PASS("GlossDir", PASS_GLOSSY_DIRECT);
+	MAP_PASS("TransDir", PASS_TRANSMISSION_DIRECT);
+	MAP_PASS("SubsurfaceDir", PASS_SUBSURFACE_DIRECT);
+
+	MAP_PASS("DiffInd", PASS_DIFFUSE_INDIRECT);
+	MAP_PASS("GlossInd", PASS_GLOSSY_INDIRECT);
+	MAP_PASS("TransInd", PASS_TRANSMISSION_INDIRECT);
+	MAP_PASS("SubsurfaceInd", PASS_SUBSURFACE_INDIRECT);
+
+	MAP_PASS("DiffCol", PASS_DIFFUSE_COLOR);
+	MAP_PASS("GlossCol", PASS_GLOSSY_COLOR);
+	MAP_PASS("TransCol", PASS_TRANSMISSION_COLOR);
+	MAP_PASS("SubsurfaceCol", PASS_SUBSURFACE_COLOR);
+
+	MAP_PASS("Emit", PASS_EMISSION);
+	MAP_PASS("Env", PASS_BACKGROUND);
+	MAP_PASS("AO", PASS_AO);
+	MAP_PASS("Shadow", PASS_SHADOW);
+
+#ifdef __KERNEL_DEBUG__
+	MAP_PASS("Debug BVH Traversed Nodes", PASS_BVH_TRAVERSED_NODES);
+	MAP_PASS("Debug BVH Traversed Instances", PASS_BVH_TRAVERSED_INSTANCES);
+	MAP_PASS("Debug BVH Intersections", PASS_BVH_INTERSECTIONS);
+	MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES);
+#endif
+#undef MAP_PASS
+
+	return PASS_NONE;
+}
+
+int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass)
+{
+	string name = b_pass.name();
+	if(name.substr(0, 10) != "Denoising ") {
+		return -1;
+	}
+	name = name.substr(10);
+
+#define MAP_PASS(passname, offset) if(name == passname) return offset;
+	MAP_PASS("Normal", DENOISING_PASS_NORMAL);
+	MAP_PASS("Normal Variance", DENOISING_PASS_NORMAL_VAR);
+	MAP_PASS("Albedo", DENOISING_PASS_ALBEDO);
+	MAP_PASS("Albedo Variance", DENOISING_PASS_ALBEDO_VAR);
+	MAP_PASS("Depth", DENOISING_PASS_DEPTH);
+	MAP_PASS("Depth Variance", DENOISING_PASS_DEPTH_VAR);
+	MAP_PASS("Shadow A", DENOISING_PASS_SHADOW_A);
+	MAP_PASS("Shadow B", DENOISING_PASS_SHADOW_B);
+	MAP_PASS("Image", DENOISING_PASS_COLOR);
+	MAP_PASS("Image Variance", DENOISING_PASS_COLOR_VAR);
+#undef MAP_PASS
+
+	return -1;
+}
+
+array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
+                                            BL::SceneRenderLayer& b_srlay)
+{
+	array<Pass> passes;
+	Pass::add(PASS_COMBINED, passes);
+
+	/* loop over passes */
+	BL::RenderLayer::passes_iterator b_pass_iter;
+
+	for(b_rlay.passes.begin(b_pass_iter); b_pass_iter != b_rlay.passes.end(); ++b_pass_iter) {
+		BL::RenderPass b_pass(*b_pass_iter);
+		PassType pass_type = get_pass_type(b_pass);
+
+		if(pass_type == PASS_MOTION && scene->integrator->motion_blur)
+			continue;
+		if(pass_type != PASS_NONE)
+			Pass::add(pass_type, passes);
+	}
+
+	PointerRNA crp = RNA_pointer_get(&b_srlay.ptr, "cycles");
+	if(get_boolean(crp, "denoising_store_passes")) {
+		b_engine.add_pass("Denoising Normal",          3, "XYZ", b_srlay.name().c_str());
+		b_engine.add_pass("Denoising Normal Variance", 3, "XYZ", b_srlay.name().c_str());
+		b_engine.add_pass("Denoising Albedo",          3, "RGB", b_srlay.name().c_str());
+		b_engine.add_pass("Denoising Albedo Variance", 3, "RGB", b_srlay.name().c_str());
+		b_engine.add_pass("Denoising Depth",           1, "Z",   b_srlay.name().c_str());
+		b_engine.add_pass("Denoising Depth Variance",  1, "Z",   b_srlay.name().c_str());
+		b_engine.add_pass("Denoising Shadow A",        3, "XYV", b_srlay.name().c_str());
+		b_engine.add_pass("Denoising Shadow B",        3, "XYV", b_srlay.name().c_str());
+		b_engine.add_pass("Denoising Image",           3, "RGB", b_srlay.name().c_str());
+		b_engine.add_pass("Denoising Image Variance",  3, "RGB", b_srlay.name().c_str());
+	}
+#ifdef __KERNEL_DEBUG__
+	if(get_boolean(crp, "pass_debug_bvh_traversed_nodes")) {
+		b_engine.add_pass("Debug BVH Traversed Nodes", 1, "X", b_srlay.name().c_str());
+		Pass::add(PASS_BVH_TRAVERSED_NODES, passes);
+	}
+	if(get_boolean(crp, "pass_debug_bvh_traversed_instances")) {
+		b_engine.add_pass("Debug BVH Traversed Instances", 1, "X", b_srlay.name().c_str());
+		Pass::add(PASS_BVH_TRAVERSED_INSTANCES, passes);
+	}
+	if(get_boolean(crp, "pass_debug_bvh_intersections")) {
+		b_engine.add_pass("Debug BVH Intersections", 1, "X", b_srlay.name().c_str());
+		Pass::add(PASS_BVH_INTERSECTIONS, passes);
+	}
+	if(get_boolean(crp, "pass_debug_ray_bounces")) {
+		b_engine.add_pass("Debug Ray Bounces", 1, "X", b_srlay.name().c_str());
+		Pass::add(PASS_RAY_BOUNCES, passes);
+	}
+#else
+	(void) b_srlay;  /* Ignored. */
+#endif
+
+	return passes;
+}
+
 /* Scene Parameters */

 SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -22,15 +22,15 @@
 #include "RNA_access.h"
 #include "RNA_blender_cpp.h"

-#include "blender_util.h"
+#include "blender/blender_util.h"

-#include "scene.h"
-#include "session.h"
+#include "render/scene.h"
+#include "render/session.h"

-#include "util_map.h"
-#include "util_set.h"
-#include "util_transform.h"
-#include "util_vector.h"
+#include "util/util_map.h"
+#include "util/util_set.h"
+#include "util/util_transform.h"
+#include "util/util_vector.h"

 CCL_NAMESPACE_BEGIN

@@ -67,6 +67,8 @@ public:
 	               void **python_thread_state,
 	               const char *layer = 0);
 	void sync_render_layers(BL::SpaceView3D& b_v3d, const char *layer);
+	array<Pass> sync_render_passes(BL::RenderLayer& b_rlay,
+	                               BL::SceneRenderLayer& b_srlay);
 	void sync_integrator();
 	void sync_camera(BL::RenderSettings& b_render,
 	                 BL::Object& b_override,
@@ -93,13 +95,15 @@ public:
 	                                      Camera *cam,
 	                                      int width, int height);

+	static PassType get_pass_type(BL::RenderPass& b_pass);
+	static int get_denoising_pass(BL::RenderPass& b_pass);
+
 private:
 	/* sync */
 	void sync_lamps(bool update_all);
 	void sync_materials(bool update_all);
-	void sync_objects(BL::SpaceView3D& b_v3d, float motion_time = 0.0f);
+	void sync_objects(float motion_time = 0.0f);
 	void sync_motion(BL::RenderSettings& b_render,
-	                 BL::SpaceView3D& b_v3d,
 	                 BL::Object& b_override,
 	                 int width, int height,
 	                 void **python_thread_state);
--- a/intern/cycles/blender/blender_texture.cpp
+++ b/intern/cycles/blender/blender_texture.cpp
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

-#include "blender_texture.h"
+#include "blender/blender_texture.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/blender/blender_texture.h
+++ b/intern/cycles/blender/blender_texture.h
@@ -18,7 +18,7 @@
 #define __BLENDER_TEXTURE_H__

 #include <stdlib.h>
-#include "blender_sync.h"
+#include "blender/blender_sync.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/blender/blender_util.h
+++ b/intern/cycles/blender/blender_util.h
@@ -17,14 +17,15 @@
 #ifndef __BLENDER_UTIL_H__
 #define __BLENDER_UTIL_H__

-#include "mesh.h"
+#include "render/mesh.h"

-#include "util_map.h"
-#include "util_path.h"
-#include "util_set.h"
-#include "util_transform.h"
-#include "util_types.h"
-#include "util_vector.h"
+#include "util/util_algorithm.h"
+#include "util/util_map.h"
+#include "util/util_path.h"
+#include "util/util_set.h"
+#include "util/util_transform.h"
+#include "util/util_types.h"
+#include "util/util_vector.h"

 /* Hacks to hook into Blender API
 * todo: clean this up ... */
@@ -78,7 +79,7 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
 				me.calc_normals_split();
 			}
 			else {
-				me.split_faces();
+				me.split_faces(false);
 			}
 		}
 		if(subdivision_type == Mesh::SUBDIVISION_NONE) {
@@ -173,22 +174,19 @@ static inline void curvemapping_color_to_array(BL::CurveMapping& cumap,

 	if(rgb_curve) {
 		BL::CurveMap mapI = cumap.curves[3];
-
 		for(int i = 0; i < size; i++) {
-			float t = min_x + (float)i/(float)(size-1) * range_x;
-
-			data[i][0] = mapR.evaluate(mapI.evaluate(t));
-			data[i][1] = mapG.evaluate(mapI.evaluate(t));
-			data[i][2] = mapB.evaluate(mapI.evaluate(t));
+			const float t = mapI.evaluate(min_x + (float)i/(float)(size-1) * range_x);  /* combined curve, evaluated once */
+			data[i] = make_float3(mapR.evaluate(t),
+			                      mapG.evaluate(t),
+			                      mapB.evaluate(t));
 		}
 	}
 	else {
 		for(int i = 0; i < size; i++) {
 			float t = min_x + (float)i/(float)(size-1) * range_x;
-
-			data[i][0] = mapR.evaluate(t);
-			data[i][1] = mapG.evaluate(t);
-			data[i][2] = mapB.evaluate(t);
+			data[i] = make_float3(mapR.evaluate(t),
+			                      mapG.evaluate(t),
+			                      mapB.evaluate(t));
 		}
 	}
 }
@@ -786,6 +784,35 @@ struct ParticleSystemKey {
 	}
 };

+class EdgeMap {
+public:
+	EdgeMap() {
+	}
+
+	void clear() {
+		edges_.clear();
+	}
+
+	void insert(int v0, int v1) {
+		get_sorted_verts(v0, v1);
+		edges_.insert(std::pair<int, int>(v0, v1));
+	}
+
+	bool exists(int v0, int v1) {
+		get_sorted_verts(v0, v1);
+		return edges_.find(std::pair<int, int>(v0, v1)) != edges_.end();
+	}
+
+protected:
+	void get_sorted_verts(int& v0, int& v1) {
+		if(v0 > v1) {
+			swap(v0, v1);
+		}
+	}
+
+	set< std::pair<int, int> > edges_;
+};
+
 CCL_NAMESPACE_END

 #endif /* __BLENDER_UTIL_H__ */
--- a/intern/cycles/bvh/CMakeLists.txt
+++ b/intern/cycles/bvh/CMakeLists.txt
@@ -1,12 +1,6 @@

 set(INC
-	.
-	../graph
-	../kernel
-	../kernel/svm
-	../render
-	../util
-	../device
+	..
 )

 set(INC_SYS
@@ -14,6 +8,8 @@ set(INC_SYS

 set(SRC
 	bvh.cpp
+	bvh2.cpp
+	bvh4.cpp
 	bvh_binning.cpp
 	bvh_build.cpp
 	bvh_node.cpp
@@ -24,6 +20,8 @@ set(SRC

 set(SRC_HEADERS
 	bvh.h
+	bvh2.h
+	bvh4.h
 	bvh_binning.h
 	bvh_build.h
 	bvh_node.h
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -15,45 +15,32 @@
 * limitations under the License.
 */

-#include "mesh.h"
-#include "object.h"
-#include "scene.h"
-#include "curves.h"
+#include "bvh/bvh.h"

-#include "bvh.h"
-#include "bvh_build.h"
-#include "bvh_node.h"
-#include "bvh_params.h"
-#include "bvh_unaligned.h"
+#include "render/mesh.h"
+#include "render/object.h"

-#include "util_debug.h"
-#include "util_foreach.h"
-#include "util_logging.h"
-#include "util_map.h"
-#include "util_progress.h"
-#include "util_system.h"
-#include "util_types.h"
-#include "util_math.h"
+#include "bvh/bvh2.h"
+#include "bvh/bvh4.h"
+#include "bvh/bvh_build.h"
+#include "bvh/bvh_node.h"
+
+#include "util/util_foreach.h"
+#include "util/util_progress.h"

 CCL_NAMESPACE_BEGIN

 /* Pack Utility */

-struct BVHStackEntry
+BVHStackEntry::BVHStackEntry(const BVHNode *n, int i)
+    : node(n), idx(i)
 {
-	const BVHNode *node;
-	int idx;
+}

-	BVHStackEntry(const BVHNode* n = 0, int i = 0)
-	: node(n), idx(i)
-	{
-	}
-
-	int encodeIdx() const
-	{
-		return (node->is_leaf())? ~idx: idx;
-	}
-};
+int BVHStackEntry::encodeIdx() const
+{
+	return (node->is_leaf())? ~idx: idx;
+}

 /* BVH */

@@ -65,9 +52,9 @@ BVH::BVH(const BVHParams& params_, const vector<Object*>& objects_)
 BVH *BVH::create(const BVHParams& params, const vector<Object*>& objects)
 {
 	if(params.use_qbvh)
-		return new QBVH(params, objects);
+		return new BVH4(params, objects);
 	else
-		return new RegularBVH(params, objects);
+		return new BVH2(params, objects);
 }

 /* Building */
@@ -81,6 +68,7 @@ void BVH::build(Progress& progress)
 	                   pack.prim_type,
 	                   pack.prim_index,
 	                   pack.prim_object,
+	                   pack.prim_time,
 	                   params,
 	                   progress);
 	BVHNode *root = bvh_build.run();
@@ -256,6 +244,10 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
 	pack.leaf_nodes.resize(leaf_nodes_size);
 	pack.object_node.resize(objects.size());

+	if(params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0) {
+		pack.prim_time.resize(prim_index_size);
+	}
+
 	int *pack_prim_index = (pack.prim_index.size())? &pack.prim_index[0]: NULL;
 	int *pack_prim_type = (pack.prim_type.size())? &pack.prim_type[0]: NULL;
 	int *pack_prim_object = (pack.prim_object.size())? &pack.prim_object[0]: NULL;
@@ -264,6 +256,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
 	uint *pack_prim_tri_index = (pack.prim_tri_index.size())? &pack.prim_tri_index[0]: NULL;
 	int4 *pack_nodes = (pack.nodes.size())? &pack.nodes[0]: NULL;
 	int4 *pack_leaf_nodes = (pack.leaf_nodes.size())? &pack.leaf_nodes[0]: NULL;
+	float2 *pack_prim_time = (pack.prim_time.size())? &pack.prim_time[0]: NULL;

 	/* merge */
 	foreach(Object *ob, objects) {
@@ -309,6 +302,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
 			int *bvh_prim_type = &bvh->pack.prim_type[0];
 			uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0];
 			uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0];
+			float2 *bvh_prim_time = bvh->pack.prim_time.size()? &bvh->pack.prim_time[0]: NULL;

 			for(size_t i = 0; i < bvh_prim_index_size; i++) {
 				if(bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
@@ -324,6 +318,9 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
 				pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
 				pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
 				pack_prim_object[pack_prim_index_offset] = 0;  // unused for instances
+				if(bvh_prim_time != NULL) {
+					pack_prim_time[pack_prim_index_offset] = bvh_prim_time[i];
+				}
 				pack_prim_index_offset++;
 			}
 		}
@@ -408,832 +405,4 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
 	}
 }

-/* Regular BVH */
-
-static bool node_bvh_is_unaligned(const BVHNode *node)
-{
-	const BVHNode *node0 = node->get_child(0),
-	              *node1 = node->get_child(1);
-	return node0->is_unaligned() || node1->is_unaligned();
-}
-
-RegularBVH::RegularBVH(const BVHParams& params_, const vector<Object*>& objects_)
-: BVH(params_, objects_)
-{
-}
-
-void RegularBVH::pack_leaf(const BVHStackEntry& e,
-                           const LeafNode *leaf)
-{
-	assert(e.idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
-	float4 data[BVH_NODE_LEAF_SIZE];
-	memset(data, 0, sizeof(data));
-	if(leaf->num_triangles() == 1 && pack.prim_index[leaf->m_lo] == -1) {
-		/* object */
-		data[0].x = __int_as_float(~(leaf->m_lo));
-		data[0].y = __int_as_float(0);
-	}
-	else {
-		/* triangle */
-		data[0].x = __int_as_float(leaf->m_lo);
-		data[0].y = __int_as_float(leaf->m_hi);
-	}
-	data[0].z = __uint_as_float(leaf->m_visibility);
-	if(leaf->num_triangles() != 0) {
-		data[0].w = __uint_as_float(pack.prim_type[leaf->m_lo]);
-	}
-
-	memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_NODE_LEAF_SIZE);
-}
-
-void RegularBVH::pack_inner(const BVHStackEntry& e,
-                            const BVHStackEntry& e0,
-                            const BVHStackEntry& e1)
-{
-	if(e0.node->is_unaligned() || e1.node->is_unaligned()) {
-		pack_unaligned_inner(e, e0, e1);
-	} else {
-		pack_aligned_inner(e, e0, e1);
-	}
-}
-
-void RegularBVH::pack_aligned_inner(const BVHStackEntry& e,
-                                    const BVHStackEntry& e0,
-                                    const BVHStackEntry& e1)
-{
-	pack_aligned_node(e.idx,
-	                  e0.node->m_bounds, e1.node->m_bounds,
-	                  e0.encodeIdx(), e1.encodeIdx(),
-	                  e0.node->m_visibility, e1.node->m_visibility);
-}
-
-void RegularBVH::pack_aligned_node(int idx,
-                                   const BoundBox& b0,
-                                   const BoundBox& b1,
-                                   int c0, int c1,
-                                   uint visibility0, uint visibility1)
-{
-	assert(idx + BVH_NODE_SIZE <= pack.nodes.size());
-	assert(c0 < 0 || c0 < pack.nodes.size());
-	assert(c1 < 0 || c1 < pack.nodes.size());
-
-	int4 data[BVH_NODE_SIZE] = {
-		make_int4(visibility0 & ~PATH_RAY_NODE_UNALIGNED,
-		          visibility1 & ~PATH_RAY_NODE_UNALIGNED,
-		          c0, c1),
-		make_int4(__float_as_int(b0.min.x),
-		          __float_as_int(b1.min.x),
-		          __float_as_int(b0.max.x),
-		          __float_as_int(b1.max.x)),
-		make_int4(__float_as_int(b0.min.y),
-		          __float_as_int(b1.min.y),
-		          __float_as_int(b0.max.y),
-		          __float_as_int(b1.max.y)),
-		make_int4(__float_as_int(b0.min.z),
-		          __float_as_int(b1.min.z),
-		          __float_as_int(b0.max.z),
-		          __float_as_int(b1.max.z)),
-	};
-
-	memcpy(&pack.nodes[idx], data, sizeof(int4)*BVH_NODE_SIZE);
-}
-
-void RegularBVH::pack_unaligned_inner(const BVHStackEntry& e,
-                                      const BVHStackEntry& e0,
-                                      const BVHStackEntry& e1)
-{
-	pack_unaligned_node(e.idx,
-	                    e0.node->get_aligned_space(),
-	                    e1.node->get_aligned_space(),
-	                    e0.node->m_bounds,
-	                    e1.node->m_bounds,
-	                    e0.encodeIdx(), e1.encodeIdx(),
-	                    e0.node->m_visibility, e1.node->m_visibility);
-}
-
-void RegularBVH::pack_unaligned_node(int idx,
-                                     const Transform& aligned_space0,
-                                     const Transform& aligned_space1,
-                                     const BoundBox& bounds0,
-                                     const BoundBox& bounds1,
-                                     int c0, int c1,
-                                     uint visibility0, uint visibility1)
-{
-	assert(idx + BVH_UNALIGNED_NODE_SIZE <= pack.nodes.size());
-	assert(c0 < 0 || c0 < pack.nodes.size());
-	assert(c1 < 0 || c1 < pack.nodes.size());
-
-	float4 data[BVH_UNALIGNED_NODE_SIZE];
-	Transform space0 = BVHUnaligned::compute_node_transform(bounds0,
-	                                                        aligned_space0);
-	Transform space1 = BVHUnaligned::compute_node_transform(bounds1,
-	                                                        aligned_space1);
-	data[0] = make_float4(__int_as_float(visibility0 | PATH_RAY_NODE_UNALIGNED),
-	                      __int_as_float(visibility1 | PATH_RAY_NODE_UNALIGNED),
-	                      __int_as_float(c0),
-	                      __int_as_float(c1));
-
-	data[1] = space0.x;
-	data[2] = space0.y;
-	data[3] = space0.z;
-	data[4] = space1.x;
-	data[5] = space1.y;
-	data[6] = space1.z;
-
-	memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_NODE_SIZE);
-}
-
-void RegularBVH::pack_nodes(const BVHNode *root)
-{
-	const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
-	const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
-	assert(num_leaf_nodes <= num_nodes);
-	const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
-	size_t node_size;
-	if(params.use_unaligned_nodes) {
-		const size_t num_unaligned_nodes =
-		        root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
-		node_size = (num_unaligned_nodes * BVH_UNALIGNED_NODE_SIZE) +
-		            (num_inner_nodes - num_unaligned_nodes) * BVH_NODE_SIZE;
-	}
-	else {
-		node_size = num_inner_nodes * BVH_NODE_SIZE;
-	}
-	/* Resize arrays */
-	pack.nodes.clear();
-	pack.leaf_nodes.clear();
-	/* For top level BVH, first merge existing BVH's so we know the offsets. */
-	if(params.top_level) {
-		pack_instances(node_size, num_leaf_nodes*BVH_NODE_LEAF_SIZE);
-	}
-	else {
-		pack.nodes.resize(node_size);
-		pack.leaf_nodes.resize(num_leaf_nodes*BVH_NODE_LEAF_SIZE);
-	}
-
-	int nextNodeIdx = 0, nextLeafNodeIdx = 0;
-
-	vector<BVHStackEntry> stack;
-	stack.reserve(BVHParams::MAX_DEPTH*2);
-	if(root->is_leaf()) {
-		stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
-	}
-	else {
-		stack.push_back(BVHStackEntry(root, nextNodeIdx));
-		nextNodeIdx += node_bvh_is_unaligned(root)
-		                       ? BVH_UNALIGNED_NODE_SIZE
-		                       : BVH_NODE_SIZE;
-	}
-
-	while(stack.size()) {
-		BVHStackEntry e = stack.back();
-		stack.pop_back();
-
-		if(e.node->is_leaf()) {
-			/* leaf node */
-			const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
-			pack_leaf(e, leaf);
-		}
-		else {
-			/* innner node */
-			int idx[2];
-			for(int i = 0; i < 2; ++i) {
-				if(e.node->get_child(i)->is_leaf()) {
-					idx[i] = nextLeafNodeIdx++;
-				}
-				else {
-					idx[i] = nextNodeIdx;
-					nextNodeIdx += node_bvh_is_unaligned(e.node->get_child(i))
-					                       ? BVH_UNALIGNED_NODE_SIZE
-					                       : BVH_NODE_SIZE;
-				}
-			}
-
-			stack.push_back(BVHStackEntry(e.node->get_child(0), idx[0]));
-			stack.push_back(BVHStackEntry(e.node->get_child(1), idx[1]));
-
-			pack_inner(e, stack[stack.size()-2], stack[stack.size()-1]);
-		}
-	}
-	assert(node_size == nextNodeIdx);
-	/* root index to start traversal at, to handle case of single leaf node */
-	pack.root_index = (root->is_leaf())? -1: 0;
-}
-
-void RegularBVH::refit_nodes()
-{
-	assert(!params.top_level);
-
-	BoundBox bbox = BoundBox::empty;
-	uint visibility = 0;
-	refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility);
-}
-
-void RegularBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
-{
-	if(leaf) {
-		assert(idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
-		const int4 *data = &pack.leaf_nodes[idx];
-		const int c0 = data[0].x;
-		const int c1 = data[0].y;
-		/* refit leaf node */
-		for(int prim = c0; prim < c1; prim++) {
-			int pidx = pack.prim_index[prim];
-			int tob = pack.prim_object[prim];
-			Object *ob = objects[tob];
-
-			if(pidx == -1) {
-				/* object instance */
-				bbox.grow(ob->bounds);
-			}
-			else {
-				/* primitives */
-				const Mesh *mesh = ob->mesh;
-
-				if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
-					/* curves */
-					int str_offset = (params.top_level)? mesh->curve_offset: 0;
-					Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
-					int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
-
-					curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
-
-					visibility |= PATH_RAY_CURVE;
-
-					/* motion curves */
-					if(mesh->use_motion_blur) {
-						Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
-						if(attr) {
-							size_t mesh_size = mesh->curve_keys.size();
-							size_t steps = mesh->motion_steps - 1;
-							float3 *key_steps = attr->data_float3();
-
-							for(size_t i = 0; i < steps; i++)
-								curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bbox);
-						}
-					}
-				}
-				else {
-					/* triangles */
-					int tri_offset = (params.top_level)? mesh->tri_offset: 0;
-					Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
-					const float3 *vpos = &mesh->verts[0];
-
-					triangle.bounds_grow(vpos, bbox);
-
-					/* motion triangles */
-					if(mesh->use_motion_blur) {
-						Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
-						if(attr) {
-							size_t mesh_size = mesh->verts.size();
-							size_t steps = mesh->motion_steps - 1;
-							float3 *vert_steps = attr->data_float3();
-
-							for(size_t i = 0; i < steps; i++)
-								triangle.bounds_grow(vert_steps + i*mesh_size, bbox);
-						}
-					}
-				}
-			}
-
-			visibility |= ob->visibility;
-		}
-
-		/* TODO(sergey): De-duplicate with pack_leaf(). */
-		float4 leaf_data[BVH_NODE_LEAF_SIZE];
-		leaf_data[0].x = __int_as_float(c0);
-		leaf_data[0].y = __int_as_float(c1);
-		leaf_data[0].z = __uint_as_float(visibility);
-		leaf_data[0].w = __uint_as_float(data[0].w);
-		memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_NODE_LEAF_SIZE);
-	}
-	else {
-		assert(idx + BVH_NODE_SIZE <= pack.nodes.size());
-
-		const int4 *data = &pack.nodes[idx];
-		const bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
-		const int c0 = data[0].z;
-		const int c1 = data[0].w;
-		/* refit inner node, set bbox from children */
-		BoundBox bbox0 = BoundBox::empty, bbox1 = BoundBox::empty;
-		uint visibility0 = 0, visibility1 = 0;
-
-		refit_node((c0 < 0)? -c0-1: c0, (c0 < 0), bbox0, visibility0);
-		refit_node((c1 < 0)? -c1-1: c1, (c1 < 0), bbox1, visibility1);
-
-		if(is_unaligned) {
-			Transform aligned_space = transform_identity();
-			pack_unaligned_node(idx,
-			                    aligned_space, aligned_space,
-			                    bbox0, bbox1,
-			                    c0, c1,
-			                    visibility0,
-			                    visibility1);
-		}
-		else {
-			pack_aligned_node(idx,
-			                  bbox0, bbox1,
-			                  c0, c1,
-			                  visibility0,
-			                  visibility1);
-		}
-
-		bbox.grow(bbox0);
-		bbox.grow(bbox1);
-		visibility = visibility0|visibility1;
-	}
-}
-
-/* QBVH */
-
-/* Can we avoid this somehow or make more generic?
- *
- * Perhaps we can merge nodes in actual tree and make our
- * life easier all over the place.
- */
-static bool node_qbvh_is_unaligned(const BVHNode *node)
-{
-	const BVHNode *node0 = node->get_child(0),
-	              *node1 = node->get_child(1);
-	bool has_unaligned = false;
-	if(node0->is_leaf()) {
-		has_unaligned |= node0->is_unaligned();
-	}
-	else {
-		has_unaligned |= node0->get_child(0)->is_unaligned();
-		has_unaligned |= node0->get_child(1)->is_unaligned();
-	}
-	if(node1->is_leaf()) {
-		has_unaligned |= node1->is_unaligned();
-	}
-	else {
-		has_unaligned |= node1->get_child(0)->is_unaligned();
-		has_unaligned |= node1->get_child(1)->is_unaligned();
-	}
-	return has_unaligned;
-}
-
-QBVH::QBVH(const BVHParams& params_, const vector<Object*>& objects_)
-: BVH(params_, objects_)
-{
-	params.use_qbvh = true;
-}
-
-void QBVH::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
-{
-	float4 data[BVH_QNODE_LEAF_SIZE];
-	memset(data, 0, sizeof(data));
-	if(leaf->num_triangles() == 1 && pack.prim_index[leaf->m_lo] == -1) {
-		/* object */
-		data[0].x = __int_as_float(~(leaf->m_lo));
-		data[0].y = __int_as_float(0);
-	}
-	else {
-		/* triangle */
-		data[0].x = __int_as_float(leaf->m_lo);
-		data[0].y = __int_as_float(leaf->m_hi);
-	}
-	data[0].z = __uint_as_float(leaf->m_visibility);
-	if(leaf->num_triangles() != 0) {
-		data[0].w = __uint_as_float(pack.prim_type[leaf->m_lo]);
-	}
-
-	memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
-}
-
-void QBVH::pack_inner(const BVHStackEntry& e,
-                      const BVHStackEntry *en,
-                      int num)
-{
-	bool has_unaligned = false;
-	/* Check whether we have to create unaligned node or all nodes are aligned
-	 * and we can cut some corner here.
-	 */
-	if(params.use_unaligned_nodes) {
-		for(int i = 0; i < num; i++) {
-			if(en[i].node->is_unaligned()) {
-				has_unaligned = true;
-				break;
-			}
-		}
-	}
-	if(has_unaligned) {
-		/* There's no unaligned children, pack into AABB node. */
-		pack_unaligned_inner(e, en, num);
-	}
-	else {
-		/* Create unaligned node with orientation transform for each of the
-		 * children.
-		 */
-		pack_aligned_inner(e, en, num);
-	}
-}
-
-void QBVH::pack_aligned_inner(const BVHStackEntry& e,
-                              const BVHStackEntry *en,
-                              int num)
-{
-	BoundBox bounds[4];
-	int child[4];
-	for(int i = 0; i < num; ++i) {
-		bounds[i] = en[i].node->m_bounds;
-		child[i] = en[i].encodeIdx();
-	}
-	pack_aligned_node(e.idx,
-	                  bounds,
-	                  child,
-	                  e.node->m_visibility,
-	                  e.node->m_time_from,
-	                  e.node->m_time_to,
-	                  num);
-}
-
-void QBVH::pack_aligned_node(int idx,
-                             const BoundBox *bounds,
-                             const int *child,
-                             const uint visibility,
-                             const float time_from,
-                             const float time_to,
-                             const int num)
-{
-	float4 data[BVH_QNODE_SIZE];
-	memset(data, 0, sizeof(data));
-
-	data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
-	data[0].y = time_from;
-	data[0].z = time_to;
-
-	for(int i = 0; i < num; i++) {
-		float3 bb_min = bounds[i].min;
-		float3 bb_max = bounds[i].max;
-
-		data[1][i] = bb_min.x;
-		data[2][i] = bb_max.x;
-		data[3][i] = bb_min.y;
-		data[4][i] = bb_max.y;
-		data[5][i] = bb_min.z;
-		data[6][i] = bb_max.z;
-
-		data[7][i] = __int_as_float(child[i]);
-	}
-
-	for(int i = num; i < 4; i++) {
-		/* We store BB which would never be recorded as intersection
-		 * so kernel might safely assume there are always 4 child nodes.
-		 */
-		data[1][i] = FLT_MAX;
-		data[2][i] = -FLT_MAX;
-
-		data[3][i] = FLT_MAX;
-		data[4][i] = -FLT_MAX;
-
-		data[5][i] = FLT_MAX;
-		data[6][i] = -FLT_MAX;
-
-		data[7][i] = __int_as_float(0);
-	}
-
-	memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_QNODE_SIZE);
-}
-
-void QBVH::pack_unaligned_inner(const BVHStackEntry& e,
-                                const BVHStackEntry *en,
-                                int num)
-{
-	Transform aligned_space[4];
-	BoundBox bounds[4];
-	int child[4];
-	for(int i = 0; i < num; ++i) {
-		aligned_space[i] = en[i].node->get_aligned_space();
-		bounds[i] = en[i].node->m_bounds;
-		child[i] = en[i].encodeIdx();
-	}
-	pack_unaligned_node(e.idx,
-	                    aligned_space,
-	                    bounds,
-	                    child,
-	                    e.node->m_visibility,
-	                    e.node->m_time_from,
-	                    e.node->m_time_to,
-	                    num);
-}
-
-void QBVH::pack_unaligned_node(int idx,
-                               const Transform *aligned_space,
-                               const BoundBox *bounds,
-                               const int *child,
-                               const uint visibility,
-                               const float time_from,
-                               const float time_to,
-                               const int num)
-{
-	float4 data[BVH_UNALIGNED_QNODE_SIZE];
-	memset(data, 0, sizeof(data));
-
-	data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
-	data[0].y = time_from;
-	data[0].z = time_to;
-
-	for(int i = 0; i < num; i++) {
-		Transform space = BVHUnaligned::compute_node_transform(
-		        bounds[i],
-		        aligned_space[i]);
-
-		data[1][i] = space.x.x;
-		data[2][i] = space.x.y;
-		data[3][i] = space.x.z;
-
-		data[4][i] = space.y.x;
-		data[5][i] = space.y.y;
-		data[6][i] = space.y.z;
-
-		data[7][i] = space.z.x;
-		data[8][i] = space.z.y;
-		data[9][i] = space.z.z;
-
-		data[10][i] = space.x.w;
-		data[11][i] = space.y.w;
-		data[12][i] = space.z.w;
-
-		data[13][i] = __int_as_float(child[i]);
-	}
-
-	for(int i = num; i < 4; i++) {
-		/* We store BB which would never be recorded as intersection
-		 * so kernel might safely assume there are always 4 child nodes.
-		 */
-
-		data[1][i] = 1.0f;
-		data[2][i] = 0.0f;
-		data[3][i] = 0.0f;
-
-		data[4][i] = 0.0f;
-		data[5][i] = 0.0f;
-		data[6][i] = 0.0f;
-
-		data[7][i] = 0.0f;
-		data[8][i] = 0.0f;
-		data[9][i] = 0.0f;
-
-		data[10][i] = -FLT_MAX;
-		data[11][i] = -FLT_MAX;
-		data[12][i] = -FLT_MAX;
-
-		data[13][i] = __int_as_float(0);
-	}
-
-	memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE);
-}
-
-/* Quad SIMD Nodes */
-
-void QBVH::pack_nodes(const BVHNode *root)
-{
-	/* Calculate size of the arrays required. */
-	const size_t num_nodes = root->getSubtreeSize(BVH_STAT_QNODE_COUNT);
-	const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
-	assert(num_leaf_nodes <= num_nodes);
-	const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
-	size_t node_size;
-	if(params.use_unaligned_nodes) {
-		const size_t num_unaligned_nodes =
-		        root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_QNODE_COUNT);
-		node_size = (num_unaligned_nodes * BVH_UNALIGNED_QNODE_SIZE) +
-		            (num_inner_nodes - num_unaligned_nodes) * BVH_QNODE_SIZE;
-	}
-	else {
-		node_size = num_inner_nodes * BVH_QNODE_SIZE;
-	}
-	/* Resize arrays. */
-	pack.nodes.clear();
-	pack.leaf_nodes.clear();
-	/* For top level BVH, first merge existing BVH's so we know the offsets. */
-	if(params.top_level) {
-		pack_instances(node_size, num_leaf_nodes*BVH_QNODE_LEAF_SIZE);
-	}
-	else {
-		pack.nodes.resize(node_size);
-		pack.leaf_nodes.resize(num_leaf_nodes*BVH_QNODE_LEAF_SIZE);
-	}
-
-	int nextNodeIdx = 0, nextLeafNodeIdx = 0;
-
-	vector<BVHStackEntry> stack;
-	stack.reserve(BVHParams::MAX_DEPTH*2);
-	if(root->is_leaf()) {
-		stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
-	}
-	else {
-		stack.push_back(BVHStackEntry(root, nextNodeIdx));
-		nextNodeIdx += node_qbvh_is_unaligned(root)
-		                       ? BVH_UNALIGNED_QNODE_SIZE
-		                       : BVH_QNODE_SIZE;
-	}
-
-	while(stack.size()) {
-		BVHStackEntry e = stack.back();
-		stack.pop_back();
-
-		if(e.node->is_leaf()) {
-			/* leaf node */
-			const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
-			pack_leaf(e, leaf);
-		}
-		else {
-			/* Inner node. */
-			const BVHNode *node = e.node;
-			const BVHNode *node0 = node->get_child(0);
-			const BVHNode *node1 = node->get_child(1);
-			/* Collect nodes. */
-			const BVHNode *nodes[4];
-			int numnodes = 0;
-			if(node0->is_leaf()) {
-				nodes[numnodes++] = node0;
-			}
-			else {
-				nodes[numnodes++] = node0->get_child(0);
-				nodes[numnodes++] = node0->get_child(1);
-			}
-			if(node1->is_leaf()) {
-				nodes[numnodes++] = node1;
-			}
-			else {
-				nodes[numnodes++] = node1->get_child(0);
-				nodes[numnodes++] = node1->get_child(1);
-			}
-			/* Push entries on the stack. */
-			for(int i = 0; i < numnodes; ++i) {
-				int idx;
-				if(nodes[i]->is_leaf()) {
-					idx = nextLeafNodeIdx++;
-				}
-				else {
-					idx = nextNodeIdx;
-					nextNodeIdx += node_qbvh_is_unaligned(nodes[i])
-					                       ? BVH_UNALIGNED_QNODE_SIZE
-					                       : BVH_QNODE_SIZE;
-				}
-				stack.push_back(BVHStackEntry(nodes[i], idx));
-			}
-			/* Set node. */
-			pack_inner(e, &stack[stack.size()-numnodes], numnodes);
-		}
-	}
-	assert(node_size == nextNodeIdx);
-	/* Root index to start traversal at, to handle case of single leaf node. */
-	pack.root_index = (root->is_leaf())? -1: 0;
-}
-
-void QBVH::refit_nodes()
-{
-	assert(!params.top_level);
-
-	BoundBox bbox = BoundBox::empty;
-	uint visibility = 0;
-	refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility);
-}
-
-void QBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
-{
-	if(leaf) {
-		int4 *data = &pack.leaf_nodes[idx];
-		int4 c = data[0];
-		/* Refit leaf node. */
-		for(int prim = c.x; prim < c.y; prim++) {
-			int pidx = pack.prim_index[prim];
-			int tob = pack.prim_object[prim];
-			Object *ob = objects[tob];
-
-			if(pidx == -1) {
-				/* Object instance. */
-				bbox.grow(ob->bounds);
-			}
-			else {
-				/* Primitives. */
-				const Mesh *mesh = ob->mesh;
-
-				if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
-					/* Curves. */
-					int str_offset = (params.top_level)? mesh->curve_offset: 0;
-					Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
-					int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
-
-					curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
-
-					visibility |= PATH_RAY_CURVE;
-
-					/* Motion curves. */
-					if(mesh->use_motion_blur) {
-						Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
-						if(attr) {
-							size_t mesh_size = mesh->curve_keys.size();
-							size_t steps = mesh->motion_steps - 1;
-							float3 *key_steps = attr->data_float3();
-
-							for(size_t i = 0; i < steps; i++)
-								curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bbox);
-						}
-					}
-				}
-				else {
-					/* Triangles. */
-					int tri_offset = (params.top_level)? mesh->tri_offset: 0;
-					Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
-					const float3 *vpos = &mesh->verts[0];
-
-					triangle.bounds_grow(vpos, bbox);
-
-					/* Motion triangles. */
-					if(mesh->use_motion_blur) {
-						Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
-						if(attr) {
-							size_t mesh_size = mesh->verts.size();
-							size_t steps = mesh->motion_steps - 1;
-							float3 *vert_steps = attr->data_float3();
-
-							for(size_t i = 0; i < steps; i++)
-								triangle.bounds_grow(vert_steps + i*mesh_size, bbox);
-						}
-					}
-				}
-			}
-
-			visibility |= ob->visibility;
-		}
-
-		/* TODO(sergey): This is actually a copy of pack_leaf(),
-		 * but this chunk of code only knows actual data and has
-		 * no idea about BVHNode.
-		 *
-		 * Would be nice to de-duplicate code, but trying to make
-		 * making code more general ends up in much nastier code
-		 * in my opinion so far.
-		 *
-		 * Same applies to the inner nodes case below.
-		 */
-		float4 leaf_data[BVH_QNODE_LEAF_SIZE];
-		leaf_data[0].x = __int_as_float(c.x);
-		leaf_data[0].y = __int_as_float(c.y);
-		leaf_data[0].z = __uint_as_float(visibility);
-		leaf_data[0].w = __uint_as_float(c.w);
-		memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
-	}
-	else {
-		int4 *data = &pack.nodes[idx];
-		bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
-		int4 c;
-		if(is_unaligned) {
-			c = data[13];
-		}
-		else {
-			c = data[7];
-		}
-		/* Refit inner node, set bbox from children. */
-		BoundBox child_bbox[4] = {BoundBox::empty,
-		                          BoundBox::empty,
-		                          BoundBox::empty,
-		                          BoundBox::empty};
-		uint child_visibility[4] = {0};
-		int num_nodes = 0;
-
-		for(int i = 0; i < 4; ++i) {
-			if(c[i] != 0) {
-				refit_node((c[i] < 0)? -c[i]-1: c[i], (c[i] < 0),
-				           child_bbox[i], child_visibility[i]);
-				++num_nodes;
-				bbox.grow(child_bbox[i]);
-				visibility |= child_visibility[i];
-			}
-		}
-
-		if(is_unaligned) {
-			Transform aligned_space[4] = {transform_identity(),
-			                              transform_identity(),
-			                              transform_identity(),
-			                              transform_identity()};
-			pack_unaligned_node(idx,
-			                    aligned_space,
-			                    child_bbox,
-			                    &c[0],
-			                    visibility,
-			                    0.0f,
-			                    1.0f,
-			                    4);
-		}
-		else {
-			pack_aligned_node(idx,
-			                  child_bbox,
-			                  &c[0],
-			                  visibility,
-			                  0.0f,
-			                  1.0f,
-			                  4);
-		}
-	}
-}
-
 CCL_NAMESPACE_END
--- a/intern/cycles/bvh/bvh.h
+++ b/intern/cycles/bvh/bvh.h
@@ -18,10 +18,10 @@
 #ifndef __BVH_H__
 #define __BVH_H__

-#include "bvh_params.h"
+#include "bvh/bvh_params.h"

-#include "util_types.h"
-#include "util_vector.h"
+#include "util/util_types.h"
+#include "util/util_vector.h"

 CCL_NAMESPACE_BEGIN

@@ -33,15 +33,8 @@ class LeafNode;
 class Object;
 class Progress;

-#define BVH_NODE_SIZE	4
-#define BVH_NODE_LEAF_SIZE	1
-#define BVH_QNODE_SIZE	8
-#define BVH_QNODE_LEAF_SIZE	1
-#define BVH_ALIGN		4096
-#define TRI_NODE_SIZE	3
-
-#define BVH_UNALIGNED_NODE_SIZE 7
-#define BVH_UNALIGNED_QNODE_SIZE 14
+#define BVH_ALIGN     4096
+#define TRI_NODE_SIZE 3

 /* Packed BVH
 *
@@ -54,7 +47,7 @@ struct PackedBVH {
 	/* BVH leaf nodes storage. */
 	array<int4> leaf_nodes;
 	/* object index to BVH node index mapping for instances */
-	array<int> object_node; 
+	array<int> object_node;
 	/* Mapping from primitive index to index in triangle array. */
 	array<uint> prim_tri_index;
 	/* Continuous storage of triangle vertices. */
@@ -68,6 +61,8 @@ struct PackedBVH {
 	array<int> prim_index;
 	/* mapping from BVH primitive index, to the object id of that primitive. */
 	array<int> prim_object;
+	/* Time range of BVH primitive. */
+	array<float2> prim_time;

 	/* index of the root node. */
 	int root_index;
@@ -108,95 +103,16 @@ protected:
 	virtual void refit_nodes() = 0;
 };

-/* Regular BVH
- *
- * Typical BVH with each node having two children. */
+/* Pack utility: stack entry pairing a BVH node with its index in the packed node arrays. */
+struct BVHStackEntry
+{
+	const BVHNode *node;
+	int idx;

-class RegularBVH : public BVH {
-protected:
-	/* constructor */
-	friend class BVH;
-	RegularBVH(const BVHParams& params, const vector<Object*>& objects);
-
-	/* pack */
-	void pack_nodes(const BVHNode *root);
-
-	void pack_leaf(const BVHStackEntry& e,
-	               const LeafNode *leaf);
-	void pack_inner(const BVHStackEntry& e,
-	                const BVHStackEntry& e0,
-	                const BVHStackEntry& e1);
-
-	void pack_aligned_inner(const BVHStackEntry& e,
-	                        const BVHStackEntry& e0,
-	                        const BVHStackEntry& e1);
-	void pack_aligned_node(int idx,
-	                       const BoundBox& b0,
-	                       const BoundBox& b1,
-	                       int c0, int c1,
-	                       uint visibility0, uint visibility1);
-
-	void pack_unaligned_inner(const BVHStackEntry& e,
-	                          const BVHStackEntry& e0,
-	                          const BVHStackEntry& e1);
-	void pack_unaligned_node(int idx,
-	                         const Transform& aligned_space0,
-	                         const Transform& aligned_space1,
-	                         const BoundBox& b0,
-	                         const BoundBox& b1,
-	                         int c0, int c1,
-	                         uint visibility0, uint visibility1);
-
-	/* refit */
-	void refit_nodes();
-	void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility);
-};
-
-/* QBVH
- *
- * Quad BVH, with each node having four children, to use with SIMD instructions. */
-
-class QBVH : public BVH {
-protected:
-	/* constructor */
-	friend class BVH;
-	QBVH(const BVHParams& params, const vector<Object*>& objects);
-
-	/* pack */
-	void pack_nodes(const BVHNode *root);
-
-	void pack_leaf(const BVHStackEntry& e, const LeafNode *leaf);
-	void pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num);
-
-	void pack_aligned_inner(const BVHStackEntry& e,
-	                        const BVHStackEntry *en,
-	                        int num);
-	void pack_aligned_node(int idx,
-	                       const BoundBox *bounds,
-	                       const int *child,
-	                       const uint visibility,
-	                       const float time_from,
-	                       const float time_to,
-	                       const int num);
-
-	void pack_unaligned_inner(const BVHStackEntry& e,
-	                          const BVHStackEntry *en,
-	                          int num);
-	void pack_unaligned_node(int idx,
-	                         const Transform *aligned_space,
-	                         const BoundBox *bounds,
-	                         const int *child,
-	                         const uint visibility,
-	                         const float time_from,
-	                         const float time_to,
-	                         const int num);
-
-	/* refit */
-	void refit_nodes();
-	void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility);
+	BVHStackEntry(const BVHNode *n = 0, int i = 0);
+	int encodeIdx() const;
 };

 CCL_NAMESPACE_END

 #endif /* __BVH_H__ */
-
--- a/intern/cycles/bvh/bvh2.cpp
+++ b/intern/cycles/bvh/bvh2.cpp
@@ -0,0 +1,364 @@
+/*
+ * Adapted from code copyright 2009-2010 NVIDIA Corporation
+ * Modifications Copyright 2011, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bvh/bvh2.h"
+
+#include "render/mesh.h"
+#include "render/object.h"
+
+#include "bvh/bvh_node.h"
+#include "bvh/bvh_unaligned.h"
+
+CCL_NAMESPACE_BEGIN
+
+static bool node_bvh_is_unaligned(const BVHNode *node)
+{
+	const BVHNode *node0 = node->get_child(0),
+	              *node1 = node->get_child(1);
+	return node0->is_unaligned || node1->is_unaligned;
+}
+
+BVH2::BVH2(const BVHParams& params_, const vector<Object*>& objects_)
+: BVH(params_, objects_)
+{
+}
+
+void BVH2::pack_leaf(const BVHStackEntry& e,
+                     const LeafNode *leaf)
+{
+	assert(e.idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
+	float4 data[BVH_NODE_LEAF_SIZE];
+	memset(data, 0, sizeof(data));
+	if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
+		/* Object instance (prim_index is -1): store bit-complemented lo, zero hi. */
+		data[0].x = __int_as_float(~(leaf->lo));
+		data[0].y = __int_as_float(0);
+	}
+	else {
+		/* Primitives: store the leaf's lo/hi values unchanged. */
+		data[0].x = __int_as_float(leaf->lo);
+		data[0].y = __int_as_float(leaf->hi);
+	}
+	data[0].z = __uint_as_float(leaf->visibility);
+	if(leaf->num_triangles() != 0) {
+		data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
+	}
+
+	memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_NODE_LEAF_SIZE);
+}
+
+void BVH2::pack_inner(const BVHStackEntry& e,
+                      const BVHStackEntry& e0,
+                      const BVHStackEntry& e1)
+{
+	if(e0.node->is_unaligned || e1.node->is_unaligned) {
+		pack_unaligned_inner(e, e0, e1);
+	} else {
+		pack_aligned_inner(e, e0, e1);
+	}
+}
+
+void BVH2::pack_aligned_inner(const BVHStackEntry& e,
+                              const BVHStackEntry& e0,
+                              const BVHStackEntry& e1)
+{
+	pack_aligned_node(e.idx,
+	                  e0.node->bounds, e1.node->bounds,
+	                  e0.encodeIdx(), e1.encodeIdx(),
+	                  e0.node->visibility, e1.node->visibility);
+}
+
+void BVH2::pack_aligned_node(int idx,
+                             const BoundBox& b0,
+                             const BoundBox& b1,
+                             int c0, int c1,
+                             uint visibility0, uint visibility1)
+{
+	assert(idx + BVH_NODE_SIZE <= pack.nodes.size());
+	assert(c0 < 0 || c0 < pack.nodes.size());
+	assert(c1 < 0 || c1 < pack.nodes.size());
+
+	int4 data[BVH_NODE_SIZE] = {
+		make_int4(visibility0 & ~PATH_RAY_NODE_UNALIGNED,
+		          visibility1 & ~PATH_RAY_NODE_UNALIGNED,
+		          c0, c1),
+		make_int4(__float_as_int(b0.min.x),
+		          __float_as_int(b1.min.x),
+		          __float_as_int(b0.max.x),
+		          __float_as_int(b1.max.x)),
+		make_int4(__float_as_int(b0.min.y),
+		          __float_as_int(b1.min.y),
+		          __float_as_int(b0.max.y),
+		          __float_as_int(b1.max.y)),
+		make_int4(__float_as_int(b0.min.z),
+		          __float_as_int(b1.min.z),
+		          __float_as_int(b0.max.z),
+		          __float_as_int(b1.max.z)),
+	};
+
+	memcpy(&pack.nodes[idx], data, sizeof(int4)*BVH_NODE_SIZE);
+}
+
+void BVH2::pack_unaligned_inner(const BVHStackEntry& e,
+                                const BVHStackEntry& e0,
+                                const BVHStackEntry& e1)
+{
+	pack_unaligned_node(e.idx,
+	                    e0.node->get_aligned_space(),
+	                    e1.node->get_aligned_space(),
+	                    e0.node->bounds,
+	                    e1.node->bounds,
+	                    e0.encodeIdx(), e1.encodeIdx(),
+	                    e0.node->visibility, e1.node->visibility);
+}
+
+void BVH2::pack_unaligned_node(int idx,
+                               const Transform& aligned_space0,
+                               const Transform& aligned_space1,
+                               const BoundBox& bounds0,
+                               const BoundBox& bounds1,
+                               int c0, int c1,
+                               uint visibility0, uint visibility1)
+{
+	assert(idx + BVH_UNALIGNED_NODE_SIZE <= pack.nodes.size());
+	assert(c0 < 0 || c0 < pack.nodes.size());
+	assert(c1 < 0 || c1 < pack.nodes.size());
+
+	float4 data[BVH_UNALIGNED_NODE_SIZE];
+	Transform space0 = BVHUnaligned::compute_node_transform(bounds0,
+	                                                        aligned_space0);
+	Transform space1 = BVHUnaligned::compute_node_transform(bounds1,
+	                                                        aligned_space1);
+	data[0] = make_float4(__int_as_float(visibility0 | PATH_RAY_NODE_UNALIGNED),
+	                      __int_as_float(visibility1 | PATH_RAY_NODE_UNALIGNED),
+	                      __int_as_float(c0),
+	                      __int_as_float(c1));
+
+	data[1] = space0.x;
+	data[2] = space0.y;
+	data[3] = space0.z;
+	data[4] = space1.x;
+	data[5] = space1.y;
+	data[6] = space1.z;
+
+	memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_NODE_SIZE);
+}
+
+void BVH2::pack_nodes(const BVHNode *root)
+{
+	const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
+	const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
+	assert(num_leaf_nodes <= num_nodes);
+	const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
+	size_t node_size;
+	if(params.use_unaligned_nodes) {
+		const size_t num_unaligned_nodes =
+		        root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
+		node_size = (num_unaligned_nodes * BVH_UNALIGNED_NODE_SIZE) +
+		            (num_inner_nodes - num_unaligned_nodes) * BVH_NODE_SIZE;
+	}
+	else {
+		node_size = num_inner_nodes * BVH_NODE_SIZE;
+	}
+	/* Resize arrays */
+	pack.nodes.clear();
+	pack.leaf_nodes.clear();
+	/* For top level BVH, first merge existing BVH's so we know the offsets. */
+	if(params.top_level) {
+		pack_instances(node_size, num_leaf_nodes*BVH_NODE_LEAF_SIZE);
+	}
+	else {
+		pack.nodes.resize(node_size);
+		pack.leaf_nodes.resize(num_leaf_nodes*BVH_NODE_LEAF_SIZE);
+	}
+
+	int nextNodeIdx = 0, nextLeafNodeIdx = 0;
+
+	vector<BVHStackEntry> stack;
+	stack.reserve(BVHParams::MAX_DEPTH*2);
+	if(root->is_leaf()) {
+		stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
+	}
+	else {
+		stack.push_back(BVHStackEntry(root, nextNodeIdx));
+		nextNodeIdx += node_bvh_is_unaligned(root)
+		                       ? BVH_UNALIGNED_NODE_SIZE
+		                       : BVH_NODE_SIZE;
+	}
+
+	while(stack.size()) {
+		BVHStackEntry e = stack.back();
+		stack.pop_back();
+
+		if(e.node->is_leaf()) {
+			/* leaf node */
+			const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
+			pack_leaf(e, leaf);
+		}
+		else {
+			/* inner node */
+			int idx[2];
+			for(int i = 0; i < 2; ++i) {
+				if(e.node->get_child(i)->is_leaf()) {
+					idx[i] = nextLeafNodeIdx++;
+				}
+				else {
+					idx[i] = nextNodeIdx;
+					nextNodeIdx += node_bvh_is_unaligned(e.node->get_child(i))
+					                       ? BVH_UNALIGNED_NODE_SIZE
+					                       : BVH_NODE_SIZE;
+				}
+			}
+
+			stack.push_back(BVHStackEntry(e.node->get_child(0), idx[0]));
+			stack.push_back(BVHStackEntry(e.node->get_child(1), idx[1]));
+
+			pack_inner(e, stack[stack.size()-2], stack[stack.size()-1]);
+		}
+	}
+	assert(node_size == nextNodeIdx);
+	/* root index to start traversal at, to handle case of single leaf node */
+	pack.root_index = (root->is_leaf())? -1: 0;
+}
+
+void BVH2::refit_nodes()
+{
+	assert(!params.top_level);
+
+	BoundBox bbox = BoundBox::empty;
+	uint visibility = 0;
+	refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility);
+}
+
+void BVH2::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
+{
+	if(leaf) {
+		assert(idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
+		const int4 *data = &pack.leaf_nodes[idx];
+		const int c0 = data[0].x;
+		const int c1 = data[0].y;
+		/* refit leaf node */
+		for(int prim = c0; prim < c1; prim++) {
+			int pidx = pack.prim_index[prim];
+			int tob = pack.prim_object[prim];
+			Object *ob = objects[tob];
+
+			if(pidx == -1) {
+				/* object instance */
+				bbox.grow(ob->bounds);
+			}
+			else {
+				/* primitives */
+				const Mesh *mesh = ob->mesh;
+
+				if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
+					/* curves */
+					int str_offset = (params.top_level)? mesh->curve_offset: 0;
+					Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
+					int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
+
+					curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
+
+					visibility |= PATH_RAY_CURVE;
+
+					/* motion curves */
+					if(mesh->use_motion_blur) {
+						Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+						if(attr) {
+							size_t mesh_size = mesh->curve_keys.size();
+							size_t steps = mesh->motion_steps - 1;
+							float3 *key_steps = attr->data_float3();
+
+							for(size_t i = 0; i < steps; i++)
+								curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bbox);
+						}
+					}
+				}
+				else {
+					/* triangles */
+					int tri_offset = (params.top_level)? mesh->tri_offset: 0;
+					Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
+					const float3 *vpos = &mesh->verts[0];
+
+					triangle.bounds_grow(vpos, bbox);
+
+					/* motion triangles */
+					if(mesh->use_motion_blur) {
+						Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+						if(attr) {
+							size_t mesh_size = mesh->verts.size();
+							size_t steps = mesh->motion_steps - 1;
+							float3 *vert_steps = attr->data_float3();
+
+							for(size_t i = 0; i < steps; i++)
+								triangle.bounds_grow(vert_steps + i*mesh_size, bbox);
+						}
+					}
+				}
+			}
+
+			visibility |= ob->visibility;
+		}
+
+		/* TODO(sergey): De-duplicate with pack_leaf(). */
+		float4 leaf_data[BVH_NODE_LEAF_SIZE];
+		leaf_data[0].x = __int_as_float(c0);
+		leaf_data[0].y = __int_as_float(c1);
+		leaf_data[0].z = __uint_as_float(visibility);
+		leaf_data[0].w = __uint_as_float(data[0].w);
+		memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_NODE_LEAF_SIZE);
+	}
+	else {
+		assert(idx + BVH_NODE_SIZE <= pack.nodes.size());
+
+		const int4 *data = &pack.nodes[idx];
+		const bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
+		const int c0 = data[0].z;
+		const int c1 = data[0].w;
+		/* refit inner node, set bbox from children */
+		BoundBox bbox0 = BoundBox::empty, bbox1 = BoundBox::empty;
+		uint visibility0 = 0, visibility1 = 0;
+
+		refit_node((c0 < 0)? -c0-1: c0, (c0 < 0), bbox0, visibility0);
+		refit_node((c1 < 0)? -c1-1: c1, (c1 < 0), bbox1, visibility1);
+
+		if(is_unaligned) {
+			Transform aligned_space = transform_identity();
+			pack_unaligned_node(idx,
+			                    aligned_space, aligned_space,
+			                    bbox0, bbox1,
+			                    c0, c1,
+			                    visibility0,
+			                    visibility1);
+		}
+		else {
+			pack_aligned_node(idx,
+			                  bbox0, bbox1,
+			                  c0, c1,
+			                  visibility0,
+			                  visibility1);
+		}
+
+		bbox.grow(bbox0);
+		bbox.grow(bbox1);
+		visibility = visibility0|visibility1;
+	}
+}
+
+CCL_NAMESPACE_END
--- a/intern/cycles/bvh/bvh2.h
+++ b/intern/cycles/bvh/bvh2.h
@@ -0,0 +1,87 @@
+/*
+ * Adapted from code copyright 2009-2010 NVIDIA Corporation
+ * Modifications Copyright 2011, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BVH2_H__
+#define __BVH2_H__
+
+#include "bvh/bvh.h"
+#include "bvh/bvh_params.h"
+
+#include "util/util_types.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class BVHNode;
+struct BVHStackEntry;
+class BVHParams;
+class BoundBox;
+class LeafNode;
+class Object;
+class Progress;
+
+#define BVH_NODE_SIZE           4
+#define BVH_NODE_LEAF_SIZE      1
+#define BVH_UNALIGNED_NODE_SIZE 7
+
+/* BVH2
+ *
+ * Typical BVH with each node having two children.
+ */
+class BVH2 : public BVH {
+protected:
+	/* constructor */
+	friend class BVH;
+	BVH2(const BVHParams& params, const vector<Object*>& objects);
+
+	/* pack */
+	void pack_nodes(const BVHNode *root);
+
+	void pack_leaf(const BVHStackEntry& e,
+	               const LeafNode *leaf);
+	void pack_inner(const BVHStackEntry& e,
+	                const BVHStackEntry& e0,
+	                const BVHStackEntry& e1);
+
+	void pack_aligned_inner(const BVHStackEntry& e,
+	                        const BVHStackEntry& e0,
+	                        const BVHStackEntry& e1);
+	void pack_aligned_node(int idx,
+	                       const BoundBox& b0,
+	                       const BoundBox& b1,
+	                       int c0, int c1,
+	                       uint visibility0, uint visibility1);
+
+	void pack_unaligned_inner(const BVHStackEntry& e,
+	                          const BVHStackEntry& e0,
+	                          const BVHStackEntry& e1);
+	void pack_unaligned_node(int idx,
+	                         const Transform& aligned_space0,
+	                         const Transform& aligned_space1,
+	                         const BoundBox& b0,
+	                         const BoundBox& b1,
+	                         int c0, int c1,
+	                         uint visibility0, uint visibility1);
+
+	/* refit */
+	void refit_nodes();
+	void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility);
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __BVH2_H__ */
--- a/intern/cycles/bvh/bvh4.cpp
+++ b/intern/cycles/bvh/bvh4.cpp
@@ -0,0 +1,516 @@
+/*
+ * Adapted from code copyright 2009-2010 NVIDIA Corporation
+ * Modifications Copyright 2011, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bvh/bvh4.h"
+
+#include "render/mesh.h"
+#include "render/object.h"
+
+#include "bvh/bvh_node.h"
+#include "bvh/bvh_unaligned.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Can we avoid this somehow or make more generic?
+ *
+ * Perhaps we can merge nodes in actual tree and make our
+ * life easier all over the place.
+ */
+static bool node_qbvh_is_unaligned(const BVHNode *node)
+{
+	const BVHNode *node0 = node->get_child(0),
+	              *node1 = node->get_child(1);
+	bool has_unaligned = false;
+	if(node0->is_leaf()) {
+		has_unaligned |= node0->is_unaligned;
+	}
+	else {
+		has_unaligned |= node0->get_child(0)->is_unaligned;
+		has_unaligned |= node0->get_child(1)->is_unaligned;
+	}
+	if(node1->is_leaf()) {
+		has_unaligned |= node1->is_unaligned;
+	}
+	else {
+		has_unaligned |= node1->get_child(0)->is_unaligned;
+		has_unaligned |= node1->get_child(1)->is_unaligned;
+	}
+	return has_unaligned;
+}
+
+BVH4::BVH4(const BVHParams& params_, const vector<Object*>& objects_)
+: BVH(params_, objects_)
+{
+	params.use_qbvh = true;
+}
+
+void BVH4::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
+{
+	float4 data[BVH_QNODE_LEAF_SIZE];
+	memset(data, 0, sizeof(data));
+	if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
+		/* Object instance (prim_index is -1): store bit-complemented lo, zero hi. */
+		data[0].x = __int_as_float(~(leaf->lo));
+		data[0].y = __int_as_float(0);
+	}
+	else {
+		/* Primitives: store the leaf's lo/hi values unchanged. */
+		data[0].x = __int_as_float(leaf->lo);
+		data[0].y = __int_as_float(leaf->hi);
+	}
+	data[0].z = __uint_as_float(leaf->visibility);
+	if(leaf->num_triangles() != 0) {
+		data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
+	}
+
+	memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
+}
+
+void BVH4::pack_inner(const BVHStackEntry& e,
+                      const BVHStackEntry *en,
+                      int num)
+{
+	bool has_unaligned = false;
+	/* Check whether we have to create unaligned node or all nodes are aligned
+	 * and we can cut some corner here.
+	 */
+	if(params.use_unaligned_nodes) {
+		for(int i = 0; i < num; i++) {
+			if(en[i].node->is_unaligned) {
+				has_unaligned = true;
+				break;
+			}
+		}
+	}
+	if(has_unaligned) {
+		/* Some child is unaligned, pack into node with per-child transforms. */
+		pack_unaligned_inner(e, en, num);
+	}
+	else {
+		/* No unaligned children (or unaligned nodes are disabled),
+		 * pack into a regular AABB node.
+		 */
+		pack_aligned_inner(e, en, num);
+	}
+}
+
+void BVH4::pack_aligned_inner(const BVHStackEntry& e,
+                              const BVHStackEntry *en,
+                              int num)
+{
+	BoundBox bounds[4];
+	int child[4];
+	for(int i = 0; i < num; ++i) {
+		bounds[i] = en[i].node->bounds;
+		child[i] = en[i].encodeIdx();
+	}
+	pack_aligned_node(e.idx,
+	                  bounds,
+	                  child,
+	                  e.node->visibility,
+	                  e.node->time_from,
+	                  e.node->time_to,
+	                  num);
+}
+
+void BVH4::pack_aligned_node(int idx,
+                             const BoundBox *bounds,
+                             const int *child,
+                             const uint visibility,
+                             const float time_from,
+                             const float time_to,
+                             const int num)
+{
+	float4 data[BVH_QNODE_SIZE];
+	memset(data, 0, sizeof(data));
+
+	data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
+	data[0].y = time_from;
+	data[0].z = time_to;
+
+	for(int i = 0; i < num; i++) {
+		float3 bb_min = bounds[i].min;
+		float3 bb_max = bounds[i].max;
+
+		data[1][i] = bb_min.x;
+		data[2][i] = bb_max.x;
+		data[3][i] = bb_min.y;
+		data[4][i] = bb_max.y;
+		data[5][i] = bb_min.z;
+		data[6][i] = bb_max.z;
+
+		data[7][i] = __int_as_float(child[i]);
+	}
+
+	for(int i = num; i < 4; i++) {
+		/* We store BB which would never be recorded as intersection
+		 * so kernel might safely assume there are always 4 child nodes.
+		 */
+		data[1][i] = FLT_MAX;
+		data[2][i] = -FLT_MAX;
+
+		data[3][i] = FLT_MAX;
+		data[4][i] = -FLT_MAX;
+
+		data[5][i] = FLT_MAX;
+		data[6][i] = -FLT_MAX;
+
+		data[7][i] = __int_as_float(0);
+	}
+
+	memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_QNODE_SIZE);
+}
+
+void BVH4::pack_unaligned_inner(const BVHStackEntry& e,
+                                const BVHStackEntry *en,
+                                int num)
+{
+	Transform aligned_space[4];
+	BoundBox bounds[4];
+	int child[4];
+	for(int i = 0; i < num; ++i) {
+		aligned_space[i] = en[i].node->get_aligned_space();
+		bounds[i] = en[i].node->bounds;
+		child[i] = en[i].encodeIdx();
+	}
+	pack_unaligned_node(e.idx,
+	                    aligned_space,
+	                    bounds,
+	                    child,
+	                    e.node->visibility,
+	                    e.node->time_from,
+	                    e.node->time_to,
+	                    num);
+}
+
+void BVH4::pack_unaligned_node(int idx,
+                               const Transform *aligned_space,
+                               const BoundBox *bounds,
+                               const int *child,
+                               const uint visibility,
+                               const float time_from,
+                               const float time_to,
+                               const int num)
+{
+	float4 data[BVH_UNALIGNED_QNODE_SIZE];
+	memset(data, 0, sizeof(data));
+
+	data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
+	data[0].y = time_from;
+	data[0].z = time_to;
+
+	for(int i = 0; i < num; i++) {
+		Transform space = BVHUnaligned::compute_node_transform(
+		        bounds[i],
+		        aligned_space[i]);
+
+		data[1][i] = space.x.x;
+		data[2][i] = space.x.y;
+		data[3][i] = space.x.z;
+
+		data[4][i] = space.y.x;
+		data[5][i] = space.y.y;
+		data[6][i] = space.y.z;
+
+		data[7][i] = space.z.x;
+		data[8][i] = space.z.y;
+		data[9][i] = space.z.z;
+
+		data[10][i] = space.x.w;
+		data[11][i] = space.y.w;
+		data[12][i] = space.z.w;
+
+		data[13][i] = __int_as_float(child[i]);
+	}
+
+	for(int i = num; i < 4; i++) {
+		/* We store BB which would never be recorded as intersection
+		 * so kernel might safely assume there are always 4 child nodes.
+		 */
+
+		data[1][i] = 1.0f;
+		data[2][i] = 0.0f;
+		data[3][i] = 0.0f;
+
+		data[4][i] = 0.0f;
+		data[5][i] = 0.0f;
+		data[6][i] = 0.0f;
+
+		data[7][i] = 0.0f;
+		data[8][i] = 0.0f;
+		data[9][i] = 0.0f;
+
+		data[10][i] = -FLT_MAX;
+		data[11][i] = -FLT_MAX;
+		data[12][i] = -FLT_MAX;
+
+		data[13][i] = __int_as_float(0);
+	}
+
+	memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE);
+}
+
+/* Quad SIMD Nodes */
+
+void BVH4::pack_nodes(const BVHNode *root)
+{
+	/* Calculate size of the arrays required. */
+	const size_t num_nodes = root->getSubtreeSize(BVH_STAT_QNODE_COUNT);
+	const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
+	assert(num_leaf_nodes <= num_nodes);
+	const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
+	size_t node_size;
+	if(params.use_unaligned_nodes) {
+		const size_t num_unaligned_nodes =
+		        root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_QNODE_COUNT);
+		node_size = (num_unaligned_nodes * BVH_UNALIGNED_QNODE_SIZE) +
+		            (num_inner_nodes - num_unaligned_nodes) * BVH_QNODE_SIZE;
+	}
+	else {
+		node_size = num_inner_nodes * BVH_QNODE_SIZE;
+	}
+	/* Resize arrays. */
+	pack.nodes.clear();
+	pack.leaf_nodes.clear();
+	/* For top level BVH, first merge existing BVH's so we know the offsets. */
+	if(params.top_level) {
+		pack_instances(node_size, num_leaf_nodes*BVH_QNODE_LEAF_SIZE);
+	}
+	else {
+		pack.nodes.resize(node_size);
+		pack.leaf_nodes.resize(num_leaf_nodes*BVH_QNODE_LEAF_SIZE);
+	}
+
+	int nextNodeIdx = 0, nextLeafNodeIdx = 0;
+
+	vector<BVHStackEntry> stack;
+	stack.reserve(BVHParams::MAX_DEPTH*2);
+	if(root->is_leaf()) {
+		stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
+	}
+	else {
+		stack.push_back(BVHStackEntry(root, nextNodeIdx));
+		nextNodeIdx += node_qbvh_is_unaligned(root)
+		                       ? BVH_UNALIGNED_QNODE_SIZE
+		                       : BVH_QNODE_SIZE;
+	}
+
+	while(stack.size()) {
+		BVHStackEntry e = stack.back();
+		stack.pop_back();
+
+		if(e.node->is_leaf()) {
+			/* leaf node */
+			const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
+			pack_leaf(e, leaf);
+		}
+		else {
+			/* Inner node. */
+			const BVHNode *node = e.node;
+			const BVHNode *node0 = node->get_child(0);
+			const BVHNode *node1 = node->get_child(1);
+			/* Collect nodes. */
+			const BVHNode *nodes[4];
+			int numnodes = 0;
+			if(node0->is_leaf()) {
+				nodes[numnodes++] = node0;
+			}
+			else {
+				nodes[numnodes++] = node0->get_child(0);
+				nodes[numnodes++] = node0->get_child(1);
+			}
+			if(node1->is_leaf()) {
+				nodes[numnodes++] = node1;
+			}
+			else {
+				nodes[numnodes++] = node1->get_child(0);
+				nodes[numnodes++] = node1->get_child(1);
+			}
+			/* Push entries on the stack. */
+			for(int i = 0; i < numnodes; ++i) {
+				int idx;
+				if(nodes[i]->is_leaf()) {
+					idx = nextLeafNodeIdx++;
+				}
+				else {
+					idx = nextNodeIdx;
+					nextNodeIdx += node_qbvh_is_unaligned(nodes[i])
+					                       ? BVH_UNALIGNED_QNODE_SIZE
+					                       : BVH_QNODE_SIZE;
+				}
+				stack.push_back(BVHStackEntry(nodes[i], idx));
+			}
+			/* Set node. */
+			pack_inner(e, &stack[stack.size()-numnodes], numnodes);
+		}
+	}
+	assert(node_size == nextNodeIdx);
+	/* Root index to start traversal at, to handle case of single leaf node. */
+	pack.root_index = (root->is_leaf())? -1: 0;
+}
+
+void BVH4::refit_nodes()
+{
+	assert(!params.top_level);
+
+	BoundBox bbox = BoundBox::empty;
+	uint visibility = 0;
+	refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility);
+}
+
+void BVH4::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
+{
+	if(leaf) {
+		int4 *data = &pack.leaf_nodes[idx];
+		int4 c = data[0];
+		/* Refit leaf node: grow bbox/visibility over primitives [c.x, c.y). */
+		for(int prim = c.x; prim < c.y; prim++) {
+			int pidx = pack.prim_index[prim];
+			int tob = pack.prim_object[prim];
+			Object *ob = objects[tob];
+
+			if(pidx == -1) {
+				/* Object instance. */
+				bbox.grow(ob->bounds);
+			}
+			else {
+				/* Primitives. */
+				const Mesh *mesh = ob->mesh;
+
+				if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
+					/* Curves. */
+					int str_offset = (params.top_level)? mesh->curve_offset: 0;
+					Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
+					int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
+
+					curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);
+
+					visibility |= PATH_RAY_CURVE;
+
+					/* Motion curves. */
+					if(mesh->use_motion_blur) {
+						Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+						if(attr) {
+							size_t mesh_size = mesh->curve_keys.size();
+							size_t steps = mesh->motion_steps - 1;
+							float3 *key_steps = attr->data_float3();
+
+							for(size_t i = 0; i < steps; i++)
+								curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bbox);
+						}
+					}
+				}
+				else {
+					/* Triangles. */
+					int tri_offset = (params.top_level)? mesh->tri_offset: 0;
+					Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
+					const float3 *vpos = &mesh->verts[0];
+
+					triangle.bounds_grow(vpos, bbox);
+
+					/* Motion triangles. */
+					if(mesh->use_motion_blur) {
+						Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+						if(attr) {
+							size_t mesh_size = mesh->verts.size();
+							size_t steps = mesh->motion_steps - 1;
+							float3 *vert_steps = attr->data_float3();
+
+							for(size_t i = 0; i < steps; i++)
+								triangle.bounds_grow(vert_steps + i*mesh_size, bbox);
+						}
+					}
+				}
+			}
+
+			visibility |= ob->visibility;
+		}
+
+		/* TODO(sergey): This is actually a copy of pack_leaf(),
+		 * but this chunk of code only knows actual data and has
+		 * no idea about BVHNode.
+		 *
+		 * Would be nice to de-duplicate code, but trying to make
+		 * the code more general ends up in much nastier code
+		 * in my opinion so far.
+		 *
+		 * Same applies to the inner nodes case below.
+		 */
+		float4 leaf_data[BVH_QNODE_LEAF_SIZE];
+		leaf_data[0].x = __int_as_float(c.x);
+		leaf_data[0].y = __int_as_float(c.y);
+		leaf_data[0].z = __uint_as_float(visibility);
+		leaf_data[0].w = __uint_as_float(c.w);
+		memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
+	}
+	else {
+		int4 *data = &pack.nodes[idx];
+		bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
+		int4 c;
+		if(is_unaligned) {
+			c = data[13];
+		}
+		else {
+			c = data[7];
+		}
+		/* Refit inner node, set bbox from children; padded child slots hold index 0. */
+		BoundBox child_bbox[4] = {BoundBox::empty,
+		                          BoundBox::empty,
+		                          BoundBox::empty,
+		                          BoundBox::empty};
+		uint child_visibility[4] = {0};
+		int num_nodes = 0;
+
+		for(int i = 0; i < 4; ++i) {
+			if(c[i] != 0) {
+				refit_node((c[i] < 0)? -c[i]-1: c[i], (c[i] < 0),
+				           child_bbox[i], child_visibility[i]);
+				++num_nodes;
+				bbox.grow(child_bbox[i]);
+				visibility |= child_visibility[i];
+			}
+		}
+
+		if(is_unaligned) {
+			Transform aligned_space[4] = {transform_identity(),
+			                              transform_identity(),
+			                              transform_identity(),
+			                              transform_identity()};
+			pack_unaligned_node(idx,
+			                    aligned_space,
+			                    child_bbox,
+			                    &c[0],
+			                    visibility,
+			                    0.0f,
+			                    1.0f,
+			                    num_nodes);
+		}
+		else {
+			pack_aligned_node(idx,
+			                  child_bbox,
+			                  &c[0],
+			                  visibility,
+			                  0.0f,
+			                  1.0f,
+			                  num_nodes);
+		}
+	}
+}
+
+CCL_NAMESPACE_END
--- a/intern/cycles/bvh/bvh4.h
+++ b/intern/cycles/bvh/bvh4.h
@@ -0,0 +1,87 @@
+/*
+ * Adapted from code copyright 2009-2010 NVIDIA Corporation
+ * Modifications Copyright 2011, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BVH4_H__
+#define __BVH4_H__
+
+#include "bvh/bvh.h"
+#include "bvh/bvh_params.h"
+
+#include "util/util_types.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class BVHNode;
+struct BVHStackEntry;
+class BVHParams;
+class BoundBox;
+class LeafNode;
+class Object;
+class Progress;
+
+#define BVH_QNODE_SIZE           8
+#define BVH_QNODE_LEAF_SIZE      1
+#define BVH_UNALIGNED_QNODE_SIZE 14
+
+/* BVH4
+ *
+ * Quad BVH, with each node having four children, to use with SIMD instructions.
+ */
+class BVH4 : public BVH {
+protected:
+	/* constructor */
+	friend class BVH;
+	BVH4(const BVHParams& params, const vector<Object*>& objects);
+
+	/* pack */
+	void pack_nodes(const BVHNode *root);
+
+	void pack_leaf(const BVHStackEntry& e, const LeafNode *leaf);
+	void pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num);
+
+	void pack_aligned_inner(const BVHStackEntry& e,
+	                        const BVHStackEntry *en,
+	                        int num);
+	void pack_aligned_node(int idx,
+	                       const BoundBox *bounds,
+	                       const int *child,
+	                       const uint visibility,
+	                       const float time_from,
+	                       const float time_to,
+	                       const int num);
+
+	void pack_unaligned_inner(const BVHStackEntry& e,
+	                          const BVHStackEntry *en,
+	                          int num);
+	void pack_unaligned_node(int idx,
+	                         const Transform *aligned_space,
+	                         const BoundBox *bounds,
+	                         const int *child,
+	                         const uint visibility,
+	                         const float time_from,
+	                         const float time_to,
+	                         const int num);
+
+	/* refit */
+	void refit_nodes();
+	void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility);
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __BVH4_H__ */
--- a/intern/cycles/bvh/bvh_binning.cpp
+++ b/intern/cycles/bvh/bvh_binning.cpp
@@ -17,13 +17,13 @@

 //#define __KERNEL_SSE__

+#include "bvh/bvh_binning.h"
+
 #include <stdlib.h>

-#include "bvh_binning.h"
-
-#include "util_algorithm.h"
-#include "util_boundbox.h"
-#include "util_types.h"
+#include "util/util_algorithm.h"
+#include "util/util_boundbox.h"
+#include "util/util_types.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/bvh/bvh_binning.h
+++ b/intern/cycles/bvh/bvh_binning.h
@@ -18,10 +18,10 @@
 #ifndef __BVH_BINNING_H__
 #define __BVH_BINNING_H__

-#include "bvh_params.h"
-#include "bvh_unaligned.h"
+#include "bvh/bvh_params.h"
+#include "bvh/bvh_unaligned.h"

-#include "util_types.h"
+#include "util/util_types.h"

 CCL_NAMESPACE_BEGIN

@@ -111,5 +111,4 @@ protected:

 CCL_NAMESPACE_END

-#endif
-
+#endif  /* __BVH_BINNING_H__ */
--- a/intern/cycles/bvh/bvh_build.cpp
+++ b/intern/cycles/bvh/bvh_build.cpp
@@ -15,25 +15,27 @@
 * limitations under the License.
 */

-#include "bvh_binning.h"
-#include "bvh_build.h"
-#include "bvh_node.h"
-#include "bvh_params.h"
+#include "bvh/bvh_build.h"
+
+#include "bvh/bvh_binning.h"
+#include "bvh/bvh_node.h"
+#include "bvh/bvh_params.h"
 #include "bvh_split.h"

-#include "mesh.h"
-#include "object.h"
-#include "scene.h"
-#include "curves.h"
+#include "render/mesh.h"
+#include "render/object.h"
+#include "render/scene.h"
+#include "render/curves.h"

-#include "util_debug.h"
-#include "util_foreach.h"
-#include "util_logging.h"
-#include "util_progress.h"
-#include "util_stack_allocator.h"
-#include "util_simd.h"
-#include "util_time.h"
-#include "util_queue.h"
+#include "util/util_algorithm.h"
+#include "util/util_debug.h"
+#include "util/util_foreach.h"
+#include "util/util_logging.h"
+#include "util/util_progress.h"
+#include "util/util_stack_allocator.h"
+#include "util/util_simd.h"
+#include "util/util_time.h"
+#include "util/util_queue.h"

 CCL_NAMESPACE_BEGIN

@@ -92,12 +94,14 @@ BVHBuild::BVHBuild(const vector<Object*>& objects_,
                   array<int>& prim_type_,
                   array<int>& prim_index_,
                   array<int>& prim_object_,
+                   array<float2>& prim_time_,
                   const BVHParams& params_,
                   Progress& progress_)
 : objects(objects_),
   prim_type(prim_type_),
   prim_index(prim_index_),
   prim_object(prim_object_),
+   prim_time(prim_time_),
   params(params_),
   progress(progress_),
   progress_start_time(0.0),
@@ -464,6 +468,9 @@ BVHNode* BVHBuild::run()
 	}
 	spatial_free_index = 0;

+	need_prim_time = params.num_motion_curve_steps > 0 ||
+	                 params.num_motion_triangle_steps > 0;
+
 	/* init progress updates */
 	double build_start_time;
 	build_start_time = progress_start_time = time_dt();
@@ -474,6 +481,12 @@ BVHNode* BVHBuild::run()
 	prim_type.resize(references.size());
 	prim_index.resize(references.size());
 	prim_object.resize(references.size());
+	if(need_prim_time) {
+		prim_time.resize(references.size());
+	}
+	else {
+		prim_time.resize(0);
+	}

 	/* build recursively */
 	BVHNode *rootnode;
@@ -848,11 +861,14 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start,
 		prim_type[start] = ref->prim_type();
 		prim_index[start] = ref->prim_index();
 		prim_object[start] = ref->prim_object();
+		if(need_prim_time) {
+			prim_time[start] = make_float2(ref->time_from(), ref->time_to());
+		}

 		uint visibility = objects[ref->prim_object()]->visibility;
 		BVHNode *leaf_node =  new LeafNode(ref->bounds(), visibility, start, start+1);
-		leaf_node->m_time_from = ref->time_from();
-		leaf_node->m_time_to = ref->time_to();
+		leaf_node->time_from = ref->time_from();
+		leaf_node->time_to = ref->time_to();
 		return leaf_node;
 	}
 	else {
@@ -861,12 +877,12 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start,
 		BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid);

 		BoundBox bounds = BoundBox::empty;
-		bounds.grow(leaf0->m_bounds);
-		bounds.grow(leaf1->m_bounds);
+		bounds.grow(leaf0->bounds);
+		bounds.grow(leaf1->bounds);

 		BVHNode *inner_node = new InnerNode(bounds, leaf0, leaf1);
-		inner_node->m_time_from = min(leaf0->m_time_from, leaf1->m_time_from);
-		inner_node->m_time_to = max(leaf0->m_time_to, leaf1->m_time_to);
+		inner_node->time_from = min(leaf0->time_from, leaf1->time_from);
+		inner_node->time_to = max(leaf0->time_to, leaf1->time_to);
 		return inner_node;
 	}
 }
@@ -890,11 +906,13 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
 	 *    can not control.
 	 */
 	typedef StackAllocator<256, int> LeafStackAllocator;
+	typedef StackAllocator<256, float2> LeafTimeStackAllocator;
 	typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;

 	vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL];
 	vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL];
 	vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL];
+	vector<float2, LeafTimeStackAllocator> p_time[PRIMITIVE_NUM_TOTAL];
 	vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM_TOTAL];

 	/* TODO(sergey): In theory we should be able to store references. */
@@ -917,6 +935,8 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
 			p_type[type_index].push_back(ref.prim_type());
 			p_index[type_index].push_back(ref.prim_index());
 			p_object[type_index].push_back(ref.prim_object());
+			p_time[type_index].push_back(make_float2(ref.time_from(),
+			                                         ref.time_to()));

 			bounds[type_index].grow(ref.bounds());
 			visibility[type_index] |= objects[ref.prim_object()]->visibility;
@@ -946,9 +966,13 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
 	vector<int, LeafStackAllocator> local_prim_type,
 	                                local_prim_index,
 	                                local_prim_object;
+	vector<float2, LeafTimeStackAllocator> local_prim_time;
 	local_prim_type.resize(num_new_prims);
 	local_prim_index.resize(num_new_prims);
 	local_prim_object.resize(num_new_prims);
+	if(need_prim_time) {
+		local_prim_time.resize(num_new_prims);
+	}
 	for(int i = 0; i < PRIMITIVE_NUM_TOTAL; ++i) {
 		int num = (int)p_type[i].size();
 		if(num != 0) {
@@ -961,6 +985,9 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
 				local_prim_type[index] = p_type[i][j];
 				local_prim_index[index] = p_index[i][j];
 				local_prim_object[index] = p_object[i][j];
+				if(need_prim_time) {
+					local_prim_time[index] = p_time[i][j];
+				}
 				if(params.use_unaligned_nodes && !alignment_found) {
 					alignment_found =
 						unaligned_heuristic.compute_aligned_space(p_ref[i][j],
@@ -978,19 +1005,19 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
 					time_from = min(time_from, ref.time_from());
 					time_to = max(time_to, ref.time_to());
 				}
-				leaf_node->m_time_from = time_from;
-				leaf_node->m_time_to = time_to;
+				leaf_node->time_from = time_from;
+				leaf_node->time_to = time_to;
 			}
 			if(alignment_found) {
 				/* Need to recalculate leaf bounds with new alignment. */
-				leaf_node->m_bounds = BoundBox::empty;
+				leaf_node->bounds = BoundBox::empty;
 				for(int j = 0; j < num; ++j) {
 					const BVHReference &ref = p_ref[i][j];
 					BoundBox ref_bounds =
 					        unaligned_heuristic.compute_aligned_prim_boundbox(
 					                ref,
 					                aligned_space);
-					leaf_node->m_bounds.grow(ref_bounds);
+					leaf_node->bounds.grow(ref_bounds);
 				}
 				/* Set alignment space. */
 				leaf_node->set_aligned_space(aligned_space);
@@ -1027,11 +1054,17 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
 				prim_type.reserve(reserve);
 				prim_index.reserve(reserve);
 				prim_object.reserve(reserve);
+				if(need_prim_time) {
+					prim_time.reserve(reserve);
+				}
 			}

 			prim_type.resize(range_end);
 			prim_index.resize(range_end);
 			prim_object.resize(range_end);
+			if(need_prim_time) {
+				prim_time.resize(range_end);
+			}
 		}
 		spatial_spin_lock.unlock();

@@ -1040,6 +1073,9 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
 			memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
 			memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
 			memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
+			if(need_prim_time) {
+				memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data);
+			}
 		}
 	}
 	else {
@@ -1052,6 +1088,9 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
 			memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
 			memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
 			memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
+			if(need_prim_time) {
+				memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data);
+			}
 		}
 	}

@@ -1061,8 +1100,8 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
 	 */
 	for(int i = 0; i < num_leaves; ++i) {
 		LeafNode *leaf = (LeafNode *)leaves[i];
-		leaf->m_lo += start_index;
-		leaf->m_hi += start_index;
+		leaf->lo += start_index;
+		leaf->hi += start_index;
 	}

 	/* Create leaf node for object. */
@@ -1091,17 +1130,17 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
 		return new InnerNode(range.bounds(), leaves[0], leaves[1]);
 	}
 	else if(num_leaves == 3) {
-		BoundBox inner_bounds = merge(leaves[1]->m_bounds, leaves[2]->m_bounds);
+		BoundBox inner_bounds = merge(leaves[1]->bounds, leaves[2]->bounds);
 		BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]);
 		return new InnerNode(range.bounds(), leaves[0], inner);
 	} else {
 		/* Should be doing more branches if more primitive types added. */
 		assert(num_leaves <= 5);
-		BoundBox inner_bounds_a = merge(leaves[0]->m_bounds, leaves[1]->m_bounds);
-		BoundBox inner_bounds_b = merge(leaves[2]->m_bounds, leaves[3]->m_bounds);
+		BoundBox inner_bounds_a = merge(leaves[0]->bounds, leaves[1]->bounds);
+		BoundBox inner_bounds_b = merge(leaves[2]->bounds, leaves[3]->bounds);
 		BVHNode *inner_a = new InnerNode(inner_bounds_a, leaves[0], leaves[1]);
 		BVHNode *inner_b = new InnerNode(inner_bounds_b, leaves[2], leaves[3]);
-		BoundBox inner_bounds_c = merge(inner_a->m_bounds, inner_b->m_bounds);
+		BoundBox inner_bounds_c = merge(inner_a->bounds, inner_b->bounds);
 		BVHNode *inner_c = new InnerNode(inner_bounds_c, inner_a, inner_b);
 		if(num_leaves == 5) {
 			return new InnerNode(range.bounds(), inner_c, leaves[4]);
@@ -1136,8 +1175,8 @@ void BVHBuild::rotate(BVHNode *node, int max_depth)
 		rotate(parent->children[c], max_depth-1);

 	/* compute current area of all children */
-	BoundBox bounds0 = parent->children[0]->m_bounds;
-	BoundBox bounds1 = parent->children[1]->m_bounds;
+	BoundBox bounds0 = parent->children[0]->bounds;
+	BoundBox bounds1 = parent->children[1]->bounds;

 	float area0 = bounds0.half_area();
 	float area1 = bounds1.half_area();
@@ -1157,8 +1196,8 @@ void BVHBuild::rotate(BVHNode *node, int max_depth)
 		BoundBox& other = (c == 0)? bounds1: bounds0;

 		/* transpose child bounds */
-		BoundBox target0 = child->children[0]->m_bounds;
-		BoundBox target1 = child->children[1]->m_bounds;
+		BoundBox target0 = child->children[0]->bounds;
+		BoundBox target1 = child->children[1]->bounds;

 		/* compute cost for both possible swaps */
 		float cost0 = merge(other, target1).half_area() - child_area[c];
@@ -1190,7 +1229,7 @@ void BVHBuild::rotate(BVHNode *node, int max_depth)
 	InnerNode *child = (InnerNode*)parent->children[best_child];

 	swap(parent->children[best_other], child->children[best_target]);
-	child->m_bounds = merge(child->children[0]->m_bounds, child->children[1]->m_bounds);
+	child->bounds = merge(child->children[0]->bounds, child->children[1]->bounds);
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/bvh/bvh_build.h
+++ b/intern/cycles/bvh/bvh_build.h
@@ -20,17 +20,17 @@

 #include <float.h>

-#include "bvh.h"
-#include "bvh_binning.h"
-#include "bvh_unaligned.h"
+#include "bvh/bvh_params.h"
+#include "bvh/bvh_unaligned.h"

-#include "util_boundbox.h"
-#include "util_task.h"
-#include "util_vector.h"
+#include "util/util_task.h"
+#include "util/util_vector.h"

 CCL_NAMESPACE_BEGIN

+class BoundBox;
 class BVHBuildTask;
+class BVHNode;
 class BVHSpatialSplitBuildTask;
 class BVHParams;
 class InnerNode;
@@ -48,6 +48,7 @@ public:
 	         array<int>& prim_type,
 	         array<int>& prim_index,
 	         array<int>& prim_object,
+	         array<float2>& prim_time,
 	         const BVHParams& params,
 	         Progress& progress);
 	~BVHBuild();
@@ -112,6 +113,9 @@ protected:
 	array<int>& prim_type;
 	array<int>& prim_index;
 	array<int>& prim_object;
+	array<float2>& prim_time;
+
+	bool need_prim_time;

 	/* Build parameters. */
 	BVHParams params;
--- a/intern/cycles/bvh/bvh_node.cpp
+++ b/intern/cycles/bvh/bvh_node.cpp
@@ -15,12 +15,13 @@
 * limitations under the License.
 */

-#include "bvh.h"
-#include "bvh_build.h"
-#include "bvh_node.h"
+#include "bvh/bvh_node.h"

-#include "util_debug.h"
-#include "util_vector.h"
+#include "bvh/bvh.h"
+#include "bvh/bvh_build.h"
+
+#include "util/util_debug.h"
+#include "util/util_vector.h"

 CCL_NAMESPACE_BEGIN

@@ -62,12 +63,12 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
 			}
 			return cnt;
 		case BVH_STAT_ALIGNED_COUNT:
-			if(!is_unaligned()) {
+			if(!is_unaligned) {
 				cnt = 1;
 			}
 			break;
 		case BVH_STAT_UNALIGNED_COUNT:
-			if(is_unaligned()) {
+			if(is_unaligned) {
 				cnt = 1;
 			}
 			break;
@@ -75,7 +76,7 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
 			if(!is_leaf()) {
 				bool has_unaligned = false;
 				for(int j = 0; j < num_children(); j++) {
-					has_unaligned |= get_child(j)->is_unaligned();
+					has_unaligned |= get_child(j)->is_unaligned;
 				}
 				cnt += has_unaligned? 0: 1;
 			}
@@ -84,7 +85,7 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
 			if(!is_leaf()) {
 				bool has_unaligned = false;
 				for(int j = 0; j < num_children(); j++) {
-					has_unaligned |= get_child(j)->is_unaligned();
+					has_unaligned |= get_child(j)->is_unaligned;
 				}
 				cnt += has_unaligned? 1: 0;
 			}
@@ -95,12 +96,12 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
 				for(int i = 0; i < num_children(); i++) {
 					BVHNode *node = get_child(i);
 					if(node->is_leaf()) {
-						has_unaligned |= node->is_unaligned();
+						has_unaligned |= node->is_unaligned;
 					}
 					else {
 						for(int j = 0; j < node->num_children(); j++) {
 							cnt += node->get_child(j)->getSubtreeSize(stat);
-							has_unaligned |= node->get_child(j)->is_unaligned();
+							has_unaligned |= node->get_child(j)->is_unaligned;
 						}
 					}
 				}
@@ -113,12 +114,12 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
 				for(int i = 0; i < num_children(); i++) {
 					BVHNode *node = get_child(i);
 					if(node->is_leaf()) {
-						has_unaligned |= node->is_unaligned();
+						has_unaligned |= node->is_unaligned;
 					}
 					else {
 						for(int j = 0; j < node->num_children(); j++) {
 							cnt += node->get_child(j)->getSubtreeSize(stat);
-							has_unaligned |= node->get_child(j)->is_unaligned();
+							has_unaligned |= node->get_child(j)->is_unaligned;
 						}
 					}
 				}
@@ -126,10 +127,10 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
 			}
 			return cnt;
 		case BVH_STAT_ALIGNED_LEAF_COUNT:
-			cnt = (is_leaf() && !is_unaligned()) ? 1 : 0;
+			cnt = (is_leaf() && !is_unaligned) ? 1 : 0;
 			break;
 		case BVH_STAT_UNALIGNED_LEAF_COUNT:
-			cnt = (is_leaf() && is_unaligned()) ? 1 : 0;
+			cnt = (is_leaf() && is_unaligned) ? 1 : 0;
 			break;
 		default:
 			assert(0); /* unknown mode */
@@ -157,7 +158,7 @@ float BVHNode::computeSubtreeSAHCost(const BVHParams& p, float probability) cons

 	for(int i = 0; i < num_children(); i++) {
 		BVHNode *child = get_child(i);
-		SAH += child->computeSubtreeSAHCost(p, probability * child->m_bounds.safe_area()/m_bounds.safe_area());
+		SAH += child->computeSubtreeSAHCost(p, probability * child->bounds.safe_area()/bounds.safe_area());
 	}

 	return SAH;
@@ -165,15 +166,15 @@ float BVHNode::computeSubtreeSAHCost(const BVHParams& p, float probability) cons

 uint BVHNode::update_visibility()
 {
-	if(!is_leaf() && m_visibility == 0) {
+	if(!is_leaf() && visibility == 0) {
 		InnerNode *inner = (InnerNode*)this;
 		BVHNode *child0 = inner->children[0];
 		BVHNode *child1 = inner->children[1];

-		m_visibility = child0->update_visibility()|child1->update_visibility();
+		visibility = child0->update_visibility()|child1->update_visibility();
 	}

-	return m_visibility;
+	return visibility;
 }

 void BVHNode::update_time()
@@ -184,8 +185,8 @@ void BVHNode::update_time()
 		BVHNode *child1 = inner->children[1];
 		child0->update_time();
 		child1->update_time();
-		m_time_from = min(child0->m_time_from, child1->m_time_from);
-		m_time_to =  max(child0->m_time_to, child1->m_time_to);
+		time_from = min(child0->time_from, child1->time_from);
+		time_to =  max(child0->time_to, child1->time_to);
 	}
 }

@@ -209,7 +210,7 @@ void LeafNode::print(int depth) const
 	for(int i = 0; i < depth; i++)
 		printf("  ");
 	
-	printf("leaf node %d to %d\n", m_lo, m_hi);
+	printf("leaf node %d to %d\n", lo, hi);
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/bvh/bvh_node.h
+++ b/intern/cycles/bvh/bvh_node.h
@@ -18,9 +18,8 @@
 #ifndef __BVH_NODE_H__
 #define __BVH_NODE_H__

-#include "util_boundbox.h"
-#include "util_debug.h"
-#include "util_types.h"
+#include "util/util_boundbox.h"
+#include "util/util_types.h"

 CCL_NAMESPACE_BEGIN

@@ -46,16 +45,16 @@ class BVHParams;
 class BVHNode
 {
 public:
-	BVHNode() : m_is_unaligned(false),
-	            m_aligned_space(NULL),
-	            m_time_from(0.0f),
-	            m_time_to(1.0f)
+	BVHNode() : is_unaligned(false),
+	            aligned_space(NULL),
+	            time_from(0.0f),
+	            time_to(1.0f)
 	{
 	}

 	virtual ~BVHNode()
 	{
-		delete m_aligned_space;
+		delete aligned_space;
 	}

 	virtual bool is_leaf() const = 0;
@@ -63,30 +62,26 @@ public:
 	virtual BVHNode *get_child(int i) const = 0;
 	virtual int num_triangles() const { return 0; }
 	virtual void print(int depth = 0) const = 0;
-	bool is_unaligned() const { return m_is_unaligned; }

 	inline void set_aligned_space(const Transform& aligned_space)
 	{
-		m_is_unaligned = true;
-		if(m_aligned_space == NULL) {
-			m_aligned_space = new Transform(aligned_space);
+		is_unaligned = true;
+		if(this->aligned_space == NULL) {
+			this->aligned_space = new Transform(aligned_space);
 		}
 		else {
-			*m_aligned_space = aligned_space;
+			*this->aligned_space = aligned_space;
 		}
 	}

 	inline Transform get_aligned_space() const
 	{
-		if(m_aligned_space == NULL) {
+		if(aligned_space == NULL) {
 			return transform_identity();
 		}
-		return *m_aligned_space;
+		return *aligned_space;
 	}

-	BoundBox m_bounds;
-	uint m_visibility;
-
 	// Subtree functions
 	int getSubtreeSize(BVH_STAT stat=BVH_STAT_NODE_COUNT) const;
 	float computeSubtreeSAHCost(const BVHParams& p, float probability = 1.0f) const;
@@ -95,13 +90,18 @@ public:
 	uint update_visibility();
 	void update_time();

-	bool m_is_unaligned;
+	// Properties.
+	BoundBox bounds;
+	uint visibility;

-	// TODO(sergey): Can be stored as 3x3 matrix, but better to have some
-	// utilities and type defines in util_transform first.
-	Transform *m_aligned_space;
+	bool is_unaligned;

-	float m_time_from, m_time_to;
+	/* TODO(sergey): Can be stored as 3x3 matrix, but better to have some
+	 * utilities and type defines in util_transform first.
+	 */
+	Transform *aligned_space;
+
+	float time_from, time_to;
 };

 class InnerNode : public BVHNode
@@ -111,20 +111,20 @@ public:
 	          BVHNode* child0,
 	          BVHNode* child1)
 	{
-		m_bounds = bounds;
+		this->bounds = bounds;
 		children[0] = child0;
 		children[1] = child1;

 		if(child0 && child1)
-			m_visibility = child0->m_visibility|child1->m_visibility;
+			visibility = child0->visibility|child1->visibility;
 		else
-			m_visibility = 0; /* happens on build cancel */
+			visibility = 0; /* happens on build cancel */
 	}

 	explicit InnerNode(const BoundBox& bounds)
 	{
-		m_bounds = bounds;
-		m_visibility = 0;
+		this->bounds = bounds;
+		visibility = 0;
 		children[0] = NULL;
 		children[1] = NULL;
 	}
@@ -140,12 +140,12 @@ public:
 class LeafNode : public BVHNode
 {
 public:
-	LeafNode(const BoundBox& bounds, uint visibility, int lo, int hi) 
+	LeafNode(const BoundBox& bounds, uint visibility, int lo, int hi)
+	: lo(lo),
+	  hi(hi)
 	{
-		m_bounds = bounds;
-		m_visibility = visibility;
-		m_lo = lo;
-		m_hi = hi;
+		this->bounds = bounds;
+		this->visibility = visibility;
 	}

 	LeafNode(const LeafNode& s)
@@ -157,14 +157,13 @@ public:
 	bool is_leaf() const { return true; }
 	int num_children() const { return 0; }
 	BVHNode *get_child(int) const { return NULL; }
-	int num_triangles() const { return m_hi - m_lo; }
+	int num_triangles() const { return hi - lo; }
 	void print(int depth) const;

-	int m_lo;
-	int m_hi;
+	int lo;
+	int hi;
 };

 CCL_NAMESPACE_END

 #endif /* __BVH_NODE_H__ */
-
--- a/intern/cycles/bvh/bvh_params.h
+++ b/intern/cycles/bvh/bvh_params.h
@@ -18,9 +18,9 @@
 #ifndef __BVH_PARAMS_H__
 #define __BVH_PARAMS_H__

-#include "util_boundbox.h"
+#include "util/util_boundbox.h"

-#include "kernel_types.h"
+#include "kernel/kernel_types.h"

 CCL_NAMESPACE_BEGIN

@@ -104,6 +104,7 @@ public:
 		primitive_mask = PRIMITIVE_ALL;

 		num_motion_curve_steps = 0;
+		num_motion_triangle_steps = 0;
 	}

 	/* SAH costs */
@@ -245,4 +246,3 @@ struct BVHSpatialStorage {
 CCL_NAMESPACE_END

 #endif /* __BVH_PARAMS_H__ */
-
--- a/intern/cycles/bvh/bvh_sort.cpp
+++ b/intern/cycles/bvh/bvh_sort.cpp
@@ -15,12 +15,13 @@
 * limitations under the License.
 */

-#include "bvh_build.h"
-#include "bvh_sort.h"
+#include "bvh/bvh_sort.h"

-#include "util_algorithm.h"
-#include "util_debug.h"
-#include "util_task.h"
+#include "bvh/bvh_build.h"
+
+#include "util/util_algorithm.h"
+#include "util/util_debug.h"
+#include "util/util_task.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/bvh/bvh_sort.h
+++ b/intern/cycles/bvh/bvh_sort.h
@@ -18,8 +18,11 @@
 #ifndef __BVH_SORT_H__
 #define __BVH_SORT_H__

+#include <cstddef>
+
 CCL_NAMESPACE_BEGIN

+class BVHReference;
 class BVHUnaligned;
 struct Transform;

@@ -33,4 +36,3 @@ void bvh_reference_sort(int start,
 CCL_NAMESPACE_END

 #endif /* __BVH_SORT_H__ */
-
--- a/intern/cycles/bvh/bvh_split.cpp
+++ b/intern/cycles/bvh/bvh_split.cpp
@@ -15,14 +15,15 @@
 * limitations under the License.
 */

-#include "bvh_build.h"
-#include "bvh_split.h"
-#include "bvh_sort.h"
+#include "bvh/bvh_split.h"

-#include "mesh.h"
-#include "object.h"
+#include "bvh/bvh_build.h"
+#include "bvh/bvh_sort.h"

-#include "util_algorithm.h"
+#include "render/mesh.h"
+#include "render/object.h"
+
+#include "util/util_algorithm.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/bvh/bvh_split.h
+++ b/intern/cycles/bvh/bvh_split.h
@@ -18,8 +18,8 @@
 #ifndef __BVH_SPLIT_H__
 #define __BVH_SPLIT_H__

-#include "bvh_build.h"
-#include "bvh_params.h"
+#include "bvh/bvh_build.h"
+#include "bvh/bvh_params.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/bvh/bvh_unaligned.cpp
+++ b/intern/cycles/bvh/bvh_unaligned.cpp
@@ -14,18 +14,17 @@
 * limitations under the License.
 */

+#include "bvh/bvh_unaligned.h"

-#include "bvh_unaligned.h"
+#include "render/mesh.h"
+#include "render/object.h"

-#include "mesh.h"
-#include "object.h"
-
-#include "bvh_binning.h"
+#include "bvh/bvh_binning.h"
 #include "bvh_params.h"

-#include "util_boundbox.h"
-#include "util_debug.h"
-#include "util_transform.h"
+#include "util/util_boundbox.h"
+#include "util/util_debug.h"
+#include "util/util_transform.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/bvh/bvh_unaligned.h
+++ b/intern/cycles/bvh/bvh_unaligned.h
@@ -17,7 +17,7 @@
 #ifndef __BVH_UNALIGNED_H__
 #define __BVH_UNALIGNED_H__

-#include "util_vector.h"
+#include "util/util_vector.h"

 CCL_NAMESPACE_BEGIN

@@ -78,4 +78,3 @@ protected:
 CCL_NAMESPACE_END

 #endif /* __BVH_UNALIGNED_H__ */
-
--- a/intern/cycles/cmake/external_libs.cmake
+++ b/intern/cycles/cmake/external_libs.cmake
@@ -135,13 +135,5 @@ if(CYCLES_STANDALONE_REPOSITORY)

 	unset(_lib_DIR)
 else()
-	if(WIN32)
-		set(GLOG_INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/extern/glog/src/windows)
-		set(GFLAGS_INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/extern/gflags/src)
-	else()
-		set(GLOG_INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/extern/glog/src)
-		set(GFLAGS_INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/extern/gflags/src)
-	endif()
-	set(GFLAGS_NAMESPACE "gflags")
 	set(LLVM_LIBRARIES ${LLVM_LIBRARY})
 endif()
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -1,12 +1,6 @@

 set(INC
-	.
-	../graph
-	../kernel
-	../kernel/svm
-	../kernel/osl
-	../util
-	../render
+	..
 	../../glew-mx
 )

@@ -31,8 +25,10 @@ set(SRC
 	device.cpp
 	device_cpu.cpp
 	device_cuda.cpp
+	device_denoising.cpp
 	device_multi.cpp
 	device_opencl.cpp
+	device_split_kernel.cpp
 	device_task.cpp
 )

@@ -53,9 +49,11 @@ endif()

 set(SRC_HEADERS
 	device.h
+	device_denoising.h
 	device_memory.h
 	device_intern.h
 	device_network.h
+	device_split_kernel.h
 	device_task.h
 )

--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -17,18 +17,18 @@
 #include <stdlib.h>
 #include <string.h>

-#include "device.h"
-#include "device_intern.h"
+#include "device/device.h"
+#include "device/device_intern.h"

-#include "util_debug.h"
-#include "util_foreach.h"
-#include "util_half.h"
-#include "util_math.h"
-#include "util_opengl.h"
-#include "util_time.h"
-#include "util_types.h"
-#include "util_vector.h"
-#include "util_string.h"
+#include "util/util_debug.h"
+#include "util/util_foreach.h"
+#include "util/util_half.h"
+#include "util/util_math.h"
+#include "util/util_opengl.h"
+#include "util/util_time.h"
+#include "util/util_types.h"
+#include "util/util_vector.h"
+#include "util/util_string.h"

 CCL_NAMESPACE_BEGIN

@@ -48,11 +48,11 @@ std::ostream& operator <<(std::ostream &os,
 	os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
 	/* TODO(sergey): Decode bitflag into list of names. */
 	os << "Nodes features: " << requested_features.nodes_features << std::endl;
-	os << "Use hair: "
+	os << "Use Hair: "
 	   << string_from_bool(requested_features.use_hair) << std::endl;
-	os << "Use object motion: "
+	os << "Use Object Motion: "
 	   << string_from_bool(requested_features.use_object_motion) << std::endl;
-	os << "Use camera motion: "
+	os << "Use Camera Motion: "
 	   << string_from_bool(requested_features.use_camera_motion) << std::endl;
 	os << "Use Baking: "
 	   << string_from_bool(requested_features.use_baking) << std::endl;
@@ -66,6 +66,8 @@ std::ostream& operator <<(std::ostream &os,
 	   << string_from_bool(requested_features.use_patch_evaluation) << std::endl;
 	os << "Use Transparent Shadows: "
 	   << string_from_bool(requested_features.use_transparent) << std::endl;
+	os << "Use Principled BSDF: "
+	   << string_from_bool(requested_features.use_principled) << std::endl;
 	return os;
 }

@@ -80,7 +82,7 @@ Device::~Device()

 void Device::pixels_alloc(device_memory& mem)
 {
-	mem_alloc(mem, MEM_READ_WRITE);
+	mem_alloc("pixels", mem, MEM_READ_WRITE);
 }

 void Device::pixels_copy_from(device_memory& mem, int y, int w, int h)
@@ -400,4 +402,16 @@ void Device::free_memory()
 	devices.free_memory();
 }

+
+device_sub_ptr::device_sub_ptr(Device *device, device_memory& mem, int offset, int size, MemoryType type)
+ : device(device)
+{
+	ptr = device->mem_alloc_sub_ptr(mem, offset, size, type);
+}
+
+device_sub_ptr::~device_sub_ptr()
+{
+	device->mem_free_sub_ptr(ptr);
+}
+
 CCL_NAMESPACE_END
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -19,15 +19,15 @@

 #include <stdlib.h>

-#include "device_memory.h"
-#include "device_task.h"
+#include "device/device_memory.h"
+#include "device/device_task.h"

-#include "util_list.h"
-#include "util_stats.h"
-#include "util_string.h"
-#include "util_thread.h"
-#include "util_types.h"
-#include "util_vector.h"
+#include "util/util_list.h"
+#include "util/util_stats.h"
+#include "util/util_string.h"
+#include "util/util_thread.h"
+#include "util/util_types.h"
+#include "util/util_vector.h"

 CCL_NAMESPACE_BEGIN

@@ -121,6 +121,12 @@ public:
 	/* Use Transparent shadows */
 	bool use_transparent;

+	/* Use various shadow tricks, such as shadow catcher. */
+	bool use_shadow_tricks;
+
+	/* Per-uber shader usage flags. */
+	bool use_principled;
+
 	DeviceRequestedFeatures()
 	{
 		/* TODO(sergey): Find more meaningful defaults. */
@@ -137,6 +143,8 @@ public:
 		use_integrator_branched = false;
 		use_patch_evaluation = false;
 		use_transparent = false;
+		use_shadow_tricks = false;
+		use_principled = false;
 	}

 	bool modified(const DeviceRequestedFeatures& requested_features)
@@ -153,7 +161,9 @@ public:
 		         use_volume == requested_features.use_volume &&
 		         use_integrator_branched == requested_features.use_integrator_branched &&
 		         use_patch_evaluation == requested_features.use_patch_evaluation &&
-		         use_transparent == requested_features.use_transparent);
+		         use_transparent == requested_features.use_transparent &&
+		         use_shadow_tricks == requested_features.use_shadow_tricks &&
+		         use_principled == requested_features.use_principled);
 	}

 	/* Convert the requested features structure to a build options,
@@ -194,9 +204,15 @@ public:
 		if(!use_patch_evaluation) {
 			build_options += " -D__NO_PATCH_EVAL__";
 		}
-		if(!use_transparent) {
+		if(!use_transparent && !use_volume) {
 			build_options += " -D__NO_TRANSPARENT__";
 		}
+		if(!use_shadow_tricks) {
+			build_options += " -D__NO_SHADOW_TRICKS__";
+		}
+		if(!use_principled) {
+			build_options += " -D__NO_PRINCIPLED__";
+		}
 		return build_options;
 	}
 };
@@ -212,6 +228,7 @@ struct DeviceDrawParams {
 };

 class Device {
+	friend class device_sub_ptr;
 protected:
 	Device(DeviceInfo& info_, Stats &stats_, bool background) : background(background), vertex_buffer(0), info(info_), stats(stats_) {}

@@ -221,6 +238,14 @@ protected:
 	/* used for real time display */
 	unsigned int vertex_buffer;

+	virtual device_ptr mem_alloc_sub_ptr(device_memory& /*mem*/, int /*offset*/, int /*size*/, MemoryType /*type*/)
+	{
+		/* Only required for devices that implement denoising. */
+		assert(false);
+		return (device_ptr) 0;
+	}
+	virtual void mem_free_sub_ptr(device_ptr /*ptr*/) {}  /* Default implementation does nothing. */
+
 public:
 	virtual ~Device();

@@ -228,19 +253,29 @@ public:
 	DeviceInfo info;
 	virtual const string& error_message() { return error_msg; }
 	bool have_error() { return !error_message().empty(); }
+	virtual void set_error(const string& error)
+	{
+		if(!have_error()) {
+			error_msg = error;
+		}
+		fprintf(stderr, "%s\n", error.c_str());
+		fflush(stderr);
+	}
 	virtual bool show_samples() const { return false; }

 	/* statistics */
 	Stats &stats;

 	/* regular memory */
-	virtual void mem_alloc(device_memory& mem, MemoryType type) = 0;
+	virtual void mem_alloc(const char *name, device_memory& mem, MemoryType type) = 0;
 	virtual void mem_copy_to(device_memory& mem) = 0;
 	virtual void mem_copy_from(device_memory& mem,
 		int y, int w, int h, int elem) = 0;
 	virtual void mem_zero(device_memory& mem) = 0;
 	virtual void mem_free(device_memory& mem) = 0;

+	virtual int mem_address_alignment() { return 16; }
+
 	/* constant memory */
 	virtual void const_copy_to(const char *name, void *host, size_t size) = 0;

@@ -288,6 +323,8 @@ public:
 	/* multi device */
 	virtual void map_tile(Device * /*sub_device*/, RenderTile& /*tile*/) {}
 	virtual int device_number(Device * /*sub_device*/) { return 0; }
+	virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) {}
+	virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) {}

 	/* static */
 	static Device *create(DeviceInfo& info, Stats &stats, bool background = true);
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -0,0 +1,218 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/device_denoising.h"
+
+#include "kernel/filter/filter_defines.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Copy the denoising parameters from the device task and compute the rectangle
+ * of pixels that is read while filtering. Both tiles and filter_area are read
+ * here, so they have to be filled in before this function is called. */
+void DenoisingTask::init_from_devicetask(const DeviceTask &task)
+{
+	radius = task.denoising_radius;
+
+	/* The strength settings are mapped onto an exponential scale. */
+	const float strength_exponent = lerp(-5.0f, 3.0f, task.denoising_strength);
+	nlm_k_2 = powf(2.0f, strength_exponent);
+
+	const bool relative_pca = task.denoising_relative_pca;
+	const float feature_exponent = relative_pca ? lerp(-8.0f, 0.0f, task.denoising_feature_strength)
+	                                            : lerp(-5.0f, 3.0f, task.denoising_feature_strength);
+	const float feature_threshold = powf(10.0f, feature_exponent);
+	/* In relative mode the threshold is stored negated. */
+	pca_threshold = relative_pca ? -feature_threshold : feature_threshold;
+
+	render_buffer.pass_stride = task.pass_stride;
+	render_buffer.denoising_data_offset  = task.pass_denoising_data;
+	render_buffer.denoising_clean_offset = task.pass_denoising_clean;
+
+	/* Grow filter_area by radius pixels on every side, but never beyond the
+	 * outermost tile boundaries stored in tiles. */
+	const int x_min = max(tiles->x[0], filter_area.x - radius);
+	const int y_min = max(tiles->y[0], filter_area.y - radius);
+	const int x_max = min(tiles->x[3], filter_area.x + filter_area.z + radius);
+	const int y_max = min(tiles->y[3], filter_area.y + filter_area.w + radius);
+	rect = make_int4(x_min, y_min, x_max, y_max);
+}
+
+/* Fill the TilesInfo structure and the render buffer parameters from an array
+ * of nine render tiles, then pass the nine tile buffers to the device. */
+void DenoisingTask::tiles_from_rendertiles(RenderTile *rtiles)
+{
+	/* TilesInfo lives inside an int vector, so its size is given in ints. */
+	tiles = (TilesInfo*) tiles_mem.resize(sizeof(TilesInfo)/sizeof(int));
+
+	device_ptr buffers[9];
+	for(int t = 0; t < 9; t++) {
+		const RenderTile &rtile = rtiles[t];
+		buffers[t] = rtile.buffer;
+		tiles->offsets[t] = rtile.offset;
+		tiles->strides[t] = rtile.stride;
+	}
+
+	/* Tile boundaries: x is taken from tiles 3, 4, 5 and y from tiles 1, 4, 7
+	 * (presumably a row-major 3x3 neighborhood with tile 4 in the center). */
+	const RenderTile &center = rtiles[4];
+	const RenderTile &last_in_x = rtiles[5];
+	const RenderTile &last_in_y = rtiles[7];
+	tiles->x[0] = rtiles[3].x;
+	tiles->x[1] = center.x;
+	tiles->x[2] = last_in_x.x;
+	tiles->x[3] = last_in_x.x + last_in_x.w;
+	tiles->y[0] = rtiles[1].y;
+	tiles->y[1] = center.y;
+	tiles->y[2] = last_in_y.y;
+	tiles->y[3] = last_in_y.y + last_in_y.h;
+
+	/* render_buffer describes the buffer of tile 4. */
+	render_buffer.ptr    = center.buffer;
+	render_buffer.offset = center.offset;
+	render_buffer.stride = center.stride;
+
+	functions.set_tiles(buffers);
+}
+
+/* Run the whole denoising pipeline for the area described by rect and filter_area:
+ * allocate the working buffer, prefilter the shadow and general feature passes,
+ * copy the color passes, build the per-pixel transforms, reconstruct the image
+ * into render_buffer.ptr and free all temporary device memory again.
+ *
+ * Always returns true: the bool results of the functions.* callbacks are not
+ * inspected here. */
+bool DenoisingTask::run_denoising()
+{
+	/* Allocate denoising buffer. */
+	buffer.passes = 14;
+	buffer.w = align_up(rect.z - rect.x, 4);
+	buffer.h = rect.w - rect.y;
+	/* Round the pass size up so that each pass starts at an aligned address
+	 * (the device alignment is presumably in bytes, hence the division by sizeof(float)). */
+	buffer.pass_stride = align_up(buffer.w * buffer.h, divide_up(device->mem_address_alignment(), sizeof(float)));
+	buffer.mem.resize(buffer.pass_stride * buffer.passes);
+	device->mem_alloc("Denoising Pixel Buffer", buffer.mem, MEM_READ_WRITE);
+
+	device_ptr null_ptr = (device_ptr) 0;
+
+	/* Prefilter shadow feature. */
+	/* The braces limit the lifetime of the device_sub_ptr objects, so they are
+	 * released before buffer.mem is freed at the end of the function. */
+	{
+		device_sub_ptr unfiltered_a   (device, buffer.mem, 0,                    buffer.pass_stride, MEM_READ_WRITE);
+		device_sub_ptr unfiltered_b   (device, buffer.mem, 1*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+		device_sub_ptr sample_var     (device, buffer.mem, 2*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+		device_sub_ptr sample_var_var (device, buffer.mem, 3*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+		device_sub_ptr buffer_var     (device, buffer.mem, 5*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+		device_sub_ptr filtered_var   (device, buffer.mem, 6*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+		device_sub_ptr nlm_temporary_1(device, buffer.mem, 7*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+		device_sub_ptr nlm_temporary_2(device, buffer.mem, 8*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+		device_sub_ptr nlm_temporary_3(device, buffer.mem, 9*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+
+		nlm_state.temporary_1_ptr = *nlm_temporary_1;
+		nlm_state.temporary_2_ptr = *nlm_temporary_2;
+		nlm_state.temporary_3_ptr = *nlm_temporary_3;
+
+		/* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
+		functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var);
+
+		/* Smooth the (generally pretty noisy) buffer variance using the spatial information from the sample variance. */
+		nlm_state.set_parameters(6, 3, 4.0f, 1.0f);
+		functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var);
+
+		/* Reuse memory, the previous data isn't needed anymore. */
+		device_ptr filtered_a = *buffer_var,
+		           filtered_b = *sample_var;
+		/* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
+		nlm_state.set_parameters(5, 3, 1.0f, 0.25f);
+		functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a);
+		functions.non_local_means(*unfiltered_b, *unfiltered_a, *filtered_var, filtered_b);
+
+		/* The residual variance overwrites the sample variance variance pass. */
+		device_ptr residual_var = *sample_var_var;
+		/* Estimate the residual variance between the two filtered halves. */
+		functions.combine_halves(filtered_a, filtered_b, null_ptr, residual_var, 2, rect);
+
+		/* The final halves overwrite the unfiltered input passes. */
+		device_ptr final_a = *unfiltered_a,
+		           final_b = *unfiltered_b;
+		/* Use the residual variance for a second filter pass. */
+		nlm_state.set_parameters(4, 2, 1.0f, 0.5f);
+		functions.non_local_means(filtered_a, filtered_b, residual_var, final_a);
+		functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
+
+		/* Combine the two double-filtered halves to a final shadow feature. */
+		device_sub_ptr shadow_pass(device, buffer.mem, 4*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+		functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect);
+	}
+
+	/* Prefilter general features. */
+	{
+		/* Passes 8 to 12 serve as scratch space here; the filtered features are
+		 * written to passes 0 to 3 and 5 to 7 (pass 4 holds the shadow feature). */
+		device_sub_ptr unfiltered     (device, buffer.mem,  8*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+		device_sub_ptr variance       (device, buffer.mem,  9*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+		device_sub_ptr nlm_temporary_1(device, buffer.mem, 10*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+		device_sub_ptr nlm_temporary_2(device, buffer.mem, 11*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+		device_sub_ptr nlm_temporary_3(device, buffer.mem, 12*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+
+		nlm_state.temporary_1_ptr = *nlm_temporary_1;
+		nlm_state.temporary_2_ptr = *nlm_temporary_2;
+		nlm_state.temporary_3_ptr = *nlm_temporary_3;
+
+		int mean_from[]     = { 0, 1, 2, 6,  7,  8, 12 };
+		int variance_from[] = { 3, 4, 5, 9, 10, 11, 13 };
+		int pass_to[]       = { 1, 2, 3, 0,  5,  6,  7 };
+		for(int pass = 0; pass < 7; pass++) {
+			device_sub_ptr feature_pass(device, buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+			/* Get the unfiltered pass and its variance from the RenderBuffers. */
+			functions.get_feature(mean_from[pass], variance_from[pass], *unfiltered, *variance);
+			/* Smooth the pass and store the result in the denoising buffers. */
+			nlm_state.set_parameters(2, 2, 1.0f, 0.25f);
+			functions.non_local_means(*unfiltered, *unfiltered, *variance, *feature_pass);
+		}
+	}
+
+	/* Copy color passes. */
+	{
+		/* The color means end up in passes 8 to 10 and their variances in passes
+		 * 11 to 13, overwriting the scratch space used above. */
+		int mean_from[]     = {20, 21, 22};
+		int variance_from[] = {23, 24, 25};
+		int mean_to[]       = { 8,  9, 10};
+		int variance_to[]   = {11, 12, 13};
+		int num_color_passes = 3;
+		for(int pass = 0; pass < num_color_passes; pass++) {
+			device_sub_ptr color_pass    (device, buffer.mem,     mean_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+			device_sub_ptr color_var_pass(device, buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+			functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass);
+		}
+	}
+
+	/* Per-pixel storage is sized by filter_area, not by the enlarged rect. */
+	storage.w = filter_area.z;
+	storage.h = filter_area.w;
+	storage.transform.resize(storage.w*storage.h*TRANSFORM_SIZE);
+	storage.rank.resize(storage.w*storage.h);
+	device->mem_alloc("Denoising Transform", storage.transform, MEM_READ_WRITE);
+	device->mem_alloc("Denoising Rank", storage.rank, MEM_READ_WRITE);
+
+	functions.construct_transform();
+
+	device_only_memory<float> temporary_1;
+	device_only_memory<float> temporary_2;
+	temporary_1.resize(buffer.w*buffer.h);
+	temporary_2.resize(buffer.w*buffer.h);
+	device->mem_alloc("Denoising NLM temporary 1", temporary_1, MEM_READ_WRITE);
+	device->mem_alloc("Denoising NLM temporary 2", temporary_2, MEM_READ_WRITE);
+	reconstruction_state.temporary_1_ptr = temporary_1.device_pointer;
+	reconstruction_state.temporary_2_ptr = temporary_2.device_pointer;
+
+	storage.XtWX.resize(storage.w*storage.h*XTWX_SIZE);
+	storage.XtWY.resize(storage.w*storage.h*XTWY_SIZE);
+	device->mem_alloc("Denoising XtWX", storage.XtWX, MEM_READ_WRITE);
+	device->mem_alloc("Denoising XtWY", storage.XtWY, MEM_READ_WRITE);
+
+	/* filter_rect holds the position of filter_area relative to rect, plus its size. */
+	reconstruction_state.filter_rect = make_int4(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h);
+	int tile_coordinate_offset = filter_area.y*render_buffer.stride + filter_area.x;
+	reconstruction_state.buffer_params = make_int4(render_buffer.offset + tile_coordinate_offset,
+	                                               render_buffer.stride,
+	                                               render_buffer.pass_stride,
+	                                               render_buffer.denoising_clean_offset);
+	reconstruction_state.source_w = rect.z-rect.x;
+	reconstruction_state.source_h = rect.w-rect.y;
+
+	{
+		/* The color passes are used both as the image to filter and as the guide. */
+		device_sub_ptr color_ptr    (device, buffer.mem,  8*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
+		device_sub_ptr color_var_ptr(device, buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
+		functions.reconstruct(*color_ptr, *color_var_ptr, *color_ptr, *color_var_ptr, render_buffer.ptr);
+	}
+
+	device->mem_free(storage.XtWX);
+	device->mem_free(storage.XtWY);
+	device->mem_free(storage.transform);
+	device->mem_free(storage.rank);
+	device->mem_free(temporary_1);
+	device->mem_free(temporary_2);
+	device->mem_free(buffer.mem);
+	/* tiles_mem is not allocated in this function (presumably by the set_tiles
+	 * callback - TODO confirm), but it is released here. */
+	device->mem_free(tiles_mem);
+	return true;
+}
+
+CCL_NAMESPACE_END
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DEVICE_DENOISING_H__
+#define __DEVICE_DENOISING_H__
+
+#include "device/device.h"
+
+#include "render/buffers.h"
+
+#include "kernel/filter/filter_defines.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Holds the parameters, buffers and state of one denoising operation.
+ * The device-specific work is done through the callbacks in functions, which
+ * have to be assigned before run_denoising() is called. */
+class DenoisingTask {
+public:
+	/* Parameters of the denoising algorithm. */
+	int radius;
+	float nlm_k_2;
+	float pca_threshold;
+
+	/* Pointer and parameters of the RenderBuffers. */
+	struct RenderBuffers {
+		int denoising_data_offset;
+		int denoising_clean_offset;
+		int pass_stride;
+		int offset;
+		int stride;
+		device_ptr ptr;
+		int samples;
+	} render_buffer;
+
+	/* Tile layout information; points into tiles_mem and is set by tiles_from_rendertiles(). */
+	TilesInfo *tiles;
+	device_vector<int> tiles_mem;
+	void tiles_from_rendertiles(RenderTile *rtiles);
+
+	/* rect is computed by init_from_devicetask() from filter_area, radius and tiles. */
+	int4 rect;
+	int4 filter_area;
+
+	/* Callbacks that perform the individual denoising steps on the device. */
+	struct DeviceFunctions {
+		function<bool(device_ptr image_ptr,    /* Contains the values that are smoothed. */
+		              device_ptr guide_ptr,    /* Contains the values that are used to calculate weights. */
+		              device_ptr variance_ptr, /* Contains the variance of the guide image. */
+		              device_ptr out_ptr       /* The filtered output is written into this image. */
+		              )> non_local_means;
+		function<bool(device_ptr color_ptr,
+		              device_ptr color_variance_ptr,
+		              device_ptr guide_ptr,
+		              device_ptr guide_variance_ptr,
+		              device_ptr output_ptr
+		              )> reconstruct;
+		function<bool()> construct_transform;
+
+		function<bool(device_ptr a_ptr,
+		              device_ptr b_ptr,
+		              device_ptr mean_ptr,
+		              device_ptr variance_ptr,
+		              int r,
+		              int4 rect
+		              )> combine_halves;
+		function<bool(device_ptr a_ptr,
+		              device_ptr b_ptr,
+		              device_ptr sample_variance_ptr,
+		              device_ptr sv_variance_ptr,
+		              device_ptr buffer_variance_ptr
+		              )> divide_shadow;
+		function<bool(int mean_offset,
+		              int variance_offset,
+		              device_ptr mean_ptr,
+		              device_ptr variance_ptr
+		              )> get_feature;
+		function<bool(device_ptr*)> set_tiles;
+	} functions;
+
+	/* Stores state of the current Reconstruction operation,
+	 * which is accessed by the device in order to perform the operation. */
+	struct ReconstructionState {
+		device_ptr temporary_1_ptr; /* These two images are used as temporary storage. */
+		device_ptr temporary_2_ptr;
+
+		int4 filter_rect;
+		int4 buffer_params;
+
+		int source_w;
+		int source_h;
+	} reconstruction_state;
+
+	/* Stores state of the current NLM operation,
+	 * which is accessed by the device in order to perform the operation. */
+	struct NLMState {
+		device_ptr temporary_1_ptr; /* These three images are used as temporary storage. */
+		device_ptr temporary_2_ptr;
+		device_ptr temporary_3_ptr;
+
+		int r;      /* Search radius of the filter. */
+		int f;      /* Patch size of the filter. */
+		float a;    /* Variance compensation factor in the MSE estimation. */
+		float k_2;  /* Squared value of the k parameter of the filter. */
+
+		/* Set all four filter parameters at once. */
+		void set_parameters(int r_, int f_, float a_, float k_2_) { r = r_; f = f_; a = a_; k_2 = k_2_; }
+	} nlm_state;
+
+	/* Device memory used while constructing transforms and reconstructing;
+	 * w and h are the dimensions it is sized for. */
+	struct Storage {
+		device_only_memory<float>  transform;
+		device_only_memory<int>    rank;
+		device_only_memory<float>  XtWX;
+		device_only_memory<float3> XtWY;
+		int w;
+		int h;
+	} storage;
+
+	/* Only the device is stored; all other members are left uninitialized. */
+	DenoisingTask(Device *device) : device(device) {}
+
+	void init_from_devicetask(const DeviceTask &task);
+
+	bool run_denoising();
+
+	/* Working buffer that holds passes planes of pass_stride floats each. */
+	struct DenoiseBuffers {
+		int pass_stride;
+		int passes;
+		int w;
+		int h;
+		device_only_memory<float> mem;
+	} buffer;
+
+protected:
+	Device *device;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __DEVICE_DENOISING_H__ */
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -28,13 +28,15 @@
 * other devices this is a pointer to device memory, where we will copy memory
 * to and from. */

-#include "util_debug.h"
-#include "util_half.h"
-#include "util_types.h"
-#include "util_vector.h"
+#include "util/util_debug.h"
+#include "util/util_half.h"
+#include "util/util_types.h"
+#include "util/util_vector.h"

 CCL_NAMESPACE_BEGIN

+class Device;
+
 enum MemoryType {
 	MEM_READ_ONLY,
 	MEM_WRITE_ONLY,
@@ -48,7 +50,8 @@ enum DataType {
 	TYPE_UINT,
 	TYPE_INT,
 	TYPE_FLOAT,
-	TYPE_HALF
+	TYPE_HALF,
+	TYPE_UINT64,
 };

 static inline size_t datatype_size(DataType datatype) 
@@ -59,6 +62,7 @@ static inline size_t datatype_size(DataType datatype)
 		case TYPE_UINT: return sizeof(uint);
 		case TYPE_INT: return sizeof(int);
 		case TYPE_HALF: return sizeof(half);
+		case TYPE_UINT64: return sizeof(uint64_t);
 		default: return 0;
 	}
 }
@@ -142,7 +146,7 @@ template<> struct device_type_traits<float2> {

 template<> struct device_type_traits<float3> {
 	static const DataType data_type = TYPE_FLOAT;
-	static const int num_elements = 3;
+	static const int num_elements = 4;
 };

 template<> struct device_type_traits<float4> {
@@ -160,12 +164,20 @@ template<> struct device_type_traits<half4> {
 	static const int num_elements = 4;
 };

+/* A uint64_t is described as a single element of type TYPE_UINT64. */
+template<> struct device_type_traits<uint64_t> {
+	static const DataType data_type = TYPE_UINT64;
+	static const int num_elements = 1;
+};
+
 /* Device Memory */

 class device_memory
 {
 public:
 	size_t memory_size() { return data_size*data_elements*datatype_size(data_type); }
+	size_t memory_elements_size(int elements) {
+		return elements*data_elements*datatype_size(data_type);
+	}

 	/* data information */
 	DataType data_type;
@@ -180,15 +192,48 @@ public:
 	/* device pointer */
 	device_ptr device_pointer;

-protected:
-	device_memory() {}
+	/* Create an empty memory description: uchar element type, all sizes zero
+	 * and no host or device pointer. */
+	device_memory()
+	{
+		data_type = device_type_traits<uchar>::data_type;
+		data_elements = device_type_traits<uchar>::num_elements;
+		data_pointer = 0;
+		data_size = 0;
+		device_size = 0;
+		data_width = 0;
+		data_height = 0;
+		data_depth = 0;
+		device_pointer = 0;
+	}
 	virtual ~device_memory() { assert(!device_pointer); }

+	/* Record a new size in data_size and data_width.
+	 * Only the bookkeeping is updated; nothing is allocated or freed here. */
+	void resize(size_t size)
+	{
+		data_size = size;
+		data_width = size;
+	}
+
+protected:
 	/* no copying */
 	device_memory(const device_memory&);
 	device_memory& operator = (const device_memory&);
 };

+/* Device memory described only by an element type and a size; this class adds
+ * no host-side storage of its own. */
+template<typename T>
+class device_only_memory : public device_memory
+{
+public:
+	device_only_memory()
+	{
+		data_type = device_type_traits<T>::data_type;
+		/* Never report fewer than one element per item. */
+		data_elements = max(device_type_traits<T>::num_elements, 1);
+	}
+
+	/* Set the size to hold num items of type T.
+	 * NOTE(review): data_size becomes num*sizeof(T), and memory_size() multiplies
+	 * data_size by data_elements and by the size of the data type again. For any
+	 * T larger than one byte memory_size() is therefore bigger than num*sizeof(T).
+	 * Confirm whether this over-allocation is intended. */
+	void resize(size_t num)
+	{
+		device_memory::resize(num*sizeof(T));
+	}
+};
+
 /* Device Vector */

 template<typename T> class device_vector : public device_memory
@@ -198,16 +243,8 @@ public:
 	{
 		data_type = device_type_traits<T>::data_type;
 		data_elements = device_type_traits<T>::num_elements;
-		data_pointer = 0;
-		data_size = 0;
-		device_size = 0;
-		data_width = 0;
-		data_height = 0;
-		data_depth = 0;

 		assert(data_elements > 0);
-
-		device_pointer = 0;
 	}

 	virtual ~device_vector() {}
@@ -266,6 +303,7 @@ public:
 		data_height = 0;
 		data_depth = 0;
 		data_size = 0;
+		device_pointer = 0;
 	}

 	size_t size()
@@ -282,6 +320,27 @@ private:
 	array<T> data;
 };

+/* A device_sub_ptr is a pointer into another existing memory.
+ * Therefore, it is not allocated separately, but just created from the already allocated base memory.
+ * It is freed automatically when it goes out of scope, which should happen before the base memory is freed.
+ * Note that some devices require the offset and size of the sub_ptr to be properly aligned. */
+class device_sub_ptr
+{
+public:
+	/* offset and size describe the sub-range of mem that this pointer covers. */
+	device_sub_ptr(Device *device, device_memory& mem, int offset, int size, MemoryType type);
+	~device_sub_ptr();
+	/* No copying: a copy would carry the same ptr as the original, and each
+	 * object runs its destructor. Both the copy constructor and the assignment
+	 * operator are therefore only declared, like in device_memory. */
+	device_sub_ptr(const device_sub_ptr&);
+	device_sub_ptr& operator = (const device_sub_ptr&);
+
+	/* Access the device pointer of the sub-range. */
+	device_ptr operator*() const
+	{
+		return ptr;
+	}
+protected:
+	Device *device;
+	device_ptr ptr;
+};
+
 CCL_NAMESPACE_END

 #endif /* __DEVICE_MEMORY_H__ */
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -17,17 +17,17 @@
 #include <stdlib.h>
 #include <sstream>

-#include "device.h"
-#include "device_intern.h"
-#include "device_network.h"
+#include "device/device.h"
+#include "device/device_intern.h"
+#include "device/device_network.h"

-#include "buffers.h"
+#include "render/buffers.h"

-#include "util_foreach.h"
-#include "util_list.h"
-#include "util_logging.h"
-#include "util_map.h"
-#include "util_time.h"
+#include "util/util_foreach.h"
+#include "util/util_list.h"
+#include "util/util_logging.h"
+#include "util/util_map.h"
+#include "util/util_time.h"

 CCL_NAMESPACE_BEGIN

@@ -106,11 +106,11 @@ public:
 		return true;
 	}

-	void mem_alloc(device_memory& mem, MemoryType type)
+	void mem_alloc(const char *name, device_memory& mem, MemoryType type)
 	{
 		foreach(SubDevice& sub, devices) {
 			mem.device_pointer = 0;
-			sub.device->mem_alloc(mem, type);
+			sub.device->mem_alloc(name, mem, type);
 			sub.ptr_map[unique_ptr] = mem.device_pointer;
 		}

@@ -162,6 +162,7 @@ public:
 	void mem_free(device_memory& mem)
 	{
 		device_ptr tmp = mem.device_pointer;
+		stats.mem_free(mem.device_size);

 		foreach(SubDevice& sub, devices) {
 			mem.device_pointer = sub.ptr_map[tmp];
@@ -170,7 +171,6 @@ public:
 		}

 		mem.device_pointer = 0;
-		stats.mem_free(mem.device_size);
 	}

 	void const_copy_to(const char *name, void *host, size_t size)
@@ -202,6 +202,7 @@ public:
 	void tex_free(device_memory& mem)
 	{
 		device_ptr tmp = mem.device_pointer;
+		stats.mem_free(mem.device_size);

 		foreach(SubDevice& sub, devices) {
 			mem.device_pointer = sub.ptr_map[tmp];
@@ -210,7 +211,6 @@ public:
 		}

 		mem.device_pointer = 0;
-		stats.mem_free(mem.device_size);
 	}

 	void pixels_alloc(device_memory& mem)
@@ -299,6 +299,60 @@ public:
 		return -1;
 	}

+	/* Make the buffers of all nine tiles available on sub_device.
+	 * Tiles without buffers and tiles that already live on sub_device are left
+	 * untouched; for all others tiles[i].buffer is pointed at a temporary copy
+	 * that is allocated on sub_device. */
+	void map_neighbor_tiles(Device *sub_device, RenderTile *tiles)
+	{
+		for(int i = 0; i < 9; i++) {
+			if(!tiles[i].buffers) {
+				continue;
+			}
+			/* If the tile was rendered on another device, copy its memory
+			 * to the current device now, for the duration of the denoising task.
+			 * Note that this temporarily modifies the RenderBuffers and calls
+			 * the device, so this function is not thread safe. */
+			if(tiles[i].buffers->device != sub_device) {
+				device_vector<float> &mem = tiles[i].buffers->buffer;
+
+				tiles[i].buffers->copy_from_device();
+				/* Hide the original device pointer while the copy is allocated
+				 * and filled, then put it back. */
+				device_ptr original_ptr = mem.device_pointer;
+				mem.device_pointer = 0;
+				sub_device->mem_alloc("Temporary memory for neighboring tile", mem, MEM_READ_WRITE);
+				sub_device->mem_copy_to(mem);
+				tiles[i].buffer = mem.device_pointer;
+				mem.device_pointer = original_ptr;
+			}
+		}
+	}
+
+	/* Counterpart of map_neighbor_tiles(): free the temporary copies on
+	 * sub_device again. For tile 4 the data is first read back from sub_device
+	 * and afterwards copied to the device that owns the tile. */
+	void unmap_neighbor_tiles(Device * sub_device, RenderTile * tiles)
+	{
+		for(int i = 0; i < 9; i++) {
+			if(!tiles[i].buffers) {
+				continue;
+			}
+			if(tiles[i].buffers->device != sub_device) {
+				device_vector<float> &mem = tiles[i].buffers->buffer;
+
+				/* Temporarily point mem at the copy that lives on sub_device. */
+				device_ptr original_ptr = mem.device_pointer;
+				mem.device_pointer = tiles[i].buffer;
+
+				/* Read the denoised tile back from sub_device (presumably into host memory). */
+				if(i == 4) {
+					tiles[i].buffers->copy_from_device(sub_device);
+				}
+
+				/* Keep device_size across the free and restore it afterwards
+				 * (presumably mem_free() changes it - TODO confirm). */
+				size_t mem_size = mem.device_size;
+				sub_device->mem_free(mem);
+				mem.device_pointer = original_ptr;
+				mem.device_size = mem_size;
+
+				/* Copy denoised tile to the original device. */
+				if(i == 4) {
+					tiles[i].buffers->device->mem_copy_to(mem);
+				}
+			}
+		}
+	}
+
 	int get_split_task_count(DeviceTask& task)
 	{
 		int total_tasks = 0;
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -14,12 +14,12 @@
 * limitations under the License.
 */

-#include "device.h"
-#include "device_intern.h"
-#include "device_network.h"
+#include "device/device.h"
+#include "device/device_intern.h"
+#include "device/device_network.h"

-#include "util_foreach.h"
-#include "util_logging.h"
+#include "util/util_foreach.h"
+#include "util/util_logging.h"

 #if defined(WITH_NETWORK)

@@ -87,8 +87,14 @@ public:
 		snd.write();
 	}

-	void mem_alloc(device_memory& mem, MemoryType type)
+	void mem_alloc(const char *name, device_memory& mem, MemoryType type)
 	{
+		if(name) {
+			VLOG(1) << "Buffer allocate: " << name << ", "
+				    << string_human_readable_number(mem.memory_size()) << " bytes. ("
+				    << string_human_readable_size(mem.memory_size()) << ")";
+		}
+
 		thread_scoped_lock lock(rpc_lock);

 		mem.device_pointer = ++mem_counter;
@@ -481,7 +487,7 @@ protected:
 				mem.data_pointer = 0;

 			/* perform the allocation on the actual device */
-			device->mem_alloc(mem, type);
+			device->mem_alloc(NULL, mem, type);

 			/* store a mapping to/from client_pointer and real device pointer */
 			pointer_mapping_insert(client_pointer, mem.device_pointer);
--- a/intern/cycles/device/device_network.h
+++ b/intern/cycles/device/device_network.h
@@ -33,12 +33,12 @@
 #include <sstream>
 #include <deque>

-#include "buffers.h"
+#include "render/buffers.h"

-#include "util_foreach.h"
-#include "util_list.h"
-#include "util_map.h"
-#include "util_string.h"
+#include "util/util_foreach.h"
+#include "util/util_list.h"
+#include "util/util_map.h"
+#include "util/util_string.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -16,12 +16,12 @@

 #ifdef WITH_OPENCL

-#include "opencl/opencl.h"
+#include "device/opencl/opencl.h"

-#include "device_intern.h"
+#include "device/device_intern.h"

-#include "util_foreach.h"
-#include "util_logging.h"
+#include "util/util_foreach.h"
+#include "util/util_logging.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -0,0 +1,327 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/device_split_kernel.h"
+
+#include "kernel/kernel_types.h"
+#include "kernel/split/kernel_split_data_types.h"
+
+#include "util/util_logging.h"
+#include "util/util_time.h"
+
+CCL_NAMESPACE_BEGIN
+
+static const double alpha = 0.1; /* alpha for rolling average */
+
+/* Set up the split kernel in its initial state: no kernels are loaded yet,
+ * no tile has been rendered and no timing information has been collected. */
+DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
+{
+	current_max_closure = -1;
+	first_tile = true;
+
+	avg_time_per_sample = 0.0;
+
+	/* All kernel pointers start out as NULL, so the destructor can delete
+	 * them unconditionally. */
+	kernel_path_init = NULL;
+	kernel_scene_intersect = NULL;
+	kernel_lamp_emission = NULL;
+	kernel_do_volume = NULL;
+	kernel_queue_enqueue = NULL;
+	kernel_indirect_background = NULL;
+	kernel_shader_setup = NULL;
+	kernel_shader_sort = NULL;
+	kernel_shader_eval = NULL;
+	kernel_holdout_emission_blurring_pathtermination_ao = NULL;
+	kernel_subsurface_scatter = NULL;
+	kernel_direct_lighting = NULL;
+	kernel_shadow_blocked_ao = NULL;
+	kernel_shadow_blocked_dl = NULL;
+	kernel_next_iteration_setup = NULL;
+	kernel_indirect_subsurface = NULL;
+	kernel_buffer_update = NULL;
+}
+
+/* Free the global state buffers on the device and delete all kernel objects.
+ * Kernels that were never loaded are still NULL, which delete accepts. */
+DeviceSplitKernel::~DeviceSplitKernel()
+{
+	device->mem_free(split_data);
+	device->mem_free(ray_state);
+	device->mem_free(use_queues_flag);
+	device->mem_free(queue_index);
+	device->mem_free(work_pool_wgs);
+
+	delete kernel_path_init;
+	delete kernel_scene_intersect;
+	delete kernel_lamp_emission;
+	delete kernel_do_volume;
+	delete kernel_queue_enqueue;
+	delete kernel_indirect_background;
+	delete kernel_shader_setup;
+	delete kernel_shader_sort;
+	delete kernel_shader_eval;
+	delete kernel_holdout_emission_blurring_pathtermination_ao;
+	delete kernel_subsurface_scatter;
+	delete kernel_direct_lighting;
+	delete kernel_shadow_blocked_ao;
+	delete kernel_shadow_blocked_dl;
+	delete kernel_next_iteration_setup;
+	delete kernel_indirect_subsurface;
+	delete kernel_buffer_update;
+}
+
+/* Load every split kernel function for the requested features.
+ * Returns false as soon as one kernel cannot be obtained; the kernels loaded
+ * up to that point stay assigned. On success the requested max_closure is
+ * remembered in current_max_closure and true is returned. */
+bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_features)
+{
+/* Assigns kernel_<name> and leaves the function when the lookup fails. */
+#define LOAD_KERNEL(name) \
+		kernel_##name = get_split_kernel_function(#name, requested_features); \
+		if(!kernel_##name) { \
+			return false; \
+		}
+
+	LOAD_KERNEL(path_init);
+	LOAD_KERNEL(scene_intersect);
+	LOAD_KERNEL(lamp_emission);
+	LOAD_KERNEL(do_volume);
+	LOAD_KERNEL(queue_enqueue);
+	LOAD_KERNEL(indirect_background);
+	LOAD_KERNEL(shader_setup);
+	LOAD_KERNEL(shader_sort);
+	LOAD_KERNEL(shader_eval);
+	LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
+	LOAD_KERNEL(subsurface_scatter);
+	LOAD_KERNEL(direct_lighting);
+	LOAD_KERNEL(shadow_blocked_ao);
+	LOAD_KERNEL(shadow_blocked_dl);
+	LOAD_KERNEL(next_iteration_setup);
+	LOAD_KERNEL(indirect_subsurface);
+	LOAD_KERNEL(buffer_update);
+
+#undef LOAD_KERNEL
+
+	current_max_closure = requested_features.max_closure;
+
+	return true;
+}
+
+/* Work out how many split state elements fit into max_buffer_size.
+ * The size of one element is derived from the state buffer size that is
+ * reported for 1024 elements; it is also written to the verbose log. */
+size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size)
+{
+	const uint64_t bytes_per_element = state_buffer_size(kg, data, 1024) / 1024;
+
+	VLOG(1) << "Split state element size: "
+	        << string_human_readable_number(bytes_per_element) << " bytes. ("
+	        << string_human_readable_size(bytes_per_element) << ").";
+
+	const uint64_t max_elements = max_buffer_size / bytes_per_element;
+	return max_elements;
+}
+
+bool DeviceSplitKernel::path_trace(DeviceTask *task,
+                                   RenderTile& tile,
+                                   device_memory& kgbuffer,
+                                   device_memory& kernel_data)
+{
+	if(device->have_error()) {
+		return false;
+	}
+
+	/* Get local size */
+	size_t local_size[2];
+	{
+		int2 lsize = split_kernel_local_size();
+		local_size[0] = lsize[0];
+		local_size[1] = lsize[1];
+	}
+
+	/* Number of elements in the global state buffer */
+	int num_global_elements = global_size[0] * global_size[1];
+
+	/* Allocate all required global memory once. */
+	if(first_tile) {
+		first_tile = false;
+
+		/* Set global size */
+		{
+			int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task);
+
+			/* Make sure that set work size is a multiple of local
+			 * work size dimensions.
+			 */
+			global_size[0] = round_up(gsize[0], local_size[0]);
+			global_size[1] = round_up(gsize[1], local_size[1]);
+		}
+
+		num_global_elements = global_size[0] * global_size[1];
+		assert(num_global_elements % WORK_POOL_SIZE == 0);
+
+		/* Calculate max groups */
+
+		/* Denotes the maximum work groups possible w.r.t. current requested tile size. */
+		unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU : WORK_POOL_SIZE_GPU;
+		unsigned int max_work_groups = num_global_elements / work_pool_size + 1;
+
+		/* Allocate work_pool_wgs memory. */
+		work_pool_wgs.resize(max_work_groups);
+		device->mem_alloc("work_pool_wgs", work_pool_wgs, MEM_READ_WRITE);
+
+		queue_index.resize(NUM_QUEUES);
+		device->mem_alloc("queue_index", queue_index, MEM_READ_WRITE);
+
+		use_queues_flag.resize(1);
+		device->mem_alloc("use_queues_flag", use_queues_flag, MEM_READ_WRITE);
+
+		ray_state.resize(num_global_elements);
+		device->mem_alloc("ray_state", ray_state, MEM_READ_WRITE);
+
+		split_data.resize(state_buffer_size(kgbuffer, kernel_data, num_global_elements));
+		device->mem_alloc("split_data", split_data, MEM_READ_WRITE);
+	}
+
+#define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \
+		if(device->have_error()) { \
+			return false; \
+		} \
+		if(!kernel_##name->enqueue(KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
+			return false; \
+		}
+
+	tile.sample = tile.start_sample;
+
+	/* for exponential increase between tile updates */
+	int time_multiplier = 1;
+
+	while(tile.sample < tile.start_sample + tile.num_samples) {
+		/* to keep track of how long it takes to run a number of samples */
+		double start_time = time_dt();
+
+		/* initial guess to start rolling average */
+		const int initial_num_samples = 1;
+		/* approx number of samples per second */
+		int samples_per_second = (avg_time_per_sample > 0.0) ?
+		                         int(double(time_multiplier) / avg_time_per_sample) + 1 : initial_num_samples;
+
+		RenderTile subtile = tile;
+		subtile.start_sample = tile.sample;
+		subtile.num_samples = min(samples_per_second, tile.start_sample + tile.num_samples - tile.sample);
+
+		if(device->have_error()) {
+			return false;
+		}
+
+		/* reset state memory here as global size for data_init
+		 * kernel might not be large enough to do in kernel
+		 */
+		device->mem_zero(work_pool_wgs);
+		device->mem_zero(split_data);
+		device->mem_zero(ray_state);
+
+		if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
+		                                   subtile,
+		                                   num_global_elements,
+		                                   kgbuffer,
+		                                   kernel_data,
+		                                   split_data,
+		                                   ray_state,
+		                                   queue_index,
+		                                   use_queues_flag,
+		                                   work_pool_wgs))
+		{
+			return false;
+		}
+
+		ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size);
+
+		bool activeRaysAvailable = true;
+		double cancel_time = DBL_MAX;
+
+		while(activeRaysAvailable) {
+			/* Do path-iteration in host [Enqueue Path-iteration kernels]. */
+			for(int PathIter = 0; PathIter < 16; PathIter++) {
+				ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(shader_setup, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(shader_sort, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
+				ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
+
+				if(task->get_cancel() && cancel_time == DBL_MAX) {
+					/* Wait up to twice as many seconds for current samples to finish 
+					 * to avoid artifacts in render result from ending too soon.
+					 */
+					cancel_time = time_dt() + 2.0 * time_multiplier;
+				}
+
+				if(time_dt() > cancel_time) {
+					return true;
+				}
+			}
+
+			/* Decide if we should exit path-iteration in host. */
+			device->mem_copy_from(ray_state, 0, global_size[0] * global_size[1] * sizeof(char), 1, 1);
+
+			activeRaysAvailable = false;
+
+			for(int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
+				if(!IS_STATE(ray_state.get_data(), rayStateIter, RAY_INACTIVE)) {
+					if(IS_STATE(ray_state.get_data(), rayStateIter, RAY_INVALID)) {
+						/* Something went wrong, abort to avoid looping endlessly. */
+						device->set_error("Split kernel error: invalid ray state");
+						return false;
+					}
+
+					/* Not all rays are RAY_INACTIVE. */
+					activeRaysAvailable = true;
+					break;
+				}
+			}
+
+			if(time_dt() > cancel_time) {
+				return true;
+			}
+		}
+
+		double time_per_sample = ((time_dt()-start_time) / subtile.num_samples);
+
+		if(avg_time_per_sample == 0.0) {
+			/* start rolling average */
+			avg_time_per_sample = time_per_sample;
+		}
+		else {
+			avg_time_per_sample = alpha*time_per_sample + (1.0-alpha)*avg_time_per_sample;
+		}
+
+#undef ENQUEUE_SPLIT_KERNEL
+
+		tile.sample += subtile.num_samples;
+		task->update_progress(&tile, tile.w*tile.h*subtile.num_samples);
+
+		time_multiplier = min(time_multiplier << 1, 10);
+
+		if(task->get_cancel()) {
+			return true;
+		}
+	}
+
+	return true;
+}
+
+CCL_NAMESPACE_END
+
+
--- a/Show More
+++ b/Show More