Compilation error fix after recent cleanup

Please do not do cleanups in minimal configuration, doing that has been proven to only cause issues without solving anything meaningful ;)
Cleanup: GPU arg wrapping
2016-06-09 13:28:00 +02:00 · 2016-06-09 13:28:00 +02:00 · 2016-06-09 13:28:00 +02:00 · 2016-06-09 13:28:00 +02:00 · 2016-06-09 13:28:00 +02:00 · 2016-06-09 13:28:00 +02:00
613 changed files with 25035 additions and 13597 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -172,7 +172,6 @@ if(UNIX AND NOT APPLE)
 	set(_init_CODEC_FFMPEG                   OFF)
 	set(_init_CYCLES_OSL                     OFF)
 	set(_init_IMAGE_OPENEXR                  OFF)
-	set(_init_INPUT_NDOF                     OFF)
 	set(_init_JACK                           OFF)
 	set(_init_OPENCOLLADA                    OFF)
 	set(_init_OPENCOLORIO                    OFF)
@@ -478,9 +477,19 @@ if(WIN32)
 endif()

 # Experimental support of C11 and C++11
-option(WITH_C11 "Build with C11 standard enabled, for development use only!" OFF)
+#
+# We default options to whatever default standard in the current compiler.
+if(CMAKE_COMPILER_IS_GNUCC AND (NOT "${CMAKE_C_COMPILER_VERSION}" VERSION_LESS "6.0") AND (NOT WITH_CXX11))
+	set(_c11_init ON)
+	set(_cxx11_init ON)
+else()
+	set(_c11_init OFF)
+	set(_cxx11_init OFF)
+endif()
+
+option(WITH_C11 "Build with C11 standard enabled, for development use only!" ${_c11_init})
 mark_as_advanced(WITH_C11)
-option(WITH_CXX11 "Build with C++11 standard enabled, for development use only!" OFF)
+option(WITH_CXX11 "Build with C++11 standard enabled, for development use only!" ${_cxx11_init})
 mark_as_advanced(WITH_CXX11)

 # Dependency graph
@@ -519,8 +528,8 @@ if(APPLE)

 	if(NOT CMAKE_OSX_ARCHITECTURES)
 		set(CMAKE_OSX_ARCHITECTURES x86_64 CACHE STRING
-		"Choose the architecture you want to build Blender for: i386, x86_64 or ppc"
-		FORCE)
+			"Choose the architecture you want to build Blender for: i386, x86_64 or ppc"
+			FORCE)
 	endif()

 	if(NOT DEFINED OSX_SYSTEM)
@@ -530,15 +539,20 @@ if(APPLE)
 		        OUTPUT_STRIP_TRAILING_WHITESPACE)
 	endif()

-	# workaround for incorrect cmake xcode lookup for developer previews - XCODE_VERSION does not take xcode-select path into accout
-	# but would always look into /Applications/Xcode.app while dev versions are named Xcode<version>-DP<preview_number>
-	execute_process(COMMAND xcode-select --print-path  OUTPUT_VARIABLE XCODE_CHECK OUTPUT_STRIP_TRAILING_WHITESPACE)
+	# Workaround for incorrect CMake Xcode lookup for developer previews: XCODE_VERSION does not
+	# take the xcode-select path into account and would always look into /Applications/Xcode.app,
+	# while dev versions are named Xcode<version>-DP<preview_number>.
+	execute_process(
+	        COMMAND xcode-select --print-path
+	        OUTPUT_VARIABLE XCODE_CHECK OUTPUT_STRIP_TRAILING_WHITESPACE)
 	string(REPLACE "/Contents/Developer" "" XCODE_BUNDLE ${XCODE_CHECK}) # truncate to bundlepath in any case
 	
 	if(${CMAKE_GENERATOR} MATCHES "Xcode")
 	
-		if(${XCODE_VERSION} VERSION_GREATER 4.2) # earlier xcode has no bundled developer dir, no sense in getting xcode path from
-			string(SUBSTRING "${XCODE_CHECK}" 14 6 DP_NAME) # reduce to XCode name without dp extension
+		# earlier xcode has no bundled developer dir, no sense in getting xcode path from
+		if(${XCODE_VERSION} VERSION_GREATER 4.2) 
+			# reduce to XCode name without dp extension
+			string(SUBSTRING "${XCODE_CHECK}" 14 6 DP_NAME) 
 			if(${DP_NAME} MATCHES Xcode5)
 				set(XCODE_VERSION 5)
 			endif()
@@ -565,25 +579,30 @@ if(APPLE)
 	message(STATUS "Detected OS X ${OSX_SYSTEM} and Xcode ${XCODE_VERSION} at ${XCODE_BUNDLE}")

 	if(${XCODE_VERSION} VERSION_LESS 4.3)
-		set(CMAKE_OSX_SYSROOT /Developer/SDKs/MacOSX${OSX_SYSTEM}.sdk CACHE PATH "" FORCE)  # use guaranteed existing sdk
+		# use guaranteed existing sdk
+		set(CMAKE_OSX_SYSROOT /Developer/SDKs/MacOSX${OSX_SYSTEM}.sdk CACHE PATH "" FORCE)
 	else()
-		# note: xcode-select path could be ambigous, cause /Applications/Xcode.app/Contents/Developer or /Applications/Xcode.app would be allowed
+		# note: xcode-select path could be ambiguous,
+		# because /Applications/Xcode.app/Contents/Developer or /Applications/Xcode.app would be allowed
 		# so i use a selfcomposed bundlepath here  
 		set(OSX_SYSROOT_PREFIX ${XCODE_BUNDLE}/Contents/Developer/Platforms/MacOSX.platform)
 		message(STATUS "OSX_SYSROOT_PREFIX: " ${OSX_SYSROOT_PREFIX})
 		set(OSX_DEVELOPER_PREFIX /Developer/SDKs/MacOSX${OSX_SYSTEM}.sdk) # use guaranteed existing sdk
 		set(CMAKE_OSX_SYSROOT ${OSX_SYSROOT_PREFIX}/${OSX_DEVELOPER_PREFIX} CACHE PATH "" FORCE)
 		if(${CMAKE_GENERATOR} MATCHES "Xcode")
-			set(CMAKE_XCODE_ATTRIBUTE_SDKROOT macosx${OSX_SYSTEM}) # to silence sdk not found warning, just overrides CMAKE_OSX_SYSROOT
+			# to silence sdk not found warning, just overrides CMAKE_OSX_SYSROOT
+			set(CMAKE_XCODE_ATTRIBUTE_SDKROOT macosx${OSX_SYSTEM})
 		endif()
 	endif()

 	if(OSX_SYSTEM MATCHES 10.9)
-		set(CMAKE_FIND_ROOT_PATH ${CMAKE_OSX_SYSROOT}) # make sure syslibs and headers are looked up in sdk ( expecially for 10.9 openGL atm. )
+		# make sure syslibs and headers are looked up in sdk ( especially for 10.9 openGL atm. )
+		set(CMAKE_FIND_ROOT_PATH ${CMAKE_OSX_SYSROOT})
 	endif()

 	if(NOT CMAKE_OSX_DEPLOYMENT_TARGET)
-		set(CMAKE_OSX_DEPLOYMENT_TARGET "10.6" CACHE STRING "" FORCE) # 10.6 is our min. target, if you use higher sdk, weak linking happens
+		# 10.6 is our min. target, if you use higher sdk, weak linking happens
+		set(CMAKE_OSX_DEPLOYMENT_TARGET "10.6" CACHE STRING "" FORCE)
 	endif()
 	
 	if(NOT ${CMAKE_GENERATOR} MATCHES "Xcode")
@@ -592,8 +611,6 @@ if(APPLE)
 		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
 		add_definitions("-DMACOSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET}")
 	endif()
-
-	option(WITH_LIBS10.5  "Use 10.5 libs (needed for 64bit builds)" OFF)
 endif()


@@ -601,7 +618,10 @@ endif()
 # Check for conflicting/unsupported configurations

 if(NOT WITH_BLENDER AND NOT WITH_PLAYER AND NOT WITH_CYCLES_STANDALONE)
-	message(FATAL_ERROR "At least one of WITH_BLENDER or WITH_PLAYER or WITH_CYCLES_STANDALONE must be enabled, nothing to do!")
+	message(FATAL_ERROR
+		"At least one of WITH_BLENDER or WITH_PLAYER or "
+		"WITH_CYCLES_STANDALONE must be enabled, nothing to do!"
+	)
 endif()

 if(NOT WITH_GAMEENGINE AND WITH_PLAYER)
@@ -665,7 +685,8 @@ if(NOT WITH_BOOST)
 	set_and_warn(WITH_OPENAL         OFF)  # depends on AUDASPACE
 	set_and_warn(WITH_GAMEENGINE     OFF)  # depends on AUDASPACE
 	set_and_warn(WITH_PLAYER         OFF)  # depends on GAMEENGINE
-elseif(WITH_CYCLES OR WITH_OPENIMAGEIO OR WITH_AUDASPACE OR WITH_INTERNATIONAL OR WITH_OPENVDB OR WITH_OPENCOLORIO OR WITH_MOD_BOOLEAN)
+elseif(WITH_CYCLES OR WITH_OPENIMAGEIO OR WITH_AUDASPACE OR WITH_INTERNATIONAL OR
+       WITH_OPENVDB OR WITH_OPENCOLORIO OR WITH_MOD_BOOLEAN)
 	# Keep enabled
 else()
 	# New dependency graph needs either Boost or C++11 for function bindings.
@@ -762,18 +783,22 @@ endif()

 if(WITH_INTERNATIONAL)
 	if(NOT EXISTS "${CMAKE_SOURCE_DIR}/release/datafiles/locale/languages")
-		message(WARNING "Translation path '${CMAKE_SOURCE_DIR}/release/datafiles/locale' is missing, "
-						"This is a 'git submodule', which are known not to work with bridges to other version "
-						"control systems, disabling 'WITH_INTERNATIONAL'.")
+		message(WARNING
+			"Translation path '${CMAKE_SOURCE_DIR}/release/datafiles/locale' is missing, "
+			"This is a 'git submodule', which are known not to work with bridges to other version "
+			"control systems, disabling 'WITH_INTERNATIONAL'."
+		)
 		set(WITH_INTERNATIONAL OFF)
 	endif()
 endif()

 if(WITH_PYTHON)
 	if(NOT EXISTS "${CMAKE_SOURCE_DIR}/release/scripts/addons/modules")
-		message(WARNING "Addons path '${CMAKE_SOURCE_DIR}/release/scripts/addons' is missing, "
-						"This is a 'git submodule', which are known not to work with bridges to other version "
-						"control systems: * CONTINUING WITHOUT ADDONS *")
+		message(WARNING
+			"Addons path '${CMAKE_SOURCE_DIR}/release/scripts/addons' is missing, "
+			"This is a 'git submodule', which are known not to work with bridges to other version "
+			"control systems: * CONTINUING WITHOUT ADDONS *"
+		)
 	endif()
 endif()

@@ -807,21 +832,6 @@ set(PLATFORM_LINKFLAGS "")
 set(PLATFORM_LINKFLAGS_DEBUG "")


-# For alternate Python locations the commandline can be used to override detected/default cache settings, e.g:
-# On Unix:
-#   cmake ../blender \
-#         -D PYTHON_VERSION=3.5 \
-#         -D PYTHON_INCLUDE_DIR=/opt/py35/include/python3.5d \
-#         -D PYTHON_LIBRARY=/opt/py35/lib/libpython3.5d.so
-#
-# On Macs:
-#   cmake ../blender \
-#         -D PYTHON_INCLUDE_DIR=/System/Library/Frameworks/Python.framework/Versions/3.5/include/python3.5 \
-#         -D PYTHON_LIBPATH=/System/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/config \
-#         -G Xcode
-#
-# When changing any of this remember to update the notes in doc/build_systems/cmake.txt
-
 #-----------------------------------------------------------------------------
 #Platform specifics

@@ -1023,14 +1033,12 @@ if(UNIX AND NOT APPLE)

 	if(WITH_INPUT_NDOF)
 		find_package_wrapper(Spacenav)
-		if(NOT SPACENAV_FOUND)
-			set(WITH_INPUT_NDOF OFF)
-		endif()
-
-		# use generic names within blenders buildsystem.
 		if(SPACENAV_FOUND)
+			# use generic names within blenders buildsystem.
 			set(NDOF_INCLUDE_DIRS ${SPACENAV_INCLUDE_DIRS})
 			set(NDOF_LIBRARIES ${SPACENAV_LIBRARIES})
+		else()
+			set(WITH_INPUT_NDOF OFF)
 		endif()
 	endif()

@@ -1044,7 +1052,11 @@ if(UNIX AND NOT APPLE)
 			if(${OSL_LIBRARY_VERSION_MAJOR} EQUAL "1" AND ${OSL_LIBRARY_VERSION_MINOR} LESS "6")
 				# Note: --whole-archive is needed to force loading of all symbols in liboslexec,
 				# otherwise LLVM is missing the osl_allocate_closure_component function
-				set(OSL_LIBRARIES ${OSL_OSLCOMP_LIBRARY} -Wl,--whole-archive ${OSL_OSLEXEC_LIBRARY} -Wl,--no-whole-archive ${OSL_OSLQUERY_LIBRARY})
+				set(OSL_LIBRARIES
+					${OSL_OSLCOMP_LIBRARY}
+					-Wl,--whole-archive ${OSL_OSLEXEC_LIBRARY}
+					-Wl,--no-whole-archive ${OSL_OSLQUERY_LIBRARY}
+				)
 			endif()
 		else()
 			message(STATUS "OSL not found, disabling it from Cycles")
@@ -1115,7 +1127,13 @@ if(UNIX AND NOT APPLE)
 			set(PUGIXML_LIBRARIES "")
 		endif()

-		set(OPENIMAGEIO_LIBRARIES ${OPENIMAGEIO_LIBRARIES} ${PNG_LIBRARIES} ${JPEG_LIBRARIES} ${ZLIB_LIBRARIES} ${BOOST_LIBRARIES})
+		set(OPENIMAGEIO_LIBRARIES
+			${OPENIMAGEIO_LIBRARIES}
+			${PNG_LIBRARIES}
+			${JPEG_LIBRARIES}
+			${ZLIB_LIBRARIES}
+			${BOOST_LIBRARIES}
+		)
 		set(OPENIMAGEIO_LIBPATH)  # TODO, remove and reference the absolute path everywhere
 		set(OPENIMAGEIO_DEFINITIONS "")

@@ -1156,7 +1174,9 @@ if(UNIX AND NOT APPLE)

 	if(WITH_LLVM OR WITH_SDL_DYNLOAD)
 		# Fix for conflict with Mesa llvmpipe
-		set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -Wl,--version-script='${CMAKE_SOURCE_DIR}/source/creator/blender.map'")
+		set(PLATFORM_LINKFLAGS
+			"${PLATFORM_LINKFLAGS} -Wl,--version-script='${CMAKE_SOURCE_DIR}/source/creator/blender.map'"
+		)
 	endif()

 	if(WITH_OPENSUBDIV)
@@ -1259,7 +1279,10 @@ elseif(WIN32)
 		set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /SAFESEH:NO")
 		set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} /SAFESEH:NO")

-		list(APPEND PLATFORM_LINKLIBS ws2_32 vfw32 winmm kernel32 user32 gdi32 comdlg32 advapi32 shfolder shell32 ole32 oleaut32 uuid psapi Dbghelp)
+		list(APPEND PLATFORM_LINKLIBS
+			ws2_32 vfw32 winmm kernel32 user32 gdi32 comdlg32
+			advapi32 shfolder shell32 ole32 oleaut32 uuid psapi Dbghelp
+		)

 		if(WITH_INPUT_IME)
 			list(APPEND PLATFORM_LINKLIBS imm32)
@@ -1297,7 +1320,8 @@ elseif(WIN32)
 		set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /MT")
 		set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} /MT")

-		set(PLATFORM_LINKFLAGS "/SUBSYSTEM:CONSOLE /STACK:2097152 /INCREMENTAL:NO /NODEFAULTLIB:msvcrt.lib /NODEFAULTLIB:msvcmrt.lib /NODEFAULTLIB:msvcurt.lib /NODEFAULTLIB:msvcrtd.lib")
+		set(PLATFORM_LINKFLAGS "/SUBSYSTEM:CONSOLE /STACK:2097152 /INCREMENTAL:NO ")
+		set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} /NODEFAULTLIB:msvcrt.lib /NODEFAULTLIB:msvcmrt.lib /NODEFAULTLIB:msvcurt.lib /NODEFAULTLIB:msvcrtd.lib ")

 		# Ignore meaningless for us linker warnings.
 		set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} /ignore:4049 /ignore:4217 /ignore:4221")
@@ -1351,9 +1375,9 @@ elseif(WIN32)
 		# Add each of our libraries to our cmake_prefix_path so find_package() could work
 		file(GLOB children RELATIVE ${LIBDIR} ${LIBDIR}/*)
 		foreach(child ${children})
-		if(IS_DIRECTORY ${LIBDIR}/${child})
-			list(APPEND CMAKE_PREFIX_PATH  ${LIBDIR}/${child})
-		endif()
+			if(IS_DIRECTORY ${LIBDIR}/${child})
+				list(APPEND CMAKE_PREFIX_PATH  ${LIBDIR}/${child})
+			endif()
 		endforeach()

 		set(ZLIB_INCLUDE_DIRS ${LIBDIR}/zlib/include)
@@ -1458,23 +1482,28 @@ elseif(WIN32)
 				set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${OPENEXR}/include/OpenEXR)
 				set(OPENEXR_LIBPATH ${OPENEXR}/lib)
 				set(OPENEXR_LIBRARIES
-					optimized ${OPENEXR_LIBPATH}/Iex-2_2.lib debug ${OPENEXR_LIBPATH}/Iex-2_2_d.lib
-					optimized ${OPENEXR_LIBPATH}/Half.lib debug ${OPENEXR_LIBPATH}/Half_d.lib
-					optimized ${OPENEXR_LIBPATH}/IlmImf-2_2.lib debug ${OPENEXR_LIBPATH}/IlmImf-2_2_d.lib
-					optimized ${OPENEXR_LIBPATH}/Imath-2_2.lib debug ${OPENEXR_LIBPATH}/Imath-2_2_d.lib
-					optimized ${OPENEXR_LIBPATH}/IlmThread-2_2.lib debug ${OPENEXR_LIBPATH}/IlmThread-2_2_d.lib
+					optimized ${OPENEXR_LIBPATH}/Iex-2_2.lib
+					optimized ${OPENEXR_LIBPATH}/Half.lib
+					optimized ${OPENEXR_LIBPATH}/IlmImf-2_2.lib
+					optimized ${OPENEXR_LIBPATH}/Imath-2_2.lib
+					optimized ${OPENEXR_LIBPATH}/IlmThread-2_2.lib
+					debug ${OPENEXR_LIBPATH}/Iex-2_2_d.lib
+					debug ${OPENEXR_LIBPATH}/Half_d.lib
+					debug ${OPENEXR_LIBPATH}/IlmImf-2_2_d.lib
+					debug ${OPENEXR_LIBPATH}/Imath-2_2_d.lib
+					debug ${OPENEXR_LIBPATH}/IlmThread-2_2_d.lib
 				)
 			endif()
 		endif()

 		if(WITH_IMAGE_TIFF)
-		# Try to find tiff first then complain and set static and maybe wrong paths
-		find_package(TIFF)
-		if(NOT TIFF_FOUND)
-			message(WARNING "Using HARDCODED libtiff locations")
-			set(TIFF_LIBRARY ${LIBDIR}/tiff/lib/libtiff.lib)
-			set(TIFF_INCLUDE_DIR ${LIBDIR}/tiff/include)
-		endif()
+			# Try to find tiff first then complain and set static and maybe wrong paths
+			find_package(TIFF)
+			if(NOT TIFF_FOUND)
+				message(WARNING "Using HARDCODED libtiff locations")
+				set(TIFF_LIBRARY ${LIBDIR}/tiff/lib/libtiff.lib)
+				set(TIFF_INCLUDE_DIR ${LIBDIR}/tiff/include)
+			endif()
 		endif()

 		if(WITH_JACK)
@@ -1529,12 +1558,17 @@ elseif(WIN32)
 					set(BOOST_DEBUG_POSTFIX "vc140-mt-sgd-1_60.lib")
 				endif()
 				set(BOOST_LIBRARIES
-					optimized libboost_date_time-${BOOST_POSTFIX} optimized libboost_filesystem-${BOOST_POSTFIX}
+					optimized libboost_date_time-${BOOST_POSTFIX}
+					optimized libboost_filesystem-${BOOST_POSTFIX}
 					optimized libboost_regex-${BOOST_POSTFIX}
-					optimized libboost_system-${BOOST_POSTFIX} optimized libboost_thread-${BOOST_POSTFIX}
-					debug libboost_date_time-${BOOST_DEBUG_POSTFIX} debug libboost_filesystem-${BOOST_DEBUG_POSTFIX}
+					optimized libboost_system-${BOOST_POSTFIX}
+					optimized libboost_thread-${BOOST_POSTFIX}
+					debug libboost_date_time-${BOOST_DEBUG_POSTFIX}
+					debug libboost_filesystem-${BOOST_DEBUG_POSTFIX}
 					debug libboost_regex-${BOOST_DEBUG_POSTFIX}
-					debug libboost_system-${BOOST_DEBUG_POSTFIX} debug libboost_thread-${BOOST_DEBUG_POSTFIX})
+					debug libboost_system-${BOOST_DEBUG_POSTFIX}
+					debug libboost_thread-${BOOST_DEBUG_POSTFIX}
+				)
 				if(WITH_CYCLES_OSL)
 					set(BOOST_LIBRARIES ${BOOST_LIBRARIES}
 						optimized libboost_wave-${BOOST_POSTFIX}
@@ -1663,7 +1697,8 @@ elseif(WIN32)
 			#endif
 			int main(void) { return 0; }
 			" 
-			WITH_MINGW64)
+			WITH_MINGW64
+		)
 		
 		if(NOT DEFINED LIBDIR)
 			if(WITH_MINGW64)
@@ -1684,7 +1719,10 @@ elseif(WIN32)
 			message(FATAL_ERROR "Windows requires pre-compiled libs at: '${LIBDIR}'")
 		endif()

-		list(APPEND PLATFORM_LINKLIBS -lshell32 -lshfolder -lgdi32 -lmsvcrt -lwinmm -lmingw32 -lm -lws2_32 -lz -lstdc++ -lole32 -luuid -lwsock32 -lpsapi -ldbghelp)
+		list(APPEND PLATFORM_LINKLIBS
+			-lshell32 -lshfolder -lgdi32 -lmsvcrt -lwinmm -lmingw32 -lm -lws2_32
+			-lz -lstdc++ -lole32 -luuid -lwsock32 -lpsapi -ldbghelp
+		)

 		if(WITH_INPUT_IME)
 			list(APPEND PLATFORM_LINKLIBS -limm32)
@@ -1753,7 +1791,14 @@ elseif(WIN32)
 				${OPENCOLLADA}/include/opencollada/GeneratedSaxParser
 			)
 			set(OPENCOLLADA_LIBPATH ${OPENCOLLADA}/lib/opencollada)
-			set(OPENCOLLADA_LIBRARIES OpenCOLLADAStreamWriter OpenCOLLADASaxFrameworkLoader OpenCOLLADAFramework OpenCOLLADABaseUtils GeneratedSaxParser UTF MathMLSolver buffer ftoa xml)
+			set(OPENCOLLADA_LIBRARIES
+				OpenCOLLADAStreamWriter
+				OpenCOLLADASaxFrameworkLoader
+				OpenCOLLADAFramework
+				OpenCOLLADABaseUtils
+				GeneratedSaxParser
+				UTF MathMLSolver buffer ftoa xml
+			)
 			set(PCRE_LIBRARIES pcre)
 		endif()

@@ -1826,12 +1871,14 @@ elseif(WIN32)
 			if(WITH_INTERNATIONAL)
 				set(BOOST_LIBRARIES ${BOOST_LIBRARIES}
 					optimized boost_locale-${BOOST_POSTFIX}
-					debug boost_locale-${BOOST_DEBUG_POSTFIX}) 
+					debug boost_locale-${BOOST_DEBUG_POSTFIX}
+				)
 			endif()
 			if(WITH_CYCLES_OSL)
 				set(BOOST_LIBRARIES ${BOOST_LIBRARIES}
 					optimized boost_wave-${BOOST_POSTFIX}
-					debug boost_wave-${BOOST_DEBUG_POSTFIX}) 
+					debug boost_wave-${BOOST_DEBUG_POSTFIX}
+				)
 			endif()
 			set(BOOST_LIBPATH ${BOOST}/lib)
 			set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB -DBOOST_THREAD_USE_LIB ")
@@ -1912,7 +1959,7 @@ elseif(WIN32)
 		set(OPENAL ${LIBDIR}/openal)
 		set(OPENALDIR ${LIBDIR}/openal)
 		set(OPENAL_INCLUDE_DIR ${OPENAL}/include)
-		if(MSVC12)
+		if(MSVC)
 			set(OPENAL_LIBRARY openal32)
 		else()
 			set(OPENAL_LIBRARY wrap_oal)
@@ -1940,7 +1987,14 @@ elseif(WIN32)
 		find_library(OSL_LIB_EXEC_DEBUG NAMES oslexec_d PATHS ${CYCLES_OSL}/lib)
 		find_library(OSL_LIB_COMP_DEBUG NAMES oslcomp_d PATHS ${CYCLES_OSL}/lib)
 		find_library(OSL_LIB_QUERY_DEBUG NAMES oslquery_d PATHS ${CYCLES_OSL}/lib)
-		list(APPEND OSL_LIBRARIES optimized ${OSL_LIB_COMP} optimized ${OSL_LIB_EXEC} optimized ${OSL_LIB_QUERY} debug ${OSL_LIB_EXEC_DEBUG} debug ${OSL_LIB_COMP_DEBUG} debug ${OSL_LIB_QUERY_DEBUG})
+		list(APPEND OSL_LIBRARIES
+			optimized ${OSL_LIB_COMP}
+			optimized ${OSL_LIB_EXEC}
+			optimized ${OSL_LIB_QUERY}
+			debug ${OSL_LIB_EXEC_DEBUG}
+			debug ${OSL_LIB_COMP_DEBUG}
+			debug ${OSL_LIB_QUERY_DEBUG}
+		)
 		find_path(OSL_INCLUDE_DIR OSL/oslclosure.h PATHS ${CYCLES_OSL}/include)
 		find_program(OSL_COMPILER NAMES oslc PATHS ${CYCLES_OSL}/bin)
 	
@@ -1954,20 +2008,8 @@ elseif(WIN32)

 elseif(APPLE)

-	if(${CMAKE_OSX_DEPLOYMENT_TARGET} STREQUAL "10.5" OR ${CMAKE_OSX_DEPLOYMENT_TARGET} STRGREATER "10.5")
-		set(WITH_LIBS10.5 ON CACHE BOOL "Use 10.5 libs" FORCE) # valid also for 10.6/7/8/9
-	endif()
-
 	if(NOT DEFINED LIBDIR)
-		if(WITH_LIBS10.5)
-			set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/darwin-9.x.universal)
-		else()
-			if(CMAKE_OSX_ARCHITECTURES MATCHES i386)
-				set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/darwin-8.x.i386)
-			else()
-				set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/darwin-8.0.0-powerpc)
-			endif()
-		endif()
+		set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/darwin-9.x.universal)
 	else()
 		message(STATUS "Using pre-compiled LIBDIR: ${LIBDIR}")
 	endif()
@@ -2025,11 +2067,15 @@ elseif(APPLE)
 			# set(PYTHON_LINKFLAGS "-u _PyMac_Error")  # won't  build with this enabled
 		else()
 			# module must be compiled against Python framework
-			set(PYTHON_INCLUDE_DIR "/Library/Frameworks/Python.framework/Versions/${PYTHON_VERSION}/include/python${PYTHON_VERSION}m")
-			set(PYTHON_EXECUTABLE "/Library/Frameworks/Python.framework/Versions/${PYTHON_VERSION}/bin/python${PYTHON_VERSION}m")
+			set(_py_framework "/Library/Frameworks/Python.framework/Versions/${PYTHON_VERSION}")
+
+			set(PYTHON_INCLUDE_DIR "${_py_framework}/include/python${PYTHON_VERSION}m")
+			set(PYTHON_EXECUTABLE "${_py_framework}/bin/python${PYTHON_VERSION}m")
+			set(PYTHON_LIBPATH "${_py_framework}/lib/python${PYTHON_VERSION}/config-${PYTHON_VERSION}m")
 			#set(PYTHON_LIBRARY python${PYTHON_VERSION})
-			set(PYTHON_LIBPATH "/Library/Frameworks/Python.framework/Versions/${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/config-${PYTHON_VERSION}m")
 			#set(PYTHON_LINKFLAGS "-u _PyMac_Error -framework Python")  # won't  build with this enabled
+
+			unset(_py_framework)
 		endif()
 		
 		# uncached vars
@@ -2071,7 +2117,10 @@ elseif(APPLE)
 	if(WITH_CODEC_FFMPEG)
 		set(FFMPEG ${LIBDIR}/ffmpeg)
 		set(FFMPEG_INCLUDE_DIRS ${FFMPEG}/include)
-		set(FFMPEG_LIBRARIES avcodec avdevice avformat avutil mp3lame swscale x264 xvidcore theora theoradec theoraenc vorbis vorbisenc vorbisfile ogg)
+		set(FFMPEG_LIBRARIES
+			avcodec avdevice avformat avutil
+			mp3lame swscale x264 xvidcore theora theoradec theoraenc vorbis vorbisenc vorbisfile ogg
+		)
 		set(FFMPEG_LIBPATH ${FFMPEG}/lib)
 	endif()

@@ -2082,13 +2131,13 @@ elseif(APPLE)
 	)
 	mark_as_advanced(SYSTEMSTUBS_LIBRARY)
 	if(SYSTEMSTUBS_LIBRARY)
-		list(APPEND PLATFORM_LINKLIBS stdc++ SystemStubs)
-	else()
-		list(APPEND PLATFORM_LINKLIBS stdc++)
+		list(APPEND PLATFORM_LINKLIBS SystemStubs)
 	endif()

 	set(PLATFORM_CFLAGS "-pipe -funsigned-char")
-	set(PLATFORM_LINKFLAGS "-fexceptions -framework CoreServices -framework Foundation -framework IOKit -framework AppKit -framework Cocoa -framework Carbon -framework AudioUnit -framework AudioToolbox -framework CoreAudio")
+	set(PLATFORM_LINKFLAGS
+		"-fexceptions -framework CoreServices -framework Foundation -framework IOKit -framework AppKit -framework Cocoa -framework Carbon -framework AudioUnit -framework AudioToolbox -framework CoreAudio"
+	)
 	if(WITH_CODEC_QUICKTIME)
 		set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -framework QTKit")
 		if(CMAKE_OSX_ARCHITECTURES MATCHES i386)
@@ -2097,22 +2146,10 @@ elseif(APPLE)
 		endif()
 	endif()

-	# XXX - SOME MAC DEV PLEASE TEST WITH THE SDK INSTALLED!
-	# ALSO SHOULD BE MOVED INTO OWN MODULE WHEN FUNCTIONAL
-	if(WITH_INPUT_NDOF)
-		# This thread it *should* work and check the framework - campbell
-		# http://www.cmake.org/pipermail/cmake/2005-December/007740.html
-		find_library(3DCONNEXION_CLIENT_FRAMEWORK
-			NAMES 3DconnexionClient
-		)
-		if(NOT 3DCONNEXION_CLIENT_FRAMEWORK)
-			set(WITH_INPUT_NDOF OFF)
-		endif()
-
-		if(WITH_INPUT_NDOF)
-			set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -F/Library/Frameworks -weak_framework 3DconnexionClient")
-			set(NDOF_INCLUDE_DIRS /Library/Frameworks/3DconnexionClient.framework/Headers )
-		endif()
+	if(WITH_CXX11)
+		list(APPEND PLATFORM_LINKLIBS c++)
+	else()
+		list(APPEND PLATFORM_LINKLIBS stdc++)
 	endif()

 	if(WITH_JACK)
@@ -2120,7 +2157,8 @@ elseif(APPLE)
 	endif()
 	
 	if(WITH_PYTHON_MODULE OR WITH_PYTHON_FRAMEWORK)
-		set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} /Library/Frameworks/Python.framework/Versions/${PYTHON_VERSION}/Python")# force cmake to link right framework
+		# force cmake to link right framework
+		set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} /Library/Frameworks/Python.framework/Versions/${PYTHON_VERSION}/Python")
 	endif()
 	
 	if(WITH_OPENCOLLADA)
@@ -2135,10 +2173,18 @@ elseif(APPLE)
 		)

 		set(OPENCOLLADA_LIBPATH ${OPENCOLLADA}/lib)
-		set(OPENCOLLADA_LIBRARIES "OpenCOLLADASaxFrameworkLoader -lOpenCOLLADAFramework -lOpenCOLLADABaseUtils -lOpenCOLLADAStreamWriter -lMathMLSolver -lGeneratedSaxParser -lxml2 -lbuffer -lftoa")
+		set(OPENCOLLADA_LIBRARIES
+			OpenCOLLADASaxFrameworkLoader
+			-lOpenCOLLADAFramework
+			-lOpenCOLLADABaseUtils
+			-lOpenCOLLADAStreamWriter
+			-lMathMLSolver
+			-lGeneratedSaxParser
+			-lxml2 -lbuffer -lftoa
+		)
 		# Use UTF functions from collada if LLVM is not enabled
 		if(NOT WITH_LLVM)
-			set(OPENCOLLADA_LIBRARIES "${OPENCOLLADA_LIBRARIES} -lUTF")
+			list(APPEND OPENCOLLADA_LIBRARIES -lUTF)
 		endif()
 		# pcre is bundled with openCollada
 		#set(PCRE ${LIBDIR}/pcre)
@@ -2173,14 +2219,17 @@ elseif(APPLE)
 		set(TIFF_LIBPATH ${TIFF}/lib)
 	endif()

-	if(WITH_INPUT_NDOF)
-		# linker needs "-weak_framework 3DconnexionClient"
-	endif()
-
 	if(WITH_BOOST)
 		set(BOOST ${LIBDIR}/boost)
 		set(BOOST_INCLUDE_DIR ${BOOST}/include)
-		set(BOOST_LIBRARIES boost_date_time-mt boost_filesystem-mt boost_regex-mt boost_system-mt boost_thread-mt boost_wave-mt)
+		set(BOOST_LIBRARIES
+			boost_date_time-mt
+			boost_filesystem-mt
+			boost_regex-mt
+			boost_system-mt
+			boost_thread-mt
+			boost_wave-mt
+		)
 		if(WITH_INTERNATIONAL)
 			list(APPEND BOOST_LIBRARIES boost_locale-mt)
 		endif()
@@ -2201,8 +2250,22 @@ elseif(APPLE)
 	if(WITH_OPENIMAGEIO)
 		set(OPENIMAGEIO ${LIBDIR}/openimageio)
 		set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO}/include)
-		set(OPENIMAGEIO_LIBRARIES ${OPENIMAGEIO}/lib/libOpenImageIO.a ${PNG_LIBRARIES} ${JPEG_LIBRARIES} ${TIFF_LIBRARY} ${OPENEXR_LIBRARIES} ${ZLIB_LIBRARIES})
-		set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib ${JPEG_LIBPATH} ${PNG_LIBPATH} ${TIFF_LIBPATH} ${OPENEXR_LIBPATH} ${ZLIB_LIBPATH})
+		set(OPENIMAGEIO_LIBRARIES
+			${OPENIMAGEIO}/lib/libOpenImageIO.a
+			${PNG_LIBRARIES}
+			${JPEG_LIBRARIES}
+			${TIFF_LIBRARY}
+			${OPENEXR_LIBRARIES}
+			${ZLIB_LIBRARIES}
+		)
+		set(OPENIMAGEIO_LIBPATH
+			${OPENIMAGEIO}/lib
+			${JPEG_LIBPATH}
+			${PNG_LIBPATH}
+			${TIFF_LIBPATH}
+			${OPENEXR_LIBPATH}
+			${ZLIB_LIBPATH}
+		)
 		set(OPENIMAGEIO_DEFINITIONS "-DOIIO_STATIC_BUILD")
 		set(OPENIMAGEIO_IDIFF "${LIBDIR}/openimageio/bin/idiff")
 	endif()
@@ -2295,8 +2358,14 @@ elseif(APPLE)
 				include_directories(${LIBDIR}/openmp/include)
 				link_directories(${LIBDIR}/openmp/lib)
 				# This is a workaround for our helperbinaries ( datatoc, masgfmt, ... ),
-				# They are linked also to omp lib, so we need it in builddir for runtime exexcution, TODO: remove all unneeded dependencies from these
-				execute_process(COMMAND ditto -arch ${CMAKE_OSX_ARCHITECTURES} ${LIBDIR}/openmp/lib/libiomp5.dylib ${CMAKE_BINARY_DIR}/Resources/lib/libiomp5.dylib) # for intermediate binaries, in respect to lib ID
+				# They are linked also to omp lib, so we need it in builddir for runtime execution,
+				# TODO: remove all unneeded dependencies from these
+
+				# for intermediate binaries, in respect to lib ID
+				execute_process(
+				        COMMAND ditto -arch ${CMAKE_OSX_ARCHITECTURES}
+				        ${LIBDIR}/openmp/lib/libiomp5.dylib
+				        ${CMAKE_BINARY_DIR}/Resources/lib/libiomp5.dylib)
 			endif()
 		endif()
 	endif()
@@ -2322,7 +2391,14 @@ elseif(APPLE)
 		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftemplate-depth=1024")
 	endif()
 	# Get rid of eventually clashes, we export some symbols explicite as local
-	set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -Xlinker -unexported_symbols_list -Xlinker ${CMAKE_SOURCE_DIR}/source/creator/osx_locals.map")
+	set(PLATFORM_LINKFLAGS
+		"${PLATFORM_LINKFLAGS} -Xlinker -unexported_symbols_list -Xlinker ${CMAKE_SOURCE_DIR}/source/creator/osx_locals.map"
+	)
+
+	if(WITH_CXX11)
+		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
+		set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -stdlib=libc++")
+	endif()

 	# Suppress ranlib "has no symbols" warnings (workaround for T48250)
 	set(CMAKE_C_ARCHIVE_CREATE   "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
@@ -2340,22 +2416,34 @@ endif()

 if(WITH_CYCLES)
 	if(NOT WITH_OPENIMAGEIO)
-		message(FATAL_ERROR "Cycles requires WITH_OPENIMAGEIO, the library may not have been found. Configure OIIO or disable WITH_CYCLES")
+		message(FATAL_ERROR
+			"Cycles requires WITH_OPENIMAGEIO, the library may not have been found. "
+			"Configure OIIO or disable WITH_CYCLES"
+		)
 	endif()
 	if(NOT WITH_BOOST)
-		message(FATAL_ERROR "Cycles requires WITH_BOOST, the library may not have been found. Configure BOOST or disable WITH_CYCLES")
+		message(FATAL_ERROR
+			"Cycles requires WITH_BOOST, the library may not have been found. "
+			"Configure BOOST or disable WITH_CYCLES"
+		)
 	endif()

 	if(WITH_CYCLES_OSL)
 		if(NOT WITH_LLVM)
-			message(FATAL_ERROR "Cycles OSL requires WITH_LLVM, the library may not have been found. Configure LLVM or disable WITH_CYCLES_OSL")
+			message(FATAL_ERROR
+				"Cycles OSL requires WITH_LLVM, the library may not have been found. "
+				"Configure LLVM or disable WITH_CYCLES_OSL"
+			)
 		endif()
 	endif()
 endif()

 if(WITH_INTERNATIONAL)
 	if(NOT WITH_BOOST)
-		message(FATAL_ERROR "Internationalization requires WITH_BOOST, the library may not have been found. Configure BOOST or disable WITH_INTERNATIONAL")
+		message(FATAL_ERROR
+			"Internationalization requires WITH_BOOST, the library may not have been found. "
+			"Configure BOOST or disable WITH_INTERNATIONAL"
+		)
 	endif()
 endif()

@@ -2441,7 +2529,10 @@ if(WITH_GL_PROFILE_COMPAT OR WITH_GL_PROFILE_CORE)
 elseif(WITH_GL_PROFILE_ES20)
 	if(WITH_SYSTEM_GLES)
 		if(NOT OPENGLES_LIBRARY)
-			message(FATAL_ERROR "Unable to find OpenGL ES libraries.  Install them or disable WITH_SYSTEM_GLES.")
+			message(FATAL_ERROR
+				"Unable to find OpenGL ES libraries. "
+				"Install them or disable WITH_SYSTEM_GLES."
+			)
 		endif()

 		list(APPEND BLENDER_GL_LIBRARIES OPENGLES_LIBRARY)
@@ -2453,7 +2544,10 @@ elseif(WITH_GL_PROFILE_ES20)
 		list(APPEND BLENDER_GL_LIBRARIES "${OPENGLES_LIBRARY}")

 		if(NOT OPENGLES_LIBRARY)
-			message(FATAL_ERROR "To compile WITH_GL_EGL you need to set OPENGLES_LIBRARY to the file path of an OpenGL ES 2.0 library.")
+			message(FATAL_ERROR
+				"To compile WITH_GL_EGL you need to set OPENGLES_LIBRARY "
+				"to the file path of an OpenGL ES 2.0 library."
+			)
 		endif()

 	endif()
@@ -2465,7 +2559,10 @@ elseif(WITH_GL_PROFILE_ES20)
 		mark_as_advanced(OPENGLES_DLL)

 		if(NOT OPENGLES_DLL)
-			message(FATAL_ERROR "To compile WITH_GL_PROFILE_ES20 you need to set OPENGLES_DLL to the file path of an OpenGL ES 2.0 runtime dynamic link library (DLL).")
+			message(FATAL_ERROR
+				"To compile WITH_GL_PROFILE_ES20 you need to set OPENGLES_DLL to the file "
+				"path of an OpenGL ES 2.0 runtime dynamic link library (DLL)."
+			)
 		endif()

 		if(WITH_GL_ANGLE)
@@ -2479,7 +2576,10 @@ elseif(WITH_GL_PROFILE_ES20)
 			mark_as_advanced(D3DCOMPILER_DLL)

 			if(D3DCOMPILER_DLL STREQUAL "")
-				message(FATAL_ERROR "To compile WITH_GL_ANGLE you need to set D3DCOMPILER_DLL to the file path of a copy of the DirectX redistributable DLL file: D3DCompiler_46.dll")
+				message(FATAL_ERROR
+					"To compile WITH_GL_ANGLE you need to set D3DCOMPILER_DLL to the file "
+					"path of a copy of the DirectX redistributable DLL file: D3DCompiler_46.dll"
+				)
 			endif()

 		endif()
@@ -2493,7 +2593,10 @@ if(WITH_GL_EGL)

 	if(WITH_SYSTEM_GLES)
 		if(NOT OPENGLES_EGL_LIBRARY)
-			message(FATAL_ERROR "Unable to find OpenGL ES libraries.  Install them or disable WITH_SYSTEM_GLES.")
+			message(FATAL_ERROR
+				"Unable to find OpenGL ES libraries. "
+				"Install them or disable WITH_SYSTEM_GLES."
+			)
 		endif()

 		list(APPEND BLENDER_GL_LIBRARIES OPENGLES_EGL_LIBRARY)
@@ -2505,7 +2608,10 @@ if(WITH_GL_EGL)
 		list(APPEND BLENDER_GL_LIBRARIES "${OPENGLES_LIBRARY}" "${OPENGLES_EGL_LIBRARY}")

 		if(NOT OPENGLES_EGL_LIBRARY)
-			message(FATAL_ERROR "To compile WITH_GL_EGL you need to set OPENGLES_EGL_LIBRARY to the file path of an EGL library.")
+			message(FATAL_ERROR
+				"To compile WITH_GL_EGL you need to set OPENGLES_EGL_LIBRARY "
+				"to the file path of an EGL library."
+			)
 		endif()

 	endif()
@@ -2517,7 +2623,10 @@ if(WITH_GL_EGL)
 		mark_as_advanced(OPENGLES_EGL_DLL)

 		if(NOT OPENGLES_EGL_DLL)
-			message(FATAL_ERROR "To compile WITH_GL_EGL you need to set OPENGLES_EGL_DLL to the file path of an EGL runtime dynamic link library (DLL).")
+			message(FATAL_ERROR
+				"To compile WITH_GL_EGL you need to set OPENGLES_EGL_DLL "
+				"to the file path of an EGL runtime dynamic link library (DLL)."
+			)
 		endif()

 	endif()
@@ -2682,7 +2791,9 @@ endif()
 if(WITH_LIBMV)
 	set(CERES_DEFINES)

-	if(SHARED_PTR_FOUND)
+	if(WITH_CXX11)
+		# nothing to be done
+	elseif(SHARED_PTR_FOUND)
 		if(SHARED_PTR_TR1_MEMORY_HEADER)
 			list(APPEND CERES_DEFINES -DCERES_TR1_MEMORY_HEADER)
 		endif()
@@ -2693,7 +2804,9 @@ if(WITH_LIBMV)
 		message(FATAL_ERROR "Ceres: Unable to find shared_ptr.")
 	endif()

-	if(HAVE_STD_UNORDERED_MAP_HEADER)
+	if(WITH_CXX11)
+		list(APPEND CERES_DEFINES -DCERES_STD_UNORDERED_MAP)
+	elseif(HAVE_STD_UNORDERED_MAP_HEADER)
 		if(HAVE_UNORDERED_MAP_IN_STD_NAMESPACE)
 			list(APPEND CERES_DEFINES -DCERES_STD_UNORDERED_MAP)
 		else()
@@ -2895,10 +3008,12 @@ endif()
 # be most problematic.
 if(WITH_PYTHON)
 	if(NOT EXISTS "${PYTHON_INCLUDE_DIR}/Python.h")
-		message(FATAL_ERROR "Missing: \"${PYTHON_INCLUDE_DIR}/Python.h\",\n"
-							"Set the cache entry 'PYTHON_INCLUDE_DIR' to point "
-							"to a valid python include path. Containing "
-							"Python.h for python version \"${PYTHON_VERSION}\"")
+		message(FATAL_ERROR
+			"Missing: \"${PYTHON_INCLUDE_DIR}/Python.h\",\n"
+			"Set the cache entry 'PYTHON_INCLUDE_DIR' to point "
+			"to a valid python include path. Containing "
+			"Python.h for python version \"${PYTHON_VERSION}\""
+		)
 	endif()

 	if(WIN32 OR APPLE)
@@ -2916,12 +3031,22 @@ endif()

 if(WITH_CXX11)
 	if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+		# TODO(sergey): Do we want c++11 or gnu-c++11 here?
 		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 	elseif(MSVC12)
 		# Nothing special is needed, C++11 features are available by default.
 	else()
 		message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER_ID} is not supported for C++11 build yet")
 	endif()
+else()
+	# GCC-6 switched its default to -std=gnu++14, which would break linking with existing libraries.
+	# So we explicitly disable C++11 for a new GCC so that no linking issues happen.
+	if(CMAKE_COMPILER_IS_GNUCC AND (NOT "${CMAKE_C_COMPILER_VERSION}" VERSION_LESS "6.0"))
+		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++98")
+		# We also disable any of C++11 ABI from usage, so we wouldn't even try to
+		# link to stuff from std::__cxx11 namespace.
+		add_definitions("-D_GLIBCXX_USE_CXX11_ABI=0")
+	endif()
 endif()

 # Visual Studio has all standards it supports available by default
@@ -3036,7 +3161,7 @@ if(FIRST_RUN)
 		string(LENGTH "${_msg}" _len)
 		while("32" GREATER "${_len}")
 			set(_msg "${_msg} ")
-			 math(EXPR _len "${_len} + 1")
+			math(EXPR _len "${_len} + 1")
 		endwhile()

 		set(_config_msg "${_config_msg}\n${_msg}${${_setting}}" PARENT_SCOPE)
--- a/74
+++ b/74
@@ -120,7 +120,7 @@ endif

 # -----------------------------------------------------------------------------
 # Build Blender
-all: FORCE
+all: .FORCE
 	@echo
 	@echo Configuring Blender in \"$(BUILD_DIR)\" ...

@@ -149,13 +149,13 @@ bpy: all

 # -----------------------------------------------------------------------------
 # Configuration (save some cd'ing around)
-config: FORCE
+config: .FORCE
 	$(CMAKE_CONFIG_TOOL) "$(BUILD_DIR)"


 # -----------------------------------------------------------------------------
 # Help for build targets
-help: FORCE
+help: .FORCE
 	@echo ""
 	@echo "Convenience targets provided for building blender, (multiple at once can be used)"
 	@echo "  * debug     - build a debug binary"
@@ -228,13 +228,13 @@ help: FORCE
 # -----------------------------------------------------------------------------
 # Packages
 #
-package_debian: FORCE
+package_debian: .FORCE
 	cd build_files/package_spec ; DEB_BUILD_OPTIONS="parallel=$(NPROCS)" sh ./build_debian.sh

-package_pacman: FORCE
+package_pacman: .FORCE
 	cd build_files/package_spec/pacman ; MAKEFLAGS="-j$(NPROCS)" makepkg

-package_archive: FORCE
+package_archive: .FORCE
 	make -C "$(BUILD_DIR)" -s package_archive
 	@echo archive in "$(BUILD_DIR)/release"

@@ -242,24 +242,24 @@ package_archive: FORCE
 # -----------------------------------------------------------------------------
 # Tests
 #
-test: FORCE
+test: .FORCE
 	cd $(BUILD_DIR) ; ctest . --output-on-failure

 # run pep8 check check on scripts we distribute.
-test_pep8: FORCE
+test_pep8: .FORCE
 	$(PYTHON) tests/python/pep8.py > test_pep8.log 2>&1
 	@echo "written: test_pep8.log"

 # run some checks on our cmakefiles.
-test_cmake: FORCE
+test_cmake: .FORCE
 	$(PYTHON) build_files/cmake/cmake_consistency_check.py > test_cmake_consistency.log 2>&1
 	@echo "written: test_cmake_consistency.log"

 # run deprecation tests, see if we have anything to remove.
-test_deprecated: FORCE
+test_deprecated: .FORCE
 	$(PYTHON) tests/check_deprecated.py

-test_style_c: FORCE
+test_style_c: .FORCE
 	# run our own checks on C/C++ style
 	PYTHONIOENCODING=utf_8 $(PYTHON) \
 	    "$(BLENDER_DIR)/source/tools/check_source/check_style_c.py" \
@@ -267,7 +267,7 @@ test_style_c: FORCE
 	    "$(BLENDER_DIR)/source/creator" \
 	    --no-length-check

-test_style_c_qtc: FORCE
+test_style_c_qtc: .FORCE
 	# run our own checks on C/C++ style
 	USE_QTC_TASK=1 \
 	PYTHONIOENCODING=utf_8 $(PYTHON) \
@@ -280,7 +280,7 @@ test_style_c_qtc: FORCE
 	@echo "written: test_style.tasks"


-test_style_osl: FORCE
+test_style_osl: .FORCE
 	# run our own checks on C/C++ style
 	PYTHONIOENCODING=utf_8 $(PYTHON) \
 	    "$(BLENDER_DIR)/source/tools/check_source/check_style_c.py" \
@@ -288,7 +288,7 @@ test_style_osl: FORCE
 	    "$(BLENDER_DIR)/release/scripts/templates_osl"


-test_style_osl_qtc: FORCE
+test_style_osl_qtc: .FORCE
 	# run our own checks on C/C++ style
 	USE_QTC_TASK=1 \
 	PYTHONIOENCODING=utf_8 $(PYTHON) \
@@ -303,13 +303,13 @@ test_style_osl_qtc: FORCE
 # Project Files
 #

-project_qtcreator: FORCE
+project_qtcreator: .FORCE
 	$(PYTHON) build_files/cmake/cmake_qtcreator_project.py "$(BUILD_DIR)"

-project_netbeans: FORCE
+project_netbeans: .FORCE
 	$(PYTHON) build_files/cmake/cmake_netbeans_project.py "$(BUILD_DIR)"

-project_eclipse: FORCE
+project_eclipse: .FORCE
 	cmake -G"Eclipse CDT4 - Unix Makefiles" -H"$(BLENDER_DIR)" -B"$(BUILD_DIR)"


@@ -317,40 +317,40 @@ project_eclipse: FORCE
 # Static Checking
 #

-check_cppcheck: FORCE
+check_cppcheck: .FORCE
 	$(CMAKE_CONFIG)
 	cd "$(BUILD_DIR)" ; \
 	$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_cppcheck.py" 2> \
 	    "$(BLENDER_DIR)/check_cppcheck.txt"
 	@echo "written: check_cppcheck.txt"

-check_clang_array: FORCE
+check_clang_array: .FORCE
 	$(CMAKE_CONFIG)
 	cd "$(BUILD_DIR)" ; \
 	$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_clang_array.py"

-check_splint: FORCE
+check_splint: .FORCE
 	$(CMAKE_CONFIG)
 	cd "$(BUILD_DIR)" ; \
 	$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_splint.py"

-check_sparse: FORCE
+check_sparse: .FORCE
 	$(CMAKE_CONFIG)
 	cd "$(BUILD_DIR)" ; \
 	$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_sparse.py"

-check_smatch: FORCE
+check_smatch: .FORCE
 	$(CMAKE_CONFIG)
 	cd "$(BUILD_DIR)" ; \
 	$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_smatch.py"

-check_spelling_py: FORCE
+check_spelling_py: .FORCE
 	cd "$(BUILD_DIR)" ; \
 	PYTHONIOENCODING=utf_8 $(PYTHON) \
 	    "$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
 	    "$(BLENDER_DIR)/release/scripts"

-check_spelling_c: FORCE
+check_spelling_c: .FORCE
 	cd "$(BUILD_DIR)" ; \
 	PYTHONIOENCODING=utf_8 $(PYTHON) \
 	    "$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
@@ -359,7 +359,7 @@ check_spelling_c: FORCE
 	    "$(BLENDER_DIR)/intern/guardedalloc" \
 	    "$(BLENDER_DIR)/intern/ghost" \

-check_spelling_c_qtc: FORCE
+check_spelling_c_qtc: .FORCE
 	cd "$(BUILD_DIR)" ; USE_QTC_TASK=1 \
 	PYTHONIOENCODING=utf_8 $(PYTHON) \
 	    "$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
@@ -370,13 +370,13 @@ check_spelling_c_qtc: FORCE
 	    > \
 	    "$(BLENDER_DIR)/check_spelling_c.tasks"

-check_spelling_osl: FORCE
+check_spelling_osl: .FORCE
 	cd "$(BUILD_DIR)" ;\
 	PYTHONIOENCODING=utf_8 $(PYTHON) \
 	    "$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
 	    "$(BLENDER_DIR)/intern/cycles/kernel/shaders"

-check_descriptions: FORCE
+check_descriptions: .FORCE
 	"$(BUILD_DIR)/bin/blender" --background -noaudio --factory-startup --python \
 	    "$(BLENDER_DIR)/source/tools/check_source/check_descriptions.py"

@@ -384,14 +384,14 @@ check_descriptions: FORCE
 # Utilities
 #

-tgz: FORCE
+tgz: .FORCE
 	./build_files/utils/build_tgz.sh

-icons: FORCE
+icons: .FORCE
 	"$(BLENDER_DIR)/release/datafiles/blender_icons_update.py"
 	"$(BLENDER_DIR)/release/datafiles/prvicons_update.py"

-update: FORCE
+update: .FORCE
 	if [ -d "../lib" ]; then \
 		svn update ../lib/* ; \
 	fi
@@ -404,23 +404,23 @@ update: FORCE
 #

 # Simple version of ./doc/python_api/sphinx_doc_gen.sh with no PDF generation.
-doc_py: FORCE
+doc_py: .FORCE
 	"$(BUILD_DIR)/bin/blender" --background -noaudio --factory-startup --python doc/python_api/sphinx_doc_gen.py
 	cd doc/python_api ; sphinx-build -b html sphinx-in sphinx-out
 	@echo "docs written into: '$(BLENDER_DIR)/doc/python_api/sphinx-out/contents.html'"

-doc_doxy: FORCE
+doc_doxy: .FORCE
 	cd doc/doxygen; doxygen Doxyfile
 	@echo "docs written into: '$(BLENDER_DIR)/doc/doxygen/html/index.html'"

-doc_dna: FORCE
+doc_dna: .FORCE
 	"$(BUILD_DIR)/bin/blender" --background -noaudio --factory-startup --python doc/blender_file_format/BlendFileDnaExporter_25.py
 	@echo "docs written into: '$(BLENDER_DIR)/doc/blender_file_format/dna.html'"

-doc_man: FORCE
+doc_man: .FORCE
 	$(PYTHON) doc/manpage/blender.1.py "$(BUILD_DIR)/bin/blender"

-help_features: FORCE
+help_features: .FORCE
 	@$(PYTHON) -c \
 		"import re; \
 		print('\n'.join([ \
@@ -431,9 +431,9 @@ help_features: FORCE
 		if w.startswith('WITH_')]))" | uniq


-clean: FORCE
+clean: .FORCE
 	$(MAKE) -C "$(BUILD_DIR)" clean

 .PHONY: all

-FORCE:
+.FORCE:
--- a/build_files/build_environment/install_deps.sh
+++ b/build_files/build_environment/install_deps.sh
@@ -3342,7 +3342,15 @@ install_ARCH() {
  OGG_DEV="libogg"
  THEORA_DEV="libtheora"

-  _packages="base-devel git cmake \
+  BASE_DEVEL="base-devel"
+
+  # Avoid conflicts when gcc-multilib is installed
+  pacman -Qi gcc-multilib &>/dev/null
+  if [ $? -eq 0 ]; then
+    BASE_DEVEL=`pacman -Sgq base-devel | sed -e 's/^gcc$/gcc-multilib/g' | paste -s -d' '`
+  fi
+
+  _packages="$BASE_DEVEL git cmake \
             libxi libxcursor libxrandr libxinerama glew libpng libtiff wget openal \
             $OPENJPEG_DEV $VORBIS_DEV $OGG_DEV $THEORA_DEV yasm sdl fftw intel-tbb \
             libxml2 yaml-cpp tinyxml python-requests jemalloc"
--- a/build_files/buildbot/master.cfg
+++ b/build_files/buildbot/master.cfg
@@ -285,7 +285,7 @@ def generic_builder(id, libdir='', branch='', rsync=False):
                             maxsize=150 * 1024 * 1024,
                             workdir='install'))
    f.addStep(MasterShellCommand(name='unpack',
-                                 command=['python', unpack_script, filename],
+                                 command=['python2.7', unpack_script, filename],
                                 description='unpacking',
                                 descriptionDone='unpacked'))
    return f
--- a/build_files/buildbot/slave_compile.py
+++ b/build_files/buildbot/slave_compile.py
@@ -75,11 +75,20 @@ if 'cmake' in builder:
        cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-hack/bin/nvcc')

    elif builder.startswith('win'):
-        if builder.startswith('win64'):
-            cmake_options.append(['-G', '"Visual Studio 12 2013 Win64"'])
-        elif builder.startswith('win32'):
-            bits = 32
-            cmake_options.append(['-G', '"Visual Studio 12 2013"'])
+        if builder.endswith('_vc2015'):
+            if builder.startswith('win64'):
+                cmake_options.extend(['-G', 'Visual Studio 14 2015 Win64'])
+            elif builder.startswith('win32'):
+                bits = 32
+                cmake_options.extend(['-G', 'Visual Studio 14 2015'])
+            cmake_extra_options.append('-DCUDA_NVCC_FLAGS=--cl-version;2013;' +
+                '--compiler-bindir;C:\\Program Files (x86)\\Microsoft Visual Studio 12.0\\VC\\bin')
+        else:
+            if builder.startswith('win64'):
+                cmake_options.extend(['-G', 'Visual Studio 12 2013 Win64'])
+            elif builder.startswith('win32'):
+                bits = 32
+                cmake_options.extend(['-G', 'Visual Studio 12 2013'])

    elif builder.startswith('linux'):
        tokens = builder.split("_")
--- a/build_files/buildbot/slave_pack.py
+++ b/build_files/buildbot/slave_pack.py
@@ -108,6 +108,8 @@ if builder.find('cmake') != -1:
                platform += 'i386'
            elif builder.endswith('ppc_10_6_cmake'):
                platform += 'ppc'
+        if builder.endswith('vc2015'):
+            platform += "-vc14"
        builderified_name = 'blender-{}-{}-{}'.format(blender_full_version, git_hash, platform)
        if branch != '':
            builderified_name = branch + "-" + builderified_name
--- a/build_files/cmake/macros.cmake
+++ b/build_files/cmake/macros.cmake
@@ -435,9 +435,6 @@ function(setup_liblinks
 	if(WITH_MEM_JEMALLOC)
 		target_link_libraries(${target} ${JEMALLOC_LIBRARIES})
 	endif()
-	if(WITH_INPUT_NDOF)
-		target_link_libraries(${target} ${NDOF_LIBRARIES})
-	endif()
 	if(WITH_MOD_CLOTH_ELTOPO)
 		target_link_libraries(${target} ${LAPACK_LIBRARIES})
 	endif()
@@ -451,6 +448,9 @@ function(setup_liblinks
 		if(WITH_OPENMP_STATIC)
 			target_link_libraries(${target} ${OpenMP_LIBRARIES})
 		endif()
+		if(WITH_INPUT_NDOF)
+			target_link_libraries(${target} ${NDOF_LIBRARIES})
+		endif()
 	endif()

 	# We put CLEW and CUEW here because OPENSUBDIV_LIBRARIES dpeends on them..
@@ -487,6 +487,7 @@ function(SETUP_BLENDER_SORTED_LIBS)
 	if(WITH_CYCLES)
 		list(APPEND BLENDER_LINK_LIBS
 			cycles_render
+			cycles_graph
 			cycles_bvh
 			cycles_device
 			cycles_kernel
@@ -551,11 +552,11 @@ function(SETUP_BLENDER_SORTED_LIBS)
 		bf_modifiers
 		bf_bmesh
 		bf_gpu
+		bf_blenloader
 		bf_blenkernel
 		bf_physics
 		bf_nodes
 		bf_rna
-		bf_blenloader
 		bf_imbuf
 		bf_blenlib
 		bf_depsgraph
@@ -600,6 +601,7 @@ function(SETUP_BLENDER_SORTED_LIBS)
 		bf_intern_dualcon
 		bf_intern_cycles
 		cycles_render
+		cycles_graph
 		cycles_bvh
 		cycles_device
 		cycles_kernel
@@ -659,10 +661,6 @@ function(SETUP_BLENDER_SORTED_LIBS)
 		list(APPEND BLENDER_SORTED_LIBS bf_quicktime)
 	endif()

-	if(WITH_INPUT_NDOF)
-		list(APPEND BLENDER_SORTED_LIBS bf_intern_ghostndof3dconnexion)
-	endif()
-	
 	if(WITH_MOD_BOOLEAN)
 		list(APPEND BLENDER_SORTED_LIBS extern_carve)
 	endif()
@@ -803,7 +801,15 @@ macro(TEST_UNORDERED_MAP_SUPPORT)
 	#  UNORDERED_MAP_NAMESPACE, namespace for unordered_map, if found

 	include(CheckIncludeFileCXX)
-	CHECK_INCLUDE_FILE_CXX("unordered_map" HAVE_STD_UNORDERED_MAP_HEADER)
+
+	# Workaround for newer GCC (6.x+) where C++11 was enabled by default, which led us
+	# to a situation when there is <unordered_map> include but which can't be used unless
+	# C++11 is enabled.
+	if(CMAKE_COMPILER_IS_GNUCC AND (NOT "${CMAKE_C_COMPILER_VERSION}" VERSION_LESS "6.0") AND (NOT WITH_CXX11))
+		set(HAVE_STD_UNORDERED_MAP_HEADER False)
+	else()
+		CHECK_INCLUDE_FILE_CXX("unordered_map" HAVE_STD_UNORDERED_MAP_HEADER)
+	endif()
 	if(HAVE_STD_UNORDERED_MAP_HEADER)
 		# Even so we've found unordered_map header file it doesn't
 		# mean unordered_map and unordered_set will be declared in
@@ -873,8 +879,16 @@ macro(TEST_SHARED_PTR_SUPPORT)
 	# otherwise it's assumed to be defined in std namespace.

 	include(CheckIncludeFileCXX)
+	include(CheckCXXSourceCompiles)
 	set(SHARED_PTR_FOUND FALSE)
-	CHECK_INCLUDE_FILE_CXX(memory HAVE_STD_MEMORY_HEADER)
+	# Workaround for newer GCC (6.x+) where C++11 was enabled by default, which led us
+	# to a situation when there is <memory> include but std::shared_ptr can't be used unless
+	# C++11 is enabled.
+	if(CMAKE_COMPILER_IS_GNUCC AND (NOT "${CMAKE_C_COMPILER_VERSION}" VERSION_LESS "6.0") AND (NOT WITH_CXX11))
+		set(HAVE_STD_MEMORY_HEADER False)
+	else()
+		CHECK_INCLUDE_FILE_CXX(memory HAVE_STD_MEMORY_HEADER)
+	endif()
 	if(HAVE_STD_MEMORY_HEADER)
 		# Finding the memory header doesn't mean that shared_ptr is in std
 		# namespace.
@@ -882,7 +896,6 @@ macro(TEST_SHARED_PTR_SUPPORT)
 		# In particular, MSVC 2008 has shared_ptr declared in std::tr1.  In
 		# order to support this, we do an extra check to see which namespace
 		# should be used.
-		include(CheckCXXSourceCompiles)
 		CHECK_CXX_SOURCE_COMPILES("#include <memory>
 		                           int main() {
 		                             std::shared_ptr<int> int_ptr;
@@ -1050,6 +1063,19 @@ macro(remove_strict_flags_file

 endmacro()

+# External libs may need 'signed char' to be default, so drop the flag that makes plain 'char' unsigned.
+macro(remove_cc_flag_unsigned_char)
+	if(CMAKE_C_COMPILER_ID MATCHES "^(GNU|Clang|Intel)$")
+		remove_cc_flag("-funsigned-char")
+	elseif(MSVC)
+		remove_cc_flag("/J")
+	else()
+		message(WARNING
+			"Compiler '${CMAKE_C_COMPILER_ID}' failed to disable 'unsigned char' flag. "
+			"Build files need updating."
+		)
+	endif()
+endmacro()

 function(ADD_CHECK_C_COMPILER_FLAG
 	_CFLAGS
--- a/doc/python_api/rst/bge_types/bge.types.KX_LightObject.rst
+++ b/doc/python_api/rst/bge_types/bge.types.KX_LightObject.rst
@@ -60,37 +60,37 @@ base class --- :class:`KX_GameObject`

      :type: float (read only)

-   ..attribute:: shadowFrustumSize
+   .. attribute:: shadowFrustumSize

      Size of the frustum used for creating the shadowmap.

      :type: float (read only)

-   ..attribute:: shadowBindId
+   .. attribute:: shadowBindId

      The OpenGL shadow texture bind number/id.

      :type: int (read only)

-   ..attribute:: shadowMapType
+   .. attribute:: shadowMapType

      The shadow shadow map type (0 -> Simple; 1 -> Variance)

      :type: int (read only)

-   ..attribute:: shadowBias
+   .. attribute:: shadowBias

      The shadow buffer sampling bias.

      :type: float (read only)

-   ..attribute:: shadowBleedBias
+   .. attribute:: shadowBleedBias

      The bias for reducing light-bleed on variance shadow maps.

      :type: float (read only)

-   ..attribute:: useShadow
+   .. attribute:: useShadow

      Returns True if the light has Shadow option activated, else returns False.

--- a/doc/python_api/sphinx_doc_gen.sh
+++ b/doc/python_api/sphinx_doc_gen.sh
@@ -61,7 +61,7 @@ if $DO_EXE_BLENDER ; then
 		--python-exit-code 1 \
 		--python $SPHINXBASE/sphinx_doc_gen.py

-	if (($? == 1)) ; then
+	if (($? != 0)) ; then
 		echo "Generating documentation failed, aborting"
 		exit 1
 	fi
--- a/extern/Eigen3/README.blender
+++ b/extern/Eigen3/README.blender
@@ -0,0 +1,6 @@
+Project: Eigen, template library for linear algebra: matrices, vectors, numerical solvers, and related algorithms
+URL: http://eigen.tuxfamily.org/index.php?title=Main_Page
+License: GPLv3+
+Upstream version: 3.2.7
+Local modifications:
+- OpenMP fix for MSVC2015, see http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1131
--- a/extern/binreloc/README.blender
+++ b/extern/binreloc/README.blender
@@ -0,0 +1,6 @@
+Project: AutoPackage
+URL: http://autopackage.org/docs/binreloc (original, defunct)
+     http://alien.cern.ch/cache/autopackage-1.0/site/docs/binreloc/ (cache)
+License: Public Domain
+Upstream version: Unknown (Last Release)
+Local modifications: None
--- a/extern/carve/README.blender
+++ b/extern/carve/README.blender
@@ -0,0 +1,4 @@
+Project: Carve, CSG library
+URL: https://code.google.com/archive/p/carve/
+Upstream version: 9a85d733a43d
+Local modifications: See patches/ folder
--- a/extern/ceres/README.blender
+++ b/extern/ceres/README.blender
@@ -0,0 +1,4 @@
+Project: Ceres Solver
+URL: http://ceres-solver.org/
+Upstream version: 1.11 (aef9c9563b08d5f39eee1576af133a84749d1b48)
+Local modifications: None
--- a/extern/clew/README.blender
+++ b/extern/clew/README.blender
@@ -0,0 +1,5 @@
+Project: OpenCL Wrangler
+URL: https://github.com/OpenCLWrangler/clew
+License: Apache 2.0
+Upstream version: 277db43
+Local modifications: None
--- a/extern/cuew/README.blender
+++ b/extern/cuew/README.blender
@@ -0,0 +1,5 @@
+Project: Cuda Wrangler
+URL: https://github.com/CudaWrangler/cuew
+License: Apache 2.0
+Upstream version: e2e0315
+Local modifications: None
--- a/extern/cuew/include/cuew.h
+++ b/extern/cuew/include/cuew.h
@@ -131,8 +131,8 @@ typedef struct CUsurfref_st* CUsurfref;
 typedef struct CUevent_st* CUevent;
 typedef struct CUstream_st* CUstream;
 typedef struct CUgraphicsResource_st* CUgraphicsResource;
-typedef unsigned CUtexObject;
-typedef unsigned CUsurfObject;
+typedef unsigned long long CUtexObject;
+typedef unsigned long long CUsurfObject;

 typedef struct CUuuid_st {
  char bytes[16];
@@ -603,7 +603,7 @@ typedef struct CUDA_ARRAY_DESCRIPTOR_st {
  size_t Width;
  size_t Height;
  CUarray_format Format;
-  unsigned NumChannels;
+  unsigned int NumChannels;
 } CUDA_ARRAY_DESCRIPTOR;

 typedef struct CUDA_ARRAY3D_DESCRIPTOR_st {
@@ -611,8 +611,8 @@ typedef struct CUDA_ARRAY3D_DESCRIPTOR_st {
  size_t Height;
  size_t Depth;
  CUarray_format Format;
-  unsigned NumChannels;
-  unsigned Flags;
+  unsigned int NumChannels;
+  unsigned int Flags;
 } CUDA_ARRAY3D_DESCRIPTOR;

 typedef struct CUDA_RESOURCE_DESC_st {
@@ -627,13 +627,13 @@ typedef struct CUDA_RESOURCE_DESC_st {
    struct {
      CUdeviceptr devPtr;
      CUarray_format format;
-      unsigned numChannels;
+      unsigned int numChannels;
      size_t sizeInBytes;
    } linear;
    struct {
      CUdeviceptr devPtr;
      CUarray_format format;
-      unsigned numChannels;
+      unsigned int numChannels;
      size_t width;
      size_t height;
      size_t pitchInBytes;
@@ -642,14 +642,14 @@ typedef struct CUDA_RESOURCE_DESC_st {
      int reserved[32];
    } reserved;
  } res;
-  unsigned flags;
+  unsigned int flags;
 } CUDA_RESOURCE_DESC;

 typedef struct CUDA_TEXTURE_DESC_st {
  CUaddress_mode addressMode[3];
  CUfilter_mode filterMode;
-  unsigned flags;
-  unsigned maxAnisotropy;
+  unsigned int flags;
+  unsigned int maxAnisotropy;
  CUfilter_mode mipmapFilterMode;
  float mipmapLevelBias;
  float minMipmapLevelClamp;
@@ -700,19 +700,19 @@ typedef struct CUDA_RESOURCE_VIEW_DESC_st {
  size_t width;
  size_t height;
  size_t depth;
-  unsigned firstMipmapLevel;
-  unsigned lastMipmapLevel;
-  unsigned firstLayer;
-  unsigned lastLayer;
-  unsigned reserved[16];
+  unsigned int firstMipmapLevel;
+  unsigned int lastMipmapLevel;
+  unsigned int firstLayer;
+  unsigned int lastLayer;
+  unsigned int reserved[16];
 } CUDA_RESOURCE_VIEW_DESC;

 typedef struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st {
-  unsigned p2pToken;
-  unsigned vaSpaceToken;
+  unsigned long long p2pToken;
+  unsigned int vaSpaceToken;
 } CUDA_POINTER_ATTRIBUTE_P2P_TOKENS;
-typedef unsigned GLenum;
-typedef unsigned GLuint;
+typedef unsigned int GLenum;
+typedef unsigned int GLuint;
 typedef int GLint;

 typedef enum CUGLDeviceList_enum {
@@ -751,7 +751,7 @@ typedef struct _nvrtcProgram* nvrtcProgram;
 /* Function types. */
 typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char* pStr);
 typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char* pStr);
-typedef CUresult CUDAAPI tcuInit(unsigned Flags);
+typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
 typedef CUresult CUDAAPI tcuDriverGetVersion(int* driverVersion);
 typedef CUresult CUDAAPI tcuDeviceGet(CUdevice* device, int ordinal);
 typedef CUresult CUDAAPI tcuDeviceGetCount(int* count);
@@ -762,17 +762,17 @@ typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop* prop, CUdevice dev);
 typedef CUresult CUDAAPI tcuDeviceComputeCapability(int* major, int* minor, CUdevice dev);
 typedef CUresult CUDAAPI tcuDevicePrimaryCtxRetain(CUcontext* pctx, CUdevice dev);
 typedef CUresult CUDAAPI tcuDevicePrimaryCtxRelease(CUdevice dev);
-typedef CUresult CUDAAPI tcuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned flags);
-typedef CUresult CUDAAPI tcuDevicePrimaryCtxGetState(CUdevice dev, unsigned* flags, int* active);
+typedef CUresult CUDAAPI tcuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags);
+typedef CUresult CUDAAPI tcuDevicePrimaryCtxGetState(CUdevice dev, unsigned int* flags, int* active);
 typedef CUresult CUDAAPI tcuDevicePrimaryCtxReset(CUdevice dev);
-typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext* pctx, unsigned flags, CUdevice dev);
+typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext* pctx, unsigned int flags, CUdevice dev);
 typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
 typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext ctx);
 typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext* pctx);
 typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
 typedef CUresult CUDAAPI tcuCtxGetCurrent(CUcontext* pctx);
 typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice* device);
-typedef CUresult CUDAAPI tcuCtxGetFlags(unsigned* flags);
+typedef CUresult CUDAAPI tcuCtxGetFlags(unsigned int* flags);
 typedef CUresult CUDAAPI tcuCtxSynchronize(void);
 typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
 typedef CUresult CUDAAPI tcuCtxGetLimit(size_t* pvalue, CUlimit limit);
@@ -780,43 +780,43 @@ typedef CUresult CUDAAPI tcuCtxGetCacheConfig(CUfunc_cache* pconfig);
 typedef CUresult CUDAAPI tcuCtxSetCacheConfig(CUfunc_cache config);
 typedef CUresult CUDAAPI tcuCtxGetSharedMemConfig(CUsharedconfig* pConfig);
 typedef CUresult CUDAAPI tcuCtxSetSharedMemConfig(CUsharedconfig config);
-typedef CUresult CUDAAPI tcuCtxGetApiVersion(CUcontext ctx, unsigned* version);
+typedef CUresult CUDAAPI tcuCtxGetApiVersion(CUcontext ctx, unsigned int* version);
 typedef CUresult CUDAAPI tcuCtxGetStreamPriorityRange(int* leastPriority, int* greatestPriority);
-typedef CUresult CUDAAPI tcuCtxAttach(CUcontext* pctx, unsigned flags);
+typedef CUresult CUDAAPI tcuCtxAttach(CUcontext* pctx, unsigned int flags);
 typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx);
 typedef CUresult CUDAAPI tcuModuleLoad(CUmodule* module, const char* fname);
 typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule* module, const void* image);
-typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule* module, const void* image, unsigned numOptions, CUjit_option* options, void* optionValues);
+typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule* module, const void* image, unsigned int numOptions, CUjit_option* options, void* optionValues);
 typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule* module, const void* fatCubin);
 typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
 typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name);
 typedef CUresult CUDAAPI tcuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name);
 typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name);
 typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name);
-typedef CUresult CUDAAPI tcuLinkCreate_v2(unsigned numOptions, CUjit_option* options, void* optionValues, CUlinkState* stateOut);
-typedef CUresult CUDAAPI tcuLinkAddData_v2(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned numOptions, CUjit_option* options, void* optionValues);
-typedef CUresult CUDAAPI tcuLinkAddFile_v2(CUlinkState state, CUjitInputType type, const char* path, unsigned numOptions, CUjit_option* options, void* optionValues);
+typedef CUresult CUDAAPI tcuLinkCreate_v2(unsigned int numOptions, CUjit_option* options, void* optionValues, CUlinkState* stateOut);
+typedef CUresult CUDAAPI tcuLinkAddData_v2(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned int numOptions, CUjit_option* options, void* optionValues);
+typedef CUresult CUDAAPI tcuLinkAddFile_v2(CUlinkState state, CUjitInputType type, const char* path, unsigned int numOptions, CUjit_option* options, void* optionValues);
 typedef CUresult CUDAAPI tcuLinkComplete(CUlinkState state, void* cubinOut, size_t* sizeOut);
 typedef CUresult CUDAAPI tcuLinkDestroy(CUlinkState state);
 typedef CUresult CUDAAPI tcuMemGetInfo_v2(size_t* free, size_t* total);
 typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr* dptr, size_t bytesize);
-typedef CUresult CUDAAPI tcuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned ElementSizeBytes);
+typedef CUresult CUDAAPI tcuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
 typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
 typedef CUresult CUDAAPI tcuMemGetAddressRange_v2(CUdeviceptr* pbase, size_t* psize, CUdeviceptr dptr);
 typedef CUresult CUDAAPI tcuMemAllocHost_v2(void* pp, size_t bytesize);
 typedef CUresult CUDAAPI tcuMemFreeHost(void* p);
-typedef CUresult CUDAAPI tcuMemHostAlloc(void* pp, size_t bytesize, unsigned Flags);
-typedef CUresult CUDAAPI tcuMemHostGetDevicePointer_v2(CUdeviceptr* pdptr, void* p, unsigned Flags);
-typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned* pFlags, void* p);
-typedef CUresult CUDAAPI tcuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned flags);
+typedef CUresult CUDAAPI tcuMemHostAlloc(void* pp, size_t bytesize, unsigned int Flags);
+typedef CUresult CUDAAPI tcuMemHostGetDevicePointer_v2(CUdeviceptr* pdptr, void* p, unsigned int Flags);
+typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned int* pFlags, void* p);
+typedef CUresult CUDAAPI tcuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned int flags);
 typedef CUresult CUDAAPI tcuDeviceGetByPCIBusId(CUdevice* dev, const char* pciBusId);
 typedef CUresult CUDAAPI tcuDeviceGetPCIBusId(char* pciBusId, int len, CUdevice dev);
 typedef CUresult CUDAAPI tcuIpcGetEventHandle(CUipcEventHandle* pHandle, CUevent event);
 typedef CUresult CUDAAPI tcuIpcOpenEventHandle(CUevent* phEvent, CUipcEventHandle handle);
 typedef CUresult CUDAAPI tcuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr);
-typedef CUresult CUDAAPI tcuIpcOpenMemHandle(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned Flags);
+typedef CUresult CUDAAPI tcuIpcOpenMemHandle(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned int Flags);
 typedef CUresult CUDAAPI tcuIpcCloseMemHandle(CUdeviceptr dptr);
-typedef CUresult CUDAAPI tcuMemHostRegister_v2(void* p, size_t bytesize, unsigned Flags);
+typedef CUresult CUDAAPI tcuMemHostRegister_v2(void* p, size_t bytesize, unsigned int Flags);
 typedef CUresult CUDAAPI tcuMemHostUnregister(void* p);
 typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount);
 typedef CUresult CUDAAPI tcuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount);
@@ -842,40 +842,40 @@ typedef CUresult CUDAAPI tcuMemcpyAtoHAsync_v2(void* dstHost, CUarray srcArray,
 typedef CUresult CUDAAPI tcuMemcpy2DAsync_v2(const CUDA_MEMCPY2D* pCopy, CUstream hStream);
 typedef CUresult CUDAAPI tcuMemcpy3DAsync_v2(const CUDA_MEMCPY3D* pCopy, CUstream hStream);
 typedef CUresult CUDAAPI tcuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER* pCopy, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemsetD8_v2(CUdeviceptr dstDevice, unsigned uc, size_t N);
-typedef CUresult CUDAAPI tcuMemsetD16_v2(CUdeviceptr dstDevice, unsigned us, size_t N);
-typedef CUresult CUDAAPI tcuMemsetD32_v2(CUdeviceptr dstDevice, unsigned ui, size_t N);
-typedef CUresult CUDAAPI tcuMemsetD2D8_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned uc, size_t Width, size_t Height);
-typedef CUresult CUDAAPI tcuMemsetD2D16_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned us, size_t Width, size_t Height);
-typedef CUresult CUDAAPI tcuMemsetD2D32_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned ui, size_t Width, size_t Height);
-typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned uc, size_t N, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemsetD16Async(CUdeviceptr dstDevice, unsigned us, size_t N, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemsetD32Async(CUdeviceptr dstDevice, unsigned ui, size_t N, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned uc, size_t Width, size_t Height, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned us, size_t Width, size_t Height, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned ui, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD8_v2(CUdeviceptr dstDevice, unsigned char uc, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD16_v2(CUdeviceptr dstDevice, unsigned short us, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD32_v2(CUdeviceptr dstDevice, unsigned int ui, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD2D8_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD2D16_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD2D32_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream);
 typedef CUresult CUDAAPI tcuArrayCreate_v2(CUarray* pHandle, const CUDA_ARRAY_DESCRIPTOR* pAllocateArray);
 typedef CUresult CUDAAPI tcuArrayGetDescriptor_v2(CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor, CUarray hArray);
 typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray);
 typedef CUresult CUDAAPI tcuArray3DCreate_v2(CUarray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray);
 typedef CUresult CUDAAPI tcuArray3DGetDescriptor_v2(CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor, CUarray hArray);
-typedef CUresult CUDAAPI tcuMipmappedArrayCreate(CUmipmappedArray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, unsigned numMipmapLevels);
-typedef CUresult CUDAAPI tcuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipmappedArray hMipmappedArray, unsigned level);
+typedef CUresult CUDAAPI tcuMipmappedArrayCreate(CUmipmappedArray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, unsigned int numMipmapLevels);
+typedef CUresult CUDAAPI tcuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipmappedArray hMipmappedArray, unsigned int level);
 typedef CUresult CUDAAPI tcuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray);
 typedef CUresult CUDAAPI tcuPointerGetAttribute(void* data, CUpointer_attribute attribute, CUdeviceptr ptr);
 typedef CUresult CUDAAPI tcuPointerSetAttribute(const void* value, CUpointer_attribute attribute, CUdeviceptr ptr);
-typedef CUresult CUDAAPI tcuPointerGetAttributes(unsigned numAttributes, CUpointer_attribute* attributes, void* data, CUdeviceptr ptr);
-typedef CUresult CUDAAPI tcuStreamCreate(CUstream* phStream, unsigned Flags);
-typedef CUresult CUDAAPI tcuStreamCreateWithPriority(CUstream* phStream, unsigned flags, int priority);
+typedef CUresult CUDAAPI tcuPointerGetAttributes(unsigned int numAttributes, CUpointer_attribute* attributes, void* data, CUdeviceptr ptr);
+typedef CUresult CUDAAPI tcuStreamCreate(CUstream* phStream, unsigned int Flags);
+typedef CUresult CUDAAPI tcuStreamCreateWithPriority(CUstream* phStream, unsigned int flags, int priority);
 typedef CUresult CUDAAPI tcuStreamGetPriority(CUstream hStream, int* priority);
-typedef CUresult CUDAAPI tcuStreamGetFlags(CUstream hStream, unsigned* flags);
-typedef CUresult CUDAAPI tcuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned Flags);
-typedef CUresult CUDAAPI tcuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void* userData, unsigned flags);
-typedef CUresult CUDAAPI tcuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned flags);
+typedef CUresult CUDAAPI tcuStreamGetFlags(CUstream hStream, unsigned int* flags);
+typedef CUresult CUDAAPI tcuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags);
+typedef CUresult CUDAAPI tcuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void* userData, unsigned int flags);
+typedef CUresult CUDAAPI tcuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned int flags);
 typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream);
 typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream);
 typedef CUresult CUDAAPI tcuStreamDestroy_v2(CUstream hStream);
-typedef CUresult CUDAAPI tcuEventCreate(CUevent* phEvent, unsigned Flags);
+typedef CUresult CUDAAPI tcuEventCreate(CUevent* phEvent, unsigned int Flags);
 typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
 typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent);
 typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
@@ -884,23 +884,23 @@ typedef CUresult CUDAAPI tcuEventElapsedTime(float* pMilliseconds, CUevent hStar
 typedef CUresult CUDAAPI tcuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc);
 typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
 typedef CUresult CUDAAPI tcuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config);
-typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned gridDimX, unsigned gridDimY, unsigned gridDimZ, unsigned blockDimX, unsigned blockDimY, unsigned blockDimZ, unsigned sharedMemBytes, CUstream hStream, void* kernelParams, void* extra);
+typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void* kernelParams, void* extra);
 typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
-typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned bytes);
-typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned numbytes);
-typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned value);
+typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
+typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned int numbytes);
+typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned int value);
 typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value);
-typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void* ptr, unsigned numbytes);
+typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void* ptr, unsigned int numbytes);
 typedef CUresult CUDAAPI tcuLaunch(CUfunction f);
 typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
 typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
 typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
 typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize);
-typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, unsigned flags);
+typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, unsigned int flags);
 typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit);
-typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned flags);
-typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned Flags);
-typedef CUresult CUDAAPI tcuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned Flags);
+typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned int flags);
+typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);
+typedef CUresult CUDAAPI tcuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned int Flags);
 typedef CUresult CUDAAPI tcuTexRefSetAddress_v2(size_t* ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
 typedef CUresult CUDAAPI tcuTexRefSetAddress2D_v3(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR* desc, CUdeviceptr dptr, size_t Pitch);
 typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
@@ -909,8 +909,8 @@ typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode
 typedef CUresult CUDAAPI tcuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm);
 typedef CUresult CUDAAPI tcuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias);
 typedef CUresult CUDAAPI tcuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp);
-typedef CUresult CUDAAPI tcuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned maxAniso);
-typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned Flags);
+typedef CUresult CUDAAPI tcuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAniso);
+typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);
 typedef CUresult CUDAAPI tcuTexRefGetAddress_v2(CUdeviceptr* pdptr, CUtexref hTexRef);
 typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray* phArray, CUtexref hTexRef);
 typedef CUresult CUDAAPI tcuTexRefGetMipmappedArray(CUmipmappedArray* phMipmappedArray, CUtexref hTexRef);
@@ -921,10 +921,10 @@ typedef CUresult CUDAAPI tcuTexRefGetMipmapFilterMode(CUfilter_mode* pfm, CUtexr
 typedef CUresult CUDAAPI tcuTexRefGetMipmapLevelBias(float* pbias, CUtexref hTexRef);
 typedef CUresult CUDAAPI tcuTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, float* pmaxMipmapLevelClamp, CUtexref hTexRef);
 typedef CUresult CUDAAPI tcuTexRefGetMaxAnisotropy(int* pmaxAniso, CUtexref hTexRef);
-typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned* pFlags, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned int* pFlags, CUtexref hTexRef);
 typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref* pTexRef);
 typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef);
-typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned Flags);
+typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags);
 typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray* phArray, CUsurfref hSurfRef);
 typedef CUresult CUDAAPI tcuTexObjectCreate(CUtexObject* pTexObject, const CUDA_RESOURCE_DESC* pResDesc, const CUDA_TEXTURE_DESC* pTexDesc, const CUDA_RESOURCE_VIEW_DESC* pResViewDesc);
 typedef CUresult CUDAAPI tcuTexObjectDestroy(CUtexObject texObject);
@@ -935,27 +935,27 @@ typedef CUresult CUDAAPI tcuSurfObjectCreate(CUsurfObject* pSurfObject, const CU
 typedef CUresult CUDAAPI tcuSurfObjectDestroy(CUsurfObject surfObject);
 typedef CUresult CUDAAPI tcuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUsurfObject surfObject);
 typedef CUresult CUDAAPI tcuDeviceCanAccessPeer(int* canAccessPeer, CUdevice dev, CUdevice peerDev);
-typedef CUresult CUDAAPI tcuCtxEnablePeerAccess(CUcontext peerContext, unsigned Flags);
+typedef CUresult CUDAAPI tcuCtxEnablePeerAccess(CUcontext peerContext, unsigned int Flags);
 typedef CUresult CUDAAPI tcuCtxDisablePeerAccess(CUcontext peerContext);
 typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
-typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned arrayIndex, unsigned mipLevel);
+typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
 typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray* pMipmappedArray, CUgraphicsResource resource);
 typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer_v2(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource);
-typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags_v2(CUgraphicsResource resource, unsigned flags);
-typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned count, CUgraphicsResource* resources, CUstream hStream);
-typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned count, CUgraphicsResource* resources, CUstream hStream);
+typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags_v2(CUgraphicsResource resource, unsigned int flags);
+typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
+typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
 typedef CUresult CUDAAPI tcuGetExportTable(const void* ppExportTable, const CUuuid* pExportTableId);

-typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource* pCudaResource, GLuint buffer, unsigned Flags);
-typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned Flags);
-typedef CUresult CUDAAPI tcuGLGetDevices_v2(unsigned* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned cudaDeviceCount, CUGLDeviceList deviceList);
-typedef CUresult CUDAAPI tcuGLCtxCreate_v2(CUcontext* pCtx, unsigned Flags, CUdevice device);
+typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags);
+typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned int Flags);
+typedef CUresult CUDAAPI tcuGLGetDevices_v2(unsigned int* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList);
+typedef CUresult CUDAAPI tcuGLCtxCreate_v2(CUcontext* pCtx, unsigned int Flags, CUdevice device);
 typedef CUresult CUDAAPI tcuGLInit(void);
 typedef CUresult CUDAAPI tcuGLRegisterBufferObject(GLuint buffer);
 typedef CUresult CUDAAPI tcuGLMapBufferObject_v2(CUdeviceptr* dptr, size_t* size, GLuint buffer);
 typedef CUresult CUDAAPI tcuGLUnmapBufferObject(GLuint buffer);
 typedef CUresult CUDAAPI tcuGLUnregisterBufferObject(GLuint buffer);
-typedef CUresult CUDAAPI tcuGLSetBufferObjectMapFlags(GLuint buffer, unsigned Flags);
+typedef CUresult CUDAAPI tcuGLSetBufferObjectMapFlags(GLuint buffer, unsigned int Flags);
 typedef CUresult CUDAAPI tcuGLMapBufferObjectAsync_v2(CUdeviceptr* dptr, size_t* size, GLuint buffer, CUstream hStream);
 typedef CUresult CUDAAPI tcuGLUnmapBufferObjectAsync(GLuint buffer, CUstream hStream);

--- a/extern/curve_fit_nd/intern/curve_fit_cubic.c
+++ b/extern/curve_fit_nd/intern/curve_fit_cubic.c
@@ -429,14 +429,45 @@ static double points_calc_circumference_factor(
 		 * (tangents that point away from each other).
 		 * We could try support this but will likely cause extreme >1 scales which could cause other issues. */
 		// assert(angle >= len_tangent);
-		double factor = (angle / len_tangent) / (M_PI / 2);
-		factor = 1.0 - pow(1.0 - factor, 1.75);
-		assert(factor < 1.0 + DBL_EPSILON);
+		double factor = (angle / len_tangent);
+		assert(factor < (M_PI / 2) + DBL_EPSILON);
 		return factor;
 	}
 	else {
 		/* tangents are exactly aligned (think two opposite sides of a circle). */
-		return 1.0;
+		return (M_PI / 2);
+	}
+}
+
+/**
+ * Return the factor by which the distance between two points must be scaled
+ * to give a handle length, assuming both points lie on a perfect circle.
+ *
+ * \note the return value still needs multiplying by 4/3 (1.333...) for correct results.
+ */
+static double points_calc_circle_tangent_factor(
+        const double  tan_l[],
+        const double  tan_r[],
+        const uint dims)
+{
+	const double eps = 1e-8;
+	const double tan_dot = dot_vnvn(tan_l, tan_r, dims);
+	if (tan_dot > 1.0 - eps) {
+		/* no angle difference (use fallback, length won't make any difference) */
+		return (1.0 / 3.0) * 0.75;
+	}
+	else if (tan_dot < -1.0 + eps) {
+		/* tangents point in opposite directions (half-circle) */
+		return (1.0 / 2.0);
+	}
+	else {
+		/* non-aligned tangents, calculate handle length from half the angle between them */
+		const double angle = acos(tan_dot) / 2.0;
+
+		/* could also use 'angle_sin = len_vnvn(tan_l, tan_r, dims) / 2.0' */
+		const double angle_sin = sin(angle);
+		const double angle_cos = cos(angle);
+		return ((1.0 - angle_cos) / (angle_sin * 2.0)) / angle_sin;
 	}
 }

@@ -451,9 +482,20 @@ static double points_calc_cubic_scale(
        const double coords_length, uint dims)
 {
 	const double len_direct = len_vnvn(v_l, v_r, dims);
-	const double len_circle_factor = points_calc_circumference_factor(tan_l, tan_r, dims) * 1.75;
-	const double len_points = min(coords_length, len_circle_factor * len_direct);
-	return (len_direct + ((len_points - len_direct) * len_circle_factor)) / 3.0;
+	const double len_circle_factor = points_calc_circle_tangent_factor(tan_l, tan_r, dims);
+
+	/* if this curve is a circle, this value doesn't need modification */
+	const double len_circle_handle = (len_direct * (len_circle_factor / 0.75));
+
+	/* scale by the difference from the circumference distance */
+	const double len_circle = len_direct * points_calc_circumference_factor(tan_l, tan_r, dims);
+	double scale_handle = (coords_length / len_circle);
+
+	/* Could investigate an accurate calculation here,
+	 * though this gives close results */
+	scale_handle = ((scale_handle - 1.0) * 1.75) + 1.0;
+
+	return len_circle_handle * scale_handle;
 }

 static void cubic_from_points_fallback(
--- a/extern/gflags/README.blender
+++ b/extern/gflags/README.blender
@@ -1,5 +1,5 @@
 Project: Google Flags
-URL: http://code.google.com/p/google-gflags/
+URL: https://github.com/gflags/gflags
 License: New BSD
 Upstream version: 2.2.0 (9db82895)
 Local modifications:
@@ -17,3 +17,7 @@ Local modifications:

 - Applied some modifications from fork https://github.com/Nazg-Gul/gflags.git
  (see https://github.com/gflags/gflags/pull/129)
+
+- Avoid attempting to acquire the mutex lock in FlagRegistry::GlobalRegistry
+  during static flags initialization. See d81dd2d in Blender repository.
+
--- a/extern/glog/README.blender
+++ b/extern/glog/README.blender
@@ -1,5 +1,5 @@
 Project: Google Logging
-URL: http://code.google.com/p/google-glog/
+URL: https://github.com/google/glog
 License: New BSD
 Upstream version: 0.3.4, 4d391fe
 Local modifications:
--- a/extern/gtest/README.blender
+++ b/extern/gtest/README.blender
@@ -1,7 +1,5 @@
 Project: Google C++ Testing Framework
-URL: http://code.google.com/p/googletest
+URL: https://github.com/google/googletest
 License: New BSD
 Upstream version: 1.7.0
-Local modifications:
-
-None.
+Local modifications: None
--- a/extern/libopenjpeg/README.blender
+++ b/extern/libopenjpeg/README.blender
@@ -0,0 +1,5 @@
+Project: OpenJPEG
+URL: http://www.openjpeg.org
+License: BSD 2-Clause
+Upstream version: 1.5.2
+Local modifications:
--- a/extern/rangetree/README.blender
+++ b/extern/rangetree/README.blender
@@ -0,0 +1,5 @@
+Project: RangeTree
+URL: https://github.com/nicholasbishop/RangeTree
+License: GPLv2+
+Upstream version: c4ecf6bb7dfd
+Local modifications: None
--- a/extern/recastnavigation/CMakeLists.txt
+++ b/extern/recastnavigation/CMakeLists.txt
@@ -23,9 +23,11 @@
 #
 # ***** END GPL LICENSE BLOCK *****

+remove_cc_flag_unsigned_char()
+
 set(INC 
-		Recast/Include
-		Detour/Include
+	Recast/Include
+	Detour/Include
 )

 set(INC_SYS
@@ -33,38 +35,38 @@ set(INC_SYS
 )

 set(SRC 
-		recast-capi.cpp
-		recast-capi.h
+	recast-capi.cpp
+	recast-capi.h


-		Detour/Source/DetourCommon.cpp
-		Detour/Source/DetourNode.cpp
-		Detour/Source/DetourStatNavMesh.cpp
-		Detour/Source/DetourStatNavMeshBuilder.cpp
-		Detour/Source/DetourTileNavMesh.cpp
-		Detour/Source/DetourTileNavMeshBuilder.cpp
+	Detour/Source/DetourCommon.cpp
+	Detour/Source/DetourNode.cpp
+	Detour/Source/DetourStatNavMesh.cpp
+	Detour/Source/DetourStatNavMeshBuilder.cpp
+	Detour/Source/DetourTileNavMesh.cpp
+	Detour/Source/DetourTileNavMeshBuilder.cpp

-		Detour/Include/DetourCommon.h
-		Detour/Include/DetourNode.h
-		Detour/Include/DetourStatNavMesh.h
-		Detour/Include/DetourStatNavMeshBuilder.h
-		Detour/Include/DetourTileNavMesh.h
-		Detour/Include/DetourTileNavMeshBuilder.h
+	Detour/Include/DetourCommon.h
+	Detour/Include/DetourNode.h
+	Detour/Include/DetourStatNavMesh.h
+	Detour/Include/DetourStatNavMeshBuilder.h
+	Detour/Include/DetourTileNavMesh.h
+	Detour/Include/DetourTileNavMeshBuilder.h

-		Recast/Source/Recast.cpp
-		Recast/Source/RecastAlloc.cpp
-		Recast/Source/RecastArea.cpp
-		Recast/Source/RecastContour.cpp
-		Recast/Source/RecastFilter.cpp
-		Recast/Source/RecastLayers.cpp
-		Recast/Source/RecastMesh.cpp
-		Recast/Source/RecastMeshDetail.cpp
-		Recast/Source/RecastRasterization.cpp
-		Recast/Source/RecastRegion.cpp
+	Recast/Source/Recast.cpp
+	Recast/Source/RecastAlloc.cpp
+	Recast/Source/RecastArea.cpp
+	Recast/Source/RecastContour.cpp
+	Recast/Source/RecastFilter.cpp
+	Recast/Source/RecastLayers.cpp
+	Recast/Source/RecastMesh.cpp
+	Recast/Source/RecastMeshDetail.cpp
+	Recast/Source/RecastRasterization.cpp
+	Recast/Source/RecastRegion.cpp

-		Recast/Include/Recast.h
-		Recast/Include/RecastAlloc.h
-		Recast/Include/RecastAssert.h
+	Recast/Include/Recast.h
+	Recast/Include/RecastAlloc.h
+	Recast/Include/RecastAssert.h
 )

 blender_add_lib(extern_recastnavigation "${SRC}" "${INC}" "${INC_SYS}")
--- a/extern/sdlew/README.blender
+++ b/extern/sdlew/README.blender
@@ -0,0 +1,5 @@
+Project: SDL Extension Wrangler
+URL: https://github.com/SDLWrangler/sdlew
+License: Apache 2.0
+Upstream version: 15edf8e
+Local modifications: None
--- a/extern/wcwidth/README.blender
+++ b/extern/wcwidth/README.blender
@@ -0,0 +1,5 @@
+Project: WC Width
+URL: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+License: ICS
+Upstream version: 2007-05-26
+Local modifications: None
--- a/extern/xdnd/README.blender
+++ b/extern/xdnd/README.blender
@@ -0,0 +1,8 @@
+Project: X Drag and Drop
+URL: http://www.newplanetsoftware.com/xdnd/ (defunct)
+     https://freedesktop.org/wiki/Specifications/XDND/ (cache)
+License: GPLv2+
+Upstream version: 2000-08-08
+Local modifications:
+* Fix T33192
+  Opening Blender breaks drag-and-drop support on the KDE desktop.
--- a/intern/atomic/atomic_ops.h
+++ b/intern/atomic/atomic_ops.h
@@ -86,6 +86,8 @@ ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
 ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
 ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new);

+ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x);
+
 ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b);
 ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b);

--- a/intern/atomic/intern/atomic_ops_msvc.h
+++ b/intern/atomic/intern/atomic_ops_msvc.h
@@ -76,6 +76,11 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne
 	return InterlockedCompareExchange((long *)v, _new, old);
 }

+ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
+{
+	return InterlockedExchangeAdd((long *)p, x);  /* API takes 'LONG volatile *'; returns the value held before the add. */
+}
+
 /******************************************************************************/
 /* 8-bit operations. */

--- a/intern/atomic/intern/atomic_ops_unix.h
+++ b/intern/atomic/intern/atomic_ops_unix.h
@@ -162,6 +162,16 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne
 #  error "Missing implementation for 32-bit atomic operations"
 #endif

+#if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
+ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
+{
+	return __sync_fetch_and_add(p, x);
+}
+
+#else
+#  error "Missing implementation for 32-bit atomic operations"
+#endif
+
 /******************************************************************************/
 /* 8-bit operations. */
 #if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_1))
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -153,7 +153,9 @@ set(WITH_CYCLES_DEVICE_MULTI TRUE)
 if(CYCLES_STANDALONE_REPOSITORY)
 	TEST_UNORDERED_MAP_SUPPORT()
 endif()
-if(HAVE_STD_UNORDERED_MAP_HEADER)
+if(WITH_CXX11)
+	add_definitions(-DCYCLES_STD_UNORDERED_MAP)
+elseif(HAVE_STD_UNORDERED_MAP_HEADER)
 	if(HAVE_UNORDERED_MAP_IN_STD_NAMESPACE)
 		add_definitions(-DCYCLES_STD_UNORDERED_MAP)
 	else()
@@ -235,6 +237,7 @@ endif()
 add_subdirectory(bvh)
 add_subdirectory(device)
 add_subdirectory(doc)
+add_subdirectory(graph)
 add_subdirectory(kernel)
 add_subdirectory(render)
 add_subdirectory(subd)
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -1,13 +1,14 @@

 set(INC
 	.
+	../bvh
 	../device
+	../graph
 	../kernel
 	../kernel/svm
-	../bvh
-	../util
 	../render
 	../subd
+	../util
 )
 set(INC_SYS
 )
@@ -20,6 +21,7 @@ set(LIBRARIES
 	cycles_render
 	cycles_bvh
 	cycles_subd
+	cycles_graph
 	cycles_util
 	${BLENDER_GL_LIBRARIES}
 	${CYCLES_APP_GLEW_LIBRARY}
--- a/intern/cycles/app/cycles_xml.cpp
+++ b/intern/cycles/app/cycles_xml.cpp
@@ -20,6 +20,8 @@
 #include <algorithm>
 #include <iterator>

+#include "node_xml.h"
+
 #include "background.h"
 #include "camera.h"
 #include "film.h"
@@ -29,6 +31,7 @@
 #include "mesh.h"
 #include "nodes.h"
 #include "object.h"
+#include "osl.h"
 #include "shader.h"
 #include "scene.h"

@@ -48,11 +51,11 @@ CCL_NAMESPACE_BEGIN

 /* XML reading state */

-struct XMLReadState {
+struct XMLReadState : public XMLReader {
 	Scene *scene;		/* scene pointer */
 	Transform tfm;		/* current transform state */
 	bool smooth;		/* smooth normal state */
-	int shader;			/* current shader */
+	Shader *shader;		/* current shader */
 	string base;		/* base path to current file*/
 	float dicing_rate;	/* current dicing rate */
 	Mesh::DisplacementMethod displacement_method;
@@ -60,7 +63,7 @@ struct XMLReadState {
 	XMLReadState()
 	  : scene(NULL),
 	    smooth(false),
-	    shader(0),
+	    shader(NULL),
 	    dicing_rate(0.0f),
 	    displacement_method(Mesh::DISPLACE_BUMP)
 	{
@@ -212,7 +215,7 @@ static bool xml_equal_string(pugi::xml_node node, const char *name, const char *
 	return false;
 }

-static bool xml_read_enum(ustring *str, ShaderEnum& enm, pugi::xml_node node, const char *name)
+static bool xml_read_enum_value(int *value, NodeEnum& enm, pugi::xml_node node, const char *name)
 {
 	pugi::xml_attribute attr = node.attribute(name);

@@ -220,7 +223,7 @@ static bool xml_read_enum(ustring *str, ShaderEnum& enm, pugi::xml_node node, co
 		ustring ustr(attr.value());

 		if(enm.exists(ustr)) {
-			*str = ustr;
+			*value = enm[ustr];
 			return true;
 		}
 		else
@@ -230,141 +233,16 @@ static bool xml_read_enum(ustring *str, ShaderEnum& enm, pugi::xml_node node, co
 	return false;
 }

-static ShaderSocketType xml_read_socket_type(pugi::xml_node node, const char *name)
-{
-	pugi::xml_attribute attr = node.attribute(name);
-
-	if(attr) {
-		string value = attr.value();
-		if(string_iequals(value, "float"))
-			return SHADER_SOCKET_FLOAT;
-		else if(string_iequals(value, "int"))
-			return SHADER_SOCKET_INT;
-		else if(string_iequals(value, "color"))
-			return SHADER_SOCKET_COLOR;
-		else if(string_iequals(value, "vector"))
-			return SHADER_SOCKET_VECTOR;
-		else if(string_iequals(value, "point"))
-			return SHADER_SOCKET_POINT;
-		else if(string_iequals(value, "normal"))
-			return SHADER_SOCKET_NORMAL;
-		else if(string_iequals(value, "closure color"))
-			return SHADER_SOCKET_CLOSURE;
-		else if(string_iequals(value, "string"))
-			return SHADER_SOCKET_STRING;
-		else
-			fprintf(stderr, "Unknown shader socket type \"%s\" for attribute \"%s\".\n", value.c_str(), name);
-	}
-	
-	return SHADER_SOCKET_UNDEFINED;
-}
-
-/* Film */
-
-static void xml_read_film(const XMLReadState& state, pugi::xml_node node)
-{
-	Film *film = state.scene->film;
-	
-	xml_read_float(&film->exposure, node, "exposure");
-
-	/* ToDo: Filter Type */
-	xml_read_float(&film->filter_width, node, "filter_width");
-}
-
-/* Integrator */
-
-static void xml_read_integrator(const XMLReadState& state, pugi::xml_node node)
-{
-	Integrator *integrator = state.scene->integrator;
-	
-	/* Branched Path */
-	bool branched = false;
-	xml_read_bool(&branched, node, "branched");
-
-	if(branched) {
-		integrator->method = Integrator::BRANCHED_PATH;
-
-		xml_read_int(&integrator->diffuse_samples, node, "diffuse_samples");
-		xml_read_int(&integrator->glossy_samples, node, "glossy_samples");
-		xml_read_int(&integrator->transmission_samples, node, "transmission_samples");
-		xml_read_int(&integrator->ao_samples, node, "ao_samples");
-		xml_read_int(&integrator->mesh_light_samples, node, "mesh_light_samples");
-		xml_read_int(&integrator->subsurface_samples, node, "subsurface_samples");
-		xml_read_int(&integrator->volume_samples, node, "volume_samples");
-		xml_read_bool(&integrator->sample_all_lights_direct, node, "sample_all_lights_direct");
-		xml_read_bool(&integrator->sample_all_lights_indirect, node, "sample_all_lights_indirect");
-	}
-	
-	/* Bounces */
-	xml_read_int(&integrator->min_bounce, node, "min_bounce");
-	xml_read_int(&integrator->max_bounce, node, "max_bounce");
-	
-	xml_read_int(&integrator->max_diffuse_bounce, node, "max_diffuse_bounce");
-	xml_read_int(&integrator->max_glossy_bounce, node, "max_glossy_bounce");
-	xml_read_int(&integrator->max_transmission_bounce, node, "max_transmission_bounce");
-	xml_read_int(&integrator->max_volume_bounce, node, "max_volume_bounce");
-	
-	/* Transparency */
-	xml_read_int(&integrator->transparent_min_bounce, node, "transparent_min_bounce");
-	xml_read_int(&integrator->transparent_max_bounce, node, "transparent_max_bounce");
-	xml_read_bool(&integrator->transparent_shadows, node, "transparent_shadows");
-	
-	/* Volume */
-	xml_read_float(&integrator->volume_step_size, node, "volume_step_size");
-	xml_read_int(&integrator->volume_max_steps, node, "volume_max_steps");
-	
-	/* Various Settings */
-	xml_read_bool(&integrator->caustics_reflective, node, "caustics_reflective");
-	xml_read_bool(&integrator->caustics_refractive, node, "caustics_refractive");
-	xml_read_float(&integrator->filter_glossy, node, "filter_glossy");
-	
-	xml_read_int(&integrator->seed, node, "seed");
-	xml_read_float(&integrator->sample_clamp_direct, node, "sample_clamp_direct");
-	xml_read_float(&integrator->sample_clamp_indirect, node, "sample_clamp_indirect");
-}
-
 /* Camera */

-static void xml_read_camera(const XMLReadState& state, pugi::xml_node node)
+static void xml_read_camera(XMLReadState& state, pugi::xml_node node)
 {
 	Camera *cam = state.scene->camera;

 	xml_read_int(&cam->width, node, "width");
 	xml_read_int(&cam->height, node, "height");

-	if(xml_read_float(&cam->fov, node, "fov"))
-		cam->fov = DEG2RADF(cam->fov);
-
-	xml_read_float(&cam->nearclip, node, "nearclip");
-	xml_read_float(&cam->farclip, node, "farclip");
-	xml_read_float(&cam->aperturesize, node, "aperturesize"); // 0.5*focallength/fstop
-	xml_read_float(&cam->focaldistance, node, "focaldistance");
-	xml_read_float(&cam->shuttertime, node, "shuttertime");
-	xml_read_float(&cam->aperture_ratio, node, "aperture_ratio");
-
-	if(xml_equal_string(node, "type", "orthographic"))
-		cam->type = CAMERA_ORTHOGRAPHIC;
-	else if(xml_equal_string(node, "type", "perspective"))
-		cam->type = CAMERA_PERSPECTIVE;
-	else if(xml_equal_string(node, "type", "panorama"))
-		cam->type = CAMERA_PANORAMA;
-
-	if(xml_equal_string(node, "panorama_type", "equirectangular"))
-		cam->panorama_type = PANORAMA_EQUIRECTANGULAR;
-	else if(xml_equal_string(node, "panorama_type", "fisheye_equidistant"))
-		cam->panorama_type = PANORAMA_FISHEYE_EQUIDISTANT;
-	else if(xml_equal_string(node, "panorama_type", "fisheye_equisolid"))
-		cam->panorama_type = PANORAMA_FISHEYE_EQUISOLID;
-
-	xml_read_float(&cam->fisheye_fov, node, "fisheye_fov");
-	xml_read_float(&cam->fisheye_lens, node, "fisheye_lens");
-
-	xml_read_bool(&cam->use_spherical_stereo, node, "use_spherical_stereo");
-	xml_read_float(&cam->interocular_distance, node, "interocular_distance");
-	xml_read_float(&cam->convergence_distance, node, "convergence_distance");
-
-	xml_read_float(&cam->sensorwidth, node, "sensorwidth");
-	xml_read_float(&cam->sensorheight, node, "sensorheight");
+	xml_read_node(state, cam, node);

 	cam->matrix = state.tfm;

@@ -385,8 +263,11 @@ static string xml_socket_name(const char *name)
 	return sname;
 }

-static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pugi::xml_node graph_node)
+static void xml_read_shader_graph(XMLReadState& state, Shader *shader, pugi::xml_node graph_node)
 {
+	xml_read_node(state, shader, graph_node);
+
+	ShaderManager *manager = state.scene->shader_manager;
 	ShaderGraph *graph = new ShaderGraph();

 	map<string, ShaderNode*> nodemap;
@@ -406,8 +287,8 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
 			xml_read_string(&img->filename, node, "src");
 			img->filename = path_join(state.base, img->filename);
 			
-			xml_read_enum(&img->color_space, ImageTextureNode::color_space_enum, node, "color_space");
-			xml_read_enum(&img->projection, ImageTextureNode::projection_enum, node, "projection");
+			xml_read_enum_value((int*)&img->color_space, ImageTextureNode::color_space_enum, node, "color_space");
+			xml_read_enum_value((int*)&img->projection, ImageTextureNode::projection_enum, node, "projection");
 			xml_read_float(&img->projection_blend, node, "projection_blend");

 			/* ToDo: Interpolation */
@@ -420,56 +301,40 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
 			xml_read_string(&env->filename, node, "src");
 			env->filename = path_join(state.base, env->filename);
 			
-			xml_read_enum(&env->color_space, EnvironmentTextureNode::color_space_enum, node, "color_space");
-			xml_read_enum(&env->projection, EnvironmentTextureNode::projection_enum, node, "projection");
+			xml_read_enum_value((int*)&env->color_space, EnvironmentTextureNode::color_space_enum, node, "color_space");
+			xml_read_enum_value((int*)&env->projection, EnvironmentTextureNode::projection_enum, node, "projection");

 			snode = env;
 		}
+#ifdef WITH_OSL
 		else if(string_iequals(node.name(), "osl_shader")) {
-			OSLScriptNode *osl = new OSLScriptNode();
+			if(manager->use_osl()) {
+				std::string filepath;

-			/* Source */
-			xml_read_string(&osl->filepath, node, "src");
-			if(path_is_relative(osl->filepath)) {
-				osl->filepath = path_join(state.base, osl->filepath);
-			}
+				if(xml_read_string(&filepath, node, "src")) {
+					if(path_is_relative(filepath)) {
+						filepath = path_join(state.base, filepath);
+					}

-			/* Generate inputs/outputs from node sockets
-			 *
-			 * Note: ShaderInput/ShaderOutput store shallow string copies only!
-			 * So we register them as ustring to ensure the pointer stays valid. */
-			/* read input values */
-			for(pugi::xml_node param = node.first_child(); param; param = param.next_sibling()) {
-				if(string_iequals(param.name(), "input")) {
-					string name;
-					if(!xml_read_string(&name, param, "name"))
-						continue;
-					
-					ShaderSocketType type = xml_read_socket_type(param, "type");
-					if(type == SHADER_SOCKET_UNDEFINED)
-						continue;
-					
-					osl->add_input(ustring(name).c_str(), type);
+					snode = ((OSLShaderManager*)manager)->osl_node(filepath);
+
+					if(!snode) {
+						fprintf(stderr, "Failed to create OSL node from \"%s\".\n", filepath.c_str());
+					}
 				}
-				else if(string_iequals(param.name(), "output")) {
-					string name;
-					if(!xml_read_string(&name, param, "name"))
-						continue;
-					
-					ShaderSocketType type = xml_read_socket_type(param, "type");
-					if(type == SHADER_SOCKET_UNDEFINED)
-						continue;
-					
-					osl->add_output(ustring(name).c_str(), type);
+				else {
+					fprintf(stderr, "OSL node missing \"src\" attribute.\n");
 				}
 			}
-			
-			snode = osl;
+			else {
+				fprintf(stderr, "OSL node without using --shadingsys osl.\n");
+			}
 		}
+#endif
 		else if(string_iequals(node.name(), "sky_texture")) {
 			SkyTextureNode *sky = new SkyTextureNode();
 			
-			xml_read_enum(&sky->type, SkyTextureNode::type_enum, node, "type");
+			xml_read_enum_value((int*)&sky->type, SkyTextureNode::type_enum, node, "type");
 			xml_read_float3(&sky->sun_direction, node, "sun_direction");
 			xml_read_float(&sky->turbidity, node, "turbidity");
 			xml_read_float(&sky->ground_albedo, node, "ground_albedo");
@@ -494,17 +359,17 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
 		}
 		else if(string_iequals(node.name(), "gradient_texture")) {
 			GradientTextureNode *blend = new GradientTextureNode();
-			xml_read_enum(&blend->type, GradientTextureNode::type_enum, node, "type");
+			xml_read_enum_value((int*)&blend->type, GradientTextureNode::type_enum, node, "type");
 			snode = blend;
 		}
 		else if(string_iequals(node.name(), "voronoi_texture")) {
 			VoronoiTextureNode *voronoi = new VoronoiTextureNode();
-			xml_read_enum(&voronoi->coloring, VoronoiTextureNode::coloring_enum, node, "coloring");
+			xml_read_enum_value((int*)&voronoi->coloring, VoronoiTextureNode::coloring_enum, node, "coloring");
 			snode = voronoi;
 		}
 		else if(string_iequals(node.name(), "musgrave_texture")) {
 			MusgraveTextureNode *musgrave = new MusgraveTextureNode();
-			xml_read_enum(&musgrave->type, MusgraveTextureNode::type_enum, node, "type");
+			xml_read_enum_value((int*)&musgrave->type, MusgraveTextureNode::type_enum, node, "type");
 			snode = musgrave;
 		}
 		else if(string_iequals(node.name(), "magic_texture")) {
@@ -514,8 +379,8 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
 		}
 		else if(string_iequals(node.name(), "wave_texture")) {
 			WaveTextureNode *wave = new WaveTextureNode();
-			xml_read_enum(&wave->type, WaveTextureNode::type_enum, node, "type");
-			xml_read_enum(&wave->profile, WaveTextureNode::profile_enum, node, "profile");
+			xml_read_enum_value((int*)&wave->type, WaveTextureNode::type_enum, node, "type");
+			xml_read_enum_value((int*)&wave->profile, WaveTextureNode::profile_enum, node, "profile");
 			snode = wave;
 		}
 		else if(string_iequals(node.name(), "normal")) {
@@ -529,11 +394,28 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
 			snode = bump;
 		}
 		else if(string_iequals(node.name(), "mapping")) {
-			snode = new MappingNode();
+			MappingNode *map = new MappingNode();
+
+			TextureMapping *texmap = &map->tex_mapping;
+			xml_read_enum_value((int*) &texmap->type, TextureMapping::type_enum, node, "type");
+			xml_read_enum_value((int*) &texmap->projection, TextureMapping::projection_enum, node, "projection");
+			xml_read_enum_value((int*) &texmap->x_mapping, TextureMapping::mapping_enum, node, "x_mapping");
+			xml_read_enum_value((int*) &texmap->y_mapping, TextureMapping::mapping_enum, node, "y_mapping");
+			xml_read_enum_value((int*) &texmap->z_mapping, TextureMapping::mapping_enum, node, "z_mapping");
+			xml_read_bool(&texmap->use_minmax, node, "use_minmax");
+			if(texmap->use_minmax) {
+				xml_read_float3(&texmap->min, node, "min");
+				xml_read_float3(&texmap->max, node, "max");
+			}
+			xml_read_float3(&texmap->translation, node, "translation");
+			xml_read_float3(&texmap->rotation, node, "rotation");
+			xml_read_float3(&texmap->scale, node, "scale");
+
+			snode = map;
 		}
 		else if(string_iequals(node.name(), "anisotropic_bsdf")) {
 			AnisotropicBsdfNode *aniso = new AnisotropicBsdfNode();
-			xml_read_enum(&aniso->distribution, AnisotropicBsdfNode::distribution_enum, node, "distribution");
+			xml_read_enum_value((int*)&aniso->distribution, AnisotropicBsdfNode::distribution_enum, node, "distribution");
 			snode = aniso;
 		}
 		else if(string_iequals(node.name(), "diffuse_bsdf")) {
@@ -550,27 +432,27 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
 		}
 		else if(string_iequals(node.name(), "toon_bsdf")) {
 			ToonBsdfNode *toon = new ToonBsdfNode();
-			xml_read_enum(&toon->component, ToonBsdfNode::component_enum, node, "component");
+			xml_read_enum_value((int*)&toon->component, ToonBsdfNode::component_enum, node, "component");
 			snode = toon;
 		}
 		else if(string_iequals(node.name(), "glossy_bsdf")) {
 			GlossyBsdfNode *glossy = new GlossyBsdfNode();
-			xml_read_enum(&glossy->distribution, GlossyBsdfNode::distribution_enum, node, "distribution");
+			xml_read_enum_value((int*)&glossy->distribution, GlossyBsdfNode::distribution_enum, node, "distribution");
 			snode = glossy;
 		}
 		else if(string_iequals(node.name(), "glass_bsdf")) {
 			GlassBsdfNode *diel = new GlassBsdfNode();
-			xml_read_enum(&diel->distribution, GlassBsdfNode::distribution_enum, node, "distribution");
+			xml_read_enum_value((int*)&diel->distribution, GlassBsdfNode::distribution_enum, node, "distribution");
 			snode = diel;
 		}
 		else if(string_iequals(node.name(), "refraction_bsdf")) {
 			RefractionBsdfNode *diel = new RefractionBsdfNode();
-			xml_read_enum(&diel->distribution, RefractionBsdfNode::distribution_enum, node, "distribution");
+			xml_read_enum_value((int*)&diel->distribution, RefractionBsdfNode::distribution_enum, node, "distribution");
 			snode = diel;
 		}
 		else if(string_iequals(node.name(), "hair_bsdf")) {
 			HairBsdfNode *hair = new HairBsdfNode();
-			xml_read_enum(&hair->component, HairBsdfNode::component_enum, node, "component");
+			xml_read_enum_value((int*)&hair->component, HairBsdfNode::component_enum, node, "component");
 			snode = hair;
 		}
 		else if(string_iequals(node.name(), "emission")) {
@@ -648,7 +530,7 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
 		else if(string_iequals(node.name(), "mix")) {
 			/* ToDo: Tag Mix case for optimization */
 			MixNode *mix = new MixNode();
-			xml_read_enum(&mix->type, MixNode::type_enum, node, "type");
+			xml_read_enum_value((int*)&mix->type, MixNode::type_enum, node, "type");
 			xml_read_bool(&mix->use_clamp, node, "use_clamp");
 			snode = mix;
 		}
@@ -712,32 +594,32 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
 		else if(string_iequals(node.name(), "normal_map")) {
 			NormalMapNode *nmap = new NormalMapNode;
 			xml_read_ustring(&nmap->attribute, node, "attribute");
-			xml_read_enum(&nmap->space, NormalMapNode::space_enum, node, "space");
+			xml_read_enum_value((int*)&nmap->space, NormalMapNode::space_enum, node, "space");
 			snode = nmap;
 		}
 		else if(string_iequals(node.name(), "tangent")) {
 			TangentNode *tangent = new TangentNode;
 			xml_read_ustring(&tangent->attribute, node, "attribute");
-			xml_read_enum(&tangent->direction_type, TangentNode::direction_type_enum, node, "direction_type");
-			xml_read_enum(&tangent->axis, TangentNode::axis_enum, node, "axis");
+			xml_read_enum_value((int*)&tangent->direction_type, TangentNode::direction_type_enum, node, "direction_type");
+			xml_read_enum_value((int*)&tangent->axis, TangentNode::axis_enum, node, "axis");
 			snode = tangent;
 		}
 		else if(string_iequals(node.name(), "math")) {
 			MathNode *math = new MathNode();
-			xml_read_enum(&math->type, MathNode::type_enum, node, "type");
+			xml_read_enum_value((int*)&math->type, MathNode::type_enum, node, "type");
 			xml_read_bool(&math->use_clamp, node, "use_clamp");
 			snode = math;
 		}
 		else if(string_iequals(node.name(), "vector_math")) {
 			VectorMathNode *vmath = new VectorMathNode();
-			xml_read_enum(&vmath->type, VectorMathNode::type_enum, node, "type");
+			xml_read_enum_value((int*)&vmath->type, VectorMathNode::type_enum, node, "type");
 			snode = vmath;
 		}
 		else if(string_iequals(node.name(), "vector_transform")) {
 			VectorTransformNode *vtransform = new VectorTransformNode();
-			xml_read_enum(&vtransform->type, VectorTransformNode::type_enum, node, "type");
-			xml_read_enum(&vtransform->convert_from, VectorTransformNode::convert_space_enum, node, "convert_from");
-			xml_read_enum(&vtransform->convert_to, VectorTransformNode::convert_space_enum, node, "convert_to");
+			xml_read_enum_value((int*)&vtransform->type, VectorTransformNode::type_enum, node, "type");
+			xml_read_enum_value((int*)&vtransform->convert_from, VectorTransformNode::convert_space_enum, node, "convert_from");
+			xml_read_enum_value((int*)&vtransform->convert_to, VectorTransformNode::convert_space_enum, node, "convert_to");
 			snode = vtransform;
 		}
 		else if(string_iequals(node.name(), "connect")) {
@@ -756,7 +638,7 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
 					ShaderNode *fromnode = nodemap[from_tokens[0]];

 					foreach(ShaderOutput *out, fromnode->outputs)
-						if(string_iequals(xml_socket_name(out->name), from_tokens[1]))
+						if(string_iequals(xml_socket_name(out->name().c_str()), from_tokens[1]))
 							output = out;

 					if(!output)
@@ -769,7 +651,7 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
 					ShaderNode *tonode = nodemap[to_tokens[0]];

 					foreach(ShaderInput *in, tonode->inputs)
-						if(string_iequals(xml_socket_name(in->name), to_tokens[1]))
+						if(string_iequals(xml_socket_name(in->name().c_str()), to_tokens[1]))
 							input = in;

 					if(!input)
@@ -801,20 +683,20 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
 			/* read input values */
 			for(pugi::xml_attribute attr = node.first_attribute(); attr; attr = attr.next_attribute()) {
 				foreach(ShaderInput *in, snode->inputs) {
-					if(string_iequals(in->name, attr.name())) {
-						switch(in->type) {
-							case SHADER_SOCKET_FLOAT:
-							case SHADER_SOCKET_INT:
-								xml_read_float(&in->value.x, node, attr.name());
+					if(string_iequals(in->name().c_str(), attr.name())) {
+						switch(in->type()) {
+							case SocketType::FLOAT:
+							case SocketType::INT:
+								xml_read_float(&in->value_float(), node, attr.name());
 								break;
-							case SHADER_SOCKET_COLOR:
-							case SHADER_SOCKET_VECTOR:
-							case SHADER_SOCKET_POINT:
-							case SHADER_SOCKET_NORMAL:
-								xml_read_float3(&in->value, node, attr.name());
+							case SocketType::COLOR:
+							case SocketType::VECTOR:
+							case SocketType::POINT:
+							case SocketType::NORMAL:
+								xml_read_float3(&in->value(), node, attr.name());
 								break;
-							case SHADER_SOCKET_STRING:
-								xml_read_ustring( &in->value_string, node, attr.name() );
+							case SocketType::STRING:
+								xml_read_ustring( &in->value_string(), node, attr.name() );
 								break;
 							default:
 								break;
@@ -829,54 +711,22 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
 	shader->tag_update(state.scene);
 }

-static void xml_read_shader(const XMLReadState& state, pugi::xml_node node)
+static void xml_read_shader(XMLReadState& state, pugi::xml_node node)
 {
 	Shader *shader = new Shader();
-
-	xml_read_string(&shader->name, node, "name");
-	xml_read_bool(&shader->use_mis, node, "use_mis");
-	xml_read_bool(&shader->use_transparent_shadow, node, "use_transparent_shadow");
-
-	/* Volume */
-	xml_read_bool(&shader->heterogeneous_volume, node, "heterogeneous_volume");
-	xml_read_int(&shader->volume_interpolation_method, node, "volume_interpolation_method");
-
-	if(xml_equal_string(node, "volume_sampling_method", "distance"))
-		shader->volume_sampling_method = VOLUME_SAMPLING_DISTANCE;
-	else if(xml_equal_string(node, "volume_sampling_method", "equiangular"))
-		shader->volume_sampling_method = VOLUME_SAMPLING_EQUIANGULAR;
-	else if(xml_equal_string(node, "volume_sampling_method", "multiple_importance"))
-		shader->volume_sampling_method = VOLUME_SAMPLING_MULTIPLE_IMPORTANCE;
-
 	xml_read_shader_graph(state, shader, node);
 	state.scene->shaders.push_back(shader);
 }

 /* Background */

-static void xml_read_background(const XMLReadState& state, pugi::xml_node node)
+static void xml_read_background(XMLReadState& state, pugi::xml_node node)
 {
 	/* Background Settings */
-	Background *bg = state.scene->background;
-
-	xml_read_float(&bg->ao_distance, node, "ao_distance");
-	xml_read_float(&bg->ao_factor, node, "ao_factor");
-
-	xml_read_bool(&bg->transparent, node, "transparent");
+	xml_read_node(state, state.scene->background, node);

 	/* Background Shader */
-	Shader *shader = state.scene->shaders[state.scene->default_background];
-	
-	xml_read_bool(&shader->heterogeneous_volume, node, "heterogeneous_volume");
-	xml_read_int(&shader->volume_interpolation_method, node, "volume_interpolation_method");
-
-	if(xml_equal_string(node, "volume_sampling_method", "distance"))
-		shader->volume_sampling_method = VOLUME_SAMPLING_DISTANCE;
-	else if(xml_equal_string(node, "volume_sampling_method", "equiangular"))
-		shader->volume_sampling_method = VOLUME_SAMPLING_EQUIANGULAR;
-	else if(xml_equal_string(node, "volume_sampling_method", "multiple_importance"))
-		shader->volume_sampling_method = VOLUME_SAMPLING_MULTIPLE_IMPORTANCE;
-
+	Shader *shader = state.scene->default_background;
 	xml_read_shader_graph(state, shader, node);
 }

@@ -904,7 +754,7 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node)
 	mesh->used_shaders.push_back(state.shader);

 	/* read state */
-	int shader = state.shader;
+	int shader = 0;
 	bool smooth = state.smooth;

 	mesh->displacement_method = state.displacement_method;
@@ -965,6 +815,11 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node)
 		/* create vertices */
 		mesh->verts = P;

+		size_t num_triangles = 0;
+		for(size_t i = 0; i < nverts.size(); i++)
+			num_triangles += nverts[i]-2;
+		mesh->reserve_mesh(mesh->verts.size(), num_triangles);
+
 		/* create triangles */
 		int index_offset = 0;

@@ -993,9 +848,9 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node)
 			index_offset = 0;
 			for(size_t i = 0; i < nverts.size(); i++) {
 				for(int j = 0; j < nverts[i]-2; j++) {
-					int v0 = verts[index_offset];
-					int v1 = verts[index_offset + j + 1];
-					int v2 = verts[index_offset + j + 2];
+					int v0 = index_offset;
+					int v1 = index_offset + j + 1;
+					int v2 = index_offset + j + 2;

 					assert(v0*2+1 < (int)UV.size());
 					assert(v1*2+1 < (int)UV.size());
@@ -1064,7 +919,7 @@ static void xml_read_patch(const XMLReadState& state, pugi::xml_node node)
 		mesh->used_shaders.push_back(state.shader);

 		/* split */
-		SubdParams sdparams(mesh, state.shader, state.smooth);
+		SubdParams sdparams(mesh, 0, state.smooth);
 		xml_read_float(&sdparams.dicing_rate, node, "dicing_rate");

 		DiagSplit dsplit(sdparams);
@@ -1079,47 +934,12 @@ static void xml_read_patch(const XMLReadState& state, pugi::xml_node node)

 /* Light */

-static void xml_read_light(const XMLReadState& state, pugi::xml_node node)
+static void xml_read_light(XMLReadState& state, pugi::xml_node node)
 {
 	Light *light = new Light();
+
 	light->shader = state.shader;
-
-	/* Light Type
-	 * 0: Point, 1: Sun, 3: Area, 5: Spot */
-	int type = 0;
-	xml_read_int(&type, node, "type");
-	light->type = (LightType)type;
-
-	/* Spot Light */
-	xml_read_float(&light->spot_angle, node, "spot_angle");
-	xml_read_float(&light->spot_smooth, node, "spot_smooth");
-
-	/* Area Light */
-	xml_read_float(&light->sizeu, node, "sizeu");
-	xml_read_float(&light->sizev, node, "sizev");
-	xml_read_float3(&light->axisu, node, "axisu");
-	xml_read_float3(&light->axisv, node, "axisv");
-
-	/* Portal? (Area light only) */
-	xml_read_bool(&light->is_portal, node, "is_portal");
-
-	/* Generic */
-	xml_read_float(&light->size, node, "size");
-	xml_read_float3(&light->dir, node, "dir");
-	xml_read_float3(&light->co, node, "P");
-	light->co = transform_point(&state.tfm, light->co);
-
-	/* Settings */
-	xml_read_bool(&light->cast_shadow, node, "cast_shadow");
-	xml_read_bool(&light->use_mis, node, "use_mis");
-	xml_read_int(&light->samples, node, "samples");
-	xml_read_int(&light->max_bounces, node, "max_bounces");
-
-	/* Ray Visibility */
-	xml_read_bool(&light->use_diffuse, node, "use_diffuse");
-	xml_read_bool(&light->use_glossy, node, "use_glossy");
-	xml_read_bool(&light->use_transmission, node, "use_transmission");
-	xml_read_bool(&light->use_scatter, node, "use_scatter");
+	xml_read_node(state, light, node);

 	state.scene->lights.push_back(light);
 }
@@ -1161,17 +981,14 @@ static void xml_read_state(XMLReadState& state, pugi::xml_node node)
 	string shadername;

 	if(xml_read_string(&shadername, node, "shader")) {
-		int i = 0;
 		bool found = false;

 		foreach(Shader *shader, state.scene->shaders) {
 			if(shader->name == shadername) {
-				state.shader = i;
+				state.shader = shader;
 				found = true;
 				break;
 			}
-
-			i++;
 		}

 		if(!found)
@@ -1197,16 +1014,16 @@ static void xml_read_state(XMLReadState& state, pugi::xml_node node)

 /* Scene */

-static void xml_read_include(const XMLReadState& state, const string& src);
+static void xml_read_include(XMLReadState& state, const string& src);

-static void xml_read_scene(const XMLReadState& state, pugi::xml_node scene_node)
+static void xml_read_scene(XMLReadState& state, pugi::xml_node scene_node)
 {
 	for(pugi::xml_node node = scene_node.first_child(); node; node = node.next_sibling()) {
 		if(string_iequals(node.name(), "film")) {
-			xml_read_film(state, node);
+			xml_read_node(state, state.scene->film, node);
 		}
 		else if(string_iequals(node.name(), "integrator")) {
-			xml_read_integrator(state, node);
+			xml_read_node(state, state.scene->integrator, node);
 		}
 		else if(string_iequals(node.name(), "camera")) {
 			xml_read_camera(state, node);
@@ -1251,7 +1068,7 @@ static void xml_read_scene(const XMLReadState& state, pugi::xml_node scene_node)

 /* Include */

-static void xml_read_include(const XMLReadState& state, const string& src)
+static void xml_read_include(XMLReadState& state, const string& src)
 {
 	/* open XML document */
 	pugi::xml_document doc;
--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -1,5 +1,6 @@

 set(INC
+	../graph
 	../render
 	../device
 	../kernel
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -359,7 +359,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
                description="Distance between volume shader samples when rendering the volume "
                            "(lower values give more accurate and detailed results, but also increased render time)",
                default=0.1,
-                min=0.0000001, max=100000.0, soft_min=0.01, soft_max=1.0
+                min=0.0000001, max=100000.0, soft_min=0.01, soft_max=1.0, precision=4
                )

        cls.volume_max_steps = IntProperty(
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -76,9 +76,8 @@ def use_cuda(context):

 def use_branched_path(context):
    cscene = context.scene.cycles
-    device_type = context.user_preferences.system.compute_device_type

-    return (cscene.progressive == 'BRANCHED_PATH' and device_type != 'OPENCL')
+    return (cscene.progressive == 'BRANCHED_PATH' and not use_opencl(context))


 def use_sample_all_lights(context):
@@ -704,7 +703,7 @@ class Cycles_PT_mesh_displacement(CyclesButtonsPanel, Panel):

        col = split.column()
        sub = col.column(align=True)
-        sub.label(text="Displacment:")
+        sub.label(text="Displacement:")
        sub.prop(cdata, "displacement_method", text="")

        col = split.column()
--- a/intern/cycles/blender/blender_camera.cpp
+++ b/intern/cycles/blender/blender_camera.cpp
@@ -37,7 +37,7 @@ struct BlenderCamera {
 	float lens;
 	float shuttertime;
 	Camera::MotionPosition motion_position;
-	float shutter_curve[RAMP_TABLE_SIZE];
+	array<float> shutter_curve;

 	Camera::RollingShutterType rolling_shutter_type;
 	float rolling_shutter_duration;
@@ -65,6 +65,9 @@ struct BlenderCamera {
 	bool use_spherical_stereo;
 	float interocular_distance;
 	float convergence_distance;
+	bool use_pole_merge;
+	float pole_merge_angle_from;
+	float pole_merge_angle_to;

 	enum { AUTO, HORIZONTAL, VERTICAL } sensor_fit;
 	float sensor_width;
@@ -105,10 +108,6 @@ static void blender_camera_init(BlenderCamera *bcam,
 	/* render resolution */
 	bcam->full_width = render_resolution_x(b_render);
 	bcam->full_height = render_resolution_y(b_render);
-
-	/* pixel aspect */
-	bcam->pixelaspect.x = b_render.pixel_aspect_x();
-	bcam->pixelaspect.y = b_render.pixel_aspect_y();
 }

 static float blender_camera_focal_distance(BL::RenderEngine& b_engine,
@@ -183,6 +182,10 @@ static void blender_camera_from_object(BlenderCamera *bcam,
 		}
 		bcam->use_spherical_stereo = b_engine.use_spherical_stereo(b_ob);

+		bcam->use_pole_merge = b_camera.stereo().use_pole_merge();
+		bcam->pole_merge_angle_from = b_camera.stereo().pole_merge_angle_from();
+		bcam->pole_merge_angle_to = b_camera.stereo().pole_merge_angle_to();
+
 		bcam->ortho_scale = b_camera.ortho_scale();

 		bcam->lens = b_camera.lens();
@@ -427,6 +430,10 @@ static void blender_camera_sync(Camera *cam, BlenderCamera *bcam, int width, int
 			cam->stereo_eye = Camera::STEREO_NONE;
 	}

+	cam->use_pole_merge = bcam->use_pole_merge;
+	cam->pole_merge_angle_from = bcam->pole_merge_angle_from;
+	cam->pole_merge_angle_to = bcam->pole_merge_angle_to;
+
 	/* anamorphic lens bokeh */
 	cam->aperture_ratio = bcam->aperture_ratio;

@@ -453,7 +460,7 @@ static void blender_camera_sync(Camera *cam, BlenderCamera *bcam, int width, int
 	cam->rolling_shutter_type = bcam->rolling_shutter_type;
 	cam->rolling_shutter_duration = bcam->rolling_shutter_duration;

-	memcpy(cam->shutter_curve, bcam->shutter_curve, sizeof(cam->shutter_curve));
+	cam->shutter_curve = bcam->shutter_curve;

 	/* border */
 	cam->border = bcam->border;
@@ -552,6 +559,10 @@ void BlenderSync::sync_camera_motion(BL::RenderSettings& b_render,
 		float aspectratio, sensor_size;
 		blender_camera_init(&bcam, b_render);

+		/* TODO(sergey): Consider making it a part of blender_camera_init(). */
+		bcam.pixelaspect.x = b_render.pixel_aspect_x();
+		bcam.pixelaspect.y = b_render.pixel_aspect_y();
+
 		blender_camera_from_object(&bcam, b_engine, b_ob);
 		blender_camera_viewplane(&bcam,
 		                         width, height,
--- a/intern/cycles/blender/blender_curves.cpp
+++ b/intern/cycles/blender/blender_curves.cpp
@@ -138,8 +138,7 @@ bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Par
 			BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr);

 			if((b_part.render_type() == BL::ParticleSettings::render_type_PATH) && (b_part.type() == BL::ParticleSettings::type_HAIR)) {
-				int mi = clamp(b_part.material()-1, 0, mesh->used_shaders.size()-1);
-				int shader = mesh->used_shaders[mi];
+				int shader = clamp(b_part.material()-1, 0, mesh->used_shaders.size()-1);
 				int draw_step = background ? b_part.render_step() : b_part.draw_step();
 				int totparts = b_psys.particles.length();
 				int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.draw_percentage() / 100.0f);
@@ -157,16 +156,16 @@ bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Par

 				PointerRNA cpsys = RNA_pointer_get(&b_part.ptr, "cycles");

-				CData->psys_firstcurve.push_back(curvenum);
-				CData->psys_curvenum.push_back(totcurves);
-				CData->psys_shader.push_back(shader);
+				CData->psys_firstcurve.push_back_slow(curvenum);
+				CData->psys_curvenum.push_back_slow(totcurves);
+				CData->psys_shader.push_back_slow(shader);

 				float radius = get_float(cpsys, "radius_scale") * 0.5f;
 	
-				CData->psys_rootradius.push_back(radius * get_float(cpsys, "root_width"));
-				CData->psys_tipradius.push_back(radius * get_float(cpsys, "tip_width"));
-				CData->psys_shape.push_back(get_float(cpsys, "shape"));
-				CData->psys_closetip.push_back(get_boolean(cpsys, "use_closetip"));
+				CData->psys_rootradius.push_back_slow(radius * get_float(cpsys, "root_width"));
+				CData->psys_tipradius.push_back_slow(radius * get_float(cpsys, "tip_width"));
+				CData->psys_shape.push_back_slow(get_float(cpsys, "shape"));
+				CData->psys_closetip.push_back_slow(get_boolean(cpsys, "use_closetip"));

 				int pa_no = 0;
 				if(!(b_part.child_type() == 0) && totchild != 0)
@@ -181,7 +180,7 @@ bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Par

 				for(; pa_no < totparts+totchild; pa_no++) {
 					int keynum = 0;
-					CData->curve_firstkey.push_back(keyno);
+					CData->curve_firstkey.push_back_slow(keyno);
 					
 					float curve_length = 0.0f;
 					float3 pcKey;
@@ -196,15 +195,15 @@ bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Par
 								continue;
 							curve_length += step_length;
 						}
-						CData->curvekey_co.push_back(cKey);
-						CData->curvekey_time.push_back(curve_length);
+						CData->curvekey_co.push_back_slow(cKey);
+						CData->curvekey_time.push_back_slow(curve_length);
 						pcKey = cKey;
 						keynum++;
 					}
 					keyno += keynum;

-					CData->curve_keynum.push_back(keynum);
-					CData->curve_length.push_back(curve_length);
+					CData->curve_keynum.push_back_slow(keynum);
+					CData->curve_length.push_back_slow(curve_length);
 					curvenum++;
 				}
 			}
@@ -256,7 +255,7 @@ bool ObtainCacheParticleUV(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Parti
 					float3 uv = make_float3(0.0f, 0.0f, 0.0f);
 					if(b_mesh->tessface_uv_textures.length())
 						b_psys.uv_on_emitter(psmd, *b_pa, pa_no, uv_num, &uv.x);
-					CData->curve_uv.push_back(uv);
+					CData->curve_uv.push_back_slow(uv);

 					if(pa_no < totparts && b_pa != b_psys.particles.end())
 						++b_pa;
@@ -310,7 +309,7 @@ bool ObtainCacheParticleVcol(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Par
 					float3 vcol = make_float3(0.0f, 0.0f, 0.0f);
 					if(b_mesh->tessface_vertex_colors.length())
 						b_psys.mcol_on_emitter(psmd, *b_pa, pa_no, vcol_num, &vcol.x);
-					CData->curve_vcol.push_back(vcol);
+					CData->curve_vcol.push_back_slow(vcol);

 					if(pa_no < totparts && b_pa != b_psys.particles.end())
 						++b_pa;
@@ -352,10 +351,7 @@ void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
 		}
 	}

-	mesh->verts.reserve(mesh->verts.size() + numverts);
-	mesh->triangles.reserve(mesh->triangles.size() + numtris);
-	mesh->shader.reserve(mesh->shader.size() + numtris);
-	mesh->smooth.reserve(mesh->smooth.size() + numtris);
+	mesh->reserve_mesh(mesh->verts.size() + numverts, mesh->num_triangles() + numtris);

 	/* actually export */
 	for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
@@ -375,8 +371,8 @@ void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
 				xbasis = normalize(cross(RotCam - ickey_loc, v1));
 			float3 ickey_loc_shfl = ickey_loc - radius * xbasis;
 			float3 ickey_loc_shfr = ickey_loc + radius * xbasis;
-			mesh->verts.push_back(ickey_loc_shfl);
-			mesh->verts.push_back(ickey_loc_shfr);
+			mesh->add_vertex(ickey_loc_shfl);
+			mesh->add_vertex(ickey_loc_shfr);
 			vertexindex += 2;

 			for(int curvekey = CData->curve_firstkey[curve] + 1; curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve]; curvekey++) {
@@ -402,8 +398,8 @@ void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
 					xbasis = normalize(cross(RotCam - ickey_loc, v1));
 				float3 ickey_loc_shfl = ickey_loc - radius * xbasis;
 				float3 ickey_loc_shfr = ickey_loc + radius * xbasis;
-				mesh->verts.push_back(ickey_loc_shfl);
-				mesh->verts.push_back(ickey_loc_shfr);
+				mesh->add_vertex(ickey_loc_shfl);
+				mesh->add_vertex(ickey_loc_shfr);
 				mesh->add_triangle(vertexindex-2, vertexindex, vertexindex-1, CData->psys_shader[sys], true);
 				mesh->add_triangle(vertexindex+1, vertexindex-1, vertexindex, CData->psys_shader[sys], true);
 				vertexindex += 2;
@@ -411,7 +407,6 @@ void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
 		}
 	}

-	mesh->reserve(mesh->verts.size(), mesh->triangles.size(), 0, 0);
 	mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
 	mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
 	mesh->add_face_normals();
@@ -438,10 +433,7 @@ void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resol
 		}
 	}

-	mesh->verts.reserve(mesh->verts.size() + numverts);
-	mesh->triangles.reserve(mesh->triangles.size() + numtris);
-	mesh->shader.reserve(mesh->shader.size() + numtris);
-	mesh->smooth.reserve(mesh->smooth.size() + numtris);
+	mesh->reserve_mesh(mesh->verts.size() + numverts, mesh->num_triangles() + numtris);

 	/* actually export */
 	for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
@@ -530,7 +522,7 @@ void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resol
 					float angle = M_2PI_F / (float)resolution;
 					for(int section = 0; section < resolution; section++) {
 						float3 ickey_loc_shf = ickey_loc + radius * (cosf(angle * section) * xbasis + sinf(angle * section) * ybasis);
-						mesh->verts.push_back(ickey_loc_shf);
+						mesh->add_vertex(ickey_loc_shf);
 					}

 					if(subv != 0) {
@@ -547,7 +539,6 @@ void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resol
 		}
 	}

-	mesh->reserve(mesh->verts.size(), mesh->triangles.size(), 0, 0);
 	mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
 	mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
 	mesh->add_face_normals();
@@ -562,7 +553,7 @@ void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
 	int num_keys = 0;
 	int num_curves = 0;

-	if(!(mesh->curves.empty() && mesh->curve_keys.empty()))
+	if(mesh->num_curves())
 		return;

 	Attribute *attr_intercept = NULL;
@@ -585,8 +576,7 @@ void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
 		VLOG(1) << "Exporting curve segments for mesh " << mesh->name;
 	}

-	mesh->curve_keys.reserve(mesh->curve_keys.size() + num_keys);
-	mesh->curves.reserve(mesh->curves.size() + num_curves);
+	mesh->reserve_curves(mesh->num_curves() + num_curves, mesh->curve_keys.size() + num_keys);

 	num_keys = 0;
 	num_curves = 0;
@@ -614,18 +604,16 @@ void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
 				num_curve_keys++;
 			}

-			mesh->add_curve(num_keys, num_curve_keys, CData->psys_shader[sys]);
+			mesh->add_curve(num_keys, CData->psys_shader[sys]);
 			num_keys += num_curve_keys;
 			num_curves++;
 		}
 	}

 	/* check allocation */
-	if((mesh->curve_keys.size() != num_keys) || (mesh->curves.size() != num_curves)) {
+	if((mesh->curve_keys.size() != num_keys) || (mesh->num_curves() != num_curves)) {
 		VLOG(1) << "Allocation failed, clearing data";
-		mesh->curve_keys.clear();
-		mesh->curves.clear();
-		mesh->curve_attributes.clear();
+		mesh->clear();
 	}
 }

@@ -668,13 +656,16 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
 					if(CData->psys_closetip[sys] && (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1))
 						radius = 0.0f;

+					/* curve motion keys store both position and radius in float4 */
 					mP[i] = float3_to_float4(ickey_loc);
 					mP[i].w = radius;

 					/* unlike mesh coordinates, these tend to be slightly different
 					 * between frames due to particle transforms into/out of object
 					 * space, so we use an epsilon to detect actual changes */
-					if(len_squared(mP[i] - mesh->curve_keys[i]) > 1e-5f*1e-5f)
+					float4 curve_key = float3_to_float4(mesh->curve_keys[i]);
+					curve_key.w = mesh->curve_radius[i];
+					if(len_squared(mP[i] - curve_key) > 1e-5f*1e-5f)
 						have_motion = true;
 				}

@@ -698,8 +689,10 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
 			for(int step = 0; step < time_index; step++) {
 				float4 *mP = attr_mP->data_float4() + step*numkeys;

-				for(int key = 0; key < numkeys; key++)
-					mP[key] = mesh->curve_keys[key];
+				for(int key = 0; key < numkeys; key++) {
+					mP[key] = float3_to_float4(mesh->curve_keys[key]);
+					mP[key].w = mesh->curve_radius[key];
+				}
 			}
 		}
 	}
@@ -873,7 +866,9 @@ void BlenderSync::sync_curves(Mesh *mesh,
 	if(!motion) {
 		/* Clear stored curve data */
 		mesh->curve_keys.clear();
-		mesh->curves.clear();
+		mesh->curve_radius.clear();
+		mesh->curve_first_key.clear();
+		mesh->curve_shader.clear();
 		mesh->curve_attributes.clear();
 	}

@@ -890,7 +885,7 @@ void BlenderSync::sync_curves(Mesh *mesh,
 	int triangle_method = scene->curve_system_manager->triangle_method;
 	int resolution = scene->curve_system_manager->resolution;
 	size_t vert_num = mesh->verts.size();
-	size_t tri_num = mesh->triangles.size();
+	size_t tri_num = mesh->num_triangles();
 	int used_res = 1;

 	/* extract particle hair data - should be combined with connecting to mesh later*/
@@ -951,11 +946,10 @@ void BlenderSync::sync_curves(Mesh *mesh,
 			else {
 				Attribute *attr_generated = mesh->curve_attributes.add(ATTR_STD_GENERATED);
 				float3 *generated = attr_generated->data_float3();
-				size_t i = 0;

-				foreach(Mesh::Curve& curve, mesh->curves) {
-					float3 co = float4_to_float3(mesh->curve_keys[curve.first_key]);
-					generated[i++] = co*size - loc;
+				for(size_t i = 0; i < mesh->num_curves(); i++) {
+					float3 co = mesh->curve_keys[mesh->get_curve(i).first_key];
+					generated[i] = co*size - loc;
 				}
 			}
 		}
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -532,7 +532,7 @@ static void attr_create_pointiness(Scene *scene,
 static void create_mesh(Scene *scene,
                        Mesh *mesh,
                        BL::Mesh& b_mesh,
-                        const vector<uint>& used_shaders)
+                        const vector<Shader*>& used_shaders)
 {
 	/* count vertices and faces */
 	int numverts = b_mesh.vertices.length();
@@ -548,13 +548,12 @@ static void create_mesh(Scene *scene,
 		numtris += (vi[3] == 0)? 1: 2;
 	}

-	/* reserve memory */
-	mesh->reserve(numverts, numtris, 0, 0);
+	/* allocate memory */
+	mesh->reserve_mesh(numverts, numtris);

 	/* create vertex coordinates and normals */
-	int i = 0;
-	for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v, ++i)
-		mesh->verts[i] = get_float3(v->co());
+	for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v)
+		mesh->add_vertex(get_float3(v->co()));

 	Attribute *attr_N = mesh->attributes.add(ATTR_STD_VERTEX_NORMAL);
 	float3 *N = attr_N->data_float3();
@@ -583,13 +582,12 @@ static void create_mesh(Scene *scene,
 	/* create faces */
 	vector<int> nverts(numfaces);
 	vector<int> face_flags(numfaces, FACE_FLAG_NONE);
-	int fi = 0, ti = 0;
+	int fi = 0;

 	for(b_mesh.tessfaces.begin(f); f != b_mesh.tessfaces.end(); ++f, ++fi) {
 		int4 vi = get_int4(f->vertices_raw());
 		int n = (vi[3] == 0)? 3: 4;
-		int mi = clamp(f->material_index(), 0, used_shaders.size()-1);
-		int shader = used_shaders[mi];
+		int shader = clamp(f->material_index(), 0, used_shaders.size()-1);
 		bool smooth = f->use_smooth() || use_loop_normals;

 		/* split vertices if normal is different
@@ -619,18 +617,18 @@ static void create_mesh(Scene *scene,
 			   is_zero(cross(mesh->verts[vi[2]] - mesh->verts[vi[0]], mesh->verts[vi[3]] - mesh->verts[vi[0]])))
 			{
 				// TODO(mai): order here is probably wrong
-				mesh->set_triangle(ti++, vi[0], vi[1], vi[3], shader, smooth, true);
-				mesh->set_triangle(ti++, vi[2], vi[3], vi[1], shader, smooth, true);
+				mesh->add_triangle(vi[0], vi[1], vi[3], shader, smooth, true);
+				mesh->add_triangle(vi[2], vi[3], vi[1], shader, smooth, true);
 				face_flags[fi] |= FACE_FLAG_DIVIDE_24;
 			}
 			else {
-				mesh->set_triangle(ti++, vi[0], vi[1], vi[2], shader, smooth, true);
-				mesh->set_triangle(ti++, vi[0], vi[2], vi[3], shader, smooth, true);
+				mesh->add_triangle(vi[0], vi[1], vi[2], shader, smooth, true);
+				mesh->add_triangle(vi[0], vi[2], vi[3], shader, smooth, true);
 				face_flags[fi] |= FACE_FLAG_DIVIDE_13;
 			}
 		}
 		else
-			mesh->set_triangle(ti++, vi[0], vi[1], vi[2], shader, smooth, false);
+			mesh->add_triangle(vi[0], vi[1], vi[2], shader, smooth, false);

 		nverts[fi] = n;
 	}
@@ -660,14 +658,14 @@ static void create_subd_mesh(Scene *scene,
                             BL::Object& b_ob,
                             BL::Mesh& b_mesh,
                             PointerRNA *cmesh,
-                             const vector<uint>& used_shaders,
+                             const vector<Shader*>& used_shaders,
                             float dicing_rate,
                             int max_subdivisions)
 {
 	Mesh basemesh;
 	create_mesh(scene, &basemesh, b_mesh, used_shaders);

-	SubdParams sdparams(mesh, used_shaders[0], true, false);
+	SubdParams sdparams(mesh, 0, true, false);
 	sdparams.dicing_rate = max(0.1f, RNA_float_get(cmesh, "dicing_rate") * dicing_rate);
 	sdparams.max_level = max_subdivisions;

@@ -700,7 +698,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob,
 	BL::Material material_override = render_layer.material_override;

 	/* find shader indices */
-	vector<uint> used_shaders;
+	vector<Shader*> used_shaders;

 	BL::Object::material_slots_iterator slot;
 	for(b_ob.material_slots.begin(slot); slot != b_ob.material_slots.end(); ++slot) {
@@ -742,8 +740,8 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob,
 			 * because the shader needs different mesh attributes */
 			bool attribute_recalc = false;

-			foreach(uint shader, mesh->used_shaders)
-				if(scene->shaders[shader]->need_update_attributes)
+			foreach(Shader *shader, mesh->used_shaders)
+				if(shader->need_update_attributes)
 					attribute_recalc = true;

 			if(!attribute_recalc)
@@ -760,11 +758,12 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob,
 	/* create derived mesh */
 	PointerRNA cmesh = RNA_pointer_get(&b_ob_data.ptr, "cycles");

-	vector<Mesh::Triangle> oldtriangle = mesh->triangles;
+	array<int> oldtriangle = mesh->triangles;
 	
 	/* compares curve_keys rather than strands in order to handle quick hair
 	 * adjustments in dynamic BVH - other methods could probably do this better*/
-	vector<float4> oldcurve_keys = mesh->curve_keys;
+	array<float3> oldcurve_keys = mesh->curve_keys;
+	array<float> oldcurve_radius = mesh->curve_radius;

 	mesh->clear();
 	mesh->used_shaders = used_shaders;
@@ -828,14 +827,21 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob,
 	if(oldtriangle.size() != mesh->triangles.size())
 		rebuild = true;
 	else if(oldtriangle.size()) {
-		if(memcmp(&oldtriangle[0], &mesh->triangles[0], sizeof(Mesh::Triangle)*oldtriangle.size()) != 0)
+		if(memcmp(&oldtriangle[0], &mesh->triangles[0], sizeof(int)*oldtriangle.size()) != 0)
 			rebuild = true;
 	}

 	if(oldcurve_keys.size() != mesh->curve_keys.size())
 		rebuild = true;
 	else if(oldcurve_keys.size()) {
-		if(memcmp(&oldcurve_keys[0], &mesh->curve_keys[0], sizeof(float4)*oldcurve_keys.size()) != 0)
+		if(memcmp(&oldcurve_keys[0], &mesh->curve_keys[0], sizeof(float3)*oldcurve_keys.size()) != 0)
+			rebuild = true;
+	}
+
+	if(oldcurve_radius.size() != mesh->curve_radius.size())
+		rebuild = true;
+	else if(oldcurve_radius.size()) {
+		if(memcmp(&oldcurve_radius[0], &mesh->curve_radius[0], sizeof(float)*oldcurve_radius.size()) != 0)
 			rebuild = true;
 	}
 	
@@ -932,8 +938,8 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
 			Attribute *attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);

 			if(attr_mP) {
-				float4 *keys = &mesh->curve_keys[0];
-				memcpy(attr_mP->data_float4() + time_index*numkeys, keys, sizeof(float4)*numkeys);
+				float3 *keys = &mesh->curve_keys[0];
+				memcpy(attr_mP->data_float3() + time_index*numkeys, keys, sizeof(float3)*numkeys); /* NOTE(review): ExportCurveSegmentsMotion writes motion keys as float4 with radius in .w; this copy never reads mesh->curve_radius - confirm .w is not expected here */
 			}
 		}

--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -155,13 +155,8 @@ void BlenderSync::sync_light(BL::Object& b_parent,
 	light->dir = -transform_get_column(&tfm, 2);

 	/* shader */
-	vector<uint> used_shaders;
-
+	vector<Shader*> used_shaders;
 	find_shader(b_lamp, used_shaders, scene->default_light);
-
-	if(used_shaders.size() == 0)
-		used_shaders.push_back(scene->default_light);
-
 	light->shader = used_shaders[0];

 	/* shadow */
@@ -370,13 +365,12 @@ Object *BlenderSync::sync_object(BL::Object& b_parent,
 	}

 	/* make holdout objects on excluded layer invisible for non-camera rays */
-	if(use_holdout && (layer_flag & render_layer.exclude_layer))
+	if(use_holdout && (layer_flag & render_layer.exclude_layer)) {
 		visibility &= ~(PATH_RAY_ALL_VISIBILITY - PATH_RAY_CAMERA);
+	}

-	/* camera flag is not actually used, instead is tested against render layer
-	 * flags */
-	if(visibility & PATH_RAY_CAMERA) {
-		visibility |= layer_flag << PATH_RAY_LAYER_SHIFT;
+	/* hide objects not on render layer from camera rays */
+	if(!(layer_flag & render_layer.layer)) {
 		visibility &= ~PATH_RAY_CAMERA;
 	}

@@ -577,7 +571,6 @@ void BlenderSync::sync_objects(BL::SpaceView3D& b_v3d, float motion_time)
 			bool hide = (render_layer.use_viewport_visibility)? b_ob.hide(): b_ob.hide_render();
 			uint ob_layer = get_layer(b_base->layers(),
 			                          b_base->layers_local_view(),
-			                          render_layer.use_localview,
 			                          object_is_light(b_ob),
 			                          scene_layers);
 			hide = hide || !(ob_layer & scene_layer);
--- a/intern/cycles/blender/blender_particles.cpp
+++ b/intern/cycles/blender/blender_particles.cpp
@@ -76,7 +76,7 @@ bool BlenderSync::sync_dupli_particle(BL::Object& b_ob,
 	pa.velocity = get_float3(b_pa.velocity());
 	pa.angular_velocity = get_float3(b_pa.angular_velocity());

-	psys->particles.push_back(pa);
+	psys->particles.push_back_slow(pa);

 	if(object->particle_index != psys->particles.size() - 1)
 		scene->object_manager->tag_update(scene);
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -473,7 +473,7 @@ void BlenderSession::render()
 		BL::RenderLayer b_rlay = *b_single_rlay;

 		/* add passes */
-		vector<Pass> passes;
+		array<Pass> passes;
 		Pass::add(PASS_COMBINED, passes);

 		if(session_params.device.advanced_shading) {
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -37,18 +37,13 @@ typedef map<std::string, ConvertNode*> ProxyMap;
 /* Find */

 void BlenderSync::find_shader(BL::ID& id,
-                              vector<uint>& used_shaders,
-                              int default_shader)
+                              vector<Shader*>& used_shaders,
+                              Shader *default_shader)
 {
-	Shader *shader = (id)? shader_map.find(id): scene->shaders[default_shader];
+	Shader *shader = (id)? shader_map.find(id): default_shader;

-	for(size_t i = 0; i < scene->shaders.size(); i++) {
-		if(scene->shaders[i] == shader) {
-			used_shaders.push_back(i);
-			scene->shaders[i]->tag_used(scene);
-			break;
-		}
-	}
+	used_shaders.push_back(shader);
+	shader->tag_used(scene);
 }

 /* RNA translation utilities */
@@ -132,82 +127,57 @@ static float3 get_node_output_vector(BL::Node& b_node, const string& name)
 	return make_float3(value[0], value[1], value[2]);
 }

-static ShaderSocketType convert_socket_type(BL::NodeSocket& b_socket)
+static SocketType::Type convert_socket_type(BL::NodeSocket& b_socket)
 {
 	switch(b_socket.type()) {
 		case BL::NodeSocket::type_VALUE:
-			return SHADER_SOCKET_FLOAT;
+			return SocketType::FLOAT;
 		case BL::NodeSocket::type_INT:
-			return SHADER_SOCKET_INT;
+			return SocketType::INT;
 		case BL::NodeSocket::type_VECTOR:
-			return SHADER_SOCKET_VECTOR;
+			return SocketType::VECTOR;
 		case BL::NodeSocket::type_RGBA:
-			return SHADER_SOCKET_COLOR;
+			return SocketType::COLOR;
 		case BL::NodeSocket::type_STRING:
-			return SHADER_SOCKET_STRING;
+			return SocketType::STRING;
 		case BL::NodeSocket::type_SHADER:
-			return SHADER_SOCKET_CLOSURE;
+			return SocketType::CLOSURE;
 		
 		default:
-			return SHADER_SOCKET_UNDEFINED;
+			return SocketType::UNDEFINED;
 	}
 }

-#ifdef WITH_OSL
-static ShaderSocketType convert_osl_socket_type(OSL::OSLQuery& query,
-                                                BL::NodeSocket& b_socket)
-{
-	ShaderSocketType socket_type = convert_socket_type(b_socket);
-	if(socket_type == SHADER_SOCKET_VECTOR) {
-		/* TODO(sergey): Do we need compatible_name() here? */
-		const OSL::OSLQuery::Parameter *param = query.getparam(b_socket.name());
-		assert(param != NULL);
-		if(param != NULL) {
-			if(param->type.vecsemantics == TypeDesc::POINT) {
-				socket_type = SHADER_SOCKET_POINT;
-			}
-			else if(param->type.vecsemantics == TypeDesc::NORMAL) {
-				socket_type = SHADER_SOCKET_NORMAL;
-			}
-		}
-	}
-
-	return socket_type;
-}
-#endif  /* WITH_OSL */
-
 static void set_default_value(ShaderInput *input,
                              BL::NodeSocket& b_sock,
                              BL::BlendData& b_data,
                              BL::ID& b_id)
 {
 	/* copy values for non linked inputs */
-	switch(input->type) {
-		case SHADER_SOCKET_FLOAT: {
+	switch(input->type()) {
+		case SocketType::FLOAT: {
 			input->set(get_float(b_sock.ptr, "default_value"));
 			break;
 		}
-		case SHADER_SOCKET_INT: {
-			input->set((float)get_int(b_sock.ptr, "default_value"));
+		case SocketType::INT: {
+			input->set(get_int(b_sock.ptr, "default_value"));
 			break;
 		}
-		case SHADER_SOCKET_COLOR: {
+		case SocketType::COLOR: {
 			input->set(float4_to_float3(get_float4(b_sock.ptr, "default_value")));
 			break;
 		}
-		case SHADER_SOCKET_NORMAL:
-		case SHADER_SOCKET_POINT:
-		case SHADER_SOCKET_VECTOR: {
+		case SocketType::NORMAL:
+		case SocketType::POINT:
+		case SocketType::VECTOR: {
 			input->set(get_float3(b_sock.ptr, "default_value"));
 			break;
 		}
-		case SHADER_SOCKET_STRING: {
+		case SocketType::STRING: {
 			input->set((ustring)blender_absolute_path(b_data, b_id, get_string(b_sock.ptr, "default_value")));
 			break;
 		}
-
-		case SHADER_SOCKET_CLOSURE:
-		case SHADER_SOCKET_UNDEFINED:
+		default:
 			break;
 	}
 }
@@ -291,7 +261,7 @@ static ShaderNode *add_node(Scene *scene,
 		RGBRampNode *ramp = new RGBRampNode();
 		BL::ShaderNodeValToRGB b_ramp_node(b_node);
 		BL::ColorRamp b_color_ramp(b_ramp_node.color_ramp());
-		colorramp_to_array(b_color_ramp, ramp->ramp, RAMP_TABLE_SIZE);
+		colorramp_to_array(b_color_ramp, ramp->ramp, ramp->ramp_alpha, RAMP_TABLE_SIZE);
 		ramp->interpolate = b_color_ramp.interpolation() != BL::ColorRamp::interpolation_CONSTANT;
 		node = ramp;
 	}
@@ -320,7 +290,7 @@ static ShaderNode *add_node(Scene *scene,
 	else if(b_node.is_a(&RNA_ShaderNodeMixRGB)) {
 		BL::ShaderNodeMixRGB b_mix_node(b_node);
 		MixNode *mix = new MixNode();
-		mix->type = MixNode::type_enum[b_mix_node.blend_type()];
+		mix->type = (NodeMix)b_mix_node.blend_type();
 		mix->use_clamp = b_mix_node.use_clamp();
 		node = mix;
 	}
@@ -346,27 +316,27 @@ static ShaderNode *add_node(Scene *scene,
 		node = new HSVNode();
 	}
 	else if(b_node.is_a(&RNA_ShaderNodeRGBToBW)) {
-		node = new ConvertNode(SHADER_SOCKET_COLOR, SHADER_SOCKET_FLOAT);
+		node = new RGBToBWNode();
 	}
 	else if(b_node.is_a(&RNA_ShaderNodeMath)) {
 		BL::ShaderNodeMath b_math_node(b_node);
 		MathNode *math = new MathNode();
-		math->type = MathNode::type_enum[b_math_node.operation()];
+		math->type = (NodeMath)b_math_node.operation();
 		math->use_clamp = b_math_node.use_clamp();
 		node = math;
 	}
 	else if(b_node.is_a(&RNA_ShaderNodeVectorMath)) {
 		BL::ShaderNodeVectorMath b_vector_math_node(b_node);
 		VectorMathNode *vmath = new VectorMathNode();
-		vmath->type = VectorMathNode::type_enum[b_vector_math_node.operation()];
+		vmath->type = (NodeVectorMath)b_vector_math_node.operation();
 		node = vmath;
 	}
 	else if(b_node.is_a(&RNA_ShaderNodeVectorTransform)) {
 		BL::ShaderNodeVectorTransform b_vector_transform_node(b_node);
 		VectorTransformNode *vtransform = new VectorTransformNode();
-		vtransform->type = VectorTransformNode::type_enum[b_vector_transform_node.vector_type()];
-		vtransform->convert_from = VectorTransformNode::convert_space_enum[b_vector_transform_node.convert_from()];
-		vtransform->convert_to = VectorTransformNode::convert_space_enum[b_vector_transform_node.convert_to()];
+		vtransform->type = (NodeVectorTransformType)b_vector_transform_node.vector_type();
+		vtransform->convert_from = (NodeVectorTransformConvertSpace)b_vector_transform_node.convert_from();
+		vtransform->convert_to = (NodeVectorTransformConvertSpace)b_vector_transform_node.convert_to();
 		node = vtransform;
 	}
 	else if(b_node.is_a(&RNA_ShaderNodeNormal)) {
@@ -415,13 +385,13 @@ static ShaderNode *add_node(Scene *scene,

 		switch(b_aniso_node.distribution()) {
 			case BL::ShaderNodeBsdfAnisotropic::distribution_BECKMANN:
-				aniso->distribution = ustring("Beckmann");
+				aniso->distribution = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID;
 				break;
 			case BL::ShaderNodeBsdfAnisotropic::distribution_GGX:
-				aniso->distribution = ustring("GGX");
+				aniso->distribution = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID;
 				break;
 			case BL::ShaderNodeBsdfAnisotropic::distribution_ASHIKHMIN_SHIRLEY:
-				aniso->distribution = ustring("Ashikhmin-Shirley");
+				aniso->distribution = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID;
 				break;
 		}

@@ -437,13 +407,13 @@ static ShaderNode *add_node(Scene *scene,

 		switch(b_subsurface_node.falloff()) {
 			case BL::ShaderNodeSubsurfaceScattering::falloff_CUBIC:
-				subsurface->closure = CLOSURE_BSSRDF_CUBIC_ID;
+				subsurface->falloff = CLOSURE_BSSRDF_CUBIC_ID;
 				break;
 			case BL::ShaderNodeSubsurfaceScattering::falloff_GAUSSIAN:
-				subsurface->closure = CLOSURE_BSSRDF_GAUSSIAN_ID;
+				subsurface->falloff = CLOSURE_BSSRDF_GAUSSIAN_ID;
 				break;
 			case BL::ShaderNodeSubsurfaceScattering::falloff_BURLEY:
-				subsurface->closure = CLOSURE_BSSRDF_BURLEY_ID;
+				subsurface->falloff = CLOSURE_BSSRDF_BURLEY_ID;
 				break;
 		}

@@ -455,16 +425,16 @@ static ShaderNode *add_node(Scene *scene,
 		
 		switch(b_glossy_node.distribution()) {
 			case BL::ShaderNodeBsdfGlossy::distribution_SHARP:
-				glossy->distribution = ustring("Sharp");
+				glossy->distribution = CLOSURE_BSDF_REFLECTION_ID;
 				break;
 			case BL::ShaderNodeBsdfGlossy::distribution_BECKMANN:
-				glossy->distribution = ustring("Beckmann");
+				glossy->distribution = CLOSURE_BSDF_MICROFACET_BECKMANN_ID;
 				break;
 			case BL::ShaderNodeBsdfGlossy::distribution_GGX:
-				glossy->distribution = ustring("GGX");
+				glossy->distribution = CLOSURE_BSDF_MICROFACET_GGX_ID;
 				break;
 			case BL::ShaderNodeBsdfGlossy::distribution_ASHIKHMIN_SHIRLEY:
-				glossy->distribution = ustring("Ashikhmin-Shirley");
+				glossy->distribution = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID; /* isotropic closure, like the other glossy cases; the _ANISO id is what the anisotropic node's switch assigns */
 				break;
 		}
 		node = glossy;
@@ -474,13 +444,13 @@ static ShaderNode *add_node(Scene *scene,
 		GlassBsdfNode *glass = new GlassBsdfNode();
 		switch(b_glass_node.distribution()) {
 			case BL::ShaderNodeBsdfGlass::distribution_SHARP:
-				glass->distribution = ustring("Sharp");
+				glass->distribution = CLOSURE_BSDF_SHARP_GLASS_ID;
 				break;
 			case BL::ShaderNodeBsdfGlass::distribution_BECKMANN:
-				glass->distribution = ustring("Beckmann");
+				glass->distribution = CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID;
 				break;
 			case BL::ShaderNodeBsdfGlass::distribution_GGX:
-				glass->distribution = ustring("GGX");
+				glass->distribution = CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID;
 				break;
 		}
 		node = glass;
@@ -490,13 +460,13 @@ static ShaderNode *add_node(Scene *scene,
 		RefractionBsdfNode *refraction = new RefractionBsdfNode();
 		switch(b_refraction_node.distribution()) {
 			case BL::ShaderNodeBsdfRefraction::distribution_SHARP:
-				refraction->distribution = ustring("Sharp");
+				refraction->distribution = CLOSURE_BSDF_REFRACTION_ID;
 				break;
 			case BL::ShaderNodeBsdfRefraction::distribution_BECKMANN:
-				refraction->distribution = ustring("Beckmann");
+				refraction->distribution = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
 				break;
 			case BL::ShaderNodeBsdfRefraction::distribution_GGX:
-				refraction->distribution = ustring("GGX");
+				refraction->distribution = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
 				break;
 		}
 		node = refraction;
@@ -506,10 +476,10 @@ static ShaderNode *add_node(Scene *scene,
 		ToonBsdfNode *toon = new ToonBsdfNode();
 		switch(b_toon_node.component()) {
 			case BL::ShaderNodeBsdfToon::component_DIFFUSE:
-				toon->component = ustring("Diffuse");
+				toon->component = CLOSURE_BSDF_DIFFUSE_TOON_ID;
 				break;
 			case BL::ShaderNodeBsdfToon::component_GLOSSY:
-				toon->component = ustring("Glossy");
+				toon->component = CLOSURE_BSDF_GLOSSY_TOON_ID;
 				break;
 		}
 		node = toon;
@@ -519,10 +489,10 @@ static ShaderNode *add_node(Scene *scene,
 		HairBsdfNode *hair = new HairBsdfNode();
 		switch(b_hair_node.component()) {
 			case BL::ShaderNodeBsdfHair::component_Reflection:
-				hair->component = ustring("Reflection");
+				hair->component = CLOSURE_BSDF_HAIR_REFLECTION_ID;
 				break;
 			case BL::ShaderNodeBsdfHair::component_Transmission:
-				hair->component = ustring("Transmission");
+				hair->component = CLOSURE_BSDF_HAIR_TRANSMISSION_ID;
 				break;
 		}
 		node = hair;
@@ -589,62 +559,17 @@ static ShaderNode *add_node(Scene *scene,
 		if(scene->shader_manager->use_osl()) {
 			/* create script node */
 			BL::ShaderNodeScript b_script_node(b_node);
-			OSLScriptNode *script_node = new OSLScriptNode();

 			OSLShaderManager *manager = (OSLShaderManager*)scene->shader_manager;
 			string bytecode_hash = b_script_node.bytecode_hash();

-			/* Gather additional information from the shader, such as
-			 * input/output type info needed for proper node construction.
-			 */
-			OSL::OSLQuery query;
-			string absolute_filepath;
-
 			if(!bytecode_hash.empty()) {
-				query.open_bytecode(b_script_node.bytecode());
+				node = manager->osl_node("", bytecode_hash, b_script_node.bytecode());
 			}
 			else {
-				absolute_filepath = blender_absolute_path(b_data, b_ntree, b_script_node.filepath());
-				OSLShaderManager::osl_query(query, absolute_filepath);
+				string absolute_filepath = blender_absolute_path(b_data, b_ntree, b_script_node.filepath());
+				node = manager->osl_node(absolute_filepath, "");
 			}
-			/* TODO(sergey): Add proper query info error parsing. */
-
-			/* Generate inputs/outputs from node sockets
-			 *
-			 * Note: the node sockets are generated from OSL parameters,
-			 * so the names match those of the corresponding parameters exactly.
-			 *
-			 * Note 2: ShaderInput/ShaderOutput store shallow string copies only!
-			 * So we register them as ustring to ensure the pointer stays valid. */
-			BL::Node::inputs_iterator b_input;
-
-			for(b_script_node.inputs.begin(b_input); b_input != b_script_node.inputs.end(); ++b_input) {
-				ShaderInput *input = script_node->add_input(ustring(b_input->name()).c_str(),
-				                                            convert_osl_socket_type(query, *b_input));
-				set_default_value(input, *b_input, b_data, b_ntree);
-			}
-
-			BL::Node::outputs_iterator b_output;
-
-			for(b_script_node.outputs.begin(b_output); b_output != b_script_node.outputs.end(); ++b_output) {
-				script_node->add_output(ustring(b_output->name()).c_str(),
-				                        convert_osl_socket_type(query, *b_output));
-			}
-
-			/* load bytecode or filepath */
-			if(!bytecode_hash.empty()) {
-				/* loaded bytecode if not already done */
-				if(!manager->shader_test_loaded(bytecode_hash))
-					manager->shader_load_bytecode(bytecode_hash, b_script_node.bytecode());
-
-				script_node->bytecode_hash = bytecode_hash;
-			}
-			else {
-				/* set filepath */
-				script_node->filepath = absolute_filepath;
-			}
-
-			node = script_node;
 		}
 #else
 		(void)b_data;
@@ -697,8 +622,8 @@ static ShaderNode *add_node(Scene *scene,
 				        get_image_extension(b_image_node));
 			}
 		}
-		image->color_space = ImageTextureNode::color_space_enum[(int)b_image_node.color_space()];
-		image->projection = ImageTextureNode::projection_enum[(int)b_image_node.projection()];
+		image->color_space = (NodeImageColorSpace)b_image_node.color_space();
+		image->projection = (NodeImageProjection)b_image_node.projection();
 		image->interpolation = get_image_interpolation(b_image_node);
 		image->extension = get_image_extension(b_image_node);
 		image->projection_blend = b_image_node.projection_blend();
@@ -743,9 +668,9 @@ static ShaderNode *add_node(Scene *scene,
 				        EXTENSION_REPEAT);
 			}
 		}
-		env->color_space = EnvironmentTextureNode::color_space_enum[(int)b_env_node.color_space()];
+		env->color_space = (NodeImageColorSpace)b_env_node.color_space();
 		env->interpolation = get_image_interpolation(b_env_node);
-		env->projection = EnvironmentTextureNode::projection_enum[(int)b_env_node.projection()];
+		env->projection = (NodeEnvironmentProjection)b_env_node.projection();
 		BL::TexMapping b_texture_mapping(b_env_node.texture_mapping());
 		get_tex_mapping(&env->tex_mapping, b_texture_mapping);
 		node = env;
@@ -753,7 +678,7 @@ static ShaderNode *add_node(Scene *scene,
 	else if(b_node.is_a(&RNA_ShaderNodeTexGradient)) {
 		BL::ShaderNodeTexGradient b_gradient_node(b_node);
 		GradientTextureNode *gradient = new GradientTextureNode();
-		gradient->type = GradientTextureNode::type_enum[(int)b_gradient_node.gradient_type()];
+		gradient->type = (NodeGradientType)b_gradient_node.gradient_type();
 		BL::TexMapping b_texture_mapping(b_gradient_node.texture_mapping());
 		get_tex_mapping(&gradient->tex_mapping, b_texture_mapping);
 		node = gradient;
@@ -761,7 +686,7 @@ static ShaderNode *add_node(Scene *scene,
 	else if(b_node.is_a(&RNA_ShaderNodeTexVoronoi)) {
 		BL::ShaderNodeTexVoronoi b_voronoi_node(b_node);
 		VoronoiTextureNode *voronoi = new VoronoiTextureNode();
-		voronoi->coloring = VoronoiTextureNode::coloring_enum[(int)b_voronoi_node.coloring()];
+		voronoi->coloring = (NodeVoronoiColoring)b_voronoi_node.coloring();
 		BL::TexMapping b_texture_mapping(b_voronoi_node.texture_mapping());
 		get_tex_mapping(&voronoi->tex_mapping, b_texture_mapping);
 		node = voronoi;
@@ -777,8 +702,8 @@ static ShaderNode *add_node(Scene *scene,
 	else if(b_node.is_a(&RNA_ShaderNodeTexWave)) {
 		BL::ShaderNodeTexWave b_wave_node(b_node);
 		WaveTextureNode *wave = new WaveTextureNode();
-		wave->type = WaveTextureNode::type_enum[(int)b_wave_node.wave_type()];
-		wave->profile = WaveTextureNode::profile_enum[(int)b_wave_node.wave_profile()];
+		wave->type = (NodeWaveType)b_wave_node.wave_type();
+		wave->profile = (NodeWaveProfile)b_wave_node.wave_profile();
 		BL::TexMapping b_texture_mapping(b_wave_node.texture_mapping());
 		get_tex_mapping(&wave->tex_mapping, b_texture_mapping);
 		node = wave;
@@ -811,7 +736,7 @@ static ShaderNode *add_node(Scene *scene,
 	else if(b_node.is_a(&RNA_ShaderNodeTexMusgrave)) {
 		BL::ShaderNodeTexMusgrave b_musgrave_node(b_node);
 		MusgraveTextureNode *musgrave = new MusgraveTextureNode();
-		musgrave->type = MusgraveTextureNode::type_enum[(int)b_musgrave_node.musgrave_type()];
+		musgrave->type = (NodeMusgraveType)b_musgrave_node.musgrave_type();
 		BL::TexMapping b_texture_mapping(b_musgrave_node.texture_mapping());
 		get_tex_mapping(&musgrave->tex_mapping, b_texture_mapping);
 		node = musgrave;
@@ -829,7 +754,7 @@ static ShaderNode *add_node(Scene *scene,
 	else if(b_node.is_a(&RNA_ShaderNodeTexSky)) {
 		BL::ShaderNodeTexSky b_sky_node(b_node);
 		SkyTextureNode *sky = new SkyTextureNode();
-		sky->type = SkyTextureNode::type_enum[(int)b_sky_node.sky_type()];
+		sky->type = (NodeSkyType)b_sky_node.sky_type();
 		sky->sun_direction = normalize(get_float3(b_sky_node.sun_direction()));
 		sky->turbidity = b_sky_node.turbidity();
 		sky->ground_albedo = b_sky_node.ground_albedo();
@@ -840,15 +765,15 @@ static ShaderNode *add_node(Scene *scene,
 	else if(b_node.is_a(&RNA_ShaderNodeNormalMap)) {
 		BL::ShaderNodeNormalMap b_normal_map_node(b_node);
 		NormalMapNode *nmap = new NormalMapNode();
-		nmap->space = NormalMapNode::space_enum[(int)b_normal_map_node.space()];
+		nmap->space = (NodeNormalMapSpace)b_normal_map_node.space();
 		nmap->attribute = b_normal_map_node.uv_map();
 		node = nmap;
 	}
 	else if(b_node.is_a(&RNA_ShaderNodeTangent)) {
 		BL::ShaderNodeTangent b_tangent_node(b_node);
 		TangentNode *tangent = new TangentNode();
-		tangent->direction_type = TangentNode::direction_type_enum[(int)b_tangent_node.direction_type()];
-		tangent->axis = TangentNode::axis_enum[(int)b_tangent_node.axis()];
+		tangent->direction_type = (NodeTangentDirectionType)b_tangent_node.direction_type();
+		tangent->axis = (NodeTangentAxis)b_tangent_node.axis();
 		tangent->attribute = b_tangent_node.uv_map();
 		node = tangent;
 	}
@@ -863,8 +788,7 @@ static ShaderNode *add_node(Scene *scene,
 		BL::ShaderNodeTexPointDensity b_point_density_node(b_node);
 		PointDensityTextureNode *point_density = new PointDensityTextureNode();
 		point_density->filename = b_point_density_node.name();
-		point_density->space =
-		        PointDensityTextureNode::space_enum[(int)b_point_density_node.space()];
+		point_density->space = (NodeTexVoxelSpace)b_point_density_node.space();
 		point_density->interpolation = get_image_interpolation(b_point_density_node);
 		point_density->builtin_data = b_point_density_node.ptr.data;

@@ -1025,7 +949,7 @@ static void add_nodes(Scene *scene,
 			BL::Node::internal_links_iterator b_link;
 			for(b_node->internal_links.begin(b_link); b_link != b_node->internal_links.end(); ++b_link) {
 				BL::NodeSocket to_socket(b_link->to_socket());
-				ShaderSocketType to_socket_type = convert_socket_type(to_socket);
+				SocketType::Type to_socket_type = convert_socket_type(to_socket);
 				ConvertNode *proxy = new ConvertNode(to_socket_type, to_socket_type, true);

 				input_map[b_link->from_socket().ptr.data] = proxy->inputs[0];
@@ -1048,7 +972,7 @@ static void add_nodes(Scene *scene,
 			 * so that links have something to connect to and assert won't fail.
 			 */
 			for(b_node->inputs.begin(b_input); b_input != b_node->inputs.end(); ++b_input) {
-				ShaderSocketType input_type = convert_socket_type(*b_input);
+				SocketType::Type input_type = convert_socket_type(*b_input);
 				ConvertNode *proxy = new ConvertNode(input_type, input_type, true);
 				graph->add(proxy);

@@ -1060,7 +984,7 @@ static void add_nodes(Scene *scene,
 				set_default_value(proxy->inputs[0], *b_input, b_data, b_ntree);
 			}
 			for(b_node->outputs.begin(b_output); b_output != b_node->outputs.end(); ++b_output) {
-				ShaderSocketType output_type = convert_socket_type(*b_output);
+				SocketType::Type output_type = convert_socket_type(*b_output);
 				ConvertNode *proxy = new ConvertNode(output_type, output_type, true);
 				graph->add(proxy);

@@ -1207,7 +1131,7 @@ static void add_nodes(Scene *scene,

 void BlenderSync::sync_materials(bool update_all)
 {
-	shader_map.set_default(scene->shaders[scene->default_surface]);
+	shader_map.set_default(scene->default_surface);

 	/* material loop */
 	BL::BlendData::materials_iterator b_mat;
@@ -1232,7 +1156,7 @@ void BlenderSync::sync_materials(bool update_all)
 				ShaderNode *closure, *out;

 				closure = graph->add(new DiffuseBsdfNode());
-				closure->input("Color")->value = get_float3(b_mat->diffuse_color());
+				closure->input("Color")->set(get_float3(b_mat->diffuse_color()));
 				out = graph->output();

 				graph->connect(closure->output("BSDF"), out->input("Surface"));
@@ -1262,7 +1186,7 @@ void BlenderSync::sync_world(bool update_all)
 	BL::World b_world = b_scene.world();

 	if(world_recalc || update_all || b_world.ptr.data != world_map) {
-		Shader *shader = scene->shaders[scene->default_background];
+		Shader *shader = scene->default_background;
 		ShaderGraph *graph = new ShaderGraph();

 		/* create nodes */
@@ -1281,7 +1205,7 @@ void BlenderSync::sync_world(bool update_all)
 			ShaderNode *closure, *out;

 			closure = graph->add(new BackgroundNode());
-			closure->input("Color")->value = get_float3(b_world.horizon_color());
+			closure->input("Color")->set(get_float3(b_world.horizon_color()));
 			out = graph->output();

 			graph->connect(closure->output("Background"), out->input("Surface"));
@@ -1342,7 +1266,7 @@ void BlenderSync::sync_world(bool update_all)

 void BlenderSync::sync_lamps(bool update_all)
 {
-	shader_map.set_default(scene->shaders[scene->default_light]);
+	shader_map.set_default(scene->default_light);

 	/* lamp loop */
 	BL::BlendData::lamps_iterator b_lamp;
@@ -1374,8 +1298,8 @@ void BlenderSync::sync_lamps(bool update_all)
 				}

 				closure = graph->add(new EmissionNode());
-				closure->input("Color")->value = get_float3(b_lamp->color());
-				closure->input("Strength")->value.x = strength;
+				closure->input("Color")->set(get_float3(b_lamp->color()));
+				closure->input("Strength")->set(strength);
 				out = graph->output();

 				graph->connect(closure->output("Emission"), out->input("Surface"));
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -175,8 +175,8 @@ bool BlenderSync::sync_recalc()
 				world_recalc = true;
 			}
 			else if(b_world->node_tree() && b_world->use_nodes()) {
-				Shader *shader = scene->shaders[scene->default_background];
-				if(has_updated_objects && shader != NULL && shader->has_object_dependency) {
+				Shader *shader = scene->default_background;
+				if(has_updated_objects && shader->has_object_dependency) {
 					world_recalc = true;
 				}
 			}
@@ -269,8 +269,6 @@ void BlenderSync::sync_integrator()
 	        SAMPLING_NUM_PATTERNS,
 	        SAMPLING_PATTERN_SOBOL);

-	integrator->layer_flag = render_layer.layer;
-
 	integrator->sample_clamp_direct = get_float(cscene, "sample_clamp_direct");
 	integrator->sample_clamp_indirect = get_float(cscene, "sample_clamp_indirect");
 #ifdef __CAMERA_MOTION__
@@ -377,8 +375,7 @@ void BlenderSync::sync_render_layers(BL::SpaceView3D& b_v3d, const char *layer)
 			layer = layername.c_str();
 		}
 		else {
-			render_layer.use_localview = (b_v3d.local_view() ? true : false);
-			render_layer.scene_layer = get_layer(b_v3d.layers(), b_v3d.layers_local_view(), render_layer.use_localview);
+			render_layer.scene_layer = get_layer(b_v3d.layers(), b_v3d.layers_local_view());
 			render_layer.layer = render_layer.scene_layer;
 			render_layer.exclude_layer = 0;
 			render_layer.holdout_layer = 0;
@@ -421,7 +418,6 @@ void BlenderSync::sync_render_layers(BL::SpaceView3D& b_v3d, const char *layer)
 			render_layer.use_surfaces = b_rlay->use_solid();
 			render_layer.use_hair = b_rlay->use_strand();
 			render_layer.use_viewport_visibility = false;
-			render_layer.use_localview = false;

 			render_layer.bound_samples = (use_layer_samples == 1);
 			if(use_layer_samples != 2) {
@@ -631,9 +627,9 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
 	else
 		params.threads = 0;

-	params.cancel_timeout = get_float(cscene, "debug_cancel_timeout");
-	params.reset_timeout = get_float(cscene, "debug_reset_timeout");
-	params.text_timeout = get_float(cscene, "debug_text_timeout");
+	params.cancel_timeout = (double)get_float(cscene, "debug_cancel_timeout");
+	params.reset_timeout = (double)get_float(cscene, "debug_reset_timeout");
+	params.text_timeout = (double)get_float(cscene, "debug_text_timeout");

 	params.progressive_refine = get_boolean(cscene, "use_progressive_refine");

--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -146,7 +146,7 @@ private:
 	void sync_images();

 	/* util */
-	void find_shader(BL::ID& id, vector<uint>& used_shaders, int default_shader);
+	void find_shader(BL::ID& id, vector<Shader*>& used_shaders, Shader *default_shader);
 	bool BKE_object_is_modified(BL::Object& b_ob);
 	bool object_is_mesh(BL::Object& b_ob);
 	bool object_is_light(BL::Object& b_ob);
@@ -185,7 +185,6 @@ private:
 		  use_surfaces(true),
 		  use_hair(true),
 		  use_viewport_visibility(false),
-		  use_localview(false),
 		  samples(0), bound_samples(false)
 		{}

@@ -200,7 +199,6 @@ private:
 		bool use_surfaces;
 		bool use_hair;
 		bool use_viewport_visibility;
-		bool use_localview;
 		int samples;
 		bool bound_samples;
 	} render_layer;
--- a/intern/cycles/blender/blender_util.h
+++ b/intern/cycles/blender/blender_util.h
@@ -58,14 +58,19 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
 }

 static inline void colorramp_to_array(BL::ColorRamp& ramp,
-                                      float4 *data,
+                                      array<float3>& ramp_color,
+                                      array<float>& ramp_alpha,
                                      int size)
 {
+	ramp_color.resize(size);
+	ramp_alpha.resize(size);
+
 	for(int i = 0; i < size; i++) {
 		float color[4];

 		ramp.evaluate((float)i/(float)(size-1), color);
-		data[i] = make_float4(color[0], color[1], color[2], color[3]);
+		ramp_color[i] = make_float3(color[0], color[1], color[2]);
+		ramp_alpha[i] = color[3];
 	}
 }

@@ -93,11 +98,12 @@ static inline void curvemapping_minmax(/*const*/ BL::CurveMapping& cumap,
 }

 static inline void curvemapping_to_array(BL::CurveMapping& cumap,
-                                         float *data,
+                                         array<float>& data,
                                         int size)
 {
 	cumap.update();
 	BL::CurveMap curve = cumap.curves[0];
+	data.resize(size);
 	for(int i = 0; i < size; i++) {
 		float t = (float)i/(float)(size-1);
 		data[i] = curve.evaluate(t);
@@ -105,7 +111,7 @@ static inline void curvemapping_to_array(BL::CurveMapping& cumap,
 }

 static inline void curvemapping_color_to_array(BL::CurveMapping& cumap,
-                                               float4 *data,
+                                               array<float3>& data,
                                               int size,
                                               bool rgb_curve)
 {
@@ -132,6 +138,8 @@ static inline void curvemapping_color_to_array(BL::CurveMapping& cumap,
 	BL::CurveMap mapG = cumap.curves[1];
 	BL::CurveMap mapB = cumap.curves[2];

+	data.resize(size);
+
 	if(rgb_curve) {
 		BL::CurveMap mapI = cumap.curves[3];

@@ -268,7 +276,6 @@ static inline uint get_layer(const BL::Array<int, 20>& array)

 static inline uint get_layer(const BL::Array<int, 20>& array,
                             const BL::Array<int, 8>& local_array,
-                             bool use_local,
                             bool is_light = false,
                             uint scene_layers = (1 << 20) - 1)
 {
@@ -293,13 +300,6 @@ static inline uint get_layer(const BL::Array<int, 20>& array,
 				layer |= (1 << (20+i));
 	}

-	/* we don't have spare bits for localview (normally 20-28) because
-	 * PATH_RAY_LAYER_SHIFT uses 20-32. So - check if we have localview and if
-	 * so, shift local view bits down to 1-8, since this is done for the view
-	 * port only - it should be OK and not conflict with render layers. */
-	if(use_local)
-		layer >>= 20;
-
 	return layer;
 }

--- a/intern/cycles/bvh/CMakeLists.txt
+++ b/intern/cycles/bvh/CMakeLists.txt
@@ -1,6 +1,7 @@

 set(INC
 	.
+	../graph
 	../kernel
 	../kernel/svm
 	../render
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -128,11 +128,11 @@ void BVH::pack_triangle(int idx, float4 storage[3])
 	const Mesh *mesh = objects[tob]->mesh;

 	int tidx = pack.prim_index[idx];
-	const int *vidx = mesh->triangles[tidx].v;
+	Mesh::Triangle t = mesh->get_triangle(tidx);
 	const float3* vpos = &mesh->verts[0];
-	float3 v0 = vpos[vidx[0]];
-	float3 v1 = vpos[vidx[1]];
-	float3 v2 = vpos[vidx[2]];
+	float3 v0 = vpos[t.v[0]];
+	float3 v1 = vpos[t.v[1]];
+	float3 v2 = vpos[t.v[2]];

 	storage[0] = float3_to_float4(v0);
 	storage[1] = float3_to_float4(v1);
@@ -506,10 +506,10 @@ void RegularBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility
 				if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
 					/* curves */
 					int str_offset = (params.top_level)? mesh->curve_offset: 0;
-					const Mesh::Curve& curve = mesh->curves[pidx - str_offset];
+					Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
 					int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);

-					curve.bounds_grow(k, &mesh->curve_keys[0], bbox);
+					curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);

 					visibility |= PATH_RAY_CURVE;

@@ -520,17 +520,17 @@ void RegularBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility
 						if(attr) {
 							size_t mesh_size = mesh->curve_keys.size();
 							size_t steps = mesh->motion_steps - 1;
-							float4 *key_steps = attr->data_float4();
+							float3 *key_steps = attr->data_float3();

 							for(size_t i = 0; i < steps; i++)
-								curve.bounds_grow(k, key_steps + i*mesh_size, bbox);
+								curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bbox);
 						}
 					}
 				}
 				else {
 					/* triangles */
 					int tri_offset = (params.top_level)? mesh->tri_offset: 0;
-					const Mesh::Triangle& triangle = mesh->triangles[pidx - tri_offset];
+					Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
 					const float3 *vpos = &mesh->verts[0];

 					triangle.bounds_grow(vpos, bbox);
@@ -770,10 +770,10 @@ void QBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
 				if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
 					/* Curves. */
 					int str_offset = (params.top_level)? mesh->curve_offset: 0;
-					const Mesh::Curve& curve = mesh->curves[pidx - str_offset];
+					Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
 					int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);

-					curve.bounds_grow(k, &mesh->curve_keys[0], bbox);
+					curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox);

 					visibility |= PATH_RAY_CURVE;

@@ -784,17 +784,17 @@ void QBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
 						if(attr) {
 							size_t mesh_size = mesh->curve_keys.size();
 							size_t steps = mesh->motion_steps - 1;
-							float4 *key_steps = attr->data_float4();
+							float3 *key_steps = attr->data_float3();

 							for(size_t i = 0; i < steps; i++)
-								curve.bounds_grow(k, key_steps + i*mesh_size, bbox);
+								curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bbox);
 						}
 					}
 				}
 				else {
 					/* Triangles. */
 					int tri_offset = (params.top_level)? mesh->tri_offset: 0;
-					const Mesh::Triangle& triangle = mesh->triangles[pidx - tri_offset];
+					Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
 					const float3 *vpos = &mesh->verts[0];

 					triangle.bounds_grow(vpos, bbox);
--- a/intern/cycles/bvh/bvh_build.cpp
+++ b/intern/cycles/bvh/bvh_build.cpp
@@ -117,8 +117,9 @@ void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh,
 	if(mesh->has_motion_blur())
 		attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);

-	for(uint j = 0; j < mesh->triangles.size(); j++) {
-		Mesh::Triangle t = mesh->triangles[j];
+	size_t num_triangles = mesh->num_triangles();
+	for(uint j = 0; j < num_triangles; j++) {
+		Mesh::Triangle t = mesh->get_triangle(j);
 		BoundBox bounds = BoundBox::empty;
 		PrimitiveType type = PRIMITIVE_TRIANGLE;

@@ -148,22 +149,23 @@ void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh,
 	if(mesh->has_motion_blur())
 		curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);

-	for(uint j = 0; j < mesh->curves.size(); j++) {
-		Mesh::Curve curve = mesh->curves[j];
+	size_t num_curves = mesh->num_curves();
+	for(uint j = 0; j < num_curves; j++) {
+		Mesh::Curve curve = mesh->get_curve(j);
 		PrimitiveType type = PRIMITIVE_CURVE;

 		for(int k = 0; k < curve.num_keys - 1; k++) {
 			BoundBox bounds = BoundBox::empty;
-			curve.bounds_grow(k, &mesh->curve_keys[0], bounds);
+			curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bounds);

 			/* motion curve */
 			if(curve_attr_mP) {
 				size_t mesh_size = mesh->curve_keys.size();
 				size_t steps = mesh->motion_steps - 1;
-				float4 *key_steps = curve_attr_mP->data_float4();
+				float3 *key_steps = curve_attr_mP->data_float3();

 				for(size_t i = 0; i < steps; i++)
-					curve.bounds_grow(k, key_steps + i*mesh_size, bounds);
+					curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bounds);

 				type = PRIMITIVE_MOTION_CURVE;
 			}
@@ -188,10 +190,10 @@ void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob

 static size_t count_curve_segments(Mesh *mesh)
 {
-	size_t num = 0, num_curves = mesh->curves.size();
+	size_t num = 0, num_curves = mesh->num_curves();

 	for(size_t i = 0; i < num_curves; i++)
-		num += mesh->curves[i].num_keys - 1;
+		num += mesh->get_curve(i).num_keys - 1;
 	
 	return num;
 }
@@ -203,15 +205,18 @@ void BVHBuild::add_references(BVHRange& root)

 	foreach(Object *ob, objects) {
 		if(params.top_level) {
+			if(!ob->is_traceable()) {
+				continue;
+			}
 			if(!ob->mesh->is_instanced()) {
-				num_alloc_references += ob->mesh->triangles.size();
+				num_alloc_references += ob->mesh->num_triangles();
 				num_alloc_references += count_curve_segments(ob->mesh);
 			}
 			else
 				num_alloc_references++;
 		}
 		else {
-			num_alloc_references += ob->mesh->triangles.size();
+			num_alloc_references += ob->mesh->num_triangles();
 			num_alloc_references += count_curve_segments(ob->mesh);
 		}
 	}
@@ -224,6 +229,10 @@ void BVHBuild::add_references(BVHRange& root)

 	foreach(Object *ob, objects) {
 		if(params.top_level) {
+			if(!ob->is_traceable()) {
+				++i;
+				continue;
+			}
 			if(!ob->mesh->is_instanced())
 				add_reference_mesh(bounds, center, ob->mesh, i);
 			else
@@ -326,11 +335,11 @@ BVHNode* BVHBuild::run()
 			VLOG(1) << "BVH build statistics:\n"
 			        << "  Build time: " << time_dt() - build_start_time << "\n"
 			        << "  Total number of nodes: "
-			        << rootnode->getSubtreeSize(BVH_STAT_NODE_COUNT) << "\n"
+			        << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_NODE_COUNT)) << "\n"
 			        << "  Number of inner nodes: "
-			        << rootnode->getSubtreeSize(BVH_STAT_INNER_COUNT)  << "\n"
+			        << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_INNER_COUNT)) << "\n"
 			        << "  Number of leaf nodes: "
-			        << rootnode->getSubtreeSize(BVH_STAT_LEAF_COUNT)  << "\n"
+			        << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_LEAF_COUNT)) << "\n"
 			        << "  Allocation slop factor: "
 			               << ((prim_type.capacity() != 0)
 			                       ? (float)prim_type.size() / prim_type.capacity()
@@ -607,8 +616,10 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
 	vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL];
 	vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL];
 	vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL];
+
 	/* TODO(sergey): In theory we should be able to store references. */
-	vector<BVHReference, LeafStackAllocator> object_references;
+	typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;
+	vector<BVHReference, LeafReferenceStackAllocator> object_references;

 	uint visibility[PRIMITIVE_NUM_TOTAL] = {0};
 	/* NOTE: Keep initializtion in sync with actual number of primitives. */
@@ -629,7 +640,7 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,

 			bounds[type_index].grow(ref.bounds());
 			visibility[type_index] |= objects[ref.prim_object()]->visibility;
-			if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
+			if(ref.prim_type() & PRIMITIVE_ALL_CURVE) {
 				visibility[type_index] |= PATH_RAY_CURVE;
 			}
 			++num_new_prims;
--- a/intern/cycles/bvh/bvh_sort.cpp
+++ b/intern/cycles/bvh/bvh_sort.cpp
@@ -125,7 +125,7 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
 		if(compare.compare(data[left], data[right]) > 0) {
 			swap(data[left], data[right]);
 		}
-		if (compare.compare(data[center], data[right]) > 0) {
+		if(compare.compare(data[center], data[right]) > 0) {
 			swap(data[center], data[right]);
 		}
 		swap(data[center], data[right - 1]);
--- a/intern/cycles/bvh/bvh_split.cpp
+++ b/intern/cycles/bvh/bvh_split.cpp
@@ -292,13 +292,13 @@ void BVHSpatialSplit::split_triangle_primitive(const Mesh *mesh,
                                               BoundBox& left_bounds,
                                               BoundBox& right_bounds)
 {
-	const int *inds = mesh->triangles[prim_index].v;
+	Mesh::Triangle t = mesh->get_triangle(prim_index);
 	const float3 *verts = &mesh->verts[0];
-	float3 v1 = tfm ? transform_point(tfm, verts[inds[2]]) : verts[inds[2]];
+	float3 v1 = tfm ? transform_point(tfm, verts[t.v[2]]) : verts[t.v[2]];

 	for(int i = 0; i < 3; i++) {
 		float3 v0 = v1;
-		int vindex = inds[i];
+		int vindex = t.v[i];
 		v1 = tfm ? transform_point(tfm, verts[vindex]) : verts[vindex];
 		float v0p = v0[dim];
 		float v1p = v1[dim];
@@ -329,12 +329,11 @@ void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh,
                                            BoundBox& right_bounds)
 {
 	/* curve split: NOTE - Currently ignores curve width and needs to be fixed.*/
-	const int k0 = mesh->curves[prim_index].first_key + segment_index;
+	Mesh::Curve curve = mesh->get_curve(prim_index);
+	const int k0 = curve.first_key + segment_index;
 	const int k1 = k0 + 1;
-	const float4& key0 = mesh->curve_keys[k0];
-	const float4& key1 = mesh->curve_keys[k1];
-	float3 v0 = float4_to_float3(key0);
-	float3 v1 = float4_to_float3(key1);
+	float3 v0 = mesh->curve_keys[k0];
+	float3 v1 = mesh->curve_keys[k1];

 	if(tfm != NULL) {
 		v0 = transform_point(tfm, v0);
@@ -405,7 +404,7 @@ void BVHSpatialSplit::split_object_reference(const Object *object,
                                             BoundBox& right_bounds)
 {
 	Mesh *mesh = object->mesh;
-	for(int tri_idx = 0; tri_idx < mesh->triangles.size(); ++tri_idx) {
+	for(int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) {
 		split_triangle_primitive(mesh,
 		                         &object->tfm,
 		                         tri_idx,
@@ -414,8 +413,8 @@ void BVHSpatialSplit::split_object_reference(const Object *object,
 		                         left_bounds,
 		                         right_bounds);
 	}
-	for(int curve_idx = 0; curve_idx < mesh->curves.size(); ++curve_idx) {
-		Mesh::Curve &curve = mesh->curves[curve_idx];
+	for(int curve_idx = 0; curve_idx < mesh->num_curves(); ++curve_idx) {
+		Mesh::Curve curve = mesh->get_curve(curve_idx);
 		for(int segment_idx = 0;
 		    segment_idx < curve.num_keys - 1;
 		    ++segment_idx)
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -1,6 +1,7 @@

 set(INC
 	.
+	../graph
 	../kernel
 	../kernel/svm
 	../kernel/osl
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -56,6 +56,8 @@ std::ostream& operator <<(std::ostream &os,
 	   << string_from_bool(requested_features.use_camera_motion)  << std::endl;
 	os << "Use Baking: "
 	   << string_from_bool(requested_features.use_baking)  << std::endl;
+	os << "Use Volume: "
+	   << string_from_bool(requested_features.use_volume)  << std::endl;
 	return os;
 }

--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -54,7 +54,7 @@ public:
 	bool display_device;
 	bool advanced_shading;
 	bool pack_images;
-	bool extended_images; /* flag for GPU and Multi device */
+	bool has_bindless_textures; /* flag for GPU and Multi device */
 	bool use_split_kernel; /* Denotes if the device is going to run cycles using split-kernel */
 	vector<DeviceInfo> multi_devices;

@@ -66,7 +66,7 @@ public:
 		display_device = false;
 		advanced_shading = true;
 		pack_images = false;
-		extended_images = false;
+		has_bindless_textures = false;
 		use_split_kernel = false;
 	}
 };
@@ -230,6 +230,7 @@ public:
 		(void)interpolation;  /* Ignored. */
 		(void)extension;  /* Ignored. */
 	};
+
 	virtual void tex_free(device_memory& /*mem*/) {};

 	/* pixel memory */
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -155,7 +155,9 @@ public:
 	               InterpolationType interpolation,
 	               ExtensionType extension)
 	{
-		VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
+		VLOG(1) << "Texture allocate: " << name << ", "
+		        << string_human_readable_number(mem.memory_size()) << " bytes. ("
+		        << string_human_readable_size(mem.memory_size()) << ")";
 		kernel_tex_copy(&kernel_globals,
 		                name,
 		                mem.data_pointer,
@@ -213,12 +215,7 @@ public:
 				return;
 		}

-		KernelGlobals kg = kernel_globals;
-
-#ifdef WITH_OSL
-		OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
-#endif
-
+		KernelGlobals kg = thread_kernel_globals_init();
 		RenderTile tile;

 		void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*, int, int, int, int, int);
@@ -289,9 +286,7 @@ public:
 			}
 		}

-#ifdef WITH_OSL
-		OSLShader::thread_free(&kg);
-#endif
+		thread_kernel_globals_free(&kg);
 	}

 	void thread_film_convert(DeviceTask& task)
@@ -481,6 +476,40 @@ public:
 	{
 		task_pool.cancel();
 	}
+
+protected:
+	inline KernelGlobals thread_kernel_globals_init()  /* Returns a by-value copy of kernel_globals with the pointer members below reset to NULL. */
+	{
+		KernelGlobals kg = kernel_globals;
+		kg.transparent_shadow_intersections = NULL;  /* thread_kernel_globals_free() frees this only when non-NULL. */
+		const int decoupled_count = sizeof(kg.decoupled_volume_steps) /
+		                            sizeof(*kg.decoupled_volume_steps);  /* Slot count; assumes a fixed-size array member -- TODO confirm. */
+		for(int i = 0; i < decoupled_count; ++i) {
+			kg.decoupled_volume_steps[i] = NULL;
+		}
+		kg.decoupled_volume_steps_index = 0;
+#ifdef WITH_OSL
+		OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);  /* OSL builds only: receives the copy, the shared globals and osl_globals. */
+#endif
+		return kg;
+	}
+
+	inline void thread_kernel_globals_free(KernelGlobals *kg)  /* Releases the buffers held by kg; kg itself is dereferenced unconditionally, so it must not be NULL. */
+	{
+		if(kg->transparent_shadow_intersections != NULL) {
+			free(kg->transparent_shadow_intersections);  /* Pointer is not reset afterwards; do not call twice on the same kg. */
+		}
+		const int decoupled_count = sizeof(kg->decoupled_volume_steps) /
+		                            sizeof(*kg->decoupled_volume_steps);  /* Slot count; assumes a fixed-size array member -- TODO confirm. */
+		for(int i = 0; i < decoupled_count; ++i) {
+			if(kg->decoupled_volume_steps[i] != NULL) {
+				free(kg->decoupled_volume_steps[i]);  /* Slot is left holding the stale pointer. */
+			}
+		}
+#ifdef WITH_OSL
+		OSLShader::thread_free(kg);  /* OSL builds only; runs after the buffers above are freed. */
+#endif
+	}
 };

 Device *device_cpu_create(DeviceInfo& info, Stats &stats, bool background)
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -85,10 +85,10 @@ public:
 	CUcontext cuContext;
 	CUmodule cuModule;
 	map<device_ptr, bool> tex_interp_map;
+	map<device_ptr, uint> tex_bindless_map;
 	int cuDevId;
 	int cuDevArchitecture;
 	bool first_error;
-	bool use_texture_storage;

 	struct PixelMem {
 		GLuint cuPBO;
@@ -99,6 +99,10 @@ public:

 	map<device_ptr, PixelMem> pixel_mem_map;

+	/* Bindless Textures */
+	device_vector<uint> bindless_mapping;
+	bool need_bindless_mapping;
+
 	CUdeviceptr cuda_device_ptr(device_ptr mem)
 	{
 		return (CUdeviceptr)mem;
@@ -176,12 +180,13 @@ public:
 	{
 		first_error = true;
 		background = background_;
-		use_texture_storage = true;

 		cuDevId = info.num;
 		cuDevice = 0;
 		cuContext = 0;

+		need_bindless_mapping = false;
+
 		/* intialize */
 		if(cuda_error(cuInit(0)))
 			return;
@@ -211,11 +216,6 @@ public:
 		cuDeviceComputeCapability(&major, &minor, cuDevId);
 		cuDevArchitecture = major*100 + minor*10;

-		/* In order to use full 6GB of memory on Titan cards, use arrays instead
-		 * of textures. On earlier cards this seems slower, but on Titan it is
-		 * actually slightly faster in tests. */
-		use_texture_storage = (cuDevArchitecture < 300);
-
 		cuda_pop_context();
 	}

@@ -223,6 +223,10 @@ public:
 	{
 		task_pool.stop();

+		if(info.has_bindless_textures) {
+			tex_free(bindless_mapping);
+		}
+
 		cuda_assert(cuCtxDestroy(cuContext));
 	}

@@ -247,23 +251,26 @@ public:

 	string compile_kernel(const DeviceRequestedFeatures& requested_features)
 	{
-		/* compute cubin name */
+		/* Compute cubin name. */
 		int major, minor;
 		cuDeviceComputeCapability(&major, &minor, cuDevId);
 		string cubin;

-		/* adaptive compile */
+		/* Adaptive Compile.
+		 * If enabled, never use the pre-compiled kernel provided with Blender. */
 		bool use_adaptive_compile = use_adaptive_compilation();

-		/* attempt to use kernel provided with blender */
-		cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor));
-		VLOG(1) << "Testing for pre-compiled kernel " << cubin;
-		if(path_exists(cubin)) {
-			VLOG(1) << "Using precompiled kernel";
-			return cubin;
+		/* Attempt to use kernel provided with Blender. */
+		if(!use_adaptive_compile) {
+			cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor));
+			VLOG(1) << "Testing for pre-compiled kernel " << cubin;
+			if(path_exists(cubin)) {
+				VLOG(1) << "Using precompiled kernel";
+				return cubin;
+			}
 		}

-		/* not found, try to use locally compiled kernel */
+		/* Try to use locally compiled kernel. */
 		string kernel_path = path_get("kernel");
 		string md5 = path_files_md5_hash(kernel_path);

@@ -282,7 +289,7 @@ public:

 		cubin = path_user_get(path_join("cache", cubin));
 		VLOG(1) << "Testing for locally compiled kernel " << cubin;
-		/* if exists already, use it */
+		/* If exists already, use it. */
 		if(path_exists(cubin)) {
 			VLOG(1) << "Using locally compiled kernel";
 			return cubin;
@@ -298,7 +305,7 @@ public:
 		}
 #endif

-		/* if not, find CUDA compiler */
+		/* If not, find CUDA compiler. */
 		const char *nvcc = cuewCompilerPath();

 		if(nvcc == NULL) {
@@ -320,7 +327,7 @@ public:
 		else if(cuda_version != 75)
 			printf("CUDA version %d.%d detected, build may succeed but only CUDA 7.5 is officially supported.\n", cuda_version/10, cuda_version%10);

-		/* compile */
+		/* Compile. */
 		string kernel = path_join(kernel_path, path_join("kernels", path_join("cuda", "kernel.cu")));
 		string include = kernel_path;
 		const int machine = system_cpu_bits();
@@ -354,7 +361,7 @@ public:
 			return "";
 		}

-		/* verify if compilation succeeded */
+		/* Verify if compilation succeeded. */
 		if(!path_exists(cubin)) {
 			cuda_error_message("CUDA kernel compilation failed, see console for details.");
 			return "";
@@ -400,6 +407,15 @@ public:
 		return (result == CUDA_SUCCESS);
 	}

+	void load_bindless_mapping()  /* Frees and re-allocates the "__bindless_mapping" texture when a refresh is pending; otherwise a no-op. */
+	{
+		if(info.has_bindless_textures && need_bindless_mapping) {  /* Both the device flag and the pending flag must be set. */
+			tex_free(bindless_mapping);
+			tex_alloc("__bindless_mapping", bindless_mapping, INTERPOLATION_NONE, EXTENSION_REPEAT);
+			need_bindless_mapping = false;  /* Cleared so later calls skip the work until it is set again. */
+		}
+	}
+
 	void mem_alloc(device_memory& mem, MemoryType /*type*/)
 	{
 		cuda_push_context();
@@ -477,128 +493,103 @@ public:
 	               InterpolationType interpolation,
 	               ExtensionType extension)
 	{
-		VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
+		VLOG(1) << "Texture allocate: " << name << ", "
+		        << string_human_readable_number(mem.memory_size()) << " bytes. ("
+		        << string_human_readable_size(mem.memory_size()) << ")";

+		/* Check if we are on sm_30 or above.
+		 * We use arrays and bindles textures for storage there */
+		bool has_bindless_textures = info.has_bindless_textures;
+
+		/* General variables for both architectures */
 		string bind_name = name;
-		if(mem.data_depth > 1) {
-			/* Kernel uses different bind names for 2d and 3d float textures,
-			 * so we have to adjust couple of things here.
-			 */
-			vector<string> tokens;
-			string_split(tokens, name, "_");
-			bind_name = string_printf("__tex_image_%s_3d_%s",
-			                          tokens[2].c_str(),
-			                          tokens[3].c_str());
-		}
-
-		/* determine format */
-		CUarray_format_enum format;
 		size_t dsize = datatype_size(mem.data_type);
 		size_t size = mem.memory_size();
-		bool use_texture = (interpolation != INTERPOLATION_NONE) || use_texture_storage;

-		if(use_texture) {
+		CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
+		switch(extension) {
+			case EXTENSION_REPEAT:
+				address_mode = CU_TR_ADDRESS_MODE_WRAP;
+				break;
+			case EXTENSION_EXTEND:
+				address_mode = CU_TR_ADDRESS_MODE_CLAMP;
+				break;
+			case EXTENSION_CLIP:
+				address_mode = CU_TR_ADDRESS_MODE_BORDER;
+				break;
+			default:
+				assert(0);
+				break;
+		}

-			switch(mem.data_type) {
-				case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
-				case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
-				case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
-				case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
-				default: assert(0); return;
+		CUfilter_mode filter_mode;
+		if(interpolation == INTERPOLATION_CLOSEST) {
+			filter_mode = CU_TR_FILTER_MODE_POINT;
+		}
+		else {
+			filter_mode = CU_TR_FILTER_MODE_LINEAR;
+		}
+
+		CUarray_format_enum format;
+		switch(mem.data_type) {
+			case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
+			case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
+			case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
+			case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
+			default: assert(0); return;
+		}
+
+		/* General variables for Fermi */
+		CUtexref texref = NULL;
+
+		if(!has_bindless_textures) {
+			if(mem.data_depth > 1) {
+				/* Kernel uses different bind names for 2d and 3d float textures,
+				 * so we have to adjust couple of things here.
+				 */
+				vector<string> tokens;
+				string_split(tokens, name, "_");
+				bind_name = string_printf("__tex_image_%s_3d_%s",
+				                          tokens[2].c_str(),
+				                          tokens[3].c_str());
 			}

-			CUtexref texref = NULL;
-
 			cuda_push_context();
 			cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str()));
+			cuda_pop_context();

 			if(!texref) {
-				cuda_pop_context();
 				return;
 			}
+		}

-			if(interpolation != INTERPOLATION_NONE) {
-				CUarray handle = NULL;
+		/* Data Storage */
+		if(interpolation == INTERPOLATION_NONE) {
+			if(has_bindless_textures) {
+				mem_alloc(mem, MEM_READ_ONLY);
+				mem_copy_to(mem);

-				if(mem.data_depth > 1) {
-					CUDA_ARRAY3D_DESCRIPTOR desc;
+				cuda_push_context();

-					desc.Width = mem.data_width;
-					desc.Height = mem.data_height;
-					desc.Depth = mem.data_depth;
-					desc.Format = format;
-					desc.NumChannels = mem.data_elements;
-					desc.Flags = 0;
+				CUdeviceptr cumem;
+				size_t cubytes;

-					cuda_assert(cuArray3DCreate(&handle, &desc));
+				cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
+
+				if(cubytes == 8) {
+					/* 64 bit device pointer */
+					uint64_t ptr = mem.device_pointer;
+					cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
 				}
 				else {
-					CUDA_ARRAY_DESCRIPTOR desc;
-
-					desc.Width = mem.data_width;
-					desc.Height = mem.data_height;
-					desc.Format = format;
-					desc.NumChannels = mem.data_elements;
-
-					cuda_assert(cuArrayCreate(&handle, &desc));
+					/* 32 bit device pointer */
+					uint32_t ptr = (uint32_t)mem.device_pointer;
+					cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
 				}

-				if(!handle) {
-					cuda_pop_context();
-					return;
-				}
-
-				if(mem.data_depth > 1) {
-					CUDA_MEMCPY3D param;
-					memset(&param, 0, sizeof(param));
-					param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
-					param.dstArray = handle;
-					param.srcMemoryType = CU_MEMORYTYPE_HOST;
-					param.srcHost = (void*)mem.data_pointer;
-					param.srcPitch = mem.data_width*dsize*mem.data_elements;
-					param.WidthInBytes = param.srcPitch;
-					param.Height = mem.data_height;
-					param.Depth = mem.data_depth;
-
-					cuda_assert(cuMemcpy3D(&param));
-				}
-				if(mem.data_height > 1) {
-					CUDA_MEMCPY2D param;
-					memset(&param, 0, sizeof(param));
-					param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
-					param.dstArray = handle;
-					param.srcMemoryType = CU_MEMORYTYPE_HOST;
-					param.srcHost = (void*)mem.data_pointer;
-					param.srcPitch = mem.data_width*dsize*mem.data_elements;
-					param.WidthInBytes = param.srcPitch;
-					param.Height = mem.data_height;
-
-					cuda_assert(cuMemcpy2D(&param));
-				}
-				else
-					cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size));
-
-				cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT));
-
-				if(interpolation == INTERPOLATION_CLOSEST) {
-					cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT));
-				}
-				else if(interpolation == INTERPOLATION_LINEAR) {
-					cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR));
-				}
-				else {/* CUBIC and SMART are unsupported for CUDA */
-					cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR));
-				}
-				cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES));
-
-				mem.device_pointer = (device_ptr)handle;
-				mem.device_size = size;
-
-				stats.mem_alloc(size);
+				cuda_pop_context();
 			}
 			else {
-				cuda_pop_context();
-
 				mem_alloc(mem, MEM_READ_ONLY);
 				mem_copy_to(mem);

@@ -607,23 +598,137 @@ public:
 				cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size));
 				cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT));
 				cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER));
+
+				cuda_pop_context();
+			}
+		}
+		/* Texture Storage */
+		else {
+			CUarray handle = NULL;
+
+			cuda_push_context();
+
+			if(mem.data_depth > 1) {
+				CUDA_ARRAY3D_DESCRIPTOR desc;
+
+				desc.Width = mem.data_width;
+				desc.Height = mem.data_height;
+				desc.Depth = mem.data_depth;
+				desc.Format = format;
+				desc.NumChannels = mem.data_elements;
+				desc.Flags = 0;
+
+				cuda_assert(cuArray3DCreate(&handle, &desc));
+			}
+			else {
+				CUDA_ARRAY_DESCRIPTOR desc;
+
+				desc.Width = mem.data_width;
+				desc.Height = mem.data_height;
+				desc.Format = format;
+				desc.NumChannels = mem.data_elements;
+
+				cuda_assert(cuArrayCreate(&handle, &desc));
 			}

-			CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
-			switch(extension) {
-				case EXTENSION_REPEAT:
-					address_mode = CU_TR_ADDRESS_MODE_WRAP;
-					break;
-				case EXTENSION_EXTEND:
-					address_mode = CU_TR_ADDRESS_MODE_CLAMP;
-					break;
-				case EXTENSION_CLIP:
-					address_mode = CU_TR_ADDRESS_MODE_BORDER;
-					break;
-				default:
-					assert(0);
-					break;
+			if(!handle) {
+				cuda_pop_context();
+				return;
 			}
+
+			/* Allocate 3D, 2D or 1D memory */
+			if(mem.data_depth > 1) {
+				CUDA_MEMCPY3D param;
+				memset(&param, 0, sizeof(param));
+				param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+				param.dstArray = handle;
+				param.srcMemoryType = CU_MEMORYTYPE_HOST;
+				param.srcHost = (void*)mem.data_pointer;
+				param.srcPitch = mem.data_width*dsize*mem.data_elements;
+				param.WidthInBytes = param.srcPitch;
+				param.Height = mem.data_height;
+				param.Depth = mem.data_depth;
+
+				cuda_assert(cuMemcpy3D(&param));
+			}
+			else if(mem.data_height > 1) {
+				CUDA_MEMCPY2D param;
+				memset(&param, 0, sizeof(param));
+				param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+				param.dstArray = handle;
+				param.srcMemoryType = CU_MEMORYTYPE_HOST;
+				param.srcHost = (void*)mem.data_pointer;
+				param.srcPitch = mem.data_width*dsize*mem.data_elements;
+				param.WidthInBytes = param.srcPitch;
+				param.Height = mem.data_height;
+
+				cuda_assert(cuMemcpy2D(&param));
+			}
+			else
+				cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size));
+
+			/* Fermi and Kepler */
+			mem.device_pointer = (device_ptr)handle;
+			mem.device_size = size;
+
+			stats.mem_alloc(size);
+
+			/* Bindless Textures - Kepler */
+			if(has_bindless_textures) {
+				int flat_slot = 0;
+				if(string_startswith(name, "__tex_image")) {
+					int pos =  string(name).rfind("_");
+					flat_slot = atoi(name + pos + 1);
+				}
+				else {
+					assert(0);
+				}
+
+				CUDA_RESOURCE_DESC resDesc;
+				memset(&resDesc, 0, sizeof(resDesc));
+				resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
+				resDesc.res.array.hArray = handle;
+				resDesc.flags = 0;
+
+				CUDA_TEXTURE_DESC texDesc;
+				memset(&texDesc, 0, sizeof(texDesc));
+				texDesc.addressMode[0] = address_mode;
+				texDesc.addressMode[1] = address_mode;
+				texDesc.addressMode[2] = address_mode;
+				texDesc.filterMode = filter_mode;
+				texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
+
+				CUtexObject tex = 0;
+				cuda_assert(cuTexObjectCreate(&tex, &resDesc, &texDesc, NULL));
+
+				/* Safety check: the mapping stores the handle as uint, so it has to fit; compare before truncating. */
+				if(tex > UINT_MAX) {
+					assert(0);
+				}
+
+				/* Resize once */
+				if(flat_slot >= bindless_mapping.size())
+					bindless_mapping.resize(4096); /*TODO(dingto): Make this a variable */
+
+				/* Set Mapping and tag that we need to (re-)upload to device */
+				bindless_mapping.get_data()[flat_slot] = (uint)tex;
+				tex_bindless_map[mem.device_pointer] = (uint)tex;
+				need_bindless_mapping = true;
+			}
+			/* Regular Textures - Fermi */
+			else {
+				cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT));
+				cuda_assert(cuTexRefSetFilterMode(texref, filter_mode));
+				cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES));
+			}
+
+			cuda_pop_context();
+		}
+
+		/* Fermi, Data and Image Textures */
+		if(!has_bindless_textures) {
+			cuda_push_context();
+
 			cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode));
 			cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode));
 			if(mem.data_depth > 1) {
@@ -634,31 +739,8 @@ public:

 			cuda_pop_context();
 		}
-		else {
-			mem_alloc(mem, MEM_READ_ONLY);
-			mem_copy_to(mem);
-
-			cuda_push_context();
-
-			CUdeviceptr cumem;
-			size_t cubytes;
-
-			cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
-
-			if(cubytes == 8) {
-				/* 64 bit device pointer */
-				uint64_t ptr = mem.device_pointer;
-				cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
-			}
-			else {
-				/* 32 bit device pointer */
-				uint32_t ptr = (uint32_t)mem.device_pointer;
-				cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
-			}
-
-			cuda_pop_context();
-		}

+		/* Fermi and Kepler */
 		tex_interp_map[mem.device_pointer] = (interpolation != INTERPOLATION_NONE);
 	}

@@ -670,6 +752,12 @@ public:
 				cuArrayDestroy((CUarray)mem.device_pointer);
 				cuda_pop_context();

+				/* Free CUtexObject (Bindless Textures) */
+				if(info.has_bindless_textures && tex_bindless_map[mem.device_pointer]) {
+					uint flat_slot = tex_bindless_map[mem.device_pointer];
+					cuTexObjectDestroy(flat_slot);
+				}
+
 				tex_interp_map.erase(tex_interp_map.find(mem.device_pointer));
 				mem.device_pointer = 0;

@@ -726,8 +814,8 @@ public:
 		printf("threads_per_block %d\n", threads_per_block);
 		printf("num_registers %d\n", num_registers);*/

-		int xthreads = (int)sqrt((float)threads_per_block);
-		int ythreads = (int)sqrt((float)threads_per_block);
+		int xthreads = (int)sqrt(threads_per_block);
+		int ythreads = (int)sqrt(threads_per_block);
 		int xblocks = (rtile.w + xthreads - 1)/xthreads;
 		int yblocks = (rtile.h + ythreads - 1)/ythreads;

@@ -780,8 +868,8 @@ public:
 		int threads_per_block;
 		cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert));

-		int xthreads = (int)sqrt((float)threads_per_block);
-		int ythreads = (int)sqrt((float)threads_per_block);
+		int xthreads = (int)sqrt(threads_per_block);
+		int ythreads = (int)sqrt(threads_per_block);
 		int xblocks = (task.w + xthreads - 1)/xthreads;
 		int yblocks = (task.h + ythreads - 1)/ythreads;

@@ -1111,6 +1199,9 @@ public:
 			RenderTile tile;
 			
 			bool branched = task->integrator_branched;
+
+			/* Upload Bindless Mapping */
+			load_bindless_mapping();
 			
 			/* keep rendering tiles until done */
 			while(task->acquire_tile(this, tile)) {
@@ -1134,6 +1225,9 @@ public:
 			}
 		}
 		else if(task->type == DeviceTask::SHADER) {
+			/* Upload Bindless Mapping */
+			load_bindless_mapping();
+
 			shader(*task);

 			cuda_push_context();
@@ -1269,11 +1363,12 @@ void device_cuda_info(vector<DeviceInfo>& devices)
 		info.num = num;

 		info.advanced_shading = (major >= 2);
-		info.extended_images = (major >= 3);
+		info.has_bindless_textures = (major >= 3);
 		info.pack_images = false;

 		/* if device has a kernel timeout, assume it is used for display */
 		if(cuDeviceGetAttribute(&attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num) == CUDA_SUCCESS && attr == 1) {
+			info.description += " (Display)";
 			info.display_device = true;
 			display_devices.push_back(info);
 		}
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -175,7 +175,9 @@ public:
 	               interpolation,
 	               ExtensionType extension)
 	{
-		VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
+		VLOG(1) << "Texture allocate: " << name << ", "
+		        << string_human_readable_number(mem.memory_size()) << " bytes. ("
+		        << string_human_readable_size(mem.memory_size()) << ")";

 		foreach(SubDevice& sub, devices) {
 			mem.device_pointer = 0;
@@ -352,7 +354,7 @@ static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool

 	info.advanced_shading = with_advanced_shading;
 	info.pack_images = false;
-	info.extended_images = true;
+	info.has_bindless_textures = true;

 	foreach(DeviceInfo& subinfo, devices) {
 		if(subinfo.type == type) {
@@ -376,7 +378,7 @@ static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool
 			if(subinfo.display_device)
 				info.display_device = true;
 			info.pack_images = info.pack_images || subinfo.pack_images;
-			info.extended_images = info.extended_images && subinfo.extended_images;
+			info.has_bindless_textures = info.has_bindless_textures && subinfo.has_bindless_textures;
 			num_added++;
 		}
 	}
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -168,7 +168,9 @@ public:
 	               InterpolationType interpolation,
 	               ExtensionType extension)
 	{
-		VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
+		VLOG(1) << "Texture allocate: " << name << ", "
+		        << string_human_readable_number(mem.memory_size()) << " bytes. ("
+		        << string_human_readable_size(mem.memory_size()) << ")";

 		thread_scoped_lock lock(rpc_lock);

--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1187,7 +1187,9 @@ public:
 	               InterpolationType /*interpolation*/,
 	               ExtensionType /*extension*/)
 	{
-		VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
+		VLOG(1) << "Texture allocate: " << name << ", "
+		        << string_human_readable_number(mem.memory_size()) << " bytes. ("
+		        << string_human_readable_size(mem.memory_size()) << ")";
 		mem_alloc(mem, MEM_READ_ONLY);
 		mem_copy_to(mem);
 		assert(mem_map.find(name) == mem_map.end());
@@ -1222,18 +1224,28 @@ public:
 			CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
 		clGetDeviceInfo(cdDevice,
 			CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*3, max_work_items, NULL);
-	
-		/* try to divide evenly over 2 dimensions */
+
+		/* Try to divide evenly over 2 dimensions. */
 		size_t sqrt_workgroup_size = max((size_t)sqrt((double)workgroup_size), 1);
 		size_t local_size[2] = {sqrt_workgroup_size, sqrt_workgroup_size};

-		/* some implementations have max size 1 on 2nd dimension */
+		/* Some implementations have max size 1 on 2nd dimension. */
 		if(local_size[1] > max_work_items[1]) {
 			local_size[0] = workgroup_size/max_work_items[1];
 			local_size[1] = max_work_items[1];
 		}

-		size_t global_size[2] = {global_size_round_up(local_size[0], w), global_size_round_up(local_size[1], h)};
+		size_t global_size[2] = {global_size_round_up(local_size[0], w),
+		                         global_size_round_up(local_size[1], h)};
+
+		/* Vertical size of 1 is coming from bake/shade kernels where we should
+		 * not round anything up because otherwise we'll either be doing too
+		 * much work per pixel (if we don't check global ID on Y axis) or will
+		 * be checking for global ID to always have Y of 0.
+		 */
+		if(h == 1) {
+			global_size[1] = 1;  /* Y extent is index 1; indexing by h only worked because h == 1. */
+		}

 		/* run kernel */
 		opencl_assert(clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL));
@@ -1318,48 +1330,49 @@ public:
 		else
 			kernel = ckShaderKernel;

+		cl_uint start_arg_index =
+			kernel_set_args(kernel,
+			                0,
+			                d_data,
+			                d_input,
+			                d_output);
+
+		if(task.shader_eval_type < SHADER_EVAL_BAKE) {
+			start_arg_index += kernel_set_args(kernel,
+			                                   start_arg_index,
+			                                   d_output_luma);
+		}
+
+#define KERNEL_TEX(type, ttype, name) \
+		set_kernel_arg_mem(kernel, &start_arg_index, #name);
+#include "kernel_textures.h"
+#undef KERNEL_TEX
+
+		start_arg_index += kernel_set_args(kernel,
+		                                   start_arg_index,
+		                                   d_shader_eval_type);
+		if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
+			start_arg_index += kernel_set_args(kernel,
+			                                   start_arg_index,
+			                                   d_shader_filter);
+		}
+		start_arg_index += kernel_set_args(kernel,
+		                                   start_arg_index,
+		                                   d_shader_x,
+		                                   d_shader_w,
+		                                   d_offset);
+
 		for(int sample = 0; sample < task.num_samples; sample++) {

 			if(task.get_cancel())
 				break;

-			cl_int d_sample = sample;
-
-			cl_uint start_arg_index =
-				kernel_set_args(kernel,
-				                0,
-				                d_data,
-				                d_input,
-				                d_output);
-
-			if(task.shader_eval_type < SHADER_EVAL_BAKE) {
-				start_arg_index += kernel_set_args(kernel,
-				                                   start_arg_index,
-				                                   d_output_luma);
-			}
-
-#define KERNEL_TEX(type, ttype, name) \
-			set_kernel_arg_mem(kernel, &start_arg_index, #name);
-#include "kernel_textures.h"
-#undef KERNEL_TEX
-
-			start_arg_index += kernel_set_args(kernel,
-			                                   start_arg_index,
-			                                   d_shader_eval_type);
-			if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
-				start_arg_index += kernel_set_args(kernel,
-				                                   start_arg_index,
-				                                   d_shader_filter);
-			}
-			start_arg_index += kernel_set_args(kernel,
-			                                   start_arg_index,
-			                                   d_shader_x,
-			                                   d_shader_w,
-			                                   d_offset,
-			                                   d_sample);
+			kernel_set_args(kernel, start_arg_index, sample);

 			enqueue_kernel(kernel, task.shader_w, 1);

+			clFinish(cqCommandQueue);
+
 			task.update_progress(NULL);
 		}
 	}
--- a/intern/cycles/graph/CMakeLists.txt
+++ b/intern/cycles/graph/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Builds the cycles_graph library from the node graph sources and headers listed below.
+set(INC
+	.
+	../util
+)
+
+set(SRC
+	node.cpp
+	node_type.cpp
+	node_xml.cpp
+)
+
+set(SRC_HEADERS
+	node.h
+	node_enum.h
+	node_type.h
+	node_xml.h
+)
+
+include_directories(${INC})
+include_directories(SYSTEM ${INC_SYS})  # NOTE(review): INC_SYS is not set in this file; presumably inherited from a parent scope - confirm.
+
+add_library(cycles_graph ${SRC} ${SRC_HEADERS})
+
--- a/intern/cycles/graph/node.cpp
+++ b/intern/cycles/graph/node.cpp
@@ -0,0 +1,395 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "node.h"
+#include "node_type.h"
+
+#include "util_foreach.h"
+#include "util_param.h"
+#include "util_transform.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Node Type */
+
+Node::Node(const NodeType *type_, ustring name_)
+: name(name_), type(type_)
+{
+	assert(type);
+
+	/* assign non-empty name, convenient for debugging */
+	if(name.empty()) {
+		name = type->name;
+	}
+
+	/* initialize default values */
+	foreach(const SocketType& socket, type->inputs) {
+		set_default_value(socket);
+	}
+}
+
+Node::~Node()
+{
+}
+
+template<typename T>
+static T& get_socket_value(const Node *node, const SocketType& socket)
+{
+	return (T&)*(((char*)node) + socket.struct_offset);
+}
+
+#ifndef NDEBUG
+static bool is_socket_float3(const SocketType& socket)
+{
+	return socket.type == SocketType::COLOR ||
+	       socket.type == SocketType::POINT ||
+		   socket.type == SocketType::VECTOR ||
+		   socket.type == SocketType::NORMAL;
+}
+
+static bool is_socket_array_float3(const SocketType& socket)
+{
+	return socket.type == SocketType::COLOR_ARRAY ||
+	       socket.type == SocketType::POINT_ARRAY ||
+		   socket.type == SocketType::VECTOR_ARRAY ||
+		   socket.type == SocketType::NORMAL_ARRAY;
+}
+#endif
+
+/* set values */
+void Node::set(const SocketType& input, bool value)
+{
+	assert(input.type == SocketType::BOOLEAN);
+	get_socket_value<bool>(this, input) = value;
+}
+
+void Node::set(const SocketType& input, int value)
+{
+	assert((input.type == SocketType::INT || input.type == SocketType::ENUM));
+	get_socket_value<int>(this, input) = value;
+}
+
+void Node::set(const SocketType& input, float value)
+{
+	assert(input.type == SocketType::FLOAT);
+	get_socket_value<float>(this, input) = value;
+}
+
+void Node::set(const SocketType& input, float2 value)
+{
+	assert(input.type == SocketType::POINT2);  /* float2 values live in POINT2 sockets; FLOAT holds a single float. */
+	get_socket_value<float2>(this, input) = value;
+}
+
+void Node::set(const SocketType& input, float3 value)
+{
+	assert(is_socket_float3(input));
+	get_socket_value<float3>(this, input) = value;
+}
+
+void Node::set(const SocketType& input, const char *value)
+{
+	set(input, ustring(value));
+}
+
+void Node::set(const SocketType& input, ustring value)
+{
+	if(input.type == SocketType::STRING) {
+		get_socket_value<ustring>(this, input) = value;
+	}
+	else if(input.type == SocketType::ENUM) {
+		const NodeEnum& enm = *input.enum_values;
+		if(enm.exists(value)) {
+			get_socket_value<int>(this, input) = enm[value];
+		}
+		else {
+			assert(0);
+		}
+	}
+	else {
+		assert(0);
+	}
+}
+
+void Node::set(const SocketType& input, const Transform& value)
+{
+	assert(input.type == SocketType::TRANSFORM);
+	get_socket_value<Transform>(this, input) = value;
+}
+
+void Node::set(const SocketType& input, Node *value)
+{
+	assert(input.type == SocketType::NODE);  /* Must match get_node(); TRANSFORM sockets store a Transform, not a pointer. */
+	get_socket_value<Node*>(this, input) = value;
+}
+
+/* set array values */
+void Node::set(const SocketType& input, array<bool>& value)
+{
+	assert(input.type == SocketType::BOOLEAN_ARRAY);
+	get_socket_value<array<bool> >(this, input).steal_data(value);
+}
+
+void Node::set(const SocketType& input, array<int>& value)
+{
+	assert(input.type == SocketType::INT_ARRAY);
+	get_socket_value<array<int> >(this, input).steal_data(value);
+}
+
+void Node::set(const SocketType& input, array<float>& value)
+{
+	assert(input.type == SocketType::FLOAT_ARRAY);
+	get_socket_value<array<float> >(this, input).steal_data(value);
+}
+
+void Node::set(const SocketType& input, array<float2>& value)
+{
+	assert(input.type == SocketType::POINT2_ARRAY);  /* array<float2> is the POINT2_ARRAY storage, as in copy_value(). */
+	get_socket_value<array<float2> >(this, input).steal_data(value);
+}
+
+void Node::set(const SocketType& input, array<float3>& value)
+{
+	assert(is_socket_array_float3(input));
+	get_socket_value<array<float3> >(this, input).steal_data(value);
+}
+
+void Node::set(const SocketType& input, array<ustring>& value)
+{
+	assert(input.type == SocketType::STRING_ARRAY);
+	get_socket_value<array<ustring> >(this, input).steal_data(value);
+}
+
+void Node::set(const SocketType& input, array<Transform>& value)
+{
+	assert(input.type == SocketType::TRANSFORM_ARRAY);
+	get_socket_value<array<Transform> >(this, input).steal_data(value);
+}
+
+void Node::set(const SocketType& input, array<Node*>& value)
+{
+	assert(input.type == SocketType::NODE_ARRAY);  /* Must match get_node_array(); TRANSFORM_ARRAY holds Transform values. */
+	get_socket_value<array<Node*> >(this, input).steal_data(value);
+}
+
+/* get values */
+bool Node::get_bool(const SocketType& input) const
+{
+	assert(input.type == SocketType::BOOLEAN);
+	return get_socket_value<bool>(this, input);
+}
+
+int Node::get_int(const SocketType& input) const
+{
+	assert(input.type == SocketType::INT || input.type == SocketType::ENUM);
+	return get_socket_value<int>(this, input);
+}
+
+float Node::get_float(const SocketType& input) const
+{
+	assert(input.type == SocketType::FLOAT);
+	return get_socket_value<float>(this, input);
+}
+
+float2 Node::get_float2(const SocketType& input) const
+{
+	assert(input.type == SocketType::POINT2);  /* float2 values live in POINT2 sockets; FLOAT holds a single float. */
+	return get_socket_value<float2>(this, input);
+}
+
+float3 Node::get_float3(const SocketType& input) const
+{
+	assert(is_socket_float3(input));
+	return get_socket_value<float3>(this, input);
+}
+
+ustring Node::get_string(const SocketType& input) const
+{
+	if(input.type == SocketType::STRING) {
+		return get_socket_value<ustring>(this, input);
+	}
+	else if(input.type == SocketType::ENUM) {
+		const NodeEnum& enm = *input.enum_values;
+		int intvalue = get_socket_value<int>(this, input);
+		return (enm.exists(intvalue)) ? enm[intvalue] : ustring();
+	}
+	else {
+		assert(0);
+		return ustring();
+	}
+}
+
+Transform Node::get_transform(const SocketType& input) const
+{
+	assert(input.type == SocketType::TRANSFORM);
+	return get_socket_value<Transform>(this, input);
+}
+
+Node *Node::get_node(const SocketType& input) const
+{
+	assert(input.type == SocketType::NODE);
+	return get_socket_value<Node*>(this, input);
+}
+
+/* get array values */
+const array<bool>& Node::get_bool_array(const SocketType& input) const
+{
+	assert(input.type == SocketType::BOOLEAN_ARRAY);
+	return get_socket_value<array<bool> >(this, input);
+}
+
+const array<int>& Node::get_int_array(const SocketType& input) const
+{
+	assert(input.type == SocketType::INT_ARRAY);
+	return get_socket_value<array<int> >(this, input);
+}
+
+const array<float>& Node::get_float_array(const SocketType& input) const
+{
+	assert(input.type == SocketType::FLOAT_ARRAY);
+	return get_socket_value<array<float> >(this, input);
+}
+
+const array<float2>& Node::get_float2_array(const SocketType& input) const
+{
+	assert(input.type == SocketType::POINT2_ARRAY);  /* array<float2> is the POINT2_ARRAY storage, as in copy_value(). */
+	return get_socket_value<array<float2> >(this, input);
+}
+
+const array<float3>& Node::get_float3_array(const SocketType& input) const
+{
+	assert(is_socket_array_float3(input));
+	return get_socket_value<array<float3> >(this, input);
+}
+
+const array<ustring>& Node::get_string_array(const SocketType& input) const
+{
+	assert(input.type == SocketType::STRING_ARRAY);
+	return get_socket_value<array<ustring> >(this, input);
+}
+
+const array<Transform>& Node::get_transform_array(const SocketType& input) const
+{
+	assert(input.type == SocketType::TRANSFORM_ARRAY);
+	return get_socket_value<array<Transform> >(this, input);
+}
+
+const array<Node*>& Node::get_node_array(const SocketType& input) const
+{
+	assert(input.type == SocketType::NODE_ARRAY);
+	return get_socket_value<array<Node*> >(this, input);
+}
+
+/* generic value operations */
+
+bool Node::has_default_value(const SocketType& input) const
+{
+	const void *src = input.default_value;
+	void *dst = &get_socket_value<char>(this, input);
+	return memcmp(dst, src, input.size()) == 0;
+}
+
+void Node::set_default_value(const SocketType& socket)
+{
+	const void *src = socket.default_value;
+	void *dst = ((char*)this) + socket.struct_offset;
+	memcpy(dst, src, socket.size());
+}
+
+template<typename T>
+static void copy_array(const Node *node, const SocketType& socket, const Node *other, const SocketType& other_socket)
+{
+	const array<T>* src = (const array<T>*)(((char*)other) + other_socket.struct_offset);
+	array<T>* dst = (array<T>*)(((char*)node) + socket.struct_offset);
+	*dst = *src;
+}
+
+void Node::copy_value(const SocketType& socket, const Node& other, const SocketType& other_socket)
+{
+	assert(socket.type == other_socket.type);
+
+	if(socket.is_array()) {
+		switch(socket.type) {
+			case SocketType::BOOLEAN_ARRAY: copy_array<bool>(this, socket, &other, other_socket); break;
+			case SocketType::FLOAT_ARRAY: copy_array<float>(this, socket, &other, other_socket); break;
+			case SocketType::INT_ARRAY: copy_array<int>(this, socket, &other, other_socket); break;
+			case SocketType::COLOR_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
+			case SocketType::VECTOR_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
+			case SocketType::POINT_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
+			case SocketType::NORMAL_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
+			case SocketType::POINT2_ARRAY: copy_array<float2>(this, socket, &other, other_socket); break;
+			case SocketType::STRING_ARRAY: copy_array<ustring>(this, socket, &other, other_socket); break;
+			case SocketType::TRANSFORM_ARRAY: copy_array<Transform>(this, socket, &other, other_socket); break;
+			case SocketType::NODE_ARRAY: copy_array<void*>(this, socket, &other, other_socket); break;
+			default: assert(0); break;
+		}
+	}
+	else {
+		const void *src = ((char*)&other) + other_socket.struct_offset;
+		void *dst = ((char*)this) + socket.struct_offset;
+		memcpy(dst, src, socket.size());
+	}
+}
+
+template<typename T>
+static bool is_array_equal(const Node *node, const Node *other, const SocketType& socket)
+{
+	const array<T>* a = (const array<T>*)(((char*)node) + socket.struct_offset);
+	const array<T>* b = (const array<T>*)(((char*)other) + socket.struct_offset);
+	return *a == *b;
+}
+
+bool Node::equals_value(const Node& other, const SocketType& socket) const
+{
+	if(socket.is_array()) {
+		switch(socket.type) {
+			case SocketType::BOOLEAN_ARRAY: return is_array_equal<bool>(this, &other, socket);
+			case SocketType::FLOAT_ARRAY: return is_array_equal<float>(this, &other, socket);
+			case SocketType::INT_ARRAY: return is_array_equal<int>(this, &other, socket);
+			case SocketType::COLOR_ARRAY: return is_array_equal<float3>(this, &other, socket);
+			case SocketType::VECTOR_ARRAY: return is_array_equal<float3>(this, &other, socket);
+			case SocketType::POINT_ARRAY: return is_array_equal<float3>(this, &other, socket);
+			case SocketType::NORMAL_ARRAY: return is_array_equal<float3>(this, &other, socket);
+			case SocketType::POINT2_ARRAY: return is_array_equal<float2>(this, &other, socket);
+			case SocketType::STRING_ARRAY: return is_array_equal<ustring>(this, &other, socket);
+			case SocketType::TRANSFORM_ARRAY: return is_array_equal<Transform>(this, &other, socket);
+			case SocketType::NODE_ARRAY: return is_array_equal<void*>(this, &other, socket);
+			default: assert(0); return true;
+		}
+	}
+	else {
+		const void *a = ((char*)this) + socket.struct_offset;
+		const void *b = ((char*)&other) + socket.struct_offset;
+		return (memcmp(a, b, socket.size()) == 0);
+	}
+}
+
+/* equals */
+
+bool Node::equals(const Node& other) const
+{
+	assert(type == other.type);
+
+	foreach(const SocketType& socket, type->inputs) {
+		if(!equals_value(other, socket))
+			return false;
+	}
+
+	return true;
+}
+
+CCL_NAMESPACE_END
+
--- a/intern/cycles/graph/node.h
+++ b/intern/cycles/graph/node.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "node_type.h"
+
+#include "util_map.h"
+#include "util_param.h"
+#include "util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+struct Node;
+struct NodeType;
+struct Transform;
+
+/* Node */
+
+/* Object whose values are accessed through SocketType descriptions.
+ * A socket value lives `SocketType::struct_offset` bytes from the start of the
+ * object (see Node::equals_value), so every accessor below takes the socket
+ * that describes the member to read or write. */
+struct Node
+{
+	explicit Node(const NodeType *type, ustring name = ustring());
+	virtual ~Node();
+
+	/* set values */
+	void set(const SocketType& input, bool value);
+	void set(const SocketType& input, int value);
+	void set(const SocketType& input, float value);
+	void set(const SocketType& input, float2 value);
+	void set(const SocketType& input, float3 value);
+	void set(const SocketType& input, const char *value);
+	void set(const SocketType& input, ustring value);
+	void set(const SocketType& input, const Transform& value);
+	void set(const SocketType& input, Node *value);
+
+	/* set array values. the memory from the input array will be taken over
+	 * by the node and the input array will be empty after return */
+	void set(const SocketType& input, array<bool>& value);
+	void set(const SocketType& input, array<int>& value);
+	void set(const SocketType& input, array<float>& value);
+	void set(const SocketType& input, array<float2>& value);
+	void set(const SocketType& input, array<float3>& value);
+	void set(const SocketType& input, array<ustring>& value);
+	void set(const SocketType& input, array<Transform>& value);
+	void set(const SocketType& input, array<Node*>& value);
+
+	/* get values */
+	bool get_bool(const SocketType& input) const;
+	int get_int(const SocketType& input) const;
+	float get_float(const SocketType& input) const;
+	float2 get_float2(const SocketType& input) const;
+	float3 get_float3(const SocketType& input) const;
+	ustring get_string(const SocketType& input) const;
+	Transform get_transform(const SocketType& input) const;
+	Node *get_node(const SocketType& input) const;
+
+	/* get array values */
+	const array<bool>& get_bool_array(const SocketType& input) const;
+	const array<int>& get_int_array(const SocketType& input) const;
+	const array<float>& get_float_array(const SocketType& input) const;
+	const array<float2>& get_float2_array(const SocketType& input) const;
+	const array<float3>& get_float3_array(const SocketType& input) const;
+	const array<ustring>& get_string_array(const SocketType& input) const;
+	const array<Transform>& get_transform_array(const SocketType& input) const;
+	const array<Node*>& get_node_array(const SocketType& input) const;
+
+	/* generic values operations */
+	bool has_default_value(const SocketType& input) const;
+	void set_default_value(const SocketType& input);
+	/* compares the value of `input` on this node with the same socket on `other` */
+	bool equals_value(const Node& other, const SocketType& input) const;
+	/* NOTE(review): presumably copies `other_input` of `other` into `input` of
+	 * this node; the definition is not fully visible here, confirm in node.cpp */
+	void copy_value(const SocketType& input, const Node& other, const SocketType& other_input);
+
+	/* equals: true when equals_value() holds for every input of `type` */
+	bool equals(const Node& other) const;
+
+	ustring name;
+	/* type description; its `inputs` list drives equals() and the XML reader/writer */
+	const NodeType *type;
+};
+
+CCL_NAMESPACE_END
+
--- a/intern/cycles/graph/node_enum.h
+++ b/intern/cycles/graph/node_enum.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "util_map.h"
+#include "util_param.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Enum
+ *
+ * Utility class for enum values. */
+
+struct NodeEnum {
+	/* true as long as no name/value pair was inserted */
+	bool empty() const
+	{
+		return left.empty();
+	}
+
+	/* register a pair in both lookup directions; an existing entry for the
+	 * same name or the same value is overwritten */
+	void insert(const char *x, int y)
+	{
+		const ustring key(x);
+		left[key] = y;
+		right[y] = key;
+	}
+
+	/* membership tests, by name or by value */
+	bool exists(ustring x) const
+	{
+		return left.find(x) != left.end();
+	}
+	bool exists(int y) const
+	{
+		return right.find(y) != right.end();
+	}
+
+	/* lookups. the result of find() is dereferenced without a check, so the
+	 * key must be known to exist (see exists()) */
+	int operator[](const char *x) const
+	{
+		const ustring key(x);
+		return left.find(key)->second;
+	}
+	int operator[](ustring x) const
+	{
+		return left.find(x)->second;
+	}
+	ustring operator[](int y) const
+	{
+		return right.find(y)->second;
+	}
+
+private:
+	/* name -> value */
+	unordered_map<ustring, int, ustringHash> left;
+	/* value -> name */
+	unordered_map<int, ustring> right;
+};
+
+CCL_NAMESPACE_END
+
--- a/intern/cycles/graph/node_type.cpp
+++ b/intern/cycles/graph/node_type.cpp
@@ -0,0 +1,218 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "node_type.h"
+#include "util_foreach.h"
+#include "util_transform.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Node Socket Type */
+
+/* Size in bytes of the value stored for this socket, derived from its type. */
+size_t SocketType::size() const
+{
+	return SocketType::size(type);
+}
+
+/* Relies on the Type enum listing BOOLEAN_ARRAY as the first array type and
+ * every other array type after it. */
+bool SocketType::is_array() const
+{
+	const bool array_type = (type >= BOOLEAN_ARRAY);
+	return array_type;
+}
+
+/* Size in bytes of the value stored for a socket of the given type.
+ * UNDEFINED and CLOSURE report zero. */
+size_t SocketType::size(Type type)
+{
+	switch(type)
+	{
+		/* no storage */
+		case UNDEFINED:
+		case CLOSURE:
+			return 0;
+
+		/* single values */
+		case BOOLEAN:
+			return sizeof(bool);
+		case FLOAT:
+			return sizeof(float);
+		case INT:
+		case ENUM:
+			return sizeof(int);
+		case COLOR:
+		case VECTOR:
+		case POINT:
+		case NORMAL:
+			return sizeof(float3);
+		case POINT2:
+			return sizeof(float2);
+		case STRING:
+			return sizeof(ustring);
+		case TRANSFORM:
+			return sizeof(Transform);
+		case NODE:
+			return sizeof(void*);
+
+		/* array containers */
+		case BOOLEAN_ARRAY:
+			return sizeof(array<bool>);
+		case FLOAT_ARRAY:
+			return sizeof(array<float>);
+		case INT_ARRAY:
+			return sizeof(array<int>);
+		case COLOR_ARRAY:
+		case VECTOR_ARRAY:
+		case POINT_ARRAY:
+		case NORMAL_ARRAY:
+			return sizeof(array<float3>);
+		case POINT2_ARRAY:
+			return sizeof(array<float2>);
+		case STRING_ARRAY:
+			return sizeof(array<ustring>);
+		case TRANSFORM_ARRAY:
+			return sizeof(array<Transform>);
+		case NODE_ARRAY:
+			return sizeof(array<void*>);
+	}
+
+	/* only reached with a value that is not part of the enum */
+	assert(0);
+	return 0;
+}
+
+/* Reports sizeof(Transform).
+ * NOTE(review): presumably the upper bound for a single socket value; confirm
+ * against the callers. */
+size_t SocketType::max_size()
+{
+	const size_t transform_size = sizeof(Transform);
+	return transform_size;
+}
+
+/* Returns the address of a function-local static Transform with all sixteen
+ * components set to zero. The same object is returned on every call. */
+void *SocketType::zero_default_value()
+{
+	static Transform zero_transform = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
+	return &zero_transform;
+}
+
+/* Name of a socket type.
+ *
+ * The table is indexed with the enum value, so it has to list one name per
+ * SocketType::Type enumerator, in declaration order. A value outside of the
+ * table triggers an assert instead of silently reading past the array. */
+ustring SocketType::type_name(Type type)
+{
+	static ustring names[] = {
+		ustring("undefined"),
+
+		ustring("boolean"),
+		ustring("float"),
+		ustring("int"),
+		ustring("color"),
+		ustring("vector"),
+		ustring("point"),
+		ustring("normal"),
+		ustring("point2"),
+		ustring("closure"),
+		ustring("string"),
+		ustring("enum"),
+		ustring("transform"),
+		ustring("node"),
+
+		ustring("array_boolean"),
+		ustring("array_float"),
+		ustring("array_int"),
+		ustring("array_color"),
+		ustring("array_vector"),
+		ustring("array_point"),
+		ustring("array_normal"),
+		ustring("array_point2"),
+		ustring("array_string"),
+		ustring("array_transform"),
+		ustring("array_node")};
+
+	const size_t num_names = sizeof(names) / sizeof(names[0]);
+	const size_t index = (size_t)type;
+
+	assert(index < num_names);
+	/* keeps release builds (assert compiled out) free of unused warnings */
+	(void)num_names;
+
+	return names[index];
+}
+
+/* True for the four non-array types that are stored as float3. */
+bool SocketType::is_float3(Type type)
+{
+	switch(type) {
+		case COLOR:
+		case VECTOR:
+		case POINT:
+		case NORMAL:
+			return true;
+		default:
+			return false;
+	}
+}
+
+/* Node Type */
+
+/* Construct a node type without sockets.
+ *
+ * `create` is assigned by NodeType::add() only after the object has been
+ * copied into the registry, so it is initialized here: otherwise that copy
+ * would read an indeterminate function pointer. */
+NodeType::NodeType(Type type_)
+: type(type_),
+  create(NULL)
+{
+}
+
+/* Nothing to release explicitly; the members clean up after themselves. */
+NodeType::~NodeType()
+{
+}
+
+/* Append an input socket description to `inputs`.
+ * `flags` and `extra_flags` are merged with a bitwise or; all pointers are
+ * stored as given, nothing is copied. */
+void NodeType::register_input(ustring name, ustring ui_name, SocketType::Type type, int struct_offset,
+                              const void *default_value, const NodeEnum *enum_values,
+                              const NodeType **node_type, int flags, int extra_flags)
+{
+	SocketType input;
+
+	/* identification */
+	input.name = name;
+	input.ui_name = ui_name;
+	input.type = type;
+
+	/* storage and default */
+	input.struct_offset = struct_offset;
+	input.default_value = default_value;
+
+	/* type specific extras */
+	input.enum_values = enum_values;
+	input.node_type = node_type;
+
+	input.flags = flags | extra_flags;
+
+	inputs.push_back(input);
+}
+
+/* Append an output socket description to `outputs`. Outputs carry no storage
+ * offset, default value, enum or node type, and are always LINKABLE. */
+void NodeType::register_output(ustring name, ustring ui_name, SocketType::Type type)
+{
+	SocketType output;
+
+	output.name = name;
+	output.ui_name = ui_name;
+	output.type = type;
+
+	/* unused for outputs */
+	output.struct_offset = 0;
+	output.default_value = NULL;
+	output.enum_values = NULL;
+	output.node_type = NULL;
+
+	output.flags = SocketType::LINKABLE;
+
+	outputs.push_back(output);
+}
+
+/* First input socket with the given name, NULL when there is none. */
+const SocketType *NodeType::find_input(ustring name) const
+{
+	const SocketType *found = NULL;
+
+	foreach(const SocketType& input, inputs) {
+		if(input.name == name) {
+			found = &input;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/* First output socket with the given name, NULL when there is none. */
+const SocketType *NodeType::find_output(ustring name) const
+{
+	const SocketType *found = NULL;
+
+	foreach(const SocketType& output, outputs) {
+		if(output.name == name) {
+			found = &output;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/* Node Type Registry */
+
+/* Registry of all node types by name. It is a function-local static, so it is
+ * constructed on the first call. */
+unordered_map<ustring, NodeType, ustringHash>& NodeType::types()
+{
+	static unordered_map<ustring, NodeType, ustringHash> registry;
+	return registry;
+}
+
+/* Register a new node type under `name_` and return the registry entry.
+ * Registering the same name twice prints an error, asserts and returns NULL. */
+NodeType *NodeType::add(const char *name_, CreateFunc create_, Type type_)
+{
+	const ustring name(name_);
+	unordered_map<ustring, NodeType, ustringHash>& registry = types();
+
+	if(registry.find(name) != registry.end()) {
+		fprintf(stderr, "Node type %s registered twice!\n", name_);
+		assert(0);
+		return NULL;
+	}
+
+	/* create the entry in place, then fill in what the constructor leaves out */
+	NodeType& entry = registry[name];
+	entry = NodeType(type_);
+	entry.name = name;
+	entry.create = create_;
+
+	return &entry;
+}
+
+/* Registered node type with the given name, NULL when the name is unknown. */
+const NodeType *NodeType::find(ustring name)
+{
+	unordered_map<ustring, NodeType, ustringHash>& registry = types();
+	unordered_map<ustring, NodeType, ustringHash>::iterator it = registry.find(name);
+
+	if(it == registry.end()) {
+		return NULL;
+	}
+
+	return &it->second;
+}
+
+CCL_NAMESPACE_END
+
--- a/intern/cycles/graph/node_type.h
+++ b/intern/cycles/graph/node_type.h
@@ -0,0 +1,262 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "node_enum.h"
+
+#include "util_map.h"
+#include "util_param.h"
+#include "util_string.h"
+#include "util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+struct Node;
+struct NodeType;
+
+/* Socket Type */
+
+struct SocketType
+{
+	/* Data type of a socket.
+	 *
+	 * The order matters: is_array() tests `type >= BOOLEAN_ARRAY`, and
+	 * type_name() uses the enum value as index into its table of names. */
+	enum Type
+	{
+		UNDEFINED,
+
+		/* single values */
+		BOOLEAN,
+		FLOAT,
+		INT,
+		COLOR,
+		VECTOR,
+		POINT,
+		NORMAL,
+		POINT2,
+		CLOSURE,
+		STRING,
+		ENUM,
+		TRANSFORM,
+		NODE,
+
+		/* arrays */
+		BOOLEAN_ARRAY,
+		FLOAT_ARRAY,
+		INT_ARRAY,
+		COLOR_ARRAY,
+		VECTOR_ARRAY,
+		POINT_ARRAY,
+		NORMAL_ARRAY,
+		POINT2_ARRAY,
+		STRING_ARRAY,
+		TRANSFORM_ARRAY,
+		NODE_ARRAY,
+	};
+
+	/* Bit flags stored in `flags`. */
+	enum Flags {
+		LINKABLE               = (1 << 0),
+		ANIMATABLE             = (1 << 1),
+
+		SVM_INTERNAL           = (1 << 2),
+		OSL_INTERNAL           = (1 << 3),
+		/* both of the above */
+		INTERNAL               = SVM_INTERNAL | OSL_INTERNAL,
+
+		LINK_TEXTURE_GENERATED = (1 << 4),
+		LINK_TEXTURE_UV        = (1 << 5),
+		LINK_INCOMING          = (1 << 6),
+		LINK_NORMAL            = (1 << 7),
+		LINK_POSITION          = (1 << 8),
+		LINK_TANGENT           = (1 << 9),
+		/* all LINK_* bits */
+		DEFAULT_LINK_MASK      = LINK_TEXTURE_GENERATED | LINK_TEXTURE_UV | LINK_INCOMING |
+		                         LINK_NORMAL | LINK_POSITION | LINK_TANGENT
+	};
+
+	/* identifier; the XML reader and writer use it as attribute name */
+	ustring name;
+	Type type;
+	/* byte offset of the value from the start of the owning Node */
+	int struct_offset;
+	const void *default_value;
+	/* set by SOCKET_ENUM, NULL for the other socket macros */
+	const NodeEnum *enum_values;
+	/* set by SOCKET_NODE / SOCKET_NODE_ARRAY, NULL for the other socket macros */
+	const NodeType **node_type;
+	/* combination of Flags bits */
+	int flags;
+	ustring ui_name;
+
+	size_t size() const;
+	bool is_array() const;
+	static size_t size(Type type);
+	static size_t max_size();
+	static ustring type_name(Type type);
+	static void *zero_default_value();
+	static bool is_float3(Type type);
+};
+
+/* Node Type */
+
+/* Description of a kind of node: its name, its input and output sockets and a
+ * factory function. Types are kept in a registry keyed by name, see types(). */
+struct NodeType
+{
+	enum Type {
+		NONE,
+		SHADER
+	};
+
+	explicit NodeType(Type type = NONE);
+	~NodeType();
+
+	/* append a socket description; `flags` and `extra_flags` of an input are
+	 * combined with a bitwise or */
+	void register_input(ustring name, ustring ui_name, SocketType::Type type,
+	                    int struct_offset, const void *default_value,
+						const NodeEnum *enum_values = NULL,
+						const NodeType **node_type = NULL,
+						int flags = 0, int extra_flags = 0);
+	void register_output(ustring name, ustring ui_name, SocketType::Type type);
+
+	/* lookup by socket name, NULL when not found */
+	const SocketType *find_input(ustring name) const;
+	const SocketType *find_output(ustring name) const;
+
+	/* factory signature, see the create() emitted by NODE_DEFINE */
+	typedef Node *(*CreateFunc)(const NodeType *type);
+
+	ustring name;
+	Type type;
+	std::vector<SocketType> inputs;
+	std::vector<SocketType> outputs;
+	CreateFunc create;
+
+	/* registry access: add() returns NULL when the name is already taken,
+	 * find() returns NULL when the name is unknown */
+	static NodeType *add(const char *name, CreateFunc create, Type type = NONE);
+	static const NodeType *find(ustring name);
+	static unordered_map<ustring, NodeType, ustringHash>& types();
+};
+
+/* Node Definition Macros
+ *
+ * NODE_DECLARE goes inside the node struct. It declares the static
+ * register_type<T>() template, the static create() factory and the static
+ * node_type pointer.
+ *
+ * NODE_DEFINE(structname) defines node_type (initialized by calling
+ * register_type<structname>()) and create() (returns `new structname()`), and
+ * ends with the signature of register_type(), so the macro has to be followed
+ * directly by that function's body. */
+
+#define NODE_DECLARE                       \
+template<typename T>                       \
+static const NodeType *register_type();    \
+static Node *create(const NodeType *type); \
+static const NodeType *node_type;
+
+#define NODE_DEFINE(structname)                                                  \
+const NodeType *structname::node_type = structname::register_type<structname>(); \
+Node *structname::create(const NodeType*) { return new structname(); }           \
+template<typename T>                                                             \
+const NodeType *structname::register_type()
+
+/* Socket Definition Macros
+ *
+ * All SOCKET_* macros expect two names at the expansion site: the struct type
+ * `T` that owns the member `name`, and a NodeType pointer called `type`. */
+
+/* byte offset and size of member `name` of struct T, computed from a fake
+ * object address without constructing a T */
+#define SOCKET_OFFSETOF(T, name) (((char *)&(((T *)1)->name)) - (char *)1)
+#define SOCKET_SIZEOF(T, name) (sizeof(((T *)1)->name))
+/* registers member `name` as input socket of type TYPE. the default value is
+ * kept in a static variable of type `datatype`, whose size is asserted to
+ * match the member. optional trailing arguments are passed on as extra_flags */
+#define SOCKET_DEFINE(name, ui_name, default_value, datatype, TYPE, flags, ...) \
+	{ \
+		static datatype defval = default_value; \
+		assert(SOCKET_SIZEOF(T, name) == sizeof(datatype)); \
+		type->register_input(ustring(#name), ustring(ui_name), TYPE, SOCKET_OFFSETOF(T, name), &defval, NULL, NULL, flags, ##__VA_ARGS__); \
+	}
+
+/* Non-linkable single value sockets (flags 0). Optional trailing arguments
+ * end up as extra_flags of NodeType::register_input(). */
+#define SOCKET_BOOLEAN(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, bool, SocketType::BOOLEAN, 0, ##__VA_ARGS__)
+#define SOCKET_INT(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, int, SocketType::INT, 0, ##__VA_ARGS__)
+#define SOCKET_FLOAT(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, float, SocketType::FLOAT, 0, ##__VA_ARGS__)
+#define SOCKET_COLOR(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::COLOR, 0, ##__VA_ARGS__)
+#define SOCKET_VECTOR(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::VECTOR, 0, ##__VA_ARGS__)
+#define SOCKET_POINT(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::POINT, 0, ##__VA_ARGS__)
+#define SOCKET_NORMAL(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::NORMAL, 0, ##__VA_ARGS__)
+#define SOCKET_POINT2(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, float2, SocketType::POINT2, 0, ##__VA_ARGS__)
+#define SOCKET_STRING(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, ustring, SocketType::STRING, 0, ##__VA_ARGS__)
+#define SOCKET_TRANSFORM(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, Transform, SocketType::TRANSFORM, 0, ##__VA_ARGS__)
+/* enum socket: stored as int, `values` is the NodeEnum with the valid entries.
+ * here the optional trailing arguments start at the `flags` parameter */
+#define SOCKET_ENUM(name, ui_name, values, default_value, ...) \
+	{ \
+		static int defval = default_value; \
+		assert(SOCKET_SIZEOF(T, name) == sizeof(int)); \
+		type->register_input(ustring(#name), ustring(ui_name), SocketType::ENUM, SOCKET_OFFSETOF(T, name), &defval, &values, NULL, ##__VA_ARGS__); \
+	}
+/* node pointer socket with a NULL default; `node_type` is passed through
+ * unchanged as the `const NodeType **` argument of register_input() */
+#define SOCKET_NODE(name, ui_name, node_type, ...) \
+	{ \
+	    static Node *defval = NULL; \
+		assert(SOCKET_SIZEOF(T, name) == sizeof(Node*)); \
+		type->register_input(ustring(#name), ustring(ui_name), SocketType::NODE, SOCKET_OFFSETOF(T, name), &defval, NULL, node_type, ##__VA_ARGS__); \
+	}
+
+/* Non-linkable array sockets (flags 0): the member and the default value are
+ * array<> containers of the element type. */
+#define SOCKET_BOOLEAN_ARRAY(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, array<bool>, SocketType::BOOLEAN_ARRAY, 0, ##__VA_ARGS__)
+#define SOCKET_INT_ARRAY(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, array<int>, SocketType::INT_ARRAY, 0, ##__VA_ARGS__)
+#define SOCKET_FLOAT_ARRAY(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, array<float>, SocketType::FLOAT_ARRAY, 0, ##__VA_ARGS__)
+#define SOCKET_COLOR_ARRAY(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, array<float3>, SocketType::COLOR_ARRAY, 0, ##__VA_ARGS__)
+#define SOCKET_VECTOR_ARRAY(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, array<float3>, SocketType::VECTOR_ARRAY, 0, ##__VA_ARGS__)
+#define SOCKET_POINT_ARRAY(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, array<float3>, SocketType::POINT_ARRAY, 0, ##__VA_ARGS__)
+#define SOCKET_NORMAL_ARRAY(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, array<float3>, SocketType::NORMAL_ARRAY, 0, ##__VA_ARGS__)
+#define SOCKET_POINT2_ARRAY(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, array<float2>, SocketType::POINT2_ARRAY, 0, ##__VA_ARGS__)
+#define SOCKET_STRING_ARRAY(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, array<ustring>, SocketType::STRING_ARRAY, 0, ##__VA_ARGS__)
+#define SOCKET_TRANSFORM_ARRAY(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, array<Transform>, SocketType::TRANSFORM_ARRAY, 0, ##__VA_ARGS__)
+/* Node array socket with an empty array as default.
+ *
+ * The member has to be an array<Node*>: SocketType::size() reports
+ * sizeof(array<void*>) for NODE_ARRAY and Node::get_node_array() returns an
+ * array<Node*>. So both the size check and the default value use the array
+ * type, not a single Node pointer. `node_type` is passed through unchanged as
+ * the `const NodeType **` argument of register_input(). */
+#define SOCKET_NODE_ARRAY(name, ui_name, node_type, ...) \
+	{ \
+		static array<Node*> defval; \
+		assert(SOCKET_SIZEOF(T, name) == sizeof(array<Node*>)); \
+		type->register_input(ustring(#name), ustring(ui_name), SocketType::NODE_ARRAY, SOCKET_OFFSETOF(T, name), &defval, NULL, node_type, ##__VA_ARGS__); \
+	}
+
+/* Linkable input sockets: same as the plain SOCKET_* macros, but registered
+ * with the SocketType::LINKABLE flag. */
+#define SOCKET_IN_BOOLEAN(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, bool, SocketType::BOOLEAN, SocketType::LINKABLE, ##__VA_ARGS__)
+#define SOCKET_IN_INT(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, int, SocketType::INT, SocketType::LINKABLE, ##__VA_ARGS__)
+#define SOCKET_IN_FLOAT(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, float, SocketType::FLOAT, SocketType::LINKABLE, ##__VA_ARGS__)
+#define SOCKET_IN_COLOR(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::COLOR, SocketType::LINKABLE, ##__VA_ARGS__)
+#define SOCKET_IN_VECTOR(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::VECTOR, SocketType::LINKABLE, ##__VA_ARGS__)
+#define SOCKET_IN_POINT(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::POINT, SocketType::LINKABLE, ##__VA_ARGS__)
+#define SOCKET_IN_NORMAL(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::NORMAL, SocketType::LINKABLE, ##__VA_ARGS__)
+#define SOCKET_IN_STRING(name, ui_name, default_value, ...) \
+	SOCKET_DEFINE(name, ui_name, default_value, ustring, SocketType::STRING, SocketType::LINKABLE, ##__VA_ARGS__)
+/* closure input: no member storage (offset 0) and no default value. unlike the
+ * macros above this expands to a plain expression, not a braced block */
+#define SOCKET_IN_CLOSURE(name, ui_name, ...) \
+	type->register_input(ustring(#name), ustring(ui_name), SocketType::CLOSURE, 0, NULL, NULL, NULL, SocketType::LINKABLE, ##__VA_ARGS__)
+
+/* Output sockets: only name, ui name and type are registered, see
+ * NodeType::register_output(). */
+#define SOCKET_OUT_BOOLEAN(name, ui_name) \
+	{ type->register_output(ustring(#name), ustring(ui_name), SocketType::BOOLEAN); }
+#define SOCKET_OUT_INT(name, ui_name) \
+	{ type->register_output(ustring(#name), ustring(ui_name), SocketType::INT); }
+#define SOCKET_OUT_FLOAT(name, ui_name) \
+	{ type->register_output(ustring(#name), ustring(ui_name), SocketType::FLOAT); }
+#define SOCKET_OUT_COLOR(name, ui_name) \
+	{ type->register_output(ustring(#name), ustring(ui_name), SocketType::COLOR); }
+#define SOCKET_OUT_VECTOR(name, ui_name) \
+	{ type->register_output(ustring(#name), ustring(ui_name), SocketType::VECTOR); }
+#define SOCKET_OUT_POINT(name, ui_name) \
+	{ type->register_output(ustring(#name), ustring(ui_name), SocketType::POINT); }
+#define SOCKET_OUT_NORMAL(name, ui_name) \
+	{ type->register_output(ustring(#name), ustring(ui_name), SocketType::NORMAL); }
+#define SOCKET_OUT_CLOSURE(name, ui_name) \
+	{ type->register_output(ustring(#name), ustring(ui_name), SocketType::CLOSURE); }
+#define SOCKET_OUT_STRING(name, ui_name) \
+	{ type->register_output(ustring(#name), ustring(ui_name), SocketType::STRING); }
+#define SOCKET_OUT_ENUM(name, ui_name) \
+	{ type->register_output(ustring(#name), ustring(ui_name), SocketType::ENUM); }
+
+CCL_NAMESPACE_END
+
--- a/intern/cycles/graph/node_xml.cpp
+++ b/intern/cycles/graph/node_xml.cpp
@@ -0,0 +1,452 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "node_xml.h"
+
+#include "util_foreach.h"
+#include "util_string.h"
+#include "util_transform.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Accepts "true" in any letter case, or any text atoi() parses as non-zero. */
+static bool xml_read_boolean(const char *value)
+{
+	if(string_iequals(value, "true")) {
+		return true;
+	}
+
+	return atoi(value) != 0;
+}
+
+/* Text form of a boolean, matching what xml_read_boolean() accepts. */
+static const char *xml_write_boolean(bool value)
+{
+	if(value) {
+		return "true";
+	}
+
+	return "false";
+}
+
+/* Parse the attribute text into `value`, an array whose elements each start
+ * with VECTOR_SIZE consecutive floats. When the number of tokens is not a
+ * multiple of VECTOR_SIZE nothing is parsed and `value` is left untouched. */
+template<int VECTOR_SIZE, typename T>
+static void xml_read_float_array(T& value, pugi::xml_attribute attr)
+{
+	vector<string> tokens;
+	string_split(tokens, attr.value());
+
+	const bool complete_elements = (tokens.size() % VECTOR_SIZE == 0);
+	if(!complete_elements) {
+		return;
+	}
+
+	value.resize(tokens.size() / VECTOR_SIZE);
+
+	/* tokens are consumed in order, VECTOR_SIZE per element */
+	size_t token_index = 0;
+	for(size_t element = 0; element < value.size(); element++) {
+		float *components = (float*)&value[element];
+
+		for(size_t c = 0; c < VECTOR_SIZE; c++, token_index++) {
+			components[c] = (float)atof(tokens[token_index].c_str());
+		}
+	}
+}
+
+/* Fill in the sockets of `node` from the attributes of `xml_node`.
+ *
+ * The optional "name" attribute sets the node name. Every input socket of the
+ * node type is then looked up as an attribute with the socket's name; closure,
+ * undefined and internal sockets are skipped, and a socket without attribute
+ * keeps its current value. Array values are split into tokens by
+ * string_split(). Node sockets are resolved by name through reader.node_map,
+ * and only accepted when the referenced node has the type the socket expects.
+ *
+ * A node that ends up with a non-empty name is stored in reader.node_map so
+ * that nodes read later can refer to it. */
+void xml_read_node(XMLReader& reader, Node *node, pugi::xml_node xml_node)
+{
+	pugi::xml_attribute name_attr = xml_node.attribute("name");
+	if(name_attr) {
+		node->name = ustring(name_attr.value());
+	}
+
+	foreach(const SocketType& socket, node->type->inputs) {
+		if(socket.type == SocketType::CLOSURE || socket.type == SocketType::UNDEFINED) {
+			continue;
+		}
+		if(socket.flags & SocketType::INTERNAL) {
+			continue;
+		}
+
+		pugi::xml_attribute attr = xml_node.attribute(socket.name.c_str());
+
+		if(!attr) {
+			continue;
+		}
+
+		switch(socket.type)
+		{
+			case SocketType::BOOLEAN:
+			{
+				node->set(socket, xml_read_boolean(attr.value()));
+				break;
+			}
+			case SocketType::BOOLEAN_ARRAY:
+			{
+				vector<string> tokens;
+				string_split(tokens, attr.value());
+
+				array<bool> value;
+				value.resize(tokens.size());
+				for(size_t i = 0; i < value.size(); i++)
+					value[i] = xml_read_boolean(tokens[i].c_str());
+				node->set(socket, value);
+				break;
+			}
+			case SocketType::FLOAT:
+			{
+				node->set(socket, (float)atof(attr.value()));
+				break;
+			}
+			case SocketType::FLOAT_ARRAY:
+			{
+				array<float> value;
+				xml_read_float_array<1>(value, attr);
+				node->set(socket, value);
+				break;
+			}
+			case SocketType::INT:
+			{
+				node->set(socket, (int)atoi(attr.value()));
+				break;
+			}
+			case SocketType::INT_ARRAY:
+			{
+				vector<string> tokens;
+				string_split(tokens, attr.value());
+
+				array<int> value;
+				value.resize(tokens.size());
+				for(size_t i = 0; i < value.size(); i++) {
+					/* parse the i-th token, not the whole attribute text:
+					 * the latter gives every element the first integer */
+					value[i] = (int)atoi(tokens[i].c_str());
+				}
+				node->set(socket, value);
+				break;
+			}
+			case SocketType::COLOR:
+			case SocketType::VECTOR:
+			case SocketType::POINT:
+			case SocketType::NORMAL:
+			{
+				/* read as array, accept only exactly one element */
+				array<float3> value;
+				xml_read_float_array<3>(value, attr);
+				if(value.size() == 1) {
+					node->set(socket, value[0]);
+				}
+				break;
+			}
+			case SocketType::COLOR_ARRAY:
+			case SocketType::VECTOR_ARRAY:
+			case SocketType::POINT_ARRAY:
+			case SocketType::NORMAL_ARRAY:
+			{
+				array<float3> value;
+				xml_read_float_array<3>(value, attr);
+				node->set(socket, value);
+				break;
+			}
+			case SocketType::POINT2:
+			{
+				array<float2> value;
+				xml_read_float_array<2>(value, attr);
+				if(value.size() == 1) {
+					node->set(socket, value[0]);
+				}
+				break;
+			}
+			case SocketType::POINT2_ARRAY:
+			{
+				array<float2> value;
+				xml_read_float_array<2>(value, attr);
+				node->set(socket, value);
+				break;
+			}
+			case SocketType::STRING:
+			{
+				node->set(socket, attr.value());
+				break;
+			}
+			case SocketType::ENUM:
+			{
+				/* enum values are given by name and must be known */
+				ustring value(attr.value());
+				if(socket.enum_values->exists(value)) {
+					node->set(socket, value);
+				}
+				else {
+					fprintf(stderr, "Unknown value \"%s\" for attribute \"%s\".\n", value.c_str(), socket.name.c_str());
+				}
+				break;
+			}
+			case SocketType::STRING_ARRAY:
+			{
+				vector<string> tokens;
+				string_split(tokens, attr.value());
+
+				array<ustring> value;
+				value.resize(tokens.size());
+				for(size_t i = 0; i < value.size(); i++) {
+					value[i] = ustring(tokens[i]);
+				}
+				node->set(socket, value);
+				break;
+			}
+			case SocketType::TRANSFORM:
+			{
+				array<Transform> value;
+				xml_read_float_array<16>(value, attr);
+				if(value.size() == 1) {
+					node->set(socket, value[0]);
+				}
+				break;
+			}
+			case SocketType::TRANSFORM_ARRAY:
+			{
+				array<Transform> value;
+				xml_read_float_array<16>(value, attr);
+				node->set(socket, value);
+				break;
+			}
+			case SocketType::NODE:
+			{
+				/* unknown names and nodes of the wrong type are ignored */
+				ustring value(attr.value());
+				map<ustring, Node*>::iterator it = reader.node_map.find(value);
+				if(it != reader.node_map.end())
+				{
+					Node *value_node = it->second;
+					if(value_node->type == *(socket.node_type))
+						node->set(socket, it->second);
+				}
+				break;
+			}
+			case SocketType::NODE_ARRAY:
+			{
+				vector<string> tokens;
+				string_split(tokens, attr.value());
+
+				/* unknown names and nodes of the wrong type become NULL
+				 * entries, so the array keeps one entry per token */
+				array<Node*> value;
+				value.resize(tokens.size());
+				for(size_t i = 0; i < value.size(); i++)
+				{
+					map<ustring, Node*>::iterator it = reader.node_map.find(ustring(tokens[i]));
+					if(it != reader.node_map.end())
+					{
+						Node *value_node = it->second;
+						value[i] = (value_node->type == *(socket.node_type)) ? value_node : NULL;
+					}
+					else
+					{
+						value[i] = NULL;
+					}
+				}
+				node->set(socket, value);
+				break;
+			}
+			case SocketType::CLOSURE:
+			case SocketType::UNDEFINED:
+				break;
+		}
+	}
+
+	if(node->name)
+		reader.node_map[node->name] = node;
+}
+
+/* Append a child element for `node` to `xml_root` and return it.
+ *
+ * The element is named after the node type and always gets a "name"
+ * attribute. Every input socket that is not a closure, undefined or internal,
+ * and whose value differs from the default, is written as an attribute named
+ * after the socket. Array values are written as space separated tokens. */
+pugi::xml_node xml_write_node(Node *node, pugi::xml_node xml_root)
+{
+	pugi::xml_node xml_node = xml_root.append_child(node->type->name.c_str());
+
+	xml_node.append_attribute("name") = node->name.c_str();
+
+	foreach(const SocketType& socket, node->type->inputs) {
+		if(socket.type == SocketType::CLOSURE || socket.type == SocketType::UNDEFINED) {
+			continue;
+		}
+		if(socket.flags & SocketType::INTERNAL) {
+			continue;
+		}
+		if(node->has_default_value(socket)) {
+			continue;
+		}
+
+		/* the attribute is created first, the cases below assign its value */
+		pugi::xml_attribute attr = xml_node.append_attribute(socket.name.c_str());
+
+		switch(socket.type)
+		{
+			case SocketType::BOOLEAN:
+			{
+				attr = xml_write_boolean(node->get_bool(socket));
+				break;
+			}
+			case SocketType::BOOLEAN_ARRAY:
+			{
+				std::stringstream ss;
+				const array<bool>& value = node->get_bool_array(socket);
+				for(size_t i = 0; i < value.size(); i++) {
+					ss << xml_write_boolean(value[i]);
+					if(i != value.size() - 1)
+						ss << " ";
+				}
+				attr = ss.str().c_str();
+				break;
+			}
+			case SocketType::FLOAT:
+			{
+				attr = (double)node->get_float(socket);
+				break;
+			}
+			case SocketType::FLOAT_ARRAY:
+			{
+				std::stringstream ss;
+				const array<float>& value = node->get_float_array(socket);
+				for(size_t i = 0; i < value.size(); i++) {
+					ss << value[i];
+					if(i != value.size() - 1) {
+						ss << " ";
+					}
+				}
+				attr = ss.str().c_str();
+				break;
+			}
+			case SocketType::INT:
+			{
+				attr = node->get_int(socket);
+				break;
+			}
+			case SocketType::INT_ARRAY:
+			{
+				std::stringstream ss;
+				const array<int>& value = node->get_int_array(socket);
+				for(size_t i = 0; i < value.size(); i++) {
+					ss << value[i];
+					if(i != value.size() - 1) {
+						ss << " ";
+					}
+				}
+				attr = ss.str().c_str();
+				break;
+			}
+			case SocketType::COLOR:
+			case SocketType::VECTOR:
+			case SocketType::POINT:
+			case SocketType::NORMAL:
+			{
+				float3 value = node->get_float3(socket);
+				attr = string_printf("%g %g %g", (double)value.x, (double)value.y, (double)value.z).c_str();
+				break;
+			}
+			case SocketType::COLOR_ARRAY:
+			case SocketType::VECTOR_ARRAY:
+			case SocketType::POINT_ARRAY:
+			case SocketType::NORMAL_ARRAY:
+			{
+				std::stringstream ss;
+				const array<float3>& value = node->get_float3_array(socket);
+				for(size_t i = 0; i < value.size(); i++) {
+					ss << string_printf("%g %g %g", (double)value[i].x, (double)value[i].y, (double)value[i].z);
+					if(i != value.size() - 1) {
+						ss << " ";
+					}
+				}
+				attr = ss.str().c_str();
+				break;
+			}
+			case SocketType::POINT2:
+			{
+				float2 value = node->get_float2(socket);
+				attr = string_printf("%g %g", (double)value.x, (double)value.y).c_str();
+				break;
+			}
+			case SocketType::POINT2_ARRAY:
+			{
+				std::stringstream ss;
+				const array<float2>& value = node->get_float2_array(socket);
+				for(size_t i = 0; i < value.size(); i++) {
+					ss << string_printf("%g %g", (double)value[i].x, (double)value[i].y);
+					if(i != value.size() - 1) {
+						ss << " ";
+					}
+				}
+				attr = ss.str().c_str();
+				break;
+			}
+			case SocketType::STRING:
+			case SocketType::ENUM:
+			{
+				attr = node->get_string(socket).c_str();
+				break;
+			}
+			case SocketType::STRING_ARRAY:
+			{
+				std::stringstream ss;
+				const array<ustring>& value = node->get_string_array(socket);
+				for(size_t i = 0; i < value.size(); i++) {
+					ss << value[i];
+					if(i != value.size() - 1) {
+						ss << " ";
+					}
+				}
+				attr = ss.str().c_str();
+				break;
+			}
+			case SocketType::TRANSFORM:
+			{
+				/* sixteen values, written as four groups of four */
+				Transform tfm = node->get_transform(socket);
+				std::stringstream ss;
+				for(int i = 0; i < 4; i++) {
+					ss << string_printf("%g %g %g %g", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
+					if(i != 3) {
+						ss << " ";
+					}
+				}
+				attr = ss.str().c_str();
+				break;
+			}
+			case SocketType::TRANSFORM_ARRAY:
+			{
+				std::stringstream ss;
+				const array<Transform>& value = node->get_transform_array(socket);
+				for(size_t j = 0; j < value.size(); j++) {
+					const Transform& tfm = value[j];
+
+					for(int i = 0; i < 4; i++) {
+						ss << string_printf("%g %g %g %g", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
+						/* separator after every group except the very last one */
+						if(j != value.size() - 1 || i != 3) {
+							ss << " ";
+						}
+					}
+				}
+				attr = ss.str().c_str();
+				break;
+			}
+			case SocketType::NODE:
+			{
+				/* nodes are referenced by name. for a NULL pointer the
+				 * attribute stays as appended, without an assigned value */
+				Node *value = node->get_node(socket);
+				if(value) {
+					attr = value->name.c_str();
+				}
+				break;
+			}
+			case SocketType::NODE_ARRAY:
+			{
+				/* NULL entries write no name but still get their separator */
+				std::stringstream ss;
+				const array<Node*>& value = node->get_node_array(socket);
+				for(size_t i = 0; i < value.size(); i++) {
+					if(value[i]) {
+						ss << value[i]->name.c_str();
+					}
+					if(i != value.size() - 1) {
+						ss << " ";
+					}
+				}
+				attr = ss.str().c_str();
+				break;
+			}
+			case SocketType::CLOSURE:
+			case SocketType::UNDEFINED:
+				break;
+		}
+	}
+
+	return xml_node;
+}
+
+CCL_NAMESPACE_END
+
--- a/intern/cycles/graph/node_xml.h
+++ b/intern/cycles/graph/node_xml.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "node.h"
+
+#include "util_map.h"
+#include "util_string.h"
+#include "util_xml.h"
+
+CCL_NAMESPACE_BEGIN
+
+struct XMLReader {  /* reader state handed to xml_read_node() */
+	map<ustring, Node*> node_map;  /* lookup of nodes by ustring key; presumably the node name - TODO confirm in node_xml.cpp */
+};
+
+void xml_read_node(XMLReader& reader, Node *node, pugi::xml_node xml_node);  /* NOTE(review): presumably fills `node` from `xml_node` - confirm */
+pugi::xml_node xml_write_node(Node *node, pugi::xml_node xml_root);  /* NOTE(review): presumably serializes `node` under `xml_root` - confirm */
+
+CCL_NAMESPACE_END
+
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -67,6 +67,7 @@ set(SRC_KERNELS_CPU_HEADERS
 	kernel.h
 	kernels/cpu/kernel_cpu.h
 	kernels/cpu/kernel_cpu_impl.h
+	kernels/cpu/kernel_cpu_image.h
 )

 set(SRC_CLOSURE_HEADERS
@@ -233,6 +234,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
 			OUTPUT ${cuda_cubin}
 			COMMAND ${CUDA_NVCC_EXECUTABLE}
 					-arch=${arch}
+					${CUDA_NVCC_FLAGS}
 					-m${CUDA_BITS}
 					--cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda/kernel.cu
 					-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -48,6 +48,28 @@ CCL_NAMESPACE_BEGIN

 #define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)

+/* Debugging helpers: update the traversal counters of the in-scope `isect`; expand to nothing unless __KERNEL_DEBUG__ is defined. */
+#ifdef __KERNEL_DEBUG__
+#  define BVH_DEBUG_INIT() \
+	do { \
+		isect->num_traversal_steps = 0; \
+		isect->num_traversed_instances = 0; \
+	} while(0)
+#  define BVH_DEBUG_NEXT_STEP() \
+	do { \
+		++isect->num_traversal_steps; \
+	} while(0)
+#  define BVH_DEBUG_NEXT_INSTANCE() \
+	do { \
+		++isect->num_traversed_instances; \
+	} while(0)
+#else  /* __KERNEL_DEBUG__ */
+#  define BVH_DEBUG_INIT()
+#  define BVH_DEBUG_NEXT_STEP()
+#  define BVH_DEBUG_NEXT_INSTANCE()
+#endif  /* __KERNEL_DEBUG__ */
+
+
 /* Common QBVH functions. */
 #ifdef __QBVH__
 #  include "geom_qbvh.h"
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h
@@ -74,10 +74,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 	isect->prim = PRIM_NONE;
 	isect->object = OBJECT_NONE;

-#if defined(__KERNEL_DEBUG__)
-	isect->num_traversal_steps = 0;
-	isect->num_traversed_instances = 0;
-#endif
+	BVH_DEBUG_INIT();

 #if defined(__KERNEL_SSE2__)
 	const shuffle_swap_t shuf_identity = shuffle_swap_identity();
@@ -241,10 +238,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 						--stackPtr;
 					}
 				}
-
-#if defined(__KERNEL_DEBUG__)
-				isect->num_traversal_steps++;
-#endif
+				BVH_DEBUG_NEXT_STEP();
 			}

 			/* if node is leaf, fetch triangle list */
@@ -266,9 +260,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 					switch(type & PRIMITIVE_ALL) {
 						case PRIMITIVE_TRIANGLE: {
 							for(; primAddr < primAddr2; primAddr++) {
-#if defined(__KERNEL_DEBUG__)
-								isect->num_traversal_steps++;
-#endif
+								BVH_DEBUG_NEXT_STEP();
 								kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
 								if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
 									/* shadow ray early termination */
@@ -287,9 +279,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 #if BVH_FEATURE(BVH_MOTION)
 						case PRIMITIVE_MOTION_TRIANGLE: {
 							for(; primAddr < primAddr2; primAddr++) {
-#  if defined(__KERNEL_DEBUG__)
-								isect->num_traversal_steps++;
-#  endif
+								BVH_DEBUG_NEXT_STEP();
 								kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
 								if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
 									/* shadow ray early termination */
@@ -310,9 +300,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 						case PRIMITIVE_CURVE:
 						case PRIMITIVE_MOTION_CURVE: {
 							for(; primAddr < primAddr2; primAddr++) {
-#  if defined(__KERNEL_DEBUG__)
-								isect->num_traversal_steps++;
-#  endif
+								BVH_DEBUG_NEXT_STEP();
 								kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
 								bool hit;
 								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
@@ -364,9 +352,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,

 					nodeAddr = kernel_tex_fetch(__object_node, object);

-#  if defined(__KERNEL_DEBUG__)
-					isect->num_traversed_instances++;
-#  endif
+					BVH_DEBUG_NEXT_INSTANCE();
 				}
 			}
 #endif  /* FEATURE(BVH_INSTANCING) */
--- a/intern/cycles/kernel/geom/geom_qbvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_traversal.h
@@ -78,10 +78,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 	isect->prim = PRIM_NONE;
 	isect->object = OBJECT_NONE;

-#if defined(__KERNEL_DEBUG__)
-	isect->num_traversal_steps = 0;
-	isect->num_traversed_instances = 0;
-#endif
+	BVH_DEBUG_INIT();

 	ssef tnear(0.0f), tfar(ray->t);
 	sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
@@ -120,9 +117,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 				int traverseChild;
 				ssef dist;

-#if defined(__KERNEL_DEBUG__)
-				isect->num_traversal_steps++;
-#endif
+				BVH_DEBUG_NEXT_STEP();

 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
 				if(difl != 0.0f) {
@@ -295,9 +290,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 					switch(type & PRIMITIVE_ALL) {
 						case PRIMITIVE_TRIANGLE: {
 							for(; primAddr < primAddr2; primAddr++) {
-#if defined(__KERNEL_DEBUG__)
-								isect->num_traversal_steps++;
-#endif
+								BVH_DEBUG_NEXT_STEP();
 								kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
 								if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
 									tfar = ssef(isect->t);
@@ -311,9 +304,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 #if BVH_FEATURE(BVH_MOTION)
 						case PRIMITIVE_MOTION_TRIANGLE: {
 							for(; primAddr < primAddr2; primAddr++) {
-#  if defined(__KERNEL_DEBUG__)
-								isect->num_traversal_steps++;
-#  endif
+								BVH_DEBUG_NEXT_STEP();
 								kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
 								if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
 									tfar = ssef(isect->t);
@@ -329,9 +320,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 						case PRIMITIVE_CURVE:
 						case PRIMITIVE_MOTION_CURVE: {
 							for(; primAddr < primAddr2; primAddr++) {
-#  if defined(__KERNEL_DEBUG__)
-								isect->num_traversal_steps++;
-#  endif
+								BVH_DEBUG_NEXT_STEP();
 								kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
 								bool hit;
 								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
@@ -381,9 +370,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,

 					nodeAddr = kernel_tex_fetch(__object_node, object);

-#  if defined(__KERNEL_DEBUG__)
-					isect->num_traversed_instances++;
-#  endif
+					BVH_DEBUG_NEXT_INSTANCE();
 				}
 			}
 #endif  /* FEATURE(BVH_INSTANCING) */
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -159,16 +159,11 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 	if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
 #endif
 	{
-#ifdef __KERNEL_GPU__
-		float4 a = tri_b - tri_a, b = tri_c - tri_a;
-		if(len_squared(make_float3(a.y*b.z - a.z*b.y,
-		                           a.z*b.x - a.x*b.z,
-		                           a.x*b.y - a.y*b.x)) == 0.0f)
-		{
+#ifdef __KERNEL_CUDA__
+		if(A == B && B == C) {
 			return false;
 		}
 #endif
-
 		/* Normalize U, V, W, and T. */
 		const float inv_det = 1.0f / det;
 		isect->prim = triAddr;
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN

 /* Return position normalized to 0..1 in mesh bounds */

-#ifdef __KERNEL_GPU__
+#if defined(__KERNEL_GPU__) && __CUDA_ARCH__ < 300
 ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z)
 {
 	float4 r;
@@ -65,7 +65,13 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
 {
 	float3 P = volume_normalized_position(kg, sd, sd->P);
 #ifdef __KERNEL_GPU__
+#  if __CUDA_ARCH__ >= 300
+	CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
+	float f = kernel_tex_image_interp_3d_float(tex, P.x, P.y, P.z);
+	float4 r = make_float4(f, f, f, 1.0);
+#  else
 	float4 r = volume_image_texture_3d(id, P.x, P.y, P.z);
+#  endif
 #else
 	float4 r;
 	if(sd->flag & SD_VOLUME_CUBIC)
@@ -84,7 +90,12 @@ ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *s
 {
 	float3 P = volume_normalized_position(kg, sd, sd->P);
 #ifdef __KERNEL_GPU__
+#  if __CUDA_ARCH__ >= 300
+	CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
+	float4 r = kernel_tex_image_interp_3d_float4(tex, P.x, P.y, P.z);
+#  else
 	float4 r = volume_image_texture_3d(id, P.x, P.y, P.z);
+#  endif
 #else
 	float4 r;
 	if(sd->flag & SD_VOLUME_CUBIC)
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -30,6 +30,9 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 	Ray ray;
 	float3 throughput = make_float3(1.0f, 1.0f, 1.0f);

+	/* emission and indirect shader data memory used by various functions */
+	ShaderData emission_sd, indirect_sd;
+
 	ray.P = sd->P + sd->Ng;
 	ray.D = -sd->Ng;
 	ray.t = FLT_MAX;
@@ -41,7 +44,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 	path_radiance_init(&L_sample, kernel_data.film.use_light_pass);

 	/* init path state */
-	path_state_init(kg, &state, &rng, sample, NULL);
+	path_state_init(kg, &emission_sd, &state, &rng, sample, NULL);

 	/* evaluate surface shader */
 	float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
@@ -56,7 +59,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian

 		/* sample ambient occlusion */
 		if(pass_filter & BAKE_FILTER_AO) {
-			kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+			kernel_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
 		}

 		/* sample emission */
@@ -75,6 +78,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 			kernel_path_subsurface_init_indirect(&ss_indirect);
 			if(kernel_path_subsurface_scatter(kg,
 			                                  sd,
+			                                  &emission_sd,
 			                                  &L_sample,
 			                                  &state,
 			                                  &rng,
@@ -90,6 +94,8 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 					                                      &L_sample,
 					                                      &throughput);
 					kernel_path_indirect(kg,
+					                     &indirect_sd,
+					                     &emission_sd,
 					                     &rng,
 					                     &ray,
 					                     throughput,
@@ -105,14 +111,14 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian

 		/* sample light and BSDF */
 		if(!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
-			kernel_path_surface_connect_light(kg, &rng, sd, throughput, &state, &L_sample);
+			kernel_path_surface_connect_light(kg, &rng, sd, &emission_sd, throughput, &state, &L_sample);

 			if(kernel_path_surface_bounce(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
 #ifdef __LAMP_MIS__
 				state.ray_t = 0.0f;
 #endif
 				/* compute indirect light */
-				kernel_path_indirect(kg, &rng, &ray, throughput, 1, &state, &L_sample);
+				kernel_path_indirect(kg, &indirect_sd, &emission_sd, &rng, &ray, throughput, 1, &state, &L_sample);

 				/* sum and reset indirect light pass variables for the next samples */
 				path_radiance_sum_indirect(&L_sample);
@@ -126,7 +132,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian

 		/* sample ambient occlusion */
 		if(pass_filter & BAKE_FILTER_AO) {
-			kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+			kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
 		}

 		/* sample emission */
@@ -139,7 +145,8 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 		/* sample subsurface scattering */
 		if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
 			/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
-			kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, throughput);
+			kernel_branched_path_subsurface_scatter(kg, sd, &indirect_sd,
+				&emission_sd, &L_sample, &state, &rng, &ray, throughput);
 		}
 #endif

@@ -150,13 +157,13 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 			if(kernel_data.integrator.use_direct_light) {
 				int all = kernel_data.integrator.sample_all_lights_direct;
 				kernel_branched_path_surface_connect_light(kg, &rng,
-					sd, &state, throughput, 1.0f, &L_sample, all);
+					sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
 			}
 #endif

 			/* indirect light */
 			kernel_branched_path_surface_indirect_light(kg, &rng,
-				sd, throughput, 1.0f, &state, &L_sample);
+				sd, &indirect_sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
 		}
 	}
 #endif
@@ -242,11 +249,11 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
 	}

 	if(is_direct) {
-		out += safe_divide_color(direct, color);
+		out += safe_divide_even_color(direct, color);
 	}

 	if(is_indirect) {
-		out += safe_divide_color(indirect, color);
+		out += safe_divide_even_color(indirect, color);
 	}

 	return out;
@@ -475,12 +482,10 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
 	}

 	/* write output */
-	float output_fac = is_aa_pass(type)? 1.0f/num_samples: 1.0f;
+	const float output_fac = is_aa_pass(type)? 1.0f/num_samples: 1.0f;
+	const float4 scaled_result = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;

-	if(sample == 0)
-		output[i] = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
-	else
-		output[i] += make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
+	output[i] = (sample == 0)?  scaled_result: output[i] + scaled_result;
 }

 #endif  /* __BAKING__ */
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -109,6 +109,12 @@ template<typename T> struct texture_image  {
 		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
 	}

+	ccl_always_inline float4 read(uchar r)
+	{
+		const float gray = (1.0f/255.0f)*r;  /* scale the byte by 1/255 */
+		return make_float4(gray, gray, gray, 1.0f);  /* same value in x, y, z; w fixed at 1 */
+	}
+
 	ccl_always_inline float4 read(float r)
 	{
 		/* TODO(dingto): Optimize this, so interpolation
@@ -479,6 +485,7 @@ typedef texture<int> texture_int;
 typedef texture<uint4> texture_uint4;
 typedef texture<uchar4> texture_uchar4;
 typedef texture_image<float> texture_image_float;
+typedef texture_image<uchar> texture_image_uchar;
 typedef texture_image<float4> texture_image_float4;
 typedef texture_image<uchar4> texture_image_uchar4;

@@ -489,20 +496,9 @@ typedef texture_image<uchar4> texture_image_uchar4;
 #define kernel_tex_fetch_ssei(tex, index) (kg->tex.fetch_ssei(index))
 #define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))

-#define kernel_tex_image_interp(tex, x, y) \
-	((tex >= TEX_IMAGE_FLOAT_START_CPU) ? kg->texture_float_images[tex - TEX_IMAGE_FLOAT_START_CPU].interp(x, y) : \
-	(tex >= TEX_IMAGE_BYTE4_START_CPU) ? kg->texture_byte4_images[tex - TEX_IMAGE_BYTE4_START_CPU].interp(x, y) : \
-	kg->texture_float4_images[tex].interp(x, y))
-
-#define kernel_tex_image_interp_3d(tex, x, y, z) \
-	((tex >= TEX_IMAGE_FLOAT_START_CPU) ? kg->texture_float_images[tex - TEX_IMAGE_FLOAT_START_CPU].interp_3d(x, y, z) : \
-	(tex >= TEX_IMAGE_BYTE4_START_CPU) ? kg->texture_byte4_images[tex - TEX_IMAGE_BYTE4_START_CPU].interp_3d(x, y, z) : \
-	kg->texture_float4_images[tex].interp_3d(x, y, z))
-
-#define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) \
-	((tex >= TEX_IMAGE_FLOAT_START_CPU) ? kg->texture_float_images[tex - TEX_IMAGE_FLOAT_START_CPU].interp_3d_ex(x, y, z, interpolation) : \
-	(tex >= TEX_IMAGE_BYTE4_START_CPU) ? kg->texture_byte4_images[tex - TEX_IMAGE_BYTE4_START_CPU].interp_3d_ex(x, y, z, interpolation) : \
-	kg->texture_float4_images[tex].interp_3d_ex(x, y, z, interpolation))
+#define kernel_tex_image_interp(tex, x, y) kernel_tex_image_interp_impl(kg, tex, x, y)
+#define kernel_tex_image_interp_3d(tex, x, y, z) kernel_tex_image_interp_3d_impl(kg, tex, x, y, z)
+#define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) kernel_tex_image_interp_3d_ex_impl(kg, tex, x, y, z, interpolation)

 #define kernel_data (kg->__data)

--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -67,20 +67,29 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;

 /* Macros to handle different memory storage on different devices */

-/* In order to use full 6GB of memory on Titan cards, use arrays instead
- * of textures. On earlier cards this seems slower, but on Titan it is
- * actually slightly faster in tests. */
+/* On Fermi cards (4xx and 5xx), we use regular textures for both data and images.
+ * On Kepler (6xx) and above, we use Bindless Textures for images and arrays for data.
+ *
+ * Arrays are necessary in order to use the full VRAM on newer cards, and it's slightly faster.
+ * Using Arrays on Fermi turned out to be slower. */
+
+/* Fermi */
 #if __CUDA_ARCH__ < 300
 #  define __KERNEL_CUDA_TEX_STORAGE__
-#endif
-
-#ifdef __KERNEL_CUDA_TEX_STORAGE__
 #  define kernel_tex_fetch(t, index) tex1Dfetch(t, index)
+
+#  define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
+#  define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z)
+
+/* Kepler */
 #else
 #  define kernel_tex_fetch(t, index) t[(index)]
+
+#  define kernel_tex_image_interp_float4(t, x, y) tex2D<float4>(t, x, y)
+#  define kernel_tex_image_interp_float(t, x, y) tex2D<float>(t, x, y)
+#  define kernel_tex_image_interp_3d_float4(t, x, y, z) tex3D<float4>(t, x, y, z)
+#  define kernel_tex_image_interp_3d_float(t, x, y, z) tex3D<float>(t, x, y, z)
 #endif
-#define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
-#define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z)

 #define kernel_data __data

--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -18,6 +18,7 @@ CCL_NAMESPACE_BEGIN

 /* Direction Emission */
 ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
+                                                ShaderData *emission_sd,
                                                LightSample *ls,
                                                ccl_addr_space PathState *state,
                                                float3 I,
@@ -26,12 +27,6 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
                                                float time)
 {
 	/* setup shading at emitter */
-#ifdef __SPLIT_KERNEL__
-	ShaderData *sd = kg->sd_input;
-#else
-	ShaderData sd_object;
-	ShaderData *sd = &sd_object;
-#endif
 	float3 eval;

 #ifdef __BACKGROUND_MIS__
@@ -46,28 +41,28 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
 		ray.dP = differential3_zero();
 		ray.dD = dI;

-		shader_setup_from_background(kg, sd, &ray);
+		shader_setup_from_background(kg, emission_sd, &ray);

 		path_state_modify_bounce(state, true);
-		eval = shader_eval_background(kg, sd, state, 0, SHADER_CONTEXT_EMISSION);
+		eval = shader_eval_background(kg, emission_sd, state, 0, SHADER_CONTEXT_EMISSION);
 		path_state_modify_bounce(state, false);
 	}
 	else
 #endif
 	{
-		shader_setup_from_sample(kg, sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time);
+		shader_setup_from_sample(kg, emission_sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time);

-		ls->Ng = ccl_fetch(sd, Ng);
+		ls->Ng = ccl_fetch(emission_sd, Ng);

 		/* no path flag, we're evaluating this for all closures. that's weak but
 		 * we'd have to do multiple evaluations otherwise */
 		path_state_modify_bounce(state, true);
-		shader_eval_surface(kg, sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION);
+		shader_eval_surface(kg, emission_sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION);
 		path_state_modify_bounce(state, false);

 		/* evaluate emissive closure */
-		if(ccl_fetch(sd, flag) & SD_EMISSION)
-			eval = shader_emissive_eval(kg, sd);
+		if(ccl_fetch(emission_sd, flag) & SD_EMISSION)
+			eval = shader_emissive_eval(kg, emission_sd);
 		else
 			eval = make_float3(0.0f, 0.0f, 0.0f);
 	}
@@ -79,6 +74,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,

 ccl_device_noinline bool direct_emission(KernelGlobals *kg,
                                         ShaderData *sd,
+                                         ShaderData *emission_sd,
                                         LightSample *ls,
                                         ccl_addr_space PathState *state,
                                         Ray *ray,
@@ -94,6 +90,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
 	/* evaluate closure */

 	float3 light_eval = direct_emissive_eval(kg,
+	                                         emission_sd,
 	                                         ls,
 	                                         state,
 	                                         -ls->D,
@@ -198,6 +195,7 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader
 /* Indirect Lamp Emission */

 ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
+                                                ShaderData *emission_sd,
                                                ccl_addr_space PathState *state,
                                                Ray *ray,
                                                float3 *emission)
@@ -225,6 +223,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
 #endif

 		float3 L = direct_emissive_eval(kg,
+		                                emission_sd,
 		                                &ls,
 		                                state,
 		                                -ray->D,
@@ -238,7 +237,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
 			Ray volume_ray = *ray;
 			volume_ray.t = ls.t;
 			float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
-			kernel_volume_shadow(kg, state, &volume_ray, &volume_tp);
+			kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp);
 			L *= volume_tp;
 		}
 #endif
@@ -260,6 +259,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
 /* Indirect Background */

 ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
+                                               ShaderData *emission_sd,
                                               ccl_addr_space PathState *state,
                                               ccl_addr_space Ray *ray)
 {
@@ -280,19 +280,14 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
 	/* evaluate background closure */
 #  ifdef __SPLIT_KERNEL__
 	Ray priv_ray = *ray;
-	shader_setup_from_background(kg, kg->sd_input, &priv_ray);
-
-	path_state_modify_bounce(state, true);
-	float3 L = shader_eval_background(kg, kg->sd_input, state, state->flag, SHADER_CONTEXT_EMISSION);
-	path_state_modify_bounce(state, false);
+	shader_setup_from_background(kg, emission_sd, &priv_ray);
 #  else
-	ShaderData sd;
-	shader_setup_from_background(kg, &sd, ray);
+	shader_setup_from_background(kg, emission_sd, ray);
+#  endif

 	path_state_modify_bounce(state, true);
-	float3 L = shader_eval_background(kg, &sd, state, state->flag, SHADER_CONTEXT_EMISSION);
+	float3 L = shader_eval_background(kg, emission_sd, state, state->flag, SHADER_CONTEXT_EMISSION);
 	path_state_modify_bounce(state, false);
-#  endif

 #ifdef __BACKGROUND_MIS__
 	/* check if background light exists or if we should skip pdf */
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -31,10 +31,14 @@ struct OSLThreadData;
 struct OSLShadingSystem;
 #  endif

+struct Intersection;
+struct VolumeStep;
+
 typedef struct KernelGlobals {
-	texture_image_uchar4 texture_byte4_images[TEX_NUM_BYTE4_IMAGES_CPU];
-	texture_image_float4 texture_float4_images[TEX_NUM_FLOAT4_IMAGES_CPU];
-	texture_image_float texture_float_images[TEX_NUM_FLOAT_IMAGES_CPU];
+	texture_image_uchar4 texture_byte4_images[TEX_NUM_BYTE4_CPU];
+	texture_image_float4 texture_float4_images[TEX_NUM_FLOAT4_CPU];
+	texture_image_float texture_float_images[TEX_NUM_FLOAT_CPU];
+	texture_image_uchar texture_byte_images[TEX_NUM_BYTE_CPU];

 #  define KERNEL_TEX(type, ttype, name) ttype name;
 #  define KERNEL_IMAGE_TEX(type, ttype, name)
@@ -50,6 +54,14 @@ typedef struct KernelGlobals {
 	OSLThreadData *osl_tdata;
 #  endif

+	/* **** Run-time data ****  */
+
+	/* Heap-allocated storage for transparent shadows intersections. */
+	Intersection *transparent_shadow_intersections;
+
+	/* Storage for decoupled volume steps. */
+	VolumeStep *decoupled_volume_steps[2];
+	int decoupled_volume_steps_index;
 } KernelGlobals;

 #endif  /* __KERNEL_CPU__ */
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -291,24 +291,13 @@ ccl_device float background_portal_pdf(KernelGlobals *kg,
 		}
 		num_possible++;

-		float t = -(dot(P, dir) - dot(lightpos, dir)) / dot(direction, dir);
-		if(t <= 1e-4f) {
-			/* Either behind the portal or too close. */
-			continue;
-		}
-
 		float4 data1 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 1);
 		float4 data2 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 2);

 		float3 axisu = make_float3(data1.y, data1.z, data1.w);
 		float3 axisv = make_float3(data2.y, data2.z, data2.w);

-		float3 hit = P + t*direction;
-		float3 inplane = hit - lightpos;
-		/* Skip if the the ray doesn't pass through portal. */
-		if(fabsf(dot(inplane, axisu) / dot(axisu, axisu)) > 0.5f)
-			continue;
-		if(fabsf(dot(inplane, axisv) / dot(axisv, axisv)) > 0.5f)
+		if(!ray_quad_intersect(P, direction, 1e-4f, FLT_MAX, lightpos, axisu, axisv, dir, NULL, NULL))
 			continue;

 		portal_pdf += area_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false);
@@ -729,8 +718,8 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D,

 		float3 light_P = make_float3(data0.y, data0.z, data0.w);

-		if(!ray_quad_intersect(P, D, t,
-		                       light_P, axisu, axisv, &ls->P, &ls->t))
+		if(!ray_quad_intersect(P, D, 0.0f, t,
+		                       light_P, axisu, axisv, Ng, &ls->P, &ls->t))
 		{
 			return false;
 		}
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -53,6 +53,8 @@
 CCL_NAMESPACE_BEGIN

 ccl_device void kernel_path_indirect(KernelGlobals *kg,
+                                     ShaderData *sd,
+                                     ShaderData *emission_sd,
                                     RNG *rng,
                                     Ray *ray,
                                     float3 throughput,
@@ -87,7 +89,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,

 			/* intersect with lamp */
 			float3 emission;
-			if(indirect_lamp_emission(kg, state, &light_ray, &emission)) {
+			if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) {
 				path_radiance_accum_emission(L,
 				                             throughput,
 				                             emission,
@@ -115,15 +117,14 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 			if(decoupled) {
 				/* cache steps along volume for repeated sampling */
 				VolumeSegment volume_segment;
-				ShaderData volume_sd;

 				shader_setup_from_volume(kg,
-				                         &volume_sd,
+				                         sd,
 				                         &volume_ray);
 				kernel_volume_decoupled_record(kg,
 				                               state,
 				                               &volume_ray,
-				                               &volume_sd,
+				                               sd,
 				                               &volume_segment,
 				                               heterogeneous);

@@ -146,7 +147,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 					/* direct light sampling */
 					kernel_branched_path_volume_connect_light(kg,
 					                                          rng,
-					                                          &volume_sd,
+					                                          sd,
+					                                          emission_sd,
 					                                          throughput,
 					                                          state,
 					                                          L,
@@ -163,7 +165,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 					result = kernel_volume_decoupled_scatter(kg,
 					                                         state,
 					                                         &volume_ray,
-					                                         &volume_sd,
+					                                         sd,
 					                                         &throughput,
 					                                         rphase,
 					                                         rscatter,
@@ -178,7 +180,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 				if(result == VOLUME_PATH_SCATTERED) {
 					if(kernel_path_volume_bounce(kg,
 					                             rng,
-					                             &volume_sd,
+					                             sd,
 					                             &throughput,
 					                             state,
 					                             L,
@@ -198,16 +200,16 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 #  endif
 			{
 				/* integrate along volume segment with distance sampling */
-				ShaderData volume_sd;
 				VolumeIntegrateResult result = kernel_volume_integrate(
-					kg, state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous);
+					kg, state, sd, &volume_ray, L, &throughput, rng, heterogeneous);

 #  ifdef __VOLUME_SCATTER__
 				if(result == VOLUME_PATH_SCATTERED) {
 					/* direct lighting */
 					kernel_path_volume_connect_light(kg,
 					                                 rng,
-					                                 &volume_sd,
+					                                 sd,
+					                                 emission_sd,
 					                                 throughput,
 					                                 state,
 					                                 L);
@@ -215,7 +217,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 					/* indirect light bounce */
 					if(kernel_path_volume_bounce(kg,
 					                             rng,
-					                             &volume_sd,
+					                             sd,
 					                             &throughput,
 					                             state,
 					                             L,
@@ -235,7 +237,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 		if(!hit) {
 #ifdef __BACKGROUND__
 			/* sample background shader */
-			float3 L_background = indirect_background(kg, state, ray);
+			float3 L_background = indirect_background(kg, emission_sd, state, ray);
 			path_radiance_accum_background(L,
 			                               throughput,
 			                               L_background,
@@ -246,15 +248,14 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 		}

 		/* setup shading */
-		ShaderData sd;
 		shader_setup_from_ray(kg,
-		                      &sd,
+		                      sd,
 		                      &isect,
 		                      ray);
 		float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
-		shader_eval_surface(kg, &sd, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
+		shader_eval_surface(kg, sd, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
 #ifdef __BRANCHED_PATH__
-		shader_merge_closures(&sd);
+		shader_merge_closures(sd);
 #endif

 		/* blurring of bsdf after bounces, for rays that have a small likelihood
@@ -264,15 +265,15 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,

 			if(blur_pdf < 1.0f) {
 				float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
-				shader_bsdf_blur(kg, &sd, blur_roughness);
+				shader_bsdf_blur(kg, sd, blur_roughness);
 			}
 		}

 #ifdef __EMISSION__
 		/* emission */
-		if(sd.flag & SD_EMISSION) {
+		if(sd->flag & SD_EMISSION) {
 			float3 emission = indirect_primitive_emission(kg,
-			                                              &sd,
+			                                              sd,
 			                                              isect.t,
 			                                              state->flag,
 			                                              state->ray_pdf);
@@ -302,33 +303,33 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,

 #ifdef __AO__
 		/* ambient occlusion */
-		if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
+		if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
 			float bsdf_u, bsdf_v;
 			path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);

 			float ao_factor = kernel_data.background.ao_factor;
 			float3 ao_N;
-			float3 ao_bsdf = shader_bsdf_ao(kg, &sd, ao_factor, &ao_N);
+			float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
 			float3 ao_D;
 			float ao_pdf;
 			float3 ao_alpha = make_float3(0.0f, 0.0f, 0.0f);

 			sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);

-			if(dot(sd.Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
+			if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
 				Ray light_ray;
 				float3 ao_shadow;

-				light_ray.P = ray_offset(sd.P, sd.Ng);
+				light_ray.P = ray_offset(sd->P, sd->Ng);
 				light_ray.D = ao_D;
 				light_ray.t = kernel_data.background.ao_distance;
 #  ifdef __OBJECT_MOTION__
-				light_ray.time = sd.time;
+				light_ray.time = sd->time;
 #  endif
-				light_ray.dP = sd.dP;
+				light_ray.dP = sd->dP;
 				light_ray.dD = differential3_zero();

-				if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) {
+				if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
 					path_radiance_accum_ao(L,
 					                       throughput,
 					                       ao_alpha,
@@ -343,9 +344,9 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 #ifdef __SUBSURFACE__
 		/* bssrdf scatter to a different location on the same object, replacing
 		 * the closures with a diffuse BSDF */
-		if(sd.flag & SD_BSSRDF) {
+		if(sd->flag & SD_BSSRDF) {
 			float bssrdf_probability;
-			ShaderClosure *sc = subsurface_scatter_pick_closure(kg, &sd, &bssrdf_probability);
+			ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);

 			/* modify throughput for picking bssrdf or bsdf */
 			throughput *= bssrdf_probability;
@@ -361,7 +362,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 				                  PRNG_BSDF_U,
 				                  &bssrdf_u, &bssrdf_v);
 				subsurface_scatter_step(kg,
-				                        &sd,
+				                        sd,
 				                        state,
 				                        state->flag,
 				                        sc,
@@ -377,7 +378,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 			int all = kernel_data.integrator.sample_all_lights_indirect;
 			kernel_branched_path_surface_connect_light(kg,
 			                                           rng,
-			                                           &sd,
+			                                           sd,
+			                                           emission_sd,
 			                                           state,
 			                                           throughput,
 			                                           1.0f,
@@ -386,13 +388,14 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 		}
 #endif

-		if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, state, L, ray))
+		if(!kernel_path_surface_bounce(kg, rng, sd, &throughput, state, L, ray))
 			break;
 	}
 }

 ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
                                        ShaderData *sd,
+                                        ShaderData *emission_sd,
                                        PathRadiance *L,
                                        PathState *state,
                                        RNG *rng,
@@ -425,7 +428,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
 		light_ray.dP = ccl_fetch(sd, dP);
 		light_ray.dD = differential3_zero();

-		if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
+		if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow))
 			path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
 	}
 }
@@ -435,6 +438,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
 ccl_device bool kernel_path_subsurface_scatter(
        KernelGlobals *kg,
        ShaderData *sd,
+        ShaderData *emission_sd,
        PathRadiance *L,
        PathState *state,
        RNG *rng,
@@ -503,7 +507,7 @@ ccl_device bool kernel_path_subsurface_scatter(
 			hit_L->direct_throughput = L->direct_throughput;
 			path_radiance_copy_indirect(hit_L, L);

-			kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, hit_L);
+			kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);

 			if(kernel_path_surface_bounce(kg,
 			                              rng,
@@ -526,6 +530,7 @@ ccl_device bool kernel_path_subsurface_scatter(

 					kernel_volume_stack_update_for_subsurface(
 					    kg,
+					    emission_sd,
 					    &volume_ray,
 					    hit_state->volume_stack);
 				}
@@ -604,8 +609,13 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,

 	path_radiance_init(&L, kernel_data.film.use_light_pass);

+	/* shader data memory used for both volumes and surfaces, saves stack space */
+	ShaderData sd;
+	/* shader data used by emission, shadows, volume stacks */
+	ShaderData emission_sd;
+
 	PathState state;
-	path_state_init(kg, &state, rng, sample, &ray);
+	path_state_init(kg, &emission_sd, &state, rng, sample, &ray);

 #ifdef __KERNEL_DEBUG__
 	DebugData debug_data;
@@ -669,7 +679,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 			/* intersect with lamp */
 			float3 emission;

-			if(indirect_lamp_emission(kg, &state, &light_ray, &emission))
+			if(indirect_lamp_emission(kg, &emission_sd, &state, &light_ray, &emission))
 				path_radiance_accum_emission(&L, throughput, emission, state.bounce);
 		}
 #endif
@@ -689,11 +699,10 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 			if(decoupled) {
 				/* cache steps along volume for repeated sampling */
 				VolumeSegment volume_segment;
-				ShaderData volume_sd;

-				shader_setup_from_volume(kg, &volume_sd, &volume_ray);
+				shader_setup_from_volume(kg, &sd, &volume_ray);
 				kernel_volume_decoupled_record(kg, &state,
-					&volume_ray, &volume_sd, &volume_segment, heterogeneous);
+					&volume_ray, &sd, &volume_segment, heterogeneous);

 				volume_segment.sampling_method = sampling_method;

@@ -708,8 +717,9 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 					int all = false;

 					/* direct light sampling */
-					kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
-						throughput, &state, &L, all, &volume_ray, &volume_segment);
+					kernel_branched_path_volume_connect_light(kg, rng, &sd,
+						&emission_sd, throughput, &state, &L, all,
+						&volume_ray, &volume_segment);

 					/* indirect sample. if we use distance sampling and take just
 					 * one sample for direct and indirect light, we could share
@@ -718,7 +728,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 					float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);

 					result = kernel_volume_decoupled_scatter(kg,
-						&state, &volume_ray, &volume_sd, &throughput,
+						&state, &volume_ray, &sd, &throughput,
 						rphase, rscatter, &volume_segment, NULL, true);
 				}

@@ -726,7 +736,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 				kernel_volume_decoupled_free(kg, &volume_segment);

 				if(result == VOLUME_PATH_SCATTERED) {
-					if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
+					if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
 						continue;
 					else
 						break;
@@ -739,17 +749,16 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 #  endif
 			{
 				/* integrate along volume segment with distance sampling */
-				ShaderData volume_sd;
 				VolumeIntegrateResult result = kernel_volume_integrate(
-					kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous);
+					kg, &state, &sd, &volume_ray, &L, &throughput, rng, heterogeneous);

 #  ifdef __VOLUME_SCATTER__
 				if(result == VOLUME_PATH_SCATTERED) {
 					/* direct lighting */
-					kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L);
+					kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);

 					/* indirect light bounce */
-					if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
+					if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
 						continue;
 					else
 						break;
@@ -772,7 +781,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,

 #ifdef __BACKGROUND__
 			/* sample background shader */
-			float3 L_background = indirect_background(kg, &state, &ray);
+			float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
 			path_radiance_accum_background(&L, throughput, L_background, state.bounce);
 #endif

@@ -780,7 +789,6 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 		}

 		/* setup shading */
-		ShaderData sd;
 		shader_setup_from_ray(kg, &sd, &isect, &ray);
 		float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
 		shader_eval_surface(kg, &sd, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
@@ -848,7 +856,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 #ifdef __AO__
 		/* ambient occlusion */
 		if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
-			kernel_path_ao(kg, &sd, &L, &state, rng, throughput);
+			kernel_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
 		}
 #endif

@@ -858,6 +866,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 		if(sd.flag & SD_BSSRDF) {
 			if(kernel_path_subsurface_scatter(kg,
 			                                  &sd,
+			                                  &emission_sd,
 			                                  &L,
 			                                  &state,
 			                                  rng,
@@ -871,7 +880,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 #endif  /* __SUBSURFACE__ */

 		/* direct lighting */
-		kernel_path_surface_connect_light(kg, rng, &sd, throughput, &state, &L);
+		kernel_path_surface_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);

 		/* compute direct lighting and next bounce */
 		if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -18,7 +18,13 @@ CCL_NAMESPACE_BEGIN

 #ifdef __BRANCHED_PATH__

-ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput)
+ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
+                                        ShaderData *sd,
+                                        ShaderData *emission_sd,
+                                        PathRadiance *L,
+                                        PathState *state,
+                                        RNG *rng,
+                                        float3 throughput)
 {
 	int num_samples = kernel_data.integrator.ao_samples;
 	float num_samples_inv = 1.0f/num_samples;
@@ -49,7 +55,7 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathR
 			light_ray.dP = ccl_fetch(sd, dP);
 			light_ray.dD = differential3_zero();

-			if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
+			if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow))
 				path_radiance_accum_ao(L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
 		}
 	}
@@ -58,8 +64,8 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathR

 /* bounce off surface and integrate indirect light */
 ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
-	RNG *rng, ShaderData *sd, float3 throughput, float num_samples_adjust,
-	PathState *state, PathRadiance *L)
+	RNG *rng, ShaderData *sd, ShaderData *indirect_sd, ShaderData *emission_sd,
+	float3 throughput, float num_samples_adjust, PathState *state, PathRadiance *L)
 {
 	for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
 		const ShaderClosure *sc = &ccl_fetch(sd, closure)[i];
@@ -106,6 +112,8 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
 			}

 			kernel_path_indirect(kg,
+			                     indirect_sd,
+			                     emission_sd,
 			                     rng,
 			                     &bsdf_ray,
 			                     tp*num_samples_inv,
@@ -124,6 +132,8 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
 #ifdef __SUBSURFACE__
 ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
                                                        ShaderData *sd,
+                                                        ShaderData *indirect_sd,
+                                                        ShaderData *emission_sd,
                                                        PathRadiance *L,
                                                        PathState *state,
                                                        RNG *rng,
@@ -186,6 +196,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,

 					kernel_volume_stack_update_for_subsurface(
 					    kg,
+					    emission_sd,
 					    &volume_ray,
 					    hit_state.volume_stack);
 				}
@@ -199,6 +210,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
 					        kg,
 					        rng,
 					        &bssrdf_sd,
+					        emission_sd,
 					        &hit_state,
 					        throughput,
 					        num_samples_inv,
@@ -212,6 +224,8 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
 				        kg,
 				        rng,
 				        &bssrdf_sd,
+				        indirect_sd,
+				        emission_sd,
 				        throughput,
 				        num_samples_inv,
 				        &hit_state,
@@ -231,8 +245,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in

 	path_radiance_init(&L, kernel_data.film.use_light_pass);

+	/* shader data memory used for both volumes and surfaces, saves stack space */
+	ShaderData sd;
+	/* shader data used by emission, shadows, volume stacks, indirect path */
+	ShaderData emission_sd, indirect_sd;
+
 	PathState state;
-	path_state_init(kg, &state, rng, sample, &ray);
+	path_state_init(kg, &emission_sd, &state, rng, sample, &ray);

 #ifdef __KERNEL_DEBUG__
 	DebugData debug_data;
@@ -287,11 +306,10 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in

 			/* cache steps along volume for repeated sampling */
 			VolumeSegment volume_segment;
-			ShaderData volume_sd;

-			shader_setup_from_volume(kg, &volume_sd, &volume_ray);
+			shader_setup_from_volume(kg, &sd, &volume_ray);
 			kernel_volume_decoupled_record(kg, &state,
-				&volume_ray, &volume_sd, &volume_segment, heterogeneous);
+				&volume_ray, &sd, &volume_segment, heterogeneous);

 			/* direct light sampling */
 			if(volume_segment.closure_flag & SD_SCATTER) {
@@ -299,8 +317,9 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in

 				int all = kernel_data.integrator.sample_all_lights_direct;

-				kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
-					throughput, &state, &L, all, &volume_ray, &volume_segment);
+				kernel_branched_path_volume_connect_light(kg, rng, &sd,
+					&emission_sd, throughput, &state, &L, all,
+					&volume_ray, &volume_segment);

 				/* indirect light sampling */
 				int num_samples = kernel_data.integrator.volume_samples;
@@ -326,20 +345,22 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 					float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE);

 					VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
-						&ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
+						&ps, &pray, &sd, &tp, rphase, rscatter, &volume_segment, NULL, false);

 					(void)result;
 					kernel_assert(result == VOLUME_PATH_SCATTERED);

 					if(kernel_path_volume_bounce(kg,
 					                             rng,
-					                             &volume_sd,
+					                             &sd,
 					                             &tp,
 					                             &ps,
 					                             &L,
 					                             &pray))
 					{
 						kernel_path_indirect(kg,
+						                     &indirect_sd,
+						                     &emission_sd,
 						                     rng,
 						                     &pray,
 						                     tp*num_samples_inv,
@@ -373,30 +394,31 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 			for(int j = 0; j < num_samples; j++) {
 				PathState ps = state;
 				Ray pray = ray;
-				ShaderData volume_sd;
 				float3 tp = throughput * num_samples_inv;

 				/* branch RNG state */
 				path_state_branch(&ps, j, num_samples);

 				VolumeIntegrateResult result = kernel_volume_integrate(
-					kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous);
+					kg, &ps, &sd, &volume_ray, &L, &tp, rng, heterogeneous);

 #ifdef __VOLUME_SCATTER__
 				if(result == VOLUME_PATH_SCATTERED) {
 					/* todo: support equiangular, MIS and all light sampling.
 					 * alternatively get decoupled ray marching working on the GPU */
-					kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L);
+					kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, tp, &state, &L);

 					if(kernel_path_volume_bounce(kg,
 					                             rng,
-					                             &volume_sd,
+					                             &sd,
 					                             &tp,
 					                             &ps,
 					                             &L,
 					                             &pray))
 					{
 						kernel_path_indirect(kg,
+						                     &indirect_sd,
+						                     &emission_sd,
 						                     rng,
 						                     &pray,
 						                     tp,
@@ -414,7 +436,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 			}

 			/* todo: avoid this calculation using decoupled ray marching */
-			kernel_volume_shadow(kg, &state, &volume_ray, &throughput);
+			kernel_volume_shadow(kg, &emission_sd, &state, &volume_ray, &throughput);
 #endif
 		}
 #endif
@@ -432,7 +454,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in

 #ifdef __BACKGROUND__
 			/* sample background shader */
-			float3 L_background = indirect_background(kg, &state, &ray);
+			float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
 			path_radiance_accum_background(&L, throughput, L_background, state.bounce);
 #endif

@@ -440,7 +462,6 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 		}

 		/* setup shading */
-		ShaderData sd;
 		shader_setup_from_ray(kg, &sd, &isect, &ray);
 		shader_eval_surface(kg, &sd, &state, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
 		shader_merge_closures(&sd);
@@ -499,15 +520,15 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 #ifdef __AO__
 		/* ambient occlusion */
 		if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
-			kernel_branched_path_ao(kg, &sd, &L, &state, rng, throughput);
+			kernel_branched_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
 		}
 #endif

 #ifdef __SUBSURFACE__
 		/* bssrdf scatter to a different location on the same object */
 		if(sd.flag & SD_BSSRDF) {
-			kernel_branched_path_subsurface_scatter(kg, &sd, &L, &state,
-			                                        rng, &ray, throughput);
+			kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, &emission_sd,
+			                                        &L, &state, rng, &ray, throughput);
 		}
 #endif

@@ -519,13 +540,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 			if(kernel_data.integrator.use_direct_light) {
 				int all = kernel_data.integrator.sample_all_lights_direct;
 				kernel_branched_path_surface_connect_light(kg, rng,
-					&sd, &hit_state, throughput, 1.0f, &L, all);
+					&sd, &emission_sd, &hit_state, throughput, 1.0f, &L, all);
 			}
 #endif

 			/* indirect light */
 			kernel_branched_path_surface_indirect_light(kg, rng,
-				&sd, throughput, 1.0f, &hit_state, &L);
+				&sd, &indirect_sd, &emission_sd, throughput, 1.0f, &hit_state, &L);

 			/* continue in case of transparency */
 			throughput *= shader_bsdf_transparency(kg, &sd);
--- a/intern/cycles/kernel/kernel_path_state.h
+++ b/intern/cycles/kernel/kernel_path_state.h
@@ -16,7 +16,12 @@

 CCL_NAMESPACE_BEGIN

-ccl_device_inline void path_state_init(KernelGlobals *kg, ccl_addr_space PathState *state, ccl_addr_space RNG *rng, int sample, ccl_addr_space Ray *ray)
+ccl_device_inline void path_state_init(KernelGlobals *kg,
+                                       ShaderData *stack_sd,
+                                       ccl_addr_space PathState *state,
+                                       ccl_addr_space RNG *rng,
+                                       int sample,
+                                       ccl_addr_space Ray *ray)
 {
 	state->flag = PATH_RAY_CAMERA|PATH_RAY_MIS_SKIP;

@@ -41,7 +46,7 @@ ccl_device_inline void path_state_init(KernelGlobals *kg, ccl_addr_space PathSta

 	if(kernel_data.integrator.use_volumes) {
 		/* initialize volume stack with volume we are inside of */
-		kernel_volume_stack_init(kg, ray, state->volume_stack);
+		kernel_volume_stack_init(kg, stack_sd, ray, state->volume_stack);
 		/* seed RNG for cases where we can't use stratified samples */
 		state->rng_congruential = lcg_init(*rng + sample*0x51633e2d);
 	}
@@ -131,9 +136,6 @@ ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg, PathState *s
 	/* todo: this is not supported as its own ray visibility yet */
 	if(state->flag & PATH_RAY_VOLUME_SCATTER)
 		flag |= PATH_RAY_DIFFUSE;
-	/* for camera visibility, use render layer flags */
-	if(flag & PATH_RAY_CAMERA)
-		flag |= kernel_data.integrator.layer_flag;

 	return flag;
 }
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -20,7 +20,8 @@ CCL_NAMESPACE_BEGIN

 /* branched path tracing: connect path directly to position on one or more lights and add it to L */
 ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RNG *rng,
-	ShaderData *sd, PathState *state, float3 throughput, float num_samples_adjust, PathRadiance *L, int sample_all_lights)
+	ShaderData *sd, ShaderData *emission_sd, PathState *state, float3 throughput,
+	float num_samples_adjust, PathRadiance *L, int sample_all_lights)
 {
 #ifdef __EMISSION__
 	/* sample illumination from lights to find path contribution */
@@ -55,11 +56,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
 				LightSample ls;
 				lamp_light_sample(kg, i, light_u, light_v, ccl_fetch(sd, P), &ls);

-				if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+				if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
 					/* trace shadow ray */
 					float3 shadow;

-					if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+					if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
 						/* accumulate */
 						path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
 					}
@@ -87,11 +88,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
 				LightSample ls;
 				light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);

-				if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+				if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
 					/* trace shadow ray */
 					float3 shadow;

-					if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+					if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
 						/* accumulate */
 						path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
 					}
@@ -109,11 +110,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
 		light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);

 		/* sample random light */
-		if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+		if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
 			/* trace shadow ray */
 			float3 shadow;

-			if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+			if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
 				/* accumulate */
 				path_radiance_accum_light(L, throughput*num_samples_adjust, &L_light, shadow, num_samples_adjust, state->bounce, is_lamp);
 			}
@@ -184,7 +185,8 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng,
 #ifndef __SPLIT_KERNEL__
 /* path tracing: connect path directly to position on a light and add it to L */
 ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_addr_space RNG *rng,
-	ShaderData *sd, float3 throughput, ccl_addr_space PathState *state, PathRadiance *L)
+	ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state,
+	PathRadiance *L)
 {
 #ifdef __EMISSION__
 	if(!(kernel_data.integrator.use_direct_light && (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL)))
@@ -206,11 +208,11 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_
 	LightSample ls;
 	light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);

-	if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+	if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
 		/* trace shadow ray */
 		float3 shadow;

-		if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+		if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
 			/* accumulate */
 			path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
 		}
--- a/intern/cycles/kernel/kernel_path_volume.h
+++ b/intern/cycles/kernel/kernel_path_volume.h
@@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN
 #ifdef __VOLUME_SCATTER__

 ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
-	ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L)
+	ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L)
 {
 #ifdef __EMISSION__
 	if(!kernel_data.integrator.use_direct_light)
@@ -44,11 +44,11 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
 	if(ls.pdf == 0.0f)
 		return;
 	
-	if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+	if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
 		/* trace shadow ray */
 		float3 shadow;

-		if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+		if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
 			/* accumulate */
 			path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
 		}
@@ -106,7 +106,7 @@ bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
 }

 ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
-	ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L,
+	ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L,
 	bool sample_all_lights, Ray *ray, const VolumeSegment *segment)
 {
 #ifdef __EMISSION__
@@ -160,11 +160,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
 				if(ls.pdf == 0.0f)
 					continue;

-				if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+				if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
 					/* trace shadow ray */
 					float3 shadow;

-					if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+					if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
 						/* accumulate */
 						path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
 					}
@@ -211,11 +211,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
 				if(ls.pdf == 0.0f)
 					continue;

-				if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+				if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
 					/* trace shadow ray */
 					float3 shadow;

-					if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+					if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
 						/* accumulate */
 						path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
 					}
@@ -251,11 +251,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
 			return;

 		/* sample random light */
-		if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+		if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
 			/* trace shadow ray */
 			float3 shadow;

-			if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+			if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
 				/* accumulate */
 				path_radiance_accum_light(L, tp, &L_light, shadow, 1.0f, state->bounce, is_lamp);
 			}
--- a/intern/cycles/kernel/kernel_projection.h
+++ b/intern/cycles/kernel/kernel_projection.h
@@ -225,7 +225,7 @@ ccl_device float3 spherical_stereo_position(KernelGlobals *kg,
                                            float3 dir,
                                            float3 pos)
 {
-	const float interocular_offset = kernel_data.cam.interocular_offset;
+	float interocular_offset = kernel_data.cam.interocular_offset;

 	/* Interocular offset of zero means either non stereo, or stereo without
 	 * spherical stereo.
@@ -234,6 +234,21 @@ ccl_device float3 spherical_stereo_position(KernelGlobals *kg,
 		return pos;
 	}

+	if(kernel_data.cam.pole_merge_angle_to > 0.0f) {
+		float3 normalized_direction = normalize(dir);
+		const float pole_merge_angle_from = kernel_data.cam.pole_merge_angle_from,
+		            pole_merge_angle_to = kernel_data.cam.pole_merge_angle_to;
+		float altitude = fabsf(safe_asinf(normalized_direction.z));
+		if(altitude > pole_merge_angle_to) {
+			interocular_offset = 0.0f;
+		}
+		else if(altitude > pole_merge_angle_from) {
+			float fac = (altitude - pole_merge_angle_from) / (pole_merge_angle_to - pole_merge_angle_from);
+			float fade = cosf(fac * M_PI_2_F);
+			interocular_offset *= fade;
+		}
+	}
+
 	float3 up = make_float3(0.0f, 0.0f, 1.0f);
 	float3 side = normalize(cross(dir, up));

--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -41,7 +41,7 @@ CCL_NAMESPACE_BEGIN

 #define STACK_MAX_HITS 64

-ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow)
+ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *shadow)
 {
 	*shadow = make_float3(1.0f, 1.0f, 1.0f);

@@ -59,14 +59,20 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
 		/* intersect to find an opaque surface, or record all transparent surface hits */
 		Intersection hits_stack[STACK_MAX_HITS];
 		Intersection *hits = hits_stack;
-		uint max_hits = kernel_data.integrator.transparent_max_bounce - state->transparent_bounce - 1;
+		const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
+		uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;

 		/* prefer to use stack but use dynamic allocation if too deep max hits
 		 * we need max_hits + 1 storage space due to the logic in
 		 * scene_intersect_shadow_all which will first store and then check if
 		 * the limit is exceeded */
-		if(max_hits + 1 > STACK_MAX_HITS)
-			hits = (Intersection*)malloc(sizeof(Intersection)*(max_hits + 1));
+		if(max_hits + 1 > STACK_MAX_HITS) {
+			if(kg->transparent_shadow_intersections == NULL) {
+				kg->transparent_shadow_intersections =
+				    (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1));
+			}
+			hits = kg->transparent_shadow_intersections;
+		}

 		uint num_hits;
 		blocked = scene_intersect_shadow_all(kg, ray, hits, max_hits, &num_hits);
@@ -101,39 +107,35 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
 				if(ps.volume_stack[0].shader != SHADER_NONE) {
 					Ray segment_ray = *ray;
 					segment_ray.t = isect->t;
-					kernel_volume_shadow(kg, &ps, &segment_ray, &throughput);
+					kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput);
 				}
 #endif

 				/* setup shader data at surface */
-				ShaderData sd;
-				shader_setup_from_ray(kg, &sd, isect, ray);
+				shader_setup_from_ray(kg, shadow_sd, isect, ray);

 				/* attenuation from transparent surface */
-				if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
+				if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
 					path_state_modify_bounce(state, true);
-					shader_eval_surface(kg, &sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
+					shader_eval_surface(kg, shadow_sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
 					path_state_modify_bounce(state, false);

-					throughput *= shader_bsdf_transparency(kg, &sd);
+					throughput *= shader_bsdf_transparency(kg, shadow_sd);
 				}

 				/* stop if all light is blocked */
 				if(is_zero(throughput)) {
-					/* free dynamic storage */
-					if(hits != hits_stack)
-						free(hits);
 					return true;
 				}

 				/* move ray forward */
-				ray->P = sd.P;
+				ray->P = shadow_sd->P;
 				if(ray->t != FLT_MAX)
 					ray->D = normalize_len(Pend - ray->P, &ray->t);

 #ifdef __VOLUME__
 				/* exit/enter volume */
-				kernel_volume_stack_enter_exit(kg, &sd, ps.volume_stack);
+				kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack);
 #endif

 				bounce++;
@@ -142,19 +144,13 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
 #ifdef __VOLUME__
 			/* attenuation for last line segment towards light */
 			if(ps.volume_stack[0].shader != SHADER_NONE)
-				kernel_volume_shadow(kg, &ps, ray, &throughput);
+				kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
 #endif

 			*shadow = throughput;

-			if(hits != hits_stack)
-				free(hits);
 			return is_zero(throughput);
 		}
-
-		/* free dynamic storage */
-		if(hits != hits_stack)
-			free(hits);
 	}
 	else {
 		Intersection isect;
@@ -164,7 +160,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
 #ifdef __VOLUME__
 	if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
 		/* apply attenuation from current volume shader */
-		kernel_volume_shadow(kg, state, ray, shadow);
+		kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
 	}
 #endif

@@ -184,6 +180,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
 * one extra ray cast for the cases were we do want transparency. */

 ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
+                                        ShaderData *shadow_sd,
                                         ccl_addr_space PathState *state,
                                         ccl_addr_space Ray *ray_input,
                                         float3 *shadow)
@@ -228,7 +225,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
 #ifdef __VOLUME__
 					/* attenuation for last line segment towards light */
 					if(ps.volume_stack[0].shader != SHADER_NONE)
-						kernel_volume_shadow(kg, &ps, ray, &throughput);
+						kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
 #endif

 					*shadow *= throughput;
@@ -244,39 +241,33 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
 				if(ps.volume_stack[0].shader != SHADER_NONE) {
 					Ray segment_ray = *ray;
 					segment_ray.t = isect->t;
-					kernel_volume_shadow(kg, &ps, &segment_ray, &throughput);
+					kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput);
 				}
 #endif

 				/* setup shader data at surface */
-#ifdef __SPLIT_KERNEL__
-				ShaderData *sd = kg->sd_input;
-#else
-				ShaderData sd_object;
-				ShaderData *sd = &sd_object;
-#endif
-				shader_setup_from_ray(kg, sd, isect, ray);
+				shader_setup_from_ray(kg, shadow_sd, isect, ray);

 				/* attenuation from transparent surface */
-				if(!(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)) {
+				if(!(ccl_fetch(shadow_sd, flag) & SD_HAS_ONLY_VOLUME)) {
 					path_state_modify_bounce(state, true);
-					shader_eval_surface(kg, sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
+					shader_eval_surface(kg, shadow_sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
 					path_state_modify_bounce(state, false);

-					throughput *= shader_bsdf_transparency(kg, sd);
+					throughput *= shader_bsdf_transparency(kg, shadow_sd);
 				}

 				if(is_zero(throughput))
 					return true;

 				/* move ray forward */
-				ray->P = ray_offset(ccl_fetch(sd, P), -ccl_fetch(sd, Ng));
+				ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng));
 				if(ray->t != FLT_MAX)
 					ray->D = normalize_len(Pend - ray->P, &ray->t);

 #ifdef __VOLUME__
 				/* exit/enter volume */
-				kernel_volume_stack_enter_exit(kg, sd, ps.volume_stack);
+				kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack);
 #endif

 				bounce++;
@@ -286,7 +277,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
 #ifdef __VOLUME__
 	else if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
 		/* apply attenuation from current volume shader */
-		kernel_volume_shadow(kg, state, ray, shadow);
+		kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
 	}
 #endif
 #endif
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -72,6 +72,8 @@ KERNEL_TEX(float, texture_float, __lookup_table)
 /* sobol */
 KERNEL_TEX(uint, texture_uint, __sobol_directions)

+#ifdef __KERNEL_CUDA__
+#  if __CUDA_ARCH__ < 300
 /* full-float image */
 KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_000)
 KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_001)
@@ -174,66 +176,12 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_089)
 KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_090)
 KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_091)
 KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_092)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_093)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_094)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_095)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_096)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_097)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_098)

-/* Kepler and above */
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_099)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_100)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_101)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_102)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_103)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_104)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_105)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_106)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_107)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_108)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_109)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_110)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_111)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_112)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_113)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_114)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_115)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_116)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_117)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_118)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_119)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_120)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_121)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_122)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_123)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_124)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_125)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_126)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_127)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_128)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_129)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_130)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_131)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_132)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_133)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_134)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_135)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_136)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_137)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_138)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_139)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_140)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_141)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_142)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_143)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_144)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_145)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_146)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_147)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_148)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_149)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_150)
+#  else
+/* bindless textures */
+KERNEL_TEX(uint, texture_uint, __bindless_mapping)
+#  endif
+#endif

 /* packed image (opencl) */
 KERNEL_TEX(uchar4, texture_uchar4, __tex_image_byte4_packed)
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -121,9 +121,7 @@ CCL_NAMESPACE_BEGIN
 #    define __OBJECT_MOTION__
 #    define __HAIR__
 #    define __BAKING__
-#    ifdef __KERNEL_EXPERIMENTAL__
-#      define __TRANSPARENT_SHADOWS__
-#    endif
+#    define __TRANSPARENT_SHADOWS__
 #  endif  /* __KERNEL_OPENCL_AMD__ */

 #  ifdef __KERNEL_OPENCL_INTEL_CPU__
@@ -276,10 +274,7 @@ enum SamplingPattern {
 	SAMPLING_NUM_PATTERNS,
 };

-/* these flags values correspond to raytypes in osl.cpp, so keep them in sync!
- *
- * for ray visibility tests in BVH traversal, the upper 20 bits are used for
- * layer visibility tests. */
+/* these flags values correspond to raytypes in osl.cpp, so keep them in sync! */

 enum PathRayFlag {
 	PATH_RAY_CAMERA = 1,
@@ -303,9 +298,6 @@ enum PathRayFlag {
 	PATH_RAY_MIS_SKIP = 2048,
 	PATH_RAY_DIFFUSE_ANCESTOR = 4096,
 	PATH_RAY_SINGLE_PASS_DONE = 8192,
-
-	/* we need layer member flags to be the 20 upper bits */
-	PATH_RAY_LAYER_SHIFT = (32-20)
 };

 /* Closure Label */
@@ -909,9 +901,10 @@ typedef struct KernelCamera {
 	float4 equirectangular_range;

 	/* stereo */
-	int pad1, pad2;
 	float interocular_offset;
 	float convergence_distance;
+	float pole_merge_angle_from;
+	float pole_merge_angle_to;

 	/* matrices */
 	Transform cameratoworld;
@@ -1080,9 +1073,6 @@ typedef struct KernelIntegrator {
 	/* seed */
 	int seed;

-	/* render layer */
-	int layer_flag;
-
 	/* clamp */
 	float sample_clamp_direct;
 	float sample_clamp_indirect;
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -219,15 +219,14 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState

 /* get the volume attenuation over line segment defined by ray, with the
 * assumption that there are no surfaces blocking light between the endpoints */
-ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, PathState *state, Ray *ray, float3 *throughput)
+ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *throughput)
 {
-	ShaderData sd;
-	shader_setup_from_volume(kg, &sd, ray);
+	shader_setup_from_volume(kg, shadow_sd, ray);

 	if(volume_stack_is_heterogeneous(kg, state->volume_stack))
-		kernel_volume_shadow_heterogeneous(kg, state, ray, &sd, throughput);
+		kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
 	else
-		kernel_volume_shadow_homogeneous(kg, state, ray, &sd, throughput);
+		kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
 }

 /* Equi-angular sampling as in:
@@ -277,7 +276,7 @@ ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int
 	float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t);

 	*transmittance = volume_color_transmittance(sigma_t, sample_t);
-	*pdf = (sigma_t * *transmittance)/(make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
+	*pdf = safe_divide_color(sigma_t * *transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);

 	/* todo: optimization: when taken together with hit/miss decision,
 	 * the full_transmittance cancels out drops out and xi does not
@@ -291,7 +290,7 @@ ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float
 	float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
 	float3 transmittance = volume_color_transmittance(sigma_t, sample_t);

-	return (sigma_t * transmittance)/(make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
+	return safe_divide_color(sigma_t * transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
 }

 /* Emission */
@@ -626,12 +625,36 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
 		const int global_max_steps = kernel_data.integrator.volume_max_steps;
 		step_size = kernel_data.integrator.volume_step_size;
 		/* compute exact steps in advance for malloc */
-		max_steps = max((int)ceilf(ray->t/step_size), 1);
-		if(max_steps > global_max_steps) {
+		if(ray->t > global_max_steps*step_size) {
 			max_steps = global_max_steps;
 			step_size = ray->t / (float)max_steps;
 		}
+		else {
+			max_steps = max((int)ceilf(ray->t/step_size), 1);
+		}
+#ifdef __KERNEL_CPU__
+		/* NOTE: With branched path tracing, direct and indirect light
+		 * integration can both have volume segments allocated at the same time.
+		 * We track this with an index into the pre-allocated memory. Currently
+		 * only two segments may be allocated at a time; if more are needed,
+		 * KernelGlobals will have to be modified.
+		 *
+		 * This restricts decoupled record to stack-like usage: a nested call
+		 * of decoupled record must free its memory before its caller frees
+		 * its own.
+		 */
+		const int index = kg->decoupled_volume_steps_index;
+		assert(index < sizeof(kg->decoupled_volume_steps) /
+		               sizeof(*kg->decoupled_volume_steps));
+		if(kg->decoupled_volume_steps[index] == NULL) {
+			kg->decoupled_volume_steps[index] =
+			        (VolumeStep*)malloc(sizeof(VolumeStep)*global_max_steps);
+		}
+		segment->steps = kg->decoupled_volume_steps[index];
+		++kg->decoupled_volume_steps_index;
+#else
 		segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
+#endif
 		random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
 	}
 	else {
@@ -745,8 +768,18 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta

 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
 {
-	if(segment->steps != &segment->stack_step)
+	if(segment->steps != &segment->stack_step) {
+#ifdef __KERNEL_CPU__
+		/* NOTE: Only the most recently allocated segment may be freed.
+		 * Arbitrary alloc/free order is not supported.
+		 */
+		assert(kg->decoupled_volume_steps_index > 0);
+		assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
+		--kg->decoupled_volume_steps_index;
+#else
 		free(segment->steps);
+#endif
+	}
 }

 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
@@ -968,6 +1001,7 @@ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneou
 * is inside of. */

 ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
+                                         ShaderData *stack_sd,
                                         Ray *ray,
                                         VolumeStack *stack)
 {
@@ -994,14 +1028,13 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,

 	int stack_index = 0, enclosed_index = 0;

-	const uint visibility = PATH_RAY_ALL_VISIBILITY | kernel_data.integrator.layer_flag;
 #ifdef __VOLUME_RECORD_ALL__
 	Intersection hits[2*VOLUME_STACK_SIZE];
 	uint num_hits = scene_intersect_volume_all(kg,
 	                                           &volume_ray,
 	                                           hits,
 	                                           2*VOLUME_STACK_SIZE,
-	                                           visibility);
+	                                           PATH_RAY_ALL_VISIBILITY);
 	if(num_hits > 0) {
 		int enclosed_volumes[VOLUME_STACK_SIZE];
 		Intersection *isect = hits;
@@ -1009,28 +1042,27 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
 		qsort(hits, num_hits, sizeof(Intersection), intersections_compare);

 		for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
-			ShaderData sd;
-			shader_setup_from_ray(kg, &sd, isect, &volume_ray);
-			if(sd.flag & SD_BACKFACING) {
+			shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
+			if(stack_sd->flag & SD_BACKFACING) {
 				bool need_add = true;
 				for(int i = 0; i < enclosed_index && need_add; ++i) {
 					/* If ray exited the volume and never entered to that volume
 					 * it means that camera is inside such a volume.
 					 */
-					if(enclosed_volumes[i] == sd.object) {
+					if(enclosed_volumes[i] == stack_sd->object) {
 						need_add = false;
 					}
 				}
 				for(int i = 0; i < stack_index && need_add; ++i) {
 					/* Don't add intersections twice. */
-					if(stack[i].object == sd.object) {
+					if(stack[i].object == stack_sd->object) {
 						need_add = false;
 						break;
 					}
 				}
 				if(need_add) {
-					stack[stack_index].object = sd.object;
-					stack[stack_index].shader = sd.shader;
+					stack[stack_index].object = stack_sd->object;
+					stack[stack_index].shader = stack_sd->shader;
 					++stack_index;
 				}
 			}
@@ -1038,7 +1070,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
 				/* If ray from camera enters the volume, this volume shouldn't
 				 * be added to the stack on exit.
 				 */
-				enclosed_volumes[enclosed_index++] = sd.object;
+				enclosed_volumes[enclosed_index++] = stack_sd->object;
 			}
 		}
 	}
@@ -1051,13 +1083,12 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
 	      step < 2 * VOLUME_STACK_SIZE)
 	{
 		Intersection isect;
-		if(!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
+		if(!scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) {
 			break;
 		}

-		ShaderData sd;
-		shader_setup_from_ray(kg, &sd, &isect, &volume_ray);
-		if(sd.flag & SD_BACKFACING) {
+		shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
+		if(stack_sd->flag & SD_BACKFACING) {
 			/* If ray exited the volume and never entered to that volume
 			 * it means that camera is inside such a volume.
 			 */
@@ -1066,20 +1097,20 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
 				/* If ray exited the volume and never entered to that volume
 				 * it means that camera is inside such a volume.
 				 */
-				if(enclosed_volumes[i] == sd.object) {
+				if(enclosed_volumes[i] == stack_sd->object) {
 					need_add = false;
 				}
 			}
 			for(int i = 0; i < stack_index && need_add; ++i) {
 				/* Don't add intersections twice. */
-				if(stack[i].object == sd.object) {
+				if(stack[i].object == stack_sd->object) {
 					need_add = false;
 					break;
 				}
 			}
 			if(need_add) {
-				stack[stack_index].object = sd.object;
-				stack[stack_index].shader = sd.shader;
+				stack[stack_index].object = stack_sd->object;
+				stack[stack_index].shader = stack_sd->shader;
 				++stack_index;
 			}
 		}
@@ -1087,11 +1118,11 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
 			/* If ray from camera enters the volume, this volume shouldn't
 			 * be added to the stack on exit.
 			 */
-			enclosed_volumes[enclosed_index++] = sd.object;
+			enclosed_volumes[enclosed_index++] = stack_sd->object;
 		}

 		/* Move ray forward. */
-		volume_ray.P = ray_offset(sd.P, -sd.Ng);
+		volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
 		++step;
 	}
 #endif
@@ -1159,6 +1190,7 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd

 #ifdef __SUBSURFACE__
 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
+                                                          ShaderData *stack_sd,
                                                          Ray *ray,
                                                          VolumeStack *stack)
 {
@@ -1179,27 +1211,28 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
 		qsort(hits, num_hits, sizeof(Intersection), intersections_compare);

 		for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
-			ShaderData sd;
-			shader_setup_from_ray(kg, &sd, isect, &volume_ray);
-			kernel_volume_stack_enter_exit(kg, &sd, stack);
+			shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
+			kernel_volume_stack_enter_exit(kg, stack_sd, stack);
 		}
 	}
 #  else
 	Intersection isect;
 	int step = 0;
+	float3 Pend = ray->P + ray->D*ray->t;
 	while(step < 2 * VOLUME_STACK_SIZE &&
 	      scene_intersect_volume(kg,
 	                             &volume_ray,
 	                             &isect,
 	                             PATH_RAY_ALL_VISIBILITY))
 	{
-		ShaderData sd;
-		shader_setup_from_ray(kg, &sd, &isect, &volume_ray);
-		kernel_volume_stack_enter_exit(kg, &sd, stack);
+		shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
+		kernel_volume_stack_enter_exit(kg, stack_sd, stack);

 		/* Move ray forward. */
-		volume_ray.P = ray_offset(sd.P, -sd.Ng);
-		volume_ray.t -= sd.ray_length;
+		volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
+		if(volume_ray.t != FLT_MAX) {
+			volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
+		}
 		++step;
 	}
 #  endif
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -95,7 +95,7 @@ void kernel_tex_copy(KernelGlobals *kg,
 		int id = atoi(name + strlen("__tex_image_float4_"));
 		int array_index = id;

-		if(array_index >= 0 && array_index < TEX_NUM_FLOAT4_IMAGES_CPU) {
+		if(array_index >= 0 && array_index < TEX_NUM_FLOAT4_CPU) {
 			tex = &kg->texture_float4_images[array_index];
 		}

@@ -109,9 +109,9 @@ void kernel_tex_copy(KernelGlobals *kg,
 	else if(strstr(name, "__tex_image_float")) {
 		texture_image_float *tex = NULL;
 		int id = atoi(name + strlen("__tex_image_float_"));
-		int array_index = id - TEX_IMAGE_FLOAT_START_CPU;
+		int array_index = id - TEX_START_FLOAT_CPU;

-		if(array_index >= 0 && array_index < TEX_NUM_FLOAT_IMAGES_CPU) {
+		if(array_index >= 0 && array_index < TEX_NUM_FLOAT_CPU) {
 			tex = &kg->texture_float_images[array_index];
 		}

@@ -125,9 +125,9 @@ void kernel_tex_copy(KernelGlobals *kg,
 	else if(strstr(name, "__tex_image_byte4")) {
 		texture_image_uchar4 *tex = NULL;
 		int id = atoi(name + strlen("__tex_image_byte4_"));
-		int array_index = id - TEX_IMAGE_BYTE4_START_CPU;
+		int array_index = id - TEX_START_BYTE4_CPU;

-		if(array_index >= 0 && array_index < TEX_NUM_BYTE4_IMAGES_CPU) {
+		if(array_index >= 0 && array_index < TEX_NUM_BYTE4_CPU) {
 			tex = &kg->texture_byte4_images[array_index];
 		}

@@ -138,6 +138,22 @@ void kernel_tex_copy(KernelGlobals *kg,
 			tex->extension = extension;
 		}
 	}
+	else if(strstr(name, "__tex_image_byte")) {
+		texture_image_uchar *tex = NULL;
+		int id = atoi(name + strlen("__tex_image_byte_"));
+		int array_index = id - TEX_START_BYTE_CPU;
+
+		if(array_index >= 0 && array_index < TEX_NUM_BYTE_CPU) {
+			tex = &kg->texture_byte_images[array_index];
+		}
+
+		if(tex) {
+			tex->data = (uchar*)mem;
+			tex->dimensions_set(width, height, depth);
+			tex->interpolation = interpolation;
+			tex->extension = extension;
+		}
+	}
 	else
 		assert(0);
 }
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KERNEL_CPU_IMAGE_H__
+#define __KERNEL_CPU_IMAGE_H__
+
+#ifdef __KERNEL_CPU__
+
+CCL_NAMESPACE_BEGIN
+
+/* Interpolated image lookup at (x, y) for the image slot tex.
+ *
+ * The slot index selects the backing array: indices from TEX_START_BYTE_CPU
+ * up use texture_byte_images, from TEX_START_FLOAT_CPU texture_float_images,
+ * from TEX_START_BYTE4_CPU texture_byte4_images, and anything below that
+ * texture_float4_images (indexed by tex directly). Ranges are tested from
+ * the highest start downwards, so this presumably relies on the starts being
+ * ordered BYTE4 <= FLOAT <= BYTE (they are defined elsewhere - confirm).
+ */
+ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float x, float y)
+{
+	if(tex >= TEX_START_BYTE_CPU)
+		return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp(x, y);
+	else if(tex >= TEX_START_FLOAT_CPU)
+		return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp(x, y);
+	else if(tex >= TEX_START_BYTE4_CPU)
+		return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp(x, y);
+	else
+		return kg->texture_float4_images[tex].interp(x, y);
+}
+
+/* Interpolated image lookup at (x, y, z) for the image slot tex, using the
+ * same slot range dispatch as kernel_tex_image_interp_impl but calling
+ * interp_3d on the selected image. */
+ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, float x, float y, float z)
+{
+	if(tex >= TEX_START_BYTE_CPU)
+		return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d(x, y, z);
+	else if(tex >= TEX_START_FLOAT_CPU)
+		return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d(x, y, z);
+	else if(tex >= TEX_START_BYTE4_CPU)
+		return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d(x, y, z);
+	else
+		return kg->texture_float4_images[tex].interp_3d(x, y, z);
+}
+
+/* Same as kernel_tex_image_interp_3d_impl, but forwards the caller supplied
+ * interpolation argument to interp_3d_ex on the selected image instead of
+ * calling interp_3d. */
+ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation)
+{
+	if(tex >= TEX_START_BYTE_CPU)
+		return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d_ex(x, y, z, interpolation);
+	else if(tex >= TEX_START_FLOAT_CPU)
+		return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d_ex(x, y, z, interpolation);
+	else if(tex >= TEX_START_BYTE4_CPU)
+		return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d_ex(x, y, z, interpolation);
+	else
+		return kg->texture_float4_images[tex].interp_3d_ex(x, y, z, interpolation);
+}
+
+CCL_NAMESPACE_END
+
+#endif  /* __KERNEL_CPU__ */
+
+#endif  /* __KERNEL_CPU_IMAGE_H__ */
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -24,6 +24,7 @@
 #include "kernel_math.h"
 #include "kernel_types.h"
 #include "kernel_globals.h"
+#include "kernel_cpu_image.h"
 #include "kernel_film.h"
 #include "kernel_path.h"
 #include "kernel_path_branched.h"
--- a/intern/cycles/kernel/osl/CMakeLists.txt
+++ b/intern/cycles/kernel/osl/CMakeLists.txt
@@ -3,6 +3,7 @@ set(INC
 	.
 	..
 	../svm
+	../../graph
 	../../render
 	../../util
 	../../device
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -45,7 +45,7 @@
 #include "kernel_differential.h"
 #include "kernel_montecarlo.h"
 #include "kernel_camera.h"
-
+#include "kernels/cpu/kernel_cpu_image.h"
 #include "geom/geom.h"

 #include "kernel_projection.h"
--- a/intern/cycles/kernel/shaders/node_brick_texture.osl
+++ b/intern/cycles/kernel/shaders/node_brick_texture.osl
@@ -59,10 +59,10 @@ float brick(point p, float mortar_size, float bias,
 shader node_brick_texture(
 	int use_mapping = 0,
 	matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-	float Offset = 0.5,
-	int OffsetFrequency = 2,
-	float Squash = 1.0,
-	int SquashFrequency = 1,
+	float offset = 0.5,
+	int offset_frequency = 2,
+	float squash = 1.0,
+	int squash_frequency = 1,
 	point Vector = P,
 	color Color1 = 0.2,
 	color Color2 = 0.8,
@@ -84,7 +84,7 @@ shader node_brick_texture(
 	color Col = Color1;
 	
 	Fac = brick(p * Scale, MortarSize, Bias, BrickWidth, RowHeight,
-		Offset, OffsetFrequency, Squash, SquashFrequency, tint);
+		offset, offset_frequency, squash, squash_frequency, tint);
 		
 	if (Fac != 1.0) {
 		float facm = 1.0 - tint;
--- a/intern/cycles/kernel/shaders/node_convert_from_color.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_color.osl
@@ -17,18 +17,18 @@
 #include "stdosl.h"

 shader node_convert_from_color(
-	color Color = 0.0,
-	output string String = "",
-	output float Val = 0.0,
-	output int ValInt = 0,
-	output vector Vector = vector(0.0, 0.0, 0.0),
-	output point Point = point(0.0, 0.0, 0.0),
-	output normal Normal = normal(0.0, 0.0, 0.0))
+	color value_color = 0.0,
+	output string value_string = "",
+	output float value_float = 0.0,
+	output int value_int = 0,
+	output vector value_vector = vector(0.0, 0.0, 0.0),
+	output point value_point = point(0.0, 0.0, 0.0),
+	output normal value_normal = normal(0.0, 0.0, 0.0))
 {
-	Val = Color[0] * 0.2126 + Color[1] * 0.7152 + Color[2] * 0.0722;
-	ValInt = (int)(Color[0] * 0.2126 + Color[1] * 0.7152 + Color[2] * 0.0722);
-	Vector = vector(Color[0], Color[1], Color[2]);
-	Point = point(Color[0], Color[1], Color[2]);
-	Normal = normal(Color[0], Color[1], Color[2]);
+	value_float = value_color[0] * 0.2126 + value_color[1] * 0.7152 + value_color[2] * 0.0722;
+	value_int = (int)(value_color[0] * 0.2126 + value_color[1] * 0.7152 + value_color[2] * 0.0722);
+	value_vector = vector(value_color[0], value_color[1], value_color[2]);
+	value_point = point(value_color[0], value_color[1], value_color[2]);
+	value_normal = normal(value_color[0], value_color[1], value_color[2]);
 }

--- a/intern/cycles/kernel/shaders/node_convert_from_float.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_float.osl
@@ -17,18 +17,18 @@
 #include "stdosl.h"

 shader node_convert_from_float(
-	float Val = 0.0,
-	output string String = "",
-	output int ValInt = 0,
-	output color Color = 0.0,
-	output vector Vector = vector(0.0, 0.0, 0.0),
-	output point Point = point(0.0, 0.0, 0.0),
-	output normal Normal = normal(0.0, 0.0, 0.0))
+	float value_float = 0.0,
+	output string value_string = "",
+	output int value_int = 0,
+	output color value_color = 0.0,
+	output vector value_vector = vector(0.0, 0.0, 0.0),
+	output point value_point = point(0.0, 0.0, 0.0),
+	output normal value_normal = normal(0.0, 0.0, 0.0))
 {
-	ValInt = (int)Val;
-	Color = color(Val, Val, Val);
-	Vector = vector(Val, Val, Val);
-	Point = point(Val, Val, Val);
-	Normal = normal(Val, Val, Val);
+	value_int = (int)value_float;
+	value_color = color(value_float, value_float, value_float);
+	value_vector = vector(value_float, value_float, value_float);
+	value_point = point(value_float, value_float, value_float);
+	value_normal = normal(value_float, value_float, value_float);
 }

--- a/intern/cycles/kernel/shaders/node_convert_from_int.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_int.osl
@@ -17,19 +17,19 @@
 #include "stdosl.h"

 shader node_convert_from_int(
-	int ValInt = 0,
-	output string String = "",
-	output float Val = 0.0,
-	output color Color = 0.0,
-	output vector Vector = vector(0.0, 0.0, 0.0),
-	output point Point = point(0.0, 0.0, 0.0),
-	output normal Normal = normal(0.0, 0.0, 0.0))
+	int value_int = 0,
+	output string value_string = "",
+	output float value_float = 0.0,
+	output color value_color = 0.0,
+	output vector value_vector = vector(0.0, 0.0, 0.0),
+	output point value_point = point(0.0, 0.0, 0.0),
+	output normal value_normal = normal(0.0, 0.0, 0.0))
 {
-	float f = (float)ValInt;
-	Val = f;
-	Color = color(f, f, f);
-	Vector = vector(f, f, f);
-	Point = point(f, f, f);
-	Normal = normal(f, f, f);
+	float f = (float)value_int;
+	value_float = f;
+	value_color = color(f, f, f);
+	value_vector = vector(f, f, f);
+	value_point = point(f, f, f);
+	value_normal = normal(f, f, f);
 }

--- a/intern/cycles/kernel/shaders/node_convert_from_normal.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_normal.osl
@@ -17,18 +17,18 @@
 #include "stdosl.h"

 shader node_convert_from_normal(
-	normal Normal = normal(0.0, 0.0, 0.0),
-	output string String = "",
-	output float Val = 0.0,
-	output int ValInt = 0,
-	output vector Vector = vector(0.0, 0.0, 0.0),
-	output color Color = 0.0,
-	output point Point = point(0.0, 0.0, 0.0))
+	normal value_normal = normal(0.0, 0.0, 0.0),
+	output string value_string = "",
+	output float value_float = 0.0,
+	output int value_int = 0,
+	output vector value_vector = vector(0.0, 0.0, 0.0),
+	output color value_color = 0.0,
+	output point value_point = point(0.0, 0.0, 0.0))
 {
-	Val = (Normal[0] + Normal[1] + Normal[2]) * (1.0 / 3.0);
-	ValInt = (int)((Normal[0] + Normal[1] + Normal[2]) * (1.0 / 3.0));
-	Vector = vector(Normal[0], Normal[1], Normal[2]);
-	Color = color(Normal[0], Normal[1], Normal[2]);
-	Point = point(Normal[0], Normal[1], Normal[2]);
+	value_float = (value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0);
+	value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0));
+	value_vector = vector(value_normal[0], value_normal[1], value_normal[2]);
+	value_color = color(value_normal[0], value_normal[1], value_normal[2]);
+	value_point = point(value_normal[0], value_normal[1], value_normal[2]);
 }

--- a/intern/cycles/kernel/shaders/node_convert_from_point.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_point.osl
@@ -17,18 +17,18 @@
 #include "stdosl.h"

 shader node_convert_from_point(
-	point Point = point(0.0, 0.0, 0.0),
-	output string String = "",
-	output float Val = 0.0,
-	output int ValInt = 0,
-	output vector Vector = vector(0.0, 0.0, 0.0),
-	output color Color = 0.0,
-	output normal Normal = normal(0.0, 0.0, 0.0))
+	point value_point = point(0.0, 0.0, 0.0),
+	output string value_string = "",
+	output float value_float = 0.0,
+	output int value_int = 0,
+	output vector value_vector = vector(0.0, 0.0, 0.0),
+	output color value_color = 0.0,
+	output normal value_normal = normal(0.0, 0.0, 0.0))
 {
-	Val = (Point[0] + Point[1] + Point[2]) * (1.0 / 3.0);
-	ValInt = (int)((Normal[0] + Normal[1] + Normal[2]) * (1.0 / 3.0));
-	Vector = vector(Point[0], Point[1], Point[2]);
-	Color = color(Point[0], Point[1], Point[2]);
-	Normal = normal(Point[0], Point[1], Point[2]);
+	value_float = (value_point[0] + value_point[1] + value_point[2]) * (1.0 / 3.0);
+	value_int = (int)((value_point[0] + value_point[1] + value_point[2]) * (1.0 / 3.0));
+	value_vector = vector(value_point[0], value_point[1], value_point[2]);
+	value_color = color(value_point[0], value_point[1], value_point[2]);
+	value_normal = normal(value_point[0], value_point[1], value_point[2]);
 }

--- a/intern/cycles/kernel/shaders/node_convert_from_string.osl
+++ b/intern/cycles/kernel/shaders/node_convert_from_string.osl
@@ -17,13 +17,13 @@
 #include "stdosl.h"

 shader node_convert_from_string(
-	string String = "",
-	output color Color = color(0.0, 0.0, 0.0),
-	output float Val = 0.0,
-	output int ValInt = 0,
-	output vector Vector = vector(0.0, 0.0, 0.0),
-	output point Point = point(0.0, 0.0, 0.0),
-	output normal Normal = normal(0.0, 0.0, 0.0))
+	string value_string = "",
+	output color value_color = color(0.0, 0.0, 0.0),
+	output float value_float = 0.0,
+	output int value_int = 0,
+	output vector value_vector = vector(0.0, 0.0, 0.0),
+	output point value_point = point(0.0, 0.0, 0.0),
+	output normal value_normal = normal(0.0, 0.0, 0.0))
 {
 }

--- a/Show More
+++ b/Show More