Compare commits

..

13 Commits

Author SHA1 Message Date
3bdd415254 realize non-standard virtual arrays 2021-09-26 15:07:48 +02:00
46fe816fc6 use threads 2021-09-26 15:07:27 +02:00
2dbbbc7e85 add slice method 2021-09-26 14:52:12 +02:00
34ed0de287 improve 2021-09-26 14:11:42 +02:00
3bcb30b13b cleanup 2021-09-26 13:39:27 +02:00
f55023c82a progress 2021-09-26 13:23:09 +02:00
d00c68fc7b progress 2021-09-25 16:26:56 +02:00
ab36a7de75 progress 2021-09-25 15:59:20 +02:00
42d3210681 progress 2021-09-25 15:35:43 +02:00
318e2dd00f progress 2021-09-25 15:22:07 +02:00
8fdc78678a progress 2021-09-25 15:10:42 +02:00
ad114f806b progress 2021-09-25 15:01:09 +02:00
02d9d13a83 initial commit 2021-09-25 14:37:12 +02:00
778 changed files with 7634 additions and 22784 deletions

View File

@@ -419,8 +419,6 @@ mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
option(WITH_CYCLES_DEVICE_CUDA "Enable Cycles CUDA compute support" ON)
option(WITH_CYCLES_DEVICE_OPTIX "Enable Cycles OptiX support" ON)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles HIP support" OFF)
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(WITH_CYCLES_DEVICE_CUDA)
option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime" ON)
@@ -823,11 +821,6 @@ if(NOT WITH_CUDA_DYNLOAD)
endif()
endif()
if(WITH_CYCLES_DEVICE_HIP)
# Currently HIP must be dynamically loaded, this may change in future toolkits
set(WITH_HIP_DYNLOAD ON)
endif()
#-----------------------------------------------------------------------------
# Check check if submodules are cloned
@@ -1857,9 +1850,6 @@ elseif(WITH_CYCLES_STANDALONE)
if(WITH_CUDA_DYNLOAD)
add_subdirectory(extern/cuew)
endif()
if(WITH_HIP_DYNLOAD)
add_subdirectory(extern/hipew)
endif()
if(NOT WITH_SYSTEM_GLEW)
add_subdirectory(extern/glew)
endif()

View File

@@ -24,7 +24,6 @@ import project_source_info
import subprocess
import sys
import os
import tempfile
from typing import (
Any,
@@ -36,6 +35,7 @@ USE_QUIET = (os.environ.get("QUIET", None) is not None)
CHECKER_IGNORE_PREFIX = [
"extern",
"intern/moto",
]
CHECKER_BIN = "cppcheck"
@@ -47,19 +47,13 @@ CHECKER_ARGS = [
"--max-configs=1", # speeds up execution
# "--check-config", # when includes are missing
"--enable=all", # if you want sixty hundred pedantic suggestions
# Quiet output, otherwise all defines/includes are printed (overly verbose).
# Only enable this for troubleshooting (if defines are not set as expected for example).
"--quiet",
# NOTE: `--cppcheck-build-dir=<dir>` is added later as a temporary directory.
]
if USE_QUIET:
CHECKER_ARGS.append("--quiet")
def cppcheck() -> None:
def main() -> None:
source_info = project_source_info.build_info(ignore_prefix_list=CHECKER_IGNORE_PREFIX)
source_defines = project_source_info.build_defines_as_args()
@@ -84,10 +78,7 @@ def cppcheck() -> None:
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
sys.stdout.flush()
sys.stdout.write("%s %s\n" % (
percent_str,
os.path.relpath(c, project_source_info.SOURCE_DIR)
))
sys.stdout.write("%s " % percent_str)
return subprocess.Popen(cmd)
@@ -99,11 +90,5 @@ def cppcheck() -> None:
print("Finished!")
def main() -> None:
with tempfile.TemporaryDirectory() as temp_dir:
CHECKER_ARGS.append("--cppcheck-build-dir=" + temp_dir)
cppcheck()
if __name__ == "__main__":
main()

View File

@@ -243,9 +243,7 @@ def build_defines_as_args() -> List[str]:
# use this module.
def queue_processes(
process_funcs: Sequence[Tuple[Callable[..., subprocess.Popen[Any]], Tuple[Any, ...]]],
*,
job_total: int =-1,
sleep: float = 0.1,
) -> None:
""" Takes a list of function arg pairs, each function must return a process
"""
@@ -273,20 +271,14 @@ def queue_processes(
if len(processes) <= job_total:
break
time.sleep(sleep)
else:
time.sleep(0.1)
sys.stdout.flush()
sys.stderr.flush()
processes.append(func(*args))
# Don't return until all jobs have finished.
while 1:
processes[:] = [p for p in processes if p.poll() is None]
if not processes:
break
time.sleep(sleep)
def main() -> None:
if not os.path.exists(join(CMAKE_DIR, "CMakeCache.txt")):

View File

@@ -1,40 +0,0 @@
"""
This method enables conversions between Local and Pose space for bones in
the middle of updating the armature without having to update dependencies
after each change, by manually carrying updated matrices in a recursive walk.
"""
def set_pose_matrices(obj, matrix_map):
"Assign pose space matrices of all bones at once, ignoring constraints."
def rec(pbone, parent_matrix):
matrix = matrix_map[pbone.name]
## Instead of:
# pbone.matrix = matrix
# bpy.context.view_layer.update()
# Compute and assign local matrix, using the new parent matrix
if pbone.parent:
pbone.matrix_basis = pbone.bone.convert_local_to_pose(
matrix,
pbone.bone.matrix_local,
parent_matrix=parent_matrix,
parent_matrix_local=pbone.parent.bone.matrix_local,
invert=True
)
else:
pbone.matrix_basis = pbone.bone.convert_local_to_pose(
matrix,
pbone.bone.matrix_local,
invert=True
)
# Recursively process children, passing the new matrix through
for child in pbone.children:
rec(child, matrix)
# Scan all bone trees from their roots
for pbone in obj.pose.bones:
if not pbone.parent:
rec(pbone, None)

View File

@@ -1101,7 +1101,6 @@ context_type_map = {
"scene": ("Scene", False),
"sculpt_object": ("Object", False),
"selectable_objects": ("Object", True),
"selected_asset_files": ("FileSelectEntry", True),
"selected_bones": ("EditBone", True),
"selected_editable_bones": ("EditBone", True),
"selected_editable_fcurves": ("FCurve", True),

View File

@@ -67,12 +67,9 @@ endif()
if(WITH_CYCLES OR WITH_COMPOSITOR OR WITH_OPENSUBDIV)
add_subdirectory(clew)
if((WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX) AND WITH_CUDA_DYNLOAD)
if(WITH_CUDA_DYNLOAD)
add_subdirectory(cuew)
endif()
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
add_subdirectory(hipew)
endif()
endif()
if(WITH_GHOST_X11 AND WITH_GHOST_XDND)

View File

@@ -1,39 +0,0 @@
# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# The Original Code is Copyright (C) 2021, Blender Foundation
# All rights reserved.
# ***** END GPL LICENSE BLOCK *****
set(INC
.
include
)
set(INC_SYS
)
set(SRC
src/hipew.c
include/hipew.h
)
set(LIB
)
blender_add_lib(extern_hipew "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")

File diff suppressed because it is too large Load Diff

View File

@@ -1,533 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#ifdef _MSC_VER
# if _MSC_VER < 1900
# define snprintf _snprintf
# endif
# define popen _popen
# define pclose _pclose
# define _CRT_SECURE_NO_WARNINGS
#endif
#include <hipew.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# define VC_EXTRALEAN
# include <windows.h>
/* Utility macros. */
typedef HMODULE DynamicLibrary;
# define dynamic_library_open(path) LoadLibraryA(path)
# define dynamic_library_close(lib) FreeLibrary(lib)
# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
#else
# include <dlfcn.h>
typedef void* DynamicLibrary;
# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
# define dynamic_library_close(lib) dlclose(lib)
# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
#endif
#define _LIBRARY_FIND_CHECKED(lib, name) \
name = (t##name *)dynamic_library_find(lib, #name); \
assert(name);
#define _LIBRARY_FIND(lib, name) \
name = (t##name *)dynamic_library_find(lib, #name);
#define HIP_LIBRARY_FIND_CHECKED(name) \
_LIBRARY_FIND_CHECKED(hip_lib, name)
#define HIP_LIBRARY_FIND(name) _LIBRARY_FIND(hip_lib, name)
static DynamicLibrary hip_lib;
/* Function definitions. */
thipGetErrorName *hipGetErrorName;
thipInit *hipInit;
thipDriverGetVersion *hipDriverGetVersion;
thipGetDevice *hipGetDevice;
thipGetDeviceCount *hipGetDeviceCount;
thipDeviceGetName *hipDeviceGetName;
thipDeviceGetAttribute *hipDeviceGetAttribute;
thipDeviceComputeCapability *hipDeviceComputeCapability;
thipDevicePrimaryCtxRetain *hipDevicePrimaryCtxRetain;
thipDevicePrimaryCtxRelease *hipDevicePrimaryCtxRelease;
thipDevicePrimaryCtxSetFlags *hipDevicePrimaryCtxSetFlags;
thipDevicePrimaryCtxGetState *hipDevicePrimaryCtxGetState;
thipDevicePrimaryCtxReset *hipDevicePrimaryCtxReset;
thipCtxCreate *hipCtxCreate;
thipCtxDestroy *hipCtxDestroy;
thipCtxPushCurrent *hipCtxPushCurrent;
thipCtxPopCurrent *hipCtxPopCurrent;
thipCtxSetCurrent *hipCtxSetCurrent;
thipCtxGetCurrent *hipCtxGetCurrent;
thipCtxGetDevice *hipCtxGetDevice;
thipCtxGetFlags *hipCtxGetFlags;
thipCtxSynchronize *hipCtxSynchronize;
thipDeviceSynchronize *hipDeviceSynchronize;
thipCtxGetCacheConfig *hipCtxGetCacheConfig;
thipCtxSetCacheConfig *hipCtxSetCacheConfig;
thipCtxGetSharedMemConfig *hipCtxGetSharedMemConfig;
thipCtxSetSharedMemConfig *hipCtxSetSharedMemConfig;
thipCtxGetApiVersion *hipCtxGetApiVersion;
thipModuleLoad *hipModuleLoad;
thipModuleLoadData *hipModuleLoadData;
thipModuleLoadDataEx *hipModuleLoadDataEx;
thipModuleUnload *hipModuleUnload;
thipModuleGetFunction *hipModuleGetFunction;
thipModuleGetGlobal *hipModuleGetGlobal;
thipModuleGetTexRef *hipModuleGetTexRef;
thipMemGetInfo *hipMemGetInfo;
thipMalloc *hipMalloc;
thipMemAllocPitch *hipMemAllocPitch;
thipFree *hipFree;
thipMemGetAddressRange *hipMemGetAddressRange;
thipHostMalloc *hipHostMalloc;
thipHostFree *hipHostFree;
thipHostGetDevicePointer *hipHostGetDevicePointer;
thipHostGetFlags *hipHostGetFlags;
thipMallocManaged *hipMallocManaged;
thipDeviceGetByPCIBusId *hipDeviceGetByPCIBusId;
thipDeviceGetPCIBusId *hipDeviceGetPCIBusId;
thipMemcpyPeer *hipMemcpyPeer;
thipMemcpyHtoD *hipMemcpyHtoD;
thipMemcpyDtoH *hipMemcpyDtoH;
thipMemcpyDtoD *hipMemcpyDtoD;
thipDrvMemcpy2DUnaligned *hipDrvMemcpy2DUnaligned;
thipMemcpyParam2D *hipMemcpyParam2D;
thipDrvMemcpy3D *hipDrvMemcpy3D;
thipMemcpyHtoDAsync *hipMemcpyHtoDAsync;
thipMemcpyDtoHAsync *hipMemcpyDtoHAsync;
thipMemcpyParam2DAsync *hipMemcpyParam2DAsync;
thipDrvMemcpy3DAsync *hipDrvMemcpy3DAsync;
thipMemsetD8 *hipMemsetD8;
thipMemsetD16 *hipMemsetD16;
thipMemsetD32 *hipMemsetD32;
thipMemsetD8Async *hipMemsetD8Async;
thipMemsetD16Async *hipMemsetD16Async;
thipMemsetD32Async *hipMemsetD32Async;
thipArrayCreate *hipArrayCreate;
thipArrayDestroy *hipArrayDestroy;
thipArray3DCreate *hipArray3DCreate;
thipStreamCreateWithFlags *hipStreamCreateWithFlags;
thipStreamCreateWithPriority *hipStreamCreateWithPriority;
thipStreamGetPriority *hipStreamGetPriority;
thipStreamGetFlags *hipStreamGetFlags;
thipStreamWaitEvent *hipStreamWaitEvent;
thipStreamAddCallback *hipStreamAddCallback;
thipStreamQuery *hipStreamQuery;
thipStreamSynchronize *hipStreamSynchronize;
thipStreamDestroy *hipStreamDestroy;
thipEventCreateWithFlags *hipEventCreateWithFlags;
thipEventRecord *hipEventRecord;
thipEventQuery *hipEventQuery;
thipEventSynchronize *hipEventSynchronize;
thipEventDestroy *hipEventDestroy;
thipEventElapsedTime *hipEventElapsedTime;
thipFuncGetAttribute *hipFuncGetAttribute;
thipFuncSetCacheConfig *hipFuncSetCacheConfig;
thipModuleLaunchKernel *hipModuleLaunchKernel;
thipDrvOccupancyMaxActiveBlocksPerMultiprocessor *hipDrvOccupancyMaxActiveBlocksPerMultiprocessor;
thipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
thipModuleOccupancyMaxPotentialBlockSize *hipModuleOccupancyMaxPotentialBlockSize;
thipTexRefSetArray *hipTexRefSetArray;
thipTexRefSetAddress *hipTexRefSetAddress;
thipTexRefSetAddress2D *hipTexRefSetAddress2D;
thipTexRefSetFormat *hipTexRefSetFormat;
thipTexRefSetAddressMode *hipTexRefSetAddressMode;
thipTexRefSetFilterMode *hipTexRefSetFilterMode;
thipTexRefSetFlags *hipTexRefSetFlags;
thipTexRefGetAddress *hipTexRefGetAddress;
thipTexRefGetArray *hipTexRefGetArray;
thipTexRefGetAddressMode *hipTexRefGetAddressMode;
thipTexObjectCreate *hipTexObjectCreate;
thipTexObjectDestroy *hipTexObjectDestroy;
thipDeviceCanAccessPeer *hipDeviceCanAccessPeer;
thipCtxEnablePeerAccess *hipCtxEnablePeerAccess;
thipCtxDisablePeerAccess *hipCtxDisablePeerAccess;
thipDeviceGetP2PAttribute *hipDeviceGetP2PAttribute;
thipGraphicsUnregisterResource *hipGraphicsUnregisterResource;
thipGraphicsMapResources *hipGraphicsMapResources;
thipGraphicsUnmapResources *hipGraphicsUnmapResources;
thipGraphicsResourceGetMappedPointer *hipGraphicsResourceGetMappedPointer;
thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
thipGLGetDevices *hipGLGetDevices;
static DynamicLibrary dynamic_library_open_find(const char **paths) {
int i = 0;
while (paths[i] != NULL) {
DynamicLibrary lib = dynamic_library_open(paths[i]);
if (lib != NULL) {
return lib;
}
++i;
}
return NULL;
}
/* Implementation function. */
static void hipewHipExit(void) {
if (hip_lib != NULL) {
/* Ignore errors. */
dynamic_library_close(hip_lib);
hip_lib = NULL;
}
}
static int hipewHipInit(void) {
/* Library paths. */
#ifdef _WIN32
/* Expected in c:/windows/system or similar, no path needed. */
const char *hip_paths[] = {"amdhip64.dll", NULL};
#elif defined(__APPLE__)
/* Default installation path. */
const char *hip_paths[] = {"", NULL};
#else
const char *hip_paths[] = {"/opt/rocm/hip/lib/libamdhip64.so", NULL};
#endif
static int initialized = 0;
static int result = 0;
int error, driver_version;
if (initialized) {
return result;
}
initialized = 1;
error = atexit(hipewHipExit);
if (error) {
result = HIPEW_ERROR_ATEXIT_FAILED;
return result;
}
/* Load library. */
hip_lib = dynamic_library_open_find(hip_paths);
if (hip_lib == NULL) {
result = HIPEW_ERROR_OPEN_FAILED;
return result;
}
/* Fetch all function pointers. */
HIP_LIBRARY_FIND_CHECKED(hipGetErrorName);
HIP_LIBRARY_FIND_CHECKED(hipInit);
HIP_LIBRARY_FIND_CHECKED(hipDriverGetVersion);
HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRetain);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRelease);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxSetFlags);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxGetState);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxReset);
HIP_LIBRARY_FIND_CHECKED(hipCtxCreate);
HIP_LIBRARY_FIND_CHECKED(hipCtxDestroy);
HIP_LIBRARY_FIND_CHECKED(hipCtxPushCurrent);
HIP_LIBRARY_FIND_CHECKED(hipCtxPopCurrent);
HIP_LIBRARY_FIND_CHECKED(hipCtxSetCurrent);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetCurrent);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetDevice);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetFlags);
HIP_LIBRARY_FIND_CHECKED(hipCtxSynchronize);
HIP_LIBRARY_FIND_CHECKED(hipDeviceSynchronize);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetCacheConfig);
HIP_LIBRARY_FIND_CHECKED(hipCtxSetCacheConfig);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetSharedMemConfig);
HIP_LIBRARY_FIND_CHECKED(hipCtxSetSharedMemConfig);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetApiVersion);
HIP_LIBRARY_FIND_CHECKED(hipModuleLoad);
HIP_LIBRARY_FIND_CHECKED(hipModuleLoadData);
HIP_LIBRARY_FIND_CHECKED(hipModuleLoadDataEx);
HIP_LIBRARY_FIND_CHECKED(hipModuleUnload);
HIP_LIBRARY_FIND_CHECKED(hipModuleGetFunction);
HIP_LIBRARY_FIND_CHECKED(hipModuleGetGlobal);
HIP_LIBRARY_FIND_CHECKED(hipModuleGetTexRef);
HIP_LIBRARY_FIND_CHECKED(hipMemGetInfo);
HIP_LIBRARY_FIND_CHECKED(hipMalloc);
HIP_LIBRARY_FIND_CHECKED(hipMemAllocPitch);
HIP_LIBRARY_FIND_CHECKED(hipFree);
HIP_LIBRARY_FIND_CHECKED(hipMemGetAddressRange);
HIP_LIBRARY_FIND_CHECKED(hipHostMalloc);
HIP_LIBRARY_FIND_CHECKED(hipHostFree);
HIP_LIBRARY_FIND_CHECKED(hipHostGetDevicePointer);
HIP_LIBRARY_FIND_CHECKED(hipHostGetFlags);
HIP_LIBRARY_FIND_CHECKED(hipMallocManaged);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetByPCIBusId);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetPCIBusId);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyPeer);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoD);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoH);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoD);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2D);
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3D);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoDAsync);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoHAsync);
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy2DUnaligned);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2DAsync);
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3DAsync);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD8);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD16);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD32);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD8Async);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD16Async);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD32Async);
HIP_LIBRARY_FIND_CHECKED(hipArrayCreate);
HIP_LIBRARY_FIND_CHECKED(hipArrayDestroy);
HIP_LIBRARY_FIND_CHECKED(hipArray3DCreate);
HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithFlags);
HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithPriority);
HIP_LIBRARY_FIND_CHECKED(hipStreamGetPriority);
HIP_LIBRARY_FIND_CHECKED(hipStreamGetFlags);
HIP_LIBRARY_FIND_CHECKED(hipStreamWaitEvent);
HIP_LIBRARY_FIND_CHECKED(hipStreamAddCallback);
HIP_LIBRARY_FIND_CHECKED(hipStreamQuery);
HIP_LIBRARY_FIND_CHECKED(hipStreamSynchronize);
HIP_LIBRARY_FIND_CHECKED(hipStreamDestroy);
HIP_LIBRARY_FIND_CHECKED(hipEventCreateWithFlags);
HIP_LIBRARY_FIND_CHECKED(hipEventRecord);
HIP_LIBRARY_FIND_CHECKED(hipEventQuery);
HIP_LIBRARY_FIND_CHECKED(hipEventSynchronize);
HIP_LIBRARY_FIND_CHECKED(hipEventDestroy);
HIP_LIBRARY_FIND_CHECKED(hipEventElapsedTime);
HIP_LIBRARY_FIND_CHECKED(hipFuncGetAttribute);
HIP_LIBRARY_FIND_CHECKED(hipFuncSetCacheConfig);
HIP_LIBRARY_FIND_CHECKED(hipModuleLaunchKernel);
HIP_LIBRARY_FIND_CHECKED(hipModuleOccupancyMaxPotentialBlockSize);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetArray);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddress);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddress2D);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFormat);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddressMode);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFilterMode);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFlags);
HIP_LIBRARY_FIND_CHECKED(hipTexRefGetAddress);
HIP_LIBRARY_FIND_CHECKED(hipTexRefGetAddressMode);
HIP_LIBRARY_FIND_CHECKED(hipTexObjectCreate);
HIP_LIBRARY_FIND_CHECKED(hipTexObjectDestroy);
HIP_LIBRARY_FIND_CHECKED(hipDeviceCanAccessPeer);
HIP_LIBRARY_FIND_CHECKED(hipCtxEnablePeerAccess);
HIP_LIBRARY_FIND_CHECKED(hipCtxDisablePeerAccess);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetP2PAttribute);
#ifdef _WIN32
HIP_LIBRARY_FIND_CHECKED(hipGraphicsUnregisterResource);
HIP_LIBRARY_FIND_CHECKED(hipGraphicsMapResources);
HIP_LIBRARY_FIND_CHECKED(hipGraphicsUnmapResources);
HIP_LIBRARY_FIND_CHECKED(hipGraphicsResourceGetMappedPointer);
HIP_LIBRARY_FIND_CHECKED(hipGraphicsGLRegisterBuffer);
HIP_LIBRARY_FIND_CHECKED(hipGLGetDevices);
#endif
result = HIPEW_SUCCESS;
return result;
}
int hipewInit(hipuint32_t flags) {
int result = HIPEW_SUCCESS;
if (flags & HIPEW_INIT_HIP) {
result = hipewHipInit();
if (result != HIPEW_SUCCESS) {
return result;
}
}
return result;
}
const char *hipewErrorString(hipError_t result) {
switch (result) {
case hipSuccess: return "No errors";
case hipErrorInvalidValue: return "Invalid value";
case hipErrorOutOfMemory: return "Out of memory";
case hipErrorNotInitialized: return "Driver not initialized";
case hipErrorDeinitialized: return "Driver deinitialized";
case hipErrorProfilerDisabled: return "Profiler disabled";
case hipErrorProfilerNotInitialized: return "Profiler not initialized";
case hipErrorProfilerAlreadyStarted: return "Profiler already started";
case hipErrorProfilerAlreadyStopped: return "Profiler already stopped";
case hipErrorNoDevice: return "No HIP-capable device available";
case hipErrorInvalidDevice: return "Invalid device";
case hipErrorInvalidImage: return "Invalid kernel image";
case hipErrorInvalidContext: return "Invalid context";
case hipErrorContextAlreadyCurrent: return "Context already current";
case hipErrorMapFailed: return "Map failed";
case hipErrorUnmapFailed: return "Unmap failed";
case hipErrorArrayIsMapped: return "Array is mapped";
case hipErrorAlreadyMapped: return "Already mapped";
case hipErrorNoBinaryForGpu: return "No binary for GPU";
case hipErrorAlreadyAcquired: return "Already acquired";
case hipErrorNotMapped: return "Not mapped";
case hipErrorNotMappedAsArray: return "Mapped resource not available for access as an array";
case hipErrorNotMappedAsPointer: return "Mapped resource not available for access as a pointer";
case hipErrorECCNotCorrectable: return "Uncorrectable ECC error detected";
case hipErrorUnsupportedLimit: return "hipLimit_t not supported by device";
case hipErrorContextAlreadyInUse: return "Context already in use";
case hipErrorPeerAccessUnsupported: return "Peer access unsupported";
case hipErrorInvalidKernelFile: return "Invalid ptx";
case hipErrorInvalidGraphicsContext: return "Invalid graphics context";
case hipErrorInvalidSource: return "Invalid source";
case hipErrorFileNotFound: return "File not found";
case hipErrorSharedObjectSymbolNotFound: return "Link to a shared object failed to resolve";
case hipErrorSharedObjectInitFailed: return "Shared object initialization failed";
case hipErrorOperatingSystem: return "Operating system";
case hipErrorInvalidHandle: return "Invalid handle";
case hipErrorNotFound: return "Not found";
case hipErrorNotReady: return "HIP not ready";
case hipErrorIllegalAddress: return "Illegal address";
case hipErrorLaunchOutOfResources: return "Launch exceeded resources";
case hipErrorLaunchTimeOut: return "Launch exceeded timeout";
case hipErrorPeerAccessAlreadyEnabled: return "Peer access already enabled";
case hipErrorPeerAccessNotEnabled: return "Peer access not enabled";
case hipErrorSetOnActiveProcess: return "Primary context active";
case hipErrorAssert: return "Assert";
case hipErrorHostMemoryAlreadyRegistered: return "Host memory already registered";
case hipErrorHostMemoryNotRegistered: return "Host memory not registered";
case hipErrorLaunchFailure: return "Launch failed";
case hipErrorCooperativeLaunchTooLarge: return "Cooperative launch too large";
case hipErrorNotSupported: return "Not supported";
case hipErrorUnknown: return "Unknown error";
default: return "Unknown HIP error value";
}
}
static void path_join(const char *path1,
const char *path2,
int maxlen,
char *result) {
#if defined(WIN32) || defined(_WIN32)
const char separator = '\\';
#else
const char separator = '/';
#endif
int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
if (n != -1 && n < maxlen) {
result[n] = '\0';
}
else {
result[maxlen - 1] = '\0';
}
}
static int path_exists(const char *path) {
struct stat st;
if (stat(path, &st)) {
return 0;
}
return 1;
}
const char *hipewCompilerPath(void) {
#ifdef _WIN32
const char *hipPath = getenv("HIP_ROCCLR_HOME");
const char *windowsCommand = "perl ";
const char *executable = "bin/hipcc";
static char hipcc[65536];
static char finalCommand[65536];
if(hipPath) {
path_join(hipPath, executable, sizeof(hipcc), hipcc);
if(path_exists(hipcc)) {
snprintf(finalCommand, sizeof(hipcc), "%s %s", windowsCommand, hipcc);
return finalCommand;
} else {
printf("Could not find hipcc. Make sure HIP_ROCCLR_HOME points to the directory holding /bin/hipcc");
}
}
#else
const char *hipPath = "opt/rocm/hip/bin";
const char *executable = "hipcc";
static char hipcc[65536];
if(hipPath) {
path_join(hipPath, executable, sizeof(hipcc), hipcc);
if(path_exists(hipcc)){
return hipcc;
}
}
#endif
{
#ifdef _WIN32
FILE *handle = popen("where hipcc", "r");
#else
FILE *handle = popen("which hipcc", "r");
#endif
if (handle) {
char buffer[4096] = {0};
int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
buffer[len] = '\0';
pclose(handle);
if (buffer[0]) {
return "hipcc";
}
}
}
return NULL;
}
int hipewCompilerVersion(void) {
const char *path = hipewCompilerPath();
const char *marker = "Hip compilation tools, release ";
FILE *pipe;
int major, minor;
char *versionstr;
char buf[128];
char output[65536] = "\0";
char command[65536] = "\0";
if (path == NULL) {
return 0;
}
/* get --version output */
strcat(command, "\"");
strncat(command, path, sizeof(command) - 1);
strncat(command, "\" --version", sizeof(command) - strlen(path) - 1);
pipe = popen(command, "r");
if (!pipe) {
fprintf(stderr, "HIP: failed to run compiler to retrieve version");
return 0;
}
while (!feof(pipe)) {
if (fgets(buf, sizeof(buf), pipe) != NULL) {
strncat(output, buf, sizeof(output) - strlen(output) - 1);
}
}
pclose(pipe);
return 40;
}

View File

@@ -297,7 +297,6 @@ endif()
if(WITH_CYCLES_STANDALONE)
set(WITH_CYCLES_DEVICE_CUDA TRUE)
set(WITH_CYCLES_DEVICE_HIP TRUE)
endif()
# TODO(sergey): Consider removing it, only causes confusion in interface.
set(WITH_CYCLES_DEVICE_MULTI TRUE)

View File

@@ -64,8 +64,6 @@ if(WITH_CYCLES_STANDALONE)
cycles_standalone.cpp
cycles_xml.cpp
cycles_xml.h
oiio_output_driver.cpp
oiio_output_driver.h
)
add_executable(cycles ${SRC} ${INC} ${INC_SYS})
unset(SRC)
@@ -75,7 +73,7 @@ if(WITH_CYCLES_STANDALONE)
if(APPLE)
if(WITH_OPENCOLORIO)
set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework IOKit -framework Carbon")
set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework IOKit")
endif()
if(WITH_OPENIMAGEDENOISE AND "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
# OpenImageDenoise uses BNNS from the Accelerate framework.

View File

@@ -36,9 +36,6 @@
#include "util/util_unique_ptr.h"
#include "util/util_version.h"
#include "app/cycles_xml.h"
#include "app/oiio_output_driver.h"
#ifdef WITH_CYCLES_STANDALONE_GUI
# include "util/util_view.h"
#endif
@@ -57,7 +54,6 @@ struct Options {
bool quiet;
bool show_help, interactive, pause;
string output_filepath;
string output_pass;
} options;
static void session_print(const string &str)
@@ -93,6 +89,30 @@ static void session_print_status()
session_print(status);
}
static bool write_render(const uchar *pixels, int w, int h, int channels)
{
string msg = string_printf("Writing image %s", options.output_path.c_str());
session_print(msg);
unique_ptr<ImageOutput> out = unique_ptr<ImageOutput>(ImageOutput::create(options.output_path));
if (!out) {
return false;
}
ImageSpec spec(w, h, channels, TypeDesc::UINT8);
if (!out->open(options.output_path, spec)) {
return false;
}
/* conversion for different top/bottom convention */
out->write_image(
TypeDesc::UINT8, pixels + (h - 1) * w * channels, AutoStride, -w * channels, AutoStride);
out->close();
return true;
}
static BufferParams &session_buffer_params()
{
static BufferParams buffer_params;
@@ -127,14 +147,9 @@ static void scene_init()
static void session_init()
{
options.output_pass = "combined";
options.session_params.write_render_cb = write_render;
options.session = new Session(options.session_params, options.scene_params);
if (!options.output_filepath.empty()) {
options.session->set_output_driver(make_unique<OIIOOutputDriver>(
options.output_filepath, options.output_pass, session_print));
}
if (options.session_params.background && !options.quiet)
options.session->progress.set_update_callback(function_bind(&session_print_status));
#ifdef WITH_CYCLES_STANDALONE_GUI
@@ -145,11 +160,6 @@ static void session_init()
/* load scene */
scene_init();
/* add pass for output. */
Pass *pass = options.scene->create_node<Pass>();
pass->set_name(ustring(options.output_pass.c_str()));
pass->set_type(PASS_COMBINED);
options.session->reset(options.session_params, session_buffer_params());
options.session->start();
}

View File

@@ -333,7 +333,6 @@ static void xml_read_shader_graph(XMLReadState &state, Shader *shader, xml_node
}
snode = (ShaderNode *)node_type->create(node_type);
snode->set_owner(graph);
}
xml_read_node(graph_reader, snode, node);

View File

@@ -1,71 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "app/oiio_output_driver.h"
CCL_NAMESPACE_BEGIN
OIIOOutputDriver::OIIOOutputDriver(const string_view filepath,
const string_view pass,
LogFunction log)
: filepath_(filepath), pass_(pass), log_(log)
{
}
OIIOOutputDriver::~OIIOOutputDriver()
{
}
void OIIOOutputDriver::write_render_tile(const Tile &tile)
{
/* Only write the full buffer, no intermediate tiles. */
if (!(tile.size == tile.full_size)) {
return;
}
log_(string_printf("Writing image %s", filepath_.c_str()));
unique_ptr<ImageOutput> image_output(ImageOutput::create(filepath_));
if (image_output == nullptr) {
log_("Failed to create image file");
return;
}
const int width = tile.size.x;
const int height = tile.size.y;
ImageSpec spec(width, height, 4, TypeDesc::FLOAT);
if (!image_output->open(filepath_, spec)) {
log_("Failed to create image file");
return;
}
vector<float> pixels(width * height * 4);
if (!tile.get_pass_pixels(pass_, 4, pixels.data())) {
log_("Failed to read render pass pixels");
return;
}
/* Manipulate offset and stride to convert from bottom-up to top-down convention. */
image_output->write_image(TypeDesc::FLOAT,
pixels.data() + (height - 1) * width * 4,
AutoStride,
-width * 4 * sizeof(float),
AutoStride);
image_output->close();
}
CCL_NAMESPACE_END

View File

@@ -1,42 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "render/output_driver.h"
#include "util/util_function.h"
#include "util/util_image.h"
#include "util/util_string.h"
#include "util/util_unique_ptr.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
class OIIOOutputDriver : public OutputDriver {
public:
typedef function<void(const string &)> LogFunction;
OIIOOutputDriver(const string_view filepath, const string_view pass, LogFunction log);
virtual ~OIIOOutputDriver();
void write_render_tile(const Tile &tile) override;
protected:
string filepath_;
string pass_;
LogFunction log_;
};
CCL_NAMESPACE_END

View File

@@ -31,14 +31,13 @@ set(INC_SYS
set(SRC
blender_camera.cpp
blender_device.cpp
blender_display_driver.cpp
blender_image.cpp
blender_geometry.cpp
blender_gpu_display.cpp
blender_light.cpp
blender_mesh.cpp
blender_object.cpp
blender_object_cull.cpp
blender_output_driver.cpp
blender_particles.cpp
blender_curves.cpp
blender_logging.cpp
@@ -52,11 +51,10 @@ set(SRC
CCL_api.h
blender_device.h
blender_display_driver.h
blender_gpu_display.h
blender_id_map.h
blender_image.h
blender_object_cull.h
blender_output_driver.h
blender_sync.h
blender_session.h
blender_texture.h
@@ -97,9 +95,6 @@ set(ADDON_FILES
add_definitions(${GL_DEFINITIONS})
if(WITH_CYCLES_DEVICE_HIP)
add_definitions(-DWITH_HIP)
endif()
if(WITH_MOD_FLUID)
add_definitions(-DWITH_FLUID)
endif()

View File

@@ -28,7 +28,7 @@ def _configure_argument_parser():
action='store_true')
parser.add_argument("--cycles-device",
help="Set the device to use for Cycles, overriding user preferences and the scene setting."
"Valid options are 'CPU', 'CUDA', 'OPTIX', or 'HIP'"
"Valid options are 'CPU', 'CUDA' or 'OPTIX'."
"Additionally, you can append '+CPU' to any GPU type for hybrid rendering.",
default=None)
return parser
@@ -211,6 +211,7 @@ def list_render_passes(scene, srl):
if crl.use_pass_shadow_catcher: yield ("Shadow Catcher", "RGB", 'COLOR')
# Debug passes.
if crl.pass_debug_render_time: yield ("Debug Render Time", "X", 'VALUE')
if crl.pass_debug_sample_count: yield ("Debug Sample Count", "X", 'VALUE')
# Cryptomatte passes.

View File

@@ -111,7 +111,6 @@ enum_device_type = (
('CPU', "CPU", "CPU", 0),
('CUDA', "CUDA", "CUDA", 1),
('OPTIX', "OptiX", "OptiX", 3),
("HIP", "HIP", "HIP", 4)
)
enum_texture_limit = (
@@ -124,7 +123,7 @@ enum_texture_limit = (
('4096', "4096", "Limit texture size to 4096 pixels", 6),
('8192', "8192", "Limit texture size to 8192 pixels", 7),
)
# NOTE: Identifiers are expected to be an upper case version of identifiers from `Pass::get_type_enum()`
enum_view3d_shading_render_pass = (
('', "General", ""),
@@ -1197,6 +1196,12 @@ class CyclesCurveRenderSettings(bpy.types.PropertyGroup):
class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
pass_debug_render_time: BoolProperty(
name="Debug Render Time",
description="Render time in milliseconds per sample and pixel",
default=False,
update=update_render_passes,
)
pass_debug_sample_count: BoolProperty(
name="Debug Sample Count",
description="Number of samples/camera rays per pixel",
@@ -1261,16 +1266,12 @@ class CyclesPreferences(bpy.types.AddonPreferences):
def get_device_types(self, context):
import _cycles
has_cuda, has_optix, has_hip = _cycles.get_device_types()
has_cuda, has_optix = _cycles.get_device_types()
list = [('NONE', "None", "Don't use compute device", 0)]
if has_cuda:
list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
if has_optix:
list.append(('OPTIX', "OptiX", "Use OptiX for GPU acceleration", 3))
if has_hip:
list.append(('HIP', "HIP", "Use HIP for GPU acceleration", 4))
return list
compute_device_type: EnumProperty(
@@ -1295,7 +1296,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
def update_device_entries(self, device_list):
for device in device_list:
if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP'}:
if not device[1] in {'CUDA', 'OPTIX', 'CPU'}:
continue
# Try to find existing Device entry
entry = self.find_existing_device_entry(device)
@@ -1329,7 +1330,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
elif entry.type == 'CPU':
cpu_devices.append(entry)
# Extend all GPU devices with CPU.
if compute_device_type != 'CPU' and compute_device_type != 'HIP':
if compute_device_type != 'CPU':
devices.extend(cpu_devices)
return devices
@@ -1339,7 +1340,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
import _cycles
# Ensure `self.devices` is not re-allocated when the second call to
# get_devices_for_type is made, freeing items from the first list.
for device_type in ('CUDA', 'OPTIX', 'HIP'):
for device_type in ('CUDA', 'OPTIX', 'OPENCL'):
self.update_device_entries(_cycles.available_devices(device_type))
# Deprecated: use refresh_devices instead.

View File

@@ -99,11 +99,6 @@ def use_cuda(context):
return (get_device_type(context) == 'CUDA' and cscene.device == 'GPU')
def use_hip(context):
cscene = context.scene.cycles
return (get_device_type(context) == 'HIP' and cscene.device == 'GPU')
def use_optix(context):
cscene = context.scene.cycles
@@ -792,6 +787,7 @@ class CYCLES_RENDER_PT_passes_data(CyclesButtonsPanel, Panel):
col.prop(view_layer, "use_pass_material_index")
col = layout.column(heading="Debug", align=True)
col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
col.prop(cycles_view_layer, "pass_debug_sample_count", text="Sample Count")
layout.prop(view_layer, "pass_alpha_threshold")

View File

@@ -26,7 +26,6 @@ enum ComputeDevice {
COMPUTE_DEVICE_CPU = 0,
COMPUTE_DEVICE_CUDA = 1,
COMPUTE_DEVICE_OPTIX = 3,
COMPUTE_DEVICE_HIP = 4,
COMPUTE_DEVICE_NUM
};
@@ -82,9 +81,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
else if (compute_device == COMPUTE_DEVICE_OPTIX) {
mask |= DEVICE_MASK_OPTIX;
}
else if (compute_device == COMPUTE_DEVICE_HIP) {
mask |= DEVICE_MASK_HIP;
}
vector<DeviceInfo> devices = Device::available_devices(mask);
/* Match device preferences and available devices. */

View File

@@ -80,10 +80,8 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
{
/* Test if we can instance or if the object is modified. */
Geometry::Type geom_type = determine_geom_type(b_ob_info, use_particle_hair);
BL::ID b_key_id = (b_ob_info.is_real_object_data() &&
BKE_object_is_modified(b_ob_info.real_object)) ?
b_ob_info.real_object :
b_ob_info.object_data;
BL::ID b_key_id = (BKE_object_is_modified(b_ob_info.real_object)) ? b_ob_info.real_object :
b_ob_info.object_data;
GeometryKey key(b_key_id.ptr.data, geom_type);
/* Find shader indices. */

View File

@@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "blender/blender_display_driver.h"
#include "blender/blender_gpu_display.h"
#include "device/device.h"
#include "util/util_logging.h"
@@ -273,17 +273,17 @@ uint BlenderDisplaySpaceShader::get_shader_program()
}
/* --------------------------------------------------------------------
* BlenderDisplayDriver.
* BlenderGPUDisplay.
*/
BlenderDisplayDriver::BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene)
BlenderGPUDisplay::BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene)
: b_engine_(b_engine), display_shader_(BlenderDisplayShader::create(b_engine, b_scene))
{
/* Create context while on the main thread. */
gl_context_create();
}
BlenderDisplayDriver::~BlenderDisplayDriver()
BlenderGPUDisplay::~BlenderGPUDisplay()
{
gl_resources_destroy();
}
@@ -292,18 +292,19 @@ BlenderDisplayDriver::~BlenderDisplayDriver()
* Update procedure.
*/
bool BlenderDisplayDriver::update_begin(const Params &params,
bool BlenderGPUDisplay::do_update_begin(const GPUDisplayParams &params,
int texture_width,
int texture_height)
{
/* Note that it's the responsibility of BlenderDisplayDriver to ensure updating and drawing
/* Note that it's the responsibility of BlenderGPUDisplay to ensure updating and drawing
* the texture does not happen at the same time. This is achieved indirectly.
*
* When enabling the OpenGL context, it uses an internal mutex lock DST.gl_context_lock.
* This same lock is also held when do_draw() is called, which together ensure mutual
* exclusion.
*
* This locking is not performed on the Cycles side, because that would cause lock inversion. */
* This locking is not performed at the GPU display level, because that would cause lock
* inversion. */
if (!gl_context_enable()) {
return false;
}
@@ -360,7 +361,7 @@ bool BlenderDisplayDriver::update_begin(const Params &params,
return true;
}
void BlenderDisplayDriver::update_end()
void BlenderGPUDisplay::do_update_end()
{
gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
glFlush();
@@ -368,18 +369,54 @@ void BlenderDisplayDriver::update_end()
gl_context_disable();
}
/* --------------------------------------------------------------------
* Texture update from CPU buffer.
*/
void BlenderGPUDisplay::do_copy_pixels_to_texture(
const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height)
{
/* This call copies pixels to a Pixel Buffer Object (PBO) which is much cheaper from CPU time
* point of view than to copy data directly to the OpenGL texture.
*
* The possible downside of this approach is that it might require a higher peak memory when
* doing partial updates of the texture (although, in practice even partial updates might peak
* with a full-frame buffer stored on the CPU if the GPU is currently occupied). */
half4 *mapped_rgba_pixels = map_texture_buffer();
if (!mapped_rgba_pixels) {
return;
}
if (texture_x == 0 && texture_y == 0 && pixels_width == texture_.width &&
pixels_height == texture_.height) {
const size_t size_in_bytes = sizeof(half4) * texture_.width * texture_.height;
memcpy(mapped_rgba_pixels, rgba_pixels, size_in_bytes);
}
else {
const half4 *rgba_row = rgba_pixels;
half4 *mapped_rgba_row = mapped_rgba_pixels + texture_y * texture_.width + texture_x;
for (int y = 0; y < pixels_height;
++y, rgba_row += pixels_width, mapped_rgba_row += texture_.width) {
memcpy(mapped_rgba_row, rgba_row, sizeof(half4) * pixels_width);
}
}
unmap_texture_buffer();
}
/* --------------------------------------------------------------------
* Texture buffer mapping.
*/
half4 *BlenderDisplayDriver::map_texture_buffer()
half4 *BlenderGPUDisplay::do_map_texture_buffer()
{
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
half4 *mapped_rgba_pixels = reinterpret_cast<half4 *>(
glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY));
if (!mapped_rgba_pixels) {
LOG(ERROR) << "Error mapping BlenderDisplayDriver pixel buffer object.";
LOG(ERROR) << "Error mapping BlenderGPUDisplay pixel buffer object.";
}
if (texture_.need_clear) {
@@ -394,7 +431,7 @@ half4 *BlenderDisplayDriver::map_texture_buffer()
return mapped_rgba_pixels;
}
void BlenderDisplayDriver::unmap_texture_buffer()
void BlenderGPUDisplay::do_unmap_texture_buffer()
{
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
@@ -405,9 +442,9 @@ void BlenderDisplayDriver::unmap_texture_buffer()
* Graphics interoperability.
*/
BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get()
DeviceGraphicsInteropDestination BlenderGPUDisplay::do_graphics_interop_get()
{
GraphicsInterop interop_dst;
DeviceGraphicsInteropDestination interop_dst;
interop_dst.buffer_width = texture_.buffer_width;
interop_dst.buffer_height = texture_.buffer_height;
@@ -419,12 +456,12 @@ BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get
return interop_dst;
}
void BlenderDisplayDriver::graphics_interop_activate()
void BlenderGPUDisplay::graphics_interop_activate()
{
gl_context_enable();
}
void BlenderDisplayDriver::graphics_interop_deactivate()
void BlenderGPUDisplay::graphics_interop_deactivate()
{
gl_context_disable();
}
@@ -433,21 +470,27 @@ void BlenderDisplayDriver::graphics_interop_deactivate()
* Drawing.
*/
void BlenderDisplayDriver::clear()
void BlenderGPUDisplay::clear()
{
texture_.need_clear = true;
}
void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y)
void BlenderGPUDisplay::set_zoom(float zoom_x, float zoom_y)
{
zoom_ = make_float2(zoom_x, zoom_y);
}
void BlenderDisplayDriver::draw(const Params &params)
void BlenderGPUDisplay::do_draw(const GPUDisplayParams &params)
{
/* See do_update_begin() for why no locking is required here. */
const bool transparent = true; // TODO(sergey): Derive this from Film.
if (texture_.need_clear) {
/* Texture is requested to be cleared and was not yet cleared.
* Do early return which should be equivalent of drawing all-zero texture. */
return;
}
if (!gl_draw_resources_ensure()) {
return;
}
@@ -456,16 +499,6 @@ void BlenderDisplayDriver::draw(const Params &params)
gl_context_mutex_.lock();
}
if (texture_.need_clear) {
/* Texture is requested to be cleared and was not yet cleared.
*
* Do early return which should be equivalent of drawing all-zero texture.
* Watch out for the lock though so that the clear happening during update is properly
* synchronized here. */
gl_context_mutex_.unlock();
return;
}
if (gl_upload_sync_) {
glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED);
}
@@ -547,7 +580,7 @@ void BlenderDisplayDriver::draw(const Params &params)
}
}
void BlenderDisplayDriver::gl_context_create()
void BlenderGPUDisplay::gl_context_create()
{
/* When rendering in viewport there is no render context available via engine.
* Check whether own context is to be created here.
@@ -576,7 +609,7 @@ void BlenderDisplayDriver::gl_context_create()
}
}
bool BlenderDisplayDriver::gl_context_enable()
bool BlenderGPUDisplay::gl_context_enable()
{
if (use_gl_context_) {
if (!gl_context_) {
@@ -591,7 +624,7 @@ bool BlenderDisplayDriver::gl_context_enable()
return true;
}
void BlenderDisplayDriver::gl_context_disable()
void BlenderGPUDisplay::gl_context_disable()
{
if (use_gl_context_) {
if (gl_context_) {
@@ -604,7 +637,7 @@ void BlenderDisplayDriver::gl_context_disable()
RE_engine_render_context_disable(reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));
}
void BlenderDisplayDriver::gl_context_dispose()
void BlenderGPUDisplay::gl_context_dispose()
{
if (gl_context_) {
const bool drw_state = DRW_opengl_context_release();
@@ -616,7 +649,7 @@ void BlenderDisplayDriver::gl_context_dispose()
}
}
bool BlenderDisplayDriver::gl_draw_resources_ensure()
bool BlenderGPUDisplay::gl_draw_resources_ensure()
{
if (!texture_.gl_id) {
/* If there is no texture allocated, there is nothing to draw. Inform the draw call that it can
@@ -643,7 +676,7 @@ bool BlenderDisplayDriver::gl_draw_resources_ensure()
return true;
}
void BlenderDisplayDriver::gl_resources_destroy()
void BlenderGPUDisplay::gl_resources_destroy()
{
gl_context_enable();
@@ -666,7 +699,7 @@ void BlenderDisplayDriver::gl_resources_destroy()
gl_context_dispose();
}
bool BlenderDisplayDriver::gl_texture_resources_ensure()
bool BlenderGPUDisplay::gl_texture_resources_ensure()
{
if (texture_.creation_attempted) {
return texture_.is_created;
@@ -703,7 +736,7 @@ bool BlenderDisplayDriver::gl_texture_resources_ensure()
return true;
}
void BlenderDisplayDriver::texture_update_if_needed()
void BlenderGPUDisplay::texture_update_if_needed()
{
if (!texture_.need_update) {
return;
@@ -717,7 +750,7 @@ void BlenderDisplayDriver::texture_update_if_needed()
texture_.need_update = false;
}
void BlenderDisplayDriver::vertex_buffer_update(const Params &params)
void BlenderGPUDisplay::vertex_buffer_update(const GPUDisplayParams &params)
{
/* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be
* rendered. */
@@ -730,23 +763,23 @@ void BlenderDisplayDriver::vertex_buffer_update(const Params &params)
vpointer[0] = 0.0f;
vpointer[1] = 0.0f;
vpointer[2] = params.full_offset.x;
vpointer[3] = params.full_offset.y;
vpointer[2] = params.offset.x;
vpointer[3] = params.offset.y;
vpointer[4] = 1.0f;
vpointer[5] = 0.0f;
vpointer[6] = (float)params.size.x + params.full_offset.x;
vpointer[7] = params.full_offset.y;
vpointer[6] = (float)params.size.x + params.offset.x;
vpointer[7] = params.offset.y;
vpointer[8] = 1.0f;
vpointer[9] = 1.0f;
vpointer[10] = (float)params.size.x + params.full_offset.x;
vpointer[11] = (float)params.size.y + params.full_offset.y;
vpointer[10] = (float)params.size.x + params.offset.x;
vpointer[11] = (float)params.size.y + params.offset.y;
vpointer[12] = 0.0f;
vpointer[13] = 1.0f;
vpointer[14] = params.full_offset.x;
vpointer[15] = (float)params.size.y + params.full_offset.y;
vpointer[14] = params.offset.x;
vpointer[15] = (float)params.size.y + params.offset.y;
glUnmapBuffer(GL_ARRAY_BUFFER);
}

View File

@@ -22,14 +22,12 @@
#include "RNA_blender_cpp.h"
#include "render/display_driver.h"
#include "util/util_thread.h"
#include "render/gpu_display.h"
#include "util/util_unique_ptr.h"
CCL_NAMESPACE_BEGIN
/* Base class of shader used for display driver rendering. */
/* Base class of shader used for GPU display rendering. */
class BlenderDisplayShader {
public:
static constexpr const char *position_attribute_name = "pos";
@@ -98,11 +96,11 @@ class BlenderDisplaySpaceShader : public BlenderDisplayShader {
uint shader_program_ = 0;
};
/* Display driver implementation which is specific for Blender viewport integration. */
class BlenderDisplayDriver : public DisplayDriver {
/* GPU display implementation which is specific for Blender viewport integration. */
class BlenderGPUDisplay : public GPUDisplay {
public:
BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene);
~BlenderDisplayDriver();
BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene);
~BlenderGPUDisplay();
virtual void graphics_interop_activate() override;
virtual void graphics_interop_deactivate() override;
@@ -112,15 +110,22 @@ class BlenderDisplayDriver : public DisplayDriver {
void set_zoom(float zoom_x, float zoom_y);
protected:
virtual bool update_begin(const Params &params, int texture_width, int texture_height) override;
virtual void update_end() override;
virtual bool do_update_begin(const GPUDisplayParams &params,
int texture_width,
int texture_height) override;
virtual void do_update_end() override;
virtual half4 *map_texture_buffer() override;
virtual void unmap_texture_buffer() override;
virtual void do_copy_pixels_to_texture(const half4 *rgba_pixels,
int texture_x,
int texture_y,
int pixels_width,
int pixels_height) override;
virtual void do_draw(const GPUDisplayParams &params) override;
virtual GraphicsInterop graphics_interop_get() override;
virtual half4 *do_map_texture_buffer() override;
virtual void do_unmap_texture_buffer() override;
virtual void draw(const Params &params) override;
virtual DeviceGraphicsInteropDestination do_graphics_interop_get() override;
/* Helper function which allocates new GPU context. */
void gl_context_create();
@@ -147,13 +152,13 @@ class BlenderDisplayDriver : public DisplayDriver {
* This buffer is used to render texture in the viewport.
*
* NOTE: The buffer needs to be bound. */
void vertex_buffer_update(const Params &params);
void vertex_buffer_update(const GPUDisplayParams &params);
BL::RenderEngine b_engine_;
/* OpenGL context which is used the render engine doesn't have its own. */
void *gl_context_ = nullptr;
/* The when Blender RenderEngine side context is not available and the DisplayDriver is to create
/* The when Blender RenderEngine side context is not available and the GPUDisplay is to create
* its own context. */
bool use_gl_context_ = false;
/* Mutex used to guard the `gl_context_`. */

View File

@@ -1,127 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "blender/blender_output_driver.h"
CCL_NAMESPACE_BEGIN
BlenderOutputDriver::BlenderOutputDriver(BL::RenderEngine &b_engine) : b_engine_(b_engine)
{
}
BlenderOutputDriver::~BlenderOutputDriver()
{
}
bool BlenderOutputDriver::read_render_tile(const Tile &tile)
{
/* Get render result. */
BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
tile.offset.y,
tile.size.x,
tile.size.y,
tile.layer.c_str(),
tile.view.c_str());
/* Can happen if the intersected rectangle gives 0 width or height. */
if (b_rr.ptr.data == NULL) {
return false;
}
BL::RenderResult::layers_iterator b_single_rlay;
b_rr.layers.begin(b_single_rlay);
/* layer will be missing if it was disabled in the UI */
if (b_single_rlay == b_rr.layers.end()) {
return false;
}
BL::RenderLayer b_rlay = *b_single_rlay;
vector<float> pixels(tile.size.x * tile.size.y * 4);
/* Copy each pass.
* TODO:copy only the required ones for better performance? */
for (BL::RenderPass &b_pass : b_rlay.passes) {
tile.set_pass_pixels(b_pass.name(), b_pass.channels(), (float *)b_pass.rect());
}
b_engine_.end_result(b_rr, false, false, false);
return true;
}
bool BlenderOutputDriver::update_render_tile(const Tile &tile)
{
/* Use final write for preview renders, otherwise render result wouldn't be be updated
* quickly on Blender side. For all other cases we use the display driver. */
if (b_engine_.is_preview()) {
write_render_tile(tile);
return true;
}
else {
/* Don't highlight full-frame tile. */
if (!(tile.size == tile.full_size)) {
b_engine_.tile_highlight_clear_all();
b_engine_.tile_highlight_set(tile.offset.x, tile.offset.y, tile.size.x, tile.size.y, true);
}
return false;
}
}
void BlenderOutputDriver::write_render_tile(const Tile &tile)
{
b_engine_.tile_highlight_clear_all();
/* Get render result. */
BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
tile.offset.y,
tile.size.x,
tile.size.y,
tile.layer.c_str(),
tile.view.c_str());
/* Can happen if the intersected rectangle gives 0 width or height. */
if (b_rr.ptr.data == NULL) {
return;
}
BL::RenderResult::layers_iterator b_single_rlay;
b_rr.layers.begin(b_single_rlay);
/* Layer will be missing if it was disabled in the UI. */
if (b_single_rlay == b_rr.layers.end()) {
return;
}
BL::RenderLayer b_rlay = *b_single_rlay;
vector<float> pixels(tile.size.x * tile.size.y * 4);
/* Copy each pass. */
for (BL::RenderPass &b_pass : b_rlay.passes) {
if (!tile.get_pass_pixels(b_pass.name(), b_pass.channels(), &pixels[0])) {
memset(&pixels[0], 0, pixels.size() * sizeof(float));
}
b_pass.rect(&pixels[0]);
}
b_engine_.end_result(b_rr, true, false, true);
}
CCL_NAMESPACE_END

View File

@@ -1,40 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "MEM_guardedalloc.h"
#include "RNA_blender_cpp.h"
#include "render/output_driver.h"
CCL_NAMESPACE_BEGIN
class BlenderOutputDriver : public OutputDriver {
public:
BlenderOutputDriver(BL::RenderEngine &b_engine);
~BlenderOutputDriver();
virtual void write_render_tile(const Tile &tile) override;
virtual bool update_render_tile(const Tile &tile) override;
virtual bool read_render_tile(const Tile &tile) override;
protected:
BL::RenderEngine b_engine_;
};
CCL_NAMESPACE_END

View File

@@ -911,16 +911,14 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
{
vector<DeviceType> device_types = Device::available_types();
bool has_cuda = false, has_optix = false, has_hip = false;
bool has_cuda = false, has_optix = false;
foreach (DeviceType device_type, device_types) {
has_cuda |= (device_type == DEVICE_CUDA);
has_optix |= (device_type == DEVICE_OPTIX);
has_hip |= (device_type == DEVICE_HIP);
}
PyObject *list = PyTuple_New(3);
PyObject *list = PyTuple_New(2);
PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
return list;
}
@@ -946,9 +944,6 @@ static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg)
else if (override == "OPTIX") {
BlenderSession::device_override = DEVICE_MASK_OPTIX;
}
else if (override == "HIP") {
BlenderSession::device_override = DEVICE_MASK_HIP;
}
else {
printf("\nError: %s is not a valid Cycles device.\n", override.c_str());
Py_RETURN_FALSE;

View File

@@ -42,8 +42,7 @@
#include "util/util_progress.h"
#include "util/util_time.h"
#include "blender/blender_display_driver.h"
#include "blender/blender_output_driver.h"
#include "blender/blender_gpu_display.h"
#include "blender/blender_session.h"
#include "blender/blender_sync.h"
#include "blender/blender_util.h"
@@ -72,8 +71,7 @@ BlenderSession::BlenderSession(BL::RenderEngine &b_engine,
width(0),
height(0),
preview_osl(preview_osl),
python_thread_state(NULL),
use_developer_ui(false)
python_thread_state(NULL)
{
/* offline render */
background = true;
@@ -158,13 +156,11 @@ void BlenderSession::create_session()
b_v3d, b_rv3d, scene->camera, width, height);
session->reset(session_params, buffer_params);
/* Create GPU display.
* TODO(sergey): Investigate whether DisplayDriver can be used for the preview as well. */
/* Create GPU display. */
if (!b_engine.is_preview() && !headless) {
unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(b_engine,
b_scene);
display_driver_ = display_driver.get();
session->set_display_driver(move(display_driver));
unique_ptr<BlenderGPUDisplay> gpu_display = make_unique<BlenderGPUDisplay>(b_engine, b_scene);
gpu_display_ = gpu_display.get();
session->set_gpu_display(move(gpu_display));
}
/* Viewport and preview (as in, material preview) does not do tiled rendering, so can inform
@@ -281,6 +277,96 @@ void BlenderSession::free_session()
session = nullptr;
}
void BlenderSession::read_render_tile()
{
const int2 tile_offset = session->get_render_tile_offset();
const int2 tile_size = session->get_render_tile_size();
/* get render result */
BL::RenderResult b_rr = b_engine.begin_result(tile_offset.x,
tile_offset.y,
tile_size.x,
tile_size.y,
b_rlay_name.c_str(),
b_rview_name.c_str());
/* can happen if the intersected rectangle gives 0 width or height */
if (b_rr.ptr.data == NULL) {
return;
}
BL::RenderResult::layers_iterator b_single_rlay;
b_rr.layers.begin(b_single_rlay);
/* layer will be missing if it was disabled in the UI */
if (b_single_rlay == b_rr.layers.end())
return;
BL::RenderLayer b_rlay = *b_single_rlay;
vector<float> pixels(tile_size.x * tile_size.y * 4);
/* Copy each pass.
* TODO:copy only the required ones for better performance? */
for (BL::RenderPass &b_pass : b_rlay.passes) {
session->set_render_tile_pixels(b_pass.name(), b_pass.channels(), (float *)b_pass.rect());
}
b_engine.end_result(b_rr, false, false, false);
}
void BlenderSession::write_render_tile()
{
const int2 tile_offset = session->get_render_tile_offset();
const int2 tile_size = session->get_render_tile_size();
const string_view render_layer_name = session->get_render_tile_layer();
const string_view render_view_name = session->get_render_tile_view();
b_engine.tile_highlight_clear_all();
/* get render result */
BL::RenderResult b_rr = b_engine.begin_result(tile_offset.x,
tile_offset.y,
tile_size.x,
tile_size.y,
render_layer_name.c_str(),
render_view_name.c_str());
/* can happen if the intersected rectangle gives 0 width or height */
if (b_rr.ptr.data == NULL) {
return;
}
BL::RenderResult::layers_iterator b_single_rlay;
b_rr.layers.begin(b_single_rlay);
/* layer will be missing if it was disabled in the UI */
if (b_single_rlay == b_rr.layers.end()) {
return;
}
BL::RenderLayer b_rlay = *b_single_rlay;
write_render_result(b_rlay);
b_engine.end_result(b_rr, true, false, true);
}
void BlenderSession::update_render_tile()
{
if (!session->has_multiple_render_tiles()) {
/* Don't highlight full-frame tile. */
return;
}
const int2 tile_offset = session->get_render_tile_offset();
const int2 tile_size = session->get_render_tile_size();
b_engine.tile_highlight_clear_all();
b_engine.tile_highlight_set(tile_offset.x, tile_offset.y, tile_size.x, tile_size.y, true);
}
void BlenderSession::full_buffer_written(string_view filename)
{
full_buffer_files_.emplace_back(filename);
@@ -354,8 +440,18 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
return;
}
/* Create driver to write out render results. */
session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
/* set callback to write out render results */
session->write_render_tile_cb = [&]() { write_render_tile(); };
/* Use final write for preview renders, otherwise render result wouldn't be be updated on Blender
* side. */
/* TODO(sergey): Investigate whether GPUDisplay can be used for the preview as well. */
if (b_engine.is_preview()) {
session->update_render_tile_cb = [&]() { write_render_tile(); };
}
else {
session->update_render_tile_cb = [&]() { update_render_tile(); };
}
session->full_buffer_written_cb = [&](string_view filename) { full_buffer_written(filename); };
@@ -492,22 +588,13 @@ void BlenderSession::render_frame_finish()
for (string_view filename : full_buffer_files_) {
session->process_full_buffer_from_disk(filename);
if (check_and_report_session_error()) {
break;
}
}
for (string_view filename : full_buffer_files_) {
path_remove(filename);
}
/* Clear driver. */
session->set_output_driver(nullptr);
/* clear callback */
session->write_render_tile_cb = function_null;
session->update_render_tile_cb = function_null;
session->full_buffer_written_cb = function_null;
/* All the files are handled.
* Clear the list so that this session can be re-used by Persistent Data. */
full_buffer_files_.clear();
}
static PassType bake_type_to_pass(const string &bake_type_str, const int bake_filter)
@@ -612,8 +699,9 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
pass->set_type(bake_type_to_pass(bake_type, bake_filter));
pass->set_include_albedo((bake_filter & BL::BakeSettings::pass_filter_COLOR));
session->set_display_driver(nullptr);
session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
session->read_render_tile_cb = [&]() { read_render_tile(); };
session->write_render_tile_cb = [&]() { write_render_tile(); };
session->set_gpu_display(nullptr);
if (!session->progress.get_cancel()) {
/* Sync scene. */
@@ -656,7 +744,43 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
session->wait();
}
session->set_output_driver(nullptr);
session->read_render_tile_cb = function_null;
session->write_render_tile_cb = function_null;
}
void BlenderSession::write_render_result(BL::RenderLayer &b_rlay)
{
if (!session->copy_render_tile_from_device()) {
return;
}
const int2 tile_size = session->get_render_tile_size();
vector<float> pixels(tile_size.x * tile_size.y * 4);
/* Copy each pass. */
for (BL::RenderPass &b_pass : b_rlay.passes) {
if (!session->get_render_tile_pixels(b_pass.name(), b_pass.channels(), &pixels[0])) {
memset(&pixels[0], 0, pixels.size() * sizeof(float));
}
b_pass.rect(&pixels[0]);
}
}
void BlenderSession::update_render_result(BL::RenderLayer &b_rlay)
{
if (!session->copy_render_tile_from_device()) {
return;
}
const int2 tile_size = session->get_render_tile_size();
vector<float> pixels(tile_size.x * tile_size.y * 4);
/* Copy combined pass. */
BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str()));
if (session->get_render_tile_pixels("Combined", b_combined_pass.channels(), &pixels[0])) {
b_combined_pass.rect(&pixels[0]);
}
}
void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
@@ -764,7 +888,7 @@ void BlenderSession::draw(BL::SpaceImageEditor &space_image)
}
BL::Array<float, 2> zoom = space_image.zoom();
display_driver_->set_zoom(zoom[0], zoom[1]);
gpu_display_->set_zoom(zoom[0], zoom[1]);
session->draw();
}
@@ -911,27 +1035,20 @@ void BlenderSession::update_status_progress()
last_progress = progress;
}
check_and_report_session_error();
}
bool BlenderSession::check_and_report_session_error()
{
if (!session->progress.get_error()) {
return false;
if (session->progress.get_error()) {
string error = session->progress.get_error_message();
if (error != last_error) {
/* TODO(sergey): Currently C++ RNA API doesn't let us to
* use mnemonic name for the variable. Would be nice to
* have this figured out.
*
* For until then, 1 << 5 means RPT_ERROR.
*/
b_engine.report(1 << 5, error.c_str());
b_engine.error_set(error.c_str());
last_error = error;
}
}
const string error = session->progress.get_error_message();
if (error != last_error) {
/* TODO(sergey): Currently C++ RNA API doesn't let us to use mnemonic name for the variable.
* Would be nice to have this figured out.
*
* For until then, 1 << 5 means RPT_ERROR. */
b_engine.report(1 << 5, error.c_str());
b_engine.error_set(error.c_str());
last_error = error;
}
return true;
}
void BlenderSession::tag_update()

View File

@@ -29,7 +29,7 @@
CCL_NAMESPACE_BEGIN
class BlenderDisplayDriver;
class BlenderGPUDisplay;
class BlenderSync;
class ImageMetaData;
class Scene;
@@ -70,7 +70,20 @@ class BlenderSession {
const int bake_width,
const int bake_height);
void write_render_result(BL::RenderLayer &b_rlay);
void write_render_tile();
void update_render_tile();
void full_buffer_written(string_view filename);
/* update functions are used to update display buffer only after sample was rendered
* only needed for better visual feedback */
void update_render_result(BL::RenderLayer &b_rlay);
/* read functions for baking input */
void read_render_tile();
/* interactive updates */
void synchronize(BL::Depsgraph &b_depsgraph);
@@ -97,7 +110,8 @@ class BlenderSession {
BL::RenderSettings b_render;
BL::Depsgraph b_depsgraph;
/* NOTE: Blender's scene might become invalid after call
* #free_blender_memory_if_possible(). */
* free_blender_memory_if_possible().
*/
BL::Scene b_scene;
BL::SpaceView3D b_v3d;
BL::RegionView3D b_rv3d;
@@ -133,11 +147,6 @@ class BlenderSession {
protected:
void stamp_view_layer_metadata(Scene *scene, const string &view_layer_name);
/* Check whether session error happened.
* If so, it is reported to the render engine and true is returned.
* Otherwise false is returned. */
bool check_and_report_session_error();
void builtin_images_load();
/* Is used after each render layer synchronization is done with the goal
@@ -151,8 +160,8 @@ class BlenderSession {
int last_pass_index = -1;
} draw_state_;
/* NOTE: The BlenderSession references the display driver. */
BlenderDisplayDriver *display_driver_ = nullptr;
/* NOTE: The BlenderSession references the GPU display. */
BlenderGPUDisplay *gpu_display_ = nullptr;
vector<string> full_buffer_files_;
};

View File

@@ -279,7 +279,7 @@ static ShaderNode *add_node(Scene *scene,
array<float3> curve_mapping_curves;
float min_x, max_x;
curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, true);
curvemapping_minmax(mapping, 4, &min_x, &max_x);
curvemapping_minmax(mapping, true, &min_x, &max_x);
curves->set_min_x(min_x);
curves->set_max_x(max_x);
curves->set_curves(curve_mapping_curves);
@@ -292,25 +292,12 @@ static ShaderNode *add_node(Scene *scene,
array<float3> curve_mapping_curves;
float min_x, max_x;
curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, false);
curvemapping_minmax(mapping, 3, &min_x, &max_x);
curvemapping_minmax(mapping, false, &min_x, &max_x);
curves->set_min_x(min_x);
curves->set_max_x(max_x);
curves->set_curves(curve_mapping_curves);
node = curves;
}
else if (b_node.is_a(&RNA_ShaderNodeFloatCurve)) {
BL::ShaderNodeFloatCurve b_curve_node(b_node);
BL::CurveMapping mapping(b_curve_node.mapping());
FloatCurveNode *curve = graph->create_node<FloatCurveNode>();
array<float> curve_mapping_curve;
float min_x, max_x;
curvemapping_float_to_array(mapping, curve_mapping_curve, RAMP_TABLE_SIZE);
curvemapping_minmax(mapping, 1, &min_x, &max_x);
curve->set_min_x(min_x);
curve->set_max_x(max_x);
curve->set_curve(curve_mapping_curve);
node = curve;
}
else if (b_node.is_a(&RNA_ShaderNodeValToRGB)) {
RGBRampNode *ramp = graph->create_node<RGBRampNode>();
BL::ShaderNodeValToRGB b_ramp_node(b_node);

View File

@@ -545,6 +545,8 @@ static PassType get_blender_pass_type(BL::RenderPass &b_pass)
MAP_PASS("Shadow Catcher", PASS_SHADOW_CATCHER);
MAP_PASS("Noisy Shadow Catcher", PASS_SHADOW_CATCHER);
MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER);
MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT);
@@ -602,6 +604,10 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
/* Debug passes. */
if (get_boolean(crl, "pass_debug_render_time")) {
b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str());
pass_add(scene, PASS_RENDER_TIME, "Debug Render Time");
}
if (get_boolean(crl, "pass_debug_sample_count")) {
b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
pass_add(scene, PASS_SAMPLE_COUNT, "Debug Sample Count");

View File

@@ -90,27 +90,26 @@ static inline BL::Mesh object_to_mesh(BL::BlendData & /*data*/,
}
#endif
BL::Mesh mesh = (b_ob_info.object_data.is_a(&RNA_Mesh)) ? BL::Mesh(b_ob_info.object_data) :
BL::Mesh(PointerRNA_NULL);
BL::Mesh mesh(PointerRNA_NULL);
if (b_ob_info.object_data.is_a(&RNA_Mesh)) {
/* TODO: calc_undeformed is not used. */
mesh = BL::Mesh(b_ob_info.object_data);
if (b_ob_info.is_real_object_data()) {
if (mesh) {
/* Make a copy to split faces if we use autosmooth, otherwise not needed.
* Also in edit mode do we need to make a copy, to ensure data layers like
* UV are not empty. */
if (mesh.is_editmode() ||
(mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
BL::Depsgraph depsgraph(PointerRNA_NULL);
mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
}
}
else {
/* Make a copy to split faces if we use autosmooth, otherwise not needed.
* Also in edit mode do we need to make a copy, to ensure data layers like
* UV are not empty. */
if (mesh.is_editmode() ||
(mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
BL::Depsgraph depsgraph(PointerRNA_NULL);
assert(b_ob_info.is_real_object_data());
mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
}
}
else {
/* TODO: what to do about non-mesh geometry instances? */
BL::Depsgraph depsgraph(PointerRNA_NULL);
if (b_ob_info.is_real_object_data()) {
mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
}
}
#if 0
@@ -171,11 +170,12 @@ static inline void curvemap_minmax_curve(/*const*/ BL::CurveMap &curve, float *m
}
static inline void curvemapping_minmax(/*const*/ BL::CurveMapping &cumap,
int num_curves,
bool rgb_curve,
float *min_x,
float *max_x)
{
// const int num_curves = cumap.curves.length(); /* Gives linking error so far. */
const int num_curves = rgb_curve ? 4 : 3;
*min_x = FLT_MAX;
*max_x = -FLT_MAX;
for (int i = 0; i < num_curves; ++i) {
@@ -195,28 +195,6 @@ static inline void curvemapping_to_array(BL::CurveMapping &cumap, array<float> &
}
}
static inline void curvemapping_float_to_array(BL::CurveMapping &cumap,
array<float> &data,
int size)
{
float min = 0.0f, max = 1.0f;
curvemapping_minmax(cumap, 1, &min, &max);
const float range = max - min;
cumap.update();
BL::CurveMap map = cumap.curves[0];
data.resize(size);
for (int i = 0; i < size; i++) {
float t = min + (float)i / (float)(size - 1) * range;
data[i] = cumap.evaluate(map, t);
}
}
static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
array<float3> &data,
int size,
@@ -235,8 +213,7 @@ static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
*
* There might be some better estimations here tho.
*/
const int num_curves = rgb_curve ? 4 : 3;
curvemapping_minmax(cumap, num_curves, &min_x, &max_x);
curvemapping_minmax(cumap, rgb_curve, &min_x, &max_x);
const float range_x = max_x - min_x;

View File

@@ -213,7 +213,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
if (ctx->num_hits < ctx->max_hits) {
Intersection current_isect;
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
for (size_t i = 0; i < ctx->num_hits; ++i) {
for (size_t i = 0; i < ctx->max_hits; ++i) {
if (current_isect.object == ctx->isect_s[i].object &&
current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
/* This intersection was already recorded, skip it. */

View File

@@ -532,13 +532,4 @@ if(WITH_CYCLES_CUDA_BINARIES OR NOT WITH_CUDA_DYNLOAD)
endif()
endif()
###########################################################################
# HIP
###########################################################################
if(NOT WITH_HIP_DYNLOAD)
set(WITH_HIP_DYNLOAD ON)
endif()
unset(_cycles_lib_dir)

View File

@@ -156,16 +156,10 @@ macro(cycles_target_link_libraries target)
${PLATFORM_LINKLIBS}
)
if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX)
if(WITH_CUDA_DYNLOAD)
target_link_libraries(${target} extern_cuew)
else()
target_link_libraries(${target} ${CUDA_CUDA_LIBRARY})
endif()
endif()
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
target_link_libraries(${target} extern_hipew)
if(WITH_CUDA_DYNLOAD)
target_link_libraries(${target} extern_cuew)
else()
target_link_libraries(${target} ${CUDA_CUDA_LIBRARY})
endif()
if(CYCLES_STANDALONE_REPOSITORY)

View File

@@ -22,25 +22,16 @@ set(INC_SYS
../../../extern/clew/include
)
if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
if(WITH_CUDA_DYNLOAD)
list(APPEND INC
../../../extern/cuew/include
)
add_definitions(-DWITH_CUDA_DYNLOAD)
else()
list(APPEND INC_SYS
${CUDA_TOOLKIT_INCLUDE}
)
add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
endif()
endif()
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
if(WITH_CUDA_DYNLOAD)
list(APPEND INC
../../../extern/hipew/include
../../../extern/cuew/include
)
add_definitions(-DWITH_HIP_DYNLOAD)
add_definitions(-DWITH_CUDA_DYNLOAD)
else()
list(APPEND INC_SYS
${CUDA_TOOLKIT_INCLUDE}
)
add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
endif()
set(SRC
@@ -79,21 +70,6 @@ set(SRC_CUDA
cuda/util.h
)
set(SRC_HIP
hip/device.cpp
hip/device.h
hip/device_impl.cpp
hip/device_impl.h
hip/graphics_interop.cpp
hip/graphics_interop.h
hip/kernel.cpp
hip/kernel.h
hip/queue.cpp
hip/queue.h
hip/util.cpp
hip/util.h
)
set(SRC_DUMMY
dummy/device.cpp
dummy/device.h
@@ -129,21 +105,13 @@ set(LIB
${CYCLES_GL_LIBRARIES}
)
if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
if(WITH_CUDA_DYNLOAD)
list(APPEND LIB
extern_cuew
)
else()
list(APPEND LIB
${CUDA_CUDA_LIBRARY}
)
endif()
endif()
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
if(WITH_CUDA_DYNLOAD)
list(APPEND LIB
extern_hipew
extern_cuew
)
else()
list(APPEND LIB
${CUDA_CUDA_LIBRARY}
)
endif()
@@ -152,9 +120,6 @@ add_definitions(${GL_DEFINITIONS})
if(WITH_CYCLES_DEVICE_CUDA)
add_definitions(-DWITH_CUDA)
endif()
if(WITH_CYCLES_DEVICE_HIP)
add_definitions(-DWITH_HIP)
endif()
if(WITH_CYCLES_DEVICE_OPTIX)
add_definitions(-DWITH_OPTIX)
endif()
@@ -175,7 +140,6 @@ cycles_add_library(cycles_device "${LIB}"
${SRC}
${SRC_CPU}
${SRC_CUDA}
${SRC_HIP}
${SRC_DUMMY}
${SRC_MULTI}
${SRC_OPTIX}

View File

@@ -37,15 +37,14 @@ CUDADeviceGraphicsInterop::~CUDADeviceGraphicsInterop()
}
}
void CUDADeviceGraphicsInterop::set_display_interop(
const DisplayDriver::GraphicsInterop &display_interop)
void CUDADeviceGraphicsInterop::set_destination(
const DeviceGraphicsInteropDestination &destination)
{
const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
display_interop.buffer_height;
const int64_t new_buffer_area = int64_t(destination.buffer_width) * destination.buffer_height;
need_clear_ = display_interop.need_clear;
need_clear_ = destination.need_clear;
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
if (opengl_pbo_id_ == destination.opengl_pbo_id && buffer_area_ == new_buffer_area) {
return;
}
@@ -56,12 +55,12 @@ void CUDADeviceGraphicsInterop::set_display_interop(
}
const CUresult result = cuGraphicsGLRegisterBuffer(
&cu_graphics_resource_, display_interop.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
&cu_graphics_resource_, destination.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
if (result != CUDA_SUCCESS) {
LOG(ERROR) << "Error registering OpenGL buffer: " << cuewErrorString(result);
}
opengl_pbo_id_ = display_interop.opengl_pbo_id;
opengl_pbo_id_ = destination.opengl_pbo_id;
buffer_area_ = new_buffer_area;
}

View File

@@ -41,7 +41,7 @@ class CUDADeviceGraphicsInterop : public DeviceGraphicsInterop {
CUDADeviceGraphicsInterop &operator=(const CUDADeviceGraphicsInterop &other) = delete;
CUDADeviceGraphicsInterop &operator=(CUDADeviceGraphicsInterop &&other) = delete;
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
virtual void set_destination(const DeviceGraphicsInteropDestination &destination) override;
virtual device_ptr map() override;
virtual void unmap() override;

View File

@@ -116,18 +116,18 @@ bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *ar
}
/* Launch kernel. */
assert_success(cuLaunchKernel(cuda_kernel.function,
num_blocks,
1,
1,
num_threads_per_block,
1,
1,
shared_mem_bytes,
cuda_stream_,
args,
0),
"enqueue");
cuda_device_assert(cuda_device_,
cuLaunchKernel(cuda_kernel.function,
num_blocks,
1,
1,
num_threads_per_block,
1,
1,
shared_mem_bytes,
cuda_stream_,
args,
0));
return !(cuda_device_->have_error());
}
@@ -139,8 +139,7 @@ bool CUDADeviceQueue::synchronize()
}
const CUDAContextScope scope(cuda_device_);
assert_success(cuStreamSynchronize(cuda_stream_), "synchronize");
cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_));
debug_synchronize();
return !(cuda_device_->have_error());
@@ -163,9 +162,9 @@ void CUDADeviceQueue::zero_to_device(device_memory &mem)
assert(mem.device_pointer != 0);
const CUDAContextScope scope(cuda_device_);
assert_success(
cuMemsetD8Async((CUdeviceptr)mem.device_pointer, 0, mem.memory_size(), cuda_stream_),
"zero_to_device");
cuda_device_assert(
cuda_device_,
cuMemsetD8Async((CUdeviceptr)mem.device_pointer, 0, mem.memory_size(), cuda_stream_));
}
void CUDADeviceQueue::copy_to_device(device_memory &mem)
@@ -186,10 +185,10 @@ void CUDADeviceQueue::copy_to_device(device_memory &mem)
/* Copy memory to device. */
const CUDAContextScope scope(cuda_device_);
assert_success(
cuda_device_assert(
cuda_device_,
cuMemcpyHtoDAsync(
(CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size(), cuda_stream_),
"copy_to_device");
(CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size(), cuda_stream_));
}
void CUDADeviceQueue::copy_from_device(device_memory &mem)
@@ -205,19 +204,10 @@ void CUDADeviceQueue::copy_from_device(device_memory &mem)
/* Copy memory from device. */
const CUDAContextScope scope(cuda_device_);
assert_success(
cuda_device_assert(
cuda_device_,
cuMemcpyDtoHAsync(
mem.host_pointer, (CUdeviceptr)mem.device_pointer, mem.memory_size(), cuda_stream_),
"copy_from_device");
}
void CUDADeviceQueue::assert_success(CUresult result, const char *operation)
{
if (result != CUDA_SUCCESS) {
const char *name = cuewErrorString(result);
cuda_device_->set_error(string_printf(
"%s in CUDA queue %s (%s)", name, operation, debug_active_kernels().c_str()));
}
mem.host_pointer, (CUdeviceptr)mem.device_pointer, mem.memory_size(), cuda_stream_));
}
unique_ptr<DeviceGraphicsInterop> CUDADeviceQueue::graphics_interop_create()

View File

@@ -60,8 +60,6 @@ class CUDADeviceQueue : public DeviceQueue {
protected:
CUDADevice *cuda_device_;
CUstream cuda_stream_;
void assert_success(CUresult result, const char *operation);
};
CCL_NAMESPACE_END

View File

@@ -25,7 +25,6 @@
#include "device/cpu/device.h"
#include "device/cuda/device.h"
#include "device/dummy/device.h"
#include "device/hip/device.h"
#include "device/multi/device.h"
#include "device/optix/device.h"
@@ -47,7 +46,6 @@ thread_mutex Device::device_mutex;
vector<DeviceInfo> Device::cuda_devices;
vector<DeviceInfo> Device::optix_devices;
vector<DeviceInfo> Device::cpu_devices;
vector<DeviceInfo> Device::hip_devices;
uint Device::devices_initialized_mask = 0;
/* Device */
@@ -98,14 +96,6 @@ Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
device = device_optix_create(info, stats, profiler);
break;
#endif
#ifdef WITH_HIP
case DEVICE_HIP:
if (device_hip_init())
device = device_hip_create(info, stats, profiler);
break;
#endif
default:
break;
}
@@ -127,8 +117,6 @@ DeviceType Device::type_from_string(const char *name)
return DEVICE_OPTIX;
else if (strcmp(name, "MULTI") == 0)
return DEVICE_MULTI;
else if (strcmp(name, "HIP") == 0)
return DEVICE_HIP;
return DEVICE_NONE;
}
@@ -143,8 +131,6 @@ string Device::string_from_type(DeviceType type)
return "OPTIX";
else if (type == DEVICE_MULTI)
return "MULTI";
else if (type == DEVICE_HIP)
return "HIP";
return "";
}
@@ -159,10 +145,6 @@ vector<DeviceType> Device::available_types()
#ifdef WITH_OPTIX
types.push_back(DEVICE_OPTIX);
#endif
#ifdef WITH_HIP
types.push_back(DEVICE_HIP);
#endif
return types;
}
@@ -204,20 +186,6 @@ vector<DeviceInfo> Device::available_devices(uint mask)
}
#endif
#ifdef WITH_HIP
if (mask & DEVICE_MASK_HIP) {
if (!(devices_initialized_mask & DEVICE_MASK_HIP)) {
if (device_hip_init()) {
device_hip_info(hip_devices);
}
devices_initialized_mask |= DEVICE_MASK_HIP;
}
foreach (DeviceInfo &info, hip_devices) {
devices.push_back(info);
}
}
#endif
if (mask & DEVICE_MASK_CPU) {
if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
device_cpu_info(cpu_devices);
@@ -258,15 +226,6 @@ string Device::device_capabilities(uint mask)
}
#endif
#ifdef WITH_HIP
if (mask & DEVICE_MASK_HIP) {
if (device_hip_init()) {
capabilities += "\nHIP device capabilities:\n";
capabilities += device_hip_capabilities();
}
}
#endif
return capabilities;
}
@@ -355,7 +314,6 @@ void Device::free_memory()
devices_initialized_mask = 0;
cuda_devices.free_memory();
optix_devices.free_memory();
hip_devices.free_memory();
cpu_devices.free_memory();
}

View File

@@ -51,7 +51,6 @@ enum DeviceType {
DEVICE_CUDA,
DEVICE_MULTI,
DEVICE_OPTIX,
DEVICE_HIP,
DEVICE_DUMMY,
};
@@ -59,7 +58,6 @@ enum DeviceTypeMask {
DEVICE_MASK_CPU = (1 << DEVICE_CPU),
DEVICE_MASK_CUDA = (1 << DEVICE_CUDA),
DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
DEVICE_MASK_HIP = (1 << DEVICE_HIP),
DEVICE_MASK_ALL = ~0
};
@@ -286,7 +284,6 @@ class Device {
static vector<DeviceInfo> cuda_devices;
static vector<DeviceInfo> optix_devices;
static vector<DeviceInfo> cpu_devices;
static vector<DeviceInfo> hip_devices;
static uint devices_initialized_mask;
};

View File

@@ -16,12 +16,25 @@
#pragma once
#include "render/display_driver.h"
#include "util/util_types.h"
CCL_NAMESPACE_BEGIN
/* Information about interoperability destination.
* Is provided by the GPUDisplay. */
class DeviceGraphicsInteropDestination {
public:
/* Dimensions of the buffer, in pixels. */
int buffer_width = 0;
int buffer_height = 0;
/* OpenGL pixel buffer object. */
int opengl_pbo_id = 0;
/* Clear the entire destination before doing partial write to it. */
bool need_clear = false;
};
/* Device-side graphics interoperability support.
*
* Takes care of holding all the handlers needed by the device to implement interoperability with
@@ -33,7 +46,7 @@ class DeviceGraphicsInterop {
/* Update this device-side graphics interoperability object with the given destination resource
* information. */
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) = 0;
virtual void set_destination(const DeviceGraphicsInteropDestination &destination) = 0;
virtual device_ptr map() = 0;
virtual void unmap() = 0;

View File

@@ -277,7 +277,6 @@ class device_memory {
protected:
friend class CUDADevice;
friend class OptiXDevice;
friend class HIPDevice;
/* Only create through subclasses. */
device_memory(Device *device, const char *name, MemoryType type);

View File

@@ -57,9 +57,8 @@ void DeviceQueue::debug_init_execution()
{
if (VLOG_IS_ON(3)) {
last_sync_time_ = time_dt();
last_kernels_enqueued_ = 0;
}
last_kernels_enqueued_ = 0;
}
void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size)
@@ -67,9 +66,8 @@ void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size)
if (VLOG_IS_ON(3)) {
VLOG(4) << "GPU queue launch " << device_kernel_as_string(kernel) << ", work_size "
<< work_size;
last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel);
}
last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel);
}
void DeviceQueue::debug_synchronize()
@@ -82,14 +80,8 @@ void DeviceQueue::debug_synchronize()
stats_kernel_time_[last_kernels_enqueued_] += elapsed_time;
last_sync_time_ = new_time;
last_kernels_enqueued_ = 0;
}
last_kernels_enqueued_ = 0;
}
string DeviceQueue::debug_active_kernels()
{
return device_kernel_mask_as_string(last_kernels_enqueued_);
}
CCL_NAMESPACE_END

View File

@@ -21,7 +21,6 @@
#include "device/device_graphics_interop.h"
#include "util/util_logging.h"
#include "util/util_map.h"
#include "util/util_string.h"
#include "util/util_unique_ptr.h"
CCL_NAMESPACE_BEGIN
@@ -102,7 +101,6 @@ class DeviceQueue {
void debug_init_execution();
void debug_enqueue(DeviceKernel kernel, const int work_size);
void debug_synchronize();
string debug_active_kernels();
/* Combination of kernels enqueued together sync last synchronize. */
DeviceKernelMask last_kernels_enqueued_;

View File

@@ -1,276 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "device/hip/device.h"
#include "util/util_logging.h"
#ifdef WITH_HIP
# include "device/device.h"
# include "device/hip/device_impl.h"
# include "util/util_string.h"
# include "util/util_windows.h"
#endif /* WITH_HIP */
CCL_NAMESPACE_BEGIN
bool device_hip_init()
{
#if !defined(WITH_HIP)
return false;
#elif defined(WITH_HIP_DYNLOAD)
static bool initialized = false;
static bool result = false;
if (initialized)
return result;
initialized = true;
int hipew_result = hipewInit(HIPEW_INIT_HIP);
if (hipew_result == HIPEW_SUCCESS) {
VLOG(1) << "HIPEW initialization succeeded";
if (HIPDevice::have_precompiled_kernels()) {
VLOG(1) << "Found precompiled kernels";
result = true;
}
else if (hipewCompilerPath() != NULL) {
VLOG(1) << "Found HIPCC " << hipewCompilerPath();
result = true;
}
else {
VLOG(1) << "Neither precompiled kernels nor HIPCC was found,"
<< " unable to use HIP";
}
}
else {
VLOG(1) << "HIPEW initialization failed: "
<< ((hipew_result == HIPEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
"Error opening the library");
}
return result;
#else /* WITH_HIP_DYNLOAD */
return true;
#endif /* WITH_HIP_DYNLOAD */
}
Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
#ifdef WITH_HIP
return new HIPDevice(info, stats, profiler);
#else
(void)info;
(void)stats;
(void)profiler;
LOG(FATAL) << "Request to create HIP device without compiled-in support. Should never happen.";
return nullptr;
#endif
}
#ifdef WITH_HIP
static hipError_t device_hip_safe_init()
{
# ifdef _WIN32
__try {
return hipInit(0);
}
__except (EXCEPTION_EXECUTE_HANDLER) {
/* Ignore crashes inside the HIP driver and hope we can
* survive even with corrupted HIP installs. */
fprintf(stderr, "Cycles HIP: driver crashed, continuing without HIP.\n");
}
return hipErrorNoDevice;
# else
return hipInit(0);
# endif
}
#endif /* WITH_HIP */
void device_hip_info(vector<DeviceInfo> &devices)
{
#ifdef WITH_HIP
hipError_t result = device_hip_safe_init();
if (result != hipSuccess) {
if (result != hipErrorNoDevice)
fprintf(stderr, "HIP hipInit: %s\n", hipewErrorString(result));
return;
}
int count = 0;
result = hipGetDeviceCount(&count);
if (result != hipSuccess) {
fprintf(stderr, "HIP hipGetDeviceCount: %s\n", hipewErrorString(result));
return;
}
vector<DeviceInfo> display_devices;
for (int num = 0; num < count; num++) {
char name[256];
result = hipDeviceGetName(name, 256, num);
if (result != hipSuccess) {
fprintf(stderr, "HIP :hipDeviceGetName: %s\n", hipewErrorString(result));
continue;
}
int major;
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, num);
// TODO : (Arya) What is the last major version we are supporting?
DeviceInfo info;
info.type = DEVICE_HIP;
info.description = string(name);
info.num = num;
info.has_half_images = (major >= 3);
info.has_nanovdb = true;
info.denoisers = 0;
info.has_gpu_queue = true;
/* Check if the device has P2P access to any other device in the system. */
for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
if (num != peer_num) {
int can_access = 0;
hipDeviceCanAccessPeer(&can_access, num, peer_num);
info.has_peer_memory = (can_access != 0);
}
}
int pci_location[3] = {0, 0, 0};
hipDeviceGetAttribute(&pci_location[0], hipDeviceAttributePciDomainID, num);
hipDeviceGetAttribute(&pci_location[1], hipDeviceAttributePciBusId, num);
hipDeviceGetAttribute(&pci_location[2], hipDeviceAttributePciDeviceId, num);
info.id = string_printf("HIP_%s_%04x:%02x:%02x",
name,
(unsigned int)pci_location[0],
(unsigned int)pci_location[1],
(unsigned int)pci_location[2]);
/* If device has a kernel timeout and no compute preemption, we assume
* it is connected to a display and will freeze the display while doing
* computations. */
int timeout_attr = 0, preempt_attr = 0;
hipDeviceGetAttribute(&timeout_attr, hipDeviceAttributeKernelExecTimeout, num);
if (timeout_attr && !preempt_attr) {
VLOG(1) << "Device is recognized as display.";
info.description += " (Display)";
info.display_device = true;
display_devices.push_back(info);
}
else {
VLOG(1) << "Device has compute preemption or is not used for display.";
devices.push_back(info);
}
VLOG(1) << "Added device \"" << name << "\" with id \"" << info.id << "\".";
}
if (!display_devices.empty())
devices.insert(devices.end(), display_devices.begin(), display_devices.end());
#else /* WITH_HIP */
(void)devices;
#endif /* WITH_HIP */
}
string device_hip_capabilities()
{
#ifdef WITH_HIP
hipError_t result = device_hip_safe_init();
if (result != hipSuccess) {
if (result != hipErrorNoDevice) {
return string("Error initializing HIP: ") + hipewErrorString(result);
}
return "No HIP device found\n";
}
int count;
result = hipGetDeviceCount(&count);
if (result != hipSuccess) {
return string("Error getting devices: ") + hipewErrorString(result);
}
string capabilities = "";
for (int num = 0; num < count; num++) {
char name[256];
if (hipDeviceGetName(name, 256, num) != hipSuccess) {
continue;
}
capabilities += string("\t") + name + "\n";
int value;
# define GET_ATTR(attr) \
{ \
if (hipDeviceGetAttribute(&value, hipDeviceAttribute##attr, num) == hipSuccess) { \
capabilities += string_printf("\t\thipDeviceAttribute" #attr "\t\t\t%d\n", value); \
} \
} \
(void)0
/* TODO(sergey): Strip all attributes which are not useful for us
* or does not depend on the driver.
*/
GET_ATTR(MaxThreadsPerBlock);
GET_ATTR(MaxBlockDimX);
GET_ATTR(MaxBlockDimY);
GET_ATTR(MaxBlockDimZ);
GET_ATTR(MaxGridDimX);
GET_ATTR(MaxGridDimY);
GET_ATTR(MaxGridDimZ);
GET_ATTR(MaxSharedMemoryPerBlock);
GET_ATTR(TotalConstantMemory);
GET_ATTR(WarpSize);
GET_ATTR(MaxPitch);
GET_ATTR(MaxRegistersPerBlock);
GET_ATTR(ClockRate);
GET_ATTR(TextureAlignment);
GET_ATTR(MultiprocessorCount);
GET_ATTR(KernelExecTimeout);
GET_ATTR(Integrated);
GET_ATTR(CanMapHostMemory);
GET_ATTR(ComputeMode);
GET_ATTR(MaxTexture1DWidth);
GET_ATTR(MaxTexture2DWidth);
GET_ATTR(MaxTexture2DHeight);
GET_ATTR(MaxTexture3DWidth);
GET_ATTR(MaxTexture3DHeight);
GET_ATTR(MaxTexture3DDepth);
GET_ATTR(ConcurrentKernels);
GET_ATTR(EccEnabled);
GET_ATTR(MemoryClockRate);
GET_ATTR(MemoryBusWidth);
GET_ATTR(L2CacheSize);
GET_ATTR(MaxThreadsPerMultiProcessor);
GET_ATTR(ComputeCapabilityMajor);
GET_ATTR(ComputeCapabilityMinor);
GET_ATTR(MaxSharedMemoryPerMultiprocessor);
GET_ATTR(ManagedMemory);
GET_ATTR(IsMultiGpuBoard);
# undef GET_ATTR
capabilities += "\n";
}
return capabilities;
#else /* WITH_HIP */
return "";
#endif /* WITH_HIP */
}
CCL_NAMESPACE_END

View File

@@ -1,37 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "util/util_string.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
class Device;
class DeviceInfo;
class Profiler;
class Stats;
bool device_hip_init();
Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
void device_hip_info(vector<DeviceInfo> &devices);
string device_hip_capabilities();
CCL_NAMESPACE_END

File diff suppressed because it is too large Load Diff

View File

@@ -1,153 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_HIP
# include "device/device.h"
# include "device/hip/kernel.h"
# include "device/hip/queue.h"
# include "device/hip/util.h"
# include "util/util_map.h"
# ifdef WITH_HIP_DYNLOAD
# include "hipew.h"
# else
# include "util/util_opengl.h"
# endif
CCL_NAMESPACE_BEGIN
class DeviceQueue;
class HIPDevice : public Device {
friend class HIPContextScope;
public:
hipDevice_t hipDevice;
hipCtx_t hipContext;
hipModule_t hipModule;
size_t device_texture_headroom;
size_t device_working_headroom;
bool move_texture_to_host;
size_t map_host_used;
size_t map_host_limit;
int can_map_host;
int pitch_alignment;
int hipDevId;
int hipDevArchitecture;
bool first_error;
struct HIPMem {
HIPMem() : texobject(0), array(0), use_mapped_host(false)
{
}
hipTextureObject_t texobject;
hArray array;
/* If true, a mapped host memory in shared_pointer is being used. */
bool use_mapped_host;
};
typedef map<device_memory *, HIPMem> HIPMemMap;
HIPMemMap hip_mem_map;
thread_mutex hip_mem_map_mutex;
/* Bindless Textures */
device_vector<TextureInfo> texture_info;
bool need_texture_info;
HIPDeviceKernels kernels;
static bool have_precompiled_kernels();
virtual bool show_samples() const override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
void set_error(const string &error) override;
HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
virtual ~HIPDevice();
bool support_device(const uint /*kernel_features*/);
bool check_peer_access(Device *peer_device) override;
bool use_adaptive_compilation();
virtual string compile_kernel_get_common_cflags(const uint kernel_features);
string compile_kernel(const uint kernel_features,
const char *name,
const char *base = "hip",
bool force_ptx = false);
virtual bool load_kernels(const uint kernel_features) override;
void reserve_local_memory(const uint kernel_features);
void init_host_memory();
void load_texture_info();
void move_textures_to_host(size_t size, bool for_texture);
HIPMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
void generic_copy_to(device_memory &mem);
void generic_free(device_memory &mem);
void mem_alloc(device_memory &mem) override;
void mem_copy_to(device_memory &mem) override;
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
void mem_zero(device_memory &mem) override;
void mem_free(device_memory &mem) override;
device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
virtual void const_copy_to(const char *name, void *host, size_t size) override;
void global_alloc(device_memory &mem);
void global_free(device_memory &mem);
void tex_alloc(device_texture &mem);
void tex_free(device_texture &mem);
/* Graphics resources interoperability. */
virtual bool should_use_graphics_interop() override;
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
int get_num_multiprocessors();
int get_max_num_threads_per_multiprocessor();
protected:
bool get_device_attribute(hipDeviceAttribute_t attribute, int *value);
int get_device_default_attribute(hipDeviceAttribute_t attribute, int default_value);
};
CCL_NAMESPACE_END
#endif

View File

@@ -1,105 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_HIP
# include "device/hip/graphics_interop.h"
# include "device/hip/device_impl.h"
# include "device/hip/util.h"
CCL_NAMESPACE_BEGIN
HIPDeviceGraphicsInterop::HIPDeviceGraphicsInterop(HIPDeviceQueue *queue)
: queue_(queue), device_(static_cast<HIPDevice *>(queue->device))
{
}
HIPDeviceGraphicsInterop::~HIPDeviceGraphicsInterop()
{
HIPContextScope scope(device_);
if (hip_graphics_resource_) {
hip_device_assert(device_, hipGraphicsUnregisterResource(hip_graphics_resource_));
}
}
void HIPDeviceGraphicsInterop::set_display_interop(
const DisplayDriver::GraphicsInterop &display_interop)
{
const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
display_interop.buffer_height;
need_clear_ = display_interop.need_clear;
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
return;
}
HIPContextScope scope(device_);
if (hip_graphics_resource_) {
hip_device_assert(device_, hipGraphicsUnregisterResource(hip_graphics_resource_));
}
const hipError_t result = hipGraphicsGLRegisterBuffer(
&hip_graphics_resource_, display_interop.opengl_pbo_id, hipGraphicsRegisterFlagsNone);
if (result != hipSuccess) {
LOG(ERROR) << "Error registering OpenGL buffer: " << hipewErrorString(result);
}
opengl_pbo_id_ = display_interop.opengl_pbo_id;
buffer_area_ = new_buffer_area;
}
device_ptr HIPDeviceGraphicsInterop::map()
{
if (!hip_graphics_resource_) {
return 0;
}
HIPContextScope scope(device_);
hipDeviceptr_t hip_buffer;
size_t bytes;
hip_device_assert(device_,
hipGraphicsMapResources(1, &hip_graphics_resource_, queue_->stream()));
hip_device_assert(
device_, hipGraphicsResourceGetMappedPointer(&hip_buffer, &bytes, hip_graphics_resource_));
if (need_clear_) {
hip_device_assert(
device_,
hipMemsetD8Async(static_cast<hipDeviceptr_t>(hip_buffer), 0, bytes, queue_->stream()));
need_clear_ = false;
}
return static_cast<device_ptr>(hip_buffer);
}
void HIPDeviceGraphicsInterop::unmap()
{
HIPContextScope scope(device_);
hip_device_assert(device_,
hipGraphicsUnmapResources(1, &hip_graphics_resource_, queue_->stream()));
}
CCL_NAMESPACE_END
#endif

View File

@@ -1,64 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_HIP
# include "device/device_graphics_interop.h"
# ifdef WITH_HIP_DYNLOAD
# include "hipew.h"
# endif
CCL_NAMESPACE_BEGIN
class HIPDevice;
class HIPDeviceQueue;
class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
public:
explicit HIPDeviceGraphicsInterop(HIPDeviceQueue *queue);
HIPDeviceGraphicsInterop(const HIPDeviceGraphicsInterop &other) = delete;
HIPDeviceGraphicsInterop(HIPDeviceGraphicsInterop &&other) noexcept = delete;
~HIPDeviceGraphicsInterop();
HIPDeviceGraphicsInterop &operator=(const HIPDeviceGraphicsInterop &other) = delete;
HIPDeviceGraphicsInterop &operator=(HIPDeviceGraphicsInterop &&other) = delete;
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
virtual device_ptr map() override;
virtual void unmap() override;
protected:
HIPDeviceQueue *queue_ = nullptr;
HIPDevice *device_ = nullptr;
/* OpenGL PBO which is currently registered as the destination for the CUDA buffer. */
uint opengl_pbo_id_ = 0;
/* Buffer area in pixels of the corresponding PBO. */
int64_t buffer_area_ = 0;
/* The destination was requested to be cleared. */
bool need_clear_ = false;
hipGraphicsResource hip_graphics_resource_ = nullptr;
};
CCL_NAMESPACE_END
#endif

View File

@@ -1,69 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_HIP
# include "device/hip/kernel.h"
# include "device/hip/device_impl.h"
CCL_NAMESPACE_BEGIN
void HIPDeviceKernels::load(HIPDevice *device)
{
hipModule_t hipModule = device->hipModule;
for (int i = 0; i < (int)DEVICE_KERNEL_NUM; i++) {
HIPDeviceKernel &kernel = kernels_[i];
/* No mega-kernel used for GPU. */
if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
continue;
}
const std::string function_name = std::string("kernel_gpu_") +
device_kernel_as_string((DeviceKernel)i);
hip_device_assert(device,
hipModuleGetFunction(&kernel.function, hipModule, function_name.c_str()));
if (kernel.function) {
hip_device_assert(device, hipFuncSetCacheConfig(kernel.function, hipFuncCachePreferL1));
hip_device_assert(
device,
hipModuleOccupancyMaxPotentialBlockSize(
&kernel.min_blocks, &kernel.num_threads_per_block, kernel.function, 0, 0));
}
else {
LOG(ERROR) << "Unable to load kernel " << function_name;
}
}
loaded = true;
}
const HIPDeviceKernel &HIPDeviceKernels::get(DeviceKernel kernel) const
{
return kernels_[(int)kernel];
}
bool HIPDeviceKernels::available(DeviceKernel kernel) const
{
return kernels_[(int)kernel].function != nullptr;
}
CCL_NAMESPACE_END
#endif /* WITH_HIP*/

View File

@@ -1,54 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifdef WITH_HIP
# include "device/device_kernel.h"
# ifdef WITH_HIP_DYNLOAD
# include "hipew.h"
# endif
CCL_NAMESPACE_BEGIN
class HIPDevice;
/* HIP kernel and associate occupancy information. */
class HIPDeviceKernel {
public:
hipFunction_t function = nullptr;
int num_threads_per_block = 0;
int min_blocks = 0;
};
/* Cache of HIP kernels for each DeviceKernel. */
class HIPDeviceKernels {
public:
void load(HIPDevice *device);
const HIPDeviceKernel &get(DeviceKernel kernel) const;
bool available(DeviceKernel kernel) const;
protected:
HIPDeviceKernel kernels_[DEVICE_KERNEL_NUM];
bool loaded = false;
};
CCL_NAMESPACE_END
#endif /* WITH_HIP */

View File

@@ -1,209 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_HIP
# include "device/hip/queue.h"
# include "device/hip/device_impl.h"
# include "device/hip/graphics_interop.h"
# include "device/hip/kernel.h"
CCL_NAMESPACE_BEGIN
/* HIPDeviceQueue */
HIPDeviceQueue::HIPDeviceQueue(HIPDevice *device)
: DeviceQueue(device), hip_device_(device), hip_stream_(nullptr)
{
const HIPContextScope scope(hip_device_);
hip_device_assert(hip_device_, hipStreamCreateWithFlags(&hip_stream_, hipStreamNonBlocking));
}
HIPDeviceQueue::~HIPDeviceQueue()
{
const HIPContextScope scope(hip_device_);
hipStreamDestroy(hip_stream_);
}
int HIPDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const
{
/* TODO: compute automatically. */
/* TODO: must have at least num_threads_per_block. */
return 14416128;
}
int HIPDeviceQueue::num_concurrent_busy_states() const
{
const int max_num_threads = hip_device_->get_num_multiprocessors() *
hip_device_->get_max_num_threads_per_multiprocessor();
if (max_num_threads == 0) {
return 65536;
}
return 4 * max_num_threads;
}
void HIPDeviceQueue::init_execution()
{
/* Synchronize all textures and memory copies before executing task. */
HIPContextScope scope(hip_device_);
hip_device_->load_texture_info();
hip_device_assert(hip_device_, hipDeviceSynchronize());
debug_init_execution();
}
bool HIPDeviceQueue::kernel_available(DeviceKernel kernel) const
{
return hip_device_->kernels.available(kernel);
}
bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
{
if (hip_device_->have_error()) {
return false;
}
debug_enqueue(kernel, work_size);
const HIPContextScope scope(hip_device_);
const HIPDeviceKernel &hip_kernel = hip_device_->kernels.get(kernel);
/* Compute kernel launch parameters. */
const int num_threads_per_block = hip_kernel.num_threads_per_block;
const int num_blocks = divide_up(work_size, num_threads_per_block);
int shared_mem_bytes = 0;
switch (kernel) {
case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
/* See parall_active_index.h for why this amount of shared memory is needed. */
shared_mem_bytes = (num_threads_per_block + 1) * sizeof(int);
break;
default:
break;
}
/* Launch kernel. */
hip_device_assert(hip_device_,
hipModuleLaunchKernel(hip_kernel.function,
num_blocks,
1,
1,
num_threads_per_block,
1,
1,
shared_mem_bytes,
hip_stream_,
args,
0));
return !(hip_device_->have_error());
}
bool HIPDeviceQueue::synchronize()
{
if (hip_device_->have_error()) {
return false;
}
const HIPContextScope scope(hip_device_);
hip_device_assert(hip_device_, hipStreamSynchronize(hip_stream_));
debug_synchronize();
return !(hip_device_->have_error());
}
void HIPDeviceQueue::zero_to_device(device_memory &mem)
{
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {
return;
}
/* Allocate on demand. */
if (mem.device_pointer == 0) {
hip_device_->mem_alloc(mem);
}
/* Zero memory on device. */
assert(mem.device_pointer != 0);
const HIPContextScope scope(hip_device_);
hip_device_assert(
hip_device_,
hipMemsetD8Async((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size(), hip_stream_));
}
void HIPDeviceQueue::copy_to_device(device_memory &mem)
{
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {
return;
}
/* Allocate on demand. */
if (mem.device_pointer == 0) {
hip_device_->mem_alloc(mem);
}
assert(mem.device_pointer != 0);
assert(mem.host_pointer != nullptr);
/* Copy memory to device. */
const HIPContextScope scope(hip_device_);
hip_device_assert(
hip_device_,
hipMemcpyHtoDAsync(
(hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size(), hip_stream_));
}
void HIPDeviceQueue::copy_from_device(device_memory &mem)
{
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {
return;
}
assert(mem.device_pointer != 0);
assert(mem.host_pointer != nullptr);
/* Copy memory from device. */
const HIPContextScope scope(hip_device_);
hip_device_assert(
hip_device_,
hipMemcpyDtoHAsync(
mem.host_pointer, (hipDeviceptr_t)mem.device_pointer, mem.memory_size(), hip_stream_));
}
// TODO : (Arya) Enable this after stabilizing dev branch
unique_ptr<DeviceGraphicsInterop> HIPDeviceQueue::graphics_interop_create()
{
return make_unique<HIPDeviceGraphicsInterop>(this);
}
CCL_NAMESPACE_END
#endif /* WITH_HIP */

View File

@@ -1,68 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifdef WITH_HIP
# include "device/device_kernel.h"
# include "device/device_memory.h"
# include "device/device_queue.h"
# include "device/hip/util.h"
CCL_NAMESPACE_BEGIN
class HIPDevice;
class device_memory;
/* Base class for HIP queues. */
class HIPDeviceQueue : public DeviceQueue {
public:
HIPDeviceQueue(HIPDevice *device);
~HIPDeviceQueue();
virtual int num_concurrent_states(const size_t state_size) const override;
virtual int num_concurrent_busy_states() const override;
virtual void init_execution() override;
virtual bool kernel_available(DeviceKernel kernel) const override;
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
virtual bool synchronize() override;
virtual void zero_to_device(device_memory &mem) override;
virtual void copy_to_device(device_memory &mem) override;
virtual void copy_from_device(device_memory &mem) override;
virtual hipStream_t stream()
{
return hip_stream_;
}
// TODO : (Arya) Enable this after stabilizing the dev branch
virtual unique_ptr<DeviceGraphicsInterop> graphics_interop_create() override;
protected:
HIPDevice *hip_device_;
hipStream_t hip_stream_;
};
CCL_NAMESPACE_END
#endif /* WITH_HIP */

View File

@@ -1,61 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_HIP
# include "device/hip/util.h"
# include "device/hip/device_impl.h"
CCL_NAMESPACE_BEGIN
HIPContextScope::HIPContextScope(HIPDevice *device) : device(device)
{
hip_device_assert(device, hipCtxPushCurrent(device->hipContext));
}
HIPContextScope::~HIPContextScope()
{
hip_device_assert(device, hipCtxPopCurrent(NULL));
}
# ifndef WITH_HIP_DYNLOAD
const char *hipewErrorString(hipError_t result)
{
/* We can only give error code here without major code duplication, that
* should be enough since dynamic loading is only being disabled by folks
* who knows what they're doing anyway.
*
* NOTE: Avoid call from several threads.
*/
static string error;
error = string_printf("%d", result);
return error.c_str();
}
const char *hipewCompilerPath()
{
return CYCLES_HIP_HIPCC_EXECUTABLE;
}
int hipewCompilerVersion()
{
return (HIP_VERSION / 100) + (HIP_VERSION % 100 / 10);
}
# endif
CCL_NAMESPACE_END
#endif /* WITH_HIP */

View File

@@ -1,63 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifdef WITH_HIP
# ifdef WITH_HIP_DYNLOAD
# include "hipew.h"
# endif
CCL_NAMESPACE_BEGIN
class HIPDevice;
/* Utility to push/pop HIP context. */
class HIPContextScope {
public:
HIPContextScope(HIPDevice *device);
~HIPContextScope();
private:
HIPDevice *device;
};
/* Utility for checking return values of HIP function calls. */
# define hip_device_assert(hip_device, stmt) \
{ \
hipError_t result = stmt; \
if (result != hipSuccess) { \
const char *name = hipewErrorString(result); \
hip_device->set_error( \
string_printf("%s in %s (%s:%d)", name, #stmt, __FILE__, __LINE__)); \
} \
} \
(void)0
# define hip_assert(stmt) hip_device_assert(this, stmt)
# ifndef WITH_HIP_DYNLOAD
/* Transparently implement some functions, so majority of the file does not need
* to worry about difference between dynamically loaded and linked HIP at all. */
const char *hipewErrorString(hipError_t result);
const char *hipewCompilerPath();
int hipewCompilerVersion();
# endif /* WITH_HIP_DYNLOAD */
CCL_NAMESPACE_END
#endif /* WITH_HIP */

View File

@@ -315,11 +315,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
group_descs[PG_HITS].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
group_descs[PG_HITS].hitgroup.moduleAH = optix_module;
group_descs[PG_HITS].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_shadow_all_hit";
group_descs[PG_HITV].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
group_descs[PG_HITV].hitgroup.moduleCH = optix_module;
group_descs[PG_HITV].hitgroup.entryFunctionNameCH = "__closesthit__kernel_optix_hit";
group_descs[PG_HITV].hitgroup.moduleAH = optix_module;
group_descs[PG_HITV].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_volume_test";
if (kernel_features & KERNEL_FEATURE_HAIR) {
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
@@ -402,7 +397,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
trace_css = std::max(trace_css, stack_size[PG_HITV].cssIS + stack_size[PG_HITV].cssAH);
trace_css = std::max(trace_css,
stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
trace_css = std::max(trace_css,
@@ -427,7 +421,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.push_back(groups[PG_HITD]);
pipeline_groups.push_back(groups[PG_HITS]);
pipeline_groups.push_back(groups[PG_HITL]);
pipeline_groups.push_back(groups[PG_HITV]);
if (motion_blur) {
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
@@ -466,7 +459,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.push_back(groups[PG_HITD]);
pipeline_groups.push_back(groups[PG_HITS]);
pipeline_groups.push_back(groups[PG_HITL]);
pipeline_groups.push_back(groups[PG_HITV]);
if (motion_blur) {
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
@@ -1398,33 +1390,25 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
/* Set user instance ID to object index (but leave low bit blank). */
instance.instanceId = ob->get_device_index() << 1;
/* Add some of the object visibility bits to the mask.
* __prim_visibility contains the combined visibility bits of all instances, so is not
* reliable if they differ between instances. But the OptiX visibility mask can only contain
* 8 bits, so have to trade-off here and select just a few important ones.
*/
instance.visibilityMask = ob->visibility_for_tracing() & 0xFF;
/* Have to have at least one bit in the mask, or else instance would always be culled. */
if (0 == instance.visibilityMask) {
instance.visibilityMask = 0xFF;
instance.visibilityMask = 1;
if (ob->get_geometry()->has_volume) {
/* Volumes have a special bit set in the visibility mask so a trace can mask only volumes.
*/
instance.visibilityMask |= 2;
}
if (ob->get_geometry()->geometry_type == Geometry::HAIR &&
static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) {
if (motion_blur && ob->get_geometry()->has_motion_blur()) {
if (ob->get_geometry()->geometry_type == Geometry::HAIR) {
/* Same applies to curves (so they can be skipped in local trace calls). */
instance.visibilityMask |= 4;
if (motion_blur && ob->get_geometry()->has_motion_blur() &&
static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) {
/* Select between motion blur and non-motion blur built-in intersection module. */
instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
}
}
else {
/* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
* since it needs to filter out end-caps there).
* It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit
* programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
*/
instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT;
}
/* Insert motion traversable if object has motion. */
if (motion_blur && ob->use_motion()) {
@@ -1490,7 +1474,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
delete[] reinterpret_cast<uint8_t *>(&motion_transform);
/* Disable instance transform if object uses motion transform already. */
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
/* Get traversable handle to motion transform. */
optixConvertPointerToTraversableHandle(context,
@@ -1507,7 +1491,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
else {
/* Disable instance transform if geometry already has it applied to vertex data. */
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
/* Non-instanced objects read ID from 'prim_object', so distinguish
* them from instanced objects with the low bit set. */
instance.instanceId |= 1;

View File

@@ -40,7 +40,6 @@ enum {
PG_HITD, /* Default hit group. */
PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
PG_HITL, /* __BVH_LOCAL__ hit group (only used for triangles). */
PG_HITV, /* __VOLUME__ hit group. */
PG_HITD_MOTION,
PG_HITS_MOTION,
PG_CALL_SVM_AO,
@@ -52,7 +51,7 @@ enum {
static const int MISS_PROGRAM_GROUP_OFFSET = PG_MISS;
static const int NUM_MIS_PROGRAM_GROUPS = 1;
static const int HIT_PROGAM_GROUP_OFFSET = PG_HITD;
static const int NUM_HIT_PROGRAM_GROUPS = 6;
static const int NUM_HIT_PROGRAM_GROUPS = 5;
static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
static const int NUM_CALLABLE_PROGRAM_GROUPS = 3;

View File

@@ -27,8 +27,6 @@ set(SRC
pass_accessor.cpp
pass_accessor_cpu.cpp
pass_accessor_gpu.cpp
path_trace_display.cpp
path_trace_tile.cpp
path_trace_work.cpp
path_trace_work_cpu.cpp
path_trace_work_gpu.cpp
@@ -49,8 +47,6 @@ set(SRC_HEADERS
pass_accessor.h
pass_accessor_cpu.h
pass_accessor_gpu.h
path_trace_display.h
path_trace_tile.h
path_trace_work.h
path_trace_work_cpu.h
path_trace_work_gpu.h

View File

@@ -149,6 +149,9 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers,
/* Denoised passes store their final pixels, no need in special calculation. */
get_pass_float(render_buffers, buffer_params, destination);
}
else if (type == PASS_RENDER_TIME) {
/* TODO(sergey): Needs implementation. */
}
else if (type == PASS_DEPTH) {
get_pass_depth(render_buffers, buffer_params, destination);
}

View File

@@ -19,9 +19,8 @@
#include "device/cpu/device.h"
#include "device/device.h"
#include "integrator/pass_accessor.h"
#include "integrator/path_trace_display.h"
#include "integrator/path_trace_tile.h"
#include "integrator/render_scheduler.h"
#include "render/gpu_display.h"
#include "render/pass.h"
#include "render/scene.h"
#include "render/tile.h"
@@ -68,11 +67,11 @@ PathTrace::PathTrace(Device *device,
PathTrace::~PathTrace()
{
/* Destroy any GPU resource which was used for graphics interop.
* Need to have access to the PathTraceDisplay as it is the only source of drawing context which
* is used for interop. */
if (display_) {
* Need to have access to the GPUDisplay as it is the only source of drawing context which is
* used for interop. */
if (gpu_display_) {
for (auto &&path_trace_work : path_trace_works_) {
path_trace_work->destroy_gpu_resources(display_.get());
path_trace_work->destroy_gpu_resources(gpu_display_.get());
}
}
}
@@ -95,7 +94,7 @@ bool PathTrace::ready_to_reset()
{
/* The logic here is optimized for the best feedback in the viewport, which implies having a GPU
* display. Of there is no such display, the logic here will break. */
DCHECK(display_);
DCHECK(gpu_display_);
/* The logic here tries to provide behavior which feels the most interactive feel to artists.
* General idea is to be able to reset as quickly as possible, while still providing interactive
@@ -127,8 +126,8 @@ void PathTrace::reset(const BufferParams &full_params, const BufferParams &big_t
/* NOTE: GPU display checks for buffer modification and avoids unnecessary re-allocation.
* It is requires to inform about reset whenever it happens, so that the redraw state tracking is
* properly updated. */
if (display_) {
display_->reset(full_params);
if (gpu_display_) {
gpu_display_->reset(full_params);
}
render_state_.has_denoised_result = false;
@@ -536,35 +535,25 @@ void PathTrace::denoise(const RenderWork &render_work)
render_scheduler_.report_denoise_time(render_work, time_dt() - start_time);
}
void PathTrace::set_output_driver(unique_ptr<OutputDriver> driver)
void PathTrace::set_gpu_display(unique_ptr<GPUDisplay> gpu_display)
{
output_driver_ = move(driver);
gpu_display_ = move(gpu_display);
}
void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
void PathTrace::clear_gpu_display()
{
if (driver) {
display_ = make_unique<PathTraceDisplay>(move(driver));
}
else {
display_ = nullptr;
}
}
void PathTrace::clear_display()
{
if (display_) {
display_->clear();
if (gpu_display_) {
gpu_display_->clear();
}
}
void PathTrace::draw()
{
if (!display_) {
if (!gpu_display_) {
return;
}
did_draw_after_reset_ |= display_->draw();
did_draw_after_reset_ |= gpu_display_->draw();
}
void PathTrace::update_display(const RenderWork &render_work)
@@ -573,32 +562,31 @@ void PathTrace::update_display(const RenderWork &render_work)
return;
}
if (!display_ && !output_driver_) {
if (!gpu_display_ && !tile_buffer_update_cb) {
VLOG(3) << "Ignore display update.";
return;
}
if (full_params_.width == 0 || full_params_.height == 0) {
VLOG(3) << "Skipping PathTraceDisplay update due to 0 size of the render buffer.";
VLOG(3) << "Skipping GPUDisplay update due to 0 size of the render buffer.";
return;
}
const double start_time = time_dt();
if (output_driver_) {
if (tile_buffer_update_cb) {
VLOG(3) << "Invoke buffer update callback.";
PathTraceTile tile(*this);
output_driver_->update_render_tile(tile);
tile_buffer_update_cb();
}
if (display_) {
if (gpu_display_) {
VLOG(3) << "Perform copy to GPUDisplay work.";
const int resolution_divider = render_work.resolution_divider;
const int texture_width = max(1, full_params_.width / resolution_divider);
const int texture_height = max(1, full_params_.height / resolution_divider);
if (!display_->update_begin(texture_width, texture_height)) {
if (!gpu_display_->update_begin(texture_width, texture_height)) {
LOG(ERROR) << "Error beginning GPUDisplay update.";
return;
}
@@ -612,10 +600,10 @@ void PathTrace::update_display(const RenderWork &render_work)
* all works in parallel. */
const int num_samples = get_num_samples_in_buffer();
for (auto &&path_trace_work : path_trace_works_) {
path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples);
path_trace_work->copy_to_gpu_display(gpu_display_.get(), pass_mode, num_samples);
}
display_->update_end();
gpu_display_->update_end();
}
render_scheduler_.report_display_update_time(render_work, time_dt() - start_time);
@@ -765,26 +753,20 @@ bool PathTrace::is_cancel_requested()
void PathTrace::tile_buffer_write()
{
if (!output_driver_) {
if (!tile_buffer_write_cb) {
return;
}
PathTraceTile tile(*this);
output_driver_->write_render_tile(tile);
tile_buffer_write_cb();
}
void PathTrace::tile_buffer_read()
{
if (!device_scene_->data.bake.use) {
if (!tile_buffer_read_cb) {
return;
}
if (!output_driver_) {
return;
}
PathTraceTile tile(*this);
if (output_driver_->read_render_tile(tile)) {
if (tile_buffer_read_cb()) {
tbb::parallel_for_each(path_trace_works_, [](unique_ptr<PathTraceWork> &path_trace_work) {
path_trace_work->copy_render_buffers_to_device();
});
@@ -819,7 +801,7 @@ void PathTrace::tile_buffer_write_to_disk()
}
if (!tile_manager_.write_tile(*buffers)) {
device_->set_error("Error writing tile to file");
LOG(ERROR) << "Error writing tile to file.";
}
}
@@ -912,14 +894,7 @@ void PathTrace::process_full_buffer_from_disk(string_view filename)
DenoiseParams denoise_params;
if (!tile_manager_.read_full_buffer_from_disk(filename, &full_frame_buffers, &denoise_params)) {
const string error_message = "Error reading tiles from file";
if (progress_) {
progress_->set_error(error_message);
progress_->set_cancel(error_message);
}
else {
LOG(ERROR) << error_message;
}
LOG(ERROR) << "Error reading tiles from file.";
return;
}
@@ -1023,11 +998,6 @@ int2 PathTrace::get_render_tile_offset() const
return make_int2(tile.x, tile.y);
}
int2 PathTrace::get_render_size() const
{
return tile_manager_.get_size();
}
const BufferParams &PathTrace::get_render_tile_params() const
{
if (full_frame_state_.render_buffers) {
@@ -1058,8 +1028,6 @@ static const char *device_type_for_description(const DeviceType type)
return "CUDA";
case DEVICE_OPTIX:
return "OptiX";
case DEVICE_HIP:
return "HIP";
case DEVICE_DUMMY:
return "Dummy";
case DEVICE_MULTI:

View File

@@ -31,14 +31,12 @@ CCL_NAMESPACE_BEGIN
class AdaptiveSampling;
class Device;
class DeviceScene;
class DisplayDriver;
class Film;
class RenderBuffers;
class RenderScheduler;
class RenderWork;
class PathTraceDisplay;
class OutputDriver;
class Progress;
class GPUDisplay;
class TileManager;
/* PathTrace class takes care of kernel graph and scheduling on a (multi)device. It takes care of
@@ -100,16 +98,13 @@ class PathTrace {
* Use this to configure the adaptive sampler before rendering any samples. */
void set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling);
/* Sets output driver for render buffer output. */
void set_output_driver(unique_ptr<OutputDriver> driver);
/* Set GPU display which takes care of drawing the render result. */
void set_gpu_display(unique_ptr<GPUDisplay> gpu_display);
/* Set display driver for interactive render buffer display. */
void set_display_driver(unique_ptr<DisplayDriver> driver);
/* Clear the GPU display by filling it in with all zeroes. */
void clear_gpu_display();
/* Clear the display buffer by filling it in with all zeroes. */
void clear_display();
/* Perform drawing of the current state of the DisplayDriver. */
/* Perform drawing of the current state of the GPUDisplay. */
void draw();
/* Cancel rendering process as soon as possible, without waiting for full tile to be sampled.
@@ -162,7 +157,6 @@ class PathTrace {
* instead. */
int2 get_render_tile_size() const;
int2 get_render_tile_offset() const;
int2 get_render_size() const;
/* Get buffer parameters of the current tile.
*
@@ -174,6 +168,18 @@ class PathTrace {
* times, and so on. */
string full_report() const;
/* Callback which communicates an updates state of the render buffer of the current big tile.
* Is called during path tracing to communicate work-in-progress state of the final buffer. */
function<void(void)> tile_buffer_update_cb;
/* Callback which communicates final rendered buffer. Is called after path-tracing is done. */
function<void(void)> tile_buffer_write_cb;
/* Callback which initializes rendered buffer. Is called before path-tracing starts.
*
* This is used for baking. */
function<bool(void)> tile_buffer_read_cb;
/* Callback which is called to report current rendering progress.
*
* It is supposed to be cheaper than buffer update/write, hence can be called more often.
@@ -246,11 +252,7 @@ class PathTrace {
RenderScheduler &render_scheduler_;
TileManager &tile_manager_;
/* Display driver for interactive render buffer display. */
unique_ptr<PathTraceDisplay> display_;
/* Output driver to write render buffer to. */
unique_ptr<OutputDriver> output_driver_;
unique_ptr<GPUDisplay> gpu_display_;
/* Per-compute device descriptors of work which is responsible for path tracing on its configured
* device. */

View File

@@ -1,107 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "integrator/path_trace_tile.h"
#include "integrator/pass_accessor_cpu.h"
#include "integrator/path_trace.h"
#include "render/buffers.h"
#include "render/film.h"
#include "render/pass.h"
#include "render/scene.h"
CCL_NAMESPACE_BEGIN
PathTraceTile::PathTraceTile(PathTrace &path_trace)
: OutputDriver::Tile(path_trace.get_render_tile_offset(),
path_trace.get_render_tile_size(),
path_trace.get_render_size(),
path_trace.get_render_tile_params().layer,
path_trace.get_render_tile_params().view),
path_trace_(path_trace),
copied_from_device_(false)
{
}
bool PathTraceTile::get_pass_pixels(const string_view pass_name,
const int num_channels,
float *pixels) const
{
/* NOTE: The code relies on a fact that session is fully update and no scene/buffer modification
* is happening while this function runs. */
if (!copied_from_device_) {
/* Copy from device on demand. */
path_trace_.copy_render_tile_from_device();
const_cast<PathTraceTile *>(this)->copied_from_device_ = true;
}
const BufferParams &buffer_params = path_trace_.get_render_tile_params();
const BufferPass *pass = buffer_params.find_pass(pass_name);
if (pass == nullptr) {
return false;
}
const bool has_denoised_result = path_trace_.has_denoised_result();
if (pass->mode == PassMode::DENOISED && !has_denoised_result) {
pass = buffer_params.find_pass(pass->type);
if (pass == nullptr) {
/* Happens when denoised result pass is requested but is never written by the kernel. */
return false;
}
}
pass = buffer_params.get_actual_display_pass(pass);
const float exposure = buffer_params.exposure;
const int num_samples = path_trace_.get_num_render_tile_samples();
PassAccessor::PassAccessInfo pass_access_info(*pass);
pass_access_info.use_approximate_shadow_catcher = buffer_params.use_approximate_shadow_catcher;
pass_access_info.use_approximate_shadow_catcher_background =
pass_access_info.use_approximate_shadow_catcher && !buffer_params.use_transparent_background;
const PassAccessorCPU pass_accessor(pass_access_info, exposure, num_samples);
const PassAccessor::Destination destination(pixels, num_channels);
return path_trace_.get_render_tile_pixels(pass_accessor, destination);
}
bool PathTraceTile::set_pass_pixels(const string_view pass_name,
const int num_channels,
const float *pixels) const
{
/* NOTE: The code relies on a fact that session is fully update and no scene/buffer modification
* is happening while this function runs. */
const BufferParams &buffer_params = path_trace_.get_render_tile_params();
const BufferPass *pass = buffer_params.find_pass(pass_name);
if (!pass) {
return false;
}
const float exposure = buffer_params.exposure;
const int num_samples = 1;
const PassAccessor::PassAccessInfo pass_access_info(*pass);
PassAccessorCPU pass_accessor(pass_access_info, exposure, num_samples);
PassAccessor::Source source(pixels, num_channels);
return path_trace_.set_render_tile_pixels(pass_accessor, source);
}
CCL_NAMESPACE_END

View File

@@ -1,43 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "render/output_driver.h"
CCL_NAMESPACE_BEGIN
/* PathTraceTile
*
* Implementation of OutputDriver::Tile interface for path tracer. */
class PathTrace;
class PathTraceTile : public OutputDriver::Tile {
public:
PathTraceTile(PathTrace &path_trace);
bool get_pass_pixels(const string_view pass_name, const int num_channels, float *pixels) const;
bool set_pass_pixels(const string_view pass_name,
const int num_channels,
const float *pixels) const;
private:
PathTrace &path_trace_;
bool copied_from_device_;
};
CCL_NAMESPACE_END

View File

@@ -16,12 +16,12 @@
#include "device/device.h"
#include "integrator/path_trace_display.h"
#include "integrator/path_trace_work.h"
#include "integrator/path_trace_work_cpu.h"
#include "integrator/path_trace_work_gpu.h"
#include "render/buffers.h"
#include "render/film.h"
#include "render/gpu_display.h"
#include "render/scene.h"
#include "kernel/kernel_types.h"
@@ -185,12 +185,12 @@ PassAccessor::PassAccessInfo PathTraceWork::get_display_pass_access_info(PassMod
return pass_access_info;
}
PassAccessor::Destination PathTraceWork::get_display_destination_template(
const PathTraceDisplay *display) const
PassAccessor::Destination PathTraceWork::get_gpu_display_destination_template(
const GPUDisplay *gpu_display) const
{
PassAccessor::Destination destination(film_->get_display_pass());
const int2 display_texture_size = display->get_texture_size();
const int2 display_texture_size = gpu_display->get_texture_size();
const int texture_x = effective_buffer_params_.full_x - effective_full_params_.full_x;
const int texture_y = effective_buffer_params_.full_y - effective_full_params_.full_y;

View File

@@ -28,7 +28,7 @@ class BufferParams;
class Device;
class DeviceScene;
class Film;
class PathTraceDisplay;
class GPUDisplay;
class RenderBuffers;
class PathTraceWork {
@@ -83,9 +83,11 @@ class PathTraceWork {
* noisy pass mode will be passed here when it is known that the buffer does not have denoised
* passes yet (because denoiser did not run). If the denoised pass is requested and denoiser is
* not used then this function will fall-back to the noisy pass instead. */
virtual void copy_to_display(PathTraceDisplay *display, PassMode pass_mode, int num_samples) = 0;
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
PassMode pass_mode,
int num_samples) = 0;
virtual void destroy_gpu_resources(PathTraceDisplay *display) = 0;
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) = 0;
/* Copy data from/to given render buffers.
* Will copy pixels from a corresponding place (from multi-device point of view) of the render
@@ -160,8 +162,8 @@ class PathTraceWork {
/* Get destination which offset and stride are configured so that writing to it will write to a
* proper location of GPU display texture, taking current tile and device slice into account. */
PassAccessor::Destination get_display_destination_template(
const PathTraceDisplay *display) const;
PassAccessor::Destination get_gpu_display_destination_template(
const GPUDisplay *gpu_display) const;
/* Device which will be used for path tracing.
* Note that it is an actual render device (and never is a multi-device). */

View File

@@ -19,12 +19,10 @@
#include "device/cpu/kernel.h"
#include "device/device.h"
#include "kernel/kernel_path_state.h"
#include "integrator/pass_accessor_cpu.h"
#include "integrator/path_trace_display.h"
#include "render/buffers.h"
#include "render/gpu_display.h"
#include "render/scene.h"
#include "util/util_atomic.h"
@@ -118,17 +116,13 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
const KernelWorkTile &work_tile,
const int samples_num)
{
const bool has_shadow_catcher = device_scene_->data.integrator.has_shadow_catcher;
const bool has_bake = device_scene_->data.bake.use;
IntegratorStateCPU integrator_states[2];
IntegratorStateCPU integrator_states[2] = {};
IntegratorStateCPU *state = &integrator_states[0];
IntegratorStateCPU *shadow_catcher_state = nullptr;
if (device_scene_->data.integrator.has_shadow_catcher) {
shadow_catcher_state = &integrator_states[1];
path_state_init_queues(kernel_globals, shadow_catcher_state);
}
IntegratorStateCPU *shadow_catcher_state = &integrator_states[1];
KernelWorkTile sample_work_tile = work_tile;
float *render_buffer = buffers_->buffer.data();
@@ -153,7 +147,7 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
kernels_.integrator_megakernel(kernel_globals, state, render_buffer);
if (shadow_catcher_state) {
if (has_shadow_catcher) {
kernels_.integrator_megakernel(kernel_globals, shadow_catcher_state, render_buffer);
}
@@ -161,14 +155,14 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
}
}
void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
PassMode pass_mode,
int num_samples)
void PathTraceWorkCPU::copy_to_gpu_display(GPUDisplay *gpu_display,
PassMode pass_mode,
int num_samples)
{
half4 *rgba_half = display->map_texture_buffer();
half4 *rgba_half = gpu_display->map_texture_buffer();
if (!rgba_half) {
/* TODO(sergey): Look into using copy_to_display() if mapping failed. Might be needed for
* some implementations of PathTraceDisplay which can not map memory? */
/* TODO(sergey): Look into using copy_to_gpu_display() if mapping failed. Might be needed for
* some implementations of GPUDisplay which can not map memory? */
return;
}
@@ -178,7 +172,7 @@ void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
const PassAccessorCPU pass_accessor(pass_access_info, kfilm.exposure, num_samples);
PassAccessor::Destination destination = get_display_destination_template(display);
PassAccessor::Destination destination = get_gpu_display_destination_template(gpu_display);
destination.pixels_half_rgba = rgba_half;
tbb::task_arena local_arena = local_tbb_arena_create(device_);
@@ -186,10 +180,10 @@ void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
pass_accessor.get_render_tile_pixels(buffers_.get(), effective_buffer_params_, destination);
});
display->unmap_texture_buffer();
gpu_display->unmap_texture_buffer();
}
void PathTraceWorkCPU::destroy_gpu_resources(PathTraceDisplay * /*display*/)
void PathTraceWorkCPU::destroy_gpu_resources(GPUDisplay * /*gpu_display*/)
{
}

View File

@@ -50,10 +50,10 @@ class PathTraceWorkCPU : public PathTraceWork {
int start_sample,
int samples_num) override;
virtual void copy_to_display(PathTraceDisplay *display,
PassMode pass_mode,
int num_samples) override;
virtual void destroy_gpu_resources(PathTraceDisplay *display) override;
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
PassMode pass_mode,
int num_samples) override;
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) override;
virtual bool copy_render_buffers_from_device() override;
virtual bool copy_render_buffers_to_device() override;

View File

@@ -15,12 +15,12 @@
*/
#include "integrator/path_trace_work_gpu.h"
#include "integrator/path_trace_display.h"
#include "device/device.h"
#include "integrator/pass_accessor_gpu.h"
#include "render/buffers.h"
#include "render/gpu_display.h"
#include "render/scene.h"
#include "util/util_logging.h"
#include "util/util_tbb.h"
@@ -46,7 +46,7 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
queued_paths_(device, "queued_paths", MEM_READ_WRITE),
num_queued_paths_(device, "num_queued_paths", MEM_READ_WRITE),
work_tiles_(device, "work_tiles", MEM_READ_WRITE),
display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
gpu_display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
max_num_paths_(queue_->num_concurrent_states(sizeof(IntegratorStateCPU))),
min_num_active_paths_(queue_->num_concurrent_busy_states()),
max_active_path_index_(0)
@@ -95,8 +95,8 @@ void PathTraceWorkGPU::alloc_integrator_soa()
#define KERNEL_STRUCT_END(name) \
break; \
}
#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
if (array_index == gpu_array_size - 1) { \
#define KERNEL_STRUCT_END_ARRAY(name, array_size) \
if (array_index == array_size - 1) { \
break; \
} \
}
@@ -652,7 +652,7 @@ int PathTraceWorkGPU::get_num_active_paths()
bool PathTraceWorkGPU::should_use_graphics_interop()
{
/* There are few aspects with the graphics interop when using multiple devices caused by the fact
* that the PathTraceDisplay has a single texture:
* that the GPUDisplay has a single texture:
*
* CUDA will return `CUDA_ERROR_NOT_SUPPORTED` from `cuGraphicsGLRegisterBuffer()` when
* attempting to register OpenGL PBO which has been mapped. Which makes sense, because
@@ -678,9 +678,9 @@ bool PathTraceWorkGPU::should_use_graphics_interop()
return interop_use_;
}
void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
PassMode pass_mode,
int num_samples)
void PathTraceWorkGPU::copy_to_gpu_display(GPUDisplay *gpu_display,
PassMode pass_mode,
int num_samples)
{
if (device_->have_error()) {
/* Don't attempt to update GPU display if the device has errors: the error state will make
@@ -694,7 +694,7 @@ void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
}
if (should_use_graphics_interop()) {
if (copy_to_display_interop(display, pass_mode, num_samples)) {
if (copy_to_gpu_display_interop(gpu_display, pass_mode, num_samples)) {
return;
}
@@ -703,12 +703,12 @@ void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
interop_use_ = false;
}
copy_to_display_naive(display, pass_mode, num_samples);
copy_to_gpu_display_naive(gpu_display, pass_mode, num_samples);
}
void PathTraceWorkGPU::copy_to_display_naive(PathTraceDisplay *display,
PassMode pass_mode,
int num_samples)
void PathTraceWorkGPU::copy_to_gpu_display_naive(GPUDisplay *gpu_display,
PassMode pass_mode,
int num_samples)
{
const int full_x = effective_buffer_params_.full_x;
const int full_y = effective_buffer_params_.full_y;
@@ -725,42 +725,43 @@ void PathTraceWorkGPU::copy_to_display_naive(PathTraceDisplay *display,
* NOTE: allocation happens to the final resolution so that no re-allocation happens on every
* change of the resolution divider. However, if the display becomes smaller, shrink the
* allocated memory as well. */
if (display_rgba_half_.data_width != final_width ||
display_rgba_half_.data_height != final_height) {
display_rgba_half_.alloc(final_width, final_height);
if (gpu_display_rgba_half_.data_width != final_width ||
gpu_display_rgba_half_.data_height != final_height) {
gpu_display_rgba_half_.alloc(final_width, final_height);
/* TODO(sergey): There should be a way to make sure device-side memory is allocated without
* transferring zeroes to the device. */
queue_->zero_to_device(display_rgba_half_);
queue_->zero_to_device(gpu_display_rgba_half_);
}
PassAccessor::Destination destination(film_->get_display_pass());
destination.d_pixels_half_rgba = display_rgba_half_.device_pointer;
destination.d_pixels_half_rgba = gpu_display_rgba_half_.device_pointer;
get_render_tile_film_pixels(destination, pass_mode, num_samples);
queue_->copy_from_device(display_rgba_half_);
queue_->synchronize();
gpu_display_rgba_half_.copy_from_device();
display->copy_pixels_to_texture(display_rgba_half_.data(), texture_x, texture_y, width, height);
gpu_display->copy_pixels_to_texture(
gpu_display_rgba_half_.data(), texture_x, texture_y, width, height);
}
bool PathTraceWorkGPU::copy_to_display_interop(PathTraceDisplay *display,
PassMode pass_mode,
int num_samples)
bool PathTraceWorkGPU::copy_to_gpu_display_interop(GPUDisplay *gpu_display,
PassMode pass_mode,
int num_samples)
{
if (!device_graphics_interop_) {
device_graphics_interop_ = queue_->graphics_interop_create();
}
const DisplayDriver::GraphicsInterop graphics_interop_dst = display->graphics_interop_get();
device_graphics_interop_->set_display_interop(graphics_interop_dst);
const DeviceGraphicsInteropDestination graphics_interop_dst =
gpu_display->graphics_interop_get();
device_graphics_interop_->set_destination(graphics_interop_dst);
const device_ptr d_rgba_half = device_graphics_interop_->map();
if (!d_rgba_half) {
return false;
}
PassAccessor::Destination destination = get_display_destination_template(display);
PassAccessor::Destination destination = get_gpu_display_destination_template(gpu_display);
destination.d_pixels_half_rgba = d_rgba_half;
get_render_tile_film_pixels(destination, pass_mode, num_samples);
@@ -770,14 +771,14 @@ bool PathTraceWorkGPU::copy_to_display_interop(PathTraceDisplay *display,
return true;
}
void PathTraceWorkGPU::destroy_gpu_resources(PathTraceDisplay *display)
void PathTraceWorkGPU::destroy_gpu_resources(GPUDisplay *gpu_display)
{
if (!device_graphics_interop_) {
return;
}
display->graphics_interop_activate();
gpu_display->graphics_interop_activate();
device_graphics_interop_ = nullptr;
display->graphics_interop_deactivate();
gpu_display->graphics_interop_deactivate();
}
void PathTraceWorkGPU::get_render_tile_film_pixels(const PassAccessor::Destination &destination,

View File

@@ -48,10 +48,10 @@ class PathTraceWorkGPU : public PathTraceWork {
int start_sample,
int samples_num) override;
virtual void copy_to_display(PathTraceDisplay *display,
PassMode pass_mode,
int num_samples) override;
virtual void destroy_gpu_resources(PathTraceDisplay *display) override;
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
PassMode pass_mode,
int num_samples) override;
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) override;
virtual bool copy_render_buffers_from_device() override;
virtual bool copy_render_buffers_to_device() override;
@@ -88,16 +88,16 @@ class PathTraceWorkGPU : public PathTraceWork {
int get_num_active_paths();
/* Check whether graphics interop can be used for the PathTraceDisplay update. */
/* Check whether graphics interop can be used for the GPUDisplay update. */
bool should_use_graphics_interop();
/* Naive implementation of the `copy_to_display()` which performs film conversion on the
* device, then copies pixels to the host and pushes them to the `display`. */
void copy_to_display_naive(PathTraceDisplay *display, PassMode pass_mode, int num_samples);
/* Naive implementation of the `copy_to_gpu_display()` which performs film conversion on the
* device, then copies pixels to the host and pushes them to the `gpu_display`. */
void copy_to_gpu_display_naive(GPUDisplay *gpu_display, PassMode pass_mode, int num_samples);
/* Implementation of `copy_to_display()` which uses driver's OpenGL/GPU interoperability
/* Implementation of `copy_to_gpu_display()` which uses driver's OpenGL/GPU interoperability
* functionality, avoiding copy of pixels to the host. */
bool copy_to_display_interop(PathTraceDisplay *display, PassMode pass_mode, int num_samples);
bool copy_to_gpu_display_interop(GPUDisplay *gpu_display, PassMode pass_mode, int num_samples);
/* Synchronously run film conversion kernel and store display result in the given destination. */
void get_render_tile_film_pixels(const PassAccessor::Destination &destination,
@@ -139,9 +139,9 @@ class PathTraceWorkGPU : public PathTraceWork {
/* Temporary buffer for passing work tiles to kernel. */
device_vector<KernelWorkTile> work_tiles_;
/* Temporary buffer used by the copy_to_display() whenever graphics interoperability is not
/* Temporary buffer used by the copy_to_gpu_display() whenever graphics interoperability is not
* available. Is allocated on-demand. */
device_vector<half4> display_rgba_half_;
device_vector<half4> gpu_display_rgba_half_;
unique_ptr<DeviceGraphicsInterop> device_graphics_interop_;

View File

@@ -384,7 +384,7 @@ bool RenderScheduler::set_postprocess_render_work(RenderWork *render_work)
}
if (denoiser_params_.use && !state_.last_work_tile_was_denoised) {
render_work->tile.denoise = !tile_manager_.has_multiple_tiles();
render_work->tile.denoise = true;
any_scheduled = true;
}
@@ -903,12 +903,6 @@ bool RenderScheduler::work_need_denoise(bool &delayed, bool &ready_to_display)
return false;
}
/* When multiple tiles are used the full frame will be denoised.
* Avoid per-tile denoising to save up render time. */
if (tile_manager_.has_multiple_tiles()) {
return false;
}
if (done()) {
/* Always denoise at the last sample. */
return true;

View File

@@ -344,7 +344,7 @@ class RenderScheduler {
/* Number of rendered samples on top of the start sample. */
int num_rendered_samples = 0;
/* Point in time the latest PathTraceDisplay work has been scheduled. */
/* Point in time the latest GPUDisplay work has been scheduled. */
double last_display_update_time = 0.0;
/* Value of -1 means display was never updated. */
int last_display_update_sample = -1;

View File

@@ -35,10 +35,6 @@ set(SRC_DEVICE_CUDA
device/cuda/kernel.cu
)
set(SRC_DEVICE_HIP
device/hip/kernel.cpp
)
set(SRC_DEVICE_OPTIX
device/optix/kernel.cu
device/optix/kernel_shader_raytrace.cu
@@ -110,12 +106,6 @@ set(SRC_DEVICE_CUDA_HEADERS
device/cuda/globals.h
)
set(SRC_DEVICE_HIP_HEADERS
device/hip/compat.h
device/hip/config.h
device/hip/globals.h
)
set(SRC_DEVICE_OPTIX_HEADERS
device/optix/compat.h
device/optix/globals.h
@@ -468,104 +458,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
cycles_set_solution_folder(cycles_kernel_cuda)
endif()
####################################################### START
# HIP module
if(WITH_CYCLES_HIP_BINARIES)
# 64 bit only
set(HIP_BITS 64)
# HIP version
execute_process(COMMAND ${HIP_HIPCC_EXECUTABLE} "--version" OUTPUT_VARIABLE HIPCC_OUT)
string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" HIP_VERSION_MAJOR "${HIPCC_OUT}")
string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" HIP_VERSION_MINOR "${HIPCC_OUT}")
set(HIP_VERSION "${HIP_VERSION_MAJOR}${HIP_VERSION_MINOR}")
message(WARNING
"HIP version ${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR} detected")
# build for each arch
set(hip_sources device/hip/kernel.cpp
${SRC_HEADERS}
${SRC_DEVICE_HIP_HEADERS}
${SRC_BVH_HEADERS}
${SRC_SVM_HEADERS}
${SRC_GEOM_HEADERS}
${SRC_INTEGRATOR_HEADERS}
${SRC_CLOSURE_HEADERS}
${SRC_UTIL_HEADERS}
)
set(hip_fatbins)
macro(CYCLES_HIP_KERNEL_ADD arch prev_arch name flags sources experimental)
if(${arch} MATCHES "compute_.*")
set(format "ptx")
else()
set(format "fatbin")
endif()
set(hip_file ${name}_${arch}.${format})
set(kernel_sources ${sources})
if(NOT ${prev_arch} STREQUAL "none")
if(${prev_arch} MATCHES "compute_.*")
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx)
else()
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin)
endif()
endif()
set(hip_kernel_src "/device/hip/${name}.cpp")
set(hip_flags ${flags}
-D CCL_NAMESPACE_BEGIN=
-D CCL_NAMESPACE_END=
-D HIPCC
-m ${HIP_BITS}
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
-I ${CMAKE_CURRENT_SOURCE_DIR}/device/hip
--use_fast_math
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
if(${experimental})
set(hip_flags ${hip_flags} -D __KERNEL_EXPERIMENTAL__)
set(name ${name}_experimental)
endif()
if(WITH_CYCLES_DEBUG)
set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__)
endif()
if(WITH_NANOVDB)
set(hip_flags ${hip_flags}
-D WITH_NANOVDB
-I "${NANOVDB_INCLUDE_DIR}")
endif()
endmacro()
set(prev_arch "none")
foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
set(hip_hipcc_executable ${HIP_HIPCC_EXECUTABLE})
set(hip_toolkit_root_dir ${HIP_TOOLKIT_ROOT_DIR})
if(DEFINED hip_hipcc_executable AND DEFINED hip_toolkit_root_dir)
# Compile regular kernel
CYCLES_HIP_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${hip_sources}" FALSE)
if(WITH_CYCLES_HIP_BUILD_SERIAL)
set(prev_arch ${arch})
endif()
unset(hip_hipcc_executable)
unset(hip_toolkit_root_dir)
endif()
endforeach()
add_custom_target(cycles_kernel_hip ALL DEPENDS ${hip_fatbins})
cycles_set_solution_folder(cycles_kernel_hip)
endif()
####################################################### END
# OptiX PTX modules
if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
@@ -710,13 +602,11 @@ endif()
cycles_add_library(cycles_kernel "${LIB}"
${SRC_DEVICE_CPU}
${SRC_DEVICE_CUDA}
${SRC_DEVICE_HIP}
${SRC_DEVICE_OPTIX}
${SRC_HEADERS}
${SRC_DEVICE_CPU_HEADERS}
${SRC_DEVICE_GPU_HEADERS}
${SRC_DEVICE_CUDA_HEADERS}
${SRC_DEVICE_HIP_HEADERS}
${SRC_DEVICE_OPTIX_HEADERS}
${SRC_BVH_HEADERS}
${SRC_CLOSURE_HEADERS}
@@ -731,7 +621,6 @@ source_group("geom" FILES ${SRC_GEOM_HEADERS})
source_group("integrator" FILES ${SRC_INTEGRATOR_HEADERS})
source_group("kernel" FILES ${SRC_HEADERS})
source_group("device\\cpu" FILES ${SRC_DEVICE_CPU} ${SRC_DEVICE_CPU_HEADERS})
source_group("device\\hip" FILES ${SRC_DEVICE_HIP} ${SRC_DEVICE_HIP_HEADERS})
source_group("device\\gpu" FILES ${SRC_DEVICE_GPU_HEADERS})
source_group("device\\cuda" FILES ${SRC_DEVICE_CUDA} ${SRC_DEVICE_CUDA_HEADERS})
source_group("device\\optix" FILES ${SRC_DEVICE_OPTIX} ${SRC_DEVICE_OPTIX_HEADERS})
@@ -743,19 +632,14 @@ endif()
if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
add_dependencies(cycles_kernel cycles_kernel_optix)
endif()
if(WITH_CYCLES_HIP)
add_dependencies(cycles_kernel cycles_kernel_hip)
endif()
# Install kernel source for runtime compilation
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_CUDA}" ${CYCLES_INSTALL_PATH}/source/kernel/device/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_HIP}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_OPTIX}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_GPU_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/gpu)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_CUDA_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/bvh)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/closure)

View File

@@ -167,25 +167,15 @@ ccl_device_intersect bool scene_intersect(const KernelGlobals *kg,
uint p4 = visibility;
uint p5 = PRIMITIVE_NONE;
uint ray_mask = visibility & 0xFF;
uint ray_flags = OPTIX_RAY_FLAG_NONE;
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
ray_mask = 0xFF;
ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT;
}
else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
ray_flags = OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
0.0f,
ray->t,
ray->time,
ray_mask,
ray_flags,
0, /* SBT offset for PG_HITD */
0xF,
OPTIX_RAY_FLAG_NONE,
0, // SBT offset for PG_HITD
0,
0,
p0,
@@ -261,11 +251,11 @@ ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
uint p2 = ((uint64_t)local_isect) & 0xFFFFFFFF;
uint p3 = (((uint64_t)local_isect) >> 32) & 0xFFFFFFFF;
uint p4 = local_object;
/* Is set to zero on miss or if ray is aborted, so can be used as return value. */
// Is set to zero on miss or if ray is aborted, so can be used as return value
uint p5 = max_hits;
if (local_isect) {
local_isect->num_hits = 0; /* Initialize hit count to zero. */
local_isect->num_hits = 0; // Initialize hit count to zero
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
@@ -273,10 +263,11 @@ ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
0.0f,
ray->t,
ray->time,
0xFF,
/* Need to always call into __anyhit__kernel_optix_local_hit. */
// Skip curves
0x3,
// Need to always call into __anyhit__kernel_optix_local_hit
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
2, /* SBT offset for PG_HITL */
2, // SBT offset for PG_HITL
0,
0,
p0,
@@ -374,22 +365,17 @@ ccl_device_intersect bool scene_intersect_shadow_all(const KernelGlobals *kg,
uint p4 = visibility;
uint p5 = false;
uint ray_mask = visibility & 0xFF;
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
ray_mask = 0xFF;
}
*num_hits = 0; /* Initialize hit count to zero. */
*num_hits = 0; // Initialize hit count to zero
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
0.0f,
ray->t,
ray->time,
ray_mask,
/* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
0xF,
// Need to always call into __anyhit__kernel_optix_shadow_all_hit
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
1, /* SBT offset for PG_HITS */
1, // SBT offset for PG_HITS
0,
0,
p0,
@@ -458,21 +444,16 @@ ccl_device_intersect bool scene_intersect_volume(const KernelGlobals *kg,
uint p4 = visibility;
uint p5 = PRIMITIVE_NONE;
uint ray_mask = visibility & 0xFF;
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
ray_mask = 0xFF;
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
0.0f,
ray->t,
ray->time,
ray_mask,
/* Need to always call into __anyhit__kernel_optix_volume_test. */
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
3, /* SBT offset for PG_HITV */
// Skip everything but volumes
0x2,
OPTIX_RAY_FLAG_NONE,
0, // SBT offset for PG_HITD
0,
0,
p0,

View File

@@ -25,11 +25,7 @@ CCL_NAMESPACE_BEGIN
#include "util/util_atomic.h"
#ifdef __HIP__
# define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 1024
#else
# define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 512
#endif
#define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 512
template<uint blocksize, typename IsActiveOp>
__device__ void gpu_parallel_active_index_array(const uint num_states,

View File

@@ -27,11 +27,7 @@ CCL_NAMESPACE_BEGIN
#include "util/util_atomic.h"
#ifdef __HIP__
# define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 1024
#else
# define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512
#endif
#define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512
template<uint blocksize> __device__ void gpu_parallel_prefix_sum(int *values, const int num_values)
{

View File

@@ -26,11 +26,7 @@ CCL_NAMESPACE_BEGIN
* the overall cost of the algorithm while keeping the work complexity O(n) and
* the step complexity O(log n). (Brent's Theorem optimization) */
#ifdef __HIP__
# define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 1024
#else
# define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 512
#endif
#define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 512
template<uint blocksize, typename InputT, typename OutputT, typename ConvertOp>
__device__ void gpu_parallel_sum(

View File

@@ -26,11 +26,7 @@ CCL_NAMESPACE_BEGIN
#include "util/util_atomic.h"
#ifdef __HIP__
# define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 1024
#else
# define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 512
#endif
#define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 512
#define GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY (~0)
template<uint blocksize, typename GetKeyOp>

View File

@@ -1,121 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#define __KERNEL_GPU__
#define __KERNEL_HIP__
#define CCL_NAMESPACE_BEGIN
#define CCL_NAMESPACE_END
#ifndef ATTR_FALLTHROUGH
# define ATTR_FALLTHROUGH
#endif
#ifdef __HIPCC_RTC__
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
#else
# include <stdint.h>
#endif
#ifdef CYCLES_HIPBIN_CC
# define FLT_MIN 1.175494350822287507969e-38f
# define FLT_MAX 340282346638528859811704183484516925440.0f
# define FLT_EPSILON 1.192092896e-07F
#endif
/* Qualifiers */
#define ccl_device __device__ __inline__
#define ccl_device_inline __device__ __inline__
#define ccl_device_forceinline __device__ __forceinline__
#define ccl_device_noinline __device__ __noinline__
#define ccl_device_noinline_cpu ccl_device
#define ccl_global
#define ccl_static_constant __constant__
#define ccl_device_constant __constant__ __device__
#define ccl_constant const
#define ccl_gpu_shared __shared__
#define ccl_private
#define ccl_may_alias
#define ccl_addr_space
#define ccl_restrict __restrict__
#define ccl_loop_no_unroll
#define ccl_align(n) __align__(n)
#define ccl_optional_struct_init
#define kernel_assert(cond)
/* Types */
#ifdef __HIP__
# include "hip/hip_fp16.h"
# include "hip/hip_runtime.h"
#endif
#ifdef _MSC_VER
# include <immintrin.h>
#endif
#define ccl_gpu_thread_idx_x (threadIdx.x)
#define ccl_gpu_block_dim_x (blockDim.x)
#define ccl_gpu_block_idx_x (blockIdx.x)
#define ccl_gpu_grid_dim_x (gridDim.x)
#define ccl_gpu_warp_size (warpSize)
#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
/* GPU warp synchronization */
#define ccl_gpu_syncthreads() __syncthreads()
#define ccl_gpu_ballot(predicate) __ballot(predicate)
#define ccl_gpu_shfl_down_sync(mask, var, detla) __shfl_down(var, detla)
#define ccl_gpu_popc(x) __popc(x)
/* GPU texture objects */
typedef hipTextureObject_t ccl_gpu_tex_object;
template<typename T>
ccl_device_forceinline T ccl_gpu_tex_object_read_2D(const ccl_gpu_tex_object texobj,
const float x,
const float y)
{
return tex2D<T>(texobj, x, y);
}
template<typename T>
ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object texobj,
const float x,
const float y,
const float z)
{
return tex3D<T>(texobj, x, y, z);
}
/* Use fast math functions */
#define cosf(x) __cosf(((float)(x)))
#define sinf(x) __sinf(((float)(x)))
#define powf(x, y) __powf(((float)(x)), ((float)(y)))
#define tanf(x) __tanf(((float)(x)))
#define logf(x) __logf(((float)(x)))
#define expf(x) __expf(((float)(x)))
/* Types */
#include "util/util_half.h"
#include "util/util_types.h"

View File

@@ -1,57 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Device data taken from HIP occupancy calculator.
*
* Terminology
* - HIP GPUs have multiple streaming multiprocessors
* - Each multiprocessor executes multiple thread blocks
* - Each thread block contains a number of threads, also known as the block size
* - Multiprocessors have a fixed number of registers, and the amount of registers
* used by each threads limits the number of threads per block.
*/
/* Launch Bound Definitions */
#define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
#define GPU_MULTIPROCESSOR_MAX_BLOCKS 64
#define GPU_BLOCK_MAX_THREADS 1024
#define GPU_THREAD_MAX_REGISTERS 255
#define GPU_KERNEL_BLOCK_NUM_THREADS 1024
#define GPU_KERNEL_MAX_REGISTERS 64
/* Compute number of threads per block and minimum blocks per multiprocessor
* given the maximum number of registers per thread. */
#define ccl_gpu_kernel(block_num_threads, thread_num_registers) \
extern "C" __global__ void __launch_bounds__(block_num_threads, \
GPU_MULTIPRESSOR_MAX_REGISTERS / \
(block_num_threads * thread_num_registers))
/* sanity checks */
#if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS
# error "Maximum number of threads per block exceeded"
#endif
#if GPU_MULTIPRESSOR_MAX_REGISTERS / (GPU_KERNEL_BLOCK_NUM_THREADS * GPU_KERNEL_MAX_REGISTERS) > \
GPU_MULTIPROCESSOR_MAX_BLOCKS
# error "Maximum number of blocks per multiprocessor exceeded"
#endif
#if GPU_KERNEL_MAX_REGISTERS > GPU_THREAD_MAX_REGISTERS
# error "Maximum number of registers per thread exceeded"
#endif

View File

@@ -1,49 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Constant Globals */
#pragma once
#include "kernel/kernel_profiling.h"
#include "kernel/kernel_types.h"
#include "kernel/integrator/integrator_state.h"
CCL_NAMESPACE_BEGIN
/* Not actually used, just a NULL pointer that gets passed everywhere, which we
* hope gets optimized out by the compiler. */
struct KernelGlobals {
/* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */
int unused[1];
};
/* Global scene data and textures */
__constant__ KernelData __data;
#define KERNEL_TEX(type, name) __attribute__((used)) const __constant__ __device__ type *name;
#include "kernel/kernel_textures.h"
/* Integrator state */
__constant__ IntegratorStateGPU __integrator_state;
/* Abstraction macros */
#define kernel_data __data
#define kernel_tex_fetch(t, index) t[(index)]
#define kernel_tex_array(t) (t)
#define kernel_integrator_state __integrator_state
CCL_NAMESPACE_END

View File

@@ -1,28 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* HIP kernel entry points */
#ifdef __HIP_DEVICE_COMPILE__
# include "kernel/device/hip/compat.h"
# include "kernel/device/hip/config.h"
# include "kernel/device/hip/globals.h"
# include "kernel/device/gpu/image.h"
# include "kernel/device/gpu/kernel.h"
#endif

View File

@@ -19,7 +19,7 @@
#include "kernel/device/optix/compat.h"
#include "kernel/device/optix/globals.h"
#include "kernel/device/gpu/image.h" /* Texture lookup uses normal CUDA intrinsics. */
#include "kernel/device/gpu/image.h" // Texture lookup uses normal CUDA intrinsics
#include "kernel/integrator/integrator_state.h"
#include "kernel/integrator/integrator_state_flow.h"
@@ -44,18 +44,18 @@ template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
template<bool always = false> ccl_device_forceinline uint get_object_id()
{
#ifdef __OBJECT_MOTION__
/* Always get the the instance ID from the TLAS.
* There might be a motion transform node between TLAS and BLAS which does not have one. */
// Always get the the instance ID from the TLAS
// There might be a motion transform node between TLAS and BLAS which does not have one
uint object = optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
#else
uint object = optixGetInstanceId();
#endif
/* Choose between always returning object ID or only for instances. */
// Choose between always returning object ID or only for instances
if (always || (object & 1) == 0)
/* Can just remove the low bit since instance always contains object ID. */
// Can just remove the low bit since instance always contains object ID
return object >> 1;
else
/* Set to OBJECT_NONE if this is not an instanced object. */
// Set to OBJECT_NONE if this is not an instanced object
return OBJECT_NONE;
}
@@ -93,30 +93,23 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_volume_st
extern "C" __global__ void __miss__kernel_optix_miss()
{
/* 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss. */
// 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss
optixSetPayload_0(__float_as_uint(optixGetRayTmax()));
optixSetPayload_5(PRIMITIVE_NONE);
}
extern "C" __global__ void __anyhit__kernel_optix_local_hit()
{
#ifdef __HAIR__
if (!optixIsTriangleHit()) {
/* Ignore curves. */
return optixIgnoreIntersection();
}
#endif
#ifdef __BVH_LOCAL__
const uint object = get_object_id<true>();
if (object != optixGetPayload_4() /* local_object */) {
/* Only intersect with matching object. */
// Only intersect with matching object
return optixIgnoreIntersection();
}
const uint max_hits = optixGetPayload_5();
if (max_hits == 0) {
/* Special case for when no hit information is requested, just report that something was hit */
// Special case for when no hit information is requested, just report that something was hit
optixSetPayload_5(true);
return optixTerminateRay();
}
@@ -143,9 +136,8 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
}
else {
if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
/* Record closest intersection only.
* Do not terminate ray here, since there is no guarantee about distance ordering in any-hit.
*/
// Record closest intersection only
// Do not terminate ray here, since there is no guarantee about distance ordering in any-hit
return optixIgnoreIntersection();
}
@@ -162,14 +154,14 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
isect->u = 1.0f - barycentrics.y - barycentrics.x;
isect->v = barycentrics.x;
/* Record geometric normal. */
// Record geometric normal
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0));
const float3 tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1));
const float3 tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
/* Continue tracing (without this the trace call would return after the first hit). */
// Continue tracing (without this the trace call would return after the first hit)
optixIgnoreIntersection();
#endif
}
@@ -198,7 +190,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
u = __uint_as_float(optixGetAttribute_0());
v = __uint_as_float(optixGetAttribute_1());
/* Filter out curve endcaps. */
// Filter out curve endcaps
if (u == 0.0f || u == 1.0f) {
ignore_intersection = true;
}
@@ -249,10 +241,10 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
isect->type = kernel_tex_fetch(__prim_type, prim);
# ifdef __TRANSPARENT_SHADOWS__
/* Detect if this surface has a shader with transparent shadows. */
// Detect if this surface has a shader with transparent shadows
if (!shader_transparent_shadow(NULL, isect) || max_hits == 0) {
# endif
/* If no transparent shadows, all light is blocked and we can stop immediately. */
// If no transparent shadows, all light is blocked and we can stop immediately
optixSetPayload_5(true);
return optixTerminateRay();
# ifdef __TRANSPARENT_SHADOWS__
@@ -260,39 +252,24 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
# endif
}
/* Continue tracing. */
// Continue tracing
optixIgnoreIntersection();
#endif
}
extern "C" __global__ void __anyhit__kernel_optix_volume_test()
extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
{
#ifdef __HAIR__
if (!optixIsTriangleHit()) {
/* Ignore curves. */
return optixIgnoreIntersection();
}
#endif
uint visibility = optixGetPayload_4();
#ifdef __VISIBILITY_FLAG__
const uint prim = optixGetPrimitiveIndex();
const uint visibility = optixGetPayload_4();
if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
return optixIgnoreIntersection();
}
#endif
const uint object = get_object_id<true>();
if ((kernel_tex_fetch(__object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
return optixIgnoreIntersection();
}
}
extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
{
#ifdef __HAIR__
if (!optixIsTriangleHit()) {
/* Filter out curve endcaps. */
// Filter out curve endcaps
const float u = __uint_as_float(optixGetAttribute_0());
if (u == 0.0f || u == 1.0f) {
return optixIgnoreIntersection();
@@ -300,26 +277,18 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
}
#endif
#ifdef __VISIBILITY_FLAG__
const uint prim = optixGetPrimitiveIndex();
const uint visibility = optixGetPayload_4();
if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
return optixIgnoreIntersection();
}
/* Shadow ray early termination. */
// Shadow ray early termination
if (visibility & PATH_RAY_SHADOW_OPAQUE) {
return optixTerminateRay();
}
#endif
}
extern "C" __global__ void __closesthit__kernel_optix_hit()
{
optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */
optixSetPayload_0(__float_as_uint(optixGetRayTmax())); // Intersection distance
optixSetPayload_3(optixGetPrimitiveIndex());
optixSetPayload_4(get_object_id());
/* Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index. */
// Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index
optixSetPayload_5(kernel_tex_fetch(__prim_type, optixGetPrimitiveIndex()));
if (optixIsTriangleHit()) {
@@ -328,7 +297,7 @@ extern "C" __global__ void __closesthit__kernel_optix_hit()
optixSetPayload_2(__float_as_uint(barycentrics.x));
}
else {
optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */
optixSetPayload_1(optixGetAttribute_0()); // Same as 'optixGetCurveParameter()'
optixSetPayload_2(optixGetAttribute_1());
}
}
@@ -342,7 +311,7 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type
float3 P = optixGetObjectRayOrigin();
float3 dir = optixGetObjectRayDirection();
/* The direction is not normalized by default, but the curve intersection routine expects that */
// The direction is not normalized by default, but the curve intersection routine expects that
float len;
dir = normalize_len(dir, &len);
@@ -354,15 +323,15 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type
Intersection isect;
isect.t = optixGetRayTmax();
/* Transform maximum distance into object space. */
// Transform maximum distance into object space
if (isect.t != FLT_MAX)
isect.t *= len;
if (curve_intersect(NULL, &isect, P, dir, isect.t, visibility, object, prim, time, type)) {
optixReportIntersection(isect.t / len,
type & PRIMITIVE_ALL,
__float_as_int(isect.u), /* Attribute_0 */
__float_as_int(isect.v)); /* Attribute_1 */
__float_as_int(isect.u), // Attribute_0
__float_as_int(isect.v)); // Attribute_1
}
}

View File

@@ -713,7 +713,7 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg,
P = transform_point(&tfm, P);
D = transform_direction(&tfm, D * t);
D = safe_normalize_len(D, &t);
D = normalize_len(D, &t);
}
int prim = kernel_tex_fetch(__prim_index, isect_prim);
@@ -764,10 +764,8 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg,
/* Thick curves, compute normal using direction from inside the curve.
* This could be optimized by recording the normal in the intersection,
* however for Optix this would go beyond the size of the payload. */
/* NOTE: It is possible that P will be the same as P_inside (precision issues, or very small
* radius). In this case use the view direction to approximate the normal. */
const float3 P_inside = float4_to_float3(catmull_rom_basis_eval(P_curve, sd->u));
const float3 Ng = (!isequal_float3(P, P_inside)) ? normalize(P - P_inside) : -sd->I;
const float3 Ng = normalize(P - P_inside);
sd->N = Ng;
sd->Ng = Ng;

View File

@@ -41,18 +41,7 @@ ccl_device_inline int find_attribute_motion(const KernelGlobals *kg,
uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
while (attr_map.x != id) {
if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) {
if (UNLIKELY(attr_map.y == 0)) {
return (int)ATTR_STD_NOT_FOUND;
}
else {
/* Chain jump to a different part of the table. */
attr_offset = attr_map.z;
}
}
else {
attr_offset += ATTR_PRIM_TYPES;
}
attr_offset += ATTR_PRIM_TYPES;
attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
}

View File

@@ -103,7 +103,7 @@ ccl_device_inline void shader_setup_from_ray(const KernelGlobals *ccl_restrict k
sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
if (isect->object != OBJECT_NONE) {
/* instance transform */
object_normal_transform_auto(kg, sd, &sd->N);
object_normal_transform_auto(kg, sd, &sd->Ng);

View File

@@ -109,17 +109,9 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
}
/* Position and normal on triangle. */
const int object = kernel_data.bake.object_index;
float3 P, Ng;
int shader;
triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
const int object_flag = kernel_tex_fetch(__object_flag, object);
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
P = transform_point_auto(&tfm, P);
}
triangle_point_normal(kg, kernel_data.bake.object_index, prim, u, v, &P, &Ng, &shader);
if (kernel_data.film.pass_background != PASS_UNUSED) {
/* Environment baking. */
@@ -138,13 +130,8 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
}
else {
/* Surface baking. */
float3 N = (shader & SHADER_SMOOTH_NORMAL) ? triangle_smooth_normal(kg, Ng, prim, u, v) : Ng;
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
N = normalize(transform_direction_transposed(&itfm, N));
Ng = normalize(transform_direction_transposed(&itfm, Ng));
}
const float3 N = (shader & SHADER_SMOOTH_NORMAL) ? triangle_smooth_normal(kg, Ng, prim, u, v) :
Ng;
/* Setup ray. */
Ray ray ccl_optional_struct_init;
@@ -156,12 +143,6 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
/* Setup differentials. */
float3 dPdu, dPdv;
triangle_dPdudv(kg, prim, &dPdu, &dPdv);
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
dPdu = transform_direction(&tfm, dPdu);
dPdv = transform_direction(&tfm, dPdv);
}
differential3 dP;
dP.dx = dPdu * dudx + dPdv * dvdx;
dP.dy = dPdu * dudy + dPdv * dvdy;

View File

@@ -123,7 +123,7 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel(
#ifdef __SHADOW_CATCHER__
const int object_flags = intersection_get_object_flags(kg, isect);
if (kernel_shadow_catcher_split(INTEGRATOR_STATE_PASS, object_flags)) {
if (kernel_data.film.pass_background != PASS_UNUSED && !kernel_data.background.transparent) {
if (kernel_data.film.use_approximate_shadow_catcher && !kernel_data.background.transparent) {
INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
if (use_raytrace_kernel) {

View File

@@ -60,15 +60,7 @@ CCL_NAMESPACE_BEGIN
* TODO: these could be made dynamic depending on the features used in the scene. */
#define INTEGRATOR_VOLUME_STACK_SIZE VOLUME_STACK_SIZE
#define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024
#define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4
#ifdef __KERNEL_CPU__
# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
#else
# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU
#endif
#define INTEGRATOR_SHADOW_ISECT_SIZE 4
/* Data structures */
@@ -82,9 +74,9 @@ typedef struct IntegratorStateCPU {
#define KERNEL_STRUCT_END(name) \
} \
name;
#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \
#define KERNEL_STRUCT_END_ARRAY(name, size) \
} \
name[cpu_size];
name[size];
#include "kernel/integrator/integrator_state_template.h"
#undef KERNEL_STRUCT_BEGIN
#undef KERNEL_STRUCT_MEMBER
@@ -111,9 +103,9 @@ typedef struct IntegratorStateGPU {
#define KERNEL_STRUCT_END(name) \
} \
name;
#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \
#define KERNEL_STRUCT_END_ARRAY(name, size) \
} \
name[gpu_size];
name[size];
#include "kernel/integrator/integrator_state_template.h"
#undef KERNEL_STRUCT_BEGIN
#undef KERNEL_STRUCT_MEMBER

View File

@@ -107,7 +107,7 @@ KERNEL_STRUCT_END(subsurface)
KERNEL_STRUCT_BEGIN(volume_stack)
KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, object, KERNEL_FEATURE_VOLUME)
KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, shader, KERNEL_FEATURE_VOLUME)
KERNEL_STRUCT_END_ARRAY(volume_stack, INTEGRATOR_VOLUME_STACK_SIZE, INTEGRATOR_VOLUME_STACK_SIZE)
KERNEL_STRUCT_END_ARRAY(volume_stack, INTEGRATOR_VOLUME_STACK_SIZE)
/********************************* Shadow Path State **************************/
@@ -153,15 +153,11 @@ KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, object, KERNEL_FEATURE_PATH_TRACIN
KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, type, KERNEL_FEATURE_PATH_TRACING)
/* TODO: exclude for GPU. */
KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_END_ARRAY(shadow_isect,
INTEGRATOR_SHADOW_ISECT_SIZE_CPU,
INTEGRATOR_SHADOW_ISECT_SIZE_GPU)
KERNEL_STRUCT_END_ARRAY(shadow_isect, INTEGRATOR_SHADOW_ISECT_SIZE)
/**************************** Shadow Volume Stack *****************************/
KERNEL_STRUCT_BEGIN(shadow_volume_stack)
KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, object, KERNEL_FEATURE_VOLUME)
KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, shader, KERNEL_FEATURE_VOLUME)
KERNEL_STRUCT_END_ARRAY(shadow_volume_stack,
INTEGRATOR_VOLUME_STACK_SIZE,
INTEGRATOR_VOLUME_STACK_SIZE)
KERNEL_STRUCT_END_ARRAY(shadow_volume_stack, INTEGRATOR_VOLUME_STACK_SIZE)

View File

@@ -217,10 +217,10 @@ ccl_device_inline void integrator_state_copy_only(const IntegratorState to_state
while (false) \
;
# define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
# define KERNEL_STRUCT_END_ARRAY(name, array_size) \
++index; \
} \
while (index < gpu_array_size) \
while (index < array_size) \
;
# include "kernel/integrator/integrator_state_template.h"
@@ -264,12 +264,7 @@ ccl_device_inline void integrator_state_shadow_catcher_split(INTEGRATOR_STATE_AR
IntegratorStateCPU *ccl_restrict split_state = state + 1;
/* Only copy the required subset, since shadow intersections are big and irrelevant here. */
split_state->path = state->path;
split_state->ray = state->ray;
split_state->isect = state->isect;
memcpy(split_state->volume_stack, state->volume_stack, sizeof(state->volume_stack));
split_state->shadow_path = state->shadow_path;
*split_state = *state;
split_state->path.flag |= PATH_RAY_SHADOW_CATCHER_PASS;
#endif

View File

@@ -386,7 +386,7 @@ ccl_device_inline void kernel_accum_light(INTEGRATOR_STATE_CONST_ARGS,
{
/* The throughput for shadow paths already contains the light shader evaluation. */
float3 contribution = INTEGRATOR_STATE(shadow_path, throughput);
kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(shadow_path, bounce));
kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(shadow_path, bounce) - 1);
ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
render_buffer);

View File

@@ -42,16 +42,6 @@ ccl_device void kernel_displace_evaluate(const KernelGlobals *kg,
object_inverse_dir_transform(kg, &sd, &D);
#ifdef __KERNEL_DEBUG_NAN__
if (!isfinite3_safe(D)) {
kernel_assert(!"Cycles displacement with non-finite value detected");
}
#endif
/* Ensure finite displacement, preventing BVH from becoming degenerate and avoiding possible
* traversal issues caused by non-finite math. */
D = ensure_finite3(D);
/* Write output. */
output[offset] += make_float4(D.x, D.y, D.z, 0.0f);
}
@@ -76,16 +66,7 @@ ccl_device void kernel_background_evaluate(const KernelGlobals *kg,
const int path_flag = PATH_RAY_EMISSION;
shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT>(
INTEGRATOR_STATE_PASS_NULL, &sd, NULL, path_flag);
float3 color = shader_background_eval(&sd);
#ifdef __KERNEL_DEBUG_NAN__
if (!isfinite3_safe(color)) {
kernel_assert(!"Cycles background with non-finite value detected");
}
#endif
/* Ensure finite color, avoiding possible numerical instabilities in the path tracing kernels. */
color = ensure_finite3(color);
const float3 color = shader_background_eval(&sd);
/* Write output. */
output[offset] += make_float4(color.x, color.y, color.z, 0.0f);

View File

@@ -74,6 +74,10 @@ ccl_device_inline float cmj_randfloat_simple(uint i, uint p)
ccl_device float pmj_sample_1D(const KernelGlobals *kg, uint sample, uint rng_hash, uint dimension)
{
/* The PMJ sample sets contain a sample with (x,y) with NUM_PMJ_SAMPLES so for 1D
* the x part is used as the sample (TODO(@leesonw): Add using both x and y parts
* independently). */
/* Perform Owen shuffle of the sample number to reorder the samples. */
#ifdef _SIMPLE_HASH_
const uint rv = cmj_hash_simple(dimension, rng_hash);
@@ -91,10 +95,7 @@ ccl_device float pmj_sample_1D(const KernelGlobals *kg, uint sample, uint rng_ha
const uint sample_set = s / NUM_PMJ_SAMPLES;
const uint d = (dimension + sample_set);
const uint dim = d % NUM_PMJ_PATTERNS;
/* The PMJ sample sets contain a sample with (x,y) with NUM_PMJ_SAMPLES so for 1D
* the x part is used for even dims and the y for odd. */
int index = 2 * ((dim >> 1) * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES)) + (dim & 1);
int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES));
float fx = kernel_tex_fetch(__sample_pattern_lut, index);
@@ -103,11 +104,12 @@ ccl_device float pmj_sample_1D(const KernelGlobals *kg, uint sample, uint rng_ha
# ifdef _SIMPLE_HASH_
float dx = cmj_randfloat_simple(d, rng_hash);
# else
/* Only jitter within the grid interval. */
float dx = cmj_randfloat(d, rng_hash);
# endif
/* Jitter sample locations and map back into [0 1]. */
fx = fx + dx;
fx = fx + dx * (1.0f / NUM_PMJ_SAMPLES);
fx = fx - floorf(fx);
#else
# warning "Not using Cranley-Patterson Rotation."
#endif
@@ -134,7 +136,7 @@ ccl_device void pmj_sample_2D(
/* Based on the sample number a sample pattern is selected and offset by the dimension. */
const uint sample_set = s / NUM_PMJ_SAMPLES;
const uint d = (dimension + sample_set);
uint dim = d % NUM_PMJ_PATTERNS;
const uint dim = d % NUM_PMJ_PATTERNS;
int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES));
float fx = kernel_tex_fetch(__sample_pattern_lut, index);
@@ -149,17 +151,17 @@ ccl_device void pmj_sample_2D(
float dx = cmj_randfloat(d, rng_hash);
float dy = cmj_randfloat(d + 1, rng_hash);
# endif
/* Jitter sample locations and map back to the unit square [0 1]x[0 1]. */
float sx = fx + dx;
float sy = fy + dy;
sx = sx - floorf(sx);
sy = sy - floorf(sy);
/* Only jitter within the grid cells. */
fx = fx + dx * (1.0f / NUM_PMJ_DIVISIONS);
fy = fy + dy * (1.0f / NUM_PMJ_DIVISIONS);
fx = fx - floorf(fx);
fy = fy - floorf(fy);
#else
# warning "Not using Cranley Patterson Rotation."
#endif
(*x) = sx;
(*y) = sy;
(*x) = fx;
(*y) = fy;
}
CCL_NAMESPACE_END

View File

@@ -186,8 +186,8 @@ ccl_device_inline float _shader_bsdf_multi_eval(const KernelGlobals *kg,
float sum_sample_weight,
const uint light_shader_flags)
{
/* This is the veach one-sample model with balance heuristic,
* some PDF factors drop out when using balance heuristic weighting. */
/* this is the veach one-sample model with balance heuristic, some pdf
* factors drop out when using balance heuristic weighting */
for (int i = 0; i < sd->num_closure; i++) {
const ShaderClosure *sc = &sd->closure[i];

View File

@@ -360,6 +360,7 @@ typedef enum PassType {
PASS_MATERIAL_ID,
PASS_MOTION,
PASS_MOTION_WEIGHT,
PASS_RENDER_TIME,
PASS_CRYPTOMATTE,
PASS_AOV_COLOR,
PASS_AOV_VALUE,

View File

@@ -110,7 +110,6 @@ ustring OSLRenderServices::u_curve_thickness("geom:curve_thickness");
ustring OSLRenderServices::u_curve_length("geom:curve_length");
ustring OSLRenderServices::u_curve_tangent_normal("geom:curve_tangent_normal");
ustring OSLRenderServices::u_curve_random("geom:curve_random");
ustring OSLRenderServices::u_normal_map_normal("geom:normal_map_normal");
ustring OSLRenderServices::u_path_ray_length("path:ray_length");
ustring OSLRenderServices::u_path_ray_depth("path:ray_depth");
ustring OSLRenderServices::u_path_diffuse_depth("path:diffuse_depth");
@@ -986,18 +985,8 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobals *kg,
float3 f = curve_tangent_normal(kg, sd);
return set_attribute_float3(f, type, derivatives, val);
}
else if (name == u_normal_map_normal) {
if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
float3 f = triangle_smooth_normal_unnormalized(kg, sd, sd->Ng, sd->prim, sd->u, sd->v);
return set_attribute_float3(f, type, derivatives, val);
}
else {
return false;
}
}
else {
else
return false;
}
}
bool OSLRenderServices::get_background_attribute(const KernelGlobals *kg,

View File

@@ -297,7 +297,6 @@ class OSLRenderServices : public OSL::RendererServices {
static ustring u_curve_length;
static ustring u_curve_tangent_normal;
static ustring u_curve_random;
static ustring u_normal_map_normal;
static ustring u_path_ray_length;
static ustring u_path_ray_depth;
static ustring u_path_diffuse_depth;

View File

@@ -41,7 +41,6 @@ set(SRC_OSL
node_vector_displacement.osl
node_emission.osl
node_environment_texture.osl
node_float_curve.osl
node_fresnel.osl
node_gamma.osl
node_geometry.osl

Some files were not shown because too many files have changed in this diff Show More