Geometry Nodes: add simulation support #104924

Closed
Hans Goudey wants to merge 211 commits from geometry-nodes-simulation into main

184 changed files with 5093 additions and 1136 deletions
Showing only changes of commit 60184f0b5f.

View File

@ -525,6 +525,12 @@ if(NOT APPLE)
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
# HIPRT is only available on Windows for now.
if(WIN32)
option(WITH_CYCLES_DEVICE_HIPRT "Enable Cycles AMD HIPRT support" OFF)
mark_as_advanced(WITH_CYCLES_DEVICE_HIPRT)
endif()
endif()
# Apple Metal
@ -1981,10 +1987,13 @@ if(FIRST_RUN)
info_cfg_option(WITH_CYCLES_DEVICE_OPTIX)
info_cfg_option(WITH_CYCLES_DEVICE_CUDA)
info_cfg_option(WITH_CYCLES_CUDA_BINARIES)
info_cfg_option(WITH_CYCLES_DEVICE_HIP)
info_cfg_option(WITH_CYCLES_HIP_BINARIES)
info_cfg_option(WITH_CYCLES_DEVICE_ONEAPI)
info_cfg_option(WITH_CYCLES_ONEAPI_BINARIES)
info_cfg_option(WITH_CYCLES_DEVICE_HIP)
info_cfg_option(WITH_CYCLES_HIP_BINARIES)
endif()
if(WIN32)
info_cfg_option(WITH_CYCLES_DEVICE_HIPRT)
endif()
endif()

View File

@ -1,12 +1,9 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2021 Blender Foundation.
# - Find HIP compiler
#
# This module defines
# Find HIP compiler. This module defines
# HIP_HIPCC_EXECUTABLE, the full path to the hipcc executable
# HIP_VERSION, the HIP compiler version
#
# HIP_FOUND, if the HIP toolkit is found.
# If HIP_ROOT_DIR was defined in the environment, use it.
@ -27,12 +24,21 @@ find_program(HIP_HIPCC_EXECUTABLE
bin
)
if(HIP_HIPCC_EXECUTABLE AND NOT EXISTS ${HIP_HIPCC_EXECUTABLE})
message(WARNING "Cached or directly specified hipcc executable does not exist.")
set(HIP_FOUND FALSE)
elseif(HIP_HIPCC_EXECUTABLE)
set(HIP_FOUND TRUE)
if(WIN32)
# Needed for HIP-RT on Windows.
find_program(HIP_LINKER_EXECUTABLE
NAMES
clang++
HINTS
${_hip_SEARCH_DIRS}
PATH_SUFFIXES
bin
NO_DEFAULT_PATH
NO_CMAKE_PATH
)
endif()
if(HIP_HIPCC_EXECUTABLE)
set(HIP_VERSION_MAJOR 0)
set(HIP_VERSION_MINOR 0)
set(HIP_VERSION_PATCH 0)
@ -54,33 +60,31 @@ elseif(HIP_HIPCC_EXECUTABLE)
# Strip the HIP prefix and get list of individual version components.
string(REGEX REPLACE
".*HIP version: ([.0-9]+).*" "\\1"
HIP_SEMANTIC_VERSION "${_hip_version_raw}")
string(REPLACE "." ";" HIP_VERSION_PARTS "${HIP_SEMANTIC_VERSION}")
list(LENGTH HIP_VERSION_PARTS NUM_HIP_VERSION_PARTS)
_hip_semantic_version "${_hip_version_raw}")
string(REPLACE "." ";" _hip_version_parts "${_hip_semantic_version}")
list(LENGTH _hip_version_parts _num_hip_version_parts)
# Extract components into corresponding variables.
if(NUM_HIP_VERSION_PARTS GREATER 0)
list(GET HIP_VERSION_PARTS 0 HIP_VERSION_MAJOR)
if(_num_hip_version_parts GREATER 0)
list(GET _hip_version_parts 0 HIP_VERSION_MAJOR)
endif()
if(NUM_HIP_VERSION_PARTS GREATER 1)
list(GET HIP_VERSION_PARTS 1 HIP_VERSION_MINOR)
if(_num_hip_version_parts GREATER 1)
list(GET _hip_version_parts 1 HIP_VERSION_MINOR)
endif()
if(NUM_HIP_VERSION_PARTS GREATER 2)
list(GET HIP_VERSION_PARTS 2 HIP_VERSION_PATCH)
if(_num_hip_version_parts GREATER 2)
list(GET _hip_version_parts 2 HIP_VERSION_PATCH)
endif()
# Unset temp variables.
unset(NUM_HIP_VERSION_PARTS)
unset(HIP_SEMANTIC_VERSION)
unset(HIP_VERSION_PARTS)
unset(_num_hip_version_parts)
unset(_hip_semantic_version)
unset(_hip_version_parts)
endif()
# Construct full semantic version.
set(HIP_VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}")
unset(_hip_version_raw)
unset(_hipcc_executable)
else()
set(HIP_FOUND FALSE)
endif()
include(FindPackageHandleStandardArgs)

View File

@ -0,0 +1,47 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2021 Blender Foundation.
# Find HIPRT SDK. This module defines:
# HIPRT_INCLUDE_DIR, path to HIPRT include directory
# HIPRT_BITCODE, bitcode file with ray-tracing functionality
# HIPRT_FOUND, if SDK found
# If HIPRT_ROOT_DIR was defined in the environment, use it.
if(NOT HIPRT_ROOT_DIR AND NOT $ENV{HIPRT_ROOT_DIR} STREQUAL "")
set(HIPRT_ROOT_DIR $ENV{HIPRT_ROOT_DIR})
endif()
set(_hiprt_SEARCH_DIRS
${HIPRT_ROOT_DIR}
)
find_path(HIPRT_INCLUDE_DIR
NAMES
hiprt/hiprt.h
HINTS
${_hiprt_SEARCH_DIRS}
)
if(HIPRT_INCLUDE_DIR)
file(STRINGS "${HIPRT_INCLUDE_DIR}/hiprt/hiprt.h" _hiprt_version
REGEX "^#define HIPRT_VERSION_STR[ \t]\".*\"$")
string(REGEX MATCHALL "[0-9]+[.0-9]+" _hiprt_version ${_hiprt_version})
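# For example (assuming the version string shipped in hiprtew.h): with
# HIPRT_VERSION_STR "02000" this leaves _hiprt_version as "02000", matching
# the hiprt02000_amd_lib_win.bc file searched for below.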
find_file(HIPRT_BITCODE
NAMES
hiprt${_hiprt_version}_amd_lib_win.bc
HINTS
${HIPRT_INCLUDE_DIR}/hiprt/win
NO_DEFAULT_PATH
)
unset(_hiprt_version)
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(HIPRT DEFAULT_MSG
HIPRT_INCLUDE_DIR HIPRT_BITCODE)
mark_as_advanced(
HIPRT_INCLUDE_DIR
)

View File

@ -81,7 +81,6 @@ if(NOT APPLE)
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_ONEAPI_BINARIES ON CACHE BOOL "" FORCE)

View File

@ -10,6 +10,8 @@ buildbot:
version: '11.4.1'
hip:
version: '5.5.30571'
hiprt:
version: '2.0.0'
optix:
version: '7.3.0'
ocloc:

View File

@ -12,11 +12,21 @@ set(INC_SYS
set(SRC
src/hipew.c
include/hipew.h
)
set(LIB
)
if(HIPRT_INCLUDE_DIR)
list(APPEND INC_SYS
${HIPRT_INCLUDE_DIR}
)
list(APPEND SRC
src/hiprtew.cc
include/hiprtew.h
)
endif()
blender_add_lib(extern_hipew "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")

View File

@ -43,9 +43,9 @@ extern "C" {
#define HIP_TRSA_OVERRIDE_FORMAT 0x01
#define HIP_TRSF_READ_AS_INTEGER 0x01
#define HIP_TRSF_NORMALIZED_COORDINATES 0x02
#define HIP_LAUNCH_PARAM_END ((void*)0x00)
#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
#define HIP_LAUNCH_PARAM_END ((void*)0x03)
/* Functions which changed 3.1 -> 3.2 for 64 bit stuff,
* the cuda library has both the old ones for compatibility and new
@ -55,6 +55,7 @@ extern "C" {
#define hipMemGetInfo hipMemGetInfo
#define hipMemAllocPitch hipMemAllocPitch
#define hipMemGetAddressRange hipMemGetAddressRange
#define hipMemcpy hipMemcpy
#define hipMemcpyHtoD hipMemcpyHtoD
#define hipMemcpyDtoH hipMemcpyDtoH
#define hipMemcpyDtoD hipMemcpyDtoD
@ -68,6 +69,7 @@ extern "C" {
#define hipMemsetD32 hipMemsetD32
#define hipArrayCreate hipArrayCreate
#define hipArray3DCreate hipArray3DCreate
#define hipPointerGetAttributes hipPointerGetAttributes
#define hipTexRefSetAddress hipTexRefSetAddress
#define hipTexRefGetAddress hipTexRefGetAddress
#define hipStreamDestroy hipStreamDestroy
@ -108,11 +110,20 @@ typedef struct hipMipmappedArray_st* hipMipmappedArray_t;
typedef struct ihipEvent_t* hipEvent_t;
typedef struct ihipStream_t* hipStream_t;
typedef unsigned long long hipTextureObject_t;
typedef void* hipExternalMemory_t;
typedef struct HIPuuid_st {
char bytes[16];
} HIPuuid;
typedef enum hipMemcpyKind {
hipMemcpyHostToHost = 0,
hipMemcpyHostToDevice = 1,
hipMemcpyDeviceToHost = 2,
hipMemcpyDeviceToDevice = 3,
hipMemcpyDefault = 4
} hipMemcpyKind;
typedef enum hipChannelFormatKind {
hipChannelFormatKindSigned = 0,
hipChannelFormatKindUnsigned = 1,
@ -1048,28 +1059,105 @@ typedef enum HIPGLmap_flags_enum {
HIP_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
} HIPGLmap_flags;
typedef enum hipExternalMemoryHandleType_enum {
hipExternalMemoryHandleTypeOpaqueFd = 1,
hipExternalMemoryHandleTypeOpaqueWin32 = 2,
hipExternalMemoryHandleTypeOpaqueWin32Kmt = 3,
hipExternalMemoryHandleTypeD3D12Heap = 4,
hipExternalMemoryHandleTypeD3D12Resource = 5,
hipExternalMemoryHandleTypeD3D11Resource = 6,
hipExternalMemoryHandleTypeD3D11ResourceKmt = 7,
} hipExternalMemoryHandleType;
typedef struct hipExternalMemoryHandleDesc_st {
hipExternalMemoryHandleType type;
union {
int fd;
struct {
void *handle;
const void *name;
} win32;
} handle;
unsigned long long size;
unsigned int flags;
} hipExternalMemoryHandleDesc;
typedef struct hipExternalMemoryBufferDesc_st {
unsigned long long offset;
unsigned long long size;
unsigned int flags;
} hipExternalMemoryBufferDesc;
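/* Illustrative sketch, not part of the original header: the intended flow
 * through the external-memory entry points declared further below.
 * `shared_handle` and `heap_size` are hypothetical inputs, and error
 * handling is omitted. */
static void* import_d3d12_heap_example(void* shared_handle, unsigned long long heap_size) {
  hipExternalMemoryHandleDesc handle_desc;
  handle_desc.type = hipExternalMemoryHandleTypeD3D12Heap;
  handle_desc.handle.win32.handle = shared_handle;
  handle_desc.handle.win32.name = NULL;
  handle_desc.size = heap_size;
  handle_desc.flags = 0;
  hipExternalMemory_t ext_mem = NULL;
  hipImportExternalMemory(&ext_mem, &handle_desc);
  hipExternalMemoryBufferDesc buffer_desc;
  buffer_desc.offset = 0;
  buffer_desc.size = heap_size;
  buffer_desc.flags = 0;
  /* The mapping stays valid until hipDestroyExternalMemory(ext_mem). */
  void* device_ptr = NULL;
  hipExternalMemoryGetMappedBuffer(&device_ptr, ext_mem, &buffer_desc);
  return device_ptr;
}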
/**
* hipRTC related
*/
typedef struct _hiprtcProgram* hiprtcProgram;
typedef enum hiprtcResult {
HIPRTC_SUCCESS = 0,
HIPRTC_ERROR_OUT_OF_MEMORY = 1,
HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
HIPRTC_ERROR_INVALID_INPUT = 3,
HIPRTC_ERROR_INVALID_PROGRAM = 4,
HIPRTC_ERROR_INVALID_OPTION = 5,
HIPRTC_ERROR_COMPILATION = 6,
HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
HIPRTC_ERROR_INTERNAL_ERROR = 11
HIPRTC_SUCCESS = 0,
HIPRTC_ERROR_OUT_OF_MEMORY = 1,
HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
HIPRTC_ERROR_INVALID_INPUT = 3,
HIPRTC_ERROR_INVALID_PROGRAM = 4,
HIPRTC_ERROR_INVALID_OPTION = 5,
HIPRTC_ERROR_COMPILATION = 6,
HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
HIPRTC_ERROR_INTERNAL_ERROR = 11,
HIPRTC_ERROR_LINKING = 100
} hiprtcResult;
typedef enum hiprtcJIT_option {
HIPRTC_JIT_MAX_REGISTERS = 0,
HIPRTC_JIT_THREADS_PER_BLOCK,
HIPRTC_JIT_WALL_TIME,
HIPRTC_JIT_INFO_LOG_BUFFER,
HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
HIPRTC_JIT_ERROR_LOG_BUFFER,
HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
HIPRTC_JIT_OPTIMIZATION_LEVEL,
HIPRTC_JIT_TARGET_FROM_HIPCONTEXT,
HIPRTC_JIT_TARGET,
HIPRTC_JIT_FALLBACK_STRATEGY,
HIPRTC_JIT_GENERATE_DEBUG_INFO,
HIPRTC_JIT_LOG_VERBOSE,
HIPRTC_JIT_GENERATE_LINE_INFO,
HIPRTC_JIT_CACHE_MODE,
HIPRTC_JIT_NEW_SM3X_OPT,
HIPRTC_JIT_FAST_COMPILE,
HIPRTC_JIT_GLOBAL_SYMBOL_NAMES,
HIPRTC_JIT_GLOBAL_SYMBOL_ADDRESS,
HIPRTC_JIT_GLOBAL_SYMBOL_COUNT,
HIPRTC_JIT_LTO,
HIPRTC_JIT_FTZ,
HIPRTC_JIT_PREC_DIV,
HIPRTC_JIT_PREC_SQRT,
HIPRTC_JIT_FMA,
HIPRTC_JIT_NUM_OPTIONS,
} hiprtcJIT_option;
typedef enum hiprtcJITInputType {
HIPRTC_JIT_INPUT_CUBIN = 0,
HIPRTC_JIT_INPUT_PTX,
HIPRTC_JIT_INPUT_FATBINARY,
HIPRTC_JIT_INPUT_OBJECT,
HIPRTC_JIT_INPUT_LIBRARY,
HIPRTC_JIT_INPUT_NVVM,
HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES,
HIPRTC_JIT_INPUT_LLVM_BITCODE = 100,
HIPRTC_JIT_INPUT_LLVM_BUNDLED_BITCODE = 101,
HIPRTC_JIT_INPUT_LLVM_ARCHIVES_OF_BUNDLED_BITCODE = 102,
HIPRTC_JIT_NUM_INPUT_TYPES = ( HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES + 3 )
} hiprtcJITInputType;
typedef struct ihiprtcLinkState* hiprtcLinkState;
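/* Illustrative sketch, not part of the original header: the intended flow
 * through the hiprtcLink* entry points typedef'd below, linking one
 * precompiled LLVM bitcode blob into a loadable binary. `bitcode` and
 * `bitcode_size` are hypothetical inputs; JIT options and error checks
 * are omitted. */
static void link_bitcode_example(void* bitcode, size_t bitcode_size) {
  hiprtcLinkState link_state = NULL;
  hiprtcLinkCreate(0, NULL, NULL, &link_state);
  hiprtcLinkAddData(link_state, HIPRTC_JIT_INPUT_LLVM_BITCODE,
                    bitcode, bitcode_size, "example_bc", 0, NULL, NULL);
  void* binary = NULL;
  size_t binary_size = 0;
  hiprtcLinkComplete(link_state, &binary, &binary_size);
  /* `binary` can now be loaded as a HIP module; destroying the link state
   * is the last step. */
  hiprtcLinkDestroy(link_state);
}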
/* Function types. */
typedef hipError_t HIPAPI thipGetErrorName(hipError_t error, const char** pStr);
typedef const char* HIPAPI thipGetErrorString(hipError_t error);
typedef hipError_t HIPAPI thipGetLastError(hipError_t error);
typedef hipError_t HIPAPI thipInit(unsigned int Flags);
typedef hipError_t HIPAPI thipDriverGetVersion(int* driverVersion);
typedef hipError_t HIPAPI thipGetDevice(int* device);
@ -1078,6 +1166,8 @@ typedef hipError_t HIPAPI thipGetDeviceProperties(hipDeviceProp_t* props, int de
typedef hipError_t HIPAPI thipDeviceGet(hipDevice_t* device, int ordinal);
typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev);
typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
typedef hipError_t HIPAPI thipDeviceGetLimit(size_t* pValue, enum hipLimit_t limit);
typedef hipError_t HIPAPI thipDeviceSetLimit(enum hipLimit_t limit, size_t value);
typedef hipError_t HIPAPI thipDeviceComputeCapability(int* major, int* minor, hipDevice_t dev);
typedef hipError_t HIPAPI thipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev);
typedef hipError_t HIPAPI thipDevicePrimaryCtxRelease(hipDevice_t dev);
@ -1114,13 +1204,14 @@ typedef hipError_t HIPAPI thipMemGetAddressRange(hipDeviceptr_t* pbase, size_t*
typedef hipError_t HIPAPI thipHostMalloc(void** pp, size_t bytesize, unsigned int flags);
typedef hipError_t HIPAPI thipHostFree(void* p);
typedef hipError_t HIPAPI thipMemHostAlloc(void** pp, size_t bytesize, unsigned int Flags);
typedef hipError_t HIPAPI thipHostRegister(void* p, size_t bytesize, unsigned int Flags);
typedef hipError_t HIPAPI thipHostGetDevicePointer(hipDeviceptr_t* pdptr, void* p, unsigned int Flags);
typedef hipError_t HIPAPI thipHostGetFlags(unsigned int* pFlags, void* p);
typedef hipError_t HIPAPI thipMallocManaged(hipDeviceptr_t* dptr, size_t bytesize, unsigned int flags);
typedef hipError_t HIPAPI thipDeviceGetByPCIBusId(hipDevice_t* dev, const char* pciBusId);
typedef hipError_t HIPAPI thipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t dev);
typedef hipError_t HIPAPI thipMemHostUnregister(void* p);
typedef hipError_t HIPAPI thipMemcpy(hipDeviceptr_t dst, hipDeviceptr_t src, size_t ByteCount);
typedef hipError_t HIPAPI thipHostUnregister(void* p);
typedef hipError_t HIPAPI thipMemcpy(void* dst, const void* src, size_t ByteCount, hipMemcpyKind kind);
typedef hipError_t HIPAPI thipMemcpyPeer(hipDeviceptr_t dstDevice, hipCtx_t dstContext, hipDeviceptr_t srcDevice, hipCtx_t srcContext, size_t ByteCount);
typedef hipError_t HIPAPI thipMemcpyHtoD(hipDeviceptr_t dstDevice, void* srcHost, size_t ByteCount);
typedef hipError_t HIPAPI thipMemcpyDtoH(void* dstHost, hipDeviceptr_t srcDevice, size_t ByteCount);
@ -1130,8 +1221,10 @@ typedef hipError_t HIPAPI thipMemcpyParam2D(const hip_Memcpy2D* pCopy);
typedef hipError_t HIPAPI thipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy);
typedef hipError_t HIPAPI thipMemcpyHtoDAsync(hipDeviceptr_t dstDevice, const void* srcHost, size_t ByteCount, hipStream_t hStream);
typedef hipError_t HIPAPI thipMemcpyDtoHAsync(void* dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
typedef hipError_t HIPAPI thipMemcpyDtoDAsync(hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
typedef hipError_t HIPAPI thipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t hStream);
typedef hipError_t HIPAPI thipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t hStream);
typedef hipError_t HIPAPI thipMemset(void* dstDevice, int value, size_t sizeBytes);
typedef hipError_t HIPAPI thipMemsetD8(hipDeviceptr_t dstDevice, unsigned char uc, size_t N);
typedef hipError_t HIPAPI thipMemsetD16(hipDeviceptr_t dstDevice, unsigned short us, size_t N);
typedef hipError_t HIPAPI thipMemsetD32(hipDeviceptr_t dstDevice, unsigned int ui, size_t N);
@ -1144,7 +1237,8 @@ typedef hipError_t HIPAPI thipMemsetD2D32Async(hipDeviceptr_t dstDevice, size_t
typedef hipError_t HIPAPI thipArrayCreate(hArray ** pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray);
typedef hipError_t HIPAPI thipArrayDestroy(hArray hArray);
typedef hipError_t HIPAPI thipArray3DCreate(hArray * pHandle, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray);
typedef hipError_t HIPAPI hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr);
typedef hipError_t HIPAPI thipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr);
typedef hipError_t HIPAPI thipStreamCreate(hipStream_t* phStream);
typedef hipError_t HIPAPI thipStreamCreateWithFlags(hipStream_t* phStream, unsigned int Flags);
typedef hipError_t HIPAPI thipStreamCreateWithPriority(hipStream_t* phStream, unsigned int flags, int priority);
typedef hipError_t HIPAPI thipStreamGetPriority(hipStream_t hStream, int* priority);
@ -1189,7 +1283,10 @@ typedef hipError_t HIPAPI thipGraphicsMapResources(unsigned int count, hipGraphi
typedef hipError_t HIPAPI thipGraphicsUnmapResources(unsigned int count, hipGraphicsResource* resources, hipStream_t hStream);
typedef hipError_t HIPAPI thipGraphicsGLRegisterBuffer(hipGraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags);
typedef hipError_t HIPAPI thipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, unsigned int hipDeviceCount, hipGLDeviceList deviceList);
typedef hiprtcResult HIPAPI thiprtcGetErrorString(hiprtcResult result);
typedef hipError_t HIPAPI thipImportExternalMemory(hipExternalMemory_t* extMem_out, const hipExternalMemoryHandleDesc* memHandleDesc);
typedef hipError_t HIPAPI thipExternalMemoryGetMappedBuffer(void **devPtr, hipExternalMemory_t extMem, const hipExternalMemoryBufferDesc *bufferDesc);
typedef hipError_t HIPAPI thipDestroyExternalMemory(hipExternalMemory_t extMem);
typedef const char* HIPAPI thiprtcGetErrorString(hiprtcResult result);
typedef hiprtcResult HIPAPI thiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression);
typedef hiprtcResult HIPAPI thiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options);
typedef hiprtcResult HIPAPI thiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, int numHeaders, const char** headers, const char** includeNames);
@ -1197,20 +1294,30 @@ typedef hiprtcResult HIPAPI thiprtcDestroyProgram(hiprtcProgram* prog);
typedef hiprtcResult HIPAPI thiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, const char** lowered_name);
typedef hiprtcResult HIPAPI thiprtcGetProgramLog(hiprtcProgram prog, char* log);
typedef hiprtcResult HIPAPI thiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet);
typedef hiprtcResult HIPAPI thiprtcGetBitcode( hiprtcProgram prog, char* bitcode );
typedef hiprtcResult HIPAPI thiprtcGetBitcodeSize( hiprtcProgram prog, size_t* bitcodeSizeRet );
typedef hiprtcResult HIPAPI thiprtcGetCode(hiprtcProgram prog, char* code);
typedef hiprtcResult HIPAPI thiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet);
typedef hiprtcResult HIPAPI thiprtcLinkCreate( unsigned int num_options, hiprtcJIT_option* option_ptr, void** option_vals_pptr, hiprtcLinkState* hip_link_state_ptr );
typedef hiprtcResult HIPAPI thiprtcLinkAddFile( hiprtcLinkState hip_link_state, hiprtcJITInputType input_type, const char* file_path, unsigned int num_options, hiprtcJIT_option* options_ptr, void** option_values );
typedef hiprtcResult HIPAPI thiprtcLinkAddData( hiprtcLinkState hip_link_state, hiprtcJITInputType input_type, void* image, size_t image_size, const char* name, unsigned int num_options, hiprtcJIT_option* options_ptr, void** option_values );
typedef hiprtcResult HIPAPI thiprtcLinkComplete( hiprtcLinkState hip_link_state, void** bin_out, size_t* size_out );
typedef hiprtcResult HIPAPI thiprtcLinkDestroy( hiprtcLinkState hip_link_state );
/* Function declarations. */
extern thipGetErrorName *hipGetErrorName;
extern thipGetErrorString* hipGetErrorString;
extern thipGetLastError* hipGetLastError;
extern thipInit *hipInit;
extern thipDriverGetVersion *hipDriverGetVersion;
extern thipGetDevice *hipGetDevice;
extern thipGetDeviceCount *hipGetDeviceCount;
extern thipGetDeviceProperties *hipGetDeviceProperties;
extern thipDeviceGet* hipDeviceGet;
extern thipDeviceGet *hipDeviceGet;
extern thipDeviceGetName *hipDeviceGetName;
extern thipDeviceGetAttribute *hipDeviceGetAttribute;
extern thipDeviceGetLimit *hipDeviceGetLimit;
extern thipDeviceSetLimit *hipDeviceSetLimit;
extern thipDeviceComputeCapability *hipDeviceComputeCapability;
extern thipDevicePrimaryCtxRetain *hipDevicePrimaryCtxRetain;
extern thipDevicePrimaryCtxRelease *hipDevicePrimaryCtxRelease;
@ -1246,11 +1353,14 @@ extern thipFree *hipFree;
extern thipMemGetAddressRange *hipMemGetAddressRange;
extern thipHostMalloc *hipHostMalloc;
extern thipHostFree *hipHostFree;
extern thipHostRegister *hipHostRegister;
extern thipHostGetDevicePointer *hipHostGetDevicePointer;
extern thipHostGetFlags *hipHostGetFlags;
extern thipHostUnregister *hipHostUnregister;
extern thipMallocManaged *hipMallocManaged;
extern thipDeviceGetByPCIBusId *hipDeviceGetByPCIBusId;
extern thipDeviceGetPCIBusId *hipDeviceGetPCIBusId;
extern thipMemcpy *hipMemcpy;
extern thipMemcpyPeer *hipMemcpyPeer;
extern thipMemcpyHtoD *hipMemcpyHtoD;
extern thipMemcpyDtoH *hipMemcpyDtoH;
@ -1260,8 +1370,10 @@ extern thipMemcpyParam2D *hipMemcpyParam2D;
extern thipDrvMemcpy3D *hipDrvMemcpy3D;
extern thipMemcpyHtoDAsync *hipMemcpyHtoDAsync;
extern thipMemcpyDtoHAsync *hipMemcpyDtoHAsync;
extern thipMemcpyDtoDAsync *hipMemcpyDtoDAsync;
extern thipMemcpyParam2DAsync *hipMemcpyParam2DAsync;
extern thipDrvMemcpy3DAsync *hipDrvMemcpy3DAsync;
extern thipMemset *hipMemset;
extern thipMemsetD8 *hipMemsetD8;
extern thipMemsetD16 *hipMemsetD16;
extern thipMemsetD32 *hipMemsetD32;
@ -1271,6 +1383,8 @@ extern thipMemsetD32Async *hipMemsetD32Async;
extern thipArrayCreate *hipArrayCreate;
extern thipArrayDestroy *hipArrayDestroy;
extern thipArray3DCreate *hipArray3DCreate;
extern thipPointerGetAttributes *hipPointerGetAttributes;
extern thipStreamCreate* hipStreamCreate;
extern thipStreamCreateWithFlags *hipStreamCreateWithFlags;
extern thipStreamCreateWithPriority *hipStreamCreateWithPriority;
extern thipStreamGetPriority *hipStreamGetPriority;
@ -1316,6 +1430,9 @@ extern thipGraphicsUnmapResources *hipGraphicsUnmapResources;
extern thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
extern thipGLGetDevices *hipGLGetDevices;
extern thipImportExternalMemory *hipImportExternalMemory;
extern thipExternalMemoryGetMappedBuffer *hipExternalMemoryGetMappedBuffer;
extern thipDestroyExternalMemory *hipDestroyExternalMemory;
extern thiprtcGetErrorString* hiprtcGetErrorString;
extern thiprtcAddNameExpression* hiprtcAddNameExpression;
@ -1325,9 +1442,17 @@ extern thiprtcDestroyProgram* hiprtcDestroyProgram;
extern thiprtcGetLoweredName* hiprtcGetLoweredName;
extern thiprtcGetProgramLog* hiprtcGetProgramLog;
extern thiprtcGetProgramLogSize* hiprtcGetProgramLogSize;
extern thiprtcGetBitcode* hiprtcGetBitcode;
extern thiprtcGetBitcodeSize* hiprtcGetBitcodeSize;
extern thiprtcGetCode* hiprtcGetCode;
extern thiprtcGetCodeSize* hiprtcGetCodeSize;
extern thiprtcLinkCreate* hiprtcLinkCreate;
extern thiprtcLinkAddFile* hiprtcLinkAddFile;
extern thiprtcLinkAddData* hiprtcLinkAddData;
extern thiprtcLinkComplete* hiprtcLinkComplete;
extern thiprtcLinkDestroy* hiprtcLinkDestroy;
/* HIPEW API. */
enum {
HIPEW_SUCCESS = 0,
@ -1344,7 +1469,6 @@ int hipewInit(hipuint32_t flags);
const char *hipewErrorString(hipError_t result);
const char *hipewCompilerPath(void);
int hipewCompilerVersion(void);
int hipewNvrtcVersion(void);
#ifdef __cplusplus
}

extern/hipew/include/hiprtew.h vendored Normal file (102 lines)
View File

@ -0,0 +1,102 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#ifndef __HIPRTEW_H__
#define __HIPRTEW_H__
#include <hiprt/hiprt_types.h>
#define HIPRT_MAJOR_VERSION 2
#define HIPRT_MINOR_VERSION 0
#define HIPRT_PATCH_VERSION 0xb68861
#define HIPRT_API_VERSION 2000
#define HIPRT_VERSION_STR "02000"
typedef unsigned int hiprtuint32_t;
/* Function types. */
typedef hiprtError(thiprtCreateContext)(hiprtuint32_t hiprtApiVersion,
hiprtContextCreationInput &input,
hiprtContext *outContext);
typedef hiprtError(thiprtDestroyContext)(hiprtContext context);
typedef hiprtError(thiprtCreateGeometry)(hiprtContext context,
const hiprtGeometryBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtGeometry *outGeometry);
typedef hiprtError(thiprtDestroyGeometry)(hiprtContext context,
hiprtGeometry outGeometry);
typedef hiprtError(thiprtBuildGeometry)(hiprtContext context,
hiprtBuildOperation buildOperation,
const hiprtGeometryBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtDevicePtr temporaryBuffer,
hiprtApiStream stream,
hiprtGeometry outGeometry);
typedef hiprtError(thiprtGetGeometryBuildTemporaryBufferSize)(
hiprtContext context,
const hiprtGeometryBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
size_t *outSize);
typedef hiprtError(thiprtCreateScene)(hiprtContext context,
const hiprtSceneBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtScene *outScene);
typedef hiprtError(thiprtDestroyScene)(hiprtContext context, hiprtScene outScene);
typedef hiprtError(thiprtBuildScene)(hiprtContext context,
hiprtBuildOperation buildOperation,
const hiprtSceneBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtDevicePtr temporaryBuffer,
hiprtApiStream stream,
hiprtScene outScene);
typedef hiprtError(thiprtGetSceneBuildTemporaryBufferSize)(
hiprtContext context,
const hiprtSceneBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
size_t *outSize);
typedef hiprtError(thiprtCreateFuncTable)(hiprtContext context,
hiprtuint32_t numGeomTypes,
hiprtuint32_t numRayTypes,
hiprtFuncTable *outFuncTable);
typedef hiprtError(thiprtSetFuncTable)(hiprtContext context,
hiprtFuncTable funcTable,
hiprtuint32_t geomType,
hiprtuint32_t rayType,
hiprtFuncDataSet set);
typedef hiprtError(thiprtDestroyFuncTable)(hiprtContext context,
hiprtFuncTable funcTable);
/* Function declarations. */
extern thiprtCreateContext *hiprtCreateContext;
extern thiprtDestroyContext *hiprtDestroyContext;
extern thiprtCreateGeometry *hiprtCreateGeometry;
extern thiprtDestroyGeometry *hiprtDestroyGeometry;
extern thiprtBuildGeometry *hiprtBuildGeometry;
extern thiprtGetGeometryBuildTemporaryBufferSize *hiprtGetGeometryBuildTemporaryBufferSize;
extern thiprtCreateScene *hiprtCreateScene;
extern thiprtDestroyScene *hiprtDestroyScene;
extern thiprtBuildScene *hiprtBuildScene;
extern thiprtGetSceneBuildTemporaryBufferSize *hiprtGetSceneBuildTemporaryBufferSize;
extern thiprtCreateFuncTable *hiprtCreateFuncTable;
extern thiprtSetFuncTable *hiprtSetFuncTable;
extern thiprtDestroyFuncTable *hiprtDestroyFuncTable;
/* HIPEW API. */
bool hiprtewInit();
#endif /* __HIPRTEW_H__ */
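For orientation, a minimal sketch of how a caller might use this wrapper, assuming the opaque HIPRT handles behave as plain pointers and that a zero return from hiprtCreateContext means success (both assumptions, not guaranteed by this header):

#include <hiprtew.h>

hiprtContext create_hiprt_context_or_null()
{
  /* hiprtewInit() only returns true on Windows when the versioned DLL is found. */
  if (!hiprtewInit()) {
    return 0;
  }
  hiprtContextCreationInput input = {}; /* Real callers fill in device/API info. */
  hiprtContext context = 0;
  if (hiprtCreateContext(HIPRT_API_VERSION, input, &context) != 0) {
    return 0;
  }
  return context;
}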

View File

@ -1,5 +1,5 @@
/*
* Copyright 2011-2021 Blender Foundation
* Copyright 2011-2023 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -13,14 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License
*/
#ifdef _MSC_VER
# if _MSC_VER < 1900
# define snprintf _snprintf
# endif
# define popen _popen
# define pclose _pclose
# define _CRT_SECURE_NO_WARNINGS
#endif
#include "util.h"
#include <hipew.h>
#include <assert.h>
@ -28,44 +21,18 @@
#include <string.h>
#include <sys/stat.h>
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# define VC_EXTRALEAN
# include <windows.h>
/* Utility macros. */
typedef HMODULE DynamicLibrary;
# define dynamic_library_open(path) LoadLibraryA(path)
# define dynamic_library_close(lib) FreeLibrary(lib)
# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
#else
# include <dlfcn.h>
typedef void* DynamicLibrary;
# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
# define dynamic_library_close(lib) dlclose(lib)
# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
#endif
#define _LIBRARY_FIND_CHECKED(lib, name) \
name = (t##name *)dynamic_library_find(lib, #name); \
assert(name);
#define _LIBRARY_FIND(lib, name) \
name = (t##name *)dynamic_library_find(lib, #name);
static DynamicLibrary hip_lib;
#define HIP_LIBRARY_FIND_CHECKED(name) \
_LIBRARY_FIND_CHECKED(hip_lib, name)
#define HIP_LIBRARY_FIND(name) _LIBRARY_FIND(hip_lib, name)
static DynamicLibrary hip_lib;
name = (t##name *)dynamic_library_find(hip_lib, #name); \
assert(name);
#define HIP_LIBRARY_FIND(name) \
name = (t##name *)dynamic_library_find(hip_lib, #name);
/* Function definitions. */
thipGetErrorName *hipGetErrorName;
thipGetErrorString *hipGetErrorString;
thipGetLastError *hipGetLastError;
thipInit *hipInit;
thipDriverGetVersion *hipDriverGetVersion;
thipGetDevice *hipGetDevice;
@ -74,6 +41,8 @@ thipGetDeviceProperties *hipGetDeviceProperties;
thipDeviceGet* hipDeviceGet;
thipDeviceGetName *hipDeviceGetName;
thipDeviceGetAttribute *hipDeviceGetAttribute;
thipDeviceGetLimit *hipDeviceGetLimit;
thipDeviceSetLimit *hipDeviceSetLimit;
thipDeviceComputeCapability *hipDeviceComputeCapability;
thipDevicePrimaryCtxRetain *hipDevicePrimaryCtxRetain;
thipDevicePrimaryCtxRelease *hipDevicePrimaryCtxRelease;
@ -109,11 +78,14 @@ thipFree *hipFree;
thipMemGetAddressRange *hipMemGetAddressRange;
thipHostMalloc *hipHostMalloc;
thipHostFree *hipHostFree;
thipHostRegister *hipHostRegister;
thipHostGetDevicePointer *hipHostGetDevicePointer;
thipHostGetFlags *hipHostGetFlags;
thipHostUnregister *hipHostUnregister;
thipMallocManaged *hipMallocManaged;
thipDeviceGetByPCIBusId *hipDeviceGetByPCIBusId;
thipDeviceGetPCIBusId *hipDeviceGetPCIBusId;
thipMemcpy *hipMemcpy;
thipMemcpyPeer *hipMemcpyPeer;
thipMemcpyHtoD *hipMemcpyHtoD;
thipMemcpyDtoH *hipMemcpyDtoH;
@ -123,8 +95,10 @@ thipMemcpyParam2D *hipMemcpyParam2D;
thipDrvMemcpy3D *hipDrvMemcpy3D;
thipMemcpyHtoDAsync *hipMemcpyHtoDAsync;
thipMemcpyDtoHAsync *hipMemcpyDtoHAsync;
thipMemcpyDtoDAsync *hipMemcpyDtoDAsync;
thipMemcpyParam2DAsync *hipMemcpyParam2DAsync;
thipDrvMemcpy3DAsync *hipDrvMemcpy3DAsync;
thipMemset *hipMemset;
thipMemsetD8 *hipMemsetD8;
thipMemsetD16 *hipMemsetD16;
thipMemsetD32 *hipMemsetD32;
@ -134,6 +108,8 @@ thipMemsetD32Async *hipMemsetD32Async;
thipArrayCreate *hipArrayCreate;
thipArrayDestroy *hipArrayDestroy;
thipArray3DCreate *hipArray3DCreate;
thipPointerGetAttributes* hipPointerGetAttributes;
thipStreamCreate* hipStreamCreate;
thipStreamCreateWithFlags *hipStreamCreateWithFlags;
thipStreamCreateWithPriority *hipStreamCreateWithPriority;
thipStreamGetPriority *hipStreamGetPriority;
@ -179,6 +155,9 @@ thipGraphicsResourceGetMappedPointer *hipGraphicsResourceGetMappedPointer;
thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
thipGLGetDevices *hipGLGetDevices;
thipImportExternalMemory *hipImportExternalMemory;
thipExternalMemoryGetMappedBuffer *hipExternalMemoryGetMappedBuffer;
thipDestroyExternalMemory *hipDestroyExternalMemory;
thiprtcGetErrorString* hiprtcGetErrorString;
thiprtcAddNameExpression* hiprtcAddNameExpression;
@ -188,10 +167,15 @@ thiprtcDestroyProgram* hiprtcDestroyProgram;
thiprtcGetLoweredName* hiprtcGetLoweredName;
thiprtcGetProgramLog* hiprtcGetProgramLog;
thiprtcGetProgramLogSize* hiprtcGetProgramLogSize;
thiprtcGetBitcode* hiprtcGetBitcode;
thiprtcGetBitcodeSize* hiprtcGetBitcodeSize;
thiprtcGetCode* hiprtcGetCode;
thiprtcGetCodeSize* hiprtcGetCodeSize;
thiprtcLinkCreate* hiprtcLinkCreate;
thiprtcLinkAddFile* hiprtcLinkAddFile;
thiprtcLinkAddData* hiprtcLinkAddData;
thiprtcLinkComplete* hiprtcLinkComplete;
thiprtcLinkDestroy* hiprtcLinkDestroy;
static DynamicLibrary dynamic_library_open_find(const char **paths) {
int i = 0;
@ -217,14 +201,14 @@ static void hipewHipExit(void) {
#ifdef _WIN32
static int hipewHasOldDriver(const char *hip_path) {
DWORD verHandle = 0;
DWORD verSize = GetFileVersionInfoSize(hip_path, &verHandle);
DWORD verSize = GetFileVersionInfoSizeA(hip_path, &verHandle);
int old_driver = 0;
if (verSize != 0) {
LPSTR verData = (LPSTR)malloc(verSize);
if (GetFileVersionInfo(hip_path, verHandle, verSize, verData)) {
if (GetFileVersionInfoA(hip_path, verHandle, verSize, verData)) {
LPBYTE lpBuffer = NULL;
UINT size = 0;
if (VerQueryValue(verData, "\\", (VOID FAR * FAR *)&lpBuffer, &size)) {
if (VerQueryValueA(verData, "\\", (VOID FAR * FAR *)&lpBuffer, &size)) {
if (size) {
VS_FIXEDFILEINFO *verInfo = (VS_FIXEDFILEINFO *)lpBuffer;
/* Magic value from
@ -247,8 +231,9 @@ static int hipewHasOldDriver(const char *hip_path) {
static int hipewHipInit(void) {
/* Library paths. */
#ifdef _WIN32
/* Expected in c:/windows/system or similar, no path needed. */
/* Expected in C:/Windows/System32 or similar, no path needed. */
const char *hip_paths[] = {"amdhip64.dll", NULL};
#elif defined(__APPLE__)
/* Default installation path. */
const char *hip_paths[] = {"", NULL};
@ -289,6 +274,8 @@ static int hipewHipInit(void) {
/* Fetch all function pointers. */
HIP_LIBRARY_FIND_CHECKED(hipGetErrorName);
HIP_LIBRARY_FIND_CHECKED(hipGetErrorString);
HIP_LIBRARY_FIND_CHECKED(hipGetLastError);
HIP_LIBRARY_FIND_CHECKED(hipInit);
HIP_LIBRARY_FIND_CHECKED(hipDriverGetVersion);
HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
@ -297,6 +284,8 @@ static int hipewHipInit(void) {
HIP_LIBRARY_FIND_CHECKED(hipDeviceGet);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
HIP_LIBRARY_FIND(hipDeviceGetLimit);
HIP_LIBRARY_FIND(hipDeviceSetLimit);
HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRetain);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRelease);
@ -332,11 +321,14 @@ static int hipewHipInit(void) {
HIP_LIBRARY_FIND_CHECKED(hipMemGetAddressRange);
HIP_LIBRARY_FIND_CHECKED(hipHostMalloc);
HIP_LIBRARY_FIND_CHECKED(hipHostFree);
HIP_LIBRARY_FIND_CHECKED(hipHostRegister);
HIP_LIBRARY_FIND_CHECKED(hipHostGetDevicePointer);
HIP_LIBRARY_FIND_CHECKED(hipHostGetFlags);
HIP_LIBRARY_FIND_CHECKED(hipHostUnregister);
HIP_LIBRARY_FIND_CHECKED(hipMallocManaged);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetByPCIBusId);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetPCIBusId);
HIP_LIBRARY_FIND_CHECKED(hipMemcpy);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyPeer);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoD);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoH);
@ -345,9 +337,11 @@ static int hipewHipInit(void) {
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3D);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoDAsync);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoHAsync);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoDAsync);
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy2DUnaligned);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2DAsync);
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3DAsync);
HIP_LIBRARY_FIND_CHECKED(hipMemset);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD8);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD16);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD32);
@ -357,6 +351,8 @@ static int hipewHipInit(void) {
HIP_LIBRARY_FIND_CHECKED(hipArrayCreate);
HIP_LIBRARY_FIND_CHECKED(hipArrayDestroy);
HIP_LIBRARY_FIND_CHECKED(hipArray3DCreate);
HIP_LIBRARY_FIND_CHECKED(hipPointerGetAttributes);
HIP_LIBRARY_FIND_CHECKED(hipStreamCreate);
HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithFlags);
HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithPriority);
HIP_LIBRARY_FIND_CHECKED(hipStreamGetPriority);
@ -399,16 +395,10 @@ static int hipewHipInit(void) {
HIP_LIBRARY_FIND_CHECKED(hipGraphicsGLRegisterBuffer);
HIP_LIBRARY_FIND_CHECKED(hipGLGetDevices);
#endif
HIP_LIBRARY_FIND_CHECKED(hiprtcGetErrorString);
HIP_LIBRARY_FIND_CHECKED(hiprtcAddNameExpression);
HIP_LIBRARY_FIND_CHECKED(hiprtcCompileProgram);
HIP_LIBRARY_FIND_CHECKED(hiprtcCreateProgram);
HIP_LIBRARY_FIND_CHECKED(hiprtcDestroyProgram);
HIP_LIBRARY_FIND_CHECKED(hiprtcGetLoweredName);
HIP_LIBRARY_FIND_CHECKED(hiprtcGetProgramLog);
HIP_LIBRARY_FIND_CHECKED(hiprtcGetProgramLogSize);
HIP_LIBRARY_FIND_CHECKED(hiprtcGetCode);
HIP_LIBRARY_FIND_CHECKED(hiprtcGetCodeSize);
HIP_LIBRARY_FIND_CHECKED(hipImportExternalMemory);
HIP_LIBRARY_FIND_CHECKED(hipExternalMemoryGetMappedBuffer);
HIP_LIBRARY_FIND_CHECKED(hipDestroyExternalMemory);
result = HIPEW_SUCCESS;
return result;
}

extern/hipew/src/hiprtew.cc vendored Normal file (97 lines)
View File

@ -0,0 +1,97 @@
/*
* Copyright 2011-2023 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#include "util.h"
#include <hiprtew.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <string> /* std::string is used below to build the DLL name. */
#include <sys/stat.h>
static DynamicLibrary hiprt_lib;
#define HIPRT_LIBRARY_FIND(name) \
name = (t##name *)dynamic_library_find(hiprt_lib, #name);
/* Function definitions. */
thiprtCreateContext *hiprtCreateContext;
thiprtDestroyContext *hiprtDestroyContext;
thiprtCreateGeometry *hiprtCreateGeometry;
thiprtDestroyGeometry *hiprtDestroyGeometry;
thiprtBuildGeometry *hiprtBuildGeometry;
thiprtGetGeometryBuildTemporaryBufferSize *hiprtGetGeometryBuildTemporaryBufferSize;
thiprtCreateScene *hiprtCreateScene;
thiprtDestroyScene *hiprtDestroyScene;
thiprtBuildScene *hiprtBuildScene;
thiprtGetSceneBuildTemporaryBufferSize *hiprtGetSceneBuildTemporaryBufferSize;
thiprtCreateFuncTable *hiprtCreateFuncTable;
thiprtSetFuncTable *hiprtSetFuncTable;
thiprtDestroyFuncTable *hiprtDestroyFuncTable;
static void hipewHipRtExit(void)
{
if (hiprt_lib != NULL) {
/* Ignore errors. */
dynamic_library_close(hiprt_lib);
hiprt_lib = NULL;
}
}
bool hiprtewInit()
{
static bool result = false;
static bool initialized = false;
if (initialized) {
return result;
}
#ifdef _WIN32
initialized = true;
if (atexit(hipewHipRtExit)) {
return false;
}
std::string hiprt_ver(HIPRT_VERSION_STR);
std::string hiprt_path = "hiprt" + hiprt_ver + "64.dll";
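  /* With HIPRT_VERSION_STR "02000" this resolves to "hiprt0200064.dll". */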
hiprt_lib = dynamic_library_open(hiprt_path.c_str());
if (hiprt_lib == NULL) {
return false;
}
HIPRT_LIBRARY_FIND(hiprtCreateContext)
HIPRT_LIBRARY_FIND(hiprtDestroyContext)
HIPRT_LIBRARY_FIND(hiprtCreateGeometry)
HIPRT_LIBRARY_FIND(hiprtDestroyGeometry)
HIPRT_LIBRARY_FIND(hiprtBuildGeometry)
HIPRT_LIBRARY_FIND(hiprtGetGeometryBuildTemporaryBufferSize)
HIPRT_LIBRARY_FIND(hiprtCreateScene)
HIPRT_LIBRARY_FIND(hiprtDestroyScene)
HIPRT_LIBRARY_FIND(hiprtBuildScene)
HIPRT_LIBRARY_FIND(hiprtGetSceneBuildTemporaryBufferSize)
HIPRT_LIBRARY_FIND(hiprtCreateFuncTable)
HIPRT_LIBRARY_FIND(hiprtSetFuncTable)
HIPRT_LIBRARY_FIND(hiprtDestroyFuncTable)
result = true;
#endif
return result;
}

extern/hipew/src/util.h vendored Normal file (51 lines)
View File

@ -0,0 +1,51 @@
/*
* Copyright 2011-2023 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#pragma once
/* Portable snprintf and popen/pclose. */
#ifdef _MSC_VER
# if _MSC_VER < 1900
# define snprintf _snprintf
# endif
# define popen _popen
# define pclose _pclose
# define _CRT_SECURE_NO_WARNINGS
#endif
/* Macros for loading libraries. */
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# define VC_EXTRALEAN
# include <windows.h>
typedef HMODULE DynamicLibrary;
# define dynamic_library_open(path) LoadLibraryA(path)
# define dynamic_library_close(lib) FreeLibrary(lib)
# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
#else
# include <dlfcn.h>
typedef void* DynamicLibrary;
# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
# define dynamic_library_close(lib) dlclose(lib)
# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
#endif
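A minimal sketch of how these helpers are meant to be used together (the symbol name is illustrative; the library name is the one hipew itself loads on Windows):

#include "util.h"

typedef int tsome_function(void);
static tsome_function *some_function = NULL;

static int load_some_library(void)
{
  DynamicLibrary lib = dynamic_library_open("amdhip64.dll");
  if (lib == NULL) {
    return 0;
  }
  some_function = (tsome_function *)dynamic_library_find(lib, "some_function");
  /* Keep `lib` open for as long as the fetched pointers are in use;
   * close it with dynamic_library_close(lib) on shutdown. */
  return some_function != NULL;
}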

View File

@ -249,6 +249,13 @@ endif()
if(WITH_CYCLES_DEVICE_HIP)
add_definitions(-DWITH_HIP)
if(WITH_CYCLES_DEVICE_HIPRT)
include_directories(
${HIPRT_INCLUDE_DIR}
)
add_definitions(-DWITH_HIPRT)
endif()
if(WITH_HIP_DYNLOAD)
include_directories(
../../extern/hipew/include

View File

@ -1507,7 +1507,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
def get_device_types(self, context):
import _cycles
has_cuda, has_optix, has_hip, has_metal, has_oneapi = _cycles.get_device_types()
has_cuda, has_optix, has_hip, has_metal, has_oneapi, has_hiprt = _cycles.get_device_types()
list = [('NONE', "None", "Don't use compute device", 0)]
if has_cuda:
@ -1544,6 +1544,13 @@ class CyclesPreferences(bpy.types.AddonPreferences):
default=False,
)
use_hiprt: BoolProperty(
name="HIP RT (Experimental)",
description="HIP RT enables AMD hardware ray tracing on RDNA2 and above, with shader fallback on older cards. "
"This feature is experimental and some scenes may render incorrectly",
default=False,
)
use_oneapirt: BoolProperty(
name="Embree on GPU (Experimental)",
description="Embree GPU execution will allow to use hardware ray tracing on Intel GPUs, which will provide better performance. "
@ -1770,7 +1777,13 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.prop(self, "kernel_optimization_level")
col.prop(self, "use_metalrt")
if compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
if compute_device_type == 'HIP':
has_cuda, has_optix, has_hip, has_metal, has_oneapi, has_hiprt = _cycles.get_device_types()
row = layout.row()
row.enabled = has_hiprt
row.prop(self, "use_hiprt")
elif compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
row = layout.row()
row.use_property_split = True
row.prop(self, "use_oneapirt")

View File

@ -124,6 +124,10 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences,
info.use_hardware_raytracing = false;
}
if (info.type == DEVICE_HIP && !get_boolean(cpreferences, "use_hiprt")) {
info.use_hardware_raytracing = false;
}
/* There is accumulative logic here, because multi-devices are supported only for
* the same backend + CPU in Blender right now, and both oneAPI and Metal have a
* global boolean backend setting (see above) for enabling/disabling HW RT,

View File

@ -876,20 +876,23 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
{
vector<DeviceType> device_types = Device::available_types();
bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false, has_oneapi = false;
bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false, has_oneapi = false,
has_hiprt = false;
foreach (DeviceType device_type, device_types) {
has_cuda |= (device_type == DEVICE_CUDA);
has_optix |= (device_type == DEVICE_OPTIX);
has_hip |= (device_type == DEVICE_HIP);
has_metal |= (device_type == DEVICE_METAL);
has_oneapi |= (device_type == DEVICE_ONEAPI);
has_hiprt |= (device_type == DEVICE_HIPRT);
}
PyObject *list = PyTuple_New(5);
PyObject *list = PyTuple_New(6);
PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
PyTuple_SET_ITEM(list, 3, PyBool_FromLong(has_metal));
PyTuple_SET_ITEM(list, 4, PyBool_FromLong(has_oneapi));
PyTuple_SET_ITEM(list, 5, PyBool_FromLong(has_hiprt));
return list;
}

View File

@ -14,6 +14,7 @@ set(SRC
binning.cpp
build.cpp
embree.cpp
hiprt.cpp
multi.cpp
node.cpp
optix.cpp
@ -39,6 +40,7 @@ set(SRC_HEADERS
binning.h
build.h
embree.h
hiprt.h
multi.h
node.h
optix.h

View File

@ -6,6 +6,7 @@
#include "bvh/bvh2.h"
#include "bvh/embree.h"
#include "bvh/hiprt.h"
#include "bvh/metal.h"
#include "bvh/multi.h"
#include "bvh/optix.h"
@ -30,10 +31,14 @@ const char *bvh_layout_name(BVHLayout layout)
return "OPTIX";
case BVH_LAYOUT_METAL:
return "METAL";
case BVH_LAYOUT_HIPRT:
return "HIPRT";
case BVH_LAYOUT_MULTI_OPTIX:
case BVH_LAYOUT_MULTI_METAL:
case BVH_LAYOUT_MULTI_HIPRT:
case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
case BVH_LAYOUT_MULTI_METAL_EMBREE:
case BVH_LAYOUT_MULTI_HIPRT_EMBREE:
return "MULTI";
case BVH_LAYOUT_ALL:
return "ALL";
@ -101,11 +106,20 @@ BVH *BVH::create(const BVHParams &params,
#else
(void)device;
break;
#endif
case BVH_LAYOUT_HIPRT:
#ifdef WITH_HIPRT
return new BVHHIPRT(params, geometry, objects, device);
#else
(void)device;
break;
#endif
case BVH_LAYOUT_MULTI_OPTIX:
case BVH_LAYOUT_MULTI_METAL:
case BVH_LAYOUT_MULTI_HIPRT:
case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
case BVH_LAYOUT_MULTI_METAL_EMBREE:
case BVH_LAYOUT_MULTI_HIPRT_EMBREE:
return new BVHMulti(params, geometry, objects);
case BVH_LAYOUT_NONE:
case BVH_LAYOUT_ALL:

View File

@ -0,0 +1,45 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2023 Blender Foundation */
#ifdef WITH_HIPRT
# include "bvh/hiprt.h"
# include "scene/mesh.h"
# include "scene/object.h"
# include "util/foreach.h"
# include "util/progress.h"
# include "device/hiprt/device_impl.h"
CCL_NAMESPACE_BEGIN
BVHHIPRT::BVHHIPRT(const BVHParams &params,
const vector<Geometry *> &geometry,
const vector<Object *> &objects,
Device *in_device)
: BVH(params, geometry, objects),
hiprt_geom(0),
custom_primitive_bound(in_device, "Custom Primitive Bound", MEM_READ_ONLY),
triangle_index(in_device, "HIPRT Triangle Index", MEM_READ_ONLY),
vertex_data(in_device, "vertex_data", MEM_READ_ONLY),
device(in_device)
{
triangle_mesh = {0};
custom_prim_aabb = {0};
}
BVHHIPRT::~BVHHIPRT()
{
HIPRTDevice *hiprt_device = static_cast<HIPRTDevice *>(device);
hiprtContext hiprt_context = hiprt_device->get_hiprt_context();
custom_primitive_bound.free();
triangle_index.free();
vertex_data.free();
hiprtDestroyGeometry(hiprt_context, hiprt_geom);
}
CCL_NAMESPACE_END
#endif

intern/cycles/bvh/hiprt.h Normal file (58 lines)
View File

@ -0,0 +1,58 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2023 Blender Foundation */
#ifdef WITH_HIPRT
# pragma once
# include "bvh/bvh.h"
# include "bvh/params.h"
# ifdef WITH_HIP_DYNLOAD
# include "hiprtew.h"
# else
# include <hiprt/hiprt_types.h>
# endif
# include "device/memory.h"
CCL_NAMESPACE_BEGIN
class BVHHIPRT : public BVH {
public:
friend class HIPDevice;
bool is_tlas()
{
return params.top_level;
}
hiprtGeometry hiprt_geom;
hiprtTriangleMeshPrimitive triangle_mesh;
hiprtAABBListPrimitive custom_prim_aabb;
hiprtGeometryBuildInput geom_input;
vector<int2> custom_prim_info; /* x: prim_id, y: prim_type */
vector<float2> prims_time;
/* Custom primitives. */
device_vector<BoundBox> custom_primitive_bound;
device_vector<int> triangle_index;
device_vector<float> vertex_data;
protected:
friend class BVH;
BVHHIPRT(const BVHParams &params,
const vector<Geometry *> &geometry,
const vector<Object *> &objects,
Device *in_device);
virtual ~BVHHIPRT();
private:
Device *device;
};
CCL_NAMESPACE_END
#endif

View File

@ -41,20 +41,32 @@ endif()
# HIP
###########################################################################
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
if(UNIX)
# Disabled until there is a HIP 5.5 release for Linux.
set(WITH_CYCLES_HIP_BINARIES OFF)
message(STATUS "HIP temporarily disabled due to compiler bugs")
else()
# Need at least HIP 5.5 to solve compiler bug affecting the kernel.
find_package(HIP 5.5.0)
set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
if(WITH_CYCLES_DEVICE_HIP)
if(WITH_CYCLES_HIP_BINARIES)
if(UNIX)
# Disabled until there is a HIP 5.5 release for Linux.
set(WITH_CYCLES_HIP_BINARIES OFF)
message(STATUS "HIP temporarily disabled due to compiler bugs")
else()
# Need at least HIP 5.5 to solve compiler bug affecting the kernel.
find_package(HIP 5.5.0)
set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
if(HIP_FOUND)
message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
if(HIP_FOUND)
message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
endif()
endif()
endif()
# HIP RT
if(WITH_CYCLES_DEVICE_HIP AND WITH_CYCLES_DEVICE_HIPRT)
find_package(HIPRT)
set_and_warn_library_found("HIP RT" HIPRT_FOUND WITH_CYCLES_DEVICE_HIPRT)
endif()
endif()
if(NOT WITH_CYCLES_DEVICE_HIP)
set(WITH_CYCLES_DEVICE_HIPRT OFF)
endif()
if(NOT WITH_HIP_DYNLOAD)

View File

@ -66,6 +66,13 @@ set(SRC_HIP
hip/util.h
)
set(SRC_HIPRT
hiprt/device_impl.cpp
hiprt/device_impl.h
hiprt/queue.cpp
hiprt/queue.h
)
set(SRC_ONEAPI
oneapi/device_impl.cpp
oneapi/device_impl.h
@ -124,6 +131,7 @@ set(SRC
${SRC_CPU}
${SRC_CUDA}
${SRC_HIP}
${SRC_HIPRT}
${SRC_DUMMY}
${SRC_MULTI}
${SRC_OPTIX}
@ -209,6 +217,7 @@ source_group("cpu" FILES ${SRC_CPU})
source_group("cuda" FILES ${SRC_CUDA})
source_group("dummy" FILES ${SRC_DUMMY})
source_group("hip" FILES ${SRC_HIP})
source_group("hiprt" FILES ${SRC_HIPRT})
source_group("multi" FILES ${SRC_MULTI})
source_group("metal" FILES ${SRC_METAL})
source_group("optix" FILES ${SRC_OPTIX})

View File

@ -14,6 +14,7 @@
#include "device/cuda/device.h"
#include "device/dummy/device.h"
#include "device/hip/device.h"
#include "device/hiprt/device_impl.h"
#include "device/metal/device.h"
#include "device/multi/device.h"
#include "device/oneapi/device.h"
@ -135,6 +136,8 @@ DeviceType Device::type_from_string(const char *name)
return DEVICE_METAL;
else if (strcmp(name, "ONEAPI") == 0)
return DEVICE_ONEAPI;
else if (strcmp(name, "HIPRT") == 0)
return DEVICE_HIPRT;
return DEVICE_NONE;
}
@ -155,6 +158,8 @@ string Device::string_from_type(DeviceType type)
return "METAL";
else if (type == DEVICE_ONEAPI)
return "ONEAPI";
else if (type == DEVICE_HIPRT)
return "HIPRT";
return "";
}
@ -177,6 +182,10 @@ vector<DeviceType> Device::available_types()
#endif
#ifdef WITH_ONEAPI
types.push_back(DEVICE_ONEAPI);
#endif
#ifdef WITH_HIPRT
if (hiprtewInit())
types.push_back(DEVICE_HIPRT);
#endif
return types;
}

View File

@ -40,6 +40,7 @@ enum DeviceType {
DEVICE_MULTI,
DEVICE_OPTIX,
DEVICE_HIP,
DEVICE_HIPRT,
DEVICE_METAL,
DEVICE_ONEAPI,
DEVICE_DUMMY,
@ -79,8 +80,7 @@ class DeviceInfo {
bool has_profiling; /* Supports runtime collection of profiling info. */
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
bool has_gpu_queue; /* Device supports GPU queue. */
bool use_hardware_raytracing; /* Use hardware ray tracing to accelerate ray queries in a backend.
*/
bool use_hardware_raytracing; /* Use hardware instructions to accelerate ray tracing. */
KernelOptimizationLevel kernel_optimization_level; /* Optimization level applied to path tracing
* kernels (Metal only). */
DenoiserTypeMask denoisers; /* Supported denoiser types. */

View File

@ -13,6 +13,10 @@
# include "util/windows.h"
#endif /* WITH_HIP */
#ifdef WITH_HIPRT
# include "device/hiprt/device_impl.h"
#endif
CCL_NAMESPACE_BEGIN
bool device_hip_init()
@ -65,7 +69,12 @@ bool device_hip_init()
Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
#ifdef WITH_HIP
#ifdef WITH_HIPRT
if (info.use_hardware_raytracing)
return new HIPRTDevice(info, stats, profiler);
else
return new HIPDevice(info, stats, profiler);
#elif defined(WITH_HIP)
return new HIPDevice(info, stats, profiler);
#else
(void)info;
@ -115,6 +124,12 @@ void device_hip_info(vector<DeviceInfo> &devices)
return;
}
# ifdef WITH_HIPRT
const bool has_hardware_raytracing = hiprtewInit();
# else
const bool has_hardware_raytracing = false;
# endif
vector<DeviceInfo> display_devices;
for (int num = 0; num < count; num++) {
@ -150,6 +165,8 @@ void device_hip_info(vector<DeviceInfo> &devices)
}
}
info.use_hardware_raytracing = has_hardware_raytracing;
int pci_location[3] = {0, 0, 0};
hipDeviceGetAttribute(&pci_location[0], hipDeviceAttributePciDomainID, num);
hipDeviceGetAttribute(&pci_location[1], hipDeviceAttributePciBusId, num);
@ -176,6 +193,7 @@ void device_hip_info(vector<DeviceInfo> &devices)
VLOG_INFO << "Device has compute preemption or is not used for display.";
devices.push_back(info);
}
VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
}

View File

@ -1,6 +1,8 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#pragma once
#ifdef WITH_HIP
# include "device/device.h"
@ -49,9 +51,11 @@ class HIPDevice : public GPUDevice {
bool use_adaptive_compilation();
string compile_kernel_get_common_cflags(const uint kernel_features);
virtual string compile_kernel_get_common_cflags(const uint kernel_features);
string compile_kernel(const uint kernel_features, const char *name, const char *base = "hip");
virtual string compile_kernel(const uint kernel_features,
const char *name,
const char *base = "hip");
virtual bool load_kernels(const uint kernel_features) override;
void reserve_local_memory(const uint kernel_features);

File diff suppressed because it is too large.

View File

@ -0,0 +1,126 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2023 Blender Foundation */
#pragma once
#ifdef WITH_HIPRT
# include "device/hip/device_impl.h"
# include "device/hip/kernel.h"
# include "device/hip/queue.h"
# include "device/hiprt/queue.h"
# ifdef WITH_HIP_DYNLOAD
# include "hiprtew.h"
# else
# include <hiprt/hiprt_types.h>
# endif
# include "kernel/device/hiprt/globals.h"
CCL_NAMESPACE_BEGIN
class Mesh;
class Hair;
class PointCloud;
class Geometry;
class Object;
class BVHHIPRT;
class HIPRTDevice : public HIPDevice {
public:
virtual BVHLayoutMask get_bvh_layout_mask(const uint kernel_features) const override;
HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
virtual ~HIPRTDevice();
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
string compile_kernel_get_common_cflags(const uint kernel_features) override;
virtual string compile_kernel(const uint kernel_features,
const char *name,
const char *base = "hiprt") override;
virtual bool load_kernels(const uint kernel_features) override;
virtual void const_copy_to(const char *name, void *host, size_t size) override;
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
hiprtContext get_hiprt_context()
{
return hiprt_context;
}
device_vector<int> global_stack_buffer;
protected:
enum Filter_Function { Closest = 0, Shadows, Local, Volume, Max_Intersect_Filter_Function };
enum Primitive_Type { Triangle = 0, Curve, Motion_Triangle, Point, Max_Primitive_Type };
hiprtGeometryBuildInput prepare_triangle_blas(BVHHIPRT *bvh, Mesh *mesh);
hiprtGeometryBuildInput prepare_curve_blas(BVHHIPRT *bvh, Hair *hair);
hiprtGeometryBuildInput prepare_point_blas(BVHHIPRT *bvh, PointCloud *pointcloud);
void build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions options);
hiprtScene build_tlas(BVHHIPRT *bvh,
vector<Object *> objects,
hiprtBuildOptions options,
bool refit);
hiprtContext hiprt_context;
hiprtScene scene;
hiprtFuncTable functions_table;
thread_mutex hiprt_mutex;
size_t scratch_buffer_size;
device_vector<char> scratch_buffer;
/* The following vectors transfer scene information available on the host to the GPU.
* visibility, instance_transform_matrix, transform_headers, and hiprt_blas_ptr are passed
* to HIP RT to build the BVH; the rest are used directly in traversal functions and
* intersection kernels, and are defined on the GPU side as members of the
* KernelParamsHIPRT struct. The host memory is copied to the GPU through the
* const_copy_to() function. */
device_vector<uint32_t> visibility;
/* instance_transform_matrix passes the transform matrices of instances, converted from the
* Cycles Transform format, to the instanceFrames member of hiprtSceneBuildInput. */
/* Movement over a time interval for motion blur is captured through multiple transform
* matrices. In this case the transform matrix of an instance cannot be retrieved directly
* by looking up instance_transform_matrix given the instance id. transform_headers maps the
* instance id to the index needed to retrieve the instance transform matrix (the frameIndex
* member of hiprtTransformHeader). transform_headers also records how many transform
* matrices are associated with an instance (the frameCount member of hiprtTransformHeader).
* transform_headers is passed to HIP RT through the instanceTransformHeaders member of
* hiprtSceneBuildInput. */
device_vector<hiprtTransformHeader> transform_headers;
/* Instance/object ids are not explicitly passed to HIP RT.
* HIP RT assigns the ids based on the order in which BLAS pointers are passed to it
* (through the instanceGeometries member of hiprtSceneBuildInput). If a BLAS is absent for
* a particular geometry (e.g. a plane), HIP RT removes that entry, so in scenes containing
* objects without a BLAS, the instance id HIP RT returns for a hit point will not
* necessarily match the instance id of the application. user_instance_id provides a map for
* retrieving the original instance id from the one HIP RT returns. hiprt_blas_ptr is the
* list of all the valid BLAS pointers. blas_ptr holds both the valid and the null pointers,
* so the BLAS for any geometry can be retrieved directly from this array (used in
* subsurface scattering). */
device_vector<int> user_instance_id;
device_vector<uint64_t> hiprt_blas_ptr;
device_vector<uint64_t> blas_ptr;
/* custom_prim_info stores custom information for all the custom primitives in a scene.
* The primitive id that HIP RT returns is local to the geometry that was hit.
* custom_prim_info_offset returns the offset to add to that primitive id to retrieve the
* primitive info from custom_prim_info. */
device_vector<int2> custom_prim_info;
device_vector<int2> custom_prim_info_offset;
/* prims_time stores the primitive time for geometries with motion blur.
 * prim_time_offset holds the offset to add to the primitive id to retrieve the primitive time. */
device_vector<float2> prims_time;
device_vector<int> prim_time_offset;
};
CCL_NAMESPACE_END
#endif
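
A sketch of the id remapping that the members above describe (illustrative only: the actual mapping is built inside build_tlas(), and the loop below is a hypothetical host-side reconstruction using plain vectors):

/* Build the compacted HIP RT instance list, skipping geometry without a blas. */
vector<int> user_instance_id;    /* HIP RT instance id -> application object id. */
vector<uint64_t> hiprt_blas_ptr; /* Valid blas pointers only, in submission order. */
for (int object_id = 0; object_id < int(blas_ptr.size()); object_id++) {
  if (blas_ptr[object_id] == 0) {
    /* HIP RT drops entries without a blas, shifting the ids of later instances. */
    continue;
  }
  /* The HIP RT instance id is the position within the compacted list. */
  user_instance_id.push_back(object_id);
  hiprt_blas_ptr.push_back(blas_ptr[object_id]);
}
/* At intersection time: object_id = user_instance_id[hit.instanceID]. */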

View File

@ -0,0 +1,68 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifdef WITH_HIPRT
# include "device/hiprt/queue.h"
# include "device/hip/graphics_interop.h"
# include "device/hip/kernel.h"
# include "device/hiprt/device_impl.h"
CCL_NAMESPACE_BEGIN
HIPRTDeviceQueue::HIPRTDeviceQueue(HIPRTDevice *device)
: HIPDeviceQueue((HIPDevice *)device), hiprt_device_(device)
{
}
bool HIPRTDeviceQueue::enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args)
{
if (hiprt_device_->have_error()) {
return false;
}
if (!device_kernel_has_intersection(kernel)) {
return HIPDeviceQueue::enqueue(kernel, work_size, args);
}
debug_enqueue_begin(kernel, work_size);
const HIPContextScope scope(hiprt_device_);
const HIPDeviceKernel &hip_kernel = hiprt_device_->kernels.get(kernel);
if (!hiprt_device_->global_stack_buffer.device_pointer) {
int max_path = num_concurrent_states(0);
hiprt_device_->global_stack_buffer.alloc(max_path * HIPRT_SHARED_STACK_SIZE * sizeof(int));
hiprt_device_->global_stack_buffer.zero_to_device();
}
DeviceKernelArguments args_copy = args;
args_copy.add(&hiprt_device_->global_stack_buffer.device_pointer);
/* Compute kernel launch parameters. */
const int num_threads_per_block = HIPRT_THREAD_GROUP_SIZE;
const int num_blocks = divide_up(work_size, num_threads_per_block);
int shared_mem_bytes = 0;
assert_success(hipModuleLaunchKernel(hip_kernel.function,
num_blocks,
1,
1,
num_threads_per_block,
1,
1,
shared_mem_bytes,
hip_stream_,
const_cast<void **>(args_copy.values),
0),
"enqueue");
return !(hiprt_device_->have_error());
}
CCL_NAMESPACE_END
#endif /* WITH_HIPRT */
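
A quick check of the lazy global-stack allocation above (a sketch: max_path is whatever num_concurrent_states(0) returns, the one-million figure is only an assumed example, and HIPRT_SHARED_STACK_SIZE comes from globals.h later in this patch):

/* bytes = max_path * HIPRT_SHARED_STACK_SIZE * sizeof(int)
 * e.g. with max_path = 1'048'576 and HIPRT_SHARED_STACK_SIZE = 24:
 *   1'048'576 * 24 * 4 = 100'663'296 bytes = 96 MiB,
 * allocated once and reused by every later intersection kernel launch. */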

View File

@ -0,0 +1,33 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#pragma once
#ifdef WITH_HIPRT
# include "device/kernel.h"
# include "device/memory.h"
# include "device/queue.h"
# include "device/hip/queue.h"
# include "device/hip/util.h"
CCL_NAMESPACE_BEGIN
class HIPRTDevice;
class HIPRTDeviceQueue : public HIPDeviceQueue {
public:
HIPRTDeviceQueue(HIPRTDevice *device);
~HIPRTDeviceQueue() {}
virtual bool enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args) override;
protected:
HIPRTDevice *hiprt_device_;
};
CCL_NAMESPACE_END
#endif /* WITH_HIPRT */

View File

@ -117,6 +117,10 @@ class MultiDevice : public Device {
return BVH_LAYOUT_MULTI_METAL;
}
if (bvh_layout_mask == BVH_LAYOUT_HIPRT) {
return BVH_LAYOUT_MULTI_HIPRT;
}
/* When devices do not share a common BVH layout, fall back to creating one for each */
const BVHLayoutMask BVH_LAYOUT_OPTIX_EMBREE = (BVH_LAYOUT_OPTIX | BVH_LAYOUT_EMBREE);
if ((bvh_layout_mask_all & BVH_LAYOUT_OPTIX_EMBREE) == BVH_LAYOUT_OPTIX_EMBREE) {
@ -158,8 +162,10 @@ class MultiDevice : public Device {
assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE);
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE);
BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
bvh_multi->sub_bvhs.resize(devices.size());
@ -184,12 +190,17 @@ class MultiDevice : public Device {
params.bvh_layout = BVH_LAYOUT_OPTIX;
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL)
params.bvh_layout = BVH_LAYOUT_METAL;
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT)
params.bvh_layout = BVH_LAYOUT_HIPRT;
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE)
params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
BVH_LAYOUT_EMBREE;
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE)
params.bvh_layout = sub.device->info.type == DEVICE_METAL ? BVH_LAYOUT_METAL :
BVH_LAYOUT_EMBREE;
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE)
params.bvh_layout = sub.device->info.type == DEVICE_HIPRT ? BVH_LAYOUT_HIPRT :
BVH_LAYOUT_EMBREE;
/* Skip building a bottom level acceleration structure for non-instanced geometry on Embree
* (since they are put into the top level directly, see bvh_embree.cpp) */

View File

@ -1149,6 +1149,8 @@ static const char *device_type_for_description(const DeviceType type)
return "OptiX";
case DEVICE_HIP:
return "HIP";
case DEVICE_HIPRT:
return "HIPRT";
case DEVICE_ONEAPI:
return "oneAPI";
case DEVICE_DUMMY:

View File

@ -26,6 +26,10 @@ set(SRC_KERNEL_DEVICE_HIP
device/hip/kernel.cpp
)
set(SRC_KERNEL_DEVICE_HIPRT
device/hiprt/kernel.cpp
)
set(SRC_KERNEL_DEVICE_METAL
device/metal/kernel.metal
)
@ -77,6 +81,13 @@ set(SRC_KERNEL_DEVICE_HIP_HEADERS
device/hip/globals.h
)
set(SRC_KERNEL_DEVICE_HIPRT_HEADERS
device/hiprt/bvh.h
device/hiprt/common.h
device/hiprt/globals.h
device/hiprt/hiprt_kernels.h
)
set(SRC_KERNEL_DEVICE_OPTIX_HEADERS
device/optix/bvh.h
device/optix/compat.h
@ -643,6 +654,86 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
cycles_set_solution_folder(cycles_kernel_hip)
endif()
# HIP RT module
if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
set(hiprt_sources device/hiprt/kernel.cpp
${SRC_KERNEL_HEADERS}
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_HIPRT_HEADERS}
${SRC_UTIL_HEADERS})
set(bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc)
set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb)
set(kernel_sources ${sources})
set(hiprt_kernel_src "/device/hiprt/kernel.cpp")
if(WIN32)
set(hiprt_compile_command ${CMAKE_COMMAND})
set(hiprt_compile_flags
-E env "HIP_PATH=${HIP_ROOT_DIR}"
${HIP_HIPCC_EXECUTABLE}.bat)
else()
set(hiprt_compile_command ${HIP_HIPCC_EXECUTABLE})
set(hiprt_compile_flags)
endif()
set(target_gpus)
foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
list(APPEND target_gpus "--offload-arch=${arch}")
endforeach()
set(hiprt_compile_flags
${hiprt_compile_flags}
${target_gpus}
${HIP_HIPCC_FLAGS}
${CMAKE_CURRENT_SOURCE_DIR}${hiprt_kernel_src}
${flags}
-D CCL_NAMESPACE_BEGIN=
-D CCL_NAMESPACE_END=
-D HIPCC
-D __HIPRT__
-std=c++17
-fgpu-rdc
-c
--gpu-bundle-output
-emit-llvm
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
-I ${CMAKE_CURRENT_SOURCE_DIR}/device/hiprt
-I ${HIPRT_INCLUDE_DIR}
-Wno-parentheses-equality
-Wno-unused-value
--hipcc-func-supp
-ffast-math
-o ${bitcode_file})
if(WITH_CYCLES_DEBUG)
set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_CYCLES_DEBUG)
endif()
add_custom_command(
OUTPUT ${bitcode_file}
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags}
DEPENDS ${kernel_sources})
if(WIN32)
set(hiprt_link_command ${CMAKE_COMMAND})
set(hiprt_link_flags -E env "HIP_PATH=${HIP_ROOT_DIR}"
${HIP_LINKER_EXECUTABLE})
else()
# not implemented yet
endif()
set(hiprt_link_flags
${hiprt_link_flags}
${target_gpus}
-fgpu-rdc
--hip-link
--cuda-device-only
${bitcode_file}
${HIPRT_BITCODE}
-o ${hiprt_file})
add_custom_command(
OUTPUT ${hiprt_file}
COMMAND ${hiprt_link_command} ${hiprt_link_flags}
DEPENDS ${bitcode_file})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file}" ${CYCLES_INSTALL_PATH}/lib)
add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file})
cycles_set_solution_folder(cycles_kernel_hiprt)
endif()
# OptiX PTX modules
if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
@ -1033,6 +1124,7 @@ cycles_add_library(cycles_kernel "${LIB}"
${SRC_KERNEL_DEVICE_CPU}
${SRC_KERNEL_DEVICE_CUDA}
${SRC_KERNEL_DEVICE_HIP}
${SRC_KERNEL_DEVICE_HIPRT}
${SRC_KERNEL_DEVICE_OPTIX}
${SRC_KERNEL_DEVICE_METAL}
${SRC_KERNEL_HEADERS}
@ -1040,6 +1132,7 @@ cycles_add_library(cycles_kernel "${LIB}"
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
${SRC_KERNEL_DEVICE_HIP_HEADERS}
${SRC_KERNEL_DEVICE_HIPRT_HEADERS}
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
${SRC_KERNEL_DEVICE_METAL_HEADERS}
${SRC_KERNEL_DEVICE_ONEAPI_HEADERS}
@ -1053,6 +1146,7 @@ source_group("device\\cpu" FILES ${SRC_KERNEL_DEVICE_CPU} ${SRC_KERNEL_DEVICE_CP
source_group("device\\cuda" FILES ${SRC_KERNEL_DEVICE_CUDA} ${SRC_KERNEL_DEVICE_CUDA_HEADERS})
source_group("device\\gpu" FILES ${SRC_KERNEL_DEVICE_GPU_HEADERS})
source_group("device\\hip" FILES ${SRC_KERNEL_DEVICE_HIP} ${SRC_KERNEL_DEVICE_HIP_HEADERS})
source_group("device\\hiprt" FILES ${SRC_KERNEL_DEVICE_HIPRT} ${SRC_KERNEL_DEVICE_HIPRT_HEADERS})
source_group("device\\optix" FILES ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS})
source_group("device\\metal" FILES ${SRC_KERNEL_DEVICE_METAL} ${SRC_KERNEL_DEVICE_METAL_HEADERS})
source_group("device\\oneapi" FILES ${SRC_KERNEL_DEVICE_ONEAPI} ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS})
@ -1090,6 +1184,8 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_CUDA_HEADERS}"
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_GPU_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/gpu)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIPRT}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal)

View File

@ -17,6 +17,8 @@
# include "kernel/device/metal/bvh.h"
#elif defined(__KERNEL_OPTIX__)
# include "kernel/device/optix/bvh.h"
#elif defined(__HIPRT__)
# include "kernel/device/hiprt/bvh.h"
#else
# define __BVH2__
#endif

View File

@ -48,6 +48,9 @@
#include "kernel/film/read.h"
#if defined(__HIPRT__)
# include "kernel/device/hiprt/hiprt_kernels.h"
#endif
/* --------------------------------------------------------------------
* Integrator.
*/
@ -128,11 +131,13 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
ccl_gpu_kernel_postfix
#if !defined(__HIPRT__)
/* Intersection kernels need access to the kernel handler for specialization constants to work
* properly. */
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_begin.h"
#endif
# ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_begin.h"
# endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_intersect_closest,
@ -191,8 +196,10 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
ccl_gpu_kernel_postfix
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_end.h"
# ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_end.h"
# endif
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
@ -259,11 +266,13 @@ ccl_gpu_kernel_postfix
constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
#endif
#if !defined(__HIPRT__)
/* Kernels using intersections need access to the kernel handler for specialization constants to
* work properly. */
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_begin.h"
#endif
# ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_begin.h"
# endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
@ -276,15 +285,15 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
if (ccl_gpu_kernel_within_bounds(global_index, work_size)) {
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
# if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
KernelGlobals kg = NULL;
/* Workaround Ambient Occlusion and Bevel nodes not working with Metal.
* Dummy offset should not affect result, but somehow fixes bug! */
kg += __dummy_constant;
ccl_gpu_kernel_call(integrator_shade_surface_raytrace(kg, state, render_buffer));
#else
# else
ccl_gpu_kernel_call(integrator_shade_surface_raytrace(NULL, state, render_buffer));
#endif
# endif
}
}
ccl_gpu_kernel_postfix
@ -303,8 +312,11 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
}
ccl_gpu_kernel_postfix
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_end.h"
# ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_end.h"
# endif
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)

View File

@ -20,6 +20,10 @@
#define GPU_KERNEL_BLOCK_NUM_THREADS 1024
#define GPU_KERNEL_MAX_REGISTERS 64
/* For performance tuning of the HIP RT kernels we might have to change this number, which is
 * why we don't reuse GPU_KERNEL_BLOCK_NUM_THREADS. */
#define GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS 1024
/* Compute number of threads per block and minimum blocks per multiprocessor
* given the maximum number of registers per thread. */
#define ccl_gpu_kernel(block_num_threads, thread_num_registers) \

View File

@ -0,0 +1,219 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#pragma once
#include "kernel/device/hiprt/common.h"
CCL_NAMESPACE_BEGIN
ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray)
{
return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
}
ccl_device_intersect bool scene_intersect(KernelGlobals kg,
ccl_private const Ray *ray,
const uint visibility,
ccl_private Intersection *isect)
{
isect->t = ray->tmax;
isect->u = 0.0f;
isect->v = 0.0f;
isect->prim = PRIM_NONE;
isect->object = OBJECT_NONE;
isect->type = PRIMITIVE_NONE;
if (!scene_intersect_valid(ray)) {
isect->t = ray->tmax;
isect->type = PRIMITIVE_NONE;
return false;
}
hiprtRay ray_hip;
SET_HIPRT_RAY(ray_hip, ray)
RayPayload payload;
payload.self = ray->self;
payload.kg = kg;
payload.visibility = visibility;
payload.prim_type = PRIMITIVE_NONE;
payload.ray_time = ray->time;
hiprtHit hit = {};
GET_TRAVERSAL_STACK()
if (visibility & PATH_RAY_SHADOW_OPAQUE) {
GET_TRAVERSAL_ANY_HIT(table_closest_intersect, 0)
hit = traversal.getNextHit();
}
else {
GET_TRAVERSAL_CLOSEST_HIT(table_closest_intersect, 0)
hit = traversal.getNextHit();
}
if (hit.hasHit()) {
set_intersect_point(kg, hit, isect);
if (isect->type > 1) { // should be applied only to curves
isect->type = payload.prim_type;
isect->prim = hit.primID;
}
return true;
}
return false;
}
#ifdef __BVH_LOCAL__
ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
ccl_private const Ray *ray,
ccl_private LocalIntersection *local_isect,
int local_object,
ccl_private uint *lcg_state,
int max_hits)
{
if (!scene_intersect_valid(ray)) {
if (local_isect) {
local_isect->num_hits = 0;
}
return false;
}
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
if (local_isect != NULL) {
local_isect->num_hits = 0;
}
const int object_flag = kernel_data_fetch(object_flag, local_object);
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
# else
bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
# endif
}
hiprtRay ray_hip;
ray_hip.origin = P;
ray_hip.direction = dir;
ray_hip.maxT = ray->tmax;
ray_hip.minT = ray->tmin;
LocalPayload payload = {0};
payload.kg = kg;
payload.self = ray->self;
payload.local_object = local_object;
payload.max_hits = max_hits;
payload.lcg_state = lcg_state;
payload.local_isect = local_isect;
GET_TRAVERSAL_STACK()
void *local_geom = (void *)(kernel_data_fetch(blas_ptr, local_object));
// We don't need custom intersection functions for SSR.
# ifdef HIPRT_SHARED_STACK
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal(local_geom,
ray_hip,
stack,
hiprtTraversalHintDefault,
&payload,
kernel_params.table_local_intersect,
2);
# else
hiprtGeomTraversalAnyHit traversal(
local_geom, ray_hip, table, hiprtTraversalHintDefault, &payload);
# endif
hiprtHit hit = traversal.getNextHit();
return hit.hasHit();
}
#endif //__BVH_LOCAL__
#ifdef __SHADOW_RECORD_ALL__
ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
IntegratorShadowState state,
ccl_private const Ray *ray,
uint visibility,
uint max_hits,
ccl_private uint *num_recorded_hits,
ccl_private float *throughput)
{
*throughput = 1.0f;
*num_recorded_hits = 0;
if (!scene_intersect_valid(ray)) {
return false;
}
hiprtRay ray_hip;
SET_HIPRT_RAY(ray_hip, ray)
ShadowPayload payload;
payload.kg = kg;
payload.self = ray->self;
payload.in_state = state;
payload.max_hits = max_hits;
payload.visibility = visibility;
payload.prim_type = PRIMITIVE_TRIANGLE;
payload.ray_time = ray->time;
payload.num_hits = 0;
payload.r_num_recorded_hits = num_recorded_hits;
payload.r_throughput = throughput;
GET_TRAVERSAL_STACK()
GET_TRAVERSAL_ANY_HIT(table_shadow_intersect, 1)
hiprtHit hit = traversal.getNextHit();
num_recorded_hits = payload.r_num_recorded_hits;
throughput = payload.r_throughput;
return hit.hasHit();
}
#endif /* __SHADOW_RECORD_ALL__ */
#ifdef __VOLUME__
ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
ccl_private const Ray *ray,
ccl_private Intersection *isect,
const uint visibility)
{
isect->t = ray->tmax;
isect->u = 0.0f;
isect->v = 0.0f;
isect->prim = PRIM_NONE;
isect->object = OBJECT_NONE;
isect->type = PRIMITIVE_NONE;
if (!scene_intersect_valid(ray)) {
return false;
}
hiprtRay ray_hip;
SET_HIPRT_RAY(ray_hip, ray)
RayPayload payload;
payload.self = ray->self;
payload.kg = kg;
payload.visibility = visibility;
payload.prim_type = PRIMITIVE_NONE;
payload.ray_time = ray->time;
GET_TRAVERSAL_STACK()
GET_TRAVERSAL_CLOSEST_HIT(table_volume_intersect, 3)
hiprtHit hit = traversal.getNextHit();
if (hit.hasHit()) {
set_intersect_point(kg, hit, isect);
if (isect->type > 1) { // should be applied only to curves
isect->type = payload.prim_type;
isect->prim = hit.primID;
}
return true;
}
else
return false;
}
#endif /* __VOLUME__ */
CCL_NAMESPACE_END

View File

@ -0,0 +1,637 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifdef __HIPRT__
struct RayPayload {
KernelGlobals kg;
RaySelfPrimitives self;
uint visibility;
int prim_type;
float ray_time;
};
struct ShadowPayload {
KernelGlobals kg;
RaySelfPrimitives self;
uint visibility;
int prim_type;
float ray_time;
int in_state;
uint max_hits;
uint num_hits;
uint *r_num_recorded_hits;
float *r_throughput;
};
struct LocalPayload {
KernelGlobals kg;
RaySelfPrimitives self;
int prim_type;
float ray_time;
int local_object;
uint max_hits;
uint *lcg_state;
LocalIntersection *local_isect;
};
# define SET_HIPRT_RAY(RAY_RT, RAY) \
RAY_RT.direction = RAY->D; \
RAY_RT.origin = RAY->P; \
RAY_RT.maxT = RAY->tmax; \
RAY_RT.minT = RAY->tmin;
# if defined(HIPRT_SHARED_STACK)
# define GET_TRAVERSAL_STACK() \
Stack stack(&kg->global_stack_buffer[0], \
HIPRT_THREAD_STACK_SIZE, \
kg->shared_stack, \
HIPRT_SHARED_STACK_SIZE);
# else
# define GET_TRAVERSAL_STACK()
# endif
# ifdef HIPRT_SHARED_STACK
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE, RAY_TYPE) \
hiprtSceneTraversalAnyHitCustomStack<Stack> traversal(kernel_data.device_bvh, \
ray_hip, \
stack, \
visibility, \
hiprtTraversalHintDefault, \
&payload, \
kernel_params.FUNCTION_TABLE, \
RAY_TYPE); \
hiprtSceneTraversalAnyHitCustomStack<Stack> traversal_simple( \
kernel_data.device_bvh, ray_hip, stack, visibility);
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE, RAY_TYPE) \
hiprtSceneTraversalClosestCustomStack<Stack> traversal(kernel_data.device_bvh, \
ray_hip, \
stack, \
visibility, \
hiprtTraversalHintDefault, \
&payload, \
kernel_params.FUNCTION_TABLE, \
RAY_TYPE); \
hiprtSceneTraversalClosestCustomStack<Stack> traversal_simple( \
kernel_data.device_bvh, ray_hip, stack, visibility);
# else
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE) \
hiprtSceneTraversalAnyHit traversal(kernel_data.device_bvh, \
ray_hip, \
visibility, \
FUNCTION_TABLE, \
hiprtTraversalHintDefault, \
&payload); \
hiprtSceneTraversalAnyHit traversal_simple(kernel_data.device_bvh, ray_hip, visibility);
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE) \
hiprtSceneTraversalClosest traversal(kernel_data.device_bvh, \
ray_hip, \
visibility, \
FUNCTION_TABLE, \
hiprtTraversalHintDefault, \
&payload); \
hiprtSceneTraversalClosest traversal_simple(kernel_data.device_bvh, ray_hip, visibility);
# endif
ccl_device_inline void set_intersect_point(KernelGlobals kg,
hiprtHit &hit,
ccl_private Intersection *isect)
{
int prim_offset = 0;
int object_id = kernel_data_fetch(user_instance_id, hit.instanceID);
prim_offset = kernel_data_fetch(object_prim_offset, object_id);
isect->type = kernel_data_fetch(objects, object_id).primitive_type;
isect->t = hit.t;
isect->prim = hit.primID + prim_offset;
isect->object = object_id;
isect->u = hit.uv.x;
isect->v = hit.uv.y;
}
// custom intersection functions
ccl_device_inline bool curve_custom_intersect(const hiprtRay &ray,
const void *userPtr,
void *payload,
hiprtHit &hit)
{
Intersection isect;
RayPayload *local_payload = (RayPayload *)payload;
// We could also cast the shadow payload to get the elements needed for the intersection,
// so there is no need to write a separate function for shadow intersection.
KernelGlobals kg = local_payload->kg;
int object_id = kernel_data_fetch(user_instance_id, hit.instanceID);
int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id);
// data_offset.x: where the data (prim id, type) for the geometry of the current object begins.
// The prim_id in hiprtHit is local to the particular geometry that was hit, so we add this
// offset to map the prim id in hiprtHit to the one the next stage expects.
// data_offset.y: the offset that has to be added to a local primitive id to get the global
// primitive id (equal to kernel_data_fetch(object_prim_offset, object_id)).
int prim_offset = data_offset.y;
int curve_index = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).x;
int key_value = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).y;
if (intersection_skip_self_shadow(local_payload->self, object_id, curve_index + prim_offset))
return false;
float ray_time = local_payload->ray_time;
if ((key_value & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) {
int time_offset = kernel_data_fetch(prim_time_offset, object_id);
float2 prims_time = kernel_data_fetch(prims_time, hit.primID + time_offset);
if (ray_time < prims_time.x || ray_time > prims_time.y) {
return false;
}
}
bool b_hit = curve_intersect(kg,
&isect,
ray.origin,
ray.direction,
ray.minT,
ray.maxT,
object_id,
curve_index + prim_offset,
ray_time,
key_value);
if (b_hit) {
hit.uv.x = isect.u;
hit.uv.y = isect.v;
hit.t = isect.t;
hit.primID = isect.prim;
local_payload->prim_type = isect.type; // packed_curve_type;
}
return b_hit;
}
ccl_device_inline bool motion_triangle_custom_intersect(const hiprtRay &ray,
const void *userPtr,
void *payload,
hiprtHit &hit)
{
# ifdef MOTION_BLUR
RayPayload *local_payload = (RayPayload *)payload;
KernelGlobals kg = local_payload->kg;
int object_id = kernel_data_fetch(user_instance_id, hit.instanceID);
int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id);
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int prim_id_local = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).x;
int prim_id_global = prim_id_local + prim_offset;
if (intersection_skip_self_shadow(local_payload->self, object_id, prim_id_global))
return false;
Intersection isect;
bool b_hit = motion_triangle_intersect(kg,
&isect,
ray.origin,
ray.direction,
ray.minT,
ray.maxT,
local_payload->ray_time,
local_payload->visibility,
object_id,
prim_id_global,
prim_id_local);
if (b_hit) {
hit.uv.x = isect.u;
hit.uv.y = isect.v;
hit.t = isect.t;
hit.primID = isect.prim;
local_payload->prim_type = isect.type;
}
return b_hit;
# else
return false;
# endif
}
ccl_device_inline bool motion_triangle_custom_local_intersect(const hiprtRay &ray,
const void *userPtr,
void *payload,
hiprtHit &hit)
{
# ifdef MOTION_BLUR
LocalPayload *local_payload = (LocalPayload *)payload;
KernelGlobals kg = local_payload->kg;
int object_id = local_payload->local_object;
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id);
int prim_id_local = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).x;
int prim_id_global = prim_id_local + prim_offset;
if (intersection_skip_self_local(local_payload->self, prim_id_global))
return false;
LocalIntersection *local_isect = local_payload->local_isect;
bool b_hit = motion_triangle_intersect_local(kg,
local_isect,
ray.origin,
ray.direction,
local_payload->ray_time,
object_id,
prim_id_global,
prim_id_local,
ray.minT,
ray.maxT,
local_payload->lcg_state,
local_payload->max_hits);
if (b_hit) {
local_payload->prim_type = PRIMITIVE_MOTION_TRIANGLE;
}
return b_hit;
# else
return false;
# endif
}
ccl_device_inline bool motion_triangle_custom_volume_intersect(const hiprtRay &ray,
const void *userPtr,
void *payload,
hiprtHit &hit)
{
# ifdef MOTION_BLUR
RayPayload *local_payload = (RayPayload *)payload;
KernelGlobals kg = local_payload->kg;
int object_id = kernel_data_fetch(user_instance_id, hit.instanceID);
int object_flag = kernel_data_fetch(object_flag, object_id);
if (!(object_flag & SD_OBJECT_HAS_VOLUME))
return false;
int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id);
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int prim_id_local = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).x;
int prim_id_global = prim_id_local + prim_offset;
if (intersection_skip_self_shadow(local_payload->self, object_id, prim_id_global))
return false;
Intersection isect;
bool b_hit = motion_triangle_intersect(kg,
&isect,
ray.origin,
ray.direction,
ray.minT,
ray.maxT,
local_payload->ray_time,
local_payload->visibility,
object_id,
prim_id_global,
prim_id_local);
if (b_hit) {
hit.uv.x = isect.u;
hit.uv.y = isect.v;
hit.t = isect.t;
hit.primID = isect.prim;
local_payload->prim_type = isect.type;
}
return b_hit;
# else
return false;
# endif
}
ccl_device_inline bool point_custom_intersect(const hiprtRay &ray,
const void *userPtr,
void *payload,
hiprtHit &hit)
{
# ifdef POINT_CLOUD
RayPayload *local_payload = (RayPayload *)payload;
KernelGlobals kg = local_payload->kg;
int object_id = kernel_data_fetch(user_instance_id, hit.instanceID);
int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id);
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int2 prim_info = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x);
int prim_id_local = prim_info.x;
int prim_id_global = prim_id_local + prim_offset;
int type = prim_info.y;
if (intersection_skip_self_shadow(local_payload->self, object_id, prim_id_global))
return false;
float ray_time = local_payload->ray_time;
if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) {
int time_offset = kernel_data_fetch(prim_time_offset, object_id);
float2 prims_time = kernel_data_fetch(prims_time, hit.primID + time_offset);
if (ray_time < prims_time.x || ray_time > prims_time.y) {
return false;
}
}
Intersection isect;
bool b_hit = point_intersect(kg,
&isect,
ray.origin,
ray.direction,
ray.minT,
ray.maxT,
object_id,
prim_id_global,
ray_time,
type);
if (b_hit) {
hit.uv.x = isect.u;
hit.uv.y = isect.v;
hit.t = isect.t;
hit.primID = isect.prim;
local_payload->prim_type = isect.type;
}
return b_hit;
# else
return false;
# endif
}
// intersection filters
ccl_device_inline bool closest_intersection_filter(const hiprtRay &ray,
const void *data,
void *user_data,
const hiprtHit &hit)
{
RayPayload *payload = (RayPayload *)user_data;
int object_id = kernel_data_fetch(user_instance_id, hit.instanceID);
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int prim = hit.primID + prim_offset;
if (intersection_skip_self_shadow(payload->self, object_id, prim))
return true;
else
return false;
}
ccl_device_inline bool shadow_intersection_filter(const hiprtRay &ray,
const void *data,
void *user_data,
const hiprtHit &hit)
{
ShadowPayload *payload = (ShadowPayload *)user_data;
uint num_hits = payload->num_hits;
uint num_recorded_hits = *(payload->r_num_recorded_hits);
uint max_hits = payload->max_hits;
int state = payload->in_state;
KernelGlobals kg = payload->kg;
RaySelfPrimitives self = payload->self;
int object = kernel_data_fetch(user_instance_id, hit.instanceID);
int prim_offset = kernel_data_fetch(object_prim_offset, object);
int prim = hit.primID + prim_offset;
float ray_tmax = hit.t;
# ifdef __VISIBILITY_FLAG__
if ((kernel_data_fetch(objects, object).visibility & payload->visibility) == 0) {
return true; // no hit - continue traversal
}
# endif
if (intersection_skip_self_shadow(self, object, prim)) {
return true; // no hit - continue traversal
}
float u = hit.uv.x;
float v = hit.uv.y;
int type = kernel_data_fetch(objects, object).primitive_type;
# ifdef __HAIR__
if (type & (PRIMITIVE_CURVE_THICK | PRIMITIVE_CURVE_RIBBON)) {
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
type = segment.type;
prim = segment.prim;
}
# endif
# ifndef __TRANSPARENT_SHADOWS__
return false;
# else
if (num_hits >= max_hits ||
!(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
return false;
}
if (type & PRIMITIVE_CURVE) {
float throughput = *payload->r_throughput;
throughput *= intersection_curve_shadow_transparency(kg, object, prim, type, u);
*payload->r_throughput = throughput;
payload->num_hits += 1;
if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
return false;
}
else {
return true;
}
}
uint record_index = num_recorded_hits;
num_hits += 1;
num_recorded_hits += 1;
payload->num_hits = num_hits;
*(payload->r_num_recorded_hits) = num_recorded_hits;
const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
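/* The record array keeps at most max_record_hits entries. Once it is full, keep only the
 * nearest hits: skip this hit if it is farther than everything recorded so far, otherwise
 * overwrite the farthest recorded entry with it. */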
if (record_index >= max_record_hits) {
float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
uint max_recorded_hit = 0;
for (int i = 1; i < max_record_hits; i++) {
const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
if (isect_t > max_recorded_t) {
max_recorded_t = isect_t;
max_recorded_hit = i;
}
}
if (ray_tmax >= max_recorded_t) {
return true;
}
record_index = max_recorded_hit;
}
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
return true;
# endif /* __TRANSPARENT_SHADOWS__ */
}
ccl_device_inline bool local_intersection_filter(const hiprtRay &ray,
const void *data,
void *user_data,
const hiprtHit &hit)
{
# ifdef __BVH_LOCAL__
LocalPayload *payload = (LocalPayload *)user_data;
KernelGlobals kg = payload->kg;
int object_id = payload->local_object;
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int prim = hit.primID + prim_offset;
# ifndef __RAY_OFFSET__
if (intersection_skip_self_local(payload->self, prim)) {
return true; // continue search
}
# endif
uint max_hits = payload->max_hits;
if (max_hits == 0) {
return false; // stop search
}
int hit_index = 0;
if (payload->lcg_state) {
for (int i = min(max_hits, payload->local_isect->num_hits) - 1; i >= 0; --i) {
if (hit.t == payload->local_isect->hits[i].t) {
return true; // continue search
}
}
hit_index = payload->local_isect->num_hits++;
if (payload->local_isect->num_hits > max_hits) {
hit_index = lcg_step_uint(payload->lcg_state) % payload->local_isect->num_hits;
if (hit_index >= max_hits) {
return true; // continue search
}
}
}
else {
if (payload->local_isect->num_hits && hit.t > payload->local_isect->hits[0].t) {
return true;
}
payload->local_isect->num_hits = 1;
}
Intersection *isect = &payload->local_isect->hits[hit_index];
isect->t = hit.t;
isect->prim = prim;
isect->object = object_id;
isect->type = PRIMITIVE_TRIANGLE; // kernel_data_fetch(__objects, object_id).primitive_type;
isect->u = hit.uv.x;
isect->v = hit.uv.y;
payload->local_isect->Ng[hit_index] = hit.normal;
return true;
# endif
}
ccl_device_inline bool volume_intersection_filter(const hiprtRay &ray,
const void *data,
void *user_data,
const hiprtHit &hit)
{
RayPayload *payload = (RayPayload *)user_data;
int object_id = kernel_data_fetch(user_instance_id, hit.instanceID);
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int prim = hit.primID + prim_offset;
int object_flag = kernel_data_fetch(object_flag, object_id);
if (intersection_skip_self(payload->self, object_id, prim))
return true;
else if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0)
return true;
else
return false;
}
HIPRT_DEVICE bool intersectFunc(u32 geomType,
u32 rayType,
const hiprtFuncTableHeader &tableHeader,
const hiprtRay &ray,
void *payload,
hiprtHit &hit)
{
const u32 index = tableHeader.numGeomTypes * rayType + geomType;
const void *data = tableHeader.funcDataSets[index].filterFuncData;
switch (index) {
case Curve_Intersect_Function:
case Curve_Intersect_Shadow:
return curve_custom_intersect(ray, data, payload, hit);
case Motion_Triangle_Intersect_Function:
case Motion_Triangle_Intersect_Shadow:
return motion_triangle_custom_intersect(ray, data, payload, hit);
case Motion_Triangle_Intersect_Local:
return motion_triangle_custom_local_intersect(ray, data, payload, hit);
case Motion_Triangle_Intersect_Volume:
return motion_triangle_custom_volume_intersect(ray, data, payload, hit);
case Point_Intersect_Function:
case Point_Intersect_Shadow:
return point_custom_intersect(ray, data, payload, hit);
default:
break;
}
return false;
}
HIPRT_DEVICE bool filterFunc(u32 geomType,
u32 rayType,
const hiprtFuncTableHeader &tableHeader,
const hiprtRay &ray,
void *payload,
const hiprtHit &hit)
{
const u32 index = tableHeader.numGeomTypes * rayType + geomType;
const void *data = tableHeader.funcDataSets[index].intersectFuncData;
switch (index) {
case Triangle_Filter_Closest:
return closest_intersection_filter(ray, data, payload, hit);
case Triangle_Filter_Shadow:
case Curve_Filter_Shadow:
case Motion_Triangle_Filter_Shadow:
case Point_Filter_Shadow:
return shadow_intersection_filter(ray, data, payload, hit);
case Triangle_Filter_Local:
case Motion_Triangle_Filter_Local:
return local_intersection_filter(ray, data, payload, hit);
case Triangle_Filter_Volume:
case Motion_Triangle_Filter_Volume:
return volume_intersection_filter(ray, data, payload, hit);
default:
break;
}
return false;
}
#endif
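
The dispatch in intersectFunc() and filterFunc() above flattens a (ray type, geometry type) pair into one table index. A worked example of that indexing, assuming four geometry types (triangle, curve, motion triangle, point) and the ray-type order used by the traversal macros in bvh.h (closest = 0, shadow = 1, local = 2, volume = 3), matching the enums defined in globals.h below:

/* index = numGeomTypes * rayType + geomType, with numGeomTypes = 4:
 *   closest ray (0), curve (1)          -> 4 * 0 + 1 = 1  (Curve_Intersect_Function)
 *   shadow ray (1), motion triangle (2) -> 4 * 1 + 2 = 6  (Motion_Triangle_Intersect_Shadow)
 *   volume ray (3), triangle (0)        -> 4 * 3 + 0 = 12 (Triangle_Intersect_Volume_None) */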

View File

@ -0,0 +1,158 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#pragma once
#include "kernel/types.h"
#include "kernel/integrator/state.h"
#include "kernel/util/profiling.h"
#define HIPRT_SHARED_STACK
/* The size of the global stack available to each thread (memory reserved for each thread in
 * global_stack_buffer). */
#define HIPRT_THREAD_STACK_SIZE 64
/* LDS (Local Data Share) allocation for each thread; the number was obtained empirically. */
#define HIPRT_SHARED_STACK_SIZE 24
/* HIPRT_THREAD_GROUP_SIZE is the number of threads per work-group for the intersection kernels.
 * The default number of threads per work-group is 1024; however, since the HIP RT intersection
 * kernels use local memory, and the local memory size in those kernels scales up with the number
 * of threads, the number of threads is scaled down to 256 to avoid exceeding the maximum local
 * memory and to strike a balance between memory access and the number of waves.
 *
 * The total local stack size is number of threads * HIPRT_SHARED_STACK_SIZE. */
#define HIPRT_THREAD_GROUP_SIZE 256
CCL_NAMESPACE_BEGIN
struct KernelGlobalsGPU {
int *global_stack_buffer;
#ifdef HIPRT_SHARED_STACK
int *shared_stack;
#endif
};
typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals;
#if defined(HIPRT_SHARED_STACK)
/* This macro allocates shared memory and uses KernelGlobals to pass the shared memory down to
 * the intersection functions. */
# define HIPRT_INIT_KERNEL_GLOBAL() \
ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \
ccl_global KernelGlobalsGPU kg_gpu; \
KernelGlobals kg = &kg_gpu; \
kg->shared_stack = &shared_stack[0]; \
kg->global_stack_buffer = stack_buffer;
#else
# define HIPRT_INIT_KERNEL_GLOBAL() \
KernelGlobals kg = NULL; \
kg->global_stack_buffer = stack_buffer;
#endif
struct KernelParamsHIPRT {
KernelData data;
#define KERNEL_DATA_ARRAY(type, name) const type *name;
KERNEL_DATA_ARRAY(int, user_instance_id)
KERNEL_DATA_ARRAY(uint64_t, blas_ptr)
KERNEL_DATA_ARRAY(int2, custom_prim_info)
KERNEL_DATA_ARRAY(int2, custom_prim_info_offset)
KERNEL_DATA_ARRAY(float2, prims_time)
KERNEL_DATA_ARRAY(int, prim_time_offset)
#include "kernel/data_arrays.h"
/* Integrator state */
IntegratorStateGPU integrator_state;
hiprtFuncTable table_closest_intersect;
hiprtFuncTable table_shadow_intersect;
hiprtFuncTable table_local_intersect;
hiprtFuncTable table_volume_intersect;
};
/* Intersection_Function_Table_Index defines the index values used to retrieve the custom
 * intersection functions from the function table. */
enum Intersection_Function_Table_Index {
// Triangles use the intersection function provided by HIP RT and don't need custom intersection
// functions.
// Custom intersection functions for closest intersect.
Curve_Intersect_Function = 1, // Custom intersection for curves
Motion_Triangle_Intersect_Function, // Custom intersection for triangles with vertex motion blur
// attributes.
Point_Intersect_Function, // Custom intersection for point cloud.
// Custom intersection functions for shadow rendering are the same as the functions for closest
// intersect; however, the table indices are different.
Triangle_Intersect_Shadow_None,
Curve_Intersect_Shadow,
Motion_Triangle_Intersect_Shadow,
Point_Intersect_Shadow,
// Custom intersection functions for subsurface scattering.
// Only motion triangles have a valid custom intersection function.
Triangle_Intersect_Local_None,
Curve_Intersect_Local_None,
Motion_Triangle_Intersect_Local,
Point_Intersect_Local_None,
// Custom intersection functions for volume rendering.
// Only motion triangles have a valid custom intersection function.
Triangle_Intersect_Volume_None,
Curve_Intersect_Volume_None,
Motion_Triangle_Intersect_Volume,
Point_Intersect_Volume_None,
};
// Filter functions filter hits, i.e. they test whether a hit should be accepted or not, and
// whether traversal should stop or continue.
enum Filter_Function_Table_Index {
Triangle_Filter_Closest = 0, // Filter function for triangles for closest intersect, no custom
// intersection function is needed.
Curve_Filter_Opaque_None, // No filter function is needed and everything is handled in the
// intersection function.
Motion_Triangle_Filter_Opaque_None, // No filter function is needed and everything is handled in
// the intersection function.
Point_Filter_Opaque_Non, // No filter function is needed.
// Filter function for all primitives for shadow intersection.
// All primitives use the same function but each has a different index in the table.
Triangle_Filter_Shadow,
Curve_Filter_Shadow,
Motion_Triangle_Filter_Shadow,
Point_Filter_Shadow,
// Filter functions for subsurface scattering. Triangles and motion triangles need a function
// assignment. The indices for triangles and motion triangles point to the same function. Points
// and curves don't need any function since subsurface scattering is not applied to either.
Triangle_Filter_Local, // Filter functions for triangles
Curve_Filter_Local_None, // Subsurface scattering is not applied to curves, so no filter
// function is needed.
Motion_Triangle_Filter_Local,
Point_Filter_Local_None,
// Filter functions for volume rendering.
// Volume rendering only applies to triangles and motion triangles.
// Triangles and motion triangles use the same filter functions for volume rendering.
Triangle_Filter_Volume,
Curve_Filter_Volume_None,
Motion_Triangle_Filter_Volume,
Point_Filter_Volume_None,
};
#ifdef __KERNEL_GPU__
__constant__ KernelParamsHIPRT kernel_params;
# ifdef HIPRT_SHARED_STACK
typedef hiprtGlobalStack Stack;
# endif
#endif
/* Abstraction macros */
#define kernel_data kernel_params.data
#define kernel_data_fetch(name, index) kernel_params.name[(index)]
#define kernel_data_array(name) (kernel_params.name)
#define kernel_integrator_state kernel_params.integrator_state
CCL_NAMESPACE_END
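
A quick arithmetic check of the shared-stack budget implied by these constants (the 64 KiB LDS-per-work-group figure is an assumption, typical for current AMD GPUs, not something this patch states):

/* Per work-group LDS used by the intersection kernels:
 *   HIPRT_THREAD_GROUP_SIZE * HIPRT_SHARED_STACK_SIZE * sizeof(int)
 *   = 256 * 24 * 4 = 24 KiB,
 * which fits within an assumed 64 KiB of LDS; at the default 1024 threads it
 * would be 96 KiB, which is why the group size is scaled down. */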

View File

@ -0,0 +1,101 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifdef __HIPRT__
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_closest,
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_intersect_closest(kg, state, render_buffer));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_shadow,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_intersect_shadow(kg, state));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_subsurface,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_intersect_subsurface(kg, state));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_volume_stack,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_intersect_volume_stack(kg, state));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_shade_surface_raytrace(kg, state, render_buffer));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_shade_surface_mnee,
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_shade_surface_mnee(kg, state, render_buffer));
}
}
ccl_gpu_kernel_postfix
#endif /* __HIPRT__ */
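
Note that every wrapper above takes a trailing ccl_global int *stack_buffer argument; this pairs with HIPRTDeviceQueue::enqueue() earlier in this patch, which appends the global stack pointer as the last kernel argument. The correspondence, roughly:

/* Host side (queue.cpp):   args_copy.add(&hiprt_device_->global_stack_buffer.device_pointer);
 * Device side (this file): ccl_global int *stack_buffer receives that pointer, and
 *                          HIPRT_INIT_KERNEL_GLOBAL() stores it in kg->global_stack_buffer. */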

View File

@ -0,0 +1,16 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2023 Blender Foundation */
#ifdef __HIP_DEVICE_COMPILE__
# include "kernel/device/hip/compat.h"
# include "kernel/device/hip/config.h"
# include <hiprt/hiprt_device.h>
# include "kernel/device/hiprt/globals.h"
# include "kernel/device/gpu/image.h"
# include "kernel/device/gpu/kernel.h"
#endif

View File

@ -1169,10 +1169,14 @@ typedef enum KernelBVHLayout {
BVH_LAYOUT_METAL = (1 << 5),
BVH_LAYOUT_MULTI_METAL = (1 << 6),
BVH_LAYOUT_MULTI_METAL_EMBREE = (1 << 7),
BVH_LAYOUT_HIPRT = (1 << 8),
BVH_LAYOUT_MULTI_HIPRT = (1 << 9),
BVH_LAYOUT_MULTI_HIPRT_EMBREE = (1 << 10),
/* Default BVH layout to use for CPU. */
BVH_LAYOUT_AUTO = BVH_LAYOUT_EMBREE,
BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX | BVH_LAYOUT_METAL,
BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX | BVH_LAYOUT_METAL |
BVH_LAYOUT_HIPRT | BVH_LAYOUT_MULTI_HIPRT | BVH_LAYOUT_MULTI_HIPRT_EMBREE,
} KernelBVHLayout;
/* Specialized struct that can become constants in dynamic compilation. */
@ -1225,6 +1229,8 @@ typedef struct KernelData {
OptixTraversableHandle device_bvh;
#elif defined __METALRT__
metalrt_as_type device_bvh;
#elif defined(__HIPRT__)
void *device_bvh;
#else
# ifdef __EMBREE__
RTCScene device_bvh;

View File

@ -114,7 +114,9 @@ bool Geometry::need_build_bvh(BVHLayout layout) const
{
return is_instanced() || layout == BVH_LAYOUT_OPTIX || layout == BVH_LAYOUT_MULTI_OPTIX ||
layout == BVH_LAYOUT_METAL || layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
layout == BVH_LAYOUT_MULTI_METAL || layout == BVH_LAYOUT_MULTI_METAL_EMBREE;
layout == BVH_LAYOUT_MULTI_METAL || layout == BVH_LAYOUT_MULTI_METAL_EMBREE ||
layout == BVH_LAYOUT_HIPRT || layout == BVH_LAYOUT_MULTI_HIPRT ||
layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE;
}
bool Geometry::is_instanced() const

View File

@ -597,7 +597,8 @@ void ObjectManager::device_update_prim_offsets(Device *device, DeviceScene *dsce
if (!scene->integrator->get_use_light_tree()) {
BVHLayoutMask layout_mask = device->get_bvh_layout_mask(dscene->data.kernel_features);
if (layout_mask != BVH_LAYOUT_METAL && layout_mask != BVH_LAYOUT_MULTI_METAL &&
layout_mask != BVH_LAYOUT_MULTI_METAL_EMBREE) {
layout_mask != BVH_LAYOUT_MULTI_METAL_EMBREE && layout_mask != BVH_LAYOUT_HIPRT &&
layout_mask != BVH_LAYOUT_MULTI_HIPRT && layout_mask != BVH_LAYOUT_MULTI_HIPRT_EMBREE) {
return;
}
}

View File

@ -10,6 +10,7 @@
#include "scene/bake.h"
#include "scene/camera.h"
#include "scene/curves.h"
#include "scene/devicescene.h"
#include "scene/film.h"
#include "scene/integrator.h"
#include "scene/light.h"
@ -24,7 +25,6 @@
#include "scene/svm.h"
#include "scene/tables.h"
#include "scene/volume.h"
#include "scene/devicescene.h"
#include "session/session.h"
#include "util/foreach.h"
@ -34,8 +34,6 @@
CCL_NAMESPACE_BEGIN
Scene::Scene(const SceneParams &params_, Device *device)
: name("Scene"),
bvh(NULL),

View File

@ -595,7 +595,7 @@ ccl_device_inline float4 ensure_finite(float4 v)
ccl_device_inline float4 pow(float4 v, float e)
{
return make_float4(powf(v.x, e), powf(v.y, e), powf(v.z, e), powf(v.z, e));
return make_float4(powf(v.x, e), powf(v.y, e), powf(v.z, e), powf(v.w, e));
}
CCL_NAMESPACE_END

View File

@ -1,3 +1,5 @@
#!/bin/sh
# Disable ALSA and OSS as they are not available, and trying to initialize them
# breaks sound in other apps. Use PulseAudio instead.
export ALSOFT_DRIVERS=-oss,-alsa,

View File

@ -6288,8 +6288,7 @@ def km_view3d_rotate_modal(_params):
)
items.extend([
("CONFIRM", {"type": 'MIDDLEMOUSE', "value": 'RELEASE', "any": True}, None),
("CONFIRM", {"type": 'ESC', "value": 'PRESS', "any": True}, None),
("CANCEL", {"type": 'RIGHTMOUSE', "value": 'PRESS', "any": True}, None),
("AXIS_SNAP_ENABLE", {"type": 'LEFT_ALT', "value": 'PRESS', "any": True}, None),
("AXIS_SNAP_DISABLE", {"type": 'LEFT_ALT', "value": 'RELEASE', "any": True}, None),
("AXIS_SNAP_ENABLE", {"type": 'RIGHT_ALT', "value": 'PRESS', "any": True}, None),
@ -6308,8 +6307,7 @@ def km_view3d_move_modal(_params):
)
items.extend([
("CONFIRM", {"type": 'MIDDLEMOUSE', "value": 'RELEASE', "any": True}, None),
("CONFIRM", {"type": 'ESC', "value": 'PRESS', "any": True}, None),
("CANCEL", {"type": 'RIGHTMOUSE', "value": 'PRESS', "any": True}, None),
])
return keymap
@ -6324,8 +6322,7 @@ def km_view3d_zoom_modal(_params):
)
items.extend([
("CONFIRM", {"type": 'MIDDLEMOUSE', "value": 'RELEASE', "any": True}, None),
("CONFIRM", {"type": 'ESC', "value": 'PRESS', "any": True}, None),
("CANCEL", {"type": 'RIGHTMOUSE', "value": 'PRESS', "any": True}, None),
])
return keymap
@ -6340,8 +6337,7 @@ def km_view3d_dolly_modal(_params):
)
items.extend([
("CONFIRM", {"type": 'MIDDLEMOUSE', "value": 'RELEASE', "any": True}, None),
("CONFIRM", {"type": 'ESC', "value": 'PRESS', "any": True}, None),
("CANCEL", {"type": 'RIGHTMOUSE', "value": 'PRESS', "any": True}, None),
])
return keymap

View File

@ -1491,6 +1491,7 @@ class USERPREF_PT_file_paths_asset_libraries(FilePathsPanel, Panel):
active_library = paths.asset_libraries[active_library_index]
layout.prop(active_library, "path")
layout.prop(active_library, "import_method", text="Import Method")
layout.prop(active_library, "use_relative_path")
class USERPREF_UL_asset_libraries(bpy.types.UIList):

View File

@ -120,6 +120,7 @@ set(SRC_DNA_DEFAULTS_INC
${CMAKE_CURRENT_SOURCE_DIR}/makesdna/DNA_space_defaults.h
${CMAKE_CURRENT_SOURCE_DIR}/makesdna/DNA_speaker_defaults.h
${CMAKE_CURRENT_SOURCE_DIR}/makesdna/DNA_texture_defaults.h
${CMAKE_CURRENT_SOURCE_DIR}/makesdna/DNA_userdef_defaults.h
${CMAKE_CURRENT_SOURCE_DIR}/makesdna/DNA_vec_defaults.h
${CMAKE_CURRENT_SOURCE_DIR}/makesdna/DNA_view3d_defaults.h
${CMAKE_CURRENT_SOURCE_DIR}/makesdna/DNA_volume_defaults.h

View File

@ -67,6 +67,8 @@ class AssetLibrary {
* #import_method_ above, it's just a default. */
bool may_override_import_method_ = false;
bool use_relative_path_ = true;
bCallbackFuncStore on_save_callback_store_{};
public:

View File

@ -88,6 +88,7 @@ class AssetRepresentation {
* #get_import_method(). Also returns true if there is no predefined import method
* (when #get_import_method() returns no value). */
bool may_override_import_method() const;
bool get_use_relative_path() const;
/** If this asset is stored inside this current file (#is_local_id() is true), this returns the
* ID's pointer, otherwise null. */
ID *local_id() const;
@ -109,3 +110,4 @@ std::string AS_asset_representation_full_library_path_get(const ::AssetRepresent
std::optional<eAssetImportMethod> AS_asset_representation_import_method_get(
const ::AssetRepresentation *asset_handle);
bool AS_asset_representation_may_override_import_method(const ::AssetRepresentation *asset_handle);
bool AS_asset_representation_use_relative_path_get(const ::AssetRepresentation *asset_handle);

View File

@ -101,6 +101,7 @@ AssetLibrary *AssetLibraryService::get_asset_library(
AssetLibrary *library = get_asset_library_on_disk_custom(custom_library->name, root_path);
library->import_method_ = eAssetImportMethod(custom_library->import_method);
library->may_override_import_method_ = true;
library->use_relative_path_ = (custom_library->flag & ASSET_LIBRARY_RELATIVE_PATH) != 0;
return library;
}

View File

@ -106,6 +106,14 @@ bool AssetRepresentation::may_override_import_method() const
return owner_asset_library_->may_override_import_method_;
}
bool AssetRepresentation::get_use_relative_path() const
{
if (!owner_asset_library_) {
return false;
}
return owner_asset_library_->use_relative_path_;
}
ID *AssetRepresentation::local_id() const
{
return is_local_id_ ? local_asset_id_ : nullptr;
@ -155,6 +163,13 @@ bool AS_asset_representation_may_override_import_method(const AssetRepresentatio
return asset->may_override_import_method();
}
bool AS_asset_representation_use_relative_path_get(const AssetRepresentation *asset_handle)
{
const asset_system::AssetRepresentation *asset =
reinterpret_cast<const asset_system::AssetRepresentation *>(asset_handle);
return asset->get_use_relative_path();
}
/* ---------------------------------------------------------------------- */
/** \name C-API
* \{ */

View File

@ -25,7 +25,7 @@ extern "C" {
/* Blender file format version. */
#define BLENDER_FILE_VERSION BLENDER_VERSION
#define BLENDER_FILE_SUBVERSION 5
#define BLENDER_FILE_SUBVERSION 6
/* Minimum Blender version that supports reading file written with the current
* version. Older Blender versions will test this and show a warning if the file

View File

@ -92,7 +92,7 @@ enum {
* Do not attempt to access original ID pointers (triggers usages of
* `IDWALK_NO_ORIG_POINTERS_ACCESS` too).
*
* Use when original ID pointers values are (probably) not valid, e.g. dureing readfile process.
* Use when original ID pointers values are (probably) not valid, e.g. during read-file process.
*/
ID_REMAP_NO_ORIG_POINTERS_ACCESS = 1 << 20,
};

View File

@ -887,21 +887,33 @@ ListBase BKE_collection_object_cache_instanced_get(Collection *collection)
static void collection_object_cache_free(Collection *collection)
{
/* Clear own cache and for all parents, since those are affected by changes as well. */
collection->flag &= ~(COLLECTION_HAS_OBJECT_CACHE | COLLECTION_HAS_OBJECT_CACHE_INSTANCED);
BLI_freelistN(&collection->runtime.object_cache);
BLI_freelistN(&collection->runtime.object_cache_instanced);
}
void BKE_collection_object_cache_free(Collection *collection)
static void collection_object_cache_free_parent_recursive(Collection *collection)
{
collection_object_cache_free(collection);
/* Clear cache in all parents recursively, since those are affected by changes as well. */
LISTBASE_FOREACH (CollectionParent *, parent, &collection->runtime.parents) {
collection_object_cache_free(parent->collection);
/* In theory there should be no NULL pointer here. However, this code can be called from
* non-valid temporary states (e.g. indirectly from #BKE_collections_object_remove_invalids
* as part of ID remapping process). */
if (parent->collection == NULL) {
continue;
}
collection_object_cache_free_parent_recursive(parent->collection);
}
}
void BKE_collection_object_cache_free(Collection *collection)
{
BLI_assert(collection != NULL);
collection_object_cache_free_parent_recursive(collection);
}
void BKE_main_collections_object_cache_free(const Main *bmain)
{
for (Scene *scene = bmain->scenes.first; scene != NULL; scene = scene->id.next) {

View File

@ -1900,7 +1900,7 @@ void BKE_fcurve_deduplicate_keys(FCurve *fcu)
}
else {
/* Move the retained key to the old X-coordinate, to 'anchor' the X-coordinate used for
* subsequente comparisons. Without this, the reference X-coordinate would keep moving
* subsequent comparisons. Without this, the reference X-coordinate would keep moving
* forward in time, potentially merging in more keys than desired. */
BKE_fcurve_keyframe_move_time_with_handles(prev_bezt, prev_x);
}

View File

@ -422,7 +422,7 @@ bool try_capture_field_on_geometry(GeometryComponent &component,
return attributes.add(attribute_id, domain, data_type, AttributeInitConstruct{});
}
bke::GeometryFieldContext field_context{component, domain};
const bke::GeometryFieldContext field_context{component, domain};
const IndexMask mask{IndexMask(domain_size)};
const bke::AttributeValidator validator = attributes.lookup_validator(attribute_id);
@ -433,9 +433,9 @@ bool try_capture_field_on_geometry(GeometryComponent &component,
/* We are writing to an attribute that exists already with the correct domain and type. */
if (attribute_exists) {
if (GSpanAttributeWriter dst_attribute = attributes.lookup_for_write_span(attribute_id)) {
bke::GeometryFieldContext field_context{component, domain};
const IndexMask mask{IndexMask(domain_size)};
const bke::GeometryFieldContext field_context{component, domain};
fn::FieldEvaluator evaluator{field_context, &mask};
evaluator.add(validator.validate_field_if_necessary(field));
evaluator.set_selection(selection);

View File

@ -2076,6 +2076,7 @@ static bool lib_override_library_resync(Main *bmain,
}
BKE_id_remapper_free(id_remapper);
BLI_linklist_free(id_override_old_list, nullptr);
id_override_old_list = nullptr;
/* Delete old override IDs.
* Note that we have to use tagged group deletion here, since ID deletion also uses
@ -2083,40 +2084,55 @@ static bool lib_override_library_resync(Main *bmain,
int user_edited_overrides_deletion_count = 0;
FOREACH_MAIN_ID_BEGIN (bmain, id) {
if (id->tag & LIB_TAG_DOIT) {
/* Note that this works because linked IDs are always after local ones (including
* overrides), so we will only ever tag an old override ID after we have already checked it
* in this loop, hence we cannot untag it later. */
/* Since this code can now also be called on linked liboverrides (during recursive resync),
* the processing order can no longer guarantee that an old liboverride won't be tagged for
* deletion before being processed by this loop (which would then untag it again).
*
* So instead, store the old liboverrides from Main in a temporary list, and do the tagging
* separately once this loop over all IDs in Main is done. */
if (id->newid != nullptr && id->lib == id_root_reference->lib) {
ID *id_override_old = static_cast<ID *>(BLI_ghash_lookup(linkedref_to_old_override, id));
if (id_override_old != nullptr) {
id->newid->tag &= ~LIB_TAG_DOIT;
id_override_old->tag |= LIB_TAG_DOIT;
if (id_override_old->tag & LIB_TAG_NO_MAIN) {
id_override_old->tag |= LIB_TAG_DOIT;
BLI_assert(BLI_findindex(no_main_ids_list, id_override_old) != -1);
}
else {
/* Defer tagging. */
BLI_linklist_prepend(&id_override_old_list, id_override_old);
}
}
}
id->tag &= ~LIB_TAG_DOIT;
}
/* Also deal with old overrides that went missing in new linked data - only for real local
* overrides for now, not those that are linked. */
else if (id->tag & LIB_TAG_MISSING && !ID_IS_LINKED(id) && ID_IS_OVERRIDE_LIBRARY(id)) {
if (ID_IS_OVERRIDE_LIBRARY_REAL(id) &&
id->override_library->reference->lib->id.tag & LIB_TAG_MISSING) {
else if (id->tag & LIB_TAG_MISSING && !ID_IS_LINKED(id) && ID_IS_OVERRIDE_LIBRARY_REAL(id)) {
bool do_delete;
ID *hierarchy_root = id->override_library->hierarchy_root;
if (id->override_library->reference->lib->id.tag & LIB_TAG_MISSING) {
/* Do not delete overrides whose reference is missing because the library itself is missing
* (ref. #100586). */
do_delete = false;
}
else if (hierarchy_root != nullptr &&
hierarchy_root->override_library->reference->tag & LIB_TAG_MISSING) {
/* Do not delete overrides whose hierarchy root reference is missing. This would typically
* cause more harm than good. */
do_delete = false;
}
else if (!BKE_lib_override_library_is_user_edited(id)) {
/* If user never edited them, we can delete them. */
id->tag |= LIB_TAG_DOIT;
id->tag &= ~LIB_TAG_MISSING;
do_delete = true;
CLOG_INFO(&LOG, 2, "Old override %s is being deleted", id->name);
}
#if 0
else {
/* Otherwise, keep them; the user needs to decide what to do with them. */
BLI_assert((id->tag & LIB_TAG_DOIT) == 0);
do_delete = false;
id_fake_user_set(id);
id->flag |= LIB_LIB_OVERRIDE_RESYNC_LEFTOVER;
CLOG_INFO(&LOG, 2, "Old override %s is being kept around as it was user-edited", id->name);
@ -2125,17 +2141,27 @@ static bool lib_override_library_resync(Main *bmain,
else {
/* Delete them nevertheless, with a fat warning; the user needs to decide whether they want
* to save that version of the file (and accept the loss), or not. */
id->tag |= LIB_TAG_DOIT;
id->tag &= ~LIB_TAG_MISSING;
do_delete = true;
CLOG_WARN(
&LOG, "Old override %s is being deleted even though it was user-edited", id->name);
user_edited_overrides_deletion_count++;
}
#endif
if (do_delete) {
id->tag |= LIB_TAG_DOIT;
id->tag &= ~LIB_TAG_MISSING;
}
}
}
FOREACH_MAIN_ID_END;
/* Finalize tagging old liboverrides for deletion. */
for (LinkNode *ln_iter = id_override_old_list; ln_iter != nullptr; ln_iter = ln_iter->next) {
ID *id_override_old = static_cast<ID *>(ln_iter->link);
id_override_old->tag |= LIB_TAG_DOIT;
}
BLI_linklist_free(id_override_old_list, nullptr);
/* Cleanup, many pointers in this GHash are already invalid now. */
BLI_ghash_free(linkedref_to_old_override, nullptr, nullptr);
@ -2375,6 +2401,41 @@ static bool lib_override_resync_tagging_finalize_recurse(
return is_ancestor_tagged_for_resync;
}
/* Return true if the ID should be skipped for resync given the current context. */
static bool lib_override_library_main_resync_id_skip_check(ID *id,
const int library_indirect_level)
{
if (!ID_IS_OVERRIDE_LIBRARY_REAL(id)) {
return true;
}
if (!lib_override_resync_id_lib_level_is_valid(id, library_indirect_level, true)) {
return true;
}
/* Do not attempt to resync from missing data. */
if (((id->tag | id->override_library->reference->tag) & LIB_TAG_MISSING) != 0) {
return true;
}
if (id->override_library->flag & IDOVERRIDE_LIBRARY_FLAG_NO_HIERARCHY) {
/* This ID is not part of an override hierarchy. */
BLI_assert((id->tag & LIB_TAG_LIB_OVERRIDE_NEED_RESYNC) == 0);
return true;
}
/* Do not attempt to resync when the hierarchy root is missing, as this would usually do
* more harm than good. */
ID *hierarchy_root = id->override_library->hierarchy_root;
if (hierarchy_root == nullptr ||
((hierarchy_root->tag | hierarchy_root->override_library->reference->tag) &
LIB_TAG_MISSING) != 0) {
return true;
}
return false;
}
/* Ensure resync of all overrides at one level of indirect usage.
*
* We need to handle each level independently, since an override at level n may be affected by
@ -2411,11 +2472,7 @@ static void lib_override_library_main_resync_on_library_indirect_level(
lib_override_group_tag_data_object_to_collection_init(&data);
ID *id;
FOREACH_MAIN_ID_BEGIN (bmain, id) {
if (!ID_IS_OVERRIDE_LIBRARY_REAL(id)) {
continue;
}
if (!lib_override_resync_id_lib_level_is_valid(id, library_indirect_level, true)) {
if (lib_override_library_main_resync_id_skip_check(id, library_indirect_level)) {
continue;
}
@ -2424,16 +2481,6 @@ static void lib_override_library_main_resync_on_library_indirect_level(
continue;
}
/* Do not attempt to resync from missing data. */
if (((id->tag | id->override_library->reference->tag) & LIB_TAG_MISSING) != 0) {
continue;
}
if (id->override_library->flag & IDOVERRIDE_LIBRARY_FLAG_NO_HIERARCHY) {
/* This ID is not part of an override hierarchy. */
continue;
}
data.id_root = id->override_library->reference;
lib_override_linked_group_tag(&data);
BKE_main_relations_tag_set(bmain, MAINIDRELATIONS_ENTRY_TAGS_PROCESSED, false);
@ -2449,22 +2496,7 @@ static void lib_override_library_main_resync_on_library_indirect_level(
* such, or the one using linked data that is now tagged as needing override. */
BKE_main_relations_tag_set(bmain, MAINIDRELATIONS_ENTRY_TAGS_PROCESSED, false);
FOREACH_MAIN_ID_BEGIN (bmain, id) {
if (!ID_IS_OVERRIDE_LIBRARY_REAL(id)) {
continue;
}
if (!lib_override_resync_id_lib_level_is_valid(id, library_indirect_level, true)) {
continue;
}
/* Do not attempt to resync from missing data. */
if (((id->tag | id->override_library->reference->tag) & LIB_TAG_MISSING) != 0) {
continue;
}
if (id->override_library->flag & IDOVERRIDE_LIBRARY_FLAG_NO_HIERARCHY) {
/* This ID is not part of an override hierarchy. */
BLI_assert((id->tag & LIB_TAG_LIB_OVERRIDE_NEED_RESYNC) == 0);
if (lib_override_library_main_resync_id_skip_check(id, library_indirect_level)) {
continue;
}
@ -2687,6 +2719,9 @@ static int lib_override_libraries_index_define(Main *bmain)
do_continue = false;
ID *id;
FOREACH_MAIN_ID_BEGIN (bmain, id) {
/* NOTE: In theory all non-liboverride IDs could be skipped here. This does not give any
* performance boost though, so for now keep it as is (i.e. also consider non-liboverride
* relationships to establish the library hierarchy). */
BKE_library_foreach_ID_link(
bmain, id, lib_override_sort_libraries_func, &do_continue, IDWALK_READONLY);
}

File diff suppressed because it is too large

View File

@ -24,6 +24,7 @@
#include "BLT_translation.h"
#include "DNA_defaults.h"
#include "DNA_userdef_types.h"
#define U BLI_STATIC_ASSERT(false, "Global 'U' not allowed, only use arguments passed in!")
@ -37,6 +38,7 @@ bUserAssetLibrary *BKE_preferences_asset_library_add(UserDef *userdef,
const char *path)
{
bUserAssetLibrary *library = MEM_callocN(sizeof(*library), "bUserAssetLibrary");
memcpy(library, DNA_struct_default_get(bUserAssetLibrary), sizeof(*library));
BLI_addtail(&userdef->asset_libraries, library);
@ -46,7 +48,6 @@ bUserAssetLibrary *BKE_preferences_asset_library_add(UserDef *userdef,
if (path) {
BLI_strncpy(library->path, path, sizeof(library->path));
}
library->import_method = ASSET_IMPORT_APPEND_REUSE;
return library;
}

View File

@ -242,7 +242,7 @@ void BKE_screen_foreach_id_screen_area(LibraryForeachIDData *data, ScrArea *area
}
}
/* Both `snode->id` and `snode->nodetree` have been remapped now, sotheir data can be
/* Both `snode->id` and `snode->nodetree` have been remapped now, so their data can be
* accessed. */
BLI_assert(snode->id == NULL || snode->nodetree == NULL ||
(snode->nodetree->id.flag & LIB_EMBEDDED_DATA) == 0 ||

View File

@ -1728,7 +1728,7 @@ static void subdiv_foreach_mark_non_loose_geometry(SubdivForeachTaskContext *ctx
for (const int poly_index : ctx->coarse_polys.index_range()) {
for (const int corner : ctx->coarse_polys[poly_index]) {
BLI_BITMAP_ENABLE(ctx->coarse_vertices_used_map, ctx->coarse_corner_verts[corner]);
BLI_BITMAP_ENABLE(ctx->coarse_edges_used_map, ctx->coarse_corner_verts[corner]);
BLI_BITMAP_ENABLE(ctx->coarse_edges_used_map, ctx->coarse_corner_edges[corner]);
}
}
}

View File

@ -1787,16 +1787,8 @@ bool BKE_vfont_to_curve_ex(Object *ob,
};
do {
data.ok &= vfont_to_curve(ob,
cu,
mode,
&data,
NULL,
r_nubase,
r_text,
r_text_len,
r_text_free,
r_chartransdata);
data.ok &= vfont_to_curve(
ob, cu, mode, &data, NULL, r_nubase, r_text, r_text_len, r_text_free, r_chartransdata);
} while (data.ok && ELEM(data.status, VFONT_TO_CURVE_SCALE_ONCE, VFONT_TO_CURVE_BISECT));
return data.ok;

View File

@ -1349,6 +1349,18 @@ void do_versions_after_linking_300(FileData * /*fd*/, Main *bmain)
FOREACH_NODETREE_END;
}
if (!MAIN_VERSION_ATLEAST(bmain, 306, 6)) {
LISTBASE_FOREACH (Scene *, scene, &bmain->scenes) {
Editing *ed = SEQ_editing_get(scene);
if (ed == nullptr) {
continue;
}
SEQ_for_each_callback(
&scene->ed->seqbase, do_versions_sequencer_init_retiming_tool_data, scene);
}
}
/**
* Versioning code until next subversion bump goes here.
*
@ -1361,16 +1373,6 @@ void do_versions_after_linking_300(FileData * /*fd*/, Main *bmain)
*/
{
/* Keep this block, even when empty. */
LISTBASE_FOREACH (Scene *, scene, &bmain->scenes) {
Editing *ed = SEQ_editing_get(scene);
if (ed == nullptr) {
continue;
}
SEQ_for_each_callback(
&scene->ed->seqbase, do_versions_sequencer_init_retiming_tool_data, scene);
}
}
}

View File

@ -814,6 +814,12 @@ void blo_do_versions_userdef(UserDef *userdef)
}
}
if (!USER_VERSION_ATLEAST(306, 6)) {
LISTBASE_FOREACH (bUserAssetLibrary *, asset_library, &userdef->asset_libraries) {
asset_library->flag |= ASSET_LIBRARY_RELATIVE_PATH;
}
}
/**
* Versioning code until next subversion bump goes here.
*

View File

@ -2,16 +2,7 @@
#pragma once
#include <memory>
#include "BLI_map.hh"
#include "BLI_math_vector_types.hh"
#include "DNA_scene_types.h"
#include "DNA_texture_types.h"
#include "COM_cached_texture.hh"
#include "COM_context.hh"
#include "COM_morphological_distance_feather_weights.hh"
#include "COM_smaa_precomputed_textures.hh"
#include "COM_symmetric_blur_weights.hh"
@ -19,15 +10,14 @@
namespace blender::realtime_compositor {
class Context;
/* -------------------------------------------------------------------------------------------------
* Static Cache Manager
*
* A static cache manager is a collection of cached resources that can be retrieved when needed and
* created if not already available. In particular, each cached resource type has its own Map in
* the class, where all instances of that cached resource type are stored and tracked. See the
* CachedResource class for more information.
* created if not already available. In particular, each cached resource type has its own instance
* of a container derived from the CachedResourceContainer type in the class. All instances of that
* cached resource type are stored and tracked in the container. See the CachedResource and
* CachedResourceContainer classes for more information.
*
* The manager deletes the cached resources that are no longer needed. A cached resource is said to
* be not needed when it was not used in the previous evaluation. This is done through the
@ -43,65 +33,18 @@ class Context;
* evaluation will be deleted before the next evaluation. This mechanism is implemented in the
* reset() method of the class, which should be called before every evaluation. */
class StaticCacheManager {
private:
/* A map that stores all SymmetricBlurWeights cached resources. */
Map<SymmetricBlurWeightsKey, std::unique_ptr<SymmetricBlurWeights>> symmetric_blur_weights_;
/* A map that stores all SymmetricSeparableBlurWeights cached resources. */
Map<SymmetricSeparableBlurWeightsKey, std::unique_ptr<SymmetricSeparableBlurWeights>>
symmetric_separable_blur_weights_;
/* A map that stores all MorphologicalDistanceFeatherWeights cached resources. */
Map<MorphologicalDistanceFeatherWeightsKey, std::unique_ptr<MorphologicalDistanceFeatherWeights>>
morphological_distance_feather_weights_;
/* A nested map that stores all CachedTexture cached resources. The outer map identifies the
* textures using their ID name, while the inner map identifies the textures using their
* parameters. */
Map<std::string, Map<CachedTextureKey, std::unique_ptr<CachedTexture>>> cached_textures_;
/* A unique pointer that stores the cached SMAAPrecomputedTextures, if one is cached. */
std::unique_ptr<SMAAPrecomputedTextures> smaa_precomputed_textures_;
public:
SymmetricBlurWeightsContainer symmetric_blur_weights;
SymmetricSeparableBlurWeightsContainer symmetric_separable_blur_weights;
MorphologicalDistanceFeatherWeightsContainer morphological_distance_feather_weights;
SMAAPrecomputedTexturesContainer smaa_precomputed_textures;
CachedTextureContainer cached_textures;
/* Reset the cache manager by deleting the cached resources that are no longer needed because
* they weren't used in the last evaluation and prepare the remaining cached resources to track
* their needed status in the next evaluation. See the class description for more information.
* This should be called before every evaluation. */
void reset();
/* Check if there is an available SymmetricBlurWeights cached resource with the given parameters
* in the manager, if one exists, return it, otherwise, return a newly created one and add it to
* the manager. In both cases, tag the cached resource as needed to keep it cached for the next
* evaluation. */
SymmetricBlurWeights &get_symmetric_blur_weights(int type, float2 radius);
/* Check if there is an available SymmetricSeparableBlurWeights cached resource with the given
* parameters in the manager, if one exists, return it, otherwise, return a newly created one and
* add it to the manager. In both cases, tag the cached resource as needed to keep it cached for
* the next evaluation. */
SymmetricSeparableBlurWeights &get_symmetric_separable_blur_weights(int type, float radius);
/* Check if there is an available MorphologicalDistanceFeatherWeights cached resource with the
* given parameters in the manager, if one exists, return it, otherwise, return a newly created
* one and add it to the manager. In both cases, tag the cached resource as needed to keep it
* cached for the next evaluation. */
MorphologicalDistanceFeatherWeights &get_morphological_distance_feather_weights(int type,
int radius);
/* Check if the given texture ID has changed since the last time it was retrieved through its
* recalculate flag, and if so, invalidate its corresponding cached textures and reset the
* recalculate flag to ready it to track the next change. Then, check if there is an available
* CachedTexture cached resource with the given parameters in the manager, if one exists, return
* it, otherwise, return a newly created one and add it to the manager. In both cases, tag the
* cached resource as needed to keep it cached for the next evaluation. */
CachedTexture &get_cached_texture(
Context &context, Tex *texture, const Scene *scene, int2 size, float2 offset, float2 scale);
/* Check if a cached SMAA precomputed texture exists, if it does, return it, otherwise, return
* a newly created one and store it in the manager. In both cases, tag the cached resource as
* needed to keep it cached for the next evaluation. */
SMAAPrecomputedTextures &get_smaa_precomputed_textures();
};
} // namespace blender::realtime_compositor
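
For context, the intended per-evaluation flow with the refactored manager looks roughly like this (a minimal sketch; the `evaluate_compositor` wrapper is hypothetical, only `cache_manager()`, `reset()`, and the container `get()` methods come from the code above):

/* Minimal sketch of the caching protocol described above. */
void evaluate_compositor(Context &context)
{
  /* Delete resources that were not used in the previous evaluation and
   * clear the needed flags of the survivors. */
  context.cache_manager().reset();

  /* Operations then fetch resources through the public containers. Each
   * get() tags the returned resource as needed, so it survives the next
   * reset() and is reused instead of recomputed. */
  const SymmetricBlurWeights &weights =
      context.cache_manager().symmetric_blur_weights.get(0, float2(5.0f));
  UNUSED_VARS(weights);
}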

View File

@ -73,7 +73,7 @@ static Result calculate_blending_weights(Context &context, Result &edges, int co
edges.bind_as_texture(shader, "edges_tx");
const SMAAPrecomputedTextures &smaa_precomputed_textures =
context.cache_manager().get_smaa_precomputed_textures();
context.cache_manager().smaa_precomputed_textures.get();
smaa_precomputed_textures.bind_area_texture(shader, "area_tx");
smaa_precomputed_textures.bind_search_texture(shader, "search_tx");

View File

@ -33,7 +33,7 @@ static Result horizontal_pass(Context &context,
input.bind_as_texture(shader, "input_tx");
const SymmetricSeparableBlurWeights &weights =
context.cache_manager().get_symmetric_separable_blur_weights(filter_type, radius);
context.cache_manager().symmetric_separable_blur_weights.get(filter_type, radius);
weights.bind_as_texture(shader, "weights_tx");
Domain domain = input.domain();
@ -84,7 +84,7 @@ static void vertical_pass(Context &context,
horizontal_pass_result.bind_as_texture(shader, "input_tx");
const SymmetricSeparableBlurWeights &weights =
context.cache_manager().get_symmetric_separable_blur_weights(filter_type, radius.y);
context.cache_manager().symmetric_separable_blur_weights.get(filter_type, radius.y);
weights.bind_as_texture(shader, "weights_tx");
Domain domain = original_input.domain();

View File

@ -8,17 +8,17 @@ namespace blender::realtime_compositor {
* Cached Resource.
*
* A cached resource is any resource that can be cached across compositor evaluations and across
* multiple operations. Cached resources are managed by an instance of a StaticCacheManager and are
* freed when they are no longer needed, a state which is represented by the `needed` member in the
* class. For more information on the caching mechanism, see the StaticCacheManager class.
* multiple operations. Cached resources are managed by an instance of a StaticCacheManager, stored
* in an instance of a CachedResourceContainer, and are freed when they are no longer needed, a
* state which is represented by the `needed` member in the class. For more information on the
* caching mechanism, see the StaticCacheManager class.
*
* To add a new cached resource:
*
* - Create a key class that can be used to identify the resource in a Map if needed.
* - Create a derived class from CachedResource to represent the resource.
* - Create a key class that can be used in a Map to identify the resource.
* - Add a new Map to StaticCacheManager mapping the key to the resource.
* - Reset the contents of the added map in StaticCacheManager::reset.
* - Add an appropriate getter method in StaticCacheManager.
* - Create a derived class from CachedResourceContainer to store the resources.
* - Add an instance of the container to StaticCacheManager and call its reset method.
*
* See the existing cached resources for reference. */
class CachedResource {
@ -28,4 +28,23 @@ class CachedResource {
bool needed = true;
};
/* -------------------------------------------------------------------------------------------------
* Cached Resource Container.
*
* A cached resource container stores all the cached resources for a specific cached resource type.
* The cached resources are typically stored in a map identified by a key type. The reset method
* should be implemented as described in StaticCacheManager::reset. An appropriate getter method
* should be provided that properly sets the CachedResource::needed flag, as described in the
* StaticCacheManager class.
*
* See the existing cached resources for reference. */
class CachedResourceContainer {
public:
/* Reset the container by deleting the cached resources that are no longer needed because they
* weren't used in the last evaluation and prepare the remaining cached resources to track their
* needed status in the next evaluation. See the description of the StaticCacheManager class for
* more information. This should be called in StaticCacheManager::reset. */
virtual void reset() = 0;
};
} // namespace blender::realtime_compositor
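
As a concrete illustration of the steps listed above, a hypothetical cached resource and its container could look like the following (a sketch only; `FooWeights`, its key, and its container do not exist in the source):

/* Hypothetical cached resource and container following the recipe above. */
class FooWeightsKey {
 public:
  int type;
  float radius;

  uint64_t hash() const
  {
    /* Hand-rolled hash, sufficient for a sketch. */
    return (uint64_t(type) * 31) ^ uint64_t(radius * 100.0f);
  }

  friend bool operator==(const FooWeightsKey &a, const FooWeightsKey &b)
  {
    return a.type == b.type && a.radius == b.radius;
  }
};

class FooWeights : public CachedResource {
 public:
  FooWeights(int /*type*/, float /*radius*/) {}
};

class FooWeightsContainer : public CachedResourceContainer {
 private:
  Map<FooWeightsKey, std::unique_ptr<FooWeights>> map_;

 public:
  void reset() override
  {
    /* Delete resources unused in the last evaluation, then clear the needed
     * flags of the remaining ones, exactly like the other containers. */
    map_.remove_if([](auto item) { return !item.value->needed; });
    for (auto &value : map_.values()) {
      value->needed = false;
    }
  }

  FooWeights &get(int type, float radius)
  {
    const FooWeightsKey key = {type, radius};
    FooWeights &weights = *map_.lookup_or_add_cb(
        key, [&]() { return std::make_unique<FooWeights>(type, radius); });
    weights.needed = true;
    return weights;
  }
};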

View File

@ -3,7 +3,10 @@
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include "BLI_map.hh"
#include "BLI_math_vector_types.hh"
#include "GPU_texture.h"
@ -15,6 +18,8 @@
namespace blender::realtime_compositor {
class Context;
/* ------------------------------------------------------------------------------------------------
* Cached Texture Key.
*/
@ -52,4 +57,24 @@ class CachedTexture : public CachedResource {
GPUTexture *value_texture();
};
/* ------------------------------------------------------------------------------------------------
* Cached Texture Container.
*/
class CachedTextureContainer : public CachedResourceContainer {
private:
Map<std::string, Map<CachedTextureKey, std::unique_ptr<CachedTexture>>> map_;
public:
void reset() override;
/* Check if the given texture ID has changed since the last time it was retrieved through its
* recalculate flag, and if so, invalidate its corresponding cached textures and reset the
* recalculate flag to ready it to track the next change. Then, check if there is an available
* CachedTexture cached resource with the given parameters in the container, if one exists,
* return it, otherwise, return a newly created one and add it to the container. In both cases,
* tag the cached resource as needed to keep it cached for the next evaluation. */
CachedTexture &get(
Context &context, Tex *texture, const Scene *scene, int2 size, float2 offset, float2 scale);
};
} // namespace blender::realtime_compositor

View File

@ -3,6 +3,9 @@
#pragma once
#include <cstdint>
#include <memory>
#include "BLI_map.hh"
#include "GPU_shader.h"
#include "GPU_texture.h"
@ -58,4 +61,22 @@ class MorphologicalDistanceFeatherWeights : public CachedResource {
void unbind_distance_falloffs_as_texture() const;
};
/* ------------------------------------------------------------------------------------------------
* Morphological Distance Feather Weights Container.
*/
class MorphologicalDistanceFeatherWeightsContainer : public CachedResourceContainer {
private:
Map<MorphologicalDistanceFeatherWeightsKey, std::unique_ptr<MorphologicalDistanceFeatherWeights>>
map_;
public:
void reset() override;
/* Check if there is an available MorphologicalDistanceFeatherWeights cached resource with the
* given parameters in the container, if one exists, return it, otherwise, return a newly created
* one and add it to the container. In both cases, tag the cached resource as needed to keep it
* cached for the next evaluation. */
MorphologicalDistanceFeatherWeights &get(int type, int radius);
};
} // namespace blender::realtime_compositor

View File

@ -2,6 +2,8 @@
#pragma once
#include <memory>
#include "GPU_shader.h"
#include "GPU_texture.h"
@ -33,4 +35,20 @@ class SMAAPrecomputedTextures : public CachedResource {
void unbind_area_texture() const;
};
/* ------------------------------------------------------------------------------------------------
* SMAA Precomputed Textures Container.
*/
class SMAAPrecomputedTexturesContainer : public CachedResourceContainer {
private:
std::unique_ptr<SMAAPrecomputedTextures> textures_;
public:
void reset() override;
/* Check if a cached SMAA precomputed texture exists, if it does, return it, otherwise, return
* a newly created one and store it in the container. In both cases, tag the cached resource as
* needed to keep it cached for the next evaluation. */
SMAAPrecomputedTextures &get();
};
} // namespace blender::realtime_compositor

View File

@ -3,7 +3,9 @@
#pragma once
#include <cstdint>
#include <memory>
#include "BLI_map.hh"
#include "BLI_math_vector_types.hh"
#include "GPU_shader.h"
@ -49,4 +51,21 @@ class SymmetricBlurWeights : public CachedResource {
void unbind_as_texture() const;
};
/* ------------------------------------------------------------------------------------------------
* Symmetric Blur Weights Container.
*/
class SymmetricBlurWeightsContainer : public CachedResourceContainer {
private:
Map<SymmetricBlurWeightsKey, std::unique_ptr<SymmetricBlurWeights>> map_;
public:
void reset() override;
/* Check if there is an available SymmetricBlurWeights cached resource with the given parameters
* in the container, if one exists, return it, otherwise, return a newly created one and add it
* to the container. In both cases, tag the cached resource as needed to keep it cached for the
* next evaluation. */
SymmetricBlurWeights &get(int type, float2 radius);
};
} // namespace blender::realtime_compositor

View File

@ -3,7 +3,9 @@
#pragma once
#include <cstdint>
#include <memory>
#include "BLI_map.hh"
#include "BLI_math_vector_types.hh"
#include "GPU_shader.h"
@ -50,4 +52,22 @@ class SymmetricSeparableBlurWeights : public CachedResource {
void unbind_as_texture() const;
};
/* ------------------------------------------------------------------------------------------------
* Symmetric Separable Blur Weights Container.
*/
class SymmetricSeparableBlurWeightsContainer : public CachedResourceContainer {
private:
Map<SymmetricSeparableBlurWeightsKey, std::unique_ptr<SymmetricSeparableBlurWeights>> map_;
public:
void reset() override;
/* Check if there is an available SymmetricSeparableBlurWeights cached resource with the given
* parameters in the container, if one exists, return it, otherwise, return a newly created one
* and add it to the container. In both cases, tag the cached resource as needed to keep it
* cached for the next evaluation. */
SymmetricSeparableBlurWeights &get(int type, float radius);
};
} // namespace blender::realtime_compositor

View File

@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <cstdint>
#include <memory>
#include "BLI_array.hh"
#include "BLI_hash.hh"
@ -12,12 +13,14 @@
#include "BKE_texture.h"
#include "DNA_ID.h"
#include "DNA_scene_types.h"
#include "DNA_texture_types.h"
#include "RE_texture.h"
#include "COM_cached_texture.hh"
#include "COM_context.hh"
namespace blender::realtime_compositor {
@ -99,4 +102,44 @@ GPUTexture *CachedTexture::value_texture()
return value_texture_;
}
/* --------------------------------------------------------------------
* Cached Texture Container.
*/
void CachedTextureContainer::reset()
{
/* First, delete all cached textures that are no longer needed. */
for (auto &cached_textures_for_id : map_.values()) {
cached_textures_for_id.remove_if([](auto item) { return !item.value->needed; });
}
map_.remove_if([](auto item) { return item.value.is_empty(); });
/* Second, reset the needed status of the remaining cached textures to false to ready them to
* track their needed status for the next evaluation. */
for (auto &cached_textures_for_id : map_.values()) {
for (auto &value : cached_textures_for_id.values()) {
value->needed = false;
}
}
}
CachedTexture &CachedTextureContainer::get(
Context &context, Tex *texture, const Scene *scene, int2 size, float2 offset, float2 scale)
{
const CachedTextureKey key(size, offset, scale);
auto &cached_textures_for_id = map_.lookup_or_add_default(texture->id.name);
/* Invalidate the cache for that texture ID if it was changed and reset the recalculate flag. */
if (context.query_id_recalc_flag(reinterpret_cast<ID *>(texture)) & ID_RECALC_ALL) {
cached_textures_for_id.clear();
}
auto &cached_texture = *cached_textures_for_id.lookup_or_add_cb(
key, [&]() { return std::make_unique<CachedTexture>(texture, scene, size, offset, scale); });
cached_texture.needed = true;
return cached_texture;
}
} // namespace blender::realtime_compositor

View File

@ -2,6 +2,7 @@
#include <cmath>
#include <cstdint>
#include <memory>
#include "BLI_array.hh"
#include "BLI_hash.hh"
@ -157,4 +158,32 @@ void MorphologicalDistanceFeatherWeights::unbind_distance_falloffs_as_texture()
GPU_texture_unbind(distance_falloffs_texture_);
}
/* --------------------------------------------------------------------
* Morphological Distance Feather Weights Container.
*/
void MorphologicalDistanceFeatherWeightsContainer::reset()
{
/* First, delete all resources that are no longer needed. */
map_.remove_if([](auto item) { return !item.value->needed; });
/* Second, reset the needed status of the remaining resources to false to ready them to track
* their needed status for the next evaluation. */
for (auto &value : map_.values()) {
value->needed = false;
}
}
MorphologicalDistanceFeatherWeights &MorphologicalDistanceFeatherWeightsContainer::get(int type,
int radius)
{
const MorphologicalDistanceFeatherWeightsKey key(type, radius);
auto &weights = *map_.lookup_or_add_cb(
key, [&]() { return std::make_unique<MorphologicalDistanceFeatherWeights>(type, radius); });
weights.needed = true;
return weights;
}
} // namespace blender::realtime_compositor

View File

@ -1,5 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <memory>
#include "BLI_smaa_textures.h"
#include "GPU_shader.h"
@ -9,6 +11,10 @@
namespace blender::realtime_compositor {
/* ------------------------------------------------------------------------------------------------
* SMAA Precomputed Textures.
*/
SMAAPrecomputedTextures::SMAAPrecomputedTextures()
{
search_texture_ = GPU_texture_create_2d("SMAA Search",
@ -61,4 +67,32 @@ void SMAAPrecomputedTextures::unbind_area_texture() const
GPU_texture_unbind(area_texture_);
}
/* ------------------------------------------------------------------------------------------------
* SMAA Precomputed Textures Container.
*/
void SMAAPrecomputedTexturesContainer::reset()
{
/* First, delete the textures if they are no longer needed. */
if (textures_ && !textures_->needed) {
textures_.reset();
}
/* Second, if they were not deleted, reset their needed status to false to ready them to track
* their needed status for the next evaluation. */
if (textures_) {
textures_->needed = false;
}
}
SMAAPrecomputedTextures &SMAAPrecomputedTexturesContainer::get()
{
if (!textures_) {
textures_ = std::make_unique<SMAAPrecomputedTextures>();
}
textures_->needed = true;
return *textures_;
}
} // namespace blender::realtime_compositor

View File

@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <cstdint>
#include <memory>
#include "BLI_array.hh"
#include "BLI_hash.hh"
@ -113,4 +114,31 @@ void SymmetricBlurWeights::unbind_as_texture() const
GPU_texture_unbind(texture_);
}
/* --------------------------------------------------------------------
* Symmetric Blur Weights Container.
*/
void SymmetricBlurWeightsContainer::reset()
{
/* First, delete all resources that are no longer needed. */
map_.remove_if([](auto item) { return !item.value->needed; });
/* Second, reset the needed status of the remaining resources to false to ready them to track
* their needed status for the next evaluation. */
for (auto &value : map_.values()) {
value->needed = false;
}
}
SymmetricBlurWeights &SymmetricBlurWeightsContainer::get(int type, float2 radius)
{
const SymmetricBlurWeightsKey key(type, radius);
auto &weights = *map_.lookup_or_add_cb(
key, [&]() { return std::make_unique<SymmetricBlurWeights>(type, radius); });
weights.needed = true;
return weights;
}
} // namespace blender::realtime_compositor

View File

@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <cstdint>
#include <memory>
#include "BLI_array.hh"
#include "BLI_hash.hh"
@ -91,4 +92,31 @@ void SymmetricSeparableBlurWeights::unbind_as_texture() const
GPU_texture_unbind(texture_);
}
/* --------------------------------------------------------------------
* Symmetric Separable Blur Weights Container.
*/
void SymmetricSeparableBlurWeightsContainer::reset()
{
/* First, delete all resources that are no longer needed. */
map_.remove_if([](auto item) { return !item.value->needed; });
/* Second, reset the needed status of the remaining resources to false to ready them to track
* their needed status for the next evaluation. */
for (auto &value : map_.values()) {
value->needed = false;
}
}
SymmetricSeparableBlurWeights &SymmetricSeparableBlurWeightsContainer::get(int type, float radius)
{
const SymmetricSeparableBlurWeightsKey key(type, radius);
auto &weights = *map_.lookup_or_add_cb(
key, [&]() { return std::make_unique<SymmetricSeparableBlurWeights>(type, radius); });
weights.needed = true;
return weights;
}
} // namespace blender::realtime_compositor

View File

@ -1,125 +1,16 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <memory>
#include "BLI_math_vector_types.hh"
#include "DNA_ID.h"
#include "DNA_scene_types.h"
#include "DNA_texture_types.h"
#include "COM_context.hh"
#include "COM_morphological_distance_feather_weights.hh"
#include "COM_smaa_precomputed_textures.hh"
#include "COM_symmetric_blur_weights.hh"
#include "COM_symmetric_separable_blur_weights.hh"
#include "COM_static_cache_manager.hh"
namespace blender::realtime_compositor {
/* --------------------------------------------------------------------
* Static Cache Manager.
*/
void StaticCacheManager::reset()
{
/* First, delete all resources that are no longer needed. */
symmetric_blur_weights_.remove_if([](auto item) { return !item.value->needed; });
symmetric_separable_blur_weights_.remove_if([](auto item) { return !item.value->needed; });
morphological_distance_feather_weights_.remove_if([](auto item) { return !item.value->needed; });
for (auto &cached_textures_for_id : cached_textures_.values()) {
cached_textures_for_id.remove_if([](auto item) { return !item.value->needed; });
}
cached_textures_.remove_if([](auto item) { return item.value.is_empty(); });
if (smaa_precomputed_textures_ && !smaa_precomputed_textures_->needed) {
smaa_precomputed_textures_.reset();
}
/* Second, reset the needed status of the remaining resources to false to ready them to track
* their needed status for the next evaluation. */
for (auto &value : symmetric_blur_weights_.values()) {
value->needed = false;
}
for (auto &value : symmetric_separable_blur_weights_.values()) {
value->needed = false;
}
for (auto &value : morphological_distance_feather_weights_.values()) {
value->needed = false;
}
for (auto &cached_textures_for_id : cached_textures_.values()) {
for (auto &value : cached_textures_for_id.values()) {
value->needed = false;
}
}
if (smaa_precomputed_textures_) {
smaa_precomputed_textures_->needed = false;
}
}
SymmetricBlurWeights &StaticCacheManager::get_symmetric_blur_weights(int type, float2 radius)
{
const SymmetricBlurWeightsKey key(type, radius);
auto &weights = *symmetric_blur_weights_.lookup_or_add_cb(
key, [&]() { return std::make_unique<SymmetricBlurWeights>(type, radius); });
weights.needed = true;
return weights;
}
SymmetricSeparableBlurWeights &StaticCacheManager::get_symmetric_separable_blur_weights(
int type, float radius)
{
const SymmetricSeparableBlurWeightsKey key(type, radius);
auto &weights = *symmetric_separable_blur_weights_.lookup_or_add_cb(
key, [&]() { return std::make_unique<SymmetricSeparableBlurWeights>(type, radius); });
weights.needed = true;
return weights;
}
MorphologicalDistanceFeatherWeights &StaticCacheManager::
get_morphological_distance_feather_weights(int type, int radius)
{
const MorphologicalDistanceFeatherWeightsKey key(type, radius);
auto &weights = *morphological_distance_feather_weights_.lookup_or_add_cb(
key, [&]() { return std::make_unique<MorphologicalDistanceFeatherWeights>(type, radius); });
weights.needed = true;
return weights;
}
CachedTexture &StaticCacheManager::get_cached_texture(
Context &context, Tex *texture, const Scene *scene, int2 size, float2 offset, float2 scale)
{
const CachedTextureKey key(size, offset, scale);
auto &cached_textures_for_id = cached_textures_.lookup_or_add_default(texture->id.name);
if (context.query_id_recalc_flag(reinterpret_cast<ID *>(texture)) & ID_RECALC_ALL) {
cached_textures_for_id.clear();
}
auto &cached_texture = *cached_textures_for_id.lookup_or_add_cb(
key, [&]() { return std::make_unique<CachedTexture>(texture, scene, size, offset, scale); });
cached_texture.needed = true;
return cached_texture;
}
SMAAPrecomputedTextures &StaticCacheManager::get_smaa_precomputed_textures()
{
if (!smaa_precomputed_textures_) {
smaa_precomputed_textures_ = std::make_unique<SMAAPrecomputedTextures>();
}
smaa_precomputed_textures_->needed = true;
return *smaa_precomputed_textures_;
symmetric_blur_weights.reset();
symmetric_separable_blur_weights.reset();
morphological_distance_feather_weights.reset();
cached_textures.reset();
smaa_precomputed_textures.reset();
}
} // namespace blender::realtime_compositor

View File

@ -32,6 +32,7 @@ void ED_asset_handle_get_full_library_path(
/* `1090` for #FILE_MAX_LIBEXTRA,
* rely on warnings to let us know if this gets out of sync. */
char r_full_lib_path[1090]);
bool ED_asset_handle_get_use_relative_path(const struct AssetHandle *asset);
#ifdef __cplusplus
}

View File

@ -68,3 +68,8 @@ void ED_asset_handle_get_full_library_path(const AssetHandle *asset_handle,
BLI_strncpy(r_full_lib_path, library_path.c_str(), FILE_MAX);
}
bool ED_asset_handle_get_use_relative_path(const AssetHandle *asset)
{
return AS_asset_representation_use_relative_path_get(asset->file_data->asset);
}

View File

@ -40,7 +40,7 @@ void ED_operatortypes_curve(void)
WM_operatortype_append(FONT_OT_text_paste);
WM_operatortype_append(FONT_OT_text_paste_from_file);
WM_operatortype_append(FONT_OT_selection_set);
WM_operatortype_append(FONT_OT_selection_set);
WM_operatortype_append(FONT_OT_select_word);
WM_operatortype_append(FONT_OT_move);

View File

@ -1917,7 +1917,7 @@ void FONT_OT_selection_set(struct wmOperatorType *ot)
/* -------------------------------------------------------------------- */
/** \name Select Word Operator
* \{ */
* \{ */
static int font_select_word_exec(bContext *C, wmOperator *UNUSED(op))
{

View File

@ -395,8 +395,7 @@ static int wm_usd_import_exec(bContext *C, wmOperator *op)
const bool create_collection = RNA_boolean_get(op->ptr, "create_collection");
char prim_path_mask[1024];
RNA_string_get(op->ptr, "prim_path_mask", prim_path_mask);
char *prim_path_mask = RNA_string_get_alloc(op->ptr, "prim_path_mask", NULL, 0, NULL);
const bool import_guide = RNA_boolean_get(op->ptr, "import_guide");
const bool import_proxy = RNA_boolean_get(op->ptr, "import_proxy");
@ -448,6 +447,7 @@ static int wm_usd_import_exec(bContext *C, wmOperator *op)
.import_meshes = import_meshes,
.import_volumes = import_volumes,
.import_shapes = import_shapes,
.prim_path_mask = prim_path_mask,
.import_subdiv = import_subdiv,
.import_instance_proxies = import_instance_proxies,
.create_collection = create_collection,
@ -464,7 +464,6 @@ static int wm_usd_import_exec(bContext *C, wmOperator *op)
.tex_name_collision_mode = tex_name_collision_mode,
.import_all_materials = import_all_materials};
STRNCPY(params.prim_path_mask, prim_path_mask);
STRNCPY(params.import_textures_dir, import_textures_dir);
const bool ok = USD_import(C, filename, &params, as_background_job);
@ -623,9 +622,10 @@ void WM_OT_usd_import(struct wmOperatorType *ot)
RNA_def_string(ot->srna,
"prim_path_mask",
NULL,
1024,
0,
"Path Mask",
"Import only the subset of the USD scene rooted at the given primitive");
"Import only the primitive at the given path and its descendents. "
"Multiple paths may be specified in a list delimited by commas or semicolons");
RNA_def_boolean(ot->srna, "import_guide", false, "Guide", "Import guide geometry");

View File

@ -16,21 +16,15 @@
namespace blender::ed::outliner {
static void outliner_context_selected_ids_recursive(const SpaceOutliner &space_outliner,
bContextDataResult *result)
static void outliner_context_selected_ids(const SpaceOutliner *space_outliner,
bContextDataResult *result)
{
tree_iterator::all(space_outliner, [&](const TreeElement *te) {
tree_iterator::all(*space_outliner, [&](const TreeElement *te) {
const TreeStoreElem *tse = TREESTORE(te);
if ((tse->flag & TSE_SELECTED) && ELEM(tse->type, TSE_SOME_ID, TSE_LAYER_COLLECTION)) {
CTX_data_id_list_add(result, tse->id);
}
});
}
static void outliner_context_selected_ids(const SpaceOutliner *space_outliner,
bContextDataResult *result)
{
outliner_context_selected_ids_recursive(*space_outliner, result);
CTX_data_type_set(result, CTX_DATA_TYPE_COLLECTION);
}

View File

@ -49,6 +49,7 @@ enum {
/* NOTE: these defines are saved in keymap files, do not change values but just add new ones */
enum {
VIEW_MODAL_CANCEL = 0, /* used for all view operations */
VIEW_MODAL_CONFIRM = 1, /* used for all view operations */
VIEWROT_MODAL_AXIS_SNAP_ENABLE = 2,
VIEWROT_MODAL_AXIS_SNAP_DISABLE = 3,

View File

@ -31,6 +31,7 @@
void viewdolly_modal_keymap(wmKeyConfig *keyconf)
{
static const EnumPropertyItem modal_items[] = {
{VIEW_MODAL_CANCEL, "CANCEL", 0, "Cancel", ""},
{VIEW_MODAL_CONFIRM, "CONFIRM", 0, "Confirm", ""},
{VIEWROT_MODAL_SWITCH_ROTATE, "SWITCH_TO_ROTATE", 0, "Switch to Rotate"},
@ -48,34 +49,6 @@ void viewdolly_modal_keymap(wmKeyConfig *keyconf)
keymap = WM_modalkeymap_ensure(keyconf, "View3D Dolly Modal", modal_items);
/* disabled mode switching for now, can re-implement better, later on */
#if 0
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = LEFTMOUSE,
.value = KM_RELEASE,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ROTATE);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTCTRLKEY,
.value = KM_RELEASE,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ROTATE);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTSHIFTKEY,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_MOVE);
#endif
/* assign map to operators */
WM_modalkeymap_assign(keymap, "VIEW3D_OT_dolly");
}
@ -167,7 +140,7 @@ static int viewdolly_modal(bContext *C, wmOperator *op, const wmEvent *event)
event_code = VIEW_CONFIRM;
}
}
else if (ELEM(event->type, EVT_ESCKEY, RIGHTMOUSE)) {
else if (event->type == EVT_ESCKEY) {
if (event->val == KM_PRESS) {
event_code = VIEW_CANCEL;
}

View File

@ -27,6 +27,7 @@
void viewmove_modal_keymap(wmKeyConfig *keyconf)
{
static const EnumPropertyItem modal_items[] = {
{VIEW_MODAL_CANCEL, "CANCEL", 0, "Cancel", ""},
{VIEW_MODAL_CONFIRM, "CONFIRM", 0, "Confirm", ""},
{VIEWROT_MODAL_SWITCH_ZOOM, "SWITCH_TO_ZOOM", 0, "Switch to Zoom"},
@ -44,53 +45,6 @@ void viewmove_modal_keymap(wmKeyConfig *keyconf)
keymap = WM_modalkeymap_ensure(keyconf, "View3D Move Modal", modal_items);
/* items for modal map */
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = MIDDLEMOUSE,
.value = KM_RELEASE,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEW_MODAL_CONFIRM);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_ESCKEY,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEW_MODAL_CONFIRM);
/* disabled mode switching for now, can re-implement better, later on */
#if 0
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = LEFTMOUSE,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ZOOM);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTCTRLKEY,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ZOOM);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTSHIFTKEY,
.value = KM_RELEASE,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ROTATE);
#endif
/* assign map to operators */
WM_modalkeymap_assign(keymap, "VIEW3D_OT_move");
}
@ -109,6 +63,9 @@ static int viewmove_modal(bContext *C, wmOperator *op, const wmEvent *event)
case VIEW_MODAL_CONFIRM:
event_code = VIEW_CONFIRM;
break;
case VIEW_MODAL_CANCEL:
event_code = VIEW_CANCEL;
break;
case VIEWROT_MODAL_SWITCH_ZOOM:
WM_operator_name_call(C, "VIEW3D_OT_zoom", WM_OP_INVOKE_DEFAULT, NULL, event);
event_code = VIEW_CONFIRM;
@ -128,7 +85,7 @@ static int viewmove_modal(bContext *C, wmOperator *op, const wmEvent *event)
event_code = VIEW_CONFIRM;
}
}
else if (ELEM(event->type, EVT_ESCKEY, RIGHTMOUSE)) {
else if (event->type == EVT_ESCKEY) {
if (event->val == KM_PRESS) {
event_code = VIEW_CANCEL;
}

View File

@ -93,6 +93,9 @@ static int viewroll_modal(bContext *C, wmOperator *op, const wmEvent *event)
case VIEW_MODAL_CONFIRM:
event_code = VIEW_CONFIRM;
break;
case VIEW_MODAL_CANCEL:
event_code = VIEW_CANCEL;
break;
case VIEWROT_MODAL_SWITCH_MOVE:
WM_operator_name_call(C, "VIEW3D_OT_move", WM_OP_INVOKE_DEFAULT, NULL, event);
event_code = VIEW_CONFIRM;
@ -114,7 +117,7 @@ static int viewroll_modal(bContext *C, wmOperator *op, const wmEvent *event)
event_code = VIEW_CONFIRM;
}
}
else if (ELEM(event->type, EVT_ESCKEY, RIGHTMOUSE)) {
else if (event->type == EVT_ESCKEY) {
if (event->val == KM_PRESS) {
event_code = VIEW_CANCEL;
}

View File

@ -24,6 +24,7 @@
void viewrotate_modal_keymap(wmKeyConfig *keyconf)
{
static const EnumPropertyItem modal_items[] = {
{VIEW_MODAL_CANCEL, "CANCEL", 0, "Cancel", ""},
{VIEW_MODAL_CONFIRM, "CONFIRM", 0, "Confirm", ""},
{VIEWROT_MODAL_AXIS_SNAP_ENABLE, "AXIS_SNAP_ENABLE", 0, "Axis Snap", ""},
@ -44,34 +45,6 @@ void viewrotate_modal_keymap(wmKeyConfig *keyconf)
keymap = WM_modalkeymap_ensure(keyconf, "View3D Rotate Modal", modal_items);
/* disabled mode switching for now, can re-implement better, later on */
#if 0
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = LEFTMOUSE,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ZOOM);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTCTRLKEY,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ZOOM);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTSHIFTKEY,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_MOVE);
#endif
/* assign map to operators */
WM_modalkeymap_assign(keymap, "VIEW3D_OT_rotate");
}
@ -333,6 +306,9 @@ static int viewrotate_modal(bContext *C, wmOperator *op, const wmEvent *event)
case VIEW_MODAL_CONFIRM:
event_code = VIEW_CONFIRM;
break;
case VIEW_MODAL_CANCEL:
event_code = VIEW_CANCEL;
break;
case VIEWROT_MODAL_AXIS_SNAP_ENABLE:
vod->axis_snap = true;
event_code = VIEW_APPLY;
@ -361,7 +337,7 @@ static int viewrotate_modal(bContext *C, wmOperator *op, const wmEvent *event)
event_code = VIEW_CONFIRM;
}
}
else if (ELEM(event->type, EVT_ESCKEY, RIGHTMOUSE)) {
else if (event->type == EVT_ESCKEY) {
if (event->val == KM_PRESS) {
event_code = VIEW_CANCEL;
}

View File

@ -31,6 +31,7 @@
void viewzoom_modal_keymap(wmKeyConfig *keyconf)
{
static const EnumPropertyItem modal_items[] = {
{VIEW_MODAL_CANCEL, "CANCEL", 0, "Cancel", ""},
{VIEW_MODAL_CONFIRM, "CONFIRM", 0, "Confirm", ""},
{VIEWROT_MODAL_SWITCH_ROTATE, "SWITCH_TO_ROTATE", 0, "Switch to Rotate"},
@ -48,34 +49,6 @@ void viewzoom_modal_keymap(wmKeyConfig *keyconf)
keymap = WM_modalkeymap_ensure(keyconf, "View3D Zoom Modal", modal_items);
/* disabled mode switching for now, can re-implement better, later on */
#if 0
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = LEFTMOUSE,
.value = KM_RELEASE,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ROTATE);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTCTRLKEY,
.value = KM_RELEASE,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ROTATE);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTSHIFTKEY,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_MOVE);
#endif
/* assign map to operators */
WM_modalkeymap_assign(keymap, "VIEW3D_OT_zoom");
}
@ -383,6 +356,9 @@ static int viewzoom_modal(bContext *C, wmOperator *op, const wmEvent *event)
case VIEW_MODAL_CONFIRM:
event_code = VIEW_CONFIRM;
break;
case VIEW_MODAL_CANCEL:
event_code = VIEW_CANCEL;
break;
case VIEWROT_MODAL_SWITCH_MOVE:
WM_operator_name_call(C, "VIEW3D_OT_move", WM_OP_INVOKE_DEFAULT, NULL, event);
event_code = VIEW_CONFIRM;
@ -408,7 +384,7 @@ static int viewzoom_modal(bContext *C, wmOperator *op, const wmEvent *event)
event_code = VIEW_CONFIRM;
}
}
else if (ELEM(event->type, EVT_ESCKEY, RIGHTMOUSE)) {
else if (event->type == EVT_ESCKEY) {
if (event->val == KM_PRESS) {
event_code = VIEW_CANCEL;
}

View File

@ -47,8 +47,6 @@ VolumeGrid *fog_volume_grid_add_from_mesh(Volume *volume,
const Mesh *mesh,
const float4x4 &mesh_to_volume_space_transform,
float voxel_size,
bool fill_volume,
float exterior_band_width,
float interior_band_width,
float density);
/**

View File

@ -10,8 +10,10 @@
#include "GEO_mesh_to_volume.hh"
#ifdef WITH_OPENVDB
# include <algorithm>
# include <openvdb/openvdb.h>
# include <openvdb/tools/GridTransformer.h>
# include <openvdb/tools/LevelSetUtil.h>
# include <openvdb/tools/VolumeToMesh.h>
namespace blender::geometry {
@ -88,26 +90,28 @@ float volume_compute_voxel_size(const Depsgraph *depsgraph,
float3 bb_max;
bounds_fn(bb_min, bb_max);
/* Compute the voxel size based on the desired number of voxels and the approximated bounding
* box of the volume. */
/* Compute the diagonal of the bounding box. This is used because
* it will always be bigger than the widest side of the mesh. */
const float diagonal = math::distance(math::transform_point(transform, bb_max),
math::transform_point(transform, bb_min));
const float approximate_volume_side_length = diagonal + exterior_band_width * 2.0f;
const float voxel_size = approximate_volume_side_length / res.settings.voxel_amount /
volume_simplify;
return voxel_size;
/* To get the approximate size per voxel, first subtract the exterior band from the requested
* voxel amount, then divide the diagonal by this value, clamped to at least 1. */
const float voxel_size =
(diagonal / std::max(1.0f, float(res.settings.voxel_amount) - 2.0f * exterior_band_width));
/* Return the simplified voxel size. */
return voxel_size / volume_simplify;
}
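
To make the formula concrete, a short worked example (all values are illustrative, none come from the patch): with a transformed bounding-box diagonal of 2.0, a requested voxel amount of 64, an exterior band width of 1.0 voxel, and no simplification, the effective resolution along the diagonal is 64 - 2 = 62 voxels:

/* Hypothetical numbers plugged into the computation above, using the
 * already-included std::max. */
static float voxel_size_example()
{
  const float diagonal = 2.0f;            /* Object-space bounding-box diagonal. */
  const float voxel_amount = 64.0f;       /* Requested resolution. */
  const float exterior_band_width = 1.0f; /* In voxels, one band per side. */
  const float volume_simplify = 1.0f;     /* No viewport simplification. */

  const float voxel_size = diagonal /
                           std::max(1.0f, voxel_amount - 2.0f * exterior_band_width);
  return voxel_size / volume_simplify; /* 2.0 / 62, roughly 0.032. */
}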
static openvdb::FloatGrid::Ptr mesh_to_fog_volume_grid(
const Mesh *mesh,
const float4x4 &mesh_to_volume_space_transform,
const float voxel_size,
const bool fill_volume,
const float exterior_band_width,
const float interior_band_width,
const float density)
{
if (voxel_size == 0.0f) {
if (voxel_size < 1e-5f || interior_band_width <= 0.0f) {
return nullptr;
}
@ -117,22 +121,21 @@ static openvdb::FloatGrid::Ptr mesh_to_fog_volume_grid(
mesh_to_index_space_transform.location() -= 0.5f;
OpenVDBMeshAdapter mesh_adapter{*mesh, mesh_to_index_space_transform};
const float interior = std::max(1.0f, interior_band_width / voxel_size);
/* Convert the bandwidths from object space to index space. */
const float exterior = MAX2(0.001f, exterior_band_width / voxel_size);
const float interior = MAX2(0.001f, interior_band_width / voxel_size);
/* Setting the interior bandwidth to FLT_MAX will make it fill the entire volume. */
openvdb::math::Transform::Ptr transform = openvdb::math::Transform::createLinearTransform(
voxel_size);
openvdb::FloatGrid::Ptr new_grid = openvdb::tools::meshToVolume<openvdb::FloatGrid>(
mesh_adapter, {}, exterior, fill_volume ? FLT_MAX : interior);
mesh_adapter, *transform, 1.0f, interior);
/* Give each grid cell a fixed density for now. */
openvdb::tools::foreach (
new_grid->beginValueOn(),
[density](const openvdb::FloatGrid::ValueOnIter &iter) { iter.setValue(density); });
new_grid->setGridClass(openvdb::GRID_FOG_VOLUME);
openvdb::tools::sdfToFogVolume(*new_grid);
if (density != 1.0f) {
openvdb::tools::foreach (new_grid->beginValueOn(),
[&](const openvdb::FloatGrid::ValueOnIter &iter) {
iter.modifyValue([&](float &value) { value *= density; });
});
}
return new_grid;
}
@ -180,31 +183,12 @@ VolumeGrid *fog_volume_grid_add_from_mesh(Volume *volume,
const Mesh *mesh,
const float4x4 &mesh_to_volume_space_transform,
const float voxel_size,
const bool fill_volume,
const float exterior_band_width,
const float interior_band_width,
const float density)
{
VolumeGrid *c_grid = BKE_volume_grid_add(volume, name.c_str(), VOLUME_GRID_FLOAT);
openvdb::FloatGrid::Ptr grid = openvdb::gridPtrCast<openvdb::FloatGrid>(
BKE_volume_grid_openvdb_for_write(volume, c_grid, false));
/* Generate grid from mesh */
openvdb::FloatGrid::Ptr mesh_grid = mesh_to_fog_volume_grid(mesh,
mesh_to_volume_space_transform,
voxel_size,
fill_volume,
exterior_band_width,
interior_band_width,
density);
if (mesh_grid != nullptr) {
/* Merge the generated grid. Should be cheap because grid has just been created. */
grid->merge(*mesh_grid);
/* Change transform so that the index space is correctly transformed to object space. */
grid->transform().postScale(voxel_size);
}
return c_grid;
openvdb::FloatGrid::Ptr mesh_grid = mesh_to_fog_volume_grid(
mesh, mesh_to_volume_space_transform, voxel_size, interior_band_width, density);
return mesh_grid ? BKE_volume_grid_add_vdb(*volume, name, std::move(mesh_grid)) : nullptr;
}
VolumeGrid *sdf_volume_grid_add_from_mesh(Volume *volume,

View File

@ -313,7 +313,6 @@ UVPackIsland_Params::UVPackIsland_Params()
/* Compact representation for AABB packers. */
class UVAABBIsland {
public:
uv_phi phi;
float2 uv_diagonal;
int64_t index;
float aspect_y;
@ -330,8 +329,10 @@ class UVAABBIsland {
* Technically, the algorithm here is only `O(n)`. In practice, to get reasonable results,
* the input must be pre-sorted, which costs an additional `O(n log n)` time complexity.
*/
static void pack_islands_alpaca_turbo(const Span<UVAABBIsland *> islands,
static void pack_islands_alpaca_turbo(const int64_t start_index,
const Span<UVAABBIsland *> islands,
const float target_aspect_y,
MutableSpan<uv_phi> r_phis,
float *r_max_u,
float *r_max_v)
{
@ -344,7 +345,8 @@ static void pack_islands_alpaca_turbo(const Span<UVAABBIsland *> islands,
float v0 = zigzag ? 0.0f : next_v1;
/* Visit every island in order. */
for (UVAABBIsland *island : islands) {
for (int64_t index = start_index; index < islands.size(); index++) {
UVAABBIsland *island = islands[index];
const float dsm_u = island->uv_diagonal.x;
const float dsm_v = island->uv_diagonal.y;
@ -363,8 +365,10 @@ static void pack_islands_alpaca_turbo(const Span<UVAABBIsland *> islands,
}
/* Place the island. */
island->phi.translation.x = u0 + dsm_u * 0.5f;
island->phi.translation.y = v0 + dsm_v * 0.5f;
uv_phi &phi = r_phis[island->index];
phi.rotation = 0.0f;
phi.translation.x = u0 + dsm_u * 0.5f;
phi.translation.y = v0 + dsm_v * 0.5f;
if (zigzag) {
/* Move upwards. */
v0 += dsm_v;
@ -437,8 +441,10 @@ static void update_hole_rotate(float2 &hole,
* Also adds the concept of a "Hole", which is unused space that can be filled.
* Tracking the "Hole" has a slight performance cost, while improving packing efficiency.
*/
static void pack_islands_alpaca_rotate(const Span<UVAABBIsland *> islands,
static void pack_islands_alpaca_rotate(const int64_t start_index,
const Span<UVAABBIsland *> islands,
const float target_aspect_y,
MutableSpan<uv_phi> r_phis,
float *r_max_u,
float *r_max_v)
{
@ -456,7 +462,9 @@ static void pack_islands_alpaca_rotate(const Span<UVAABBIsland *> islands,
float v0 = zigzag ? 0.0f : next_v1;
/* Visit every island in order. */
for (UVAABBIsland *island : islands) {
for (int64_t index = start_index; index < islands.size(); index++) {
UVAABBIsland *island = islands[index];
uv_phi &phi = r_phis[island->index];
const float uvdiag_x = island->uv_diagonal.x * island->aspect_y;
float min_dsm = std::min(uvdiag_x, island->uv_diagonal.y);
float max_dsm = std::max(uvdiag_x, island->uv_diagonal.y);
@ -464,14 +472,14 @@ static void pack_islands_alpaca_rotate(const Span<UVAABBIsland *> islands,
if (min_dsm < hole_diagonal.x && max_dsm < hole_diagonal.y) {
/* Place island in the hole. */
if (hole_rotate == (min_dsm == island->uv_diagonal.x)) {
island->phi.rotation = DEG2RADF(90.0f);
island->phi.translation.x = hole[0] + island->uv_diagonal.y * 0.5f / island->aspect_y;
island->phi.translation.y = hole[1] + island->uv_diagonal.x * 0.5f * island->aspect_y;
phi.rotation = DEG2RADF(90.0f);
phi.translation.x = hole[0] + island->uv_diagonal.y * 0.5f / island->aspect_y;
phi.translation.y = hole[1] + island->uv_diagonal.x * 0.5f * island->aspect_y;
}
else {
island->phi.rotation = 0.0f;
island->phi.translation.x = hole[0] + island->uv_diagonal.x * 0.5f;
island->phi.translation.y = hole[1] + island->uv_diagonal.y * 0.5f;
phi.rotation = 0.0f;
phi.translation.x = hole[0] + island->uv_diagonal.x * 0.5f;
phi.translation.y = hole[1] + island->uv_diagonal.y * 0.5f;
}
/* Update space left in the hole. */
@ -507,14 +515,14 @@ static void pack_islands_alpaca_rotate(const Span<UVAABBIsland *> islands,
/* Place the island. */
if (zigzag == (min_dsm == uvdiag_x)) {
island->phi.rotation = DEG2RADF(90.0f);
island->phi.translation.x = u0 + island->uv_diagonal.y * 0.5f / island->aspect_y;
island->phi.translation.y = v0 + island->uv_diagonal.x * 0.5f * island->aspect_y;
phi.rotation = DEG2RADF(90.0f);
phi.translation.x = u0 + island->uv_diagonal.y * 0.5f / island->aspect_y;
phi.translation.y = v0 + island->uv_diagonal.x * 0.5f * island->aspect_y;
}
else {
island->phi.rotation = 0.0f;
island->phi.translation.x = u0 + island->uv_diagonal.x * 0.5f;
island->phi.translation.y = v0 + island->uv_diagonal.y * 0.5f;
phi.rotation = 0.0f;
phi.translation.x = u0 + island->uv_diagonal.x * 0.5f;
phi.translation.y = v0 + island->uv_diagonal.y * 0.5f;
}
/* Move according to the "Alpaca rules", with rotation. */
@ -543,6 +551,7 @@ static void pack_island_box_pack_2d(const Span<UVAABBIsland *> aabbs,
const float scale,
const float margin,
const float target_aspect_y,
MutableSpan<uv_phi> r_phis,
float *r_max_u,
float *r_max_v)
{
@ -567,13 +576,11 @@ static void pack_island_box_pack_2d(const Span<UVAABBIsland *> aabbs,
/* Write back box_pack UVs. */
for (const int64_t i : aabbs.index_range()) {
PackIsland *island = islands[aabbs[i]->index];
BoxPack *box = box_array + i;
uv_phi phi;
uv_phi &phi = r_phis[aabbs[i]->index];
phi.rotation = 0.0f; /* #BLI_box_pack_2d never rotates. */
phi.translation.x = (box->x + box->w * 0.5f) * target_aspect_y;
phi.translation.y = (box->y + box->h * 0.5f);
island->place_(scale, phi);
}
/* Housekeeping. */
@ -868,6 +875,7 @@ static void pack_island_xatlas(const Span<UVAABBIsland *> island_indices,
const float scale,
const float margin,
const UVPackIsland_Params &params,
MutableSpan<uv_phi> r_phis,
float *r_max_u,
float *r_max_v)
{
@ -875,7 +883,6 @@ static void pack_island_xatlas(const Span<UVAABBIsland *> island_indices,
float max_u = 0.0f;
float max_v = 0.0f;
blender::Array<uv_phi> phis(island_indices.size());
int scan_line = 0;
int i = 0;
@ -925,14 +932,13 @@ static void pack_island_xatlas(const Span<UVAABBIsland *> island_indices,
/* Redraw already placed islands. (Greedy.) */
for (int j = 0; j < i; j++) {
occupancy.trace_island(islands[island_indices[j]->index], phis[j], scale, margin, true);
occupancy.trace_island(islands[island_indices[j]->index], r_phis[island_indices[j]->index], scale, margin, true);
}
continue;
}
/* Place island. */
phis[i] = phi;
island->place_(scale, phi);
r_phis[island_indices[i]->index] = phi;
occupancy.trace_island(island, phi, scale, margin, true);
i++; /* Next island. */
@ -980,9 +986,10 @@ static float pack_islands_scale_margin(const Span<PackIsland *> islands,
* - Sort islands in size order.
* - Call #BLI_box_pack_2d on the first `alpaca_cutoff` islands.
* - Call #pack_islands_alpaca_* on the remaining islands.
* - Combine results.
*/
blender::Array<uv_phi> phis(islands.size());
/* First, copy information from our input into the AABB structure. */
Array<UVAABBIsland *> aabbs(islands.size());
for (const int64_t i : islands.index_range()) {
@ -1049,6 +1056,7 @@ static float pack_islands_scale_margin(const Span<PackIsland *> islands,
scale,
margin,
params,
phis.as_mutable_span(),
&max_u,
&max_v);
break;
@ -1058,6 +1066,7 @@ static float pack_islands_scale_margin(const Span<PackIsland *> islands,
scale,
margin,
params.target_aspect_y,
phis.as_mutable_span(),
&max_u,
&max_v);
break;
@ -1067,25 +1076,25 @@ static float pack_islands_scale_margin(const Span<PackIsland *> islands,
/* Call Alpaca. */
if (params.rotate) {
pack_islands_alpaca_rotate(
aabbs.as_mutable_span().drop_front(max_box_pack), params.target_aspect_y, &max_u, &max_v);
pack_islands_alpaca_rotate(max_box_pack,
aabbs.as_mutable_span(),
params.target_aspect_y,
phis.as_mutable_span(),
&max_u,
&max_v);
}
else {
pack_islands_alpaca_turbo(
aabbs.as_mutable_span().drop_front(max_box_pack), params.target_aspect_y, &max_u, &max_v);
pack_islands_alpaca_turbo(max_box_pack,
aabbs.as_mutable_span(),
params.target_aspect_y,
phis.as_mutable_span(),
&max_u,
&max_v);
}
/* Write back Alpaca UVs. */
for (int64_t i = max_box_pack; i < aabbs.size(); i++) {
UVAABBIsland *aabb = aabbs[i];
islands[aabb->index]->place_(scale, aabb->phi);
}
/* Memory management. */
for (int64_t i : aabbs.index_range()) {
UVAABBIsland *aabb = aabbs[i];
aabbs[i] = nullptr;
delete aabb;
/* Write back UVs. */
for (int64_t i = 0; i < aabbs.size(); i++) {
islands[i]->place_(scale, phis[i]);
}
return std::max(max_u / params.target_aspect_y, max_v);
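
The strategy comment above now funnels every packer through a single `phis` array indexed by island, with `place_` applied once at the end. A minimal sketch of that two-phase flow, using simplified stand-in types rather than the real `PackIsland`/`UVAABBIsland` API (the exact-packer phase below is a one-row placeholder for #BLI_box_pack_2d or #pack_island_xatlas):

#include <algorithm>
#include <cstdint>
#include <vector>

/* Sketch only: stand-ins for the real types. */
struct Phi {
  float rotation = 0.0f;
  float x = 0.0f, y = 0.0f; /* Translation of the island center. */
};
struct AABB {
  int64_t index; /* Island this box belongs to. */
  float w, h;    /* uv_diagonal. */
};

/* `phis` must be sized to the total number of islands. */
static void pack_two_phase(std::vector<AABB> &aabbs, int64_t cutoff, std::vector<Phi> &phis)
{
  /* Sort islands in size order, largest first. */
  std::sort(aabbs.begin(), aabbs.end(),
            [](const AABB &a, const AABB &b) { return a.w * a.h > b.w * b.h; });
  cutoff = std::min(cutoff, int64_t(aabbs.size()));

  /* Phase 1: "exact" packer for the first `cutoff` islands (one row here). */
  float u0 = 0.0f, max_v = 0.0f;
  for (int64_t i = 0; i < cutoff; i++) {
    phis[aabbs[i].index] = {0.0f, u0 + aabbs[i].w * 0.5f, aabbs[i].h * 0.5f};
    u0 += aabbs[i].w;
    max_v = std::max(max_v, aabbs[i].h);
  }

  /* Phase 2: "Alpaca rules", appending the remaining islands upwards. */
  float v0 = max_v;
  for (int64_t i = cutoff; i < int64_t(aabbs.size()); i++) {
    phis[aabbs[i].index] = {0.0f, aabbs[i].w * 0.5f, v0 + aabbs[i].h * 0.5f};
    v0 += aabbs[i].h;
  }
  /* The caller writes all placements back in one pass, as in the diff above. */
}

Collecting every placement in one island-indexed array is what lets `pack_islands_alpaca_*` take a `start_index` over the full span and lets a single write-back loop replace both the per-packer write-backs and the per-AABB `phi` member.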

View File

@ -25,9 +25,8 @@ static VmaAllocationCreateFlagBits vma_allocation_flags(GPUUsageType usage)
{
switch (usage) {
case GPU_USAGE_STATIC:
return static_cast<VmaAllocationCreateFlagBits>(
VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT);
case GPU_USAGE_DYNAMIC:
case GPU_USAGE_STREAM:
return static_cast<VmaAllocationCreateFlagBits>(
VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT);
case GPU_USAGE_DEVICE_ONLY:
@ -35,7 +34,6 @@ static VmaAllocationCreateFlagBits vma_allocation_flags(GPUUsageType usage)
VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT |
VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT);
case GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY:
case GPU_USAGE_STREAM:
break;
}
BLI_assert_msg(false, "Unimplemented GPUUsageType");
@ -83,6 +81,10 @@ void VKBuffer::update(const void *data) const
{
BLI_assert_msg(is_mapped(), "Cannot update a non-mapped buffer.");
memcpy(mapped_memory_, data, size_in_bytes_);
VKContext &context = *VKContext::get();
VmaAllocator mem_allocator = context.mem_allocator_get();
vmaFlushAllocation(mem_allocator, allocation_, 0, VK_WHOLE_SIZE);
}
void VKBuffer::clear(VKContext &context, uint32_t clear_value)
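
The `vmaFlushAllocation` added above is needed because a host write to persistently mapped memory is only guaranteed to reach the device without an explicit flush when the memory type is `HOST_COHERENT`; on non-coherent types the flush publishes the write, and on coherent types it degrades to a no-op. A stand-alone sketch of the same pattern, assuming a valid `VmaAllocator` created elsewhere (error handling omitted):

#include <cstring>

#include "vk_mem_alloc.h"

static VkBuffer create_and_fill(VmaAllocator allocator, const void *data, VkDeviceSize size)
{
  VkBufferCreateInfo buffer_info = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
  buffer_info.size = size;
  buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

  /* Host-visible, persistently mapped allocation; same flags as GPU_USAGE_STATIC above. */
  VmaAllocationCreateInfo create_info = {};
  create_info.usage = VMA_MEMORY_USAGE_AUTO;
  create_info.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT |
                      VMA_ALLOCATION_CREATE_MAPPED_BIT;

  VkBuffer buffer = VK_NULL_HANDLE;
  VmaAllocation allocation = nullptr;
  VmaAllocationInfo info = {};
  vmaCreateBuffer(allocator, &buffer_info, &create_info, &buffer, &allocation, &info);

  /* `pMappedData` stays valid thanks to VMA_ALLOCATION_CREATE_MAPPED_BIT. */
  memcpy(info.pMappedData, data, size);
  /* Required on non-coherent memory types; harmless on HOST_COHERENT ones. */
  vmaFlushAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
  return buffer;
}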

View File

@ -63,4 +63,5 @@ class VKBuffer {
bool map(VKContext &context);
void unmap(VKContext &context);
};
} // namespace blender::gpu

View File

@ -9,9 +9,11 @@
#include "vk_buffer.hh"
#include "vk_context.hh"
#include "vk_framebuffer.hh"
#include "vk_index_buffer.hh"
#include "vk_memory.hh"
#include "vk_pipeline.hh"
#include "vk_texture.hh"
#include "vk_vertex_buffer.hh"
#include "BLI_assert.h"
@ -34,30 +36,42 @@ void VKCommandBuffer::init(const VkDevice vk_device,
vk_queue_ = vk_queue;
vk_command_buffer_ = vk_command_buffer;
submission_id_.reset();
state.stage = Stage::Initial;
if (vk_fence_ == VK_NULL_HANDLE) {
VK_ALLOCATION_CALLBACKS;
VkFenceCreateInfo fenceInfo{};
fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT;
vkCreateFence(vk_device_, &fenceInfo, vk_allocation_callbacks, &vk_fence_);
}
else {
vkResetFences(vk_device_, 1, &vk_fence_);
}
}
void VKCommandBuffer::begin_recording()
{
vkWaitForFences(vk_device_, 1, &vk_fence_, VK_TRUE, UINT64_MAX);
vkResetFences(vk_device_, 1, &vk_fence_);
vkResetCommandBuffer(vk_command_buffer_, 0);
if (is_in_stage(Stage::Submitted)) {
vkWaitForFences(vk_device_, 1, &vk_fence_, VK_TRUE, FenceTimeout);
vkResetFences(vk_device_, 1, &vk_fence_);
stage_transfer(Stage::Submitted, Stage::Executed);
}
if (is_in_stage(Stage::Executed)) {
vkResetCommandBuffer(vk_command_buffer_, 0);
stage_transfer(Stage::Executed, Stage::Initial);
}
VkCommandBufferBeginInfo begin_info = {};
begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
vkBeginCommandBuffer(vk_command_buffer_, &begin_info);
stage_transfer(Stage::Initial, Stage::Recording);
}
void VKCommandBuffer::end_recording()
{
ensure_no_active_framebuffer();
vkEndCommandBuffer(vk_command_buffer_);
stage_transfer(Stage::Recording, Stage::BetweenRecordingAndSubmitting);
}
void VKCommandBuffer::bind(const VKPipeline &pipeline, VkPipelineBindPoint bind_point)
@ -74,19 +88,43 @@ void VKCommandBuffer::bind(const VKDescriptorSet &descriptor_set,
vk_command_buffer_, bind_point, vk_pipeline_layout, 0, 1, &vk_descriptor_set, 0, 0);
}
void VKCommandBuffer::begin_render_pass(const VKFrameBuffer &framebuffer)
void VKCommandBuffer::bind(const uint32_t binding,
const VKVertexBuffer &vertex_buffer,
const VkDeviceSize offset)
{
VkRenderPassBeginInfo render_pass_begin_info = {};
render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
render_pass_begin_info.renderPass = framebuffer.vk_render_pass_get();
render_pass_begin_info.framebuffer = framebuffer.vk_framebuffer_get();
render_pass_begin_info.renderArea = framebuffer.vk_render_area_get();
vkCmdBeginRenderPass(vk_command_buffer_, &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE);
bind(binding, vertex_buffer.vk_handle(), offset);
}
void VKCommandBuffer::end_render_pass(const VKFrameBuffer & /*framebuffer*/)
void VKCommandBuffer::bind(const uint32_t binding,
const VkBuffer &vk_vertex_buffer,
const VkDeviceSize offset)
{
vkCmdEndRenderPass(vk_command_buffer_);
validate_framebuffer_exists();
ensure_active_framebuffer();
vkCmdBindVertexBuffers(vk_command_buffer_, binding, 1, &vk_vertex_buffer, &offset);
}
void VKCommandBuffer::bind(const VKIndexBuffer &index_buffer, VkIndexType index_type)
{
validate_framebuffer_exists();
ensure_active_framebuffer();
VkBuffer vk_buffer = index_buffer.vk_handle();
vkCmdBindIndexBuffer(vk_command_buffer_, vk_buffer, 0, index_type);
}
void VKCommandBuffer::begin_render_pass(const VKFrameBuffer &framebuffer)
{
validate_framebuffer_not_exists();
state.framebuffer_ = &framebuffer;
}
void VKCommandBuffer::end_render_pass(const VKFrameBuffer &framebuffer)
{
UNUSED_VARS_NDEBUG(framebuffer);
validate_framebuffer_exists();
BLI_assert(state.framebuffer_ == &framebuffer);
ensure_no_active_framebuffer();
state.framebuffer_ = nullptr;
}
void VKCommandBuffer::push_constants(const VKPushConstants &push_constants,
@ -105,6 +143,7 @@ void VKCommandBuffer::push_constants(const VKPushConstants &push_constants,
void VKCommandBuffer::fill(VKBuffer &buffer, uint32_t clear_data)
{
ensure_no_active_framebuffer();
vkCmdFillBuffer(vk_command_buffer_, buffer.vk_handle(), 0, buffer.size_in_bytes(), clear_data);
}
@ -112,6 +151,7 @@ void VKCommandBuffer::copy(VKBuffer &dst_buffer,
VKTexture &src_texture,
Span<VkBufferImageCopy> regions)
{
ensure_no_active_framebuffer();
vkCmdCopyImageToBuffer(vk_command_buffer_,
src_texture.vk_image_handle(),
src_texture.current_layout_get(),
@ -123,6 +163,7 @@ void VKCommandBuffer::copy(VKTexture &dst_texture,
VKBuffer &src_buffer,
Span<VkBufferImageCopy> regions)
{
ensure_no_active_framebuffer();
vkCmdCopyBufferToImage(vk_command_buffer_,
src_buffer.vk_handle(),
dst_texture.vk_image_handle(),
@ -130,12 +171,27 @@ void VKCommandBuffer::copy(VKTexture &dst_texture,
regions.size(),
regions.data());
}
void VKCommandBuffer::blit(VKTexture &dst_texture,
VKTexture &src_texture,
Span<VkImageBlit> regions)
{
ensure_no_active_framebuffer();
vkCmdBlitImage(vk_command_buffer_,
src_texture.vk_image_handle(),
src_texture.current_layout_get(),
dst_texture.vk_image_handle(),
dst_texture.current_layout_get(),
regions.size(),
regions.data(),
VK_FILTER_NEAREST);
}
void VKCommandBuffer::clear(VkImage vk_image,
VkImageLayout vk_image_layout,
const VkClearColorValue &vk_clear_color,
Span<VkImageSubresourceRange> ranges)
{
ensure_no_active_framebuffer();
vkCmdClearColorImage(vk_command_buffer_,
vk_image,
vk_image_layout,
@ -146,13 +202,36 @@ void VKCommandBuffer::clear(VkImage vk_image,
void VKCommandBuffer::clear(Span<VkClearAttachment> attachments, Span<VkClearRect> areas)
{
validate_framebuffer_exists();
ensure_active_framebuffer();
vkCmdClearAttachments(
vk_command_buffer_, attachments.size(), attachments.data(), areas.size(), areas.data());
}
void VKCommandBuffer::draw(int v_first, int v_count, int i_first, int i_count)
{
validate_framebuffer_exists();
ensure_active_framebuffer();
vkCmdDraw(vk_command_buffer_, v_count, i_count, v_first, i_first);
state.draw_counts++;
}
void VKCommandBuffer::draw(
int index_count, int instance_count, int first_index, int vertex_offset, int first_instance)
{
validate_framebuffer_exists();
ensure_active_framebuffer();
vkCmdDrawIndexed(
vk_command_buffer_, index_count, instance_count, first_index, vertex_offset, first_instance);
state.draw_counts++;
}
void VKCommandBuffer::pipeline_barrier(VkPipelineStageFlags source_stages,
VkPipelineStageFlags destination_stages)
{
if (state.framebuffer_) {
ensure_active_framebuffer();
}
vkCmdPipelineBarrier(vk_command_buffer_,
source_stages,
destination_stages,
@ -167,6 +246,7 @@ void VKCommandBuffer::pipeline_barrier(VkPipelineStageFlags source_stages,
void VKCommandBuffer::pipeline_barrier(Span<VkImageMemoryBarrier> image_memory_barriers)
{
ensure_no_active_framebuffer();
vkCmdPipelineBarrier(vk_command_buffer_,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
@ -181,11 +261,13 @@ void VKCommandBuffer::pipeline_barrier(Span<VkImageMemoryBarrier> image_memory_b
void VKCommandBuffer::dispatch(int groups_x_len, int groups_y_len, int groups_z_len)
{
ensure_no_active_framebuffer();
vkCmdDispatch(vk_command_buffer_, groups_x_len, groups_y_len, groups_z_len);
}
void VKCommandBuffer::submit()
{
ensure_no_active_framebuffer();
end_recording();
encode_recorded_commands();
submit_encoded_commands();
@ -208,6 +290,55 @@ void VKCommandBuffer::submit_encoded_commands()
vkQueueSubmit(vk_queue_, 1, &submit_info, vk_fence_);
submission_id_.next();
stage_transfer(Stage::BetweenRecordingAndSubmitting, Stage::Submitted);
}
/* -------------------------------------------------------------------- */
/** \name FrameBuffer/RenderPass state tracking
* \{ */
void VKCommandBuffer::validate_framebuffer_not_exists()
{
BLI_assert_msg(state.framebuffer_ == nullptr && state.framebuffer_active_ == false,
"State error: expected no framebuffer being tracked.");
}
void VKCommandBuffer::validate_framebuffer_exists()
{
BLI_assert_msg(state.framebuffer_, "State error: expected framebuffer being tracked.");
}
void VKCommandBuffer::ensure_no_active_framebuffer()
{
state.checks_++;
if (state.framebuffer_ && state.framebuffer_active_) {
vkCmdEndRenderPass(vk_command_buffer_);
state.framebuffer_active_ = false;
state.switches_++;
}
}
void VKCommandBuffer::ensure_active_framebuffer()
{
BLI_assert(state.framebuffer_);
state.checks_++;
if (!state.framebuffer_active_) {
VkRenderPassBeginInfo render_pass_begin_info = {};
render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
render_pass_begin_info.renderPass = state.framebuffer_->vk_render_pass_get();
render_pass_begin_info.framebuffer = state.framebuffer_->vk_framebuffer_get();
render_pass_begin_info.renderArea = state.framebuffer_->vk_render_area_get();
/* We don't use clear ops, but Vulkan requires at least one clear value. */
VkClearValue clear_value = {};
render_pass_begin_info.clearValueCount = 1;
render_pass_begin_info.pClearValues = &clear_value;
vkCmdBeginRenderPass(vk_command_buffer_, &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE);
state.framebuffer_active_ = true;
state.switches_++;
}
}
/** \} */
} // namespace blender::gpu
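
Because no command above calls `vkCmdBeginRenderPass` directly any more, each recorded command only declares whether it needs an open render pass, and the two `ensure_*` helpers switch lazily. The core of that tracking, reduced to a free-standing sketch (illustrative names, not the real class):

#include <cassert>
#include <cstdint>

struct RenderPassTracker {
  bool framebuffer_tracked = false; /* begin_render_pass() was called. */
  bool pass_active = false;         /* vkCmdBeginRenderPass was recorded. */
  uint64_t checks = 0, switches = 0;

  /* Used by draws, attachment clears and vertex/index buffer binds. */
  void ensure_active()
  {
    assert(framebuffer_tracked);
    checks++;
    if (!pass_active) {
      /* vkCmdBeginRenderPass(...); */
      pass_active = true;
      switches++;
    }
  }

  /* Used by dispatches, copies, blits, fills and image barriers. */
  void ensure_no_active()
  {
    checks++;
    if (framebuffer_tracked && pass_active) {
      /* vkCmdEndRenderPass(...); */
      pass_active = false;
      switches++;
    }
  }
};

A draw, then a buffer copy, then another draw thus records two render-pass switches; the `checks_` and `switches_` counters exist so that cost can be measured.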

View File

@ -16,30 +16,133 @@ namespace blender::gpu {
class VKBuffer;
class VKDescriptorSet;
class VKFrameBuffer;
class VKIndexBuffer;
class VKPipeline;
class VKPushConstants;
class VKTexture;
class VKVertexBuffer;
/** Wrapper to keep track of the life-time and state of a command buffer. */
class VKCommandBuffer : NonCopyable, NonMovable {
/** None owning handle to the command buffer and device. Handle is owned by `GHOST_ContextVK`. */
/** Non-owning handles to the command buffer and device. Handles are owned by `GHOST_ContextVK`. */
VkDevice vk_device_ = VK_NULL_HANDLE;
VkCommandBuffer vk_command_buffer_ = VK_NULL_HANDLE;
VkQueue vk_queue_ = VK_NULL_HANDLE;
/**
* Timeout to use when waiting for fences in nanoseconds.
*
* Currently added because the fence wait stalls the second time a command buffer is submitted
* without any commands. This should be fixed so the timeout can be removed.
*/
static constexpr uint64_t FenceTimeout = UINT64_MAX;
/** Owning handles */
VkFence vk_fence_ = VK_NULL_HANDLE;
VKSubmissionID submission_id_;
private:
enum class Stage {
Initial,
Recording,
BetweenRecordingAndSubmitting,
Submitted,
Executed,
};
/*
* Some Vulkan commands require an active framebuffer; others require that no framebuffer is
* active. As our current API does not express this requirement, we keep track of the actual
* state and apply the needed changes when recording the next command.
*
* This is a temporary solution to get things rolling.
* TODO: In a future solution we should decide the scope of a command buffer.
*
* - command buffer per draw command.
* - minimize command buffers and track render passes.
* - add custom encoder to also track resource usages.
*
* I expect the custom encoder will be needed eventually, but want to keep postponing it for
* now to collect more use cases it should solve (first pixel drawn on screen).
*
* Some commands can also be encoded differently when they are the first command. For example,
* clearing framebuffer textures isn't allowed inside a render pass, but clearing them via
* render-pass ops is. A clear recorded directly after beginning a render pass could be
* re-encoded to perform the clear in the same command.
*
* So for now we track the state and temporarily switch to another state if the command
* requires it.
*/
struct {
/* Reference to the framebuffer that begin_render_pass() was last called for. */
const VKFrameBuffer *framebuffer_ = nullptr;
/* Whether framebuffer_ is currently bound. Each command should ensure the correct state. */
bool framebuffer_active_ = false;
/* Number of times a state check has been requested. */
uint64_t checks_ = 0;
/* Number of times a check required changing the render pass state. */
uint64_t switches_ = 0;
/* Number of times a vkDraw command has been recorded. */
uint64_t draw_counts = 0;
/**
* Current stage of the command buffer to keep track of inconsistencies & incorrect usage.
*/
Stage stage = Stage::Initial;
} state;
bool is_in_stage(Stage stage)
{
return state.stage == stage;
}
void stage_set(Stage stage)
{
state.stage = stage;
}
std::string to_string(Stage stage)
{
switch (stage) {
case Stage::Initial:
return "INITIAL";
case Stage::Recording:
return "RECORDING";
case Stage::BetweenRecordingAndSubmitting:
return "BEFORE_SUBMIT";
case Stage::Submitted:
return "SUBMITTED";
case Stage::Executed:
return "EXECUTED";
}
return "UNKNOWN";
}
void stage_transfer(Stage stage_from, Stage stage_to)
{
BLI_assert(is_in_stage(stage_from));
#if 0
printf(" *** Transfer stage from %s to %s\n",
to_string(stage_from).c_str(),
to_string(stage_to).c_str());
#endif
stage_set(stage_to);
}
public:
virtual ~VKCommandBuffer();
void init(const VkDevice vk_device, const VkQueue vk_queue, VkCommandBuffer vk_command_buffer);
void begin_recording();
void end_recording();
void bind(const VKPipeline &vk_pipeline, VkPipelineBindPoint bind_point);
void bind(const VKDescriptorSet &descriptor_set,
const VkPipelineLayout vk_pipeline_layout,
VkPipelineBindPoint bind_point);
void bind(const uint32_t binding,
const VKVertexBuffer &vertex_buffer,
const VkDeviceSize offset);
/* Bind the given buffer as a vertex buffer. */
void bind(const uint32_t binding, const VkBuffer &vk_vertex_buffer, const VkDeviceSize offset);
void bind(const VKIndexBuffer &index_buffer, VkIndexType index_type);
void begin_render_pass(const VKFrameBuffer &framebuffer);
void end_render_pass(const VKFrameBuffer &framebuffer);
@ -55,6 +158,7 @@ class VKCommandBuffer : NonCopyable, NonMovable {
/** Copy the contents of a texture MIP level to the dst buffer. */
void copy(VKBuffer &dst_buffer, VKTexture &src_texture, Span<VkBufferImageCopy> regions);
void copy(VKTexture &dst_texture, VKBuffer &src_buffer, Span<VkBufferImageCopy> regions);
void blit(VKTexture &dst_texture, VKTexture &src_texture, Span<VkImageBlit> regions);
void pipeline_barrier(VkPipelineStageFlags source_stages,
VkPipelineStageFlags destination_stages);
void pipeline_barrier(Span<VkImageMemoryBarrier> image_memory_barriers);
@ -72,6 +176,10 @@ class VKCommandBuffer : NonCopyable, NonMovable {
void clear(Span<VkClearAttachment> attachments, Span<VkClearRect> areas);
void fill(VKBuffer &buffer, uint32_t data);
void draw(int v_first, int v_count, int i_first, int i_count);
void draw(
int index_count, int instance_count, int first_index, int vertex_offset, int first_instance);
/**
* Stop recording commands, encode and submit the recordings to Vulkan, wait until the commands
* have been executed, and start the command buffer again so it can accept new recordings.
@ -86,6 +194,30 @@ class VKCommandBuffer : NonCopyable, NonMovable {
private:
void encode_recorded_commands();
void submit_encoded_commands();
/**
* Validate that there isn't a framebuffer being tracked (bound or not bound).
*
* Raises an assert in debug builds when a framebuffer is being tracked.
*/
void validate_framebuffer_not_exists();
/**
* Validate that there is a framebuffer being tracked (bound or not bound).
*
* Raises an assert in debug builds when no framebuffer is being tracked.
*/
void validate_framebuffer_exists();
/**
* Ensure that there is no framebuffer being tracked or the tracked framebuffer isn't bound.
*/
void ensure_no_active_framebuffer();
/**
* Ensure that the tracked framebuffer is bound.
*/
void ensure_active_framebuffer();
};
} // namespace blender::gpu
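
Putting the pieces together, a single submission cycle is expected to walk the stage machine in a fixed order. A hedged usage sketch against the header above; `record_one_frame` is a hypothetical helper and assumes `cmd` was `init()`-ed and `framebuffer` is valid:

#include "vk_command_buffer.hh"
#include "vk_framebuffer.hh"

namespace blender::gpu {

static void record_one_frame(VKCommandBuffer &cmd, VKFrameBuffer &framebuffer)
{
  cmd.begin_recording(); /* Submitted -> Executed -> Initial -> Recording; waits on
                          * the fence first if a previous submission is in flight. */
  cmd.begin_render_pass(framebuffer); /* Only tracked; the pass opens lazily. */
  cmd.draw(0, 3, 0, 1);               /* Opens the render pass, records a 3-vertex draw. */
  cmd.end_render_pass(framebuffer);   /* Stops tracking; closes the pass if it was opened. */
  cmd.submit(); /* Recording -> BetweenRecordingAndSubmitting -> Submitted; the fence
                 * signals once the GPU finishes executing. */
}

}  // namespace blender::gpu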
