WIP: Linux Support for HIP-RT #121050

Draft
Sahar A. Kashi wants to merge 5 commits from salipour/AMD_HIPRT:HIPRT_OPEN_SOURCE into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
23 changed files with 409 additions and 119 deletions

View File

@ -694,11 +694,9 @@ if(NOT APPLE AND NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64"))
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
# HIPRT is only available on Windows for now.
if(WIN32)
option(WITH_CYCLES_DEVICE_HIPRT "Enable Cycles AMD HIPRT support" OFF)
mark_as_advanced(WITH_CYCLES_DEVICE_HIPRT)
endif()
option(WITH_CYCLES_DEVICE_HIPRT "Enable Cycles AMD HIPRT support" OFF)

This should still default to OFF.

It's build_files/cmake/config/blender_release.cmake that enables it by default for releases. Though we should still not enable it for Linux there, until after the precompiled libraries for that platform have landed.

This should still default to OFF. It's `build_files/cmake/config/blender_release.cmake` that enables it by default for releases. Though we should still not enable it for Linux there, until after the precompiled libraries for that platform have landed.
mark_as_advanced(WITH_CYCLES_DEVICE_HIPRT)
endif()
# Apple Metal
@ -2475,8 +2473,6 @@ if(FIRST_RUN)
info_cfg_option(WITH_CYCLES_ONEAPI_BINARIES)
info_cfg_option(WITH_CYCLES_DEVICE_HIP)
info_cfg_option(WITH_CYCLES_HIP_BINARIES)
endif()
if(WIN32)
info_cfg_option(WITH_CYCLES_DEVICE_HIPRT)
endif()
endif()

View File

@ -178,3 +178,4 @@ download_source(VULKAN_HEADERS)
download_source(VULKAN_LOADER)
download_source(PYBIND11)
download_source(DEFLATE)
download_source(HIPRT)

View File

@ -158,6 +158,9 @@ else()
# Libraries harvested on every non-Apple platform.
# NOTE(review): presumably harvest(<from> <to> <pattern>), with both paths
# relative to the build/harvest roots — confirm against the harvest() definition.
if(NOT APPLE)
# Level Zero headers and shared libraries.
harvest(level-zero/include/level_zero level-zero/include/level_zero "*.h")
harvest(level-zero/lib level-zero/lib "*${SHAREDLIBEXT}*")
# HIPRT public headers, the impl/ headers, and the shared library taken from
# the Release output directory and placed under hiprt/bin.
harvest(hiprt/hiprt hiprt/hiprt "*.h")
harvest(hiprt/hiprt/impl hiprt/hiprt/impl "*.h")
harvest(hiprt/dist/bin/Release hiprt/bin "*${SHAREDLIBEXT}*")
endif()
harvest(llvm/bin llvm/bin "clang-format")
if(BUILD_CLANG_TOOLS)

View File

@ -0,0 +1,62 @@
# SPDX-FileCopyrightText: 2017-2024 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
# Builds the AMD HIPRT library as the `external_hiprt` external project.
set(HIPRT_CMAKE_FLAGS ${DEFAULT_CMAKE_FLAGS})
set(HIPRT_EXTRA_ARGS
-DHIPRT_EXPORTS=ON

Should be -D instead of --D?

Should be `-D` instead of `--D`?
-D__USE_HIP__=ON
-DHIPRT_BITCODE_LINKING=ON
-DHIPRT_LOAD_FROM_STRING=OFF
-DORO_PRECOMPILED=ON
)
# On Windows, additionally pass CMAKE_DEBUG_POSTFIX=_d to the HIPRT build.
if(WIN32)
  list(APPEND HIPRT_EXTRA_ARGS -DCMAKE_DEBUG_POSTFIX=_d)
endif()
# Unpack the HIPRT archive from the local package directory, check it against
# the recorded hash, and configure it to install into ${LIBDIR}/hiprt using the
# default flags plus the HIPRT-specific arguments.
ExternalProject_Add(external_hiprt
  PREFIX ${BUILD_DIR}/hiprt
  URL file://${PACKAGE_DIR}/${HIPRT_FILE}
  URL_HASH ${HIPRT_HASH_TYPE}=${HIPRT_HASH}
  DOWNLOAD_DIR ${DOWNLOAD_DIR}
  CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
  INSTALL_DIR ${LIBDIR}/hiprt
  CMAKE_ARGS
    -DCMAKE_INSTALL_PREFIX=${LIBDIR}/hiprt
    ${HIPRT_CMAKE_FLAGS}
    ${HIPRT_EXTRA_ARGS}
)
if(WIN32)
if(BUILD_MODE STREQUAL Release)

For Linux, there will need to be additions in build_files/build_environment/cmake/harvest.cmake to install things.

I guess something like this, not sure if we want to just install all files or only a subset.

  harvest(hiprt/include hiprt/include "*")
  harvest(hiprt/lib hiprt/lib "*")
For Linux, there will need to be additions in `build_files/build_environment/cmake/harvest.cmake` to install things.

I guess something like this, not sure if we want to just install all files or only a subset.

```
harvest(hiprt/include hiprt/include "*")
harvest(hiprt/lib hiprt/lib "*")
```
ExternalProject_Add_Step(external_hiprt after_install
COMMAND ${CMAKE_COMMAND} -E copy_directory
${LIBDIR}/hiprt/hiprt
${HARVEST_TARGET}/hiprt/hiprt
COMMAND ${CMAKE_COMMAND} -E copy
${LIBDIR}/hiprt/dist/bin/Release/hiprt*64.dll
${HARVEST_TARGET}/hiprt/bin/hiprt*64.dll
DEPENDEES install
)
else()
ExternalProject_Add_Step(external_hiprt after_install
COMMAND ${CMAKE_COMMAND} -E copy
${LIBDIR}/hiprt/dist/bin/Debug/hiprt*64D.dll
${HARVEST_TARGET}/hiprt/bin/hiprt*64D.dll

I think hiprt64_d.dll should be used here.

I think `hiprt64_d.dll` should be used here.
DEPENDEES install
)
endif()
endif()

View File

@ -855,3 +855,9 @@ set(PYBIND11_URI https://github.com/pybind/pybind11/archive/refs/tags/v${PYBIND1
set(PYBIND11_HASH ce07bfd5089245da7807b3faf6cbc878)
set(PYBIND11_HASH_TYPE MD5)
set(PYBIND11_FILE pybind-v${PYBIND11_VERSION}.tar.gz)
set(HIPRT_VERSION 2.3.7df94af)

This version doesn't seem to exist?
https://github.com/GPUOpen-LibrariesAndSDKs/HIPRTSDK/releases

Not sure if we need to wait for it to be released, or if another version should be used.

This version doesn't seem to exist? https://github.com/GPUOpen-LibrariesAndSDKs/HIPRTSDK/releases Not sure if we need to wait for it to be released, or if another version should be used.
Review
URI is wrong, it's over at https://github.com/GPUOpen-LibrariesAndSDKs/HIPRT/releases/tag/2.3.7df94af

Ah ok, so it's:

set(HIPRT_URI https://github.com/GPUOpen-LibrariesAndSDKs/HIPRT/archive/refs/tags/${HIPRT_VERSION}.zip)
set(HIPRT_HASH eeb4053fd7e5ada2e2dff838dff41ca1)
Ah ok, so it's:

```
set(HIPRT_URI https://github.com/GPUOpen-LibrariesAndSDKs/HIPRT/archive/refs/tags/${HIPRT_VERSION}.zip)
set(HIPRT_HASH eeb4053fd7e5ada2e2dff838dff41ca1)
```
Review

Probably? I haven't gotten around to building this yet; I'll try to schedule some time for this over the weekend.

Probably? I haven't gotten around to building this yet; I'll try to schedule some time for this over the weekend.

Ok, thanks. For reference, this PR is heavily WIP and will need many more changes to get it to actually build.

I asked to just add the code so we get an idea of how things should fit together, what the build options should be, etc. But not to actually test if it works, since it's not so easy to build the libs on Windows.

Ok, thanks. For reference, this PR is heavily WIP and will need many more changes to get it to actually build. I asked to just add the code so we get an idea of how things should fit together, what the build options should be, etc. But not to actually test if it works, since it's not so easy to build the libs on Windows.
# Source archive for the tagged HIPRT release.
#
# The tag is published in the `HIPRT` repository (not `HIPRTSDK`), and the URI
# must point at the tag's source archive rather than the release web page,
# otherwise the download is not a zip file at all.
#
# HIPRT_HASH must be a digest of the kind named by HIPRT_HASH_TYPE: an MD5
# digest is 32 hex characters, so a 40-character value can never validate.
# NOTE(review): the hash below is the value proposed in review for this
# archive; re-verify it against the downloaded file before landing.
set(HIPRT_URI https://github.com/GPUOpen-LibrariesAndSDKs/HIPRT/archive/refs/tags/${HIPRT_VERSION}.zip)
set(HIPRT_HASH eeb4053fd7e5ada2e2dff838dff41ca1)
set(HIPRT_HASH_TYPE MD5)
set(HIPRT_FILE hiprt-${HIPRT_VERSION}.zip)

View File

@ -93,6 +93,7 @@ if(HIP_HIPCC_EXECUTABLE)
# Construct full semantic version.
set(HIP_VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}")
set(HIP_VERSION_SHORT "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}")
unset(_hip_version_raw)
unset(_hipcc_executable)
endif()

View File

@ -4,7 +4,7 @@
# Find HIPRT SDK. This module defines:
# HIPRT_INCLUDE_DIR, path to HIPRT include directory
# HIPRT_BITCODE, bitcode file with ray-tracing functionality
# HIPRT_DYNAMIC_LIB, dynamic library with ray-tracing functionality
# HIPRT_FOUND, if SDK found
if(NOT (DEFINED HIPRT_ROOT_DIR))
@ -22,8 +22,8 @@ elseif(DEFINED ENV{HIP_PATH})
endif()
set(_hiprt_SEARCH_DIRS
${HIPRT_ROOT_DIR}
)
${HIPRT_ROOT_DIR}
/opt/lib/hiprt)
find_path(HIPRT_INCLUDE_DIR
NAMES
@ -38,21 +38,33 @@ if(HIPRT_INCLUDE_DIR)
REGEX "^#define HIPRT_VERSION_STR[ \t]\".*\"$")
string(REGEX MATCHALL "[0-9]+[.0-9]+" _hiprt_version ${_hiprt_version})
find_file(HIPRT_BITCODE
NAMES
hiprt${_hiprt_version}_amd_lib_win.bc
HINTS
${HIPRT_ROOT_DIR}/bin
${HIPRT_ROOT_DIR}/dist/bin/Release
NO_DEFAULT_PATH
)
set(HIPRT_VERSION ${_hiprt_version})
endif()
unset(_hiprt_version)
if(WIN32)
set(HIPRT_DYNAMIC_LIB hiprt${HIPRT_VERSION}64.dll)
else()
set(HIPRT_DYNAMIC_LIB hiprt${HIPRT_VERSION}64.so)

Is ${HIPRT_VERSION} supposed to be in the name? It's not in hiprt.cmake.

Is `${HIPRT_VERSION}` supposed to be in the name? It's not in `hiprt.cmake`.
Review

I updated the name in hiprt.cmake file.

I updated the name in hiprt.cmake file.
endif()
# Find the directory containing the HIPRT dynamic library, checking each search
# directory as well as its `bin` sub-directory.
find_path(HIPRT_LIB_DIR
  NAMES ${HIPRT_DYNAMIC_LIB}
  HINTS ${_hiprt_SEARCH_DIRS}
  PATH_SUFFIXES bin
)
if(HIPRT_LIB_DIR)
set(HIPRT_DYNAMIC_LIB_PATH

This seems to be unused currently. I guess the idea is still to dynamically load the hiprt library rather than Blender linking to it? If so I guess this variable can be removed.

To install the shared libraries along with Blender, this will need to be added in build_files/cmake/platform/platform_unix.cmake, which will copy all .so files into the bundled lib directory.

add_bundled_libraries(hiprt/lib)

For Windows this is done in source/creator/CMakeLists.txt, something like:

 if(WITH_CYCLES_DEVICE_HIPRT)
    windows_install_shared_manifest(
      FILES ${LIBDIR}/hiprt/bin/hiprt64.dll
      ALL
    )
  endif()

Unless there are separate release and debug libraries, in which case:

  if(EXISTS ${LIBDIR}/hiprt/bin/hiprt64.dll)
    windows_install_shared_manifest(
      FILES
        ${LIBDIR}/hiprt/bin/hiprt64.dll
      RELEASE
    )   
    windows_install_shared_manifest(
      FILES
        ${LIBDIR}/hiprt/bin/hiprt64_d.dll
      DEBUG
    )
  endif()
This seems to be unused currently. I guess the idea is still to dynamically load the hiprt library rather than Blender linking to it? If so I guess this variable can be removed.

To install the shared libraries along with Blender, this will need to be added in `build_files/cmake/platform/platform_unix.cmake`, which will copy all `.so` files into the bundled lib directory.

```
add_bundled_libraries(hiprt/lib)
```

For Windows this is done in `source/creator/CMakeLists.txt`, something like:

```
if(WITH_CYCLES_DEVICE_HIPRT)
  windows_install_shared_manifest(
    FILES ${LIBDIR}/hiprt/bin/hiprt64.dll
    ALL
  )
endif()
```

Unless there are separate release and debug libraries, in which case:

```
if(EXISTS ${LIBDIR}/hiprt/bin/hiprt64.dll)
  windows_install_shared_manifest(
    FILES
      ${LIBDIR}/hiprt/bin/hiprt64.dll
    RELEASE
  )
  windows_install_shared_manifest(
    FILES
      ${LIBDIR}/hiprt/bin/hiprt64_d.dll
    DEBUG
  )
endif()
```
Review

The main intention was to check if the dynamic library for hiprt exists before enabling it.

The main intention was to check if the dynamic library for hiprt exists before enabling it.
${HIPRT_LIB_DIR}/bin/${HIPRT_DYNAMIC_LIB}})
endif()
unset(_hiprt_version)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(HIPRT DEFAULT_MSG
HIPRT_INCLUDE_DIR HIPRT_BITCODE)
HIPRT_INCLUDE_DIR HIPRT_DYNAMIC_LIB_PATH)
mark_as_advanced(
HIPRT_INCLUDE_DIR

View File

@ -625,6 +625,8 @@ if(DEFINED LIBDIR)
without_system_libs_end()
endif()
add_bundled_libraries(hiprt/bin)
# ----------------------------------------------------------------------------
# Build and Link Flags

View File

@ -20,63 +20,141 @@
#include <hiprt/hiprt_types.h>
#define HIPRT_MAJOR_VERSION 2
#define HIPRT_MINOR_VERSION 0
#define HIPRT_PATCH_VERSION 0xb68861
#define HIPRT_MINOR_VERSION 3
#define HIPRT_PATCH_VERSION 0x7df94af
#define HIPRT_API_VERSION 2000
#define HIPRT_VERSION_STR "02000"
#define HIPRT_API_VERSION 2003
#define HIPRT_VERSION_STR "02003"
#define HIP_VERSION_STR "6.0"
#ifdef _WIN32
#define HIPRTAPI __stdcall
#else
#define HIPRTAPI
#define HIP_CB
#endif
typedef unsigned int hiprtuint32_t;
/* Function types. */
typedef hiprtError(thiprtCreateContext)(hiprtuint32_t hiprtApiVersion,
hiprtContextCreationInput &input,
const hiprtContextCreationInput &input,
hiprtContext *outContext);
typedef hiprtError(thiprtDestroyContext)(hiprtContext context);
typedef hiprtError(thiprtCreateGeometry)(hiprtContext context,
const hiprtGeometryBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtGeometry *outGeometry);
const hiprtGeometryBuildInput &buildInput,
const hiprtBuildOptions buildOptions,
hiprtGeometry &outGeometry);
typedef hiprtError(thiprtDestroyGeometry)(hiprtContext context,
hiprtGeometry outGeometry);
typedef hiprtError(thiprtCreateGeometries)(hiprtContext context,
uint32_t numGeometries,
const hiprtGeometryBuildInput *buildInput,
const hiprtBuildOptions buildOptions,
hiprtGeometry **outGeometries);
typedef hiprtError(thiprtDestroyGeometries)(hiprtContext context, uint32_t numGeometries,
hiprtGeometry* outGeometry);
typedef hiprtError(thiprtBuildGeometry)(hiprtContext context,
hiprtBuildOperation buildOperation,
const hiprtGeometryBuildInput &buildInput,
const hiprtBuildOptions buildOptions,
hiprtDevicePtr temporaryBuffer,
hiprtApiStream stream,
hiprtGeometry outGeometry);
typedef hiprtError(thiprtBuildGeometries)(hiprtContext context,
uint32_t numGeometries,
hiprtBuildOperation buildOperation,
const hiprtGeometryBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtDevicePtr temporaryBuffer,
hiprtApiStream stream,
hiprtGeometry outGeometry);
hiprtGeometry *outGeometries);
typedef hiprtError(thiprtGetGeometryBuildTemporaryBufferSize)(
hiprtContext context,
const hiprtGeometryBuildInput &buildInput,
const hiprtBuildOptions buildOptions,
size_t &outSize);
typedef hiprtError(thiprtGetGeometriesBuildTemporaryBufferSize)(
hiprtContext context,
uint32_t numGeometries,
const hiprtGeometryBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
size_t *outSize);
size_t &outSize);
typedef hiprtError(thiprtCompactGeometry)( hiprtContext context, hiprtApiStream stream, hiprtGeometry geometryIn, hiprtGeometry& geometryOut);
typedef hiprtError(thiprtCompactGeometries)(
hiprtContext context,
uint32_t numGeometries,
hiprtApiStream stream,
hiprtGeometry* geometriesIn,
hiprtGeometry** geometriesOut );
typedef hiprtError(thiprtCreateScene)(hiprtContext context,
const hiprtSceneBuildInput &buildInput,
const hiprtBuildOptions buildOptions,
hiprtScene &outScene);
typedef hiprtError(thiprtCreateScenes)(hiprtContext context,
uint32_t numScenes,
const hiprtSceneBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtScene *outScene);
const hiprtBuildOptions buildOptions,
hiprtScene **outScene);
typedef hiprtError(thiprtDestroyScene)(hiprtContext context, hiprtScene outScene);
typedef hiprtError(thiprtDestroyScenes)( hiprtContext context, uint32_t numScenes,hiprtScene *scene );
typedef hiprtError(thiprtBuildScene)(hiprtContext context,
hiprtBuildOperation buildOperation,
const hiprtSceneBuildInput &buildInput,
const hiprtBuildOptions buildOptions,
hiprtDevicePtr temporaryBuffer,
hiprtApiStream stream,
hiprtScene outScene);
typedef hiprtError(thiprtBuildScenes)(hiprtContext context,
uint32_t numScenes,
hiprtBuildOperation buildOperation,
const hiprtSceneBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtDevicePtr temporaryBuffer,
hiprtApiStream stream,
hiprtScene outScene);
hiprtScene *outScene);
typedef hiprtError(thiprtGetSceneBuildTemporaryBufferSize)(
hiprtContext context,
const hiprtSceneBuildInput &buildInput,
const hiprtBuildOptions buildOptions,
size_t &outSize);
typedef hiprtError(thiprtGetScenesBuildTemporaryBufferSize)(
hiprtContext context,
uint32_t numScenes,
const hiprtSceneBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
size_t *outSize);
const hiprtBuildOptions buildOptions,
size_t &outSize);
typedef hiprtError(thiprtCompactScene)( hiprtContext context, hiprtApiStream stream, hiprtScene sceneIn, hiprtScene& sceneOut );
typedef hiprtError(thiprtCompactScenes)(
hiprtContext context, uint32_t numScenes, hiprtApiStream stream, hiprtScene* scenesIn, hiprtScene** scenesOut );
typedef hiprtError(thiprtCreateFuncTable)(hiprtContext context,
hiprtuint32_t numGeomTypes,
hiprtuint32_t numRayTypes,
hiprtFuncTable *outFuncTable);
hiprtFuncTable &outFuncTable);
typedef hiprtError(thiprtSetFuncTable)(hiprtContext context,
hiprtFuncTable funcTable,
hiprtuint32_t geomType,
hiprtuint32_t rayType,
hiprtFuncDataSet set);
typedef hiprtError (thiprtCreateGlobalStackBuffer)(hiprtContext context, const hiprtGlobalStackBufferInput& input, hiprtGlobalStackBuffer& stackBufferOut );
typedef hiprtError (thiprtDestroyGlobalStackBuffer)( hiprtContext context, hiprtGlobalStackBuffer stackBuffer );
typedef hiprtError(thiprtDestroyFuncTable)(hiprtContext context,
hiprtFuncTable funcTable);
typedef void(thiprtSetLogLevel)( hiprtLogLevel level );
@ -94,6 +172,8 @@ extern thiprtBuildScene *hiprtBuildScene;
extern thiprtGetSceneBuildTemporaryBufferSize *hiprtGetSceneBuildTemporaryBufferSize;
extern thiprtCreateFuncTable *hiprtCreateFuncTable;
extern thiprtSetFuncTable *hiprtSetFuncTable;
extern thiprtCreateGlobalStackBuffer *hiprtCreateGlobalStackBuffer;
extern thiprtDestroyGlobalStackBuffer *hiprtDestroyGlobalStackBuffer;
extern thiprtDestroyFuncTable *hiprtDestroyFuncTable;
extern thiprtSetLogLevel *hiprtSetLogLevel;

View File

@ -233,7 +233,9 @@ static int hipewHipInit(void) {
/* Library paths. */
#ifdef _WIN32
/* Expected in C:/Windows/System32 or similar, no path needed. */
const char *hip_paths[] = {"amdhip64.dll", "amdhip64_6.dll", NULL};
const char *hip_paths[] = {"amdhip64_6.dll", "amdhip64.dll", NULL};
#elif defined(__APPLE__)
/* Default installation path. */

View File

@ -40,6 +40,8 @@ thiprtBuildScene *hiprtBuildScene;
thiprtGetSceneBuildTemporaryBufferSize *hiprtGetSceneBuildTemporaryBufferSize;
thiprtCreateFuncTable *hiprtCreateFuncTable;
thiprtSetFuncTable *hiprtSetFuncTable;
thiprtCreateGlobalStackBuffer *hiprtCreateGlobalStackBuffer;
thiprtDestroyGlobalStackBuffer *hiprtDestroyGlobalStackBuffer;
thiprtDestroyFuncTable *hiprtDestroyFuncTable;
thiprtSetLogLevel *hiprtSetLogLevel;
@ -89,7 +91,9 @@ bool hiprtewInit()
HIPRT_LIBRARY_FIND(hiprtGetSceneBuildTemporaryBufferSize)
HIPRT_LIBRARY_FIND(hiprtCreateFuncTable)
HIPRT_LIBRARY_FIND(hiprtSetFuncTable)
HIPRT_LIBRARY_FIND(hiprtCreateGlobalStackBuffer)
HIPRT_LIBRARY_FIND(hiprtDestroyFuncTable)
HIPRT_LIBRARY_FIND(hiprtDestroyGlobalStackBuffer)
HIPRT_LIBRARY_FIND(hiprtSetLogLevel)
result = true;

View File

@ -1735,10 +1735,9 @@ class CyclesPreferences(bpy.types.AddonPreferences):
if compute_device_type == 'HIP':
import platform
if platform.system() == "Windows": # HIP-RT is currently only supported on Windows
row = layout.row()
row.active = has_rt_api_support['HIP']
row.prop(self, "use_hiprt")
row = layout.row()
row.active = has_rt_api_support['HIP']
row.prop(self, "use_hiprt")
elif compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
row = layout.row()

View File

@ -59,7 +59,6 @@ BVHLayoutMask HIPRTDevice::get_bvh_layout_mask(const uint /* kernel_features */)
HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: HIPDevice(info, stats, profiler),
global_stack_buffer(this, "global_stack_buffer", MEM_DEVICE_ONLY),
hiprt_context(NULL),
scene(NULL),
functions_table(NULL),
@ -77,6 +76,7 @@ HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
prim_time_offset(this, "prim_time_offset", MEM_GLOBAL)
{
HIPContextScope scope(this);
global_stack_buffer = {0};
hiprtContextCreationInput hiprt_context_input = {0};
hiprt_context_input.ctxt = hipContext;
hiprt_context_input.device = hipDevice;
@ -90,7 +90,7 @@ HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
}
rt_result = hiprtCreateFuncTable(
hiprt_context, Max_Primitive_Type, Max_Intersect_Filter_Function, &functions_table);
hiprt_context, Max_Primitive_Type, Max_Intersect_Filter_Function, functions_table);
if (rt_result != hiprtSuccess) {
set_error(string_printf("Failed to create HIPRT Function Table"));
@ -113,7 +113,8 @@ HIPRTDevice::~HIPRTDevice()
custom_prim_info.free();
prim_time_offset.free();
prims_time.free();
global_stack_buffer.free();
hiprtDestroyGlobalStackBuffer(hiprt_context, global_stack_buffer);
hiprtDestroyFuncTable(hiprt_context, functions_table);
hiprtDestroyScene(hiprt_context, scene);
hiprtDestroyContext(hiprt_context);
@ -458,7 +459,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_triangle_blas(BVHHIPRT *bvh, Mesh *
bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;
geom_input.type = hiprtPrimitiveTypeAABBList;
geom_input.aabbList.primitive = &bvh->custom_prim_aabb;
geom_input.primitive.aabbList.aabbs = &bvh->custom_prim_aabb;
geom_input.geomType = Motion_Triangle;
}
else {
@ -490,7 +491,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_triangle_blas(BVHHIPRT *bvh, Mesh *
bvh->vertex_data.host_pointer = 0;
geom_input.type = hiprtPrimitiveTypeTriangleMesh;
geom_input.triangleMesh.primitive = &(bvh->triangle_mesh);
geom_input.primitive.triangleMesh = bvh->triangle_mesh;
}
return geom_input;
@ -613,7 +614,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_curve_blas(BVHHIPRT *bvh, Hair *hai
bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;
geom_input.type = hiprtPrimitiveTypeAABBList;
geom_input.aabbList.primitive = &bvh->custom_prim_aabb;
geom_input.primitive.aabbList.aabbs = &bvh->custom_prim_aabb;
geom_input.geomType = Curve;
return geom_input;
@ -714,7 +715,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_point_blas(BVHHIPRT *bvh, PointClou
bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;
geom_input.type = hiprtPrimitiveTypeAABBList;
geom_input.aabbList.primitive = &bvh->custom_prim_aabb;
geom_input.primitive.aabbList.aabbs = &bvh->custom_prim_aabb;
geom_input.geomType = Point;
return geom_input;
@ -761,13 +762,13 @@ void HIPRTDevice::build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions op
size_t blas_scratch_buffer_size = 0;
hiprtError rt_err = hiprtGetGeometryBuildTemporaryBufferSize(
hiprt_context, &geom_input, &options, &blas_scratch_buffer_size);
hiprt_context, geom_input, options, blas_scratch_buffer_size);
if (rt_err != hiprtSuccess) {
set_error(string_printf("Failed to get scratch buffer size for BLAS!"));
}
rt_err = hiprtCreateGeometry(hiprt_context, &geom_input, &options, &bvh->hiprt_geom);
rt_err = hiprtCreateGeometry(hiprt_context, geom_input, options, bvh->hiprt_geom);
if (rt_err != hiprtSuccess) {
set_error(string_printf("Failed to create BLAS!"));
@ -782,8 +783,8 @@ void HIPRTDevice::build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions op
}
rt_err = hiprtBuildGeometry(hiprt_context,
hiprtBuildOperationBuild,
&bvh->geom_input,
&options,
bvh->geom_input,
options,
(void *)(scratch_buffer.device_pointer),
0,
bvh->hiprt_geom);
@ -800,6 +801,7 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
{
size_t num_object = objects.size();
if (num_object == 0) {
return 0;
}
@ -932,7 +934,8 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
user_instance_id[num_instances] = blender_instance_id;
prim_visibility[num_instances] = mask;
hiprt_blas_ptr[num_instances] = (uint64_t)hiprt_geom_current;
hiprt_blas_ptr[num_instances].geometry = hiprt_geom_current;
hiprt_blas_ptr[num_instances].type = hiprtInstanceTypeGeometry;
num_instances++;
}
blas_ptr[blender_instance_id] = (uint64_t)hiprt_geom_current;
@ -961,13 +964,13 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
}
scene_input_ptr.instanceMasks = (void *)prim_visibility.device_pointer;
scene_input_ptr.instanceGeometries = (void *)hiprt_blas_ptr.device_pointer;
scene_input_ptr.instances = (void *)hiprt_blas_ptr.device_pointer;
scene_input_ptr.instanceTransformHeaders = (void *)transform_headers.device_pointer;
scene_input_ptr.instanceFrames = (void *)instance_transform_matrix.device_pointer;
hiprtScene scene = 0;
hiprtError rt_err = hiprtCreateScene(hiprt_context, &scene_input_ptr, &options, &scene);
hiprtError rt_err = hiprtCreateScene(hiprt_context, scene_input_ptr, options, scene);
if (rt_err != hiprtSuccess) {
set_error(string_printf("Failed to create TLAS"));
@ -975,7 +978,7 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
size_t tlas_scratch_buffer_size;
rt_err = hiprtGetSceneBuildTemporaryBufferSize(
hiprt_context, &scene_input_ptr, &options, &tlas_scratch_buffer_size);
hiprt_context, scene_input_ptr, options, tlas_scratch_buffer_size);
if (rt_err != hiprtSuccess) {
set_error(string_printf("Failed to get scratch buffer size for TLAS"));
@ -988,8 +991,8 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
rt_err = hiprtBuildScene(hiprt_context,
build_operation,
&scene_input_ptr,
&options,
scene_input_ptr,
options,
(void *)scratch_buffer.device_pointer,
0,
scene);

View File

@ -53,7 +53,7 @@ class HIPRTDevice : public HIPDevice {
return hiprt_context;
}
device_vector<int> global_stack_buffer;
hiprtGlobalStackBuffer global_stack_buffer;
protected:
enum Filter_Function { Closest = 0, Shadows, Local, Volume, Max_Intersect_Filter_Function };
@ -111,7 +111,7 @@ class HIPRTDevice : public HIPDevice {
* blas_ptr has all the valid pointers and null pointers and blas for any geometry can be
* directly retrieved from this array (used in subsurface scattering). */
device_vector<int> user_instance_id;
device_vector<uint64_t> hiprt_blas_ptr;
device_vector<hiprtInstance> hiprt_blas_ptr;
device_vector<uint64_t> blas_ptr;
/* custom_prim_info stores custom information for custom primitives for all the primitives in a

View File

@ -34,14 +34,25 @@ bool HIPRTDeviceQueue::enqueue(DeviceKernel kernel,
const HIPContextScope scope(hiprt_device_);
const HIPDeviceKernel &hip_kernel = hiprt_device_->kernels.get(kernel);
if (!hiprt_device_->global_stack_buffer.device_pointer) {
int max_path = num_concurrent_states(0);
hiprt_device_->global_stack_buffer.alloc(max_path * HIPRT_SHARED_STACK_SIZE * sizeof(int));
hiprt_device_->global_stack_buffer.zero_to_device();
/* Create the HIPRT global stack buffer on first use: it is only allocated when
 * no stack data has been set up yet, sized by the number of concurrent states. */
if (!hiprt_device_->global_stack_buffer.stackData) {
  const uint32_t max_path = num_concurrent_states(0);
  const hiprtGlobalStackBufferInput stack_buffer_input{
      hiprtStackTypeGlobal, hiprtStackEntryTypeInteger, HIPRT_THREAD_STACK_SIZE, max_path};
  const hiprtError rt_result = hiprtCreateGlobalStackBuffer(hiprt_device_->get_hiprt_context(),
                                                            stack_buffer_input,
                                                            hiprt_device_->global_stack_buffer);
  if (rt_result != hiprtSuccess) {
    /* Report the failure on the device. Formatting the message with
     * string_printf() alone discards it, so the caller would only see `false`
     * with no explanation. */
    hiprt_device_->set_error("Failed to create hiprt Global Stack Buffer");
    return false;
  }
}
DeviceKernelArguments args_copy = args;
args_copy.add(&hiprt_device_->global_stack_buffer.device_pointer);
args_copy.add(DeviceKernelArguments::HIPRT_GLOBAL_STACK,
(void *)(&hiprt_device_->global_stack_buffer),
sizeof(hiprtGlobalStackBuffer));
/* Compute kernel launch parameters. */
const int num_threads_per_block = HIPRT_THREAD_GROUP_SIZE;

View File

@ -23,12 +23,7 @@ struct KernelWorkTile;
/* Container for device kernel arguments with type correctness ensured by API. */
struct DeviceKernelArguments {
enum Type {
POINTER,
INT32,
FLOAT32,
KERNEL_FILM_CONVERT,
};
enum Type { POINTER, INT32, FLOAT32, KERNEL_FILM_CONVERT, HIPRT_GLOBAL_STACK };
static const int MAX_ARGS = 18;
Type types[MAX_ARGS];

View File

@ -671,12 +671,16 @@ endif()
# HIP RT module
if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
set(hiprt_sources device/hiprt/kernel.cpp
${SRC_KERNEL_HEADERS}
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_HIPRT_HEADERS}
${SRC_UTIL_HEADERS})
set(bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc)
set(cycles_bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc)
set(sdk_bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/hiprt${HIPRT_VERSION}_${HIP_VERSION_SHORT}_amd_lib.bc)
set(bvh_file ${CMAKE_CURRENT_BINARY_DIR}/hiprt${HIPRT_VERSION}_${HIP_VERSION_SHORT}_amd.hipfb)
set(bvh_file_oro ${CMAKE_CURRENT_BINARY_DIR}/oro_compiled_kernels.hipfb)
set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb)
set(kernel_sources ${hiprt_sources})
set(hiprt_kernel_src "/device/hiprt/kernel.cpp")
@ -688,12 +692,79 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
else()
set(hiprt_compile_command ${HIP_HIPCC_EXECUTABLE})
set(hiprt_compile_flags)
#set(hiprt_compile_flags -mcode-object-version=4)
endif()
set(target_gpus)
foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
list(APPEND target_gpus "--offload-arch=${arch}")
endforeach()
set(hiprt_compile_flags
if(WITH_CYCLES_DEBUG)
set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_CYCLES_DEBUG)
endif()
# Arguments handed to ${hiprt_compile_command} to compile
# hiprt/impl/hiprt_kernels.h into ${bvh_file}. Argument order is preserved.
set(hiprt_compile_flags_bvh
  ${hiprt_compile_flags}
  ${target_gpus}
  ${HIP_HIPCC_FLAGS}
  -x hip ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels.h
  ${flags}
  -D HIPRT_BITCODE_LINKING
  -std=c++17
  -mllvm -amdgpu-early-inline-all=false
  -mllvm -amdgpu-function-calls=true
  --genco
  -I ${HIPRT_INCLUDE_DIR}
  -Wno-parentheses-equality
  -Wno-unused-value
  -ffast-math
  -o ${bvh_file}
)
# Arguments handed to ${hiprt_compile_command} to compile the Orochi
# ParallelPrimitives/RadixSortKernels.h header into ${bvh_file_oro}.
# Argument order is preserved.
set(hiprt_compile_flags_bvh_oro
  ${hiprt_compile_flags}
  ${target_gpus}
  ${HIP_HIPCC_FLAGS}
  -x hip ${HIPRT_INCLUDE_DIR}/orochi/ParallelPrimitives/RadixSortKernels.h
  ${flags}
  -D HIPRT_BITCODE_LINKING
  -std=c++17
  -mllvm -amdgpu-early-inline-all=false
  -mllvm -amdgpu-function-calls=true
  --genco
  -I ${HIPRT_INCLUDE_DIR}/orochi
  -include hip/hip_runtime.h
  -Wno-parentheses-equality
  -Wno-unused-value
  -ffast-math
  -o ${bvh_file_oro}
)
# Arguments handed to ${hiprt_compile_command} to compile
# hiprt/impl/hiprt_kernels_bitcode.h into ${sdk_bitcode_file}.
# Argument order is preserved.
set(hiprt_compile_flags_sdk_bc
  ${hiprt_compile_flags}
  ${target_gpus}
  ${HIP_HIPCC_FLAGS}
  ${flags}
  -x hip ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels_bitcode.h
  -O3
  -D HIPRT_BITCODE_LINKING
  -std=c++17
  -fgpu-rdc
  -c
  --gpu-bundle-output
  -emit-llvm
  -I ${HIPRT_INCLUDE_DIR}
  -I ${HIPRT_INCLUDE_DIR}/orochi
  -Wno-parentheses-equality
  -Wno-unused-value
  -ffast-math
  -o ${sdk_bitcode_file}
)
set(hiprt_compile_flags_cycles_bc
${hiprt_compile_flags}
${target_gpus}
${HIP_HIPCC_FLAGS}
@ -714,37 +785,56 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
-Wno-parentheses-equality
-Wno-unused-value
-ffast-math
-o ${bitcode_file})
if(WITH_CYCLES_DEBUG)
set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_CYCLES_DEBUG)
endif()
-o ${cycles_bitcode_file})
add_custom_command(
OUTPUT ${bitcode_file}
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags}
OUTPUT ${cycles_bitcode_file}
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_cycles_bc}
DEPENDS ${kernel_sources})
if(WIN32)
set(hiprt_link_command ${CMAKE_COMMAND})
set(hiprt_link_flags -E env "HIP_PATH=${HIP_ROOT_DIR}"
${HIP_LINKER_EXECUTABLE})
else()
# not implemented yet
endif()
# Build rule for ${sdk_bitcode_file}: runs the HIPRT compile command with the
# SDK bitcode flags, and re-runs when hiprt_kernels_bitcode.h changes.
add_custom_command(
OUTPUT ${sdk_bitcode_file}
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_sdk_bc}
DEPENDS ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels_bitcode.h)
add_custom_command(

Nitpick: use 1 instead of 2 empty lines.

Nitpick: use 1 instead of 2 empty lines.
OUTPUT ${bvh_file}
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_bvh}
DEPENDS ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels.h)
# Build rule for ${bvh_file_oro}: runs the HIPRT compile command with the
# Orochi flags, and re-runs when RadixSortKernels.h changes.
add_custom_command(
OUTPUT ${bvh_file_oro}
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_bvh_oro}
DEPENDS ${HIPRT_INCLUDE_DIR}/orochi/ParallelPrimitives/RadixSortKernels.h)
set(hiprt_link_command ${CMAKE_COMMAND})
set(hiprt_link_flags -E env "HIP_PATH=${HIP_ROOT_DIR}"
${HIP_LINKER_EXECUTABLE})
set(hiprt_link_flags
${hiprt_link_flags}
${target_gpus}
-fgpu-rdc
-mcode-object-version=4
--hip-link
--cuda-device-only
${bitcode_file}
${HIPRT_BITCODE}
${cycles_bitcode_file}
${sdk_bitcode_file}
-o ${hiprt_file})
add_custom_command(
OUTPUT ${hiprt_file}
COMMAND ${hiprt_link_command} ${hiprt_link_flags}
DEPENDS ${bitcode_file})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file}" ${CYCLES_INSTALL_PATH}/lib)
add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file})
cycles_set_solution_folder(cycles_kernel_hiprt)
add_custom_command(
OUTPUT ${hiprt_file}
COMMAND ${hiprt_link_command} ${hiprt_link_flags}
DEPENDS ${cycles_bitcode_file} ${sdk_bitcode_file})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file}" ${CYCLES_INSTALL_PATH}/lib)
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${bvh_file}" ${CMAKE_INSTALL_PREFIX})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${bvh_file_oro}" ${CMAKE_INSTALL_PREFIX})
delayed_install("${HIPRT_INCLUDE_DIR}/bin" "${HIPRT_DYNAMIC_LIB}" ${CMAKE_INSTALL_PREFIX})
add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file} ${bvh_file} ${bvh_file_oro})
cycles_set_solution_folder(cycles_kernel_hiprt)
endif()
# OptiX PTX modules

View File

@ -116,7 +116,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
void *local_geom = (void *)(kernel_data_fetch(blas_ptr, local_object));
// we don't need custom intersection functions for SSR
# ifdef HIPRT_SHARED_STACK
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal(local_geom,
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal((hiprtGeometry)local_geom,
ray_hip,
stack,
hiprtTraversalHintDefault,

View File

@ -44,19 +44,20 @@ struct LocalPayload {
# if defined(HIPRT_SHARED_STACK)
# define GET_TRAVERSAL_STACK() \
Stack stack(&kg->global_stack_buffer[0], \
HIPRT_THREAD_STACK_SIZE, \
kg->shared_stack, \
HIPRT_SHARED_STACK_SIZE);
Stack stack(kg->global_stack_buffer, \
kg->shared_stack); \
Instance_Stack instance_stack;
# else
# define GET_TRAVERSAL_STACK()
# endif
# ifdef HIPRT_SHARED_STACK
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
hiprtSceneTraversalAnyHitCustomStack<Stack> traversal(kernel_data.device_bvh, \
hiprtSceneTraversalAnyHitCustomStack<Stack, Instance_Stack> traversal( \
(hiprtScene)kernel_data.device_bvh, \
ray_hip, \
stack, \
instance_stack, \
visibility, \
hiprtTraversalHintDefault, \
&payload, \
@ -65,9 +66,11 @@ struct LocalPayload {
RAY_TIME);
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
hiprtSceneTraversalClosestCustomStack<Stack> traversal(kernel_data.device_bvh, \
hiprtSceneTraversalClosestCustomStack<Stack, Instance_Stack> traversal( \
(hiprtScene)kernel_data.device_bvh, \
ray_hip, \
stack, \
instance_stack, \
visibility, \
hiprtTraversalHintDefault, \
&payload, \
@ -614,14 +617,14 @@ ccl_device_inline bool volume_intersection_filter(const hiprtRay &ray,
return false;
}
HIPRT_DEVICE bool intersectFunc(u32 geomType,
u32 rayType,
HIPRT_DEVICE bool intersectFunc(uint geomType,
uint rayType,
const hiprtFuncTableHeader &tableHeader,
const hiprtRay &ray,
void *payload,
hiprtHit &hit)
{
const u32 index = tableHeader.numGeomTypes * rayType + geomType;
const uint index = tableHeader.numGeomTypes * rayType + geomType;
const void *data = tableHeader.funcDataSets[index].filterFuncData;
switch (index) {
case Curve_Intersect_Function:
@ -643,14 +646,14 @@ HIPRT_DEVICE bool intersectFunc(u32 geomType,
return false;
}
HIPRT_DEVICE bool filterFunc(u32 geomType,
u32 rayType,
HIPRT_DEVICE bool filterFunc(uint geomType,
uint rayType,
const hiprtFuncTableHeader &tableHeader,
const hiprtRay &ray,
void *payload,
const hiprtHit &hit)
{
const u32 index = tableHeader.numGeomTypes * rayType + geomType;
const uint index = tableHeader.numGeomTypes * rayType + geomType;
const void *data = tableHeader.funcDataSets[index].intersectFuncData;
switch (index) {
case Triangle_Filter_Closest:

View File

@ -31,9 +31,9 @@
CCL_NAMESPACE_BEGIN
struct KernelGlobalsGPU {
int *global_stack_buffer;
hiprtGlobalStackBuffer global_stack_buffer;
#ifdef HIPRT_SHARED_STACK
int *shared_stack;
hiprtSharedStackBuffer shared_stack;
#endif
};
@ -47,7 +47,8 @@ typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals;
ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \
ccl_global KernelGlobalsGPU kg_gpu; \
KernelGlobals kg = &kg_gpu; \
kg->shared_stack = &shared_stack[0]; \
kg->shared_stack.stackData = &shared_stack[0]; \
kg->shared_stack.stackSize = HIPRT_SHARED_STACK_SIZE; \
kg->global_stack_buffer = stack_buffer;
#else
# define HIPRT_INIT_KERNEL_GLOBAL() \
@ -146,6 +147,7 @@ __constant__ KernelParamsHIPRT kernel_params;
# ifdef HIPRT_SHARED_STACK
typedef hiprtGlobalStack Stack;
typedef hiprtEmptyInstanceStack Instance_Stack;
# endif
#endif

View File

@ -9,7 +9,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
@ -25,7 +25,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_shadow,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
@ -41,7 +41,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_subsurface,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
@ -57,7 +57,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_volume_stack,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
@ -72,7 +72,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_dedicated_light,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
@ -89,7 +89,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
@ -104,7 +104,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {

@ -1 +1 @@
Subproject commit a5521c85e03bfd1556ff1e63bf7163235c401497
Subproject commit 19b2b87f5ef0d8caa39e0882fbf832052974b785

This change to the submodule hash should be left out.

This change to the submodule hash should be left out.

Happened accidentally. How can I roll it back?

Happened accidentally. How can I roll it back?

Like this:

cd lib/windows_x64
git checkout a5521c85e03bfd1556ff1e63bf7163235c401497
cd ../..
git commit lib/windows_x64
Like this:

```
cd lib/windows_x64
git checkout a5521c85e03bfd1556ff1e63bf7163235c401497
cd ../..
git commit lib/windows_x64
```

View File

@ -1858,6 +1858,24 @@ if(WIN32)
endforeach()
endif()
# Install the HIPRT runtime DLLs alongside the Windows build when Cycles HIPRT
# support is enabled. The Release and Debug DLLs are checked independently so
# that a missing Debug DLL does not get registered for installation just
# because the Release DLL is present (and vice versa).
if(WIN32)
  if(WITH_CYCLES_DEVICE_HIPRT)
    # NOTE(review): placeholder. The author notes that HIPRT_VERSION is not in
    # scope here; if so it expands to an empty string and these paths will not
    # match the versioned DLL names. Make the version available in this scope
    # before relying on this block.
    if(EXISTS "${LIBDIR}/hiprt/bin/hiprt${HIPRT_VERSION}64.dll")
      windows_install_shared_manifest(
        FILES
          ${LIBDIR}/hiprt/bin/hiprt${HIPRT_VERSION}64.dll
        RELEASE
      )
    endif()
    if(EXISTS "${LIBDIR}/hiprt/bin/hiprt${HIPRT_VERSION}64D.dll")
      windows_install_shared_manifest(
        FILES
          ${LIBDIR}/hiprt/bin/hiprt${HIPRT_VERSION}64D.dll
        DEBUG
      )
    endif()
  endif()
endif()
# `vcpkg` substitutes our libraries with theirs, which will cause issues when you run
# these builds on other systems due to missing DLLs. So we opt out of the use of `vcpkg`.
if(WIN32)