This repository has been archived on 2023-10-09. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
blender-archive/intern/cycles/device/optix/queue.cpp
Patrick Mours e6b38deb9d Cycles: Add basic support for using OSL with OptiX
This patch  generalizes the OSL support in Cycles to include GPU
device types and adds an implementation for that in the OptiX
device. There are some caveats still, including simplified texturing
due to lack of OIIO on the GPU and a few missing OSL intrinsics.

Note that this is incomplete and missing an update to the OSL
library before being enabled! The implementation is already
committed now to simplify further development.

Maniphest Tasks: T101222

Differential Revision: https://developer.blender.org/D15902
2022-11-09 15:30:21 +01:00

200 lines
7.4 KiB
C++

/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifdef WITH_OPTIX
# include "device/optix/queue.h"
# include "device/optix/device_impl.h"
# include "util/time.h"
# define __KERNEL_OPTIX__
# include "kernel/device/optix/globals.h"
CCL_NAMESPACE_BEGIN
/* CUDADeviceQueue */
OptiXDeviceQueue::OptiXDeviceQueue(OptiXDevice *device) : CUDADeviceQueue(device)
{
}
void OptiXDeviceQueue::init_execution()
{
CUDADeviceQueue::init_execution();
}
static bool is_optix_specific_kernel(DeviceKernel kernel, bool use_osl)
{
# ifdef WITH_OSL
/* OSL uses direct callables to execute, so shading needs to be done in OptiX if OSL is used. */
if (use_osl && device_kernel_has_shading(kernel)) {
return true;
}
# else
(void)use_osl;
# endif
return device_kernel_has_intersection(kernel);
}
bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args)
{
OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_);
# ifdef WITH_OSL
const bool use_osl = static_cast<OSLGlobals *>(optix_device->get_cpu_osl_memory())->use;
# else
const bool use_osl = false;
# endif
if (!is_optix_specific_kernel(kernel, use_osl)) {
return CUDADeviceQueue::enqueue(kernel, work_size, args);
}
if (cuda_device_->have_error()) {
return false;
}
debug_enqueue_begin(kernel, work_size);
const CUDAContextScope scope(cuda_device_);
const device_ptr sbt_data_ptr = optix_device->sbt_data.device_pointer;
const device_ptr launch_params_ptr = optix_device->launch_params.device_pointer;
cuda_device_assert(
cuda_device_,
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, path_index_array),
args.values[0], // &d_path_index
sizeof(device_ptr),
cuda_stream_));
if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || device_kernel_has_shading(kernel)) {
cuda_device_assert(
cuda_device_,
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
args.values[1], // &d_render_buffer
sizeof(device_ptr),
cuda_stream_));
}
if (kernel == DEVICE_KERNEL_SHADER_EVAL_DISPLACE ||
kernel == DEVICE_KERNEL_SHADER_EVAL_BACKGROUND ||
kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY) {
cuda_device_assert(cuda_device_,
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, offset),
args.values[2], // &d_offset
sizeof(int32_t),
cuda_stream_));
}
cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_));
OptixPipeline pipeline = nullptr;
OptixShaderBindingTable sbt_params = {};
switch (kernel) {
case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_BACKGROUND * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_LIGHT * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_VOLUME * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SHADOW * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
pipeline = optix_device->pipelines[PIP_INTERSECT];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
pipeline = optix_device->pipelines[PIP_INTERSECT];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_SHADOW * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
pipeline = optix_device->pipelines[PIP_INTERSECT];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_SUBSURFACE * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
pipeline = optix_device->pipelines[PIP_INTERSECT];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_VOLUME_STACK * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_SHADER_EVAL_DISPLACE:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_DISPLACE * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_BACKGROUND * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
pipeline = optix_device->pipelines[PIP_SHADE];
sbt_params.raygenRecord = sbt_data_ptr +
PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY * sizeof(SbtRecord);
break;
default:
LOG(ERROR) << "Invalid kernel " << device_kernel_as_string(kernel)
<< " is attempted to be enqueued.";
return false;
}
sbt_params.missRecordBase = sbt_data_ptr + MISS_PROGRAM_GROUP_OFFSET * sizeof(SbtRecord);
sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
sbt_params.missRecordCount = NUM_MISS_PROGRAM_GROUPS;
sbt_params.hitgroupRecordBase = sbt_data_ptr + HIT_PROGAM_GROUP_OFFSET * sizeof(SbtRecord);
sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
sbt_params.hitgroupRecordCount = NUM_HIT_PROGRAM_GROUPS;
sbt_params.callablesRecordBase = sbt_data_ptr + CALLABLE_PROGRAM_GROUPS_BASE * sizeof(SbtRecord);
sbt_params.callablesRecordCount = NUM_CALLABLE_PROGRAM_GROUPS;
sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord);
# ifdef WITH_OSL
if (use_osl) {
sbt_params.callablesRecordCount += static_cast<unsigned int>(optix_device->osl_groups.size());
}
# endif
/* Launch the ray generation program. */
optix_device_assert(optix_device,
optixLaunch(pipeline,
cuda_stream_,
launch_params_ptr,
optix_device->launch_params.data_elements,
&sbt_params,
work_size,
1,
1));
debug_enqueue_end();
return !(optix_device->have_error());
}
CCL_NAMESPACE_END
#endif /* WITH_OPTIX */