1
1

Cycles: MetalRT support (kernel side)

This patch adds MetalRT support to Cycles kernel code. It is mostly additive in nature or confined to Metal-specific code, however there are a few areas where this interacts with other code:

- MetalRT closely follows the Optix implementation, and in some cases (notably handling of transforms) it makes sense to extend Optix special-casing to MetalRT. For these generalisations we now have `__KERNEL_GPU_RAYTRACING__` instead of `__KERNEL_OPTIX__`.
- MetalRT doesn't support primitive offsetting (as with `primitiveIndexOffset` in Optix), so we define and populate a new kernel texture, `__object_prim_offset`, containing per-object primitive / curve-segment offsets. This is referenced and applied in MetalRT intersection handlers.
- Two new BVH layout enum values have been added: `BVH_LAYOUT_METAL` and `BVH_LAYOUT_MULTI_METAL_EMBREE` for XPU mode). Some host-side enum case handling has been updated where it is trivial to do so.

Ref T92212

Reviewed By: brecht

Maniphest Tasks: T92212

Differential Revision: https://developer.blender.org/D13353
This commit is contained in:
2021-11-29 15:06:22 +00:00
parent 98a5c924fc
commit f613c4c095
24 changed files with 1010 additions and 32 deletions

View File

@@ -40,8 +40,11 @@ const char *bvh_layout_name(BVHLayout layout)
return "EMBREE";
case BVH_LAYOUT_OPTIX:
return "OPTIX";
case BVH_LAYOUT_METAL:
return "METAL";
case BVH_LAYOUT_MULTI_OPTIX:
case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
case BVH_LAYOUT_MULTI_METAL_EMBREE:
return "MULTI";
case BVH_LAYOUT_ALL:
return "ALL";
@@ -105,7 +108,10 @@ BVH *BVH::create(const BVHParams &params,
#endif
case BVH_LAYOUT_MULTI_OPTIX:
case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
case BVH_LAYOUT_MULTI_METAL_EMBREE:
return new BVHMulti(params, geometry, objects);
case BVH_LAYOUT_METAL:
/* host-side changes for BVH_LAYOUT_METAL are imminent */
case BVH_LAYOUT_NONE:
case BVH_LAYOUT_ALL:
break;

View File

@@ -274,7 +274,8 @@ void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
#ifdef WITH_EMBREE
if (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE) {
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE) {
BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
if (refit) {
bvh_embree->refit(progress);

View File

@@ -129,6 +129,10 @@ class MultiDevice : public Device {
if ((bvh_layout_mask_all & BVH_LAYOUT_OPTIX_EMBREE) == BVH_LAYOUT_OPTIX_EMBREE) {
return BVH_LAYOUT_MULTI_OPTIX_EMBREE;
}
const BVHLayoutMask BVH_LAYOUT_METAL_EMBREE = (BVH_LAYOUT_METAL | BVH_LAYOUT_EMBREE);
if ((bvh_layout_mask_all & BVH_LAYOUT_METAL_EMBREE) == BVH_LAYOUT_METAL_EMBREE) {
return BVH_LAYOUT_MULTI_METAL_EMBREE;
}
return bvh_layout_mask;
}
@@ -151,7 +155,8 @@ class MultiDevice : public Device {
}
assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE);
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE);
BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
bvh_multi->sub_bvhs.resize(devices.size());

View File

@@ -207,6 +207,7 @@ set(SRC_KERNEL_BVH_HEADERS
bvh/volume.h
bvh/volume_all.h
bvh/embree.h
bvh/metal.h
)
set(SRC_KERNEL_CAMERA_HEADERS

View File

@@ -31,6 +31,10 @@
# include "kernel/bvh/embree.h"
#endif
#ifdef __METALRT__
# include "kernel/bvh/metal.h"
#endif
#include "kernel/bvh/types.h"
#include "kernel/bvh/util.h"
@@ -38,7 +42,7 @@
CCL_NAMESPACE_BEGIN
#ifndef __KERNEL_OPTIX__
#if !defined(__KERNEL_GPU_RAYTRACING__)
/* Regular BVH traversal */
@@ -139,7 +143,7 @@ CCL_NAMESPACE_BEGIN
# undef BVH_NAME_EVAL
# undef BVH_FUNCTION_FULL_NAME
#endif /* __KERNEL_OPTIX__ */
#endif /* !defined(__KERNEL_GPU_RAYTRACING__) */
ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray)
{
@@ -205,7 +209,95 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
isect->type = p5;
return p5 != PRIMITIVE_NONE;
#else /* __KERNEL_OPTIX__ */
#elif defined(__METALRT__)
if (!scene_intersect_valid(ray)) {
isect->t = ray->t;
isect->type = PRIMITIVE_NONE;
return false;
}
# if defined(__KERNEL_DEBUG__)
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
isect->t = ray->t;
isect->type = PRIMITIVE_NONE;
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
return false;
}
if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
isect->t = ray->t;
isect->type = PRIMITIVE_NONE;
kernel_assert(!"Invalid ift_default");
return false;
}
# endif
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
metalrt_intersector_type metalrt_intersect;
if (!kernel_data.bvh.have_curves) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}
MetalRTIntersectionPayload payload;
payload.u = 0.0f;
payload.v = 0.0f;
payload.visibility = visibility;
typename metalrt_intersector_type::result_type intersection;
uint ray_mask = visibility & 0xFF;
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
ray_mask = 0xFF;
/* No further intersector setup required: Default MetalRT behaviour is anyhit */
}
else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
/* No further intersector setup required: Shadow ray early termination is controlled by the
* intersection handler */
}
# if defined(__METALRT_MOTION__)
payload.time = ray->time;
intersection = metalrt_intersect.intersect(r,
metal_ancillaries->accel_struct,
ray_mask,
ray->time,
metal_ancillaries->ift_default,
payload);
# else
intersection = metalrt_intersect.intersect(
r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
# endif
if (intersection.type == intersection_type::none) {
isect->t = ray->t;
isect->type = PRIMITIVE_NONE;
return false;
}
isect->t = intersection.distance;
isect->prim = payload.prim;
isect->type = payload.type;
isect->object = intersection.user_instance_id;
isect->t = intersection.distance;
if (intersection.type == intersection_type::triangle) {
isect->u = 1.0f - intersection.triangle_barycentric_coord.y -
intersection.triangle_barycentric_coord.x;
isect->v = intersection.triangle_barycentric_coord.x;
}
else {
isect->u = payload.u;
isect->v = payload.v;
}
return isect->type != PRIMITIVE_NONE;
#else
if (!scene_intersect_valid(ray)) {
return false;
}
@@ -289,7 +381,69 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
p5);
return p5;
# else /* __KERNEL_OPTIX__ */
# elif defined(__METALRT__)
if (!scene_intersect_valid(ray)) {
if (local_isect) {
local_isect->num_hits = 0;
}
return false;
}
# if defined(__KERNEL_DEBUG__)
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
if (local_isect) {
local_isect->num_hits = 0;
}
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
return false;
}
if (is_null_intersection_function_table(metal_ancillaries->ift_local)) {
if (local_isect) {
local_isect->num_hits = 0;
}
kernel_assert(!"Invalid ift_local");
return false;
}
# endif
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
metalrt_intersector_type metalrt_intersect;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
if (!kernel_data.bvh.have_curves) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}
MetalRTIntersectionLocalPayload payload;
payload.local_object = local_object;
payload.max_hits = max_hits;
payload.local_isect.num_hits = 0;
if (lcg_state) {
payload.has_lcg_state = true;
payload.lcg_state = *lcg_state;
}
payload.result = false;
typename metalrt_intersector_type::result_type intersection;
# if defined(__METALRT_MOTION__)
intersection = metalrt_intersect.intersect(
r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload);
# else
intersection = metalrt_intersect.intersect(
r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload);
# endif
if (lcg_state) {
*lcg_state = payload.lcg_state;
}
*local_isect = payload.local_isect;
return payload.result;
# else
if (!scene_intersect_valid(ray)) {
if (local_isect) {
local_isect->num_hits = 0;
@@ -406,7 +560,67 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
*throughput = __uint_as_float(p1);
return p5;
# else /* __KERNEL_OPTIX__ */
# elif defined(__METALRT__)
if (!scene_intersect_valid(ray)) {
return false;
}
# if defined(__KERNEL_DEBUG__)
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
return false;
}
if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) {
kernel_assert(!"Invalid ift_shadow");
return false;
}
# endif
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
metalrt_intersector_type metalrt_intersect;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
if (!kernel_data.bvh.have_curves) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}
MetalRTIntersectionShadowPayload payload;
payload.visibility = visibility;
payload.max_hits = max_hits;
payload.num_hits = 0;
payload.num_recorded_hits = 0;
payload.throughput = 1.0f;
payload.result = false;
payload.state = state;
uint ray_mask = visibility & 0xFF;
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
ray_mask = 0xFF;
}
typename metalrt_intersector_type::result_type intersection;
# if defined(__METALRT_MOTION__)
payload.time = ray->time;
intersection = metalrt_intersect.intersect(r,
metal_ancillaries->accel_struct,
ray_mask,
ray->time,
metal_ancillaries->ift_shadow,
payload);
# else
intersection = metalrt_intersect.intersect(
r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload);
# endif
*num_recorded_hits = payload.num_recorded_hits;
*throughput = payload.throughput;
return payload.result;
# else
if (!scene_intersect_valid(ray)) {
*num_recorded_hits = 0;
*throughput = 1.0f;
@@ -503,7 +717,76 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
isect->type = p5;
return p5 != PRIMITIVE_NONE;
# else /* __KERNEL_OPTIX__ */
# elif defined(__METALRT__)
if (!scene_intersect_valid(ray)) {
return false;
}
# if defined(__KERNEL_DEBUG__)
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
return false;
}
if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
kernel_assert(!"Invalid ift_default");
return false;
}
# endif
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
metalrt_intersector_type metalrt_intersect;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
if (!kernel_data.bvh.have_curves) {
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
}
MetalRTIntersectionPayload payload;
payload.visibility = visibility;
typename metalrt_intersector_type::result_type intersection;
uint ray_mask = visibility & 0xFF;
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
ray_mask = 0xFF;
}
# if defined(__METALRT_MOTION__)
payload.time = ray->time;
intersection = metalrt_intersect.intersect(r,
metal_ancillaries->accel_struct,
ray_mask,
ray->time,
metal_ancillaries->ift_default,
payload);
# else
intersection = metalrt_intersect.intersect(
r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
# endif
if (intersection.type == intersection_type::none) {
return false;
}
isect->prim = payload.prim;
isect->type = payload.type;
isect->object = intersection.user_instance_id;
isect->t = intersection.distance;
if (intersection.type == intersection_type::triangle) {
isect->u = 1.0f - intersection.triangle_barycentric_coord.y -
intersection.triangle_barycentric_coord.x;
isect->v = intersection.triangle_barycentric_coord.x;
}
else {
isect->u = payload.u;
isect->v = payload.v;
}
return isect->type != PRIMITIVE_NONE;
# else
if (!scene_intersect_valid(ray)) {
return false;
}

View File

@@ -0,0 +1,47 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
struct MetalRTIntersectionPayload {
uint visibility;
float u, v;
int prim;
int type;
#if defined(__METALRT_MOTION__)
float time;
#endif
};
struct MetalRTIntersectionLocalPayload {
uint local_object;
uint lcg_state;
short max_hits;
bool has_lcg_state;
bool result;
LocalIntersection local_isect;
};
struct MetalRTIntersectionShadowPayload {
uint visibility;
#if defined(__METALRT_MOTION__)
float time;
#endif
int state;
float throughput;
short max_hits;
short num_hits;
short num_recorded_hits;
bool result;
};

View File

@@ -21,6 +21,8 @@
#include "kernel/device/gpu/parallel_sorted_index.h"
#include "kernel/device/gpu/work_stealing.h"
#include "kernel/sample/lcg.h"
/* Include constant tables before entering Metal's context class scope (context_begin.h) */
#include "kernel/tables.h"

View File

@@ -32,6 +32,10 @@
using namespace metal;
#ifdef __METALRT__
using namespace metal::raytracing;
#endif
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wsign-compare"
#pragma clang diagnostic ignored "-Wuninitialized"
@@ -47,7 +51,7 @@ using namespace metal;
#define ccl_global device
#define ccl_inline_constant static constant constexpr
#define ccl_device_constant constant
#define ccl_constant const device
#define ccl_constant constant
#define ccl_gpu_shared threadgroup
#define ccl_private thread
#define ccl_may_alias
@@ -246,6 +250,22 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
#define __device__
#ifdef __METALRT__
# define __KERNEL_GPU_RAYTRACING__
# if defined(__METALRT_MOTION__)
# define METALRT_TAGS instancing, instance_motion, primitive_motion
# else
# define METALRT_TAGS instancing
# endif /* __METALRT_MOTION__ */
typedef acceleration_structure<METALRT_TAGS> metalrt_as_type;
typedef intersection_function_table<triangle_data, METALRT_TAGS> metalrt_ift_type;
typedef metal::raytracing::intersector<triangle_data, METALRT_TAGS> metalrt_intersector_type;
#endif /* __METALRT__ */
/* texture bindings and sampler setup */
struct Texture2DParamsMetal {
@@ -258,6 +278,13 @@ struct Texture3DParamsMetal {
struct MetalAncillaries {
device Texture2DParamsMetal *textures_2d;
device Texture3DParamsMetal *textures_3d;
#ifdef __METALRT__
metalrt_as_type accel_struct;
metalrt_ift_type ift_default;
metalrt_ift_type ift_shadow;
metalrt_ift_type ift_local;
#endif
};
#include "util/half.h"

View File

@@ -26,6 +26,10 @@ class MetalKernelContext {
MetalKernelContext(constant KernelParamsMetal &_launch_params_metal, constant MetalAncillaries * _metal_ancillaries)
: launch_params_metal(_launch_params_metal), metal_ancillaries(_metal_ancillaries)
{}
MetalKernelContext(constant KernelParamsMetal &_launch_params_metal)
: launch_params_metal(_launch_params_metal)
{}
/* texture fetch adapter functions */
typedef uint64_t ccl_gpu_tex_object;

View File

@@ -16,10 +16,566 @@
/* Metal kernel entry points */
// clang-format off
#include "kernel/device/metal/compat.h"
#include "kernel/device/metal/globals.h"
#include "kernel/device/gpu/kernel.h"
// clang-format on
/* MetalRT intersection handlers */
#ifdef __METALRT__
/* Return type for a bounding box intersection function. */
struct BoundingBoxIntersectionResult
{
bool accept [[accept_intersection]];
bool continue_search [[continue_search]];
float distance [[distance]];
};
/* Return type for a triangle intersection function. */
struct TriangleIntersectionResult
{
bool accept [[accept_intersection]];
bool continue_search [[continue_search]];
};
enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX };
template<typename TReturn, uint intersection_type>
TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload,
const uint object,
const uint primitive_id,
const float2 barycentrics,
const float ray_tmax)
{
TReturn result;
#ifdef __BVH_LOCAL__
uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
if (object != payload.local_object) {
/* Only intersect with matching object */
result.accept = false;
result.continue_search = true;
return result;
}
const short max_hits = payload.max_hits;
if (max_hits == 0) {
/* Special case for when no hit information is requested, just report that something was hit */
payload.result = true;
result.accept = true;
result.continue_search = false;
return result;
}
int hit = 0;
if (payload.has_lcg_state) {
for (short i = min(max_hits, short(payload.local_isect.num_hits)) - 1; i >= 0; --i) {
if (ray_tmax == payload.local_isect.hits[i].t) {
result.accept = false;
result.continue_search = true;
return result;
}
}
hit = payload.local_isect.num_hits++;
if (payload.local_isect.num_hits > max_hits) {
hit = lcg_step_uint(&payload.lcg_state) % payload.local_isect.num_hits;
if (hit >= max_hits) {
result.accept = false;
result.continue_search = true;
return result;
}
}
}
else {
if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) {
/* Record closest intersection only. Do not terminate ray here, since there is no guarantee about distance ordering in any-hit */
result.accept = false;
result.continue_search = true;
return result;
}
payload.local_isect.num_hits = 1;
}
ray_data Intersection *isect = &payload.local_isect.hits[hit];
isect->t = ray_tmax;
isect->prim = prim;
isect->object = object;
isect->type = kernel_tex_fetch(__objects, object).primitive_type;
isect->u = 1.0f - barycentrics.y - barycentrics.x;
isect->v = barycentrics.x;
/* Record geometric normal */
const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect->prim).w;
const float3 tri_a = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0));
const float3 tri_b = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1));
const float3 tri_c = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2));
payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
/* Continue tracing (without this the trace call would return after the first hit) */
result.accept = false;
result.continue_search = true;
return result;
#endif
}
[[intersection(triangle, triangle_data, METALRT_TAGS)]]
TriangleIntersectionResult
__anyhit__kernel_metalrt_local_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
uint instance_id [[user_instance_id]],
uint primitive_id [[primitive_id]],
float2 barycentrics [[barycentric_coord]],
float ray_tmax [[distance]])
{
return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax);
}
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
BoundingBoxIntersectionResult
__anyhit__kernel_metalrt_local_hit_box(const float ray_tmax [[max_distance]])
{
/* unused function */
BoundingBoxIntersectionResult result;
result.distance = ray_tmax;
result.accept = false;
result.continue_search = false;
return result;
}
template<uint intersection_type>
bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
uint object,
uint prim,
const float2 barycentrics,
const float ray_tmax)
{
#ifdef __SHADOW_RECORD_ALL__
# ifdef __VISIBILITY_FLAG__
const uint visibility = payload.visibility;
if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
/* continue search */
return true;
}
# endif
float u = 0.0f, v = 0.0f;
int type = 0;
if (intersection_type == METALRT_HIT_TRIANGLE) {
u = 1.0f - barycentrics.y - barycentrics.x;
v = barycentrics.x;
type = kernel_tex_fetch(__objects, object).primitive_type;
}
# ifdef __HAIR__
else {
u = barycentrics.x;
v = barycentrics.y;
const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
type = segment.type;
prim = segment.prim;
/* Filter out curve endcaps */
if (u == 0.0f || u == 1.0f) {
/* continue search */
return true;
}
}
# endif
# ifndef __TRANSPARENT_SHADOWS__
/* No transparent shadows support compiled in, make opaque. */
payload.result = true;
/* terminate ray */
return false;
# else
short max_hits = payload.max_hits;
short num_hits = payload.num_hits;
short num_recorded_hits = payload.num_recorded_hits;
MetalKernelContext context(launch_params_metal);
/* If no transparent shadows, all light is blocked and we can stop immediately. */
if (num_hits >= max_hits ||
!(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
payload.result = true;
/* terminate ray */
return false;
}
/* Always use baked shadow transparency for curves. */
if (type & PRIMITIVE_ALL_CURVE) {
float throughput = payload.throughput;
throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u);
payload.throughput = throughput;
payload.num_hits += 1;
if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
/* Accept result and terminate if throughput is sufficiently low */
payload.result = true;
return false;
}
else {
return true;
}
}
payload.num_hits += 1;
payload.num_recorded_hits += 1;
uint record_index = num_recorded_hits;
const IntegratorShadowState state = payload.state;
const uint max_record_hits = min(uint(max_hits), INTEGRATOR_SHADOW_ISECT_SIZE);
if (record_index >= max_record_hits) {
/* If maximum number of hits reached, find a hit to replace. */
float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
uint max_recorded_hit = 0;
for (int i = 1; i < max_record_hits; i++) {
const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
if (isect_t > max_recorded_t) {
max_recorded_t = isect_t;
max_recorded_hit = i;
}
}
if (ray_tmax >= max_recorded_t) {
/* Accept hit, so that we don't consider any more hits beyond the distance of the
* current hit anymore. */
payload.result = true;
return true;
}
record_index = max_recorded_hit;
}
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
/* Continue tracing. */
# endif /* __TRANSPARENT_SHADOWS__ */
#endif /* __SHADOW_RECORD_ALL__ */
return true;
}
[[intersection(triangle, triangle_data, METALRT_TAGS)]]
TriangleIntersectionResult
__anyhit__kernel_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
unsigned int object [[user_instance_id]],
unsigned int primitive_id [[primitive_id]],
float2 barycentrics [[barycentric_coord]],
float ray_tmax [[distance]])
{
uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
TriangleIntersectionResult result;
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>(
launch_params_metal, payload, object, prim, barycentrics, ray_tmax);
result.accept = !result.continue_search;
return result;
}
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
BoundingBoxIntersectionResult
__anyhit__kernel_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]])
{
/* unused function */
BoundingBoxIntersectionResult result;
result.distance = ray_tmax;
result.accept = false;
result.continue_search = false;
return result;
}
template<typename TReturnType, uint intersection_type>
inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_params_metal,
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
const uint object,
const uint prim,
const float u)
{
TReturnType result;
# ifdef __HAIR__
if (intersection_type == METALRT_HIT_BOUNDING_BOX) {
/* Filter out curve endcaps. */
if (u == 0.0f || u == 1.0f) {
result.accept = false;
result.continue_search = true;
return result;
}
}
# endif
# ifdef __VISIBILITY_FLAG__
uint visibility = payload.visibility;
if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
result.accept = false;
result.continue_search = true;
return result;
}
/* Shadow ray early termination. */
if (visibility & PATH_RAY_SHADOW_OPAQUE) {
result.accept = true;
result.continue_search = false;
return result;
}
# endif
result.accept = true;
result.continue_search = true;
return result;
}
[[intersection(triangle, triangle_data, METALRT_TAGS)]]
TriangleIntersectionResult
__anyhit__kernel_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
unsigned int object [[user_instance_id]],
unsigned int primitive_id [[primitive_id]])
{
uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
TriangleIntersectionResult result = metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
launch_params_metal, payload, object, prim, 0.0f);
if (result.accept) {
payload.prim = prim;
payload.type = kernel_tex_fetch(__objects, object).primitive_type;
}
return result;
}
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
BoundingBoxIntersectionResult
__anyhit__kernel_metalrt_visibility_test_box(const float ray_tmax [[max_distance]])
{
/* Unused function */
BoundingBoxIntersectionResult result;
result.accept = false;
result.continue_search = true;
result.distance = ray_tmax;
return result;
}
#ifdef __HAIR__
ccl_device_inline
void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
const uint object,
const uint prim,
const uint type,
const float3 ray_origin,
const float3 ray_direction,
float time,
const float ray_tmax,
thread BoundingBoxIntersectionResult &result)
{
# ifdef __VISIBILITY_FLAG__
const uint visibility = payload.visibility;
if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
return;
}
# endif
float3 P = ray_origin;
float3 dir = ray_direction;
/* The direction is not normalized by default, but the curve intersection routine expects that */
float len;
dir = normalize_len(dir, &len);
Intersection isect;
isect.t = ray_tmax;
/* Transform maximum distance into object space. */
if (isect.t != FLT_MAX)
isect.t *= len;
MetalKernelContext context(launch_params_metal);
if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
launch_params_metal, payload, object, prim, isect.u);
if (result.accept) {
result.distance = isect.t / len;
payload.u = isect.u;
payload.v = isect.v;
payload.prim = prim;
payload.type = type;
}
}
}
ccl_device_inline
void metalrt_intersection_curve_shadow(constant KernelParamsMetal &launch_params_metal,
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
const uint object,
const uint prim,
const uint type,
const float3 ray_origin,
const float3 ray_direction,
float time,
const float ray_tmax,
thread BoundingBoxIntersectionResult &result)
{
const uint visibility = payload.visibility;
float3 P = ray_origin;
float3 dir = ray_direction;
/* The direction is not normalized by default, but the curve intersection routine expects that */
float len;
dir = normalize_len(dir, &len);
Intersection isect;
isect.t = ray_tmax;
/* Transform maximum distance into object space */
if (isect.t != FLT_MAX)
isect.t *= len;
MetalKernelContext context(launch_params_metal);
if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
result.accept = !result.continue_search;
if (result.accept) {
result.distance = isect.t / len;
}
}
}
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
BoundingBoxIntersectionResult
__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
const uint object [[user_instance_id]],
const uint primitive_id [[primitive_id]],
const float3 ray_origin [[origin]],
const float3 ray_direction [[direction]],
const float ray_tmax [[max_distance]])
{
uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
BoundingBoxIntersectionResult result;
result.accept = false;
result.continue_search = true;
result.distance = ray_tmax;
if (segment.type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) {
metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
# if defined(__METALRT_MOTION__)
payload.time,
# else
0.0f,
# endif
ray_tmax, result);
}
return result;
}
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
BoundingBoxIntersectionResult
__intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
const uint object [[user_instance_id]],
const uint primitive_id [[primitive_id]],
const float3 ray_origin [[origin]],
const float3 ray_direction [[direction]],
const float ray_tmax [[max_distance]])
{
uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
BoundingBoxIntersectionResult result;
result.accept = false;
result.continue_search = true;
result.distance = ray_tmax;
if (segment.type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) {
metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
# if defined(__METALRT_MOTION__)
payload.time,
# else
0.0f,
# endif
ray_tmax, result);
}
return result;
}
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
BoundingBoxIntersectionResult
__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
const uint object [[user_instance_id]],
const uint primitive_id [[primitive_id]],
const float3 ray_origin [[origin]],
const float3 ray_direction [[direction]],
const float ray_tmax [[max_distance]])
{
uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
BoundingBoxIntersectionResult result;
result.accept = false;
result.continue_search = true;
result.distance = ray_tmax;
metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
# if defined(__METALRT_MOTION__)
payload.time,
# else
0.0f,
# endif
ray_tmax, result);
return result;
}
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
BoundingBoxIntersectionResult
__intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
const uint object [[user_instance_id]],
const uint primitive_id [[primitive_id]],
const float3 ray_origin [[origin]],
const float3 ray_direction [[direction]],
const float ray_tmax [[max_distance]])
{
uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
BoundingBoxIntersectionResult result;
result.accept = false;
result.continue_search = true;
result.distance = ray_tmax;
metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
# if defined(__METALRT_MOTION__)
payload.time,
# else
0.0f,
# endif
ray_tmax, result);
return result;
}
#endif /* __HAIR__ */
#endif /* __METALRT__ */

View File

@@ -21,6 +21,7 @@
#include <optix.h>
#define __KERNEL_GPU__
#define __KERNEL_GPU_RAYTRACING__
#define __KERNEL_CUDA__ /* OptiX kernels are implicitly CUDA kernels too */
#define __KERNEL_OPTIX__
#define CCL_NAMESPACE_BEGIN

View File

@@ -101,8 +101,8 @@ ccl_device_inline
const int isect_prim,
float3 verts[3])
{
# ifdef __KERNEL_OPTIX__
/* t is always in world space with OptiX. */
# if defined(__KERNEL_GPU_RAYTRACING__)
/* t is always in world space with OptiX and MetalRT. */
return motion_triangle_refine(kg, sd, P, D, t, isect_object, isect_prim, verts);
# else
# ifdef __INTERSECTION_REFINE__

View File

@@ -227,8 +227,8 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals kg,
const int isect_object,
const int isect_prim)
{
#ifdef __KERNEL_OPTIX__
/* t is always in world space with OptiX. */
#if defined(__KERNEL_GPU_RAYTRACING__)
/* t is always in world space with OptiX and MetalRT. */
return triangle_refine(kg, sd, P, D, t, isect_object, isect_prim);
#else
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {

View File

@@ -137,8 +137,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm);
hit_Ng = normalize(transform_direction_transposed(&itfm, hit_Ng));
/* Transform t to world space, except for OptiX where it already is. */
#ifdef __KERNEL_OPTIX__
/* Transform t to world space, except for OptiX and MetalRT where it already is. */
#ifdef __KERNEL_GPU_RAYTRACING__
(void)tfm;
#else
float3 D = transform_direction(&itfm, ray.D);

View File

@@ -212,7 +212,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
ray.dP = ray_dP;
ray.dD = differential_zero_compact();
#ifndef __KERNEL_OPTIX__
#ifndef __KERNEL_GPU_RAYTRACING__
/* Compute or fetch object transforms. */
Transform ob_itfm ccl_optional_struct_init;
Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm);
@@ -382,8 +382,8 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
hit = (ss_isect.num_hits > 0);
if (hit) {
#ifdef __KERNEL_OPTIX__
/* t is always in world space with OptiX. */
#ifdef __KERNEL_GPU_RAYTRACING__
/* t is always in world space with OptiX and MetalRT. */
ray.t = ss_isect.hits[0].t;
#else
/* Compute world space distance to surface hit. */

View File

@@ -34,6 +34,7 @@ KERNEL_TEX(Transform, __object_motion_pass)
KERNEL_TEX(DecomposedTransform, __object_motion)
KERNEL_TEX(uint, __object_flag)
KERNEL_TEX(float, __object_volume_step)
KERNEL_TEX(uint, __object_prim_offset)
/* cameras */
KERNEL_TEX(DecomposedTransform, __camera_motion)

View File

@@ -110,9 +110,9 @@ CCL_NAMESPACE_BEGIN
# define __VOLUME_RECORD_ALL__
#endif /* __KERNEL_CPU__ */
#ifdef __KERNEL_OPTIX__
#ifdef __KERNEL_GPU_RAYTRACING__
# undef __BAKING__
#endif /* __KERNEL_OPTIX__ */
#endif /* __KERNEL_GPU_RAYTRACING__ */
/* Scene-based selective features compilation. */
#ifdef __KERNEL_FEATURES__
@@ -1220,10 +1220,12 @@ typedef enum KernelBVHLayout {
BVH_LAYOUT_OPTIX = (1 << 2),
BVH_LAYOUT_MULTI_OPTIX = (1 << 3),
BVH_LAYOUT_MULTI_OPTIX_EMBREE = (1 << 4),
BVH_LAYOUT_METAL = (1 << 5),
BVH_LAYOUT_MULTI_METAL_EMBREE = (1 << 6),
/* Default BVH layout to use for CPU. */
BVH_LAYOUT_AUTO = BVH_LAYOUT_EMBREE,
BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX,
BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX | BVH_LAYOUT_METAL,
} KernelBVHLayout;
typedef struct KernelBVH {
@@ -1238,6 +1240,8 @@ typedef struct KernelBVH {
/* Custom BVH */
#ifdef __KERNEL_OPTIX__
OptixTraversableHandle scene;
#elif defined __METALRT__
metalrt_as_type scene;
#else
# ifdef __EMBREE__
RTCScene scene;

View File

@@ -165,7 +165,8 @@ int Geometry::motion_step(float time) const
bool Geometry::need_build_bvh(BVHLayout layout) const
{
return is_instanced() || layout == BVH_LAYOUT_OPTIX || layout == BVH_LAYOUT_MULTI_OPTIX ||
layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE;
layout == BVH_LAYOUT_METAL || layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
layout == BVH_LAYOUT_MULTI_METAL_EMBREE;
}
bool Geometry::is_instanced() const
@@ -1247,7 +1248,8 @@ void GeometryManager::device_update_bvh(Device *device,
VLOG(1) << "Using " << bvh_layout_name(bparams.bvh_layout) << " layout.";
const bool can_refit = scene->bvh != nullptr &&
(bparams.bvh_layout == BVHLayout::BVH_LAYOUT_OPTIX);
(bparams.bvh_layout == BVHLayout::BVH_LAYOUT_OPTIX ||
bparams.bvh_layout == BVHLayout::BVH_LAYOUT_METAL);
BVH *bvh = scene->bvh;
if (!scene->bvh) {

View File

@@ -530,6 +530,34 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
}
}
void ObjectManager::device_update_prim_offsets(Device *device, DeviceScene *dscene, Scene *scene)
{
BVHLayoutMask layout_mask = device->get_bvh_layout_mask();
if (layout_mask != BVH_LAYOUT_METAL && layout_mask != BVH_LAYOUT_MULTI_METAL_EMBREE) {
return;
}
/* On MetalRT, primitive / curve segment offsets can't be baked at BVH build time. Intersection
* handlers need to apply the offset manually. */
uint *object_prim_offset = dscene->object_prim_offset.alloc(scene->objects.size());
foreach (Object *ob, scene->objects) {
uint32_t prim_offset = 0;
if (Geometry *const geom = ob->geometry) {
if (geom->geometry_type == Geometry::HAIR) {
prim_offset = ((Hair *const)geom)->curve_segment_offset;
}
else {
prim_offset = geom->prim_offset;
}
}
uint obj_index = ob->get_device_index();
object_prim_offset[obj_index] = prim_offset;
}
dscene->object_prim_offset.copy_to_device();
dscene->object_prim_offset.clear_modified();
}
void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene, Progress &progress)
{
UpdateObjectTransformState state;
@@ -840,6 +868,7 @@ void ObjectManager::device_free(Device *, DeviceScene *dscene, bool force_free)
dscene->object_motion.free_if_need_realloc(force_free);
dscene->object_flag.free_if_need_realloc(force_free);
dscene->object_volume_step.free_if_need_realloc(force_free);
dscene->object_prim_offset.free_if_need_realloc(force_free);
}
void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, Progress &progress)

View File

@@ -155,6 +155,7 @@ class ObjectManager {
void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
void device_update_transforms(DeviceScene *dscene, Scene *scene, Progress &progress);
void device_update_prim_offsets(Device *device, DeviceScene *dscene, Scene *scene);
void device_update_flags(Device *device,
DeviceScene *dscene,

View File

@@ -69,6 +69,7 @@ DeviceScene::DeviceScene(Device *device)
object_motion(device, "__object_motion", MEM_GLOBAL),
object_flag(device, "__object_flag", MEM_GLOBAL),
object_volume_step(device, "__object_volume_step", MEM_GLOBAL),
object_prim_offset(device, "__object_prim_offset", MEM_GLOBAL),
camera_motion(device, "__camera_motion", MEM_GLOBAL),
attributes_map(device, "__attributes_map", MEM_GLOBAL),
attributes_float(device, "__attributes_float", MEM_GLOBAL),
@@ -309,6 +310,12 @@ void Scene::device_update(Device *device_, Progress &progress)
progress.set_status("Updating Objects Flags");
object_manager->device_update_flags(device, &dscene, this, progress);
if (progress.get_cancel() || device->have_error())
return;
progress.set_status("Updating Primitive Offsets");
object_manager->device_update_prim_offsets(device, &dscene, this);
if (progress.get_cancel() || device->have_error())
return;

View File

@@ -100,6 +100,7 @@ class DeviceScene {
device_vector<DecomposedTransform> object_motion;
device_vector<uint> object_flag;
device_vector<float> object_volume_step;
device_vector<uint> object_prim_offset;
/* cameras */
device_vector<DecomposedTransform> camera_motion;

View File

@@ -233,7 +233,7 @@ ccl_device_inline float3 operator/=(float3 &a, float f)
return a = a * invf;
}
#if !(defined(__KERNEL_METAL__) || defined(__KERNEL_CUDA__))
# if !(defined(__KERNEL_METAL__) || defined(__KERNEL_CUDA__))
ccl_device_inline packed_float3 operator*=(packed_float3 &a, const float3 &b)
{
a = float3(a) * b;
@@ -257,7 +257,7 @@ ccl_device_inline packed_float3 operator/=(packed_float3 &a, float f)
a = float3(a) / f;
return a;
}
#endif
# endif
ccl_device_inline bool operator==(const float3 &a, const float3 &b)
{

View File

@@ -366,10 +366,10 @@ ccl_device_inline Transform transform_empty()
ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t)
{
/* Optix is using lerp to interpolate motion transformations. */
#ifdef __KERNEL_OPTIX__
/* Optix and MetalRT are using lerp to interpolate motion transformations. */
#if defined(__KERNEL_GPU_RAYTRACING__)
return normalize((1.0f - t) * q1 + t * q2);
#else /* __KERNEL_OPTIX__ */
#else /* defined(__KERNEL_GPU_RAYTRACING__) */
/* note: this does not ensure rotation around shortest angle, q1 and q2
* are assumed to be matched already in transform_motion_decompose */
float costheta = dot(q1, q2);
@@ -387,7 +387,7 @@ ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t)
float thetap = theta * t;
return q1 * cosf(thetap) + qperp * sinf(thetap);
}
#endif /* __KERNEL_OPTIX__ */
#endif /* defined(__KERNEL_GPU_RAYTRACING__) */
}
ccl_device_inline Transform transform_quick_inverse(Transform M)