EEVEE Next: Add imageStoreFast/imageLoadFast ops to Film Shader #121114

Open
Jason Fielder wants to merge 1 commit from Jason-Fielder/blender:im_store_fast_film into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
6 changed files with 56 additions and 24 deletions

View File

@ -33,7 +33,7 @@ void cryptomatte_clear_samples(FilmSample dst)
{
int layer_len = imageSize(cryptomatte_img).z;
for (int i = 0; i < layer_len; i++) {
imageStore(cryptomatte_img, ivec3(dst.texel, i), vec4(0.0));
imageStoreFast(cryptomatte_img, ivec3(dst.texel, i), vec4(0.0));
/* Ensure stores are visible to later reads. */
imageFence(cryptomatte_img);
}
@ -70,7 +70,7 @@ void cryptomatte_store_film_sample(FilmSample dst,
else {
continue;
}
imageStore(cryptomatte_img, img_co, sample_pair);
imageStoreFast(cryptomatte_img, img_co, sample_pair);
break;
}
/* Ensure stores are visible to later reads. */

View File

@ -13,7 +13,7 @@ void cryptomatte_load_samples(ivec2 texel, int layer, out vec2 samples[CRYPTOMAT
/* Read all samples from the cryptomatte layer. */
for (int p = 0; p < pass_len; p++) {
vec4 pass_sample = imageLoad(cryptomatte_img, ivec3(texel, p + layer_id));
vec4 pass_sample = imageLoadFast(cryptomatte_img, ivec3(texel, p + layer_id));
samples[p * 2] = pass_sample.xy;
samples[p * 2 + 1] = pass_sample.zw;
}
@ -59,7 +59,7 @@ void cryptomatte_store_samples(ivec2 texel, int layer, vec2 samples[CRYPTOMATTE_
vec4 pass_sample;
pass_sample.xy = samples[p * 2];
pass_sample.zw = samples[p * 2 + 1];
imageStore(cryptomatte_img, ivec3(texel, p + layer_id), pass_sample);
imageStoreFast(cryptomatte_img, ivec3(texel, p + layer_id), pass_sample);
}
/* Ensure stores are visible to later reads. */
imageFence(cryptomatte_img);
@ -73,7 +73,7 @@ void main()
cryptomatte_load_samples(texel, layer, samples);
cryptomatte_sort_samples(samples);
/* Repeat texture coordinates as the weight can be optimized to a small portion of the film. */
float weight = imageLoad(
float weight = imageLoadFast(
weight_img,
ivec3(texel % imageSize(weight_img).xy, FILM_WEIGHT_LAYER_ACCUMULATION))
.x;

View File

@ -10,21 +10,21 @@ void main()
float out_depth;
if (uniform_buf.film.display_only) {
out_depth = imageLoad(depth_img, texel_film).r;
out_depth = imageLoadFast(depth_img, texel_film).r;
if (display_id == -1) {
out_color = texelFetch(in_combined_tx, texel_film, 0);
}
else if (uniform_buf.film.display_storage_type == PASS_STORAGE_VALUE) {
out_color.rgb = imageLoad(value_accum_img, ivec3(texel_film, display_id)).rrr;
out_color.rgb = imageLoadFast(value_accum_img, ivec3(texel_film, display_id)).rrr;
out_color.a = 1.0;
}
else if (uniform_buf.film.display_storage_type == PASS_STORAGE_COLOR) {
out_color = imageLoad(color_accum_img, ivec3(texel_film, display_id));
out_color = imageLoadFast(color_accum_img, ivec3(texel_film, display_id));
}
else /* PASS_STORAGE_CRYPTOMATTE */ {
out_color = cryptomatte_false_color(
imageLoad(cryptomatte_img, ivec3(texel_film, display_id)).r);
imageLoadFast(cryptomatte_img, ivec3(texel_film, display_id)).r);
}
}
else {

View File

@ -198,7 +198,7 @@ float film_distance_load(ivec2 texel)
if (!uniform_buf.film.use_history || use_reprojection) {
return 1.0e16;
}
return imageLoad(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE)).x;
return imageLoadFast(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE)).x;
}
float film_weight_load(ivec2 texel)
@ -209,7 +209,7 @@ float film_weight_load(ivec2 texel)
if (!uniform_buf.film.use_history || use_reprojection) {
return 0.0;
}
return imageLoad(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION)).x;
return imageLoadFast(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION)).x;
}
/* Returns motion in pixel space to retrieve the pixel history. */
@ -502,7 +502,7 @@ void film_store_combined(
if (display_id == -1) {
display = color;
}
imageStore(out_combined_img, dst.texel, color);
imageStoreFast(out_combined_img, dst.texel, color);
}
void film_store_color(FilmSample dst, int pass_id, vec4 color, inout vec4 display)
@ -511,7 +511,7 @@ void film_store_color(FilmSample dst, int pass_id, vec4 color, inout vec4 displa
return;
}
vec4 data_film = imageLoad(color_accum_img, ivec3(dst.texel, pass_id));
vec4 data_film = imageLoadFast(color_accum_img, ivec3(dst.texel, pass_id));
color = (data_film * dst.weight + color) * dst.weight_sum_inv;
@ -523,7 +523,7 @@ void film_store_color(FilmSample dst, int pass_id, vec4 color, inout vec4 displa
if (display_id == pass_id) {
display = color;
}
imageStore(color_accum_img, ivec3(dst.texel, pass_id), color);
imageStoreFast(color_accum_img, ivec3(dst.texel, pass_id), color);
}
void film_store_value(FilmSample dst, int pass_id, float value, inout vec4 display)
@ -532,7 +532,7 @@ void film_store_value(FilmSample dst, int pass_id, float value, inout vec4 displ
return;
}
float data_film = imageLoad(value_accum_img, ivec3(dst.texel, pass_id)).x;
float data_film = imageLoadFast(value_accum_img, ivec3(dst.texel, pass_id)).x;
value = (data_film * dst.weight + value) * dst.weight_sum_inv;
@ -544,7 +544,7 @@ void film_store_value(FilmSample dst, int pass_id, float value, inout vec4 displ
if (display_id == pass_id) {
display = vec4(value, value, value, 1.0);
}
imageStore(value_accum_img, ivec3(dst.texel, pass_id), vec4(value));
imageStoreFast(value_accum_img, ivec3(dst.texel, pass_id), vec4(value));
}
/* Nearest sample variant. Always stores the data. */
@ -557,7 +557,7 @@ void film_store_data(ivec2 texel_film, int pass_id, vec4 data_sample, inout vec4
if (display_id == pass_id) {
display = data_sample;
}
imageStore(color_accum_img, ivec3(texel_film, pass_id), data_sample);
imageStoreFast(color_accum_img, ivec3(texel_film, pass_id), data_sample);
}
void film_store_depth(ivec2 texel_film, float value, out float out_depth)
@ -568,17 +568,17 @@ void film_store_depth(ivec2 texel_film, float value, out float out_depth)
out_depth = film_depth_convert_to_scene(value);
imageStore(depth_img, texel_film, vec4(out_depth));
imageStoreFast(depth_img, texel_film, vec4(out_depth));
}
void film_store_distance(ivec2 texel, float value)
{
imageStore(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE), vec4(value));
imageStoreFast(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE), vec4(value));
}
void film_store_weight(ivec2 texel, float value)
{
imageStore(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION), vec4(value));
imageStoreFast(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION), vec4(value));
}
float film_display_depth_ammend(ivec2 texel, float depth)
@ -657,9 +657,9 @@ void film_process_data(ivec2 texel_film, out vec4 out_color, out float out_depth
film_store_distance(texel_film, film_sample.weight);
}
else {
out_depth = imageLoad(depth_img, texel_film).r;
out_depth = imageLoadFast(depth_img, texel_film).r;
if (display_id != -1 && display_id == normal_id) {
out_color = imageLoad(color_accum_img, ivec3(texel_film, display_id));
out_color = imageLoadFast(color_accum_img, ivec3(texel_film, display_id));
}
}
}

View File

@ -1040,6 +1040,14 @@ inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_1d<S
tex.texture->write(value, uint(_coord));
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_1d<S, A> tex,
T _coord,
S value)
{
tex.texture->write(value, uint(_coord));
}
template<typename S, typename T, access A>
inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
T _coord,
@ -1060,6 +1068,14 @@ inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d<S
tex.texture->write(value, uint2(_coord.xy));
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d<S, A> tex,
T _coord,
S value)
{
tex.texture->write(value, uint2(_coord.xy));
}
template<typename S, typename T, access A>
inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
T _coord,
@ -1083,6 +1099,14 @@ inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d_a
tex.texture->write(value, uint2(_coord.xy), _coord.z);
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
T _coord,
S value)
{
tex.texture->write(value, uint2(_coord.xy), _coord.z);
}
template<typename S, typename T, access A>
inline void _texture_write_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
T _coord,
@ -1106,6 +1130,14 @@ inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d<S
tex.texture->write(value, uint3(_coord.xyz));
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d<S, A> tex,
T _coord,
S value)
{
tex.texture->write(value, uint3(_coord.xyz));
}
/* Texture atomic operations are only supported in Metal 3.1 and onward (macOS 14.0 Sonoma). */
#ifdef MTL_SUPPORTS_TEXTURE_ATOMICS

View File

@ -62,8 +62,8 @@
#define int32_t int
#define uint32_t uint
/* Fast store variant macro. In GLSL this is the same as imageStore, but assumes no bounds
* checking. */
/* Fast load/store variant macro. In GLSL this is the same as imageLoad/imageStore, but assumes no
* bounds checking. */
#define imageStoreFast imageStore
#define imageLoadFast imageLoad