EEVEE Next: Add imageStore/LoadFast ops to Film Shader #121114
|
@ -33,7 +33,7 @@ void cryptomatte_clear_samples(FilmSample dst)
|
|||
{
|
||||
int layer_len = imageSize(cryptomatte_img).z;
|
||||
for (int i = 0; i < layer_len; i++) {
|
||||
imageStore(cryptomatte_img, ivec3(dst.texel, i), vec4(0.0));
|
||||
imageStoreFast(cryptomatte_img, ivec3(dst.texel, i), vec4(0.0));
|
||||
/* Ensure stores are visible to later reads. */
|
||||
imageFence(cryptomatte_img);
|
||||
}
|
||||
|
@ -70,7 +70,7 @@ void cryptomatte_store_film_sample(FilmSample dst,
|
|||
else {
|
||||
continue;
|
||||
}
|
||||
imageStore(cryptomatte_img, img_co, sample_pair);
|
||||
imageStoreFast(cryptomatte_img, img_co, sample_pair);
|
||||
break;
|
||||
}
|
||||
/* Ensure stores are visible to later reads. */
|
||||
|
|
|
@ -13,7 +13,7 @@ void cryptomatte_load_samples(ivec2 texel, int layer, out vec2 samples[CRYPTOMAT
|
|||
|
||||
/* Read all samples from the cryptomatte layer. */
|
||||
for (int p = 0; p < pass_len; p++) {
|
||||
vec4 pass_sample = imageLoad(cryptomatte_img, ivec3(texel, p + layer_id));
|
||||
vec4 pass_sample = imageLoadFast(cryptomatte_img, ivec3(texel, p + layer_id));
|
||||
samples[p * 2] = pass_sample.xy;
|
||||
samples[p * 2 + 1] = pass_sample.zw;
|
||||
}
|
||||
|
@ -59,7 +59,7 @@ void cryptomatte_store_samples(ivec2 texel, int layer, vec2 samples[CRYPTOMATTE_
|
|||
vec4 pass_sample;
|
||||
pass_sample.xy = samples[p * 2];
|
||||
pass_sample.zw = samples[p * 2 + 1];
|
||||
imageStore(cryptomatte_img, ivec3(texel, p + layer_id), pass_sample);
|
||||
imageStoreFast(cryptomatte_img, ivec3(texel, p + layer_id), pass_sample);
|
||||
}
|
||||
/* Ensure stores are visible to later reads. */
|
||||
imageFence(cryptomatte_img);
|
||||
|
@ -73,7 +73,7 @@ void main()
|
|||
cryptomatte_load_samples(texel, layer, samples);
|
||||
cryptomatte_sort_samples(samples);
|
||||
/* Repeat texture coordinates as the weight can be optimized to a small portion of the film. */
|
||||
float weight = imageLoad(
|
||||
float weight = imageLoadFast(
|
||||
weight_img,
|
||||
ivec3(texel % imageSize(weight_img).xy, FILM_WEIGHT_LAYER_ACCUMULATION))
|
||||
.x;
|
||||
|
|
|
@ -10,21 +10,21 @@ void main()
|
|||
float out_depth;
|
||||
|
||||
if (uniform_buf.film.display_only) {
|
||||
out_depth = imageLoad(depth_img, texel_film).r;
|
||||
out_depth = imageLoadFast(depth_img, texel_film).r;
|
||||
|
||||
if (display_id == -1) {
|
||||
out_color = texelFetch(in_combined_tx, texel_film, 0);
|
||||
}
|
||||
else if (uniform_buf.film.display_storage_type == PASS_STORAGE_VALUE) {
|
||||
out_color.rgb = imageLoad(value_accum_img, ivec3(texel_film, display_id)).rrr;
|
||||
out_color.rgb = imageLoadFast(value_accum_img, ivec3(texel_film, display_id)).rrr;
|
||||
out_color.a = 1.0;
|
||||
}
|
||||
else if (uniform_buf.film.display_storage_type == PASS_STORAGE_COLOR) {
|
||||
out_color = imageLoad(color_accum_img, ivec3(texel_film, display_id));
|
||||
out_color = imageLoadFast(color_accum_img, ivec3(texel_film, display_id));
|
||||
}
|
||||
else /* PASS_STORAGE_CRYPTOMATTE */ {
|
||||
out_color = cryptomatte_false_color(
|
||||
imageLoad(cryptomatte_img, ivec3(texel_film, display_id)).r);
|
||||
imageLoadFast(cryptomatte_img, ivec3(texel_film, display_id)).r);
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
|
|
@ -198,7 +198,7 @@ float film_distance_load(ivec2 texel)
|
|||
if (!uniform_buf.film.use_history || use_reprojection) {
|
||||
return 1.0e16;
|
||||
}
|
||||
return imageLoad(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE)).x;
|
||||
return imageLoadFast(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE)).x;
|
||||
}
|
||||
|
||||
float film_weight_load(ivec2 texel)
|
||||
|
@ -209,7 +209,7 @@ float film_weight_load(ivec2 texel)
|
|||
if (!uniform_buf.film.use_history || use_reprojection) {
|
||||
return 0.0;
|
||||
}
|
||||
return imageLoad(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION)).x;
|
||||
return imageLoadFast(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION)).x;
|
||||
}
|
||||
|
||||
/* Returns motion in pixel space to retrieve the pixel history. */
|
||||
|
@ -502,7 +502,7 @@ void film_store_combined(
|
|||
if (display_id == -1) {
|
||||
display = color;
|
||||
}
|
||||
imageStore(out_combined_img, dst.texel, color);
|
||||
imageStoreFast(out_combined_img, dst.texel, color);
|
||||
}
|
||||
|
||||
void film_store_color(FilmSample dst, int pass_id, vec4 color, inout vec4 display)
|
||||
|
@ -511,7 +511,7 @@ void film_store_color(FilmSample dst, int pass_id, vec4 color, inout vec4 displa
|
|||
return;
|
||||
}
|
||||
|
||||
vec4 data_film = imageLoad(color_accum_img, ivec3(dst.texel, pass_id));
|
||||
vec4 data_film = imageLoadFast(color_accum_img, ivec3(dst.texel, pass_id));
|
||||
|
||||
color = (data_film * dst.weight + color) * dst.weight_sum_inv;
|
||||
|
||||
|
@ -523,7 +523,7 @@ void film_store_color(FilmSample dst, int pass_id, vec4 color, inout vec4 displa
|
|||
if (display_id == pass_id) {
|
||||
display = color;
|
||||
}
|
||||
imageStore(color_accum_img, ivec3(dst.texel, pass_id), color);
|
||||
imageStoreFast(color_accum_img, ivec3(dst.texel, pass_id), color);
|
||||
}
|
||||
|
||||
void film_store_value(FilmSample dst, int pass_id, float value, inout vec4 display)
|
||||
|
@ -532,7 +532,7 @@ void film_store_value(FilmSample dst, int pass_id, float value, inout vec4 displ
|
|||
return;
|
||||
}
|
||||
|
||||
float data_film = imageLoad(value_accum_img, ivec3(dst.texel, pass_id)).x;
|
||||
float data_film = imageLoadFast(value_accum_img, ivec3(dst.texel, pass_id)).x;
|
||||
|
||||
value = (data_film * dst.weight + value) * dst.weight_sum_inv;
|
||||
|
||||
|
@ -544,7 +544,7 @@ void film_store_value(FilmSample dst, int pass_id, float value, inout vec4 displ
|
|||
if (display_id == pass_id) {
|
||||
display = vec4(value, value, value, 1.0);
|
||||
}
|
||||
imageStore(value_accum_img, ivec3(dst.texel, pass_id), vec4(value));
|
||||
imageStoreFast(value_accum_img, ivec3(dst.texel, pass_id), vec4(value));
|
||||
}
|
||||
|
||||
/* Nearest sample variant. Always stores the data. */
|
||||
|
@ -557,7 +557,7 @@ void film_store_data(ivec2 texel_film, int pass_id, vec4 data_sample, inout vec4
|
|||
if (display_id == pass_id) {
|
||||
display = data_sample;
|
||||
}
|
||||
imageStore(color_accum_img, ivec3(texel_film, pass_id), data_sample);
|
||||
imageStoreFast(color_accum_img, ivec3(texel_film, pass_id), data_sample);
|
||||
}
|
||||
|
||||
void film_store_depth(ivec2 texel_film, float value, out float out_depth)
|
||||
|
@ -568,17 +568,17 @@ void film_store_depth(ivec2 texel_film, float value, out float out_depth)
|
|||
|
||||
out_depth = film_depth_convert_to_scene(value);
|
||||
|
||||
imageStore(depth_img, texel_film, vec4(out_depth));
|
||||
imageStoreFast(depth_img, texel_film, vec4(out_depth));
|
||||
}
|
||||
|
||||
/* Store `value` (broadcast to a vec4) into the FILM_WEIGHT_LAYER_DISTANCE layer of
 * `out_weight_img` at pixel `texel`.
 * NOTE(review): an `imageStore` and an `imageStoreFast` call with identical arguments both
 * appear below. This looks like the before/after pair of a diff rendering rather than two
 * intended stores - confirm against the real shader file. */
void film_store_distance(ivec2 texel, float value)
|
||||
{
|
||||
imageStore(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE), vec4(value));
|
||||
imageStoreFast(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE), vec4(value));
|
||||
}
|
||||
|
||||
/* Store `value` (broadcast to a vec4) into the FILM_WEIGHT_LAYER_ACCUMULATION layer of
 * `out_weight_img` at pixel `texel`.
 * NOTE(review): an `imageStore` and an `imageStoreFast` call with identical arguments both
 * appear below. This looks like the before/after pair of a diff rendering rather than two
 * intended stores - confirm against the real shader file. */
void film_store_weight(ivec2 texel, float value)
|
||||
{
|
||||
imageStore(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION), vec4(value));
|
||||
imageStoreFast(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION), vec4(value));
|
||||
}
|
||||
|
||||
float film_display_depth_ammend(ivec2 texel, float depth)
|
||||
|
@ -657,9 +657,9 @@ void film_process_data(ivec2 texel_film, out vec4 out_color, out float out_depth
|
|||
film_store_distance(texel_film, film_sample.weight);
|
||||
}
|
||||
else {
|
||||
out_depth = imageLoad(depth_img, texel_film).r;
|
||||
out_depth = imageLoadFast(depth_img, texel_film).r;
|
||||
if (display_id != -1 && display_id == normal_id) {
|
||||
out_color = imageLoad(color_accum_img, ivec3(texel_film, display_id));
|
||||
out_color = imageLoadFast(color_accum_img, ivec3(texel_film, display_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1040,6 +1040,14 @@ inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_1d<S
|
|||
tex.texture->write(value, uint(_coord));
|
||||
}
|
||||
|
||||
/* "Fast" write overload for the 1D combined image/sampler wrapper, taking `value` as a plain `S`
 * (the wrapper's first template argument).
 * Forwards `value` directly to `tex.texture->write()` at `uint(_coord)`; this function itself
 * performs no bounds test on `_coord`. */
template<typename S, typename T, access A>
|
||||
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_1d<S, A> tex,
|
||||
T _coord,
|
||||
S value)
|
||||
{
|
||||
tex.texture->write(value, uint(_coord));
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
|
||||
T _coord,
|
||||
|
@ -1060,6 +1068,14 @@ inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d<S
|
|||
tex.texture->write(value, uint2(_coord.xy));
|
||||
}
|
||||
|
||||
/* "Fast" write overload for the 2D combined image/sampler wrapper, taking `value` as a plain `S`
 * (the wrapper's first template argument).
 * Forwards `value` directly to `tex.texture->write()` at `uint2(_coord.xy)`; this function itself
 * performs no bounds test on `_coord`. */
template<typename S, typename T, access A>
|
||||
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d<S, A> tex,
|
||||
T _coord,
|
||||
S value)
|
||||
{
|
||||
tex.texture->write(value, uint2(_coord.xy));
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
|
||||
T _coord,
|
||||
|
@ -1083,6 +1099,14 @@ inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d_a
|
|||
tex.texture->write(value, uint2(_coord.xy), _coord.z);
|
||||
}
|
||||
|
||||
/* "Fast" write overload for the 2D-array combined image/sampler wrapper, taking `value` as a
 * plain `S` (the wrapper's first template argument).
 * Forwards `value` directly to `tex.texture->write()` with `uint2(_coord.xy)` as the texel
 * position and `_coord.z` as the third argument (presumably the array layer - confirm against
 * the wrapped texture type). This function itself performs no bounds test on `_coord`. */
template<typename S, typename T, access A>
|
||||
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
|
||||
T _coord,
|
||||
S value)
|
||||
{
|
||||
tex.texture->write(value, uint2(_coord.xy), _coord.z);
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline void _texture_write_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
|
||||
T _coord,
|
||||
|
@ -1106,6 +1130,14 @@ inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d<S
|
|||
tex.texture->write(value, uint3(_coord.xyz));
|
||||
}
|
||||
|
||||
/* "Fast" write overload for the 3D combined image/sampler wrapper, taking `value` as a plain `S`
 * (the wrapper's first template argument).
 * Forwards `value` directly to `tex.texture->write()` at `uint3(_coord.xyz)`; this function
 * itself performs no bounds test on `_coord`. */
template<typename S, typename T, access A>
|
||||
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d<S, A> tex,
|
||||
T _coord,
|
||||
S value)
|
||||
{
|
||||
tex.texture->write(value, uint3(_coord.xyz));
|
||||
}
|
||||
|
||||
/* Texture atomic operations are only supported in Metal 3.1 and onward (macOS 14.0 Sonoma). */
|
||||
#ifdef MTL_SUPPORTS_TEXTURE_ATOMICS
|
||||
|
||||
|
|
|
@ -62,8 +62,8 @@
|
|||
#define int32_t int
|
||||
#define uint32_t uint
|
||||
|
||||
/* Fast store variant macro. In GLSL this is the same as imageStore, but assumes no bounds
|
||||
* checking. */
|
||||
/* Fast load/store variant macro. In GLSL this is the same as imageLoad/imageStore, but assumes no
|
||||
* bounds checking. */
|
||||
#define imageStoreFast imageStore
|
||||
#define imageLoadFast imageLoad
|
||||
|
||||
|
|
Loading…
Reference in New Issue