Realtime Compositor: cache render pass GPU textures #108818

Merged
Brecht Van Lommel merged 3 commits from brecht/blender:cache-gpu-texture into main 2023-06-12 19:49:24 +02:00
8 changed files with 174 additions and 50 deletions

View File

@ -825,10 +825,19 @@ class RenderLayerOperation : public NodeOperation {
void execute() override
{
const int view_layer = bnode().custom1;
GPUTexture *combined_texture = context().get_input_texture(view_layer, RE_PASSNAME_COMBINED);
execute_pass("Image", combined_texture, "compositor_read_pass_color");
execute_pass("Alpha", combined_texture, "compositor_read_pass_alpha");
Result &image_result = get_result("Image");
Result &alpha_result = get_result("Alpha");
if (image_result.should_compute() || alpha_result.should_compute()) {
GPUTexture *combined_texture = context().get_input_texture(view_layer, RE_PASSNAME_COMBINED);
if (image_result.should_compute()) {
execute_pass(image_result, combined_texture, "compositor_read_pass_color");
}
if (alpha_result.should_compute()) {
execute_pass(alpha_result, combined_texture, "compositor_read_pass_alpha");
}
brecht marked this conversation as resolved
Review

Should this be compositor_read_pass_alpha instead?

Should this be `compositor_read_pass_alpha` instead?
}
/* Other output passes are not supported for now, so allocate them as invalid. */
for (const bNodeSocket *output : this->node()->output_sockets()) {
@ -836,15 +845,20 @@ class RenderLayerOperation : public NodeOperation {
continue;
}
Result &result = get_result(output->identifier);
if (!result.should_compute()) {
continue;
}
GPUTexture *pass_texture = context().get_input_texture(view_layer, output->identifier);
if (output->type == SOCK_FLOAT) {
execute_pass(output->identifier, pass_texture, "compositor_read_pass_float");
execute_pass(result, pass_texture, "compositor_read_pass_float");
}
else if (output->type == SOCK_VECTOR) {
execute_pass(output->identifier, pass_texture, "compositor_read_pass_vector");
execute_pass(result, pass_texture, "compositor_read_pass_vector");
}
else if (output->type == SOCK_RGBA) {
execute_pass(output->identifier, pass_texture, "compositor_read_pass_color");
execute_pass(result, pass_texture, "compositor_read_pass_color");
}
else {
BLI_assert_unreachable();
@ -852,15 +866,11 @@ class RenderLayerOperation : public NodeOperation {
}
}
void execute_pass(const char *pass_name, GPUTexture *pass_texture, const char *shader_name)
void execute_pass(Result &result, GPUTexture *pass_texture, const char *shader_name)
{
Result &image_result = get_result(pass_name);
if (!image_result.should_compute()) {
return;
}
if (pass_texture == nullptr) {
/* Pass not rendered yet, or not supported by viewport. */
image_result.allocate_invalid();
result.allocate_invalid();
context().set_info_message("Viewport compositor setup not fully supported");
return;
}
@ -878,14 +888,14 @@ class RenderLayerOperation : public NodeOperation {
GPU_texture_bind(pass_texture, input_unit);
const int2 compositing_region_size = context().get_compositing_region_size();
image_result.allocate_texture(Domain(compositing_region_size));
image_result.bind_as_image(shader, "output_img");
result.allocate_texture(Domain(compositing_region_size));
result.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, compositing_region_size);
GPU_shader_unbind();
GPU_texture_unbind(pass_texture);
image_result.unbind_as_image();
result.unbind_as_image();
}
};

View File

@ -14,6 +14,7 @@
#include "BLI_implicit_sharing.h"
struct GPUTexture;
struct ImBuf;
struct Image;
struct ImageFormatData;
@ -44,16 +45,20 @@ typedef struct Render Render;
/* Buffer of a floating point values which uses implicit sharing.
*
* The buffer is allocated by render passes creation, and then is shared with the render result
* and image buffer. */
* and image buffer.
*
* The GPU texture is an optional read-only copy of the render buffer in GPU memory. */
typedef struct RenderBuffer {
/* Floating point pixel data of the buffer. */
float *data;
/* Implicit sharing handle associated with `data`. */
const ImplicitSharingInfoHandle *sharing_info;
/* Optional GPU texture copy of `data`. Null until a cache is created for it, and freed together
 * with the buffer data or when the GPU texture caches are cleared. */
struct GPUTexture *gpu_texture;
} RenderBuffer;
/* Specialized render buffer to store 8bpp passes. */
typedef struct RenderByteBuffer {
/* 8-bit pixel data of the buffer. */
uint8_t *data;
/* Implicit sharing handle associated with `data`. */
const ImplicitSharingInfoHandle *sharing_info;
/* GPU texture slot, same member as in RenderBuffer.
 * NOTE(review): no code in view creates a texture for byte buffers; confirm whether this is
 * ever non-null. */
struct GPUTexture *gpu_texture;
} RenderByteBuffer;
/* Render Result usage:
@ -206,6 +211,7 @@ void RE_FreeAllRender(void);
* On file load, free render results.
*/
void RE_FreeAllRenderResults(void);
/**
* On file load or when changing engines, free persistent render data.
* Assumes no engines are currently rendering.
@ -216,6 +222,12 @@ void RE_FreeAllPersistentData(void);
*/
void RE_FreePersistentData(const struct Scene *scene);
/*
* Free cached GPU textures to reduce memory usage. All caches are cleared before rendering,
* and on UI changes they are freed once they are detected to be no longer in use.
*/
void RE_FreeGPUTextureCaches(const bool only_unused);
/**
* Get results and statistics.
*/
@ -463,6 +475,11 @@ struct RenderPass *RE_pass_find_by_type(struct RenderLayer *rl,
*/
void RE_pass_set_buffer_data(struct RenderPass *pass, float *data);
/**
* Ensure a GPU texture corresponding to the render buffer data exists.
*/
struct GPUTexture *RE_pass_ensure_gpu_texture_cache(struct Render *re, struct RenderPass *rpass);
/* shaded view or baking options */
#define RE_BAKE_NORMALS 0
#define RE_BAKE_DISPLACEMENT 1

View File

@ -73,6 +73,9 @@ class Context : public realtime_compositor::Context {
/* Viewer output texture. */
GPUTexture *viewer_output_texture_ = nullptr;
/* Texture pool. */
TexturePool &render_texture_pool_;
public:
Context(const Scene &scene,
const RenderData &render_data,
@ -85,7 +88,8 @@ class Context : public realtime_compositor::Context {
render_data_(render_data),
node_tree_(node_tree),
use_file_output_(use_file_output),
view_name_(view_name)
view_name_(view_name),
render_texture_pool_(texture_pool)
{
}
@ -175,9 +179,6 @@ class Context : public realtime_compositor::Context {
GPUTexture *get_input_texture(int view_layer_id, const char *pass_name) override
{
/* TODO: eventually this should get cached on the RenderResult itself when
* it supports storing GPU buffers, for faster updates. But will also need
* some eviction strategy to avoid too much GPU memory usage. */
Render *re = RE_GetSceneRender(&scene_);
RenderResult *rr = nullptr;
GPUTexture *input_texture = nullptr;
@ -195,34 +196,12 @@ class Context : public realtime_compositor::Context {
&rl->passes, pass_name, offsetof(RenderPass, name));
if (rpass && rpass->buffer.data) {
const int2 size(rl->rectx, rl->recty);
input_texture = RE_pass_ensure_gpu_texture_cache(re, rpass);
if (rpass->channels == 1) {
input_texture = texture_pool().acquire_float(size);
if (input_texture) {
GPU_texture_update(input_texture, GPU_DATA_FLOAT, rpass->buffer.data);
}
}
else if (rpass->channels == 3) {
input_texture = texture_pool().acquire_color(size);
if (input_texture) {
/* TODO: conversion could be done as part of GPU upload somehow? */
const float *rgb_buffer = rpass->buffer.data;
Vector<float> rgba_buffer(4 * size.x * size.y);
for (size_t i = 0; i < size_t(size.x) * size_t(size.y); i++) {
rgba_buffer[i * 4 + 0] = rgb_buffer[i * 3 + 0];
rgba_buffer[i * 4 + 1] = rgb_buffer[i * 3 + 1];
rgba_buffer[i * 4 + 2] = rgb_buffer[i * 3 + 2];
rgba_buffer[i * 4 + 3] = 1.0f;
}
GPU_texture_update(input_texture, GPU_DATA_FLOAT, rgba_buffer.data());
}
}
else if (rpass->channels == 4) {
input_texture = texture_pool().acquire_color(size);
if (input_texture) {
GPU_texture_update(input_texture, GPU_DATA_FLOAT, rpass->buffer.data);
}
if (input_texture) {
/* Don't assume render keeps texture around, add our own reference. */
GPU_texture_ref(input_texture);
render_texture_pool_.textures_.append(input_texture);
}
}
}

View File

@ -23,6 +23,7 @@
#include "DNA_sequence_types.h"
#include "DNA_space_types.h"
#include "DNA_userdef_types.h"
#include "DNA_windowmanager_types.h"
#include "MEM_guardedalloc.h"
@ -331,6 +332,7 @@ void RE_ClearResult(Render *re)
if (re) {
render_result_free(re->result);
re->result = nullptr;
re->result_has_gpu_texture_caches = false;
}
}
@ -636,6 +638,7 @@ void RE_FreeAllRenderResults(void)
re->result = nullptr;
re->pushedresult = nullptr;
re->result_has_gpu_texture_caches = false;
}
}
@ -650,9 +653,58 @@ void RE_FreeAllPersistentData(void)
}
}
/* Free the GPU texture caches of the render results of all renders in the global render list.
 *
 * When `only_unused` is false, every render flagged as having caches gets them freed.
 * When `only_unused` is true, a render keeps its caches if its scene uses realtime compositing
 * nodes and some window's active screen has a node editor showing the scene node tree or an
 * image editor showing a viewer-source image. */
void RE_FreeGPUTextureCaches(const bool only_unused)
{
LISTBASE_FOREACH (Render *, re, &RenderGlobal.renderlist) {
/* Cheap early-out: nothing was cached for this render. */
if (!re->result_has_gpu_texture_caches) {
continue;
}
Scene *scene = re->scene;
bool do_free = true;
/* Detect if scene is using realtime compositing, and if either a node editor is
* showing the nodes, or an image editor is showing the render result or viewer. */
if (only_unused && scene && scene->use_nodes && scene->nodetree &&
scene->nodetree->execution_mode == NTREE_EXECUTION_MODE_REALTIME)
{
/* NOTE(review): `wm` is dereferenced below without a null check; confirm a window manager
 * always exists on the code paths that pass `only_unused = true`. */
wmWindowManager *wm = static_cast<wmWindowManager *>(G_MAIN->wm.first);
LISTBASE_FOREACH (const wmWindow *, win, &wm->windows) {
const bScreen *screen = WM_window_get_active_screen(win);
LISTBASE_FOREACH (const ScrArea *, area, &screen->areabase) {
/* Only the first space of each area is inspected; presumably that is the visible one. */
const SpaceLink &space = *static_cast<const SpaceLink *>(area->spacedata.first);
if (space.spacetype == SPACE_NODE) {
const SpaceNode &snode = reinterpret_cast<const SpaceNode &>(space);
if (snode.nodetree == scene->nodetree) {
do_free = false;
}
}
else if (space.spacetype == SPACE_IMAGE) {
brecht marked this conversation as resolved Outdated

Should this be SPACE_IMAGE?

Should this be `SPACE_IMAGE`?
const SpaceImage &sima = reinterpret_cast<const SpaceImage &>(space);
if (sima.image && sima.image->source == IMA_SRC_VIEWER) {
do_free = false;
}
}
}
}
}
if (do_free) {
/* Free under the result write acquire/release pair. */
RenderResult *result = RE_AcquireResultWrite(re);
if (result != nullptr) {
render_result_free_gpu_texture_caches(result);
}
/* The flag is cleared even when there is no result, since then no caches can remain. */
re->result_has_gpu_texture_caches = false;
RE_ReleaseResult(re);
}
}
}
static void re_free_persistent_data(Render *re)
{
/* If engine is currently rendering, just wait for it to be freed when it finishes rendering. */
/* If engine is currently rendering, just wait for it to be freed when it finishes rendering.
*/
if (re->engine && !(re->engine->flag & RE_ENGINE_RENDERING)) {
RE_engine_free(re->engine);
re->engine = nullptr;
@ -1754,8 +1806,8 @@ static void render_pipeline_free(Render *re)
/* Destroy the opengl context in the correct thread. */
RE_system_gpu_context_destroy(re);
/* In the case the engine did not mark tiles as finished (un-highlight, which could happen in the
* case of cancelled render) ensure the storage is empty. */
/* In the case the engine did not mark tiles as finished (un-highlight, which could happen in
* the case of cancelled render) ensure the storage is empty. */
if (re->highlighted_tiles != nullptr) {
BLI_mutex_lock(&re->highlighted_tiles_mutex);
@ -1797,6 +1849,9 @@ void RE_RenderFrame(Render *re,
render_callback_exec_id(re, re->main, &scene->id, BKE_CB_EVT_RENDER_PRE);
/* Reduce GPU memory usage so renderer has more space. */
RE_FreeGPUTextureCaches(false);
render_init_depsgraph(re);
do_render_full_pipeline(re);
@ -2198,6 +2253,9 @@ void RE_RenderAnim(Render *re,
for (nfra = sfra, scene->r.cfra = sfra; scene->r.cfra <= efra; scene->r.cfra++) {
char filepath[FILE_MAX];
/* Reduce GPU memory usage so renderer has more space. */
RE_FreeGPUTextureCaches(false);
/* A feedback loop exists here -- render initialization requires updated
* render layers settings which could be animated, but scene evaluation for
* the frame happens later because it depends on what layers are visible to

View File

@ -39,6 +39,8 @@
#include "IMB_imbuf_types.h"
#include "IMB_openexr.h"
#include "GPU_texture.h"
#include "RE_engine.h"
#include "render_result.h"
@ -115,6 +117,18 @@ void render_result_free_list(ListBase *lb, RenderResult *rr)
}
}
/* Walk every pass of every layer in `rr` and release its cached GPU texture, if any.
 * The texture pointer is reset to null so a later lookup sees that no cache exists. */
void render_result_free_gpu_texture_caches(RenderResult *rr)
{
  LISTBASE_FOREACH (RenderLayer *, layer, &rr->layers) {
    LISTBASE_FOREACH (RenderPass *, pass, &layer->passes) {
      auto &buffer = pass->buffer;
      if (buffer.gpu_texture == nullptr) {
        /* Nothing cached for this pass. */
        continue;
      }
      GPU_texture_free(buffer.gpu_texture);
      buffer.gpu_texture = nullptr;
    }
  }
}
/********************************* multiview *************************************/
void render_result_views_shallowcopy(RenderResult *dst, RenderResult *src)
@ -407,6 +421,35 @@ void RE_pass_set_buffer_data(RenderPass *pass, float *data)
RE_RenderBuffer_assign_data(&pass->buffer, data);
}
/* Return the GPU texture cache of the pass buffer, creating it on first use.
 *
 * An existing cached texture is returned as is. Otherwise a 2D texture of the pass size is
 * created, filled with the float buffer data and stored on the pass buffer. When a texture was
 * created, `re` is flagged as having GPU texture caches so they can be found and freed later.
 *
 * Returns null when the pass has no buffer data or when texture creation fails. */
GPUTexture *RE_pass_ensure_gpu_texture_cache(Render *re, RenderPass *rpass)
{
  if (rpass->buffer.gpu_texture) {
    return rpass->buffer.gpu_texture;
  }
  if (rpass->buffer.data == nullptr) {
    return nullptr;
  }

  /* 1 and 3 channel passes get a matching format, any other channel count falls back to RGBA.
   * NOTE(review): a channel count other than 1, 3 or 4 would be uploaded as RGBA; confirm
   * such passes can not reach this function. */
  const eGPUTextureFormat format = (rpass->channels == 1) ? GPU_R16F :
                                   (rpass->channels == 3) ? GPU_RGB16F :
                                                            GPU_RGBA16F;

  /* No initial data is passed at creation, the pixels are uploaded below on success. */
  rpass->buffer.gpu_texture = GPU_texture_create_2d("RenderBuffer.gpu_texture",
                                                    rpass->rectx,
                                                    rpass->recty,
                                                    1,
                                                    format,
                                                    GPU_TEXTURE_USAGE_GENERAL,
                                                    nullptr);

  if (rpass->buffer.gpu_texture) {
    GPU_texture_update(rpass->buffer.gpu_texture, GPU_DATA_FLOAT, rpass->buffer.data);
    re->result_has_gpu_texture_caches = true;
  }

  return rpass->buffer.gpu_texture;
}
void RE_render_result_full_channel_name(char *fullname,
const char *layname,
const char *passname,
@ -1200,6 +1243,7 @@ template<class BufferType> static BufferType render_buffer_new(decltype(BufferTy
buffer.data = data;
buffer.sharing_info = blender::implicit_sharing::info_for_mem_free(data);
buffer.gpu_texture = nullptr;
return buffer;
}
@ -1212,6 +1256,11 @@ template<class BufferType> static void render_buffer_data_free(BufferType *rende
}
blender::implicit_sharing::free_shared_data(&render_buffer->data, &render_buffer->sharing_info);
if (render_buffer->gpu_texture) {
GPU_texture_free(render_buffer->gpu_texture);
render_buffer->gpu_texture = nullptr;
}
}
template<class BufferType>

View File

@ -131,6 +131,11 @@ void render_result_views_shallowcopy(struct RenderResult *dst, struct RenderResu
*/
void render_result_views_shallowdelete(struct RenderResult *rr);
/**
* Free GPU texture caches to reduce memory usage.
*/
void render_result_free_gpu_texture_caches(struct RenderResult *rr);
#define FOREACH_VIEW_LAYER_TO_RENDER_BEGIN(re_, iter_) \
{ \
int nr_; \

View File

@ -53,6 +53,8 @@ struct Render {
* write lock, all external code must use a read lock. internal code is assumed
* to not conflict with writes, so no lock used for that */
ThreadRWMutex resultmutex;
/* True if result has GPU textures, to quickly skip cache clear. */
bool result_has_gpu_texture_caches;
/* Guard for drawing render result using engine's `draw()` callback. */
ThreadMutex engine_draw_mutex;

View File

@ -83,6 +83,8 @@
#include "DEG_depsgraph.h"
#include "DEG_depsgraph_query.h"
#include "RE_pipeline.h"
/**
* When a gizmo is highlighted and uses click/drag events,
* this prevents mouse button press events from being passed through to other key-maps
@ -716,6 +718,8 @@ void wm_event_do_notifiers(bContext *C)
wm_event_do_refresh_wm_and_depsgraph(C);
RE_FreeGPUTextureCaches(true);
/* Status bar. */
if (wm->winactive) {
wmWindow *win = wm->winactive;