Realtime Compositor: cache render pass GPU textures #108818
|
@ -825,10 +825,19 @@ class RenderLayerOperation : public NodeOperation {
|
|||
void execute() override
|
||||
{
|
||||
const int view_layer = bnode().custom1;
|
||||
GPUTexture *combined_texture = context().get_input_texture(view_layer, RE_PASSNAME_COMBINED);
|
||||
|
||||
execute_pass("Image", combined_texture, "compositor_read_pass_color");
|
||||
execute_pass("Alpha", combined_texture, "compositor_read_pass_alpha");
|
||||
Result &image_result = get_result("Image");
|
||||
Result &alpha_result = get_result("Alpha");
|
||||
|
||||
if (image_result.should_compute() || alpha_result.should_compute()) {
|
||||
GPUTexture *combined_texture = context().get_input_texture(view_layer, RE_PASSNAME_COMBINED);
|
||||
if (image_result.should_compute()) {
|
||||
execute_pass(image_result, combined_texture, "compositor_read_pass_color");
|
||||
}
|
||||
if (alpha_result.should_compute()) {
|
||||
execute_pass(alpha_result, combined_texture, "compositor_read_pass_alpha");
|
||||
}
|
||||
brecht marked this conversation as resolved
|
||||
}
|
||||
|
||||
/* Other output passes are not supported for now, so allocate them as invalid. */
|
||||
for (const bNodeSocket *output : this->node()->output_sockets()) {
|
||||
|
@ -836,15 +845,20 @@ class RenderLayerOperation : public NodeOperation {
|
|||
continue;
|
||||
}
|
||||
|
||||
Result &result = get_result(output->identifier);
|
||||
if (!result.should_compute()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
GPUTexture *pass_texture = context().get_input_texture(view_layer, output->identifier);
|
||||
if (output->type == SOCK_FLOAT) {
|
||||
execute_pass(output->identifier, pass_texture, "compositor_read_pass_float");
|
||||
execute_pass(result, pass_texture, "compositor_read_pass_float");
|
||||
}
|
||||
else if (output->type == SOCK_VECTOR) {
|
||||
execute_pass(output->identifier, pass_texture, "compositor_read_pass_vector");
|
||||
execute_pass(result, pass_texture, "compositor_read_pass_vector");
|
||||
}
|
||||
else if (output->type == SOCK_RGBA) {
|
||||
execute_pass(output->identifier, pass_texture, "compositor_read_pass_color");
|
||||
execute_pass(result, pass_texture, "compositor_read_pass_color");
|
||||
}
|
||||
else {
|
||||
BLI_assert_unreachable();
|
||||
|
@ -852,15 +866,11 @@ class RenderLayerOperation : public NodeOperation {
|
|||
}
|
||||
}
|
||||
|
||||
void execute_pass(const char *pass_name, GPUTexture *pass_texture, const char *shader_name)
|
||||
void execute_pass(Result &result, GPUTexture *pass_texture, const char *shader_name)
|
||||
{
|
||||
Result &image_result = get_result(pass_name);
|
||||
if (!image_result.should_compute()) {
|
||||
return;
|
||||
}
|
||||
if (pass_texture == nullptr) {
|
||||
/* Pass not rendered yet, or not supported by viewport. */
|
||||
image_result.allocate_invalid();
|
||||
result.allocate_invalid();
|
||||
context().set_info_message("Viewport compositor setup not fully supported");
|
||||
return;
|
||||
}
|
||||
|
@ -878,14 +888,14 @@ class RenderLayerOperation : public NodeOperation {
|
|||
GPU_texture_bind(pass_texture, input_unit);
|
||||
|
||||
const int2 compositing_region_size = context().get_compositing_region_size();
|
||||
image_result.allocate_texture(Domain(compositing_region_size));
|
||||
image_result.bind_as_image(shader, "output_img");
|
||||
result.allocate_texture(Domain(compositing_region_size));
|
||||
result.bind_as_image(shader, "output_img");
|
||||
|
||||
compute_dispatch_threads_at_least(shader, compositing_region_size);
|
||||
|
||||
GPU_shader_unbind();
|
||||
GPU_texture_unbind(pass_texture);
|
||||
image_result.unbind_as_image();
|
||||
result.unbind_as_image();
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
#include "BLI_implicit_sharing.h"
|
||||
|
||||
struct GPUTexture;
|
||||
struct ImBuf;
|
||||
struct Image;
|
||||
struct ImageFormatData;
|
||||
|
@ -44,16 +45,20 @@ typedef struct Render Render;
|
|||
/* Buffer of floating point values which uses implicit sharing.
|
||||
*
|
||||
* The buffer is allocated by render passes creation, and then is shared with the render result
|
||||
* and image buffer. */
|
||||
* and image buffer.
|
||||
*
|
||||
* The GPU texture is an optional read-only copy of the render buffer in GPU memory. */
|
||||
typedef struct RenderBuffer {
|
||||
float *data;
|
||||
const ImplicitSharingInfoHandle *sharing_info;
|
||||
struct GPUTexture *gpu_texture;
|
||||
} RenderBuffer;
|
||||
|
||||
/* Specialized render buffer to store 8bpp passes. */
|
||||
typedef struct RenderByteBuffer {
|
||||
uint8_t *data;
|
||||
const ImplicitSharingInfoHandle *sharing_info;
|
||||
struct GPUTexture *gpu_texture;
|
||||
} RenderByteBuffer;
|
||||
|
||||
/* Render Result usage:
|
||||
|
@ -206,6 +211,7 @@ void RE_FreeAllRender(void);
|
|||
* On file load, free render results.
|
||||
*/
|
||||
void RE_FreeAllRenderResults(void);
|
||||
|
||||
/**
|
||||
* On file load or when changing engines, free persistent render data.
|
||||
* Assumes no engines are currently rendering.
|
||||
|
@ -216,6 +222,12 @@ void RE_FreeAllPersistentData(void);
|
|||
*/
|
||||
void RE_FreePersistentData(const struct Scene *scene);
|
||||
|
||||
/*
|
||||
* Free cached GPU textures to reduce memory usage. Before rendering all are cleared
|
||||
* and on UI changes when detected they are no longer used.
|
||||
*/
|
||||
void RE_FreeGPUTextureCaches(const bool only_unused);
|
||||
|
||||
/**
|
||||
* Get results and statistics.
|
||||
*/
|
||||
|
@ -463,6 +475,11 @@ struct RenderPass *RE_pass_find_by_type(struct RenderLayer *rl,
|
|||
*/
|
||||
void RE_pass_set_buffer_data(struct RenderPass *pass, float *data);
|
||||
|
||||
/**
|
||||
* Ensure a GPU texture corresponding to the render buffer data exists.
|
||||
*/
|
||||
struct GPUTexture *RE_pass_ensure_gpu_texture_cache(struct Render *re, struct RenderPass *rpass);
|
||||
|
||||
/* shaded view or baking options */
|
||||
#define RE_BAKE_NORMALS 0
|
||||
#define RE_BAKE_DISPLACEMENT 1
|
||||
|
|
|
@ -73,6 +73,9 @@ class Context : public realtime_compositor::Context {
|
|||
/* Viewer output texture. */
|
||||
GPUTexture *viewer_output_texture_ = nullptr;
|
||||
|
||||
/* Texture pool. */
|
||||
TexturePool &render_texture_pool_;
|
||||
|
||||
public:
|
||||
Context(const Scene &scene,
|
||||
const RenderData &render_data,
|
||||
|
@ -85,7 +88,8 @@ class Context : public realtime_compositor::Context {
|
|||
render_data_(render_data),
|
||||
node_tree_(node_tree),
|
||||
use_file_output_(use_file_output),
|
||||
view_name_(view_name)
|
||||
view_name_(view_name),
|
||||
render_texture_pool_(texture_pool)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -175,9 +179,6 @@ class Context : public realtime_compositor::Context {
|
|||
|
||||
GPUTexture *get_input_texture(int view_layer_id, const char *pass_name) override
|
||||
{
|
||||
/* TODO: eventually this should get cached on the RenderResult itself when
|
||||
* it supports storing GPU buffers, for faster updates. But will also need
|
||||
* some eviction strategy to avoid too much GPU memory usage. */
|
||||
Render *re = RE_GetSceneRender(&scene_);
|
||||
RenderResult *rr = nullptr;
|
||||
GPUTexture *input_texture = nullptr;
|
||||
|
@ -195,34 +196,12 @@ class Context : public realtime_compositor::Context {
|
|||
&rl->passes, pass_name, offsetof(RenderPass, name));
|
||||
|
||||
if (rpass && rpass->buffer.data) {
|
||||
const int2 size(rl->rectx, rl->recty);
|
||||
input_texture = RE_pass_ensure_gpu_texture_cache(re, rpass);
|
||||
|
||||
if (rpass->channels == 1) {
|
||||
input_texture = texture_pool().acquire_float(size);
|
||||
if (input_texture) {
|
||||
GPU_texture_update(input_texture, GPU_DATA_FLOAT, rpass->buffer.data);
|
||||
}
|
||||
}
|
||||
else if (rpass->channels == 3) {
|
||||
input_texture = texture_pool().acquire_color(size);
|
||||
if (input_texture) {
|
||||
/* TODO: conversion could be done as part of GPU upload somehow? */
|
||||
const float *rgb_buffer = rpass->buffer.data;
|
||||
Vector<float> rgba_buffer(4 * size.x * size.y);
|
||||
for (size_t i = 0; i < size_t(size.x) * size_t(size.y); i++) {
|
||||
rgba_buffer[i * 4 + 0] = rgb_buffer[i * 3 + 0];
|
||||
rgba_buffer[i * 4 + 1] = rgb_buffer[i * 3 + 1];
|
||||
rgba_buffer[i * 4 + 2] = rgb_buffer[i * 3 + 2];
|
||||
rgba_buffer[i * 4 + 3] = 1.0f;
|
||||
}
|
||||
GPU_texture_update(input_texture, GPU_DATA_FLOAT, rgba_buffer.data());
|
||||
}
|
||||
}
|
||||
else if (rpass->channels == 4) {
|
||||
input_texture = texture_pool().acquire_color(size);
|
||||
if (input_texture) {
|
||||
GPU_texture_update(input_texture, GPU_DATA_FLOAT, rpass->buffer.data);
|
||||
}
|
||||
if (input_texture) {
|
||||
/* Don't assume render keeps texture around, add our own reference. */
|
||||
GPU_texture_ref(input_texture);
|
||||
render_texture_pool_.textures_.append(input_texture);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "DNA_sequence_types.h"
|
||||
#include "DNA_space_types.h"
|
||||
#include "DNA_userdef_types.h"
|
||||
#include "DNA_windowmanager_types.h"
|
||||
|
||||
#include "MEM_guardedalloc.h"
|
||||
|
||||
|
@ -331,6 +332,7 @@ void RE_ClearResult(Render *re)
|
|||
if (re) {
|
||||
render_result_free(re->result);
|
||||
re->result = nullptr;
|
||||
re->result_has_gpu_texture_caches = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -636,6 +638,7 @@ void RE_FreeAllRenderResults(void)
|
|||
|
||||
re->result = nullptr;
|
||||
re->pushedresult = nullptr;
|
||||
re->result_has_gpu_texture_caches = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -650,9 +653,58 @@ void RE_FreeAllPersistentData(void)
|
|||
}
|
||||
}
|
||||
|
||||
void RE_FreeGPUTextureCaches(const bool only_unused)
|
||||
{
|
||||
LISTBASE_FOREACH (Render *, re, &RenderGlobal.renderlist) {
|
||||
if (!re->result_has_gpu_texture_caches) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Scene *scene = re->scene;
|
||||
bool do_free = true;
|
||||
|
||||
/* Detect if scene is using realtime compositing, and if either a node editor is
|
||||
* showing the nodes, or an image editor is showing the render result or viewer. */
|
||||
if (only_unused && scene && scene->use_nodes && scene->nodetree &&
|
||||
scene->nodetree->execution_mode == NTREE_EXECUTION_MODE_REALTIME)
|
||||
{
|
||||
wmWindowManager *wm = static_cast<wmWindowManager *>(G_MAIN->wm.first);
|
||||
LISTBASE_FOREACH (const wmWindow *, win, &wm->windows) {
|
||||
const bScreen *screen = WM_window_get_active_screen(win);
|
||||
LISTBASE_FOREACH (const ScrArea *, area, &screen->areabase) {
|
||||
const SpaceLink &space = *static_cast<const SpaceLink *>(area->spacedata.first);
|
||||
|
||||
if (space.spacetype == SPACE_NODE) {
|
||||
const SpaceNode &snode = reinterpret_cast<const SpaceNode &>(space);
|
||||
if (snode.nodetree == scene->nodetree) {
|
||||
do_free = false;
|
||||
}
|
||||
}
|
||||
else if (space.spacetype == SPACE_IMAGE) {
|
||||
brecht marked this conversation as resolved
Outdated
Omar Emara
commented
Should this be `SPACE_IMAGE`?
|
||||
const SpaceImage &sima = reinterpret_cast<const SpaceImage &>(space);
|
||||
if (sima.image && sima.image->source == IMA_SRC_VIEWER) {
|
||||
do_free = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (do_free) {
|
||||
RenderResult *result = RE_AcquireResultWrite(re);
|
||||
if (result != nullptr) {
|
||||
render_result_free_gpu_texture_caches(result);
|
||||
}
|
||||
re->result_has_gpu_texture_caches = false;
|
||||
RE_ReleaseResult(re);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void re_free_persistent_data(Render *re)
|
||||
{
|
||||
/* If engine is currently rendering, just wait for it to be freed when it finishes rendering. */
|
||||
/* If engine is currently rendering, just wait for it to be freed when it finishes rendering.
|
||||
*/
|
||||
if (re->engine && !(re->engine->flag & RE_ENGINE_RENDERING)) {
|
||||
RE_engine_free(re->engine);
|
||||
re->engine = nullptr;
|
||||
|
@ -1754,8 +1806,8 @@ static void render_pipeline_free(Render *re)
|
|||
/* Destroy the opengl context in the correct thread. */
|
||||
RE_system_gpu_context_destroy(re);
|
||||
|
||||
/* In the case the engine did not mark tiles as finished (un-highlight, which could happen in the
|
||||
* case of cancelled render) ensure the storage is empty. */
|
||||
/* In the case the engine did not mark tiles as finished (un-highlight, which could happen in
|
||||
* the case of cancelled render) ensure the storage is empty. */
|
||||
if (re->highlighted_tiles != nullptr) {
|
||||
BLI_mutex_lock(&re->highlighted_tiles_mutex);
|
||||
|
||||
|
@ -1797,6 +1849,9 @@ void RE_RenderFrame(Render *re,
|
|||
|
||||
render_callback_exec_id(re, re->main, &scene->id, BKE_CB_EVT_RENDER_PRE);
|
||||
|
||||
/* Reduce GPU memory usage so renderer has more space. */
|
||||
RE_FreeGPUTextureCaches(false);
|
||||
|
||||
render_init_depsgraph(re);
|
||||
|
||||
do_render_full_pipeline(re);
|
||||
|
@ -2198,6 +2253,9 @@ void RE_RenderAnim(Render *re,
|
|||
for (nfra = sfra, scene->r.cfra = sfra; scene->r.cfra <= efra; scene->r.cfra++) {
|
||||
char filepath[FILE_MAX];
|
||||
|
||||
/* Reduce GPU memory usage so renderer has more space. */
|
||||
RE_FreeGPUTextureCaches(false);
|
||||
|
||||
/* A feedback loop exists here -- render initialization requires updated
|
||||
* render layers settings which could be animated, but scene evaluation for
|
||||
* the frame happens later because it depends on what layers are visible to
|
||||
|
|
|
@ -39,6 +39,8 @@
|
|||
#include "IMB_imbuf_types.h"
|
||||
#include "IMB_openexr.h"
|
||||
|
||||
#include "GPU_texture.h"
|
||||
|
||||
#include "RE_engine.h"
|
||||
|
||||
#include "render_result.h"
|
||||
|
@ -115,6 +117,18 @@ void render_result_free_list(ListBase *lb, RenderResult *rr)
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Release the GPU texture attached to the buffer of every pass in every layer of \a rr.
 *
 * Each freed pointer is reset to null; the CPU side buffer data is not touched here.
 */
void render_result_free_gpu_texture_caches(RenderResult *rr)
{
  LISTBASE_FOREACH (RenderLayer *, layer, &rr->layers) {
    LISTBASE_FOREACH (RenderPass *, pass, &layer->passes) {
      /* Nothing cached for this pass. */
      if (pass->buffer.gpu_texture == nullptr) {
        continue;
      }

      GPU_texture_free(pass->buffer.gpu_texture);
      pass->buffer.gpu_texture = nullptr;
    }
  }
}
|
||||
|
||||
/********************************* multiview *************************************/
|
||||
|
||||
void render_result_views_shallowcopy(RenderResult *dst, RenderResult *src)
|
||||
|
@ -407,6 +421,35 @@ void RE_pass_set_buffer_data(RenderPass *pass, float *data)
|
|||
RE_RenderBuffer_assign_data(&pass->buffer, data);
|
||||
}
|
||||
|
||||
/**
 * Return the GPU texture cached on the buffer of \a rpass, creating and filling it from the
 * float buffer data on first use.
 *
 * \param re: Render owning the pass. It is flagged as having GPU texture caches when a texture
 * is created, so that #RE_FreeGPUTextureCaches does not skip it.
 * \param rpass: Pass whose buffer should get a GPU copy.
 * \return The cached texture, or null when the pass has no buffer data, has an unsupported
 * channel count, or the texture could not be created.
 */
GPUTexture *RE_pass_ensure_gpu_texture_cache(Render *re, RenderPass *rpass)
{
  /* Reuse the existing copy when there is one. */
  if (rpass->buffer.gpu_texture) {
    return rpass->buffer.gpu_texture;
  }
  if (rpass->buffer.data == nullptr) {
    return nullptr;
  }

  eGPUTextureFormat format;
  switch (rpass->channels) {
    case 1:
      format = GPU_R16F;
      break;
    case 3:
      format = GPU_RGB16F;
      break;
    case 4:
      format = GPU_RGBA16F;
      break;
    default:
      /* Only 1, 3 and 4 channel passes are supported. Falling back to a 4 channel format for
       * any other count would presumably make the upload below read more floats than the
       * buffer holds, so no texture is created instead. */
      return nullptr;
  }

  rpass->buffer.gpu_texture = GPU_texture_create_2d("RenderBuffer.gpu_texture",
                                                    rpass->rectx,
                                                    rpass->recty,
                                                    1,
                                                    format,
                                                    GPU_TEXTURE_USAGE_GENERAL,
                                                    nullptr);

  /* Creation may fail, only upload and flag the render when a texture was obtained. */
  if (rpass->buffer.gpu_texture) {
    GPU_texture_update(rpass->buffer.gpu_texture, GPU_DATA_FLOAT, rpass->buffer.data);
    re->result_has_gpu_texture_caches = true;
  }

  return rpass->buffer.gpu_texture;
}
|
||||
|
||||
void RE_render_result_full_channel_name(char *fullname,
|
||||
const char *layname,
|
||||
const char *passname,
|
||||
|
@ -1200,6 +1243,7 @@ template<class BufferType> static BufferType render_buffer_new(decltype(BufferTy
|
|||
|
||||
buffer.data = data;
|
||||
buffer.sharing_info = blender::implicit_sharing::info_for_mem_free(data);
|
||||
buffer.gpu_texture = nullptr;
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
@ -1212,6 +1256,11 @@ template<class BufferType> static void render_buffer_data_free(BufferType *rende
|
|||
}
|
||||
|
||||
blender::implicit_sharing::free_shared_data(&render_buffer->data, &render_buffer->sharing_info);
|
||||
|
||||
if (render_buffer->gpu_texture) {
|
||||
GPU_texture_free(render_buffer->gpu_texture);
|
||||
render_buffer->gpu_texture = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
template<class BufferType>
|
||||
|
|
|
@ -131,6 +131,11 @@ void render_result_views_shallowcopy(struct RenderResult *dst, struct RenderResu
|
|||
*/
|
||||
void render_result_views_shallowdelete(struct RenderResult *rr);
|
||||
|
||||
/**
|
||||
* Free GPU texture caches to reduce memory usage.
|
||||
*/
|
||||
void render_result_free_gpu_texture_caches(struct RenderResult *rr);
|
||||
|
||||
#define FOREACH_VIEW_LAYER_TO_RENDER_BEGIN(re_, iter_) \
|
||||
{ \
|
||||
int nr_; \
|
||||
|
|
|
@ -53,6 +53,8 @@ struct Render {
|
|||
* write lock, all external code must use a read lock. internal code is assumed
|
||||
* to not conflict with writes, so no lock used for that */
|
||||
ThreadRWMutex resultmutex;
|
||||
/* True if result has GPU textures, to quickly skip cache clear. */
|
||||
bool result_has_gpu_texture_caches;
|
||||
|
||||
/* Guard for drawing render result using engine's `draw()` callback. */
|
||||
ThreadMutex engine_draw_mutex;
|
||||
|
|
|
@ -83,6 +83,8 @@
|
|||
#include "DEG_depsgraph.h"
|
||||
#include "DEG_depsgraph_query.h"
|
||||
|
||||
#include "RE_pipeline.h"
|
||||
|
||||
/**
|
||||
* When a gizmo is highlighted and uses click/drag events,
|
||||
* this prevents mouse button press events from being passed through to other key-maps
|
||||
|
@ -716,6 +718,8 @@ void wm_event_do_notifiers(bContext *C)
|
|||
|
||||
wm_event_do_refresh_wm_and_depsgraph(C);
|
||||
|
||||
RE_FreeGPUTextureCaches(true);
|
||||
|
||||
/* Status bar. */
|
||||
if (wm->winactive) {
|
||||
wmWindow *win = wm->winactive;
|
||||
|
|
Loading…
Reference in New Issue
Should this be
compositor_read_pass_alpha
instead?