ImBuf: Optimize GPU memory by using 1 component format for grayscale images

This is done by checking the number of bitplanes of the image buffer.
We assume that a float buffer uses the same number of bitplanes as it
would if it were a byte buffer.

Then, the data of the image buffer is packed at the start of the `rect` or
`rect_float` before upload.

**Statistics - einar.v004.blend**

Note that not all grayscale textures have been stored as BW images, so the
amount of memory that could potentially be saved is larger than shown here.

Without patch
```
104 Textures - 3294.99 MB (3294.47 MB over 32x32), 37 RTs - 192.52 MB.
Avg. tex dimension: 2201.88x1253.51 (2283.53x2202.13 over 32x32)
464 Buffers - 25.01 MB total 1.24 MB IBs 23.50 MB VBs.
3512.52 MB - Grand total GPU buffer + texture load
```

Patch applied
```
104 Textures - 2917.66 MB (2917.14 MB over 32x32), 39 RTs - 215.45 MB.
Avg. tex dimension: 2221.38x1252.75 (2323.28x2253.47 over 32x32)
467 Buffers - 25.01 MB total 1.24 MB IBs 23.51 MB VBs.
3158.13 MB - Grand total GPU buffer + texture load.
```

Reviewed By: fclem

Differential Revision: https://developer.blender.org/D15484
This commit is contained in:
Jeroen Bakker
2022-08-23 14:26:01 +02:00
committed by Jeroen Bakker
parent 13b2716e1c
commit 37533cd6cb
7 changed files with 131 additions and 28 deletions

View File

@@ -186,8 +186,14 @@ struct NodeData {
{ {
UDIMTilePixels *tile = find_tile_data(image_tile); UDIMTilePixels *tile = find_tile_data(image_tile);
if (tile && tile->flags.dirty) { if (tile && tile->flags.dirty) {
if (image_buffer.planes == 8) {
image_buffer.planes = 32;
BKE_image_partial_update_mark_full_update(&image);
}
else {
BKE_image_partial_update_mark_region( BKE_image_partial_update_mark_region(
&image, image_tile.image_tile, &image_buffer, &tile->dirty_region); &image, image_tile.image_tile, &image_buffer, &tile->dirty_region);
}
tile->clear_dirty(); tile->clear_dirty();
} }
} }

View File

@@ -138,6 +138,8 @@ static GPUTexture *gpu_texture_create_tile_array(Image *ima, ImBuf *main_ibuf)
int arraywidth = 0, arrayheight = 0; int arraywidth = 0, arrayheight = 0;
ListBase boxes = {nullptr}; ListBase boxes = {nullptr};
int planes = 0;
LISTBASE_FOREACH (ImageTile *, tile, &ima->tiles) { LISTBASE_FOREACH (ImageTile *, tile, &ima->tiles) {
ImageUser iuser; ImageUser iuser;
BKE_imageuser_default(&iuser); BKE_imageuser_default(&iuser);
@@ -164,6 +166,7 @@ static GPUTexture *gpu_texture_create_tile_array(Image *ima, ImBuf *main_ibuf)
BKE_image_release_ibuf(ima, ibuf, nullptr); BKE_image_release_ibuf(ima, ibuf, nullptr);
BLI_addtail(&boxes, packtile); BLI_addtail(&boxes, packtile);
planes = max_ii(planes, ibuf->planes);
} }
} }
@@ -195,9 +198,15 @@ static GPUTexture *gpu_texture_create_tile_array(Image *ima, ImBuf *main_ibuf)
} }
const bool use_high_bitdepth = (ima->flag & IMA_HIGH_BITDEPTH); const bool use_high_bitdepth = (ima->flag & IMA_HIGH_BITDEPTH);
const bool use_grayscale = planes <= 8;
/* Create Texture without content. */ /* Create Texture without content. */
GPUTexture *tex = IMB_touch_gpu_texture( GPUTexture *tex = IMB_touch_gpu_texture(ima->id.name + 2,
ima->id.name + 2, main_ibuf, arraywidth, arrayheight, arraylayers, use_high_bitdepth); main_ibuf,
arraywidth,
arrayheight,
arraylayers,
use_high_bitdepth,
use_grayscale);
/* Upload each tile one by one. */ /* Upload each tile one by one. */
LISTBASE_FOREACH (ImageTile *, tile, &ima->tiles) { LISTBASE_FOREACH (ImageTile *, tile, &ima->tiles) {
@@ -223,6 +232,7 @@ static GPUTexture *gpu_texture_create_tile_array(Image *ima, ImBuf *main_ibuf)
tilelayer, tilelayer,
UNPACK2(tilesize), UNPACK2(tilesize),
use_high_bitdepth, use_high_bitdepth,
use_grayscale,
store_premultiplied); store_premultiplied);
} }

View File

@@ -158,6 +158,16 @@ void imapaint_image_update(
imapaintpartial.dirty_region.xmax, imapaintpartial.dirty_region.xmax,
imapaintpartial.dirty_region.ymax); imapaintpartial.dirty_region.ymax);
/* When a buffer is partially updated, its planes should be set to a value larger than 8. This
* makes sure that partial updating works, but uses more GPU memory as the GPU texture will
* have 4 channels. In that case the whole texture needs to be re-uploaded to the GPU using the
* new texture format. */
if (ibuf != nullptr && ibuf->planes == 8) {
ibuf->planes = 32;
BKE_image_partial_update_mark_full_update(image);
return;
}
/* TODO: should set_tpage create ->rect? */ /* TODO: should set_tpage create ->rect? */
if (texpaint || (sima && sima->lock)) { if (texpaint || (sima && sima->lock)) {
const int w = BLI_rcti_size_x(&imapaintpartial.dirty_region); const int w = BLI_rcti_size_x(&imapaintpartial.dirty_region);

View File

@@ -1212,8 +1212,8 @@ void uiTemplateImageInfo(uiLayout *layout, bContext *C, Image *ima, ImageUser *i
ofs += BLI_strncpy_rlen(str + ofs, TIP_(" + Z"), len - ofs); ofs += BLI_strncpy_rlen(str + ofs, TIP_(" + Z"), len - ofs);
} }
eGPUTextureFormat texture_format = IMB_gpu_get_texture_format(ibuf, eGPUTextureFormat texture_format = IMB_gpu_get_texture_format(
ima->flag & IMA_HIGH_BITDEPTH); ibuf, ima->flag & IMA_HIGH_BITDEPTH, ibuf->planes >= 8);
const char *texture_format_description = GPU_texture_format_description(texture_format); const char *texture_format_description = GPU_texture_format_description(texture_format);
ofs += BLI_snprintf_rlen(str + ofs, len - ofs, TIP_(", %s"), texture_format_description); ofs += BLI_snprintf_rlen(str + ofs, len - ofs, TIP_(", %s"), texture_format_description);

View File

@@ -890,14 +890,22 @@ GPUTexture *IMB_create_gpu_texture(const char *name,
bool use_high_bitdepth, bool use_high_bitdepth,
bool use_premult); bool use_premult);
eGPUTextureFormat IMB_gpu_get_texture_format(const struct ImBuf *ibuf, bool high_bitdepth); eGPUTextureFormat IMB_gpu_get_texture_format(const struct ImBuf *ibuf,
bool high_bitdepth,
bool use_grayscale);
/** /**
* The `ibuf` is only here to detect the storage type. The produced texture will have undefined * The `ibuf` is only here to detect the storage type. The produced texture will have undefined
* content. It will need to be populated by using #IMB_update_gpu_texture_sub(). * content. It will need to be populated by using #IMB_update_gpu_texture_sub().
*/ */
GPUTexture *IMB_touch_gpu_texture( GPUTexture *IMB_touch_gpu_texture(const char *name,
const char *name, struct ImBuf *ibuf, int w, int h, int layers, bool use_high_bitdepth); struct ImBuf *ibuf,
int w,
int h,
int layers,
bool use_high_bitdepth,
bool use_grayscale);
/** /**
* Will update a #GPUTexture using the content of the #ImBuf. Only one layer will be updated. * Will update a #GPUTexture using the content of the #ImBuf. Only one layer will be updated.
* Will resize the ibuf if needed. * Will resize the ibuf if needed.
@@ -911,6 +919,7 @@ void IMB_update_gpu_texture_sub(GPUTexture *tex,
int w, int w,
int h, int h,
bool use_high_bitdepth, bool use_high_bitdepth,
bool use_grayscale,
bool use_premult); bool use_premult);
/** /**

View File

@@ -460,7 +460,7 @@ static int imb_read_tiff_pixels(ImBuf *ibuf, TIFF *image)
scanline_contig_16bit(tmpibuf->rect_float + ib_offset, sbuf, ibuf->x, spp); scanline_contig_16bit(tmpibuf->rect_float + ib_offset, sbuf, ibuf->x, spp);
} }
} }
/* separate channels: RRRGGGBBB */ /* Separate channels: RRRGGGBBB. */
} }
else if (config == PLANARCONFIG_SEPARATE) { else if (config == PLANARCONFIG_SEPARATE) {
@@ -574,7 +574,7 @@ ImBuf *imb_loadtiff(const unsigned char *mem,
TIFFGetField(image, TIFFTAG_IMAGELENGTH, &height); TIFFGetField(image, TIFFTAG_IMAGELENGTH, &height);
TIFFGetField(image, TIFFTAG_SAMPLESPERPIXEL, &spp); TIFFGetField(image, TIFFTAG_SAMPLESPERPIXEL, &spp);
ib_depth = (spp == 3) ? 24 : 32; ib_depth = spp * 8;
ibuf = IMB_allocImBuf(width, height, ib_depth, 0); ibuf = IMB_allocImBuf(width, height, ib_depth, 0);
if (ibuf) { if (ibuf) {

View File

@@ -14,6 +14,7 @@
#include "BKE_global.h" #include "BKE_global.h"
#include "GPU_capabilities.h" #include "GPU_capabilities.h"
#include "GPU_state.h"
#include "GPU_texture.h" #include "GPU_texture.h"
#include "IMB_colormanagement.h" #include "IMB_colormanagement.h"
@@ -22,39 +23,62 @@
/* gpu ibuf utils */ /* gpu ibuf utils */
/**
 * Check whether `ibuf` qualifies for upload as a single channel (grayscale) texture.
 *
 * \return true when the buffer uses at most 8 bitplanes and its `rect_colorspace` is a data,
 * sRGB or scene linear colorspace; false otherwise.
 */
static bool imb_is_grayscale_texture_format_compatible(const ImBuf *ibuf)
{
  /* Buffers with more than 8 bitplanes are never treated as grayscale. */
  const bool has_grayscale_planes = (ibuf->planes <= 8);

  /* Only imbufs with colorspace that do not modify the chrominance of the texture data relative
   * to the scene color space can be uploaded as single channel textures. The checks are
   * evaluated lazily, in the order: planes, data, sRGB, scene linear. */
  return has_grayscale_planes &&
         (IMB_colormanagement_space_is_data(ibuf->rect_colorspace) ||
          IMB_colormanagement_space_is_srgb(ibuf->rect_colorspace) ||
          IMB_colormanagement_space_is_scene_linear(ibuf->rect_colorspace));
}
static void imb_gpu_get_format(const ImBuf *ibuf, static void imb_gpu_get_format(const ImBuf *ibuf,
bool high_bitdepth, bool high_bitdepth,
bool use_grayscale,
eGPUDataFormat *r_data_format, eGPUDataFormat *r_data_format,
eGPUTextureFormat *r_texture_format) eGPUTextureFormat *r_texture_format)
{ {
const bool float_rect = (ibuf->rect_float != NULL); const bool float_rect = (ibuf->rect_float != NULL);
const bool is_grayscale = use_grayscale && imb_is_grayscale_texture_format_compatible(ibuf);
if (float_rect) { if (float_rect) {
/* Float. */ /* Float. */
const bool use_high_bitdepth = (!(ibuf->flags & IB_halffloat) && high_bitdepth); const bool use_high_bitdepth = (!(ibuf->flags & IB_halffloat) && high_bitdepth);
*r_data_format = GPU_DATA_FLOAT; *r_data_format = GPU_DATA_FLOAT;
*r_texture_format = use_high_bitdepth ? GPU_RGBA32F : GPU_RGBA16F; *r_texture_format = is_grayscale ? (use_high_bitdepth ? GPU_R32F : GPU_R16F) :
(use_high_bitdepth ? GPU_RGBA32F : GPU_RGBA16F);
} }
else { else {
if (IMB_colormanagement_space_is_data(ibuf->rect_colorspace) || if (IMB_colormanagement_space_is_data(ibuf->rect_colorspace) ||
IMB_colormanagement_space_is_scene_linear(ibuf->rect_colorspace)) { IMB_colormanagement_space_is_scene_linear(ibuf->rect_colorspace)) {
/* Non-color data or scene linear, just store buffer as is. */ /* Non-color data or scene linear, just store buffer as is. */
*r_data_format = GPU_DATA_UBYTE; *r_data_format = GPU_DATA_UBYTE;
*r_texture_format = GPU_RGBA8; *r_texture_format = (is_grayscale) ? GPU_R8 : GPU_RGBA8;
} }
else if (IMB_colormanagement_space_is_srgb(ibuf->rect_colorspace)) { else if (IMB_colormanagement_space_is_srgb(ibuf->rect_colorspace)) {
/* sRGB, store as byte texture that the GPU can decode directly. */ /* sRGB, store as byte texture that the GPU can decode directly. */
*r_data_format = GPU_DATA_UBYTE; *r_data_format = (is_grayscale) ? GPU_DATA_FLOAT : GPU_DATA_UBYTE;
*r_texture_format = GPU_SRGB8_A8; *r_texture_format = (is_grayscale) ? GPU_R16F : GPU_SRGB8_A8;
} }
else { else {
/* Other colorspace, store as half float texture to avoid precision loss. */ /* Other colorspace, store as half float texture to avoid precision loss. */
*r_data_format = GPU_DATA_FLOAT; *r_data_format = GPU_DATA_FLOAT;
*r_texture_format = GPU_RGBA16F; *r_texture_format = (is_grayscale) ? GPU_R16F : GPU_RGBA16F;
} }
} }
} }
/**
 * Select the swizzle mask for a texture created from `ibuf`.
 *
 * \return "rrra" when the buffer is compatible with the grayscale texture format (the first
 * channel is replicated to the color channels), "rgba" otherwise.
 */
static const char *imb_gpu_get_swizzle(const ImBuf *ibuf)
{
  if (imb_is_grayscale_texture_format_compatible(ibuf)) {
    return "rrra";
  }
  return "rgba";
}
/* Return false if no suitable format was found. */ /* Return false if no suitable format was found. */
#ifdef WITH_DDS #ifdef WITH_DDS
static bool IMB_gpu_get_compressed_format(const ImBuf *ibuf, eGPUTextureFormat *r_texture_format) static bool IMB_gpu_get_compressed_format(const ImBuf *ibuf, eGPUTextureFormat *r_texture_format)
@@ -90,7 +114,8 @@ static void *imb_gpu_get_data(const ImBuf *ibuf,
const bool store_premultiplied, const bool store_premultiplied,
bool *r_freedata) bool *r_freedata)
{ {
const bool is_float_rect = (ibuf->rect_float != NULL); bool is_float_rect = (ibuf->rect_float != NULL);
const bool is_grayscale = imb_is_grayscale_texture_format_compatible(ibuf);
void *data_rect = (is_float_rect) ? (void *)ibuf->rect_float : (void *)ibuf->rect; void *data_rect = (is_float_rect) ? (void *)ibuf->rect_float : (void *)ibuf->rect;
bool freedata = false; bool freedata = false;
@@ -121,7 +146,8 @@ static void *imb_gpu_get_data(const ImBuf *ibuf,
else if (IMB_colormanagement_space_is_srgb(ibuf->rect_colorspace) || else if (IMB_colormanagement_space_is_srgb(ibuf->rect_colorspace) ||
IMB_colormanagement_space_is_scene_linear(ibuf->rect_colorspace)) { IMB_colormanagement_space_is_scene_linear(ibuf->rect_colorspace)) {
/* sRGB or scene linear, store as byte texture that the GPU can decode directly. */ /* sRGB or scene linear, store as byte texture that the GPU can decode directly. */
data_rect = MEM_mallocN(sizeof(uchar[4]) * ibuf->x * ibuf->y, __func__); data_rect = MEM_mallocN(
(is_grayscale ? sizeof(float[4]) : sizeof(uchar[4])) * ibuf->x * ibuf->y, __func__);
*r_freedata = freedata = true; *r_freedata = freedata = true;
if (data_rect == NULL) { if (data_rect == NULL) {
@@ -133,9 +159,17 @@ static void *imb_gpu_get_data(const ImBuf *ibuf,
* this allows us to use sRGB texture formats and preserves color values in * this allows us to use sRGB texture formats and preserves color values in
* zero alpha areas, and appears generally closer to what game engines that we * zero alpha areas, and appears generally closer to what game engines that we
* want to be compatible with do. */ * want to be compatible with do. */
if (is_grayscale) {
/* Convert to a float buffer to then pack as half floats reducing the buffer size by half. */
IMB_colormanagement_imbuf_to_float_texture(
(float *)data_rect, 0, 0, ibuf->x, ibuf->y, ibuf, store_premultiplied);
is_float_rect = true;
}
else {
IMB_colormanagement_imbuf_to_byte_texture( IMB_colormanagement_imbuf_to_byte_texture(
(uchar *)data_rect, 0, 0, ibuf->x, ibuf->y, ibuf, store_premultiplied); (uchar *)data_rect, 0, 0, ibuf->x, ibuf->y, ibuf, store_premultiplied);
} }
}
else { else {
/* Other colorspace, store as float texture to avoid precision loss. */ /* Other colorspace, store as float texture to avoid precision loss. */
data_rect = MEM_mallocN(sizeof(float[4]) * ibuf->x * ibuf->y, __func__); data_rect = MEM_mallocN(sizeof(float[4]) * ibuf->x * ibuf->y, __func__);
@@ -167,21 +201,52 @@ static void *imb_gpu_get_data(const ImBuf *ibuf,
} }
data_rect = (is_float_rect) ? (void *)scale_ibuf->rect_float : (void *)scale_ibuf->rect; data_rect = (is_float_rect) ? (void *)scale_ibuf->rect_float : (void *)scale_ibuf->rect;
*r_freedata = true; *r_freedata = freedata = true;
/* Steal the rescaled buffer to avoid double free. */ /* Steal the rescaled buffer to avoid double free. */
scale_ibuf->rect_float = NULL; scale_ibuf->rect_float = NULL;
scale_ibuf->rect = NULL; scale_ibuf->rect = NULL;
IMB_freeImBuf(scale_ibuf); IMB_freeImBuf(scale_ibuf);
} }
/* Pack first channel data manually at the start of the buffer. */
if (is_grayscale) {
void *src_rect = data_rect;
if (freedata == false) {
data_rect = MEM_mallocN((is_float_rect ? sizeof(float) : sizeof(uchar)) * ibuf->x * ibuf->y,
__func__);
*r_freedata = freedata = true;
}
if (data_rect == NULL) {
return NULL;
}
if (is_float_rect) {
for (uint64_t i = 0; i < ibuf->x * ibuf->y; i++) {
((float *)data_rect)[i] = ((float *)src_rect)[i * 4];
}
}
else {
for (uint64_t i = 0; i < ibuf->x * ibuf->y; i++) {
((uchar *)data_rect)[i] = ((uchar *)src_rect)[i * 4];
}
}
}
return data_rect; return data_rect;
} }
GPUTexture *IMB_touch_gpu_texture( GPUTexture *IMB_touch_gpu_texture(const char *name,
const char *name, ImBuf *ibuf, int w, int h, int layers, bool use_high_bitdepth) ImBuf *ibuf,
int w,
int h,
int layers,
bool use_high_bitdepth,
bool use_grayscale)
{ {
eGPUDataFormat data_format; eGPUDataFormat data_format;
eGPUTextureFormat tex_format; eGPUTextureFormat tex_format;
imb_gpu_get_format(ibuf, use_high_bitdepth, &data_format, &tex_format); imb_gpu_get_format(ibuf, use_high_bitdepth, use_grayscale, &data_format, &tex_format);
GPUTexture *tex; GPUTexture *tex;
if (layers > 0) { if (layers > 0) {
@@ -191,6 +256,7 @@ GPUTexture *IMB_touch_gpu_texture(
tex = GPU_texture_create_2d(name, w, h, 9999, tex_format, NULL); tex = GPU_texture_create_2d(name, w, h, 9999, tex_format, NULL);
} }
GPU_texture_swizzle_set(tex, imb_gpu_get_swizzle(ibuf));
GPU_texture_anisotropic_filter(tex, true); GPU_texture_anisotropic_filter(tex, true);
return tex; return tex;
} }
@@ -203,6 +269,7 @@ void IMB_update_gpu_texture_sub(GPUTexture *tex,
int w, int w,
int h, int h,
bool use_high_bitdepth, bool use_high_bitdepth,
bool use_grayscale,
bool use_premult) bool use_premult)
{ {
const bool do_rescale = (ibuf->x != w || ibuf->y != h); const bool do_rescale = (ibuf->x != w || ibuf->y != h);
@@ -210,7 +277,7 @@ void IMB_update_gpu_texture_sub(GPUTexture *tex,
eGPUDataFormat data_format; eGPUDataFormat data_format;
eGPUTextureFormat tex_format; eGPUTextureFormat tex_format;
imb_gpu_get_format(ibuf, use_high_bitdepth, &data_format, &tex_format); imb_gpu_get_format(ibuf, use_high_bitdepth, use_grayscale, &data_format, &tex_format);
bool freebuf = false; bool freebuf = false;
@@ -266,7 +333,7 @@ GPUTexture *IMB_create_gpu_texture(const char *name,
eGPUDataFormat data_format; eGPUDataFormat data_format;
eGPUTextureFormat tex_format; eGPUTextureFormat tex_format;
imb_gpu_get_format(ibuf, use_high_bitdepth, &data_format, &tex_format); imb_gpu_get_format(ibuf, use_high_bitdepth, true, &data_format, &tex_format);
bool freebuf = false; bool freebuf = false;
@@ -282,6 +349,7 @@ GPUTexture *IMB_create_gpu_texture(const char *name,
void *data = imb_gpu_get_data(ibuf, do_rescale, size, use_premult, &freebuf); void *data = imb_gpu_get_data(ibuf, do_rescale, size, use_premult, &freebuf);
GPU_texture_update(tex, data_format, data); GPU_texture_update(tex, data_format, data);
GPU_texture_swizzle_set(tex, imb_gpu_get_swizzle(ibuf));
GPU_texture_anisotropic_filter(tex, true); GPU_texture_anisotropic_filter(tex, true);
if (freebuf) { if (freebuf) {
@@ -291,12 +359,12 @@ GPUTexture *IMB_create_gpu_texture(const char *name,
return tex; return tex;
} }
eGPUTextureFormat IMB_gpu_get_texture_format(const ImBuf *ibuf, bool high_bitdepth) eGPUTextureFormat IMB_gpu_get_texture_format(const ImBuf *ibuf, bool high_bitdepth, bool use_grayscale)
{ {
eGPUTextureFormat gpu_texture_format; eGPUTextureFormat gpu_texture_format;
eGPUDataFormat gpu_data_format; eGPUDataFormat gpu_data_format;
imb_gpu_get_format(ibuf, high_bitdepth, &gpu_data_format, &gpu_texture_format); imb_gpu_get_format(ibuf, high_bitdepth, use_grayscale, &gpu_data_format, &gpu_texture_format);
return gpu_texture_format; return gpu_texture_format;
} }