1
1

Researching bottlenecks.

This commit is contained in:
2022-10-14 14:51:10 +02:00
parent 5c0e90033f
commit 396fe9a98a
8 changed files with 156 additions and 89 deletions

View File

@@ -106,14 +106,12 @@ struct UDIMTilePixels {
Vector<PackedPixelRow> pixel_rows;
int64_t gpu_buffer_offset;
/* Region of the tile that can be painted on by this node. Size of a sub-tile is determined by
 * `TEXTURE_STREAMING_TILE_SIZE`. */
/* TODO: use list of sub_tile_ids to not overcommit texture usage. */
rcti gpu_sub_tiles;
Vector<int2> gpu_sub_tiles;
UDIMTilePixels()
{
flags.dirty = false;
BLI_rcti_init_minmax(&dirty_region);
BLI_rcti_init_minmax(&gpu_sub_tiles);
}
void mark_dirty(const PackedPixelRow &pixel_row)

View File

@@ -15,6 +15,8 @@
#include "BLI_math.h"
#include "BLI_task.h"
#include "PIL_time_utildefines.h"
#include "BKE_image_wrappers.hh"
#include "bmesh.h"
@@ -79,19 +81,37 @@ void NodeData::build_pixels_gpu_buffer()
void UDIMTilePixels::init_gpu_sub_tiles()
{
BLI_rcti_init_minmax(&gpu_sub_tiles);
BLI_assert(gpu_sub_tiles.is_empty());
const int max_sub_tiles = 16;
bool sub_tiles_hit[max_sub_tiles][max_sub_tiles];
for (int x = 0; x < max_sub_tiles; x++) {
for (int y = 0; y < max_sub_tiles; y++) {
sub_tiles_hit[x][y] = false;
}
}
int2 max_sub_tile_len(0, 0);
for (const PackedPixelRow &elements : pixel_rows) {
int2 subtile_from = int2(elements.start_image_coordinate / TEXTURE_STREAMING_TILE_SIZE);
int2 coord_to = int2(elements.start_image_coordinate) + int2(elements.num_pixels + 1, 1);
int2 subtile_to = int2(coord_to / TEXTURE_STREAMING_TILE_SIZE);
for (int x = subtile_from.x; x < subtile_to.x; x++) {
sub_tiles_hit[x][subtile_from.y] = true;
}
}
BLI_rcti_do_minmax_v(&gpu_sub_tiles, subtile_from);
BLI_rcti_do_minmax_v(&gpu_sub_tiles, subtile_to);
for (int x = 0; x < max_sub_tiles; x++) {
for (int y = 0; y < max_sub_tiles; y++) {
if (sub_tiles_hit[x][y]) {
gpu_sub_tiles.append(int2(x, y));
}
}
}
}
void NodeData::init_gpu_sub_tiles()
{
printf("%s\n", __func__);
for (UDIMTilePixels &tile : tiles) {
tile.init_gpu_sub_tiles();
}

View File

@@ -553,7 +553,7 @@ static void init_paint_brush(const SculptSession &ss,
* - Only tiles that are painted on are loaded in memory, painted on and merged back to the actual
* texture.
*/
template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
template<int32_t Size, int32_t Depth = 16> class GPUSubTileTexture {
struct Info {
struct {
bool in_use_stroke : 1;
@@ -572,7 +572,7 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
std::array<int32_t, Depth> layer_lookup_;
GPUTexture *gpu_texture_ = nullptr;
GPUStorageBuf *tile_buf_ = nullptr;
GPUStorageBuf *paint_tile_buf_ = nullptr;
int64_t tile_buf_size_ = 0;
public:
@@ -593,17 +593,9 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
gpu_texture_ = nullptr;
}
if (tile_buf_) {
GPU_storagebuf_free(tile_buf_);
tile_buf_ = nullptr;
}
}
void reset_usage()
{
printf("%s\n", __func__);
for (Info &info : infos_) {
info.flags.in_use = false;
if (paint_tile_buf_) {
GPU_storagebuf_free(paint_tile_buf_);
paint_tile_buf_ = nullptr;
}
}
@@ -766,6 +758,7 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
{
BLI_assert(gpu_texture_);
float *buffer = nullptr;
bool tiles_updated = false;
for (int64_t index : infos_.index_range()) {
Info &info = infos_[index];
PaintTileData &tile = paint_tiles_[index];
@@ -781,11 +774,22 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
buffer = static_cast<float *>(MEM_callocN(Size * Size * 4 * sizeof(float), __func__));
}
printf("%s: initializing tile {tile:%d, sub_tile:%d,%d, layer_id:%d}\n",
__func__,
tile.tile_number,
UNPACK2(tile.sub_tile_id),
tile.layer_id);
/* TODO: Copy correct data from ImBuf.*/
// GPU_texture_update_sub(
// gpu_texture_, GPU_DATA_FLOAT, buffer, 0, 0, tile.layer_id, Size, Size, 1);
GPU_texture_update_sub(
gpu_texture_, GPU_DATA_FLOAT, buffer, 0, 0, tile.layer_id, Size, Size, 1);
info.flags.needs_update = false;
tiles_updated = true;
}
if (tiles_updated) {
GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
}
if (buffer) {
@@ -798,32 +802,42 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
return gpu_texture_;
}
void ensure_tile_buf()
void ensure_paint_tile_buf()
{
int64_t needed_size = paint_tiles_.capacity() * sizeof(PaintTileData);
/* Reuse the previous buffer only when its size matches exactly, to avoid potential
 * out-of-bounds reads. */
if (tile_buf_ && tile_buf_size_ == needed_size) {
if (paint_tile_buf_ && tile_buf_size_ == needed_size) {
return;
}
if (tile_buf_) {
GPU_storagebuf_free(tile_buf_);
tile_buf_ = nullptr;
if (paint_tile_buf_) {
GPU_storagebuf_free(paint_tile_buf_);
paint_tile_buf_ = nullptr;
}
tile_buf_ = GPU_storagebuf_create(needed_size);
paint_tile_buf_ = GPU_storagebuf_create(needed_size);
}
void update_tile_buf()
void update_paint_tile_buf()
{
BLI_assert(tile_buf_);
GPU_storagebuf_update(tile_buf_, paint_tiles_.data());
BLI_assert(paint_tile_buf_);
for (PaintTileData &tile : paint_tiles_) {
tile.in_use_frame = false;
}
GPU_storagebuf_update(paint_tile_buf_, paint_tiles_.data());
}
GPUStorageBuf *tile_buf_get()
void read_back_paint_tile_buf()
{
BLI_assert(tile_buf_);
return tile_buf_;
BLI_assert(paint_tile_buf_);
// GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
GPU_storagebuf_read(paint_tile_buf_, paint_tiles_.data());
}
/**
 * Accessor for the storage buffer that holds the paint tiles.
 *
 * Asserts that the buffer exists; it is created by `ensure_paint_tile_buf()`.
 * The returned pointer is the member itself, no ownership is transferred.
 */
GPUStorageBuf *paint_tile_buf_get()
{
BLI_assert(paint_tile_buf_);
return paint_tile_buf_;
}
int32_t paint_tiles_len()
@@ -835,7 +849,7 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
{
GPU_texture_image_bind(gpu_texture_get(),
GPU_shader_get_texture_binding(shader, "paint_tiles_img"));
GPU_storagebuf_bind(tile_buf_get(), GPU_shader_get_ssbo(shader, "paint_tile_buf"));
GPU_storagebuf_bind(paint_tile_buf_get(), GPU_shader_get_ssbo(shader, "paint_tile_buf"));
GPU_shader_uniform_1i(shader, "paint_tile_buf_len", paint_tiles_len());
}
@@ -847,6 +861,11 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
if (!info.flags.in_use_frame) {
continue;
}
/*
PaintTileData &paint_tile = paint_tiles_[index];
if (!paint_tile.in_use_frame) {
continue;
}*/
predicate(paint_tiles_[index]);
}
}
@@ -1029,19 +1048,15 @@ static void gpu_painting_image_merge(GPUSculptPaintData &batches,
ImageUser &image_user,
ImBuf &image_buffer)
{
GPU_memory_barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
GPUTexture *canvas_tex = BKE_image_get_gpu_texture(&image, &image_user, &image_buffer);
GPUShader *shader = SCULPT_shader_paint_image_merge_get();
GPU_shader_bind(shader);
batches.tile_texture.bind(shader);
GPU_texture_image_bind(canvas_tex, GPU_shader_get_texture_binding(shader, "texture_img"));
batches.tile_texture.foreach_in_frame([shader](PaintTileData &paint_tile) {
printf("%s: merging tile stored on layer %d {tile:%d sub_tile:%d,%d} \n",
__func__,
paint_tile.layer_id,
paint_tile.tile_number,
UNPACK2(paint_tile.sub_tile_id));
GPU_shader_uniform_1i(shader, "layer_id", paint_tile.layer_id);
GPU_compute_dispatch(shader, TEXTURE_STREAMING_TILE_SIZE, TEXTURE_STREAMING_TILE_SIZE, 1);
GPU_compute_dispatch(shader, TEXTURE_STREAMING_TILE_SIZE / 32, TEXTURE_STREAMING_TILE_SIZE, 1);
});
}
@@ -1064,7 +1079,7 @@ static void init_paint_step(const SculptSession &ss,
}
}
static void dispatch_gpu_painting(TexturePaintingUserData &data)
static void add_paint_step(TexturePaintingUserData &data)
{
SculptSession &ss = *data.ob->sculpt;
@@ -1073,6 +1088,7 @@ static void dispatch_gpu_painting(TexturePaintingUserData &data)
PaintStepData paint_step;
init_paint_step(ss, *data.brush, paint_step);
batches.steps.append(paint_step);
PIL_sleep_ms(1);
}
/* This should be done based on the frame_selection nodes, otherwise we might be over
@@ -1086,11 +1102,8 @@ static void paint_tiles_mark_used(TexturePaintingUserData &data)
for (PBVHNode *node : MutableSpan<PBVHNode *>(data.nodes, data.nodes_len)) {
NodeData &node_data = BKE_pbvh_pixels_node_data_get(*node);
for (UDIMTilePixels &tile : node_data.tiles) {
for (int x = tile.gpu_sub_tiles.xmin; x <= tile.gpu_sub_tiles.xmax; x++) {
for (int y = tile.gpu_sub_tiles.ymin; y <= tile.gpu_sub_tiles.ymax; y++) {
int2 sub_tile_id(x, y);
batches.tile_texture.mark_usage(tile.tile_number, sub_tile_id);
}
for (int2 &sub_tile_id : tile.gpu_sub_tiles) {
batches.tile_texture.mark_usage(tile.tile_number, sub_tile_id);
}
}
}
@@ -1120,7 +1133,7 @@ static TileNumbers collect_active_tile_numbers(const TexturePaintingUserData &da
return result;
}
static void dispatch_gpu_batches(TexturePaintingUserData &data)
static void flush_gpu_batches(TexturePaintingUserData &data)
{
SculptSession &ss = *data.ob->sculpt;
if (!ss.mode.texture_paint.gpu_data) {
@@ -1129,6 +1142,7 @@ static void dispatch_gpu_batches(TexturePaintingUserData &data)
GPUSculptPaintData &batches = *static_cast<GPUSculptPaintData *>(ss.mode.texture_paint.gpu_data);
const int64_t steps_len = batches.steps.size();
printf("%s: flushing %ld steps\n", __func__, steps_len);
int2 paint_step_range(0, steps_len);
batches.update_step_buf();
batches.ensure_vert_coord_buf(ss);
@@ -1136,8 +1150,8 @@ static void dispatch_gpu_batches(TexturePaintingUserData &data)
batches.tile_texture.ensure_gpu_texture();
batches.tile_texture.remove_unused();
batches.tile_texture.assign_layer_ids();
batches.tile_texture.ensure_tile_buf();
batches.tile_texture.update_tile_buf();
batches.tile_texture.ensure_paint_tile_buf();
batches.tile_texture.update_paint_tile_buf();
Image &image = *data.image_data.image;
ImageUser local_image_user = *data.image_data.image_user;
@@ -1153,17 +1167,14 @@ static void dispatch_gpu_batches(TexturePaintingUserData &data)
TIMEIT_START(upload);
batches.tile_texture.update_gpu_texture(tile_number, *image_buffer);
GPU_flush();
TIMEIT_END(upload);
GPU_debug_group_begin("Paint tile");
TIMEIT_START(paint_step);
gpu_painting_paint_step(data, batches, tile_number, paint_step_range);
GPU_flush();
TIMEIT_END(paint_step);
TIMEIT_START(merge);
gpu_painting_image_merge(batches, *data.image_data.image, local_image_user, *image_buffer);
GPU_flush();
TIMEIT_END(merge);
GPU_debug_group_end();
@@ -1185,6 +1196,17 @@ static void gpu_frame_end(TexturePaintingUserData &data)
batches.tile_texture.reset_usage_frame();
}
/**
 * Check whether the sculpt session of `ob` has paint steps stored in its GPU paint data.
 *
 * \return false when no GPU paint data is attached to the session, otherwise true when the
 * list of recorded steps is not empty.
 */
static bool has_unflushed_batches(Object *ob)
{
  SculptSession &session = *ob->sculpt;
  auto *gpu_data = session.mode.texture_paint.gpu_data;
  if (gpu_data) {
    GPUSculptPaintData &paint_data = *static_cast<GPUSculptPaintData *>(gpu_data);
    /* Explicit comparison instead of relying on the implicit size-to-bool conversion. */
    return paint_data.steps.size() != 0;
  }
  return false;
}
/** \} */
} // namespace blender::ed::sculpt_paint::paint::image
@@ -1253,7 +1275,7 @@ void SCULPT_do_paint_brush_image(
if (SCULPT_use_image_paint_compute()) {
ensure_gpu_buffers(data);
update_frame_selection(data);
dispatch_gpu_painting(data);
add_paint_step(data);
paint_tiles_mark_used(data);
}
else {
@@ -1278,6 +1300,10 @@ void SCULPT_paint_image_batches_flush(PaintModeSettings *paint_mode_settings,
return;
}
if (!has_unflushed_batches(ob)) {
return;
}
Brush *brush = BKE_paint_brush(&sd->paint);
TexturePaintingUserData data = {nullptr};
data.ob = ob;
@@ -1290,7 +1316,7 @@ void SCULPT_paint_image_batches_flush(PaintModeSettings *paint_mode_settings,
if (ImageData::init_active_image(ob, &data.image_data, paint_mode_settings)) {
TIMEIT_START(paint_image_gpu);
GPU_debug_group_begin("SCULPT_paint_brush");
dispatch_gpu_batches(data);
flush_gpu_batches(data);
gpu_frame_end(data);
GPU_debug_group_end();
TIMEIT_END(paint_image_gpu);

View File

@@ -69,8 +69,11 @@ struct PaintStepData {
BLI_STATIC_ASSERT_ALIGN(PaintStepData, 16);
struct PaintTileData {
int2 sub_tile_id;
int tile_number;
int layer_id;
int2 sub_tile_id;
int index;
bool1 in_use_frame;
int _pad1[2];
};
BLI_STATIC_ASSERT_ALIGN(PaintTileData, 16);

View File

@@ -8,7 +8,7 @@
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(sculpt_paint_sub_tiles)
.storage_buf(0, Qualifier::READ, "PaintTileData", "paint_tile_buf[]")
.storage_buf(0, Qualifier::READ_WRITE, "PaintTileData", "paint_tile_buf[]")
.push_constant(Type::INT, "paint_tile_buf_len")
.define("SUB_TILE_SIZE", "1024");
@@ -28,7 +28,7 @@ GPU_SHADER_CREATE_INFO(sculpt_paint_image_compute)
.typedef_source("GPU_sculpt_shader_shared.h");
GPU_SHADER_CREATE_INFO(sculpt_paint_image_merge_compute)
.local_group_size(1, 1, 1)
.local_group_size(32, 1, 1)
.image(0, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_3D, "paint_tiles_img")
.image(1, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "texture_img")
.push_constant(Type::INT, "layer_id")

View File

@@ -20,9 +20,6 @@ void main()
{
PackedPixelRow row = pixel_row_buf[gl_GlobalInvocationID.x + pixel_row_offset];
TrianglePaintInput triangle = paint_input[PIXEL_ROW_PRIM_INDEX(row)];
PaintTileData paint_tile;
ivec3 image_coord = paint_tile_coord_from_udim(
1001, PIXEL_ROW_START_IMAGE_COORD(row), paint_tile);
uint row_len = PIXEL_ROW_LEN(row);
@@ -34,34 +31,42 @@ void main()
vec3 delta;
SCULPT_get_row_pos_and_delta(co1, co2, co3, triangle, row, pos, delta);
for (int x = 0; x < row_len; x++) {
/* TODO: Do clipping test. */
vec4 color;
bool color_read = false;
int x = 0;
while (x < row_len) {
PaintTileData paint_tile;
ivec3 image_coord = paint_tile_coord_from_udim(
1001, PIXEL_ROW_START_IMAGE_COORD(row) + int2(x, 0), paint_tile);
bool in_use = false;
for (; x < row_len && image_coord.x < SUB_TILE_SIZE; x++, image_coord.x++, pos += delta) {
/* TODO: Do clipping test. */
vec4 color;
bool color_read = false;
for (int step_index = paint_step_range[0]; step_index < paint_step_range[1]; step_index++) {
PaintStepData step_data = paint_step_buf[step_index];
float distance;
bool test_result = SCULPT_brush_test(paint_brush_buf.test, step_data, pos, distance);
if (test_result) {
if (!color_read) {
color = imageLoad(paint_tiles_img, image_coord);
color_read = true;
for (int step_index = paint_step_range[0]; step_index < paint_step_range[1]; step_index++) {
PaintStepData step_data = paint_step_buf[step_index];
float distance;
bool test_result = SCULPT_brush_test(paint_brush_buf.test, step_data, pos, distance);
if (test_result) {
if (!color_read) {
color = imageLoad(paint_tiles_img, image_coord);
color_read = true;
in_use = true;
}
// TODO: blend with color...
float factor = SCULPT_hardness_factor(distance, step_data.hardness, step_data.radius);
float curve_factor = SCULPT_curve_strength(factor, paint_brush_buf.falloff_shape);
vec4 final_paint_color = SCULPT_blend_color(
color, paint_brush_buf.color * curve_factor * step_data.strength);
final_paint_color *= paint_brush_buf.alpha;
color = SCULPT_blend_color(color, final_paint_color);
}
// TODO: blend with color...
float factor = SCULPT_hardness_factor(distance, step_data.hardness, step_data.radius);
float curve_factor = SCULPT_curve_strength(factor, paint_brush_buf.falloff_shape);
vec4 final_paint_color = SCULPT_blend_color(
color, paint_brush_buf.color * curve_factor * step_data.strength);
final_paint_color *= paint_brush_buf.alpha;
color = SCULPT_blend_color(color, final_paint_color);
}
if (color_read) {
imageStore(paint_tiles_img, image_coord, color);
}
}
if (color_read) {
imageStore(paint_tiles_img, image_coord, color);
if (in_use) {
paint_tile_mark_used(paint_tile);
}
image_coord.x += 1;
pos += delta;
}
}

View File

@@ -4,6 +4,10 @@ void main()
{
PaintTileData paint_tile;
paint_tile_get_layer(layer_id, paint_tile);
if (!paint_tile.in_use_frame) {
return;
}
ivec3 coord_in = ivec3(gl_GlobalInvocationID.xy, layer_id);
vec4 paint_color = imageLoad(paint_tiles_img, coord_in);
paint_color.a = 1.0;

View File

@@ -3,28 +3,39 @@ ivec2 paint_tile_coord_to_sub_tile_id(ivec2 coord)
return coord / ivec2(SUB_TILE_SIZE);
}
bool paint_tile_search(int tile_number, ivec2 coord, out PaintTileData r_paint_tile)
bool paint_tile_search(int tile_number, int2 sub_tile_id, out PaintTileData r_paint_tile)
{
int2 sub_tile_id = paint_tile_coord_to_sub_tile_id(coord);
for (int i = 0; i < paint_tile_buf_len; i++) {
if (paint_tile_buf[i].tile_number == tile_number &&
paint_tile_buf[i].sub_tile_id == sub_tile_id) {
r_paint_tile = paint_tile_buf[i];
r_paint_tile.index = i;
return true;
}
}
return false;
}
/**
 * Flag a paint tile as used during the current frame.
 *
 * Writes `in_use_frame = true` into the `paint_tile_buf` entry addressed by `paint_tile.index`.
 * The `index` member is filled in by `paint_tile_search` when a matching tile is found, so the
 * passed tile must come from a successful search.
 */
void paint_tile_mark_used(PaintTileData paint_tile)
{
paint_tile_buf[paint_tile.index].in_use_frame = true;
}
/**
 * Fetch the paint tile stored at position `layer_id` of `paint_tile_buf`.
 *
 * \param layer_id: used directly as the index into `paint_tile_buf`.
 * \param r_paint_tile: receives a copy of the buffer entry.
 *
 * NOTE(review): this presumes the position of a tile inside `paint_tile_buf` equals its
 * `layer_id`, and no check against `paint_tile_buf_len` is done here -- confirm with the code
 * that assigns layer ids and fills the buffer.
 */
void paint_tile_get_layer(int layer_id, out PaintTileData r_paint_tile)
{
r_paint_tile = paint_tile_buf[layer_id];
}
ivec3 paint_tile_coord_from_udim(int tile_number, ivec2 coord, out PaintTileData r_paint_tile)
ivec3 paint_tile_coord_from_paint_tile(ivec2 coord, PaintTileData paint_tile)
{
if (paint_tile_search(tile_number, coord, r_paint_tile)) {
return ivec3(coord - r_paint_tile.sub_tile_id * ivec2(SUB_TILE_SIZE), r_paint_tile.layer_id);
return ivec3(coord - paint_tile.sub_tile_id * ivec2(SUB_TILE_SIZE), paint_tile.layer_id);
}
ivec3 paint_tile_coord_from_udim(int tile_number, ivec2 coord, inout PaintTileData r_paint_tile)
{
int2 sub_tile_id = paint_tile_coord_to_sub_tile_id(coord);
if (paint_tile_search(tile_number, sub_tile_id, r_paint_tile)) {
return paint_tile_coord_from_paint_tile(coord, r_paint_tile);
}
return ivec3(0);