IO: Add support for multiple drag-n-drop files #107230

Merged
Brecht Van Lommel merged 20 commits from guishe/blender:dragndrop-files into main 2023-12-12 18:46:22 +01:00
8 changed files with 233 additions and 173 deletions
Showing only changes of commit ad785c7536

View File

@@ -84,6 +84,7 @@ void normals_calc_faces(Span<float3> vert_positions,
void normals_calc_verts(Span<float3> vert_positions,
OffsetIndices<int> faces,
Span<int> corner_verts,
GroupedSpan<int> vert_to_face_map,
Span<float3> face_normals,
MutableSpan<float3> vert_normals);
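
Callers now pass the vertex-to-face topology in explicitly instead of letting the function derive it. GroupedSpan itself is not shown in this diff; as a rough illustration only (hypothetical names, not Blender's implementation), such a map is typically packed as an offsets array plus a flat indices array, built along these lines:

#include <vector>

/* Illustrative sketch: a vertex-to-face map packed as offsets + indices, the kind of
 * grouped layout a GroupedSpan<int> exposes. The faces of vertex v are the contiguous
 * range indices[offsets[v] .. offsets[v + 1]). */
struct VertToFaceMap {
  std::vector<int> offsets; /* verts_num + 1 entries. */
  std::vector<int> indices; /* One entry per face corner. */
};

static VertToFaceMap build_vert_to_face_map(const std::vector<std::vector<int>> &face_verts,
                                            const int verts_num)
{
  VertToFaceMap map;
  map.offsets.assign(verts_num + 1, 0);
  /* Count how many faces touch each vertex. */
  for (const std::vector<int> &face : face_verts) {
    for (const int vert : face) {
      map.offsets[vert + 1]++;
    }
  }
  /* Prefix sum converts the counts into group start offsets. */
  for (int vert = 0; vert < verts_num; vert++) {
    map.offsets[vert + 1] += map.offsets[vert];
  }
  /* Scatter face indices into their vertex groups. */
  map.indices.resize(map.offsets[verts_num]);
  std::vector<int> cursor(map.offsets.begin(), map.offsets.end() - 1);
  for (int face_i = 0; face_i < int(face_verts.size()); face_i++) {
    for (const int vert : face_verts[face_i]) {
      map.indices[cursor[vert]++] = face_i;
    }
  }
  return map;
}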

View File

@@ -2265,6 +2265,7 @@ void BKE_keyblock_mesh_calc_normals(const KeyBlock *kb,
positions,
faces,
corner_verts,
mesh->vert_to_face_map(),
{reinterpret_cast<const blender::float3 *>(face_normals), faces.size()},
{reinterpret_cast<blender::float3 *>(vert_normals), mesh->totvert});
}

View File

@@ -46,46 +46,6 @@
# include "BLI_timeit.hh"
#endif
/* -------------------------------------------------------------------- */
/** \name Private Utility Functions
* \{ */
/**
* A thread-safe version of #add_v3_v3 that uses a spin-lock.
*
* \note Avoid using this when the chance of contention is high.
*/
static void add_v3_v3_atomic(float r[3], const float a[3])
{
#define FLT_EQ_NONAN(_fa, _fb) (*((const uint32_t *)&_fa) == *((const uint32_t *)&_fb))
float virtual_lock = r[0];
while (true) {
/* This loops until following conditions are met:
* - `r[0]` has same value as virtual_lock (i.e. it did not change since last try).
* - `r[0]` was not `FLT_MAX`, i.e. it was not locked by another thread. */
const float test_lock = atomic_cas_float(&r[0], virtual_lock, FLT_MAX);
if (_ATOMIC_LIKELY(FLT_EQ_NONAN(test_lock, virtual_lock) && (test_lock != FLT_MAX))) {
break;
}
virtual_lock = test_lock;
}
virtual_lock += a[0];
r[1] += a[1];
r[2] += a[2];
/* Second atomic operation to 'release'
* our lock on that vector and set its first scalar value. */
/* Note that we do not need to loop here, since we 'locked' `r[0]`,
* nobody should have changed it in the mean time. */
virtual_lock = atomic_cas_float(&r[0], FLT_MAX, virtual_lock);
BLI_assert(virtual_lock == FLT_MAX);
#undef FLT_EQ_NONAN
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Public Utility Functions
*
@@ -202,94 +162,35 @@ void normals_calc_faces(const Span<float3> positions,
});
}
static void normalize_and_validate(MutableSpan<float3> normals, const Span<float3> positions)
{
threading::parallel_for(normals.index_range(), 1024, [&](const IndexRange range) {
for (const int vert_i : range) {
float *no = normals[vert_i];
if (UNLIKELY(normalize_v3(no) == 0.0f)) {
/* Following Mesh convention; we use vertex coordinate itself for normal in this case. */
normalize_v3_v3(no, positions[vert_i]);
}
}
});
}
static void accumulate_face_normal_to_vert(const Span<float3> positions,
const Span<int> face_verts,
const float3 &face_normal,
MutableSpan<float3> vert_normals)
{
const int i_end = face_verts.size() - 1;
/* Accumulate angle weighted face normal into the vertex normal. */
/* Inline version of #accumulate_vertex_normals_poly_v3. */
{
float edvec_prev[3], edvec_next[3], edvec_end[3];
const float *v_curr = positions[face_verts[i_end]];
sub_v3_v3v3(edvec_prev, positions[face_verts[i_end - 1]], v_curr);
normalize_v3(edvec_prev);
copy_v3_v3(edvec_end, edvec_prev);
for (int i_next = 0, i_curr = i_end; i_next <= i_end; i_curr = i_next++) {
const float *v_next = positions[face_verts[i_next]];
/* Skip an extra normalization by reusing the first calculated edge. */
if (i_next != i_end) {
sub_v3_v3v3(edvec_next, v_curr, v_next);
normalize_v3(edvec_next);
}
else {
copy_v3_v3(edvec_next, edvec_end);
}
/* Calculate angle between the two face edges incident on this vertex. */
const float fac = math::safe_acos_approx(-dot_v3v3(edvec_prev, edvec_next));
const float vnor_add[3] = {face_normal[0] * fac, face_normal[1] * fac, face_normal[2] * fac};
float *vnor = vert_normals[face_verts[i_curr]];
add_v3_v3_atomic(vnor, vnor_add);
v_curr = v_next;
copy_v3_v3(edvec_prev, edvec_next);
}
}
}
void normals_calc_verts(const Span<float3> positions,
void normals_calc_verts(const Span<float3> vert_positions,
const OffsetIndices<int> faces,
const Span<int> corner_verts,
const GroupedSpan<int> vert_to_face_map,
const Span<float3> face_normals,
MutableSpan<float3> vert_normals)
{
memset(vert_normals.data(), 0, vert_normals.as_span().size_in_bytes());
const Span<float3> positions = vert_positions;
threading::parallel_for(positions.index_range(), 1024, [&](const IndexRange range) {
for (const int vert : range) {
const Span<int> vert_faces = vert_to_face_map[vert];
if (vert_faces.is_empty()) {
vert_normals[vert] = math::normalize(positions[vert]);
continue;
}
threading::parallel_for(faces.index_range(), 1024, [&](const IndexRange range) {
for (const int face_i : range) {
const Span<int> face_verts = corner_verts.slice(faces[face_i]);
accumulate_face_normal_to_vert(positions, face_verts, face_normals[face_i], vert_normals);
float3 vert_normal(0);
for (const int face : vert_faces) {
const int2 adjacent_verts = face_find_adjecent_verts(faces[face], corner_verts, vert);
const float3 dir_prev = math::normalize(positions[adjacent_verts[0]] - positions[vert]);
const float3 dir_next = math::normalize(positions[adjacent_verts[1]] - positions[vert]);
const float factor = math::safe_acos_approx(math::dot(dir_prev, dir_next));
vert_normal += face_normals[face] * factor;
}
vert_normals[vert] = math::normalize(vert_normal);
}
});
normalize_and_validate(vert_normals, positions);
}
static void normals_calc_faces_and_verts(const Span<float3> positions,
const OffsetIndices<int> faces,
const Span<int> corner_verts,
MutableSpan<float3> face_normals,
MutableSpan<float3> vert_normals)
{
memset(vert_normals.data(), 0, vert_normals.as_span().size_in_bytes());
threading::parallel_for(faces.index_range(), 1024, [&](const IndexRange range) {
for (const int face_i : range) {
const Span<int> face_verts = corner_verts.slice(faces[face_i]);
face_normals[face_i] = normal_calc_ngon(positions, face_verts);
accumulate_face_normal_to_vert(positions, face_verts, face_normals[face_i], vert_normals);
}
});
normalize_and_validate(vert_normals, positions);
}
/** \} */
@@ -342,32 +243,16 @@ blender::Span<blender::float3> Mesh::vert_normals() const
if (this->runtime->vert_normals_cache.is_cached()) {
return this->runtime->vert_normals_cache.data();
}
const Span<float3> positions = this->vert_positions();
const OffsetIndices faces = this->faces();
const Span<int> corner_verts = this->corner_verts();
/* Calculating only vertex normals based on precalculated face normals is faster, but if face
* normals are dirty, calculating both at the same time can be slightly faster. Since normal
* calculation commonly has a significant performance impact, we maintain both code paths. */
if (this->runtime->face_normals_cache.is_cached()) {
const Span<float3> face_normals = this->face_normals();
this->runtime->vert_normals_cache.ensure([&](Vector<float3> &r_data) {
r_data.reinitialize(positions.size());
bke::mesh::normals_calc_verts(positions, faces, corner_verts, face_normals, r_data);
});
}
else {
Vector<float3> face_normals(faces.size());
this->runtime->vert_normals_cache.ensure([&](Vector<float3> &r_data) {
r_data.reinitialize(positions.size());
bke::mesh::normals_calc_faces_and_verts(
positions, faces, corner_verts, face_normals, r_data);
});
this->runtime->face_normals_cache.ensure(
[&](Vector<float3> &r_data) { r_data = std::move(face_normals); });
}
const Span<float3> positions = this->vert_positions();
const Span<float3> face_normals = this->face_normals();
const GroupedSpan<int> vert_to_face_map = this->vert_to_face_map();
this->runtime->vert_normals_cache.ensure([&](Vector<float3> &r_data) {
r_data.reinitialize(positions.size());
bke::mesh::normals_calc_verts(
positions, faces, corner_verts, vert_to_face_map, face_normals, r_data);
});
return this->runtime->vert_normals_cache.data();
}
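
The refactor replaces the scatter-style accumulation (each face adding into its vertices' normals, guarded by add_v3_v3_atomic) with a gather per vertex: every vertex is written by exactly one loop iteration, so no atomics are needed, and each incident face contributes its normal weighted by the corner angle it forms at that vertex. A minimal single-threaded sketch of that angle weight, using hypothetical vector helpers in place of Blender's float3/math functions:

#include <algorithm>
#include <cmath>

struct Vec3 {
  float x, y, z;
};

static Vec3 sub(const Vec3 &a, const Vec3 &b)
{
  return {a.x - b.x, a.y - b.y, a.z - b.z};
}

static float dot(const Vec3 &a, const Vec3 &b)
{
  return a.x * b.x + a.y * b.y + a.z * b.z;
}

static Vec3 normalized(const Vec3 &v)
{
  const float length = std::sqrt(dot(v, v));
  return length > 0.0f ? Vec3{v.x / length, v.y / length, v.z / length} : v;
}

/* `prev_vert` and `next_vert` are the positions of the two face vertices adjacent to `vert`
 * (the patch looks those indices up with face_find_adjecent_verts()). The face normal is
 * added to the vertex normal scaled by this factor, and the sum is normalized at the end. */
static float corner_angle(const Vec3 &vert, const Vec3 &prev_vert, const Vec3 &next_vert)
{
  const Vec3 dir_prev = normalized(sub(prev_vert, vert));
  const Vec3 dir_next = normalized(sub(next_vert, vert));
  /* Clamp so rounding error cannot push the value outside acos' domain. */
  return std::acos(std::clamp(dot(dir_prev, dir_next), -1.0f, 1.0f));
}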

View File

@ -63,14 +63,31 @@ enum class ConversionType {
FLOAT_TO_B10F_G11F_R11F,
B10F_G11F_R11F_TO_FLOAT,
FLOAT3_TO_HALF4,
HALF4_TO_FLOAT3,
FLOAT3_TO_FLOAT4,
FLOAT4_TO_FLOAT3,
/**
* The requested conversion isn't supported.
*/
UNSUPPORTED,
};
static ConversionType type_of_conversion_float(eGPUTextureFormat device_format)
static ConversionType type_of_conversion_float(const eGPUTextureFormat host_format,
const eGPUTextureFormat device_format)
{
if (host_format != device_format) {
if (host_format == GPU_RGB16F && device_format == GPU_RGBA16F) {
return ConversionType::FLOAT3_TO_HALF4;
}
if (host_format == GPU_RGB32F && device_format == GPU_RGBA32F) {
return ConversionType::FLOAT3_TO_FLOAT4;
}
return ConversionType::UNSUPPORTED;
}
switch (device_format) {
case GPU_RGBA32F:
case GPU_RG32F:
@@ -486,13 +503,15 @@ static ConversionType type_of_conversion_r10g10b10a2(eGPUTextureFormat device_fo
return ConversionType::UNSUPPORTED;
}
static ConversionType host_to_device(eGPUDataFormat host_format, eGPUTextureFormat device_format)
static ConversionType host_to_device(const eGPUDataFormat host_format,
const eGPUTextureFormat host_texture_format,
const eGPUTextureFormat device_format)
{
BLI_assert(validate_data_format(device_format, host_format));
switch (host_format) {
case GPU_DATA_FLOAT:
return type_of_conversion_float(device_format);
return type_of_conversion_float(host_texture_format, device_format);
case GPU_DATA_UINT:
return type_of_conversion_uint(device_format);
case GPU_DATA_INT:
@@ -540,6 +559,8 @@ static ConversionType reversed(ConversionType type)
CASE_PAIR(FLOAT, SRGBA8)
CASE_PAIR(FLOAT, DEPTH_COMPONENT24)
CASE_PAIR(FLOAT, B10F_G11F_R11F)
CASE_PAIR(FLOAT3, HALF4)
CASE_PAIR(FLOAT3, FLOAT4)
case ConversionType::UNSUPPORTED:
return ConversionType::UNSUPPORTED;
@@ -599,6 +620,42 @@ using FLOAT4 = PixelValue<ColorSceneLinear4f<eAlpha::Premultiplied>>;
class B10F_G11G_R11F : public PixelValue<uint32_t> {
};
class HALF4 : public PixelValue<uint64_t> {
public:
uint32_t get_r() const
{
return value & 0xffff;
}
void set_r(uint64_t new_value)
{
value = (value & 0xffffffffffff0000) | (new_value & 0xffff);
}
uint64_t get_g() const
{
return (value >> 16) & 0xffff;
}
void set_g(uint64_t new_value)
{
value = (value & 0xffffffff0000ffff) | ((new_value & 0xffff) << 16);
}
uint64_t get_b() const
{
return (value >> 32) & 0xffff;
}
void set_b(uint64_t new_value)
{
value = (value & 0xffff0000ffffffff) | ((new_value & 0xffff) << 32);
}
void set_a(uint64_t new_value)
{
value = (value & 0xffffffffffff) | ((new_value & 0xffff) << 48);
}
};
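
For reference, the 64-bit value above packs the four half-float channels from the low bits up: r in bits 0-15, g in 16-31, b in 32-47, a in 48-63. A small self-contained check of that layout, assuming standard IEEE half precision where 1.0f encodes to 0x3C00 (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

static uint64_t pack_half4(uint16_t r, uint16_t g, uint16_t b, uint16_t a)
{
  return uint64_t(r) | (uint64_t(g) << 16) | (uint64_t(b) << 32) | (uint64_t(a) << 48);
}

int main()
{
  /* Opaque white in RGBA16F: every channel is the half encoding of 1.0. */
  const uint64_t opaque_white = pack_half4(0x3C00, 0x3C00, 0x3C00, 0x3C00);
  assert(opaque_white == 0x3C003C003C003C00ull);
  /* Extracting the green channel mirrors HALF4::get_g(). */
  assert(uint16_t((opaque_white >> 16) & 0xffff) == 0x3C00);
  return 0;
}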
class DepthComponent24 : public ComponentValue<uint32_t> {
public:
operator uint32_t() const
@@ -738,6 +795,36 @@ static void convert(FLOAT4 &dst, const SRGBA8 &src)
dst.value = src.value.decode();
}
static void convert(FLOAT3 &dst, const HALF4 &src)
{
dst.value.x = uint32_t_to_float(convert_float_formats<FormatF32, FormatF16>(src.get_r()));
dst.value.y = uint32_t_to_float(convert_float_formats<FormatF32, FormatF16>(src.get_g()));
dst.value.z = uint32_t_to_float(convert_float_formats<FormatF32, FormatF16>(src.get_b()));
}
static void convert(HALF4 &dst, const FLOAT3 &src)
{
dst.set_r(convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(src.value.x)));
dst.set_g(convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(src.value.y)));
dst.set_b(convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(src.value.z)));
dst.set_a(convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(1.0f)));
}
static void convert(FLOAT3 &dst, const FLOAT4 &src)
{
dst.value.x = src.value.r;
dst.value.y = src.value.g;
dst.value.z = src.value.b;
}
static void convert(FLOAT4 &dst, const FLOAT3 &src)
{
dst.value.r = src.value.x;
dst.value.g = src.value.y;
dst.value.b = src.value.z;
dst.value.a = 1.0f;
}
constexpr uint32_t MASK_10_BITS = 0b1111111111;
constexpr uint32_t MASK_11_BITS = 0b11111111111;
constexpr uint8_t SHIFT_B = 22;
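
At the buffer level the FLOAT3_TO_FLOAT4 path implies the device-side buffer is a third larger than the host data: three floats come in per pixel, four go out, with alpha synthesized as 1.0. A rough standalone sketch of that expansion (hypothetical name, not the patch's convert_per_pixel machinery):

#include <cstddef>

static void expand_rgb_to_rgba(const float *src, float *dst, const size_t pixel_len)
{
  for (size_t i = 0; i < pixel_len; i++) {
    dst[i * 4 + 0] = src[i * 3 + 0];
    dst[i * 4 + 1] = src[i * 3 + 1];
    dst[i * 4 + 2] = src[i * 3 + 2];
    dst[i * 4 + 3] = 1.0f; /* Alpha is synthesized, as in convert(FLOAT4 &, const FLOAT3 &). */
  }
}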
@@ -918,6 +1005,20 @@ static void convert_buffer(void *dst_memory,
case ConversionType::B10F_G11F_R11F_TO_FLOAT:
convert_per_pixel<FLOAT3, B10F_G11G_R11F>(dst_memory, src_memory, buffer_size);
break;
case ConversionType::FLOAT3_TO_HALF4:
convert_per_pixel<HALF4, FLOAT3>(dst_memory, src_memory, buffer_size);
break;
case ConversionType::HALF4_TO_FLOAT3:
convert_per_pixel<FLOAT3, HALF4>(dst_memory, src_memory, buffer_size);
break;
case ConversionType::FLOAT3_TO_FLOAT4:
convert_per_pixel<FLOAT4, FLOAT3>(dst_memory, src_memory, buffer_size);
break;
case ConversionType::FLOAT4_TO_FLOAT3:
convert_per_pixel<FLOAT3, FLOAT4>(dst_memory, src_memory, buffer_size);
break;
}
}
@@ -929,9 +1030,10 @@ void convert_host_to_device(void *dst_buffer,
const void *src_buffer,
size_t buffer_size,
eGPUDataFormat host_format,
eGPUTextureFormat host_texture_format,
eGPUTextureFormat device_format)
{
ConversionType conversion_type = host_to_device(host_format, device_format);
ConversionType conversion_type = host_to_device(host_format, host_texture_format, device_format);
BLI_assert(conversion_type != ConversionType::UNSUPPORTED);
convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type);
}
@@ -940,9 +1042,11 @@ void convert_device_to_host(void *dst_buffer,
const void *src_buffer,
size_t buffer_size,
eGPUDataFormat host_format,
eGPUTextureFormat host_texture_format,
eGPUTextureFormat device_format)
{
ConversionType conversion_type = reversed(host_to_device(host_format, device_format));
ConversionType conversion_type = reversed(
host_to_device(host_format, host_texture_format, device_format));
BLI_assert_msg(conversion_type != ConversionType::UNSUPPORTED,
"Data conversion between host_format and device_format isn't supported (yet).");
convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type);

View File

@@ -24,6 +24,7 @@ struct VKWorkarounds;
* \param src_buffer: host buffer.
* \param buffer_size: number of pixels to convert from the start of the given buffer.
* \param host_format: format of the host buffer.
* \param host_texture_format: texture format of the host buffer.
* \param device_format: format of the device buffer.
*
* \note Will assert when the host_format/device_format combination isn't valid
@@ -34,6 +35,7 @@ void convert_host_to_device(void *dst_buffer,
const void *src_buffer,
size_t buffer_size,
eGPUDataFormat host_format,
eGPUTextureFormat host_texture_format,
eGPUTextureFormat device_format);
/**
@@ -43,6 +45,7 @@ void convert_host_to_device(void *dst_buffer,
* \param src_buffer: device buffer.
* \param buffer_size: number of pixels to convert from the start of the given buffer.
* \param host_format: format of the host buffer
* \param host_texture_format: texture format of the host buffer.
* \param device_format: format of the device buffer.
*
* \note Will assert when the host_format/device_format combination isn't valid
@@ -53,6 +56,7 @@ void convert_device_to_host(void *dst_buffer,
const void *src_buffer,
size_t buffer_size,
eGPUDataFormat host_format,
eGPUTextureFormat host_texture_format,
eGPUTextureFormat device_format);
/**

View File

@@ -254,4 +254,68 @@ TEST(VulkanDataConversion, vertex_format_multiple_attributes)
}
}
TEST(VulkanDataConversion, texture_rgb16f_as_floats_to_rgba16f)
{
const size_t num_pixels = 4;
float input[] = {
1.0,
0.5,
0.2,
0.2,
1.0,
0.3,
0.4,
0.2,
1.0,
1.0,
1.0,
1.0,
};
uint64_t device[num_pixels];
convert_host_to_device(device, input, num_pixels, GPU_DATA_FLOAT, GPU_RGB16F, GPU_RGBA16F);
float read_back[num_pixels * 3];
convert_device_to_host(read_back, device, num_pixels, GPU_DATA_FLOAT, GPU_RGB16F, GPU_RGBA16F);
for (int i : IndexRange(num_pixels * 3)) {
EXPECT_NEAR(input[i], read_back[i], 0.01);
}
}
TEST(VulkanDataConversion, texture_rgb32f_as_floats_to_rgba32f)
{
const size_t num_pixels = 4;
float input[] = {
1.0,
0.5,
0.2,
0.2,
1.0,
0.3,
0.4,
0.2,
1.0,
1.0,
1.0,
1.0,
};
float device[num_pixels * 4];
convert_host_to_device(device, input, num_pixels, GPU_DATA_FLOAT, GPU_RGB32F, GPU_RGBA32F);
float read_back[num_pixels * 3];
convert_device_to_host(read_back, device, num_pixels, GPU_DATA_FLOAT, GPU_RGB32F, GPU_RGBA32F);
for (int i : IndexRange(num_pixels * 3)) {
EXPECT_NEAR(input[i], read_back[i], 0.01);
}
}
} // namespace blender::gpu::tests

View File

@@ -244,7 +244,7 @@ void VKTexture::read_sub(
context.flush();
convert_device_to_host(
r_data, staging_buffer.mapped_memory_get(), sample_len, format, device_format_);
r_data, staging_buffer.mapped_memory_get(), sample_len, format, format_, device_format_);
}
void *VKTexture::read(int mip, eGPUDataFormat format)
@@ -284,7 +284,7 @@ void VKTexture::update_sub(
VKBuffer staging_buffer;
staging_buffer.create(device_memory_size, GPU_USAGE_DYNAMIC, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
convert_host_to_device(
staging_buffer.mapped_memory_get(), data, sample_len, format, device_format_);
staging_buffer.mapped_memory_get(), data, sample_len, format, format_, device_format_);
VkBufferImageCopy region = {};
region.imageExtent.width = extent.x;
@@ -330,6 +330,14 @@ bool VKTexture::init_internal()
if (device_format_ == GPU_DEPTH24_STENCIL8 && workarounds.not_aligned_pixel_formats) {
device_format_ = GPU_DEPTH32F_STENCIL8;
}
/* R16G16B16 formats are typically not supported (<1%) but R16G16B16A16 is
* typically supported (+90%). */
if (device_format_ == GPU_RGB16F) {
device_format_ = GPU_RGBA16F;
}
if (device_format_ == GPU_RGB32F) {
device_format_ = GPU_RGBA32F;
}
if (!allocate()) {
return false;
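
The fallback above is unconditional because three-channel float formats are almost never implemented by Vulkan drivers. If runtime detection were preferred instead, support can be queried through vkGetPhysicalDeviceFormatProperties; a hedged sketch of such a check (illustrative only, not what this patch does):

#include <vulkan/vulkan.h>

/* Returns true when `format` supports sampling and transfers with optimal tiling,
 * so a caller could decide whether an RGB -> RGBA promotion is needed. */
static bool format_is_usable(VkPhysicalDevice physical_device, VkFormat format)
{
  VkFormatProperties properties = {};
  vkGetPhysicalDeviceFormatProperties(physical_device, format, &properties);
  const VkFormatFeatureFlags required = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
                                        VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
                                        VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
  return (properties.optimalTilingFeatures & required) == required;
}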

View File

@@ -10,6 +10,7 @@
#include "BLI_task.hh"
#include "BKE_mesh.hh"
#include "BKE_mesh_mapping.hh"
#include "node_geometry_util.hh"
@@ -25,23 +26,8 @@ static void node_declare(NodeDeclarationBuilder &b)
using VertPriority = std::pair<float, int>;
struct EdgeVertMap {
Array<Vector<int>> edges_by_vertex_map;
EdgeVertMap(const Mesh &mesh)
{
const Span<int2> edges = mesh.edges();
edges_by_vertex_map.reinitialize(mesh.totvert);
for (const int edge_i : edges.index_range()) {
const int2 &edge = edges[edge_i];
edges_by_vertex_map[edge[0]].append(edge_i);
edges_by_vertex_map[edge[1]].append(edge_i);
}
}
};
static void shortest_paths(const Mesh &mesh,
EdgeVertMap &maps,
const GroupedSpan<int> vert_to_edge,
const IndexMask end_selection,
const VArray<float> &input_cost,
MutableSpan<int> r_next_index,
@@ -65,8 +51,7 @@ static void shortest_paths(const Mesh &mesh,
continue;
}
visited[vert_i] = true;
const Span<int> incident_edge_indices = maps.edges_by_vertex_map[vert_i];
for (const int edge_i : incident_edge_indices) {
for (const int edge_i : vert_to_edge[vert_i]) {
const int2 &edge = edges[edge_i];
const int neighbor_vert_i = edge[0] + edge[1] - vert_i;
if (visited[neighbor_vert_i]) {
@@ -117,8 +102,12 @@ class ShortestEdgePathsNextVertFieldInput final : public bke::MeshFieldInput {
Array<float> cost(mesh.totvert, FLT_MAX);
if (!end_selection.is_empty()) {
EdgeVertMap maps(mesh);
shortest_paths(mesh, maps, end_selection, input_cost, next_index, cost);
const Span<int2> edges = mesh.edges();
Array<int> vert_to_edge_offset_data;
Array<int> vert_to_edge_indices;
const GroupedSpan<int> vert_to_edge = bke::mesh::build_vert_to_edge_map(
edges, mesh.totvert, vert_to_edge_offset_data, vert_to_edge_indices);
shortest_paths(mesh, vert_to_edge, end_selection, input_cost, next_index, cost);
}
threading::parallel_for(next_index.index_range(), 1024, [&](const IndexRange range) {
for (const int i : range) {
@@ -193,8 +182,12 @@ class ShortestEdgePathsCostFieldInput final : public bke::MeshFieldInput {
Array<float> cost(mesh.totvert, FLT_MAX);
if (!end_selection.is_empty()) {
EdgeVertMap maps(mesh);
shortest_paths(mesh, maps, end_selection, input_cost, next_index, cost);
const Span<int2> edges = mesh.edges();
Array<int> vert_to_edge_offset_data;
Array<int> vert_to_edge_indices;
const GroupedSpan<int> vert_to_edge = bke::mesh::build_vert_to_edge_map(
edges, mesh.totvert, vert_to_edge_offset_data, vert_to_edge_indices);
shortest_paths(mesh, vert_to_edge, end_selection, input_cost, next_index, cost);
}
threading::parallel_for(cost.index_range(), 1024, [&](const IndexRange range) {
for (const int i : range) {
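
Both field inputs now build the vertex-to-edge map once as flat offsets plus indices and pass it to shortest_paths() as a GroupedSpan<int>, replacing the per-vertex Vector allocations of the old EdgeVertMap. The traversal itself stays a Dijkstra walk over that map; a condensed standalone sketch (hypothetical names, simplified from the node code, which also tracks the chosen next vertex and a visited flag):

#include <functional>
#include <limits>
#include <queue>
#include <utility>
#include <vector>

static std::vector<float> dijkstra_from_seeds(const std::vector<std::pair<int, int>> &edges,
                                              const std::vector<int> &vert_to_edge_offsets,
                                              const std::vector<int> &vert_to_edge_indices,
                                              const std::vector<int> &seed_verts,
                                              const std::vector<float> &edge_cost)
{
  const int verts_num = int(vert_to_edge_offsets.size()) - 1;
  std::vector<float> cost(verts_num, std::numeric_limits<float>::max());
  using Item = std::pair<float, int>; /* (cost so far, vertex). */
  std::priority_queue<Item, std::vector<Item>, std::greater<Item>> queue;
  for (const int seed : seed_verts) {
    cost[seed] = 0.0f;
    queue.emplace(0.0f, seed);
  }
  while (!queue.empty()) {
    const auto [vert_cost, vert] = queue.top();
    queue.pop();
    if (vert_cost > cost[vert]) {
      continue; /* Outdated queue entry. */
    }
    /* Edges incident to `vert` live in one contiguous group of the flat map. */
    for (int i = vert_to_edge_offsets[vert]; i < vert_to_edge_offsets[vert + 1]; i++) {
      const int edge_i = vert_to_edge_indices[i];
      /* The other endpoint of edge (a, b) seen from `vert` is a + b - vert. */
      const int neighbor = edges[edge_i].first + edges[edge_i].second - vert;
      const float new_cost = vert_cost + edge_cost[edge_i];
      if (new_cost < cost[neighbor]) {
        cost[neighbor] = new_cost;
        queue.emplace(new_cost, neighbor);
      }
    }
  }
  return cost;
}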