IO: Add support for multiple drag-n-drop files #107230
|
@ -84,6 +84,7 @@ void normals_calc_faces(Span<float3> vert_positions,
|
|||
void normals_calc_verts(Span<float3> vert_positions,
|
||||
OffsetIndices<int> faces,
|
||||
Span<int> corner_verts,
|
||||
GroupedSpan<int> vert_to_face_map,
|
||||
Span<float3> face_normals,
|
||||
MutableSpan<float3> vert_normals);
|
||||
|
||||
|
|
|
@ -2265,6 +2265,7 @@ void BKE_keyblock_mesh_calc_normals(const KeyBlock *kb,
|
|||
positions,
|
||||
faces,
|
||||
corner_verts,
|
||||
mesh->vert_to_face_map(),
|
||||
{reinterpret_cast<const blender::float3 *>(face_normals), faces.size()},
|
||||
{reinterpret_cast<blender::float3 *>(vert_normals), mesh->totvert});
|
||||
}
|
||||
|
|
|
@ -46,46 +46,6 @@
|
|||
# include "BLI_timeit.hh"
|
||||
#endif
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/** \name Private Utility Functions
|
||||
* \{ */
|
||||
|
||||
/**
|
||||
* A thread-safe version of #add_v3_v3 that uses a spin-lock.
|
||||
*
|
||||
* \note Avoid using this when the chance of contention is high.
|
||||
*/
|
||||
static void add_v3_v3_atomic(float r[3], const float a[3])
|
||||
{
|
||||
#define FLT_EQ_NONAN(_fa, _fb) (*((const uint32_t *)&_fa) == *((const uint32_t *)&_fb))
|
||||
|
||||
float virtual_lock = r[0];
|
||||
while (true) {
|
||||
/* This loops until following conditions are met:
|
||||
* - `r[0]` has same value as virtual_lock (i.e. it did not change since last try).
|
||||
* - `r[0]` was not `FLT_MAX`, i.e. it was not locked by another thread. */
|
||||
const float test_lock = atomic_cas_float(&r[0], virtual_lock, FLT_MAX);
|
||||
if (_ATOMIC_LIKELY(FLT_EQ_NONAN(test_lock, virtual_lock) && (test_lock != FLT_MAX))) {
|
||||
break;
|
||||
}
|
||||
virtual_lock = test_lock;
|
||||
}
|
||||
virtual_lock += a[0];
|
||||
r[1] += a[1];
|
||||
r[2] += a[2];
|
||||
|
||||
/* Second atomic operation to 'release'
|
||||
* our lock on that vector and set its first scalar value. */
|
||||
/* Note that we do not need to loop here, since we 'locked' `r[0]`,
|
||||
* nobody should have changed it in the mean time. */
|
||||
virtual_lock = atomic_cas_float(&r[0], FLT_MAX, virtual_lock);
|
||||
BLI_assert(virtual_lock == FLT_MAX);
|
||||
|
||||
#undef FLT_EQ_NONAN
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/** \name Public Utility Functions
|
||||
*
|
||||
|
@ -202,94 +162,35 @@ void normals_calc_faces(const Span<float3> positions,
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Normalize every vector in `normals` in place (processed in parallel chunks).
 *
 * When #normalize_v3 reports a zero length, the normalized position of the same index is
 * written instead.
 */
static void normalize_and_validate(MutableSpan<float3> normals, const Span<float3> positions)
{
  threading::parallel_for(normals.index_range(), 1024, [&](const IndexRange chunk) {
    for (const int i : chunk) {
      float *normal = normals[i];
      const float length = normalize_v3(normal);
      if (UNLIKELY(length == 0.0f)) {
        /* Following Mesh convention; the vertex coordinate itself is used as normal here. */
        normalize_v3_v3(normal, positions[i]);
      }
    }
  });
}
|
||||
|
||||
static void accumulate_face_normal_to_vert(const Span<float3> positions,
|
||||
const Span<int> face_verts,
|
||||
const float3 &face_normal,
|
||||
MutableSpan<float3> vert_normals)
|
||||
{
|
||||
const int i_end = face_verts.size() - 1;
|
||||
|
||||
/* Accumulate angle weighted face normal into the vertex normal. */
|
||||
/* Inline version of #accumulate_vertex_normals_poly_v3. */
|
||||
{
|
||||
float edvec_prev[3], edvec_next[3], edvec_end[3];
|
||||
const float *v_curr = positions[face_verts[i_end]];
|
||||
sub_v3_v3v3(edvec_prev, positions[face_verts[i_end - 1]], v_curr);
|
||||
normalize_v3(edvec_prev);
|
||||
copy_v3_v3(edvec_end, edvec_prev);
|
||||
|
||||
for (int i_next = 0, i_curr = i_end; i_next <= i_end; i_curr = i_next++) {
|
||||
const float *v_next = positions[face_verts[i_next]];
|
||||
|
||||
/* Skip an extra normalization by reusing the first calculated edge. */
|
||||
if (i_next != i_end) {
|
||||
sub_v3_v3v3(edvec_next, v_curr, v_next);
|
||||
normalize_v3(edvec_next);
|
||||
}
|
||||
else {
|
||||
copy_v3_v3(edvec_next, edvec_end);
|
||||
}
|
||||
|
||||
/* Calculate angle between the two face edges incident on this vertex. */
|
||||
const float fac = math::safe_acos_approx(-dot_v3v3(edvec_prev, edvec_next));
|
||||
const float vnor_add[3] = {face_normal[0] * fac, face_normal[1] * fac, face_normal[2] * fac};
|
||||
|
||||
float *vnor = vert_normals[face_verts[i_curr]];
|
||||
add_v3_v3_atomic(vnor, vnor_add);
|
||||
v_curr = v_next;
|
||||
copy_v3_v3(edvec_prev, edvec_next);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void normals_calc_verts(const Span<float3> positions,
|
||||
void normals_calc_verts(const Span<float3> vert_positions,
|
||||
const OffsetIndices<int> faces,
|
||||
const Span<int> corner_verts,
|
||||
const GroupedSpan<int> vert_to_face_map,
|
||||
const Span<float3> face_normals,
|
||||
MutableSpan<float3> vert_normals)
|
||||
{
|
||||
memset(vert_normals.data(), 0, vert_normals.as_span().size_in_bytes());
|
||||
const Span<float3> positions = vert_positions;
|
||||
threading::parallel_for(positions.index_range(), 1024, [&](const IndexRange range) {
|
||||
for (const int vert : range) {
|
||||
const Span<int> vert_faces = vert_to_face_map[vert];
|
||||
if (vert_faces.is_empty()) {
|
||||
vert_normals[vert] = math::normalize(positions[vert]);
|
||||
continue;
|
||||
}
|
||||
|
||||
threading::parallel_for(faces.index_range(), 1024, [&](const IndexRange range) {
|
||||
for (const int face_i : range) {
|
||||
const Span<int> face_verts = corner_verts.slice(faces[face_i]);
|
||||
accumulate_face_normal_to_vert(positions, face_verts, face_normals[face_i], vert_normals);
|
||||
float3 vert_normal(0);
|
||||
for (const int face : vert_faces) {
|
||||
const int2 adjacent_verts = face_find_adjecent_verts(faces[face], corner_verts, vert);
|
||||
const float3 dir_prev = math::normalize(positions[adjacent_verts[0]] - positions[vert]);
|
||||
const float3 dir_next = math::normalize(positions[adjacent_verts[1]] - positions[vert]);
|
||||
const float factor = math::safe_acos_approx(math::dot(dir_prev, dir_next));
|
||||
|
||||
vert_normal += face_normals[face] * factor;
|
||||
}
|
||||
|
||||
vert_normals[vert] = math::normalize(vert_normal);
|
||||
}
|
||||
});
|
||||
|
||||
normalize_and_validate(vert_normals, positions);
|
||||
}
|
||||
|
||||
static void normals_calc_faces_and_verts(const Span<float3> positions,
|
||||
const OffsetIndices<int> faces,
|
||||
const Span<int> corner_verts,
|
||||
MutableSpan<float3> face_normals,
|
||||
MutableSpan<float3> vert_normals)
|
||||
{
|
||||
memset(vert_normals.data(), 0, vert_normals.as_span().size_in_bytes());
|
||||
|
||||
threading::parallel_for(faces.index_range(), 1024, [&](const IndexRange range) {
|
||||
for (const int face_i : range) {
|
||||
const Span<int> face_verts = corner_verts.slice(faces[face_i]);
|
||||
face_normals[face_i] = normal_calc_ngon(positions, face_verts);
|
||||
accumulate_face_normal_to_vert(positions, face_verts, face_normals[face_i], vert_normals);
|
||||
}
|
||||
});
|
||||
|
||||
normalize_and_validate(vert_normals, positions);
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
@ -342,32 +243,16 @@ blender::Span<blender::float3> Mesh::vert_normals() const
|
|||
if (this->runtime->vert_normals_cache.is_cached()) {
|
||||
return this->runtime->vert_normals_cache.data();
|
||||
}
|
||||
|
||||
const Span<float3> positions = this->vert_positions();
|
||||
const OffsetIndices faces = this->faces();
|
||||
const Span<int> corner_verts = this->corner_verts();
|
||||
|
||||
/* Calculating only vertex normals based on precalculated face normals is faster, but if face
|
||||
* normals are dirty, calculating both at the same time can be slightly faster. Since normal
|
||||
* calculation commonly has a significant performance impact, we maintain both code paths. */
|
||||
if (this->runtime->face_normals_cache.is_cached()) {
|
||||
const Span<float3> positions = this->vert_positions();
|
||||
const Span<float3> face_normals = this->face_normals();
|
||||
const GroupedSpan<int> vert_to_face_map = this->vert_to_face_map();
|
||||
this->runtime->vert_normals_cache.ensure([&](Vector<float3> &r_data) {
|
||||
r_data.reinitialize(positions.size());
|
||||
bke::mesh::normals_calc_verts(positions, faces, corner_verts, face_normals, r_data);
|
||||
bke::mesh::normals_calc_verts(
|
||||
positions, faces, corner_verts, vert_to_face_map, face_normals, r_data);
|
||||
});
|
||||
}
|
||||
else {
|
||||
Vector<float3> face_normals(faces.size());
|
||||
this->runtime->vert_normals_cache.ensure([&](Vector<float3> &r_data) {
|
||||
r_data.reinitialize(positions.size());
|
||||
bke::mesh::normals_calc_faces_and_verts(
|
||||
positions, faces, corner_verts, face_normals, r_data);
|
||||
});
|
||||
this->runtime->face_normals_cache.ensure(
|
||||
[&](Vector<float3> &r_data) { r_data = std::move(face_normals); });
|
||||
}
|
||||
|
||||
return this->runtime->vert_normals_cache.data();
|
||||
}
|
||||
|
||||
|
|
|
@ -63,14 +63,31 @@ enum class ConversionType {
|
|||
FLOAT_TO_B10F_G11F_R11F,
|
||||
B10F_G11F_R11F_TO_FLOAT,
|
||||
|
||||
FLOAT3_TO_HALF4,
|
||||
HALF4_TO_FLOAT3,
|
||||
|
||||
FLOAT3_TO_FLOAT4,
|
||||
FLOAT4_TO_FLOAT3,
|
||||
|
||||
/**
|
||||
* The requested conversion isn't supported.
|
||||
*/
|
||||
UNSUPPORTED,
|
||||
};
|
||||
|
||||
static ConversionType type_of_conversion_float(eGPUTextureFormat device_format)
|
||||
static ConversionType type_of_conversion_float(const eGPUTextureFormat host_format,
|
||||
const eGPUTextureFormat device_format)
|
||||
{
|
||||
if (host_format != device_format) {
|
||||
if (host_format == GPU_RGB16F && device_format == GPU_RGBA16F) {
|
||||
return ConversionType::FLOAT3_TO_HALF4;
|
||||
}
|
||||
if (host_format == GPU_RGB32F && device_format == GPU_RGBA32F) {
|
||||
return ConversionType::FLOAT3_TO_FLOAT4;
|
||||
}
|
||||
return ConversionType::UNSUPPORTED;
|
||||
}
|
||||
|
||||
switch (device_format) {
|
||||
case GPU_RGBA32F:
|
||||
case GPU_RG32F:
|
||||
|
@ -486,13 +503,15 @@ static ConversionType type_of_conversion_r10g10b10a2(eGPUTextureFormat device_fo
|
|||
return ConversionType::UNSUPPORTED;
|
||||
}
|
||||
|
||||
static ConversionType host_to_device(eGPUDataFormat host_format, eGPUTextureFormat device_format)
|
||||
static ConversionType host_to_device(const eGPUDataFormat host_format,
|
||||
const eGPUTextureFormat host_texture_format,
|
||||
const eGPUTextureFormat device_format)
|
||||
{
|
||||
BLI_assert(validate_data_format(device_format, host_format));
|
||||
|
||||
switch (host_format) {
|
||||
case GPU_DATA_FLOAT:
|
||||
return type_of_conversion_float(device_format);
|
||||
return type_of_conversion_float(host_texture_format, device_format);
|
||||
case GPU_DATA_UINT:
|
||||
return type_of_conversion_uint(device_format);
|
||||
case GPU_DATA_INT:
|
||||
|
@ -540,6 +559,8 @@ static ConversionType reversed(ConversionType type)
|
|||
CASE_PAIR(FLOAT, SRGBA8)
|
||||
CASE_PAIR(FLOAT, DEPTH_COMPONENT24)
|
||||
CASE_PAIR(FLOAT, B10F_G11F_R11F)
|
||||
CASE_PAIR(FLOAT3, HALF4)
|
||||
CASE_PAIR(FLOAT3, FLOAT4)
|
||||
|
||||
case ConversionType::UNSUPPORTED:
|
||||
return ConversionType::UNSUPPORTED;
|
||||
|
@ -599,6 +620,42 @@ using FLOAT4 = PixelValue<ColorSceneLinear4f<eAlpha::Premultiplied>>;
|
|||
class B10F_G11G_R11F : public PixelValue<uint32_t> {
|
||||
};
|
||||
|
||||
/**
 * Pixel made of four 16 bit channels packed into one 64 bit value.
 *
 * Bit layout, starting at the least significant bit: R in bits 0-15, G in bits 16-31, B in
 * bits 32-47 and A in bits 48-63. Setters only keep the lowest 16 bits of the given value.
 */
class HALF4 : public PixelValue<uint64_t> {
 private:
  /** Extract the 16 bit channel that starts at `bit_offset`. */
  uint64_t channel_get(const int bit_offset) const
  {
    return (value >> bit_offset) & 0xffff;
  }

  /** Overwrite the 16 bit channel that starts at `bit_offset`; other channels are kept. */
  void channel_set(const int bit_offset, const uint64_t new_value)
  {
    const uint64_t channel_mask = uint64_t(0xffff) << bit_offset;
    value = (value & ~channel_mask) | ((new_value & 0xffff) << bit_offset);
  }

 public:
  uint32_t get_r() const
  {
    return uint32_t(channel_get(0));
  }

  void set_r(uint64_t new_value)
  {
    channel_set(0, new_value);
  }

  uint64_t get_g() const
  {
    return channel_get(16);
  }

  void set_g(uint64_t new_value)
  {
    channel_set(16, new_value);
  }

  uint64_t get_b() const
  {
    return channel_get(32);
  }

  void set_b(uint64_t new_value)
  {
    channel_set(32, new_value);
  }

  void set_a(uint64_t new_value)
  {
    channel_set(48, new_value);
  }
};
|
||||
|
||||
class DepthComponent24 : public ComponentValue<uint32_t> {
|
||||
public:
|
||||
operator uint32_t() const
|
||||
|
@ -738,6 +795,36 @@ static void convert(FLOAT4 &dst, const SRGBA8 &src)
|
|||
dst.value = src.value.decode();
|
||||
}
|
||||
|
||||
/**
 * Convert the R, G and B channels of a #HALF4 pixel to the x, y and z members of a #FLOAT3
 * pixel. The alpha channel of `src` is not read.
 */
static void convert(FLOAT3 &dst, const HALF4 &src)
{
  /* Convert the bits of one channel from the F16 to the F32 format and reinterpret as float. */
  const auto half_bits_to_float = [](const auto half_bits) {
    return uint32_t_to_float(convert_float_formats<FormatF32, FormatF16>(half_bits));
  };

  dst.value.x = half_bits_to_float(src.get_r());
  dst.value.y = half_bits_to_float(src.get_g());
  dst.value.z = half_bits_to_float(src.get_b());
}
|
||||
|
||||
/**
 * Convert the x, y and z members of a #FLOAT3 pixel to the R, G and B channels of a #HALF4
 * pixel. The alpha channel of `dst` is set to the converted value of 1.0.
 */
static void convert(HALF4 &dst, const FLOAT3 &src)
{
  /* Reinterpret a float as bits and convert those from the F32 to the F16 format. */
  const auto float_to_half_bits = [](const float value) {
    return convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(value));
  };

  dst.set_r(float_to_half_bits(src.value.x));
  dst.set_g(float_to_half_bits(src.value.y));
  dst.set_b(float_to_half_bits(src.value.z));
  dst.set_a(float_to_half_bits(1.0f));
}
|
||||
|
||||
/**
 * Copy the r, g and b members of a #FLOAT4 pixel to the x, y and z members of a #FLOAT3 pixel.
 * The alpha member of `src` is not read.
 */
static void convert(FLOAT3 &dst, const FLOAT4 &src)
{
  const auto &color = src.value;
  auto &result = dst.value;

  result.x = color.r;
  result.y = color.g;
  result.z = color.b;
}
|
||||
|
||||
/**
 * Copy the x, y and z members of a #FLOAT3 pixel to the r, g and b members of a #FLOAT4 pixel.
 * The alpha member of `dst` is set to 1.0.
 */
static void convert(FLOAT4 &dst, const FLOAT3 &src)
{
  const auto &vector = src.value;
  auto &color = dst.value;

  color.r = vector.x;
  color.g = vector.y;
  color.b = vector.z;
  color.a = 1.0f;
}
|
||||
|
||||
constexpr uint32_t MASK_10_BITS = 0b1111111111;
|
||||
constexpr uint32_t MASK_11_BITS = 0b11111111111;
|
||||
constexpr uint8_t SHIFT_B = 22;
|
||||
|
@ -918,6 +1005,20 @@ static void convert_buffer(void *dst_memory,
|
|||
case ConversionType::B10F_G11F_R11F_TO_FLOAT:
|
||||
convert_per_pixel<FLOAT3, B10F_G11G_R11F>(dst_memory, src_memory, buffer_size);
|
||||
break;
|
||||
|
||||
case ConversionType::FLOAT3_TO_HALF4:
|
||||
convert_per_pixel<HALF4, FLOAT3>(dst_memory, src_memory, buffer_size);
|
||||
break;
|
||||
case ConversionType::HALF4_TO_FLOAT3:
|
||||
convert_per_pixel<FLOAT3, HALF4>(dst_memory, src_memory, buffer_size);
|
||||
break;
|
||||
|
||||
case ConversionType::FLOAT3_TO_FLOAT4:
|
||||
convert_per_pixel<FLOAT4, FLOAT3>(dst_memory, src_memory, buffer_size);
|
||||
break;
|
||||
case ConversionType::FLOAT4_TO_FLOAT3:
|
||||
convert_per_pixel<FLOAT3, FLOAT4>(dst_memory, src_memory, buffer_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -929,9 +1030,10 @@ void convert_host_to_device(void *dst_buffer,
|
|||
const void *src_buffer,
|
||||
size_t buffer_size,
|
||||
eGPUDataFormat host_format,
|
||||
eGPUTextureFormat host_texture_format,
|
||||
eGPUTextureFormat device_format)
|
||||
{
|
||||
ConversionType conversion_type = host_to_device(host_format, device_format);
|
||||
ConversionType conversion_type = host_to_device(host_format, host_texture_format, device_format);
|
||||
BLI_assert(conversion_type != ConversionType::UNSUPPORTED);
|
||||
convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type);
|
||||
}
|
||||
|
@ -940,9 +1042,11 @@ void convert_device_to_host(void *dst_buffer,
|
|||
const void *src_buffer,
|
||||
size_t buffer_size,
|
||||
eGPUDataFormat host_format,
|
||||
eGPUTextureFormat host_texture_format,
|
||||
eGPUTextureFormat device_format)
|
||||
{
|
||||
ConversionType conversion_type = reversed(host_to_device(host_format, device_format));
|
||||
ConversionType conversion_type = reversed(
|
||||
host_to_device(host_format, host_texture_format, device_format));
|
||||
BLI_assert_msg(conversion_type != ConversionType::UNSUPPORTED,
|
||||
"Data conversion between host_format and device_format isn't supported (yet).");
|
||||
convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type);
|
||||
|
|
|
@ -24,6 +24,7 @@ struct VKWorkarounds;
|
|||
* \param src_buffer: host buffer.
|
||||
* \param buffer_size: number of pixels to convert from the start of the given buffer.
|
||||
* \param host_format: format of the host buffer.
|
||||
* \param host_texture_format: texture format of the host buffer.
|
||||
* \param device_format: format of the device buffer.
|
||||
*
|
||||
* \note Will assert when the host_format/device_format combination isn't valid
|
||||
|
@ -34,6 +35,7 @@ void convert_host_to_device(void *dst_buffer,
|
|||
const void *src_buffer,
|
||||
size_t buffer_size,
|
||||
eGPUDataFormat host_format,
|
||||
eGPUTextureFormat host_texture_format,
|
||||
eGPUTextureFormat device_format);
|
||||
|
||||
/**
|
||||
|
@ -43,6 +45,7 @@ void convert_host_to_device(void *dst_buffer,
|
|||
* \param src_buffer: device buffer.
|
||||
* \param buffer_size: number of pixels to convert from the start of the given buffer.
|
||||
* \param host_format: format of the host buffer.
|
||||
* \param host_texture_format: texture format of the host buffer.
|
||||
* \param device_format: format of the device buffer.
|
||||
*
|
||||
* \note Will assert when the host_format/device_format combination isn't valid
|
||||
|
@ -53,6 +56,7 @@ void convert_device_to_host(void *dst_buffer,
|
|||
const void *src_buffer,
|
||||
size_t buffer_size,
|
||||
eGPUDataFormat host_format,
|
||||
eGPUTextureFormat host_texture_format,
|
||||
eGPUTextureFormat device_format);
|
||||
|
||||
/**
|
||||
|
|
|
@ -254,4 +254,68 @@ TEST(VulkanDataConversion, vertex_format_multiple_attributes)
|
|||
}
|
||||
}
|
||||
|
||||
/* Round trip: RGB float host data -> RGBA16F device data -> RGB float host data. */
TEST(VulkanDataConversion, texture_rgb16f_as_floats_to_rgba16f)
{
  const size_t num_pixels = 4;
  const size_t num_components = num_pixels * 3;

  /* One RGB pixel per row. */
  float input[] = {
      1.0, 0.5, 0.2,
      0.2, 1.0, 0.3,
      0.4, 0.2, 1.0,
      1.0, 1.0, 1.0,
  };

  /* Every device pixel is a single 64 bit value. */
  uint64_t device[num_pixels];
  convert_host_to_device(device, input, num_pixels, GPU_DATA_FLOAT, GPU_RGB16F, GPU_RGBA16F);

  float read_back[num_components];
  convert_device_to_host(read_back, device, num_pixels, GPU_DATA_FLOAT, GPU_RGB16F, GPU_RGBA16F);

  /* The values went through a 16 bit float format, so compare with a tolerance. */
  for (size_t i = 0; i < num_components; i++) {
    EXPECT_NEAR(input[i], read_back[i], 0.01);
  }
}
|
||||
|
||||
/* Round trip: RGB float host data -> RGBA32F device data -> RGB float host data. */
TEST(VulkanDataConversion, texture_rgb32f_as_floats_to_rgba32f)
{
  const size_t num_pixels = 4;
  const size_t num_components = num_pixels * 3;

  /* One RGB pixel per row. */
  float input[] = {
      1.0, 0.5, 0.2,
      0.2, 1.0, 0.3,
      0.4, 0.2, 1.0,
      1.0, 1.0, 1.0,
  };

  /* Every device pixel takes four floats. */
  float device[num_pixels * 4];
  convert_host_to_device(device, input, num_pixels, GPU_DATA_FLOAT, GPU_RGB32F, GPU_RGBA32F);

  float read_back[num_components];
  convert_device_to_host(read_back, device, num_pixels, GPU_DATA_FLOAT, GPU_RGB32F, GPU_RGBA32F);

  for (size_t i = 0; i < num_components; i++) {
    EXPECT_NEAR(input[i], read_back[i], 0.01);
  }
}
|
||||
|
||||
} // namespace blender::gpu::tests
|
||||
|
|
|
@ -244,7 +244,7 @@ void VKTexture::read_sub(
|
|||
context.flush();
|
||||
|
||||
convert_device_to_host(
|
||||
r_data, staging_buffer.mapped_memory_get(), sample_len, format, device_format_);
|
||||
r_data, staging_buffer.mapped_memory_get(), sample_len, format, format_, device_format_);
|
||||
}
|
||||
|
||||
void *VKTexture::read(int mip, eGPUDataFormat format)
|
||||
|
@ -284,7 +284,7 @@ void VKTexture::update_sub(
|
|||
VKBuffer staging_buffer;
|
||||
staging_buffer.create(device_memory_size, GPU_USAGE_DYNAMIC, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
|
||||
convert_host_to_device(
|
||||
staging_buffer.mapped_memory_get(), data, sample_len, format, device_format_);
|
||||
staging_buffer.mapped_memory_get(), data, sample_len, format, format_, device_format_);
|
||||
|
||||
VkBufferImageCopy region = {};
|
||||
region.imageExtent.width = extent.x;
|
||||
|
@ -330,6 +330,14 @@ bool VKTexture::init_internal()
|
|||
if (device_format_ == GPU_DEPTH24_STENCIL8 && workarounds.not_aligned_pixel_formats) {
|
||||
device_format_ = GPU_DEPTH32F_STENCIL8;
|
||||
}
|
||||
/* R16G16B16 formats are typically not supported (<1%) but R16G16B16A16 is
|
||||
* typically supported (+90%). */
|
||||
if (device_format_ == GPU_RGB16F) {
|
||||
device_format_ = GPU_RGBA16F;
|
||||
}
|
||||
if (device_format_ == GPU_RGB32F) {
|
||||
device_format_ = GPU_RGBA32F;
|
||||
}
|
||||
|
||||
if (!allocate()) {
|
||||
return false;
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "BLI_task.hh"
|
||||
|
||||
#include "BKE_mesh.hh"
|
||||
#include "BKE_mesh_mapping.hh"
|
||||
|
||||
#include "node_geometry_util.hh"
|
||||
|
||||
|
@ -25,23 +26,8 @@ static void node_declare(NodeDeclarationBuilder &b)
|
|||
|
||||
using VertPriority = std::pair<float, int>;
|
||||
|
||||
struct EdgeVertMap {
|
||||
Array<Vector<int>> edges_by_vertex_map;
|
||||
|
||||
EdgeVertMap(const Mesh &mesh)
|
||||
{
|
||||
const Span<int2> edges = mesh.edges();
|
||||
edges_by_vertex_map.reinitialize(mesh.totvert);
|
||||
for (const int edge_i : edges.index_range()) {
|
||||
const int2 &edge = edges[edge_i];
|
||||
edges_by_vertex_map[edge[0]].append(edge_i);
|
||||
edges_by_vertex_map[edge[1]].append(edge_i);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void shortest_paths(const Mesh &mesh,
|
||||
EdgeVertMap &maps,
|
||||
const GroupedSpan<int> vert_to_edge,
|
||||
const IndexMask end_selection,
|
||||
const VArray<float> &input_cost,
|
||||
MutableSpan<int> r_next_index,
|
||||
|
@ -65,8 +51,7 @@ static void shortest_paths(const Mesh &mesh,
|
|||
continue;
|
||||
}
|
||||
visited[vert_i] = true;
|
||||
const Span<int> incident_edge_indices = maps.edges_by_vertex_map[vert_i];
|
||||
for (const int edge_i : incident_edge_indices) {
|
||||
for (const int edge_i : vert_to_edge[vert_i]) {
|
||||
const int2 &edge = edges[edge_i];
|
||||
const int neighbor_vert_i = edge[0] + edge[1] - vert_i;
|
||||
if (visited[neighbor_vert_i]) {
|
||||
|
@ -117,8 +102,12 @@ class ShortestEdgePathsNextVertFieldInput final : public bke::MeshFieldInput {
|
|||
Array<float> cost(mesh.totvert, FLT_MAX);
|
||||
|
||||
if (!end_selection.is_empty()) {
|
||||
EdgeVertMap maps(mesh);
|
||||
shortest_paths(mesh, maps, end_selection, input_cost, next_index, cost);
|
||||
const Span<int2> edges = mesh.edges();
|
||||
Array<int> vert_to_edge_offset_data;
|
||||
Array<int> vert_to_edge_indices;
|
||||
const GroupedSpan<int> vert_to_edge = bke::mesh::build_vert_to_edge_map(
|
||||
edges, mesh.totvert, vert_to_edge_offset_data, vert_to_edge_indices);
|
||||
shortest_paths(mesh, vert_to_edge, end_selection, input_cost, next_index, cost);
|
||||
}
|
||||
threading::parallel_for(next_index.index_range(), 1024, [&](const IndexRange range) {
|
||||
for (const int i : range) {
|
||||
|
@ -193,8 +182,12 @@ class ShortestEdgePathsCostFieldInput final : public bke::MeshFieldInput {
|
|||
Array<float> cost(mesh.totvert, FLT_MAX);
|
||||
|
||||
if (!end_selection.is_empty()) {
|
||||
EdgeVertMap maps(mesh);
|
||||
shortest_paths(mesh, maps, end_selection, input_cost, next_index, cost);
|
||||
const Span<int2> edges = mesh.edges();
|
||||
Array<int> vert_to_edge_offset_data;
|
||||
Array<int> vert_to_edge_indices;
|
||||
const GroupedSpan<int> vert_to_edge = bke::mesh::build_vert_to_edge_map(
|
||||
edges, mesh.totvert, vert_to_edge_offset_data, vert_to_edge_indices);
|
||||
shortest_paths(mesh, vert_to_edge, end_selection, input_cost, next_index, cost);
|
||||
}
|
||||
threading::parallel_for(cost.index_range(), 1024, [&](const IndexRange range) {
|
||||
for (const int i : range) {
|
||||
|
|
Loading…
Reference in New Issue