BLI: refactor IndexMask for better performance and memory usage #104629
|
@ -167,8 +167,13 @@ class IndexMask {
|
|||
|
||||
IndexMask slice(IndexRange range) const;
|
||||
IndexMask slice(int64_t start, int64_t size) const;
|
||||
IndexMask slice_and_offset(IndexRange range, IndexMaskMemory &memory) const;
|
||||
IndexMask slice_and_offset(int64_t start, int64_t size, IndexMaskMemory &memory) const;
|
||||
IndexMask slice_and_offset(IndexRange range,
|
||||
const int64_t offset,
|
||||
IndexMaskMemory &memory) const;
|
||||
IndexMask slice_and_offset(int64_t start,
|
||||
int64_t size,
|
||||
const int64_t offset,
|
||||
IndexMaskMemory &memory) const;
|
||||
IndexMask complement(const IndexRange universe, IndexMaskMemory &memory) const;
|
||||
|
||||
ChunkSlice chunk(const int64_t chunk_i) const;
|
||||
|
@ -188,7 +193,8 @@ class IndexMask {
|
|||
template<typename Fn> void foreach_index_optimized(GrainSize grain_size, Fn &&fn) const;
|
||||
|
||||
template<typename T> static IndexMask from_indices(Span<T> indices, IndexMaskMemory &memory);
|
||||
static IndexMask from_bits(BitSpan bits, IndexMaskMemory &memory, int64_t offset = 0);
|
||||
static IndexMask from_bits(BitSpan bits, IndexMaskMemory &memory);
|
||||
static IndexMask from_bits(const IndexMask &universe, BitSpan bits, IndexMaskMemory &memory);
|
||||
static IndexMask from_bools(Span<bool> bools, IndexMaskMemory &memory);
|
||||
static IndexMask from_bools(const VArray<bool> &bools, IndexMaskMemory &memory);
|
||||
static IndexMask from_bools(const IndexMask &universe,
|
||||
|
@ -215,8 +221,8 @@ class IndexMask {
|
|||
MutableSpan<IndexMask> r_masks);
|
||||
|
||||
template<typename T> void to_indices(MutableSpan<T> r_indices) const;
|
||||
void to_bits(MutableBitSpan r_bits, int64_t offset = 0) const;
|
||||
void to_bools(MutableSpan<bool> r_bools, int64_t offset = 0) const;
|
||||
void to_bits(MutableBitSpan r_bits) const;
|
||||
void to_bools(MutableSpan<bool> r_bools) const;
|
||||
std::optional<IndexRange> to_range() const;
|
||||
Vector<IndexRange> to_ranges() const;
|
||||
Vector<IndexRange> to_ranges_invert(IndexRange universe) const;
|
||||
|
|
|
@ -233,33 +233,50 @@ void IndexMask::foreach_span_impl(const FunctionRef<void(OffsetSpan<int64_t, int
|
|||
});
|
||||
}
|
||||
|
||||
IndexMask IndexMask::slice_and_offset(const IndexRange range, IndexMaskMemory &memory) const
|
||||
IndexMask IndexMask::slice_and_offset(const IndexRange range,
|
||||
const int64_t offset,
|
||||
IndexMaskMemory &memory) const
|
||||
{
|
||||
return this->slice_and_offset(range.start(), range.size(), memory);
|
||||
return this->slice_and_offset(range.start(), range.size(), offset, memory);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static IndexMask offset_indices_in_mask(const IndexMask &mask,
|
||||
const int64_t offset,
|
||||
IndexMaskMemory &memory)
|
||||
{
|
||||
Vector<T, chunk_capacity> indices(mask.size());
|
||||
mask.to_indices<T>(indices);
|
||||
threading::parallel_for(indices.index_range(), 2048, [&](const IndexRange range) {
|
||||
for (T &index : indices.as_mutable_span().slice(range)) {
|
||||
index += T(offset);
|
||||
}
|
||||
});
|
||||
return IndexMask::from_indices<T>(indices, memory);
|
||||
}
|
||||
|
||||
IndexMask IndexMask::slice_and_offset(const int64_t start,
|
||||
const int64_t size,
|
||||
const int64_t offset,
|
||||
JacquesLucke marked this conversation as resolved
Outdated
|
||||
IndexMaskMemory &memory) const
|
||||
{
|
||||
if (size == 0) {
|
||||
return {};
|
||||
}
|
||||
if (this->to_range().has_value()) {
|
||||
return IndexMask(size);
|
||||
if (std::optional<IndexRange> range = this->to_range()) {
|
||||
return range->slice(start, size).shift(offset);
|
||||
}
|
||||
const IndexMask sliced_mask = this->slice(start, size);
|
||||
const int64_t offset = sliced_mask.first();
|
||||
if (offset == 0) {
|
||||
return sliced_mask;
|
||||
}
|
||||
if (sliced_mask.to_range().has_value()) {
|
||||
return IndexMask(size);
|
||||
if (std::optional<IndexRange> range = sliced_mask.to_range()) {
|
||||
return range->shift(offset);
|
||||
}
|
||||
const int64_t range_size = sliced_mask.last() - sliced_mask.first() + 1;
|
||||
BitVector bits(range_size);
|
||||
sliced_mask.to_bits(bits, offset);
|
||||
return IndexMask::from_bits(bits, memory);
|
||||
if (sliced_mask.last() < INT32_MAX && sliced_mask.last() + offset < INT32_MAX) {
|
||||
return offset_indices_in_mask<int32_t>(sliced_mask, offset, memory);
|
||||
}
|
||||
return offset_indices_in_mask<int64_t>(sliced_mask, offset, memory);
|
||||
}
|
||||
Hans Goudey
commented
Maybe not worth it (also just want to test my understanding) -- couldn't this limit the maximum size of the span argument to `find_predicate_begin` with something like `find_predicate_begin(segment_indices.take_front(max_segment_size + offset), ...)`?
Jacques Lucke
commented
Not sure why the `+ offset`, but taking at most `max_segment_size` makes sense.
In practice it likely doesn't make a difference right now, because the span passed to `segments_from_indices` is already sliced (for multi-threading).
|
||||
|
||||
IndexMask IndexMask::complement(const IndexRange universe, IndexMaskMemory &memory) const
|
||||
|
@ -417,15 +434,18 @@ IndexMask IndexMask::from_indices(const Span<T> indices, IndexMaskMemory &memory
|
|||
return mask;
|
||||
}
|
||||
|
||||
IndexMask IndexMask::from_bits(const BitSpan bits, IndexMaskMemory &memory, const int64_t offset)
|
||||
IndexMask IndexMask::from_bits(const BitSpan bits, IndexMaskMemory &memory)
|
||||
{
|
||||
Vector<int64_t> indices;
|
||||
for (const int64_t i : bits.index_range()) {
|
||||
if (bits[i]) {
|
||||
indices.append(i + offset);
|
||||
}
|
||||
}
|
||||
return IndexMask::from_indices<int64_t>(indices, memory);
|
||||
return IndexMask::from_bits(bits.index_range(), bits, memory);
|
||||
}
|
||||
|
||||
/**
 * Builds a mask containing those indices of \a universe whose corresponding
 * bit in \a bits is set. The result is allocated from \a memory.
 */
IndexMask IndexMask::from_bits(const IndexMask &universe,
                               const BitSpan bits,
                               IndexMaskMemory &memory)
{
  /* A set bit at an index keeps that index from the universe. The span is
   * captured by value because #BitSpan is a cheap non-owning view. */
  auto bit_is_set = [bits](const int64_t index) { return bits[index].test(); };
  return IndexMask::from_predicate(universe, GrainSize(1024), memory, bit_is_set);
}
|
||||
|
||||
IndexMask IndexMask::from_bools(Span<bool> bools, IndexMaskMemory &memory)
|
||||
|
@ -443,7 +463,7 @@ IndexMask IndexMask::from_bools(const IndexMask &universe,
|
|||
IndexMaskMemory &memory)
|
||||
{
|
||||
return IndexMask::from_predicate(
|
||||
universe, GrainSize(1024), memory, [&](const int64_t index) { return bools[index]; });
|
||||
universe, GrainSize(1024), memory, [bools](const int64_t index) { return bools[index]; });
|
||||
}
|
||||
|
||||
IndexMask IndexMask::from_bools(const IndexMask &universe,
|
||||
|
@ -831,28 +851,28 @@ template<typename T> void IndexMask::to_indices(MutableSpan<T> r_indices) const
|
|||
[&](const int64_t i, const int64_t mask_i) mutable { r_indices[mask_i] = T(i); });
|
||||
}
|
||||
|
||||
void IndexMask::to_bits(MutableBitSpan r_bits, int64_t offset) const
|
||||
void IndexMask::to_bits(MutableBitSpan r_bits) const
|
||||
{
|
||||
BLI_assert(r_bits.size() >= this->min_array_size() - offset);
|
||||
BLI_assert(r_bits.size() >= this->min_array_size());
|
||||
r_bits.reset_all();
|
||||
this->foreach_span_or_range([&](const auto mask_segment) {
|
||||
if constexpr (std::is_same_v<std::decay_t<decltype(mask_segment)>, IndexRange>) {
|
||||
const IndexRange range = mask_segment.shift(-offset);
|
||||
const IndexRange range = mask_segment;
|
||||
r_bits.slice(range).set_all();
|
||||
}
|
||||
else {
|
||||
for (const int64_t i : mask_segment) {
|
||||
r_bits[i - offset].set();
|
||||
r_bits[i].set();
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void IndexMask::to_bools(MutableSpan<bool> r_bools, int64_t offset) const
|
||||
void IndexMask::to_bools(MutableSpan<bool> r_bools) const
|
||||
{
|
||||
BLI_assert(r_bools.size() >= this->min_array_size() - offset);
|
||||
BLI_assert(r_bools.size() >= this->min_array_size());
|
||||
r_bools.fill(false);
|
||||
this->foreach_index_optimized([&](const int64_t i) { r_bools[i - offset] = true; });
|
||||
this->foreach_index_optimized(GrainSize(2048), [&](const int64_t i) { r_bools[i] = true; });
|
||||
}
|
||||
|
||||
Vector<IndexRange> IndexMask::to_ranges() const
|
||||
|
|
|
@ -97,18 +97,14 @@ TEST(index_mask, FromBits)
|
|||
IndexMaskMemory memory;
|
||||
const uint64_t bits =
|
||||
0b0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'1111'0010'0000;
|
||||
const IndexMask mask = IndexMask::from_bits(BitSpan(&bits, IndexRange(2, 40)), memory, 100);
|
||||
const IndexMask mask = IndexMask::from_bits(BitSpan(&bits, IndexRange(2, 40)), memory);
|
||||
Array<int> indices(5);
|
||||
mask.to_indices<int>(indices);
|
||||
EXPECT_EQ(indices[0], 103);
|
||||
EXPECT_EQ(indices[1], 106);
|
||||
EXPECT_EQ(indices[2], 107);
|
||||
EXPECT_EQ(indices[3], 108);
|
||||
EXPECT_EQ(indices[4], 109);
|
||||
|
||||
uint64_t new_bits = 0;
|
||||
mask.to_bits(MutableBitSpan(&new_bits, IndexRange(5, 40)), 100);
|
||||
EXPECT_EQ(new_bits, bits << 3);
|
||||
EXPECT_EQ(indices[0], 3);
|
||||
EXPECT_EQ(indices[1], 6);
|
||||
EXPECT_EQ(indices[2], 7);
|
||||
EXPECT_EQ(indices[3], 8);
|
||||
EXPECT_EQ(indices[4], 9);
|
||||
}
|
||||
|
||||
TEST(index_mask, FromSize)
|
||||
|
|
|
@ -149,7 +149,8 @@ void MultiFunction::call_auto(const IndexMask &mask, Params params, Context cont
|
|||
const IndexRange input_slice_range{input_slice_start, input_slice_size};
|
||||
|
||||
IndexMaskMemory memory;
|
||||
const IndexMask offset_mask = mask.slice_and_offset(sub_range, memory);
|
||||
const int64_t offset = -input_slice_start;
|
||||
const IndexMask offset_mask = mask.slice_and_offset(sub_range, offset, memory);
|
||||
|
||||
ParamsBuilder sliced_params{*this, &offset_mask};
|
||||
add_sliced_parameters(*signature_ref_, params, input_slice_range, sliced_params);
|
||||
|
|
Loading…
Reference in New Issue
With the `1/2^14` constant factor, a larger inline buffer here could probably eliminate most allocations. Same below with `Vector<IndexMaskSegment> segments`.