BLI: refactor IndexMask for better performance and memory usage #104629
|
@ -167,8 +167,13 @@ class IndexMask {
|
||||||
|
|
||||||
IndexMask slice(IndexRange range) const;
|
IndexMask slice(IndexRange range) const;
|
||||||
IndexMask slice(int64_t start, int64_t size) const;
|
IndexMask slice(int64_t start, int64_t size) const;
|
||||||
IndexMask slice_and_offset(IndexRange range, IndexMaskMemory &memory) const;
|
IndexMask slice_and_offset(IndexRange range,
|
||||||
IndexMask slice_and_offset(int64_t start, int64_t size, IndexMaskMemory &memory) const;
|
const int64_t offset,
|
||||||
|
IndexMaskMemory &memory) const;
|
||||||
|
IndexMask slice_and_offset(int64_t start,
|
||||||
|
int64_t size,
|
||||||
|
const int64_t offset,
|
||||||
|
IndexMaskMemory &memory) const;
|
||||||
IndexMask complement(const IndexRange universe, IndexMaskMemory &memory) const;
|
IndexMask complement(const IndexRange universe, IndexMaskMemory &memory) const;
|
||||||
|
|
||||||
ChunkSlice chunk(const int64_t chunk_i) const;
|
ChunkSlice chunk(const int64_t chunk_i) const;
|
||||||
|
@ -188,7 +193,8 @@ class IndexMask {
|
||||||
template<typename Fn> void foreach_index_optimized(GrainSize grain_size, Fn &&fn) const;
|
template<typename Fn> void foreach_index_optimized(GrainSize grain_size, Fn &&fn) const;
|
||||||
|
|
||||||
template<typename T> static IndexMask from_indices(Span<T> indices, IndexMaskMemory &memory);
|
template<typename T> static IndexMask from_indices(Span<T> indices, IndexMaskMemory &memory);
|
||||||
static IndexMask from_bits(BitSpan bits, IndexMaskMemory &memory, int64_t offset = 0);
|
static IndexMask from_bits(BitSpan bits, IndexMaskMemory &memory);
|
||||||
|
static IndexMask from_bits(const IndexMask &universe, BitSpan bits, IndexMaskMemory &memory);
|
||||||
static IndexMask from_bools(Span<bool> bools, IndexMaskMemory &memory);
|
static IndexMask from_bools(Span<bool> bools, IndexMaskMemory &memory);
|
||||||
static IndexMask from_bools(const VArray<bool> &bools, IndexMaskMemory &memory);
|
static IndexMask from_bools(const VArray<bool> &bools, IndexMaskMemory &memory);
|
||||||
static IndexMask from_bools(const IndexMask &universe,
|
static IndexMask from_bools(const IndexMask &universe,
|
||||||
|
@ -215,8 +221,8 @@ class IndexMask {
|
||||||
MutableSpan<IndexMask> r_masks);
|
MutableSpan<IndexMask> r_masks);
|
||||||
|
|
||||||
template<typename T> void to_indices(MutableSpan<T> r_indices) const;
|
template<typename T> void to_indices(MutableSpan<T> r_indices) const;
|
||||||
void to_bits(MutableBitSpan r_bits, int64_t offset = 0) const;
|
void to_bits(MutableBitSpan r_bits) const;
|
||||||
void to_bools(MutableSpan<bool> r_bools, int64_t offset = 0) const;
|
void to_bools(MutableSpan<bool> r_bools) const;
|
||||||
std::optional<IndexRange> to_range() const;
|
std::optional<IndexRange> to_range() const;
|
||||||
Vector<IndexRange> to_ranges() const;
|
Vector<IndexRange> to_ranges() const;
|
||||||
Vector<IndexRange> to_ranges_invert(IndexRange universe) const;
|
Vector<IndexRange> to_ranges_invert(IndexRange universe) const;
|
||||||
|
|
|
@ -233,33 +233,50 @@ void IndexMask::foreach_span_impl(const FunctionRef<void(OffsetSpan<int64_t, int
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
IndexMask IndexMask::slice_and_offset(const IndexRange range, IndexMaskMemory &memory) const
|
IndexMask IndexMask::slice_and_offset(const IndexRange range,
|
||||||
|
const int64_t offset,
|
||||||
|
IndexMaskMemory &memory) const
|
||||||
{
|
{
|
||||||
return this->slice_and_offset(range.start(), range.size(), memory);
|
return this->slice_and_offset(range.start(), range.size(), offset, memory);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static IndexMask offset_indices_in_mask(const IndexMask &mask,
|
||||||
|
const int64_t offset,
|
||||||
|
IndexMaskMemory &memory)
|
||||||
|
{
|
||||||
|
Vector<T, chunk_capacity> indices(mask.size());
|
||||||
|
mask.to_indices<T>(indices);
|
||||||
|
threading::parallel_for(indices.index_range(), 2048, [&](const IndexRange range) {
|
||||||
|
for (T &index : indices.as_mutable_span().slice(range)) {
|
||||||
|
index += T(offset);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return IndexMask::from_indices<T>(indices, memory);
|
||||||
}
|
}
|
||||||
|
|
||||||
IndexMask IndexMask::slice_and_offset(const int64_t start,
|
IndexMask IndexMask::slice_and_offset(const int64_t start,
|
||||||
const int64_t size,
|
const int64_t size,
|
||||||
|
const int64_t offset,
|
||||||
JacquesLucke marked this conversation as resolved
Outdated
|
|||||||
IndexMaskMemory &memory) const
|
IndexMaskMemory &memory) const
|
||||||
{
|
{
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
if (this->to_range().has_value()) {
|
if (std::optional<IndexRange> range = this->to_range()) {
|
||||||
return IndexMask(size);
|
return range->slice(start, size).shift(offset);
|
||||||
}
|
}
|
||||||
const IndexMask sliced_mask = this->slice(start, size);
|
const IndexMask sliced_mask = this->slice(start, size);
|
||||||
const int64_t offset = sliced_mask.first();
|
|
||||||
if (offset == 0) {
|
if (offset == 0) {
|
||||||
return sliced_mask;
|
return sliced_mask;
|
||||||
}
|
}
|
||||||
if (sliced_mask.to_range().has_value()) {
|
if (std::optional<IndexRange> range = sliced_mask.to_range()) {
|
||||||
return IndexMask(size);
|
return range->shift(offset);
|
||||||
}
|
}
|
||||||
const int64_t range_size = sliced_mask.last() - sliced_mask.first() + 1;
|
if (sliced_mask.last() < INT32_MAX && sliced_mask.last() + offset < INT32_MAX) {
|
||||||
BitVector bits(range_size);
|
return offset_indices_in_mask<int32_t>(sliced_mask, offset, memory);
|
||||||
sliced_mask.to_bits(bits, offset);
|
}
|
||||||
return IndexMask::from_bits(bits, memory);
|
return offset_indices_in_mask<int64_t>(sliced_mask, offset, memory);
|
||||||
}
|
}
|
||||||
Hans Goudey
commented
Maybe not worth it (also just want to test my understanding)-- couldn't this limit the maximum size of the span argument to `find_predicate_begin` with something like `find_predicate_begin(segment_indices.take_front(max_segment_size + offset),...`?
Jacques Lucke
commented
Not sure why the `+ offset`, but taking at most `max_segment_size` makes sense.
In practice it likely doesn't make a difference right now, because the span passed to `segments_from_indices` is already sliced (for multi-threading).
|
|||||||
|
|
||||||
IndexMask IndexMask::complement(const IndexRange universe, IndexMaskMemory &memory) const
|
IndexMask IndexMask::complement(const IndexRange universe, IndexMaskMemory &memory) const
|
||||||
|
@ -417,15 +434,18 @@ IndexMask IndexMask::from_indices(const Span<T> indices, IndexMaskMemory &memory
|
||||||
return mask;
|
return mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
IndexMask IndexMask::from_bits(const BitSpan bits, IndexMaskMemory &memory, const int64_t offset)
|
IndexMask IndexMask::from_bits(const BitSpan bits, IndexMaskMemory &memory)
|
||||||
{
|
{
|
||||||
Vector<int64_t> indices;
|
return IndexMask::from_bits(bits.index_range(), bits, memory);
|
||||||
for (const int64_t i : bits.index_range()) {
|
}
|
||||||
if (bits[i]) {
|
|
||||||
indices.append(i + offset);
|
IndexMask IndexMask::from_bits(const IndexMask &universe,
|
||||||
}
|
const BitSpan bits,
|
||||||
}
|
IndexMaskMemory &memory)
|
||||||
return IndexMask::from_indices<int64_t>(indices, memory);
|
{
|
||||||
|
return IndexMask::from_predicate(universe, GrainSize(1024), memory, [bits](const int64_t index) {
|
||||||
|
return bits[index].test();
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
IndexMask IndexMask::from_bools(Span<bool> bools, IndexMaskMemory &memory)
|
IndexMask IndexMask::from_bools(Span<bool> bools, IndexMaskMemory &memory)
|
||||||
|
@ -443,7 +463,7 @@ IndexMask IndexMask::from_bools(const IndexMask &universe,
|
||||||
IndexMaskMemory &memory)
|
IndexMaskMemory &memory)
|
||||||
{
|
{
|
||||||
return IndexMask::from_predicate(
|
return IndexMask::from_predicate(
|
||||||
universe, GrainSize(1024), memory, [&](const int64_t index) { return bools[index]; });
|
universe, GrainSize(1024), memory, [bools](const int64_t index) { return bools[index]; });
|
||||||
}
|
}
|
||||||
|
|
||||||
IndexMask IndexMask::from_bools(const IndexMask &universe,
|
IndexMask IndexMask::from_bools(const IndexMask &universe,
|
||||||
|
@ -831,28 +851,28 @@ template<typename T> void IndexMask::to_indices(MutableSpan<T> r_indices) const
|
||||||
[&](const int64_t i, const int64_t mask_i) mutable { r_indices[mask_i] = T(i); });
|
[&](const int64_t i, const int64_t mask_i) mutable { r_indices[mask_i] = T(i); });
|
||||||
}
|
}
|
||||||
|
|
||||||
void IndexMask::to_bits(MutableBitSpan r_bits, int64_t offset) const
|
void IndexMask::to_bits(MutableBitSpan r_bits) const
|
||||||
{
|
{
|
||||||
BLI_assert(r_bits.size() >= this->min_array_size() - offset);
|
BLI_assert(r_bits.size() >= this->min_array_size());
|
||||||
r_bits.reset_all();
|
r_bits.reset_all();
|
||||||
this->foreach_span_or_range([&](const auto mask_segment) {
|
this->foreach_span_or_range([&](const auto mask_segment) {
|
||||||
if constexpr (std::is_same_v<std::decay_t<decltype(mask_segment)>, IndexRange>) {
|
if constexpr (std::is_same_v<std::decay_t<decltype(mask_segment)>, IndexRange>) {
|
||||||
const IndexRange range = mask_segment.shift(-offset);
|
const IndexRange range = mask_segment;
|
||||||
r_bits.slice(range).set_all();
|
r_bits.slice(range).set_all();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
for (const int64_t i : mask_segment) {
|
for (const int64_t i : mask_segment) {
|
||||||
r_bits[i - offset].set();
|
r_bits[i].set();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void IndexMask::to_bools(MutableSpan<bool> r_bools, int64_t offset) const
|
void IndexMask::to_bools(MutableSpan<bool> r_bools) const
|
||||||
{
|
{
|
||||||
BLI_assert(r_bools.size() >= this->min_array_size() - offset);
|
BLI_assert(r_bools.size() >= this->min_array_size());
|
||||||
r_bools.fill(false);
|
r_bools.fill(false);
|
||||||
this->foreach_index_optimized([&](const int64_t i) { r_bools[i - offset] = true; });
|
this->foreach_index_optimized(GrainSize(2048), [&](const int64_t i) { r_bools[i] = true; });
|
||||||
}
|
}
|
||||||
|
|
||||||
Vector<IndexRange> IndexMask::to_ranges() const
|
Vector<IndexRange> IndexMask::to_ranges() const
|
||||||
|
|
|
@ -97,18 +97,14 @@ TEST(index_mask, FromBits)
|
||||||
IndexMaskMemory memory;
|
IndexMaskMemory memory;
|
||||||
const uint64_t bits =
|
const uint64_t bits =
|
||||||
0b0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'1111'0010'0000;
|
0b0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'1111'0010'0000;
|
||||||
const IndexMask mask = IndexMask::from_bits(BitSpan(&bits, IndexRange(2, 40)), memory, 100);
|
const IndexMask mask = IndexMask::from_bits(BitSpan(&bits, IndexRange(2, 40)), memory);
|
||||||
Array<int> indices(5);
|
Array<int> indices(5);
|
||||||
mask.to_indices<int>(indices);
|
mask.to_indices<int>(indices);
|
||||||
EXPECT_EQ(indices[0], 103);
|
EXPECT_EQ(indices[0], 3);
|
||||||
EXPECT_EQ(indices[1], 106);
|
EXPECT_EQ(indices[1], 6);
|
||||||
EXPECT_EQ(indices[2], 107);
|
EXPECT_EQ(indices[2], 7);
|
||||||
EXPECT_EQ(indices[3], 108);
|
EXPECT_EQ(indices[3], 8);
|
||||||
EXPECT_EQ(indices[4], 109);
|
EXPECT_EQ(indices[4], 9);
|
||||||
|
|
||||||
uint64_t new_bits = 0;
|
|
||||||
mask.to_bits(MutableBitSpan(&new_bits, IndexRange(5, 40)), 100);
|
|
||||||
EXPECT_EQ(new_bits, bits << 3);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(index_mask, FromSize)
|
TEST(index_mask, FromSize)
|
||||||
|
|
|
@ -149,7 +149,8 @@ void MultiFunction::call_auto(const IndexMask &mask, Params params, Context cont
|
||||||
const IndexRange input_slice_range{input_slice_start, input_slice_size};
|
const IndexRange input_slice_range{input_slice_start, input_slice_size};
|
||||||
|
|
||||||
IndexMaskMemory memory;
|
IndexMaskMemory memory;
|
||||||
const IndexMask offset_mask = mask.slice_and_offset(sub_range, memory);
|
const int64_t offset = -input_slice_start;
|
||||||
|
const IndexMask offset_mask = mask.slice_and_offset(sub_range, offset, memory);
|
||||||
|
|
||||||
ParamsBuilder sliced_params{*this, &offset_mask};
|
ParamsBuilder sliced_params{*this, &offset_mask};
|
||||||
add_sliced_parameters(*signature_ref_, params, input_slice_range, sliced_params);
|
add_sliced_parameters(*signature_ref_, params, input_slice_range, sliced_params);
|
||||||
|
|
Loading…
Reference in New Issue
With the 1/2^14 constant factor, a larger inline buffer here could probably eliminate most allocations. Same below with `Vector<IndexMaskSegment> segments`.