From 339cf787c232e60122908b07731734273a75e497 Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Fri, 26 May 2023 15:21:46 -0400 Subject: [PATCH 01/12] WIP: BLI: Improve IndexMask::complement() performance Instead of reusing `from_predicate` and lookup in the source mask, scan the mask once, inserting segments between the original indices. Theoretically this improves the performance from O(N*log(N)) to O(N). But with the small constant offset of the former, the improvement won't be that nice. TODO: - More performance testing. I didn't see much change in the test code runtime. --- source/blender/blenlib/intern/index_mask.cc | 118 ++++++++++++++++-- .../blenlib/tests/BLI_index_mask_test.cc | 51 ++++++++ 2 files changed, 161 insertions(+), 8 deletions(-) diff --git a/source/blender/blenlib/intern/index_mask.cc b/source/blender/blenlib/intern/index_mask.cc index 37b9823f8c5..2dabae25849 100644 --- a/source/blender/blenlib/intern/index_mask.cc +++ b/source/blender/blenlib/intern/index_mask.cc @@ -150,14 +150,6 @@ IndexMask IndexMask::slice_and_offset(const int64_t start, return sliced_mask; } -IndexMask IndexMask::complement(const IndexRange universe, IndexMaskMemory &memory) const -{ - /* TODO: Implement more efficient solution. */ - return IndexMask::from_predicate(universe, GrainSize(512), memory, [&](const int64_t index) { - return !this->contains(index); - }); -} - /** * Merges consecutive segments in some cases. Having fewer but larger segments generally allows for * better performance when using the mask later on. @@ -330,6 +322,116 @@ struct ParallelSegmentsCollector { } }; +/** + * Convert a range to potentially multiple index mask segments. + */ +static void range_to_segments(const IndexRange range, Vector &segments) +{ + const Span static_indices = get_static_indices_array(); + for (int64_t start = 0; start < range.size(); start += max_segment_size) { + const int64_t size = std::min(max_segment_size, range.size() - start); + segments.append_as(range.start() + start, static_indices.take_front(size)); + } +} + +static void inverted_indices_to_segments(const IndexMaskSegment segment, + const int64_t range_threshold, + LinearAllocator<> &allocator, + Vector &segments) +{ + const int64_t offset = segment.offset(); + const Span static_indices = get_static_indices_array(); + + int64_t inverted_index_count = 0; + std::array inverted_indices_array; + auto add_index = [&](const int16_t index) { + inverted_indices_array[size_t(inverted_index_count)] = index; + inverted_index_count++; + }; + + auto finish_indices = [&]() { + if (inverted_index_count == 0) { + return; + } + MutableSpan offset_indices = allocator.allocate_array(inverted_index_count); + offset_indices.copy_from(Span(inverted_indices_array).take_front(inverted_index_count)); + segments.append_as(offset, offset_indices); + inverted_index_count = 0; + }; + + Span indices = segment.base_span(); + while (indices.size() > 1) { + const int64_t size_before_gap = unique_sorted_indices::find_size_of_next_range(indices); + if (size_before_gap == indices.size()) { + break; + } + + const int16_t gap_first = indices[size_before_gap - 1] + 1; + const int16_t next = indices[size_before_gap]; + const int16_t gap_size = next - gap_first; + if (gap_size > range_threshold) { + finish_indices(); + segments.append_as(offset + gap_first, static_indices.take_front(gap_size)); + } + else { + for (const int64_t i : IndexRange(gap_size)) { + add_index(gap_first + int16_t(i)); + } + } + + indices = indices.drop_front(size_before_gap); + } + + finish_indices(); 
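A minimal standalone sketch of the single-pass "gap scanning" idea described in the commit message above: walk the sorted mask indices once and emit the ranges between consecutive indices, which is what makes the complement linear in the number of indices. The container types here are simplified stand-ins (std::vector, std::pair), not the real IndexMask / IndexMaskSegment API.

#include <cstdint>
#include <utility>
#include <vector>

/* Complement of sorted, unique `indices` within the universe [0, universe_size). */
static std::vector<std::pair<int64_t, int64_t>> complement_ranges(
    const std::vector<int64_t> &indices, const int64_t universe_size)
{
  std::vector<std::pair<int64_t, int64_t>> ranges; /* Each entry is (start, size). */
  int64_t next_unhandled = 0;
  for (const int64_t index : indices) {
    if (index > next_unhandled) {
      /* The gap before `index` belongs to the complement. */
      ranges.emplace_back(next_unhandled, index - next_unhandled);
    }
    next_unhandled = index + 1;
  }
  if (next_unhandled < universe_size) {
    /* Remainder of the universe after the last index. */
    ranges.emplace_back(next_unhandled, universe_size - next_unhandled);
  }
  return ranges;
}

For example, complement_ranges({2, 3, 7}, 10) yields {(0, 2), (4, 3), (8, 2)}, i.e. the indices {0, 1, 4, 5, 6, 8, 9}.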
+} + +IndexMask IndexMask::complement(const IndexRange universe, IndexMaskMemory &memory) const +{ + if (this->is_empty()) { + return universe; + } + + Vector segments; + + if (universe.start() < this->first()) { + range_to_segments(universe.take_front(this->first() - universe.start()), segments); + } + + if (!this->to_range()) { + const int64_t segments_num = this->segments_num(); + ParallelSegmentsCollector segments_collector; + threading::parallel_for( + IndexRange(segments_num).drop_back(1), 512, [&](const IndexRange range) { + ParallelSegmentsCollector::LocalData &local_data = + segments_collector.data_by_thread.local(); + + for (const int64_t segment_i : range) { + const IndexMaskSegment segment = this->segment(segment_i); + inverted_indices_to_segments(segment, 64, local_data.allocator, local_data.segments); + + const IndexMaskSegment next_segment = this->segment(segment_i + 1); + const int64_t between_start = segment.last() + 1; + const int64_t size_between_segments = next_segment[0] - segment.last() - 1; + const IndexRange range_between_segments(between_start, size_between_segments); + if (!range_between_segments.is_empty()) { + range_to_segments(range_between_segments, local_data.segments); + } + } + }); + + inverted_indices_to_segments(this->segment(segments_num - 1), 64, memory, segments); + segments_collector.reduce(memory, segments); + } + + if (universe.last() > this->first()) { + range_to_segments(universe.take_back(universe.last() - this->last()), segments); + } + + IndexMask result = mask_from_segments(segments, memory); + // BLI_assert(result.size() == universe.size() - this->size()); + return result; +} + template IndexMask IndexMask::from_indices(const Span indices, IndexMaskMemory &memory) { diff --git a/source/blender/blenlib/tests/BLI_index_mask_test.cc b/source/blender/blenlib/tests/BLI_index_mask_test.cc index 9c48d5f0257..ce29b7494bd 100644 --- a/source/blender/blenlib/tests/BLI_index_mask_test.cc +++ b/source/blender/blenlib/tests/BLI_index_mask_test.cc @@ -223,4 +223,55 @@ TEST(index_mask, FromPredicateFuzzy) }); } +TEST(index_mask, Complement) +{ + RandomNumberGenerator rng; + + const int64_t mask_size = 100; + const int64_t iter_num = 100; + const int64_t universe_size = 110; + + for (const int64_t iter : IndexRange(iter_num)) { + Set values; + for ([[maybe_unused]] const int64_t _ : IndexRange(iter)) { + values.add(rng.get_int32(mask_size)); + } + IndexMaskMemory memory; + const IndexMask mask = IndexMask::from_predicate( + IndexRange(mask_size), GrainSize(1024), memory, [&](const int64_t i) { + return values.contains(int(i)); + }); + + const IndexMask complement = mask.complement(IndexRange(universe_size), memory); + EXPECT_EQ(universe_size - mask.size(), complement.size()); + complement.foreach_index([&](const int64_t i) { EXPECT_FALSE(mask.contains(i)); }); + } +} + +TEST(index_mask, ComplementLarge) +{ + RandomNumberGenerator rng; + + const int64_t mask_size = 100000; + const int64_t iter_num = 100; + const int64_t universe_size = 110000; + + for (const int64_t iter : IndexRange(100)) { + Set values; + for ([[maybe_unused]] const int64_t _ : IndexRange(iter * mask_size / iter_num)) { + values.add(rng.get_int32(mask_size)); + } + IndexMaskMemory memory; + const IndexMask mask = IndexMask::from_predicate( + IndexRange(mask_size), GrainSize(1024), memory, [&](const int64_t i) { + return values.contains(int(i)); + }); + + const IndexMask complement = mask.complement(IndexRange(universe_size), memory); + complement.foreach_index([&](const int64_t i) { 
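The complement implementation above leans on range_to_segments() to turn each gap into index mask segments. A sketch of that splitting step with plain (start, size) pairs instead of IndexMaskSegment; the 2^14 segment cap is an assumption made for the example, the real limit is blenlib's max_segment_size:

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

static std::vector<std::pair<int64_t, int64_t>> split_into_segments(const int64_t start,
                                                                    const int64_t size)
{
  /* Assumed cap; it keeps in-segment offsets small enough for int16_t indices. */
  constexpr int64_t segment_cap = 16384;
  std::vector<std::pair<int64_t, int64_t>> segments;
  for (int64_t i = 0; i < size; i += segment_cap) {
    segments.emplace_back(start + i, std::min(segment_cap, size - i));
  }
  return segments;
}

split_into_segments(0, 40000), for instance, produces {(0, 16384), (16384, 16384), (32768, 7232)}.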
EXPECT_FALSE(mask.contains(i)); }); + mask.foreach_index([&](const int64_t i) { EXPECT_FALSE(complement.contains(i)); }); + EXPECT_EQ(universe_size - mask.size(), complement.size()); + } +} + } // namespace blender::index_mask::tests -- 2.30.2 From da708df74b2578e97e6093dc7ff2fac402b7c490 Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Fri, 26 May 2023 16:11:54 -0400 Subject: [PATCH 02/12] Cleanup: Simplofy use of modifier eval context flags --- source/blender/blenkernel/intern/DerivedMesh.cc | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/source/blender/blenkernel/intern/DerivedMesh.cc b/source/blender/blenkernel/intern/DerivedMesh.cc index 83874191f84..123f4fb9ff0 100644 --- a/source/blender/blenkernel/intern/DerivedMesh.cc +++ b/source/blender/blenkernel/intern/DerivedMesh.cc @@ -632,12 +632,10 @@ static void mesh_calc_modifiers(struct Depsgraph *depsgraph, const bool sculpt_dyntopo = (sculpt_mode && ob->sculpt->bm) && !use_render; /* Modifier evaluation contexts for different types of modifiers. */ - ModifierApplyFlag apply_render = use_render ? MOD_APPLY_RENDER : (ModifierApplyFlag)0; - ModifierApplyFlag apply_cache = use_cache ? MOD_APPLY_USECACHE : (ModifierApplyFlag)0; - const ModifierEvalContext mectx = { - depsgraph, ob, (ModifierApplyFlag)(apply_render | apply_cache)}; - const ModifierEvalContext mectx_orco = { - depsgraph, ob, (ModifierApplyFlag)(apply_render | MOD_APPLY_ORCO)}; + ModifierApplyFlag apply_render = use_render ? MOD_APPLY_RENDER : ModifierApplyFlag(0); + ModifierApplyFlag apply_cache = use_cache ? MOD_APPLY_USECACHE : ModifierApplyFlag(0); + const ModifierEvalContext mectx = {depsgraph, ob, apply_render | apply_cache}; + const ModifierEvalContext mectx_orco = {depsgraph, ob, apply_render | MOD_APPLY_ORCO}; /* Get effective list of modifiers to execute. Some effects like shape keys * are added as virtual modifiers before the user created modifiers. */ @@ -1166,9 +1164,8 @@ static void editbmesh_calc_modifiers(struct Depsgraph *depsgraph, const bool use_render = (DEG_get_mode(depsgraph) == DAG_EVAL_RENDER); /* Modifier evaluation contexts for different types of modifiers. */ - ModifierApplyFlag apply_render = use_render ? MOD_APPLY_RENDER : (ModifierApplyFlag)0; - const ModifierEvalContext mectx = { - depsgraph, ob, (ModifierApplyFlag)(MOD_APPLY_USECACHE | apply_render)}; + ModifierApplyFlag apply_render = use_render ? MOD_APPLY_RENDER : ModifierApplyFlag(0); + const ModifierEvalContext mectx = {depsgraph, ob, MOD_APPLY_USECACHE | apply_render}; const ModifierEvalContext mectx_orco = {depsgraph, ob, MOD_APPLY_ORCO}; /* Get effective list of modifiers to execute. 
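The cleanup in this patch can drop the C-style casts because bitwise OR on ModifierApplyFlag values yields a ModifierApplyFlag again, presumably via the ENUM_OPERATORS-style operator overloads Blender defines for its flag enums. A generic sketch of that pattern, with a hypothetical MyFlag enum standing in for ModifierApplyFlag:

#include <type_traits>

enum MyFlag {
  MY_FLAG_NONE = 0,
  MY_FLAG_A = 1 << 0,
  MY_FLAG_B = 1 << 1,
};

constexpr MyFlag operator|(const MyFlag a, const MyFlag b)
{
  using T = std::underlying_type_t<MyFlag>;
  return MyFlag(T(a) | T(b));
}

/* Both operands and the result stay strongly typed, so no casts are needed at call sites. */
static_assert((MY_FLAG_A | MY_FLAG_B) == 3);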
Some effects like shape keys -- 2.30.2 From 38a061824be570be0b1ca3d7bc6c9084025e9d6e Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Tue, 30 May 2023 16:37:27 -0400 Subject: [PATCH 03/12] Make performance test --- .../blenlib/tests/BLI_index_mask_test.cc | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/source/blender/blenlib/tests/BLI_index_mask_test.cc b/source/blender/blenlib/tests/BLI_index_mask_test.cc index ce29b7494bd..2e043d5fca6 100644 --- a/source/blender/blenlib/tests/BLI_index_mask_test.cc +++ b/source/blender/blenlib/tests/BLI_index_mask_test.cc @@ -248,7 +248,7 @@ TEST(index_mask, Complement) } } -TEST(index_mask, ComplementLarge) +TEST(index_mask, ComplementPerformance) { RandomNumberGenerator rng; @@ -256,18 +256,24 @@ TEST(index_mask, ComplementLarge) const int64_t iter_num = 100; const int64_t universe_size = 110000; - for (const int64_t iter : IndexRange(100)) { + for (const int64_t iter : IndexRange(iter_num)) { + const float probability = float(iter) / float(iter_num); + Array bools(mask_size); + for (const int64_t i : bools.index_range()) { + bools[i] = rng.get_float() > probability; + } Set values; for ([[maybe_unused]] const int64_t _ : IndexRange(iter * mask_size / iter_num)) { values.add(rng.get_int32(mask_size)); } IndexMaskMemory memory; - const IndexMask mask = IndexMask::from_predicate( - IndexRange(mask_size), GrainSize(1024), memory, [&](const int64_t i) { - return values.contains(int(i)); - }); + const IndexMask mask = IndexMask::from_bools(bools, memory); - const IndexMask complement = mask.complement(IndexRange(universe_size), memory); + IndexMask complement; + { + SCOPED_TIMER("Probability: " + std::to_string(probability)); + complement = mask.complement(IndexRange(universe_size), memory); + } complement.foreach_index([&](const int64_t i) { EXPECT_FALSE(mask.contains(i)); }); mask.foreach_index([&](const int64_t i) { EXPECT_FALSE(complement.contains(i)); }); EXPECT_EQ(universe_size - mask.size(), complement.size()); -- 2.30.2 From f890f8e0c4ad7774cbbf7e2536e6d99fee3d2989 Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Tue, 30 May 2023 16:37:43 -0400 Subject: [PATCH 04/12] Improve normal test --- source/blender/blenlib/tests/BLI_index_mask_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/source/blender/blenlib/tests/BLI_index_mask_test.cc b/source/blender/blenlib/tests/BLI_index_mask_test.cc index 2e043d5fca6..b0383843dc5 100644 --- a/source/blender/blenlib/tests/BLI_index_mask_test.cc +++ b/source/blender/blenlib/tests/BLI_index_mask_test.cc @@ -245,6 +245,7 @@ TEST(index_mask, Complement) const IndexMask complement = mask.complement(IndexRange(universe_size), memory); EXPECT_EQ(universe_size - mask.size(), complement.size()); complement.foreach_index([&](const int64_t i) { EXPECT_FALSE(mask.contains(i)); }); + mask.foreach_index([&](const int64_t i) { EXPECT_FALSE(complement.contains(i)); }); } } -- 2.30.2 From d2b9825f0e9c19f830f25ee2762e72003c5caf11 Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Tue, 30 May 2023 16:49:46 -0400 Subject: [PATCH 05/12] Make performance test slower --- source/blender/blenlib/tests/BLI_index_mask_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/blender/blenlib/tests/BLI_index_mask_test.cc b/source/blender/blenlib/tests/BLI_index_mask_test.cc index b0383843dc5..4ea50e54ccd 100644 --- a/source/blender/blenlib/tests/BLI_index_mask_test.cc +++ b/source/blender/blenlib/tests/BLI_index_mask_test.cc @@ -253,9 +253,9 @@ TEST(index_mask, 
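The performance test introduced here brackets the complement() call with blenlib's SCOPED_TIMER macro. A rough equivalent of that timing pattern using only the standard library, for running the same measurement outside Blender; time_call and its label are illustrative names:

#include <chrono>
#include <cstdio>
#include <utility>

template<typename Fn> static void time_call(const char *label, Fn &&fn)
{
  const auto start = std::chrono::steady_clock::now();
  std::forward<Fn>(fn)();
  const auto end = std::chrono::steady_clock::now();
  const double ms = std::chrono::duration<double, std::milli>(end - start).count();
  std::printf("%s: %.3f ms\n", label, ms);
}

/* Usage, mirroring the loop body of the test:
 *   time_call("Probability: 0.5", [&]() { complement = mask.complement(universe, memory); });
 */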
ComplementPerformance) { RandomNumberGenerator rng; - const int64_t mask_size = 100000; + const int64_t mask_size = 1000000; const int64_t iter_num = 100; - const int64_t universe_size = 110000; + const int64_t universe_size = 1100000; for (const int64_t iter : IndexRange(iter_num)) { const float probability = float(iter) / float(iter_num); -- 2.30.2 From 83713fef3da78622c3cda2215188a2142a8433d7 Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Tue, 30 May 2023 16:53:18 -0400 Subject: [PATCH 06/12] Add r_ prefix --- source/blender/blenlib/intern/index_mask.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/source/blender/blenlib/intern/index_mask.cc b/source/blender/blenlib/intern/index_mask.cc index 2dabae25849..4b8eaee8661 100644 --- a/source/blender/blenlib/intern/index_mask.cc +++ b/source/blender/blenlib/intern/index_mask.cc @@ -325,19 +325,19 @@ struct ParallelSegmentsCollector { /** * Convert a range to potentially multiple index mask segments. */ -static void range_to_segments(const IndexRange range, Vector &segments) +static void range_to_segments(const IndexRange range, Vector &r_segments) { const Span static_indices = get_static_indices_array(); for (int64_t start = 0; start < range.size(); start += max_segment_size) { const int64_t size = std::min(max_segment_size, range.size() - start); - segments.append_as(range.start() + start, static_indices.take_front(size)); + r_segments.append_as(range.start() + start, static_indices.take_front(size)); } } static void inverted_indices_to_segments(const IndexMaskSegment segment, const int64_t range_threshold, LinearAllocator<> &allocator, - Vector &segments) + Vector &r_segments) { const int64_t offset = segment.offset(); const Span static_indices = get_static_indices_array(); @@ -355,7 +355,7 @@ static void inverted_indices_to_segments(const IndexMaskSegment segment, } MutableSpan offset_indices = allocator.allocate_array(inverted_index_count); offset_indices.copy_from(Span(inverted_indices_array).take_front(inverted_index_count)); - segments.append_as(offset, offset_indices); + r_segments.append_as(offset, offset_indices); inverted_index_count = 0; }; @@ -371,7 +371,7 @@ static void inverted_indices_to_segments(const IndexMaskSegment segment, const int16_t gap_size = next - gap_first; if (gap_size > range_threshold) { finish_indices(); - segments.append_as(offset + gap_first, static_indices.take_front(gap_size)); + r_segments.append_as(offset + gap_first, static_indices.take_front(gap_size)); } else { for (const int64_t i : IndexRange(gap_size)) { -- 2.30.2 From 9a6ccb377077570416ad3a5910bc6bbd4bf20d14 Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Tue, 30 May 2023 17:10:51 -0400 Subject: [PATCH 07/12] Add multiple indices at the same time --- source/blender/blenlib/intern/index_mask.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/source/blender/blenlib/intern/index_mask.cc b/source/blender/blenlib/intern/index_mask.cc index 4b8eaee8661..51489154468 100644 --- a/source/blender/blenlib/intern/index_mask.cc +++ b/source/blender/blenlib/intern/index_mask.cc @@ -344,9 +344,10 @@ static void inverted_indices_to_segments(const IndexMaskSegment segment, int64_t inverted_index_count = 0; std::array inverted_indices_array; - auto add_index = [&](const int16_t index) { - inverted_indices_array[size_t(inverted_index_count)] = index; - inverted_index_count++; + auto add_indices = [&](const int16_t start, const int16_t num) { + int16_t *new_indices_begin = inverted_indices_array.data() + 
inverted_index_count; + std::iota(new_indices_begin, new_indices_begin + num, start); + inverted_index_count += num; }; auto finish_indices = [&]() { @@ -374,9 +375,7 @@ static void inverted_indices_to_segments(const IndexMaskSegment segment, r_segments.append_as(offset + gap_first, static_indices.take_front(gap_size)); } else { - for (const int64_t i : IndexRange(gap_size)) { - add_index(gap_first + int16_t(i)); - } + add_indices(gap_first, gap_size); } indices = indices.drop_front(size_before_gap); -- 2.30.2 From c2b75d0e90b617632099206ae6a4c30fd368326b Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Tue, 30 May 2023 17:28:20 -0400 Subject: [PATCH 08/12] Use a dynamic grain size --- source/blender/blenlib/intern/index_mask.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/source/blender/blenlib/intern/index_mask.cc b/source/blender/blenlib/intern/index_mask.cc index 51489154468..7d6f57cb300 100644 --- a/source/blender/blenlib/intern/index_mask.cc +++ b/source/blender/blenlib/intern/index_mask.cc @@ -398,9 +398,16 @@ IndexMask IndexMask::complement(const IndexRange universe, IndexMaskMemory &memo if (!this->to_range()) { const int64_t segments_num = this->segments_num(); + + constexpr int64_t min_grain_size = 16; + constexpr int64_t max_grain_size = 4096; + const int64_t threads_num = BLI_system_thread_count(); + const int64_t grain_size = std::clamp( + segments_num / threads_num, min_grain_size, max_grain_size); + ParallelSegmentsCollector segments_collector; threading::parallel_for( - IndexRange(segments_num).drop_back(1), 512, [&](const IndexRange range) { + IndexRange(segments_num).drop_back(1), grain_size, [&](const IndexRange range) { ParallelSegmentsCollector::LocalData &local_data = segments_collector.data_by_thread.local(); -- 2.30.2 From edbe13209ead9051c813c9a2713fa00dc3fbf3a0 Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Tue, 30 May 2023 18:47:49 -0400 Subject: [PATCH 09/12] Add a separate code path for small masks --- source/blender/blenlib/intern/index_mask.cc | 57 ++++++++++++--------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/source/blender/blenlib/intern/index_mask.cc b/source/blender/blenlib/intern/index_mask.cc index 7d6f57cb300..234b0ba7b16 100644 --- a/source/blender/blenlib/intern/index_mask.cc +++ b/source/blender/blenlib/intern/index_mask.cc @@ -335,10 +335,10 @@ static void range_to_segments(const IndexRange range, Vector &allocator, Vector &r_segments) { + constexpr int64_t range_threshold = 64; const int64_t offset = segment.offset(); const Span static_indices = get_static_indices_array(); @@ -384,6 +384,25 @@ static void inverted_indices_to_segments(const IndexMaskSegment segment, finish_indices(); } +static void invert_segments(const IndexMask &mask, + const IndexRange segment_range, + LinearAllocator<> &allocator, + Vector &r_segments) +{ + for (const int64_t segment_i : segment_range) { + const IndexMaskSegment segment = mask.segment(segment_i); + inverted_indices_to_segments(segment, allocator, r_segments); + + const IndexMaskSegment next_segment = mask.segment(segment_i + 1); + const int64_t between_start = segment.last() + 1; + const int64_t size_between_segments = next_segment[0] - segment.last() - 1; + const IndexRange range_between_segments(between_start, size_between_segments); + if (!range_between_segments.is_empty()) { + range_to_segments(range_between_segments, r_segments); + } + } +} + IndexMask IndexMask::complement(const IndexRange universe, IndexMaskMemory &memory) const { if 
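The dynamic grain size introduced in this patch aims for roughly one chunk of segments per thread, clamped so that small masks do not pay threading overhead and huge masks still split into reasonably sized chunks. A standalone sketch of the heuristic; std::thread::hardware_concurrency() stands in for the BLI_system_thread_count() call used in the patch:

#include <algorithm>
#include <cstdint>
#include <thread>

static int64_t compute_grain_size(const int64_t items_num)
{
  constexpr int64_t min_grain_size = 16;
  constexpr int64_t max_grain_size = 4096;
  const int64_t threads_num = std::max<int64_t>(1, std::thread::hardware_concurrency());
  return std::clamp(items_num / threads_num, min_grain_size, max_grain_size);
}

/* With 8 hardware threads: 100 items -> 16, 8000 items -> 1000, 1000000 items -> 4096. */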
(this->is_empty()) { @@ -405,28 +424,20 @@ IndexMask IndexMask::complement(const IndexRange universe, IndexMaskMemory &memo const int64_t grain_size = std::clamp( segments_num / threads_num, min_grain_size, max_grain_size); - ParallelSegmentsCollector segments_collector; - threading::parallel_for( - IndexRange(segments_num).drop_back(1), grain_size, [&](const IndexRange range) { - ParallelSegmentsCollector::LocalData &local_data = - segments_collector.data_by_thread.local(); - - for (const int64_t segment_i : range) { - const IndexMaskSegment segment = this->segment(segment_i); - inverted_indices_to_segments(segment, 64, local_data.allocator, local_data.segments); - - const IndexMaskSegment next_segment = this->segment(segment_i + 1); - const int64_t between_start = segment.last() + 1; - const int64_t size_between_segments = next_segment[0] - segment.last() - 1; - const IndexRange range_between_segments(between_start, size_between_segments); - if (!range_between_segments.is_empty()) { - range_to_segments(range_between_segments, local_data.segments); - } - } - }); - - inverted_indices_to_segments(this->segment(segments_num - 1), 64, memory, segments); - segments_collector.reduce(memory, segments); + const IndexRange non_last_segments = IndexRange(segments_num).drop_back(1); + if (segments_num < min_grain_size) { + invert_segments(*this, non_last_segments, memory, segments); + } + else { + ParallelSegmentsCollector segments_collector; + threading::parallel_for(non_last_segments, grain_size, [&](const IndexRange range) { + ParallelSegmentsCollector::LocalData &local_data = + segments_collector.data_by_thread.local(); + invert_segments(*this, range, local_data.allocator, local_data.segments); + }); + segments_collector.reduce(memory, segments); + } + inverted_indices_to_segments(this->segment(segments_num - 1), memory, segments); } if (universe.last() > this->first()) { -- 2.30.2 From 64d31f1b6527794bee65eeff058475043402e5d0 Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Tue, 30 May 2023 19:18:59 -0400 Subject: [PATCH 10/12] Slightly specialize get_size_before_gap --- source/blender/blenlib/intern/index_mask.cc | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/source/blender/blenlib/intern/index_mask.cc b/source/blender/blenlib/intern/index_mask.cc index 234b0ba7b16..66b63382d1a 100644 --- a/source/blender/blenlib/intern/index_mask.cc +++ b/source/blender/blenlib/intern/index_mask.cc @@ -334,6 +334,17 @@ static void range_to_segments(const IndexRange range, Vector indices) +{ + BLI_assert(indices.size() >= 2); + if (indices[1] > indices[0] + 1) { + /* For sparse indices, often the next gap is just after the next index. 
+ * In this case we can skip the logarithmic check below.*/ + return 1; + } + return unique_sorted_indices::find_size_of_next_range(indices); +} + static void inverted_indices_to_segments(const IndexMaskSegment segment, LinearAllocator<> &allocator, Vector &r_segments) @@ -362,7 +373,7 @@ static void inverted_indices_to_segments(const IndexMaskSegment segment, Span indices = segment.base_span(); while (indices.size() > 1) { - const int64_t size_before_gap = unique_sorted_indices::find_size_of_next_range(indices); + const int64_t size_before_gap = get_size_before_gap(indices); if (size_before_gap == indices.size()) { break; } -- 2.30.2 From 7f1433764cafaf86c801885011d78cb1b212a49f Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Wed, 31 May 2023 11:01:39 -0400 Subject: [PATCH 11/12] Add more O(1) checks from Jacques --- source/blender/blenlib/intern/index_mask.cc | 32 ++++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/source/blender/blenlib/intern/index_mask.cc b/source/blender/blenlib/intern/index_mask.cc index 66b63382d1a..b6280879278 100644 --- a/source/blender/blenlib/intern/index_mask.cc +++ b/source/blender/blenlib/intern/index_mask.cc @@ -419,6 +419,32 @@ IndexMask IndexMask::complement(const IndexRange universe, IndexMaskMemory &memo if (this->is_empty()) { return universe; } + if (universe.is_empty()) { + return {}; + } + const std::optional this_range = this->to_range(); + if (this_range) { + const bool first_in_range = this_range->first() <= universe.first(); + const bool last_in_range = this_range->last() >= universe.last(); + if (first_in_range && last_in_range) { + /* This mask fills the entire universe, so the complement is empty. */ + return {}; + } + if (first_in_range) { + /* This mask is a range that contains the start of the universe. + * The complement is a range that contains the end of the universe. */ + const int64_t complement_start = this_range->one_after_last(); + const int64_t complement_size = universe.one_after_last() - complement_start; + return IndexRange(complement_start, complement_size); + } + if (last_in_range) { + /* This mask is a range that contains the end of the universe. + * The complement is a range that contains the start of the universe. 
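These early-outs handle the case where the mask is one contiguous range overlapping the start and/or end of the universe, so the complement is empty or a single range and no segment scanning is needed. A condensed sketch of just that branch logic, with a plain Range struct standing in for IndexRange/IndexMask:

#include <cstdint>
#include <optional>

struct Range {
  int64_t start = 0;
  int64_t size = 0;
  int64_t first() const { return start; }
  int64_t last() const { return start + size - 1; }
  int64_t one_after_last() const { return start + size; }
};

/* Returns the complement if it is itself empty or a single range, std::nullopt otherwise. */
static std::optional<Range> range_complement(const Range mask, const Range universe)
{
  const bool covers_start = mask.first() <= universe.first();
  const bool covers_end = mask.last() >= universe.last();
  if (covers_start && covers_end) {
    return Range{0, 0}; /* The mask fills the universe; the complement is empty. */
  }
  if (covers_start) {
    /* The complement is the tail of the universe. */
    return Range{mask.one_after_last(), universe.one_after_last() - mask.one_after_last()};
  }
  if (covers_end) {
    /* The complement is the head of the universe. */
    return Range{universe.first(), mask.first() - universe.first()};
  }
  return std::nullopt; /* The mask sits strictly inside; the complement has two pieces. */
}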
*/ + const int64_t complement_start = universe.first(); + const int64_t complement_size = this_range->first() - complement_start; + return IndexRange(complement_start, complement_size); + } + } Vector segments; @@ -426,7 +452,7 @@ IndexMask IndexMask::complement(const IndexRange universe, IndexMaskMemory &memo range_to_segments(universe.take_front(this->first() - universe.start()), segments); } - if (!this->to_range()) { + if (!this_range) { const int64_t segments_num = this->segments_num(); constexpr int64_t min_grain_size = 16; @@ -455,9 +481,7 @@ IndexMask IndexMask::complement(const IndexRange universe, IndexMaskMemory &memo range_to_segments(universe.take_back(universe.last() - this->last()), segments); } - IndexMask result = mask_from_segments(segments, memory); - // BLI_assert(result.size() == universe.size() - this->size()); - return result; + return mask_from_segments(segments, memory); } template -- 2.30.2 From 19428c4b6f9cc65267f6c3926b4b4d115263ec09 Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Wed, 31 May 2023 11:04:12 -0400 Subject: [PATCH 12/12] Add some special case tests, remove performance test --- .../blenlib/tests/BLI_index_mask_test.cc | 65 ++++++++++--------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/source/blender/blenlib/tests/BLI_index_mask_test.cc b/source/blender/blenlib/tests/BLI_index_mask_test.cc index 4ea50e54ccd..b14ec2083b4 100644 --- a/source/blender/blenlib/tests/BLI_index_mask_test.cc +++ b/source/blender/blenlib/tests/BLI_index_mask_test.cc @@ -224,6 +224,39 @@ TEST(index_mask, FromPredicateFuzzy) } TEST(index_mask, Complement) +{ + IndexMaskMemory memory; + { + const IndexMask mask(0); + const IndexMask complement = mask.complement(IndexRange(100), memory); + EXPECT_EQ(100 - mask.size(), complement.size()); + complement.foreach_index([&](const int64_t i) { EXPECT_FALSE(mask.contains(i)); }); + mask.foreach_index([&](const int64_t i) { EXPECT_FALSE(complement.contains(i)); }); + } + { + const IndexMask mask(10000); + const IndexMask complement = mask.complement(IndexRange(10000), memory); + EXPECT_EQ(10000 - mask.size(), complement.size()); + complement.foreach_index([&](const int64_t i) { EXPECT_FALSE(mask.contains(i)); }); + mask.foreach_index([&](const int64_t i) { EXPECT_FALSE(complement.contains(i)); }); + } + { + const IndexMask mask(IndexRange(100, 900)); + const IndexMask complement = mask.complement(IndexRange(1000), memory); + EXPECT_EQ(1000 - mask.size(), complement.size()); + complement.foreach_index([&](const int64_t i) { EXPECT_FALSE(mask.contains(i)); }); + mask.foreach_index([&](const int64_t i) { EXPECT_FALSE(complement.contains(i)); }); + } + { + const IndexMask mask(IndexRange(0, 900)); + const IndexMask complement = mask.complement(IndexRange(1000), memory); + EXPECT_EQ(1000 - mask.size(), complement.size()); + complement.foreach_index([&](const int64_t i) { EXPECT_FALSE(mask.contains(i)); }); + mask.foreach_index([&](const int64_t i) { EXPECT_FALSE(complement.contains(i)); }); + } +} + +TEST(index_mask, ComplementFuzzy) { RandomNumberGenerator rng; @@ -249,36 +282,4 @@ TEST(index_mask, Complement) } } -TEST(index_mask, ComplementPerformance) -{ - RandomNumberGenerator rng; - - const int64_t mask_size = 1000000; - const int64_t iter_num = 100; - const int64_t universe_size = 1100000; - - for (const int64_t iter : IndexRange(iter_num)) { - const float probability = float(iter) / float(iter_num); - Array bools(mask_size); - for (const int64_t i : bools.index_range()) { - bools[i] = rng.get_float() > probability; 
- } - Set values; - for ([[maybe_unused]] const int64_t _ : IndexRange(iter * mask_size / iter_num)) { - values.add(rng.get_int32(mask_size)); - } - IndexMaskMemory memory; - const IndexMask mask = IndexMask::from_bools(bools, memory); - - IndexMask complement; - { - SCOPED_TIMER("Probability: " + std::to_string(probability)); - complement = mask.complement(IndexRange(universe_size), memory); - } - complement.foreach_index([&](const int64_t i) { EXPECT_FALSE(mask.contains(i)); }); - mask.foreach_index([&](const int64_t i) { EXPECT_FALSE(complement.contains(i)); }); - EXPECT_EQ(universe_size - mask.size(), complement.size()); - } -} - } // namespace blender::index_mask::tests -- 2.30.2
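For reference, a typical call pattern for the new complement() method, following the style of the tests above (the values are illustrative, and the snippet assumes the test file's includes and namespace):

IndexMaskMemory memory;
const IndexMask mask = IndexMask::from_predicate(
    IndexRange(10), GrainSize(1024), memory,
    [](const int64_t i) { return i == 2 || i == 3 || i == 7; });
const IndexMask complement = mask.complement(IndexRange(10), memory);
/* complement now holds {0, 1, 4, 5, 6, 8, 9}; mask.size() + complement.size() == 10. */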