Use hash for Collection.gobject lookup, speedup object linking #104553

Closed
Campbell Barton wants to merge 5 commits from ideasman42:pr-collection-object-hash into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
2 changed files with 147 additions and 5 deletions
Showing only changes of commit a127d2ecaa - Show all commits

View File

@ -50,6 +50,11 @@
static CLG_LogRef LOG = {"bke.collection"};
/**
* Extra asserts that #Collection.gobject_hash is valid which are too slow even for debug mode.
*/
// #define USE_DEBUG_EXTRA_GOBJECT_ASSERT
/* -------------------------------------------------------------------- */
/** \name Prototypes
* \{ */
@ -72,6 +77,19 @@ static CollectionParent *collection_find_parent(Collection *child, Collection *c
static bool collection_find_child_recursive(const Collection *parent,
const Collection *collection);
static void collection_gobject_hash_ensure(Collection *collection);
static void collection_gobject_hash_remove_object(Collection *collection, const Object *ob);
static void collection_gobject_hash_replace_object_pair(Collection *collection,
Object *ob_old,
Object *ob_new,
CollectionObject *cob);
/** Does nothing unless #USE_DEBUG_EXTRA_GOBJECT_ASSERT is defined. */
static void collection_gobject_hash_assert_internal_consistency(Collection *collection);
#define BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID(collection) \
collection_gobject_hash_assert_internal_consistency(collection)
/** \} */
/* -------------------------------------------------------------------- */
@ -119,6 +137,7 @@ static void collection_copy_data(Main *bmain, ID *id_dst, const ID *id_src, cons
BLI_listbase_clear(&collection_dst->gobject);
BLI_listbase_clear(&collection_dst->children);
BLI_listbase_clear(&collection_dst->runtime.parents);
collection_dst->runtime.gobject_hash = NULL;
LISTBASE_FOREACH (CollectionChild *, child, &collection_src->children) {
collection_child_add(collection_dst, child->collection, flag, false);
@ -136,6 +155,11 @@ static void collection_free_data(ID *id)
BKE_previewimg_free(&collection->preview);
BLI_freelistN(&collection->gobject);
if (collection->runtime.gobject_hash) {
BLI_ghash_free(collection->runtime.gobject_hash, NULL, NULL);
collection->runtime.gobject_hash = NULL;
}
BLI_freelistN(&collection->children);
BLI_freelistN(&collection->runtime.parents);
@ -150,7 +174,13 @@ static void collection_foreach_id(ID *id, LibraryForeachIDData *data)
data, collection->runtime.owner_id, IDWALK_CB_LOOPBACK | IDWALK_CB_NEVER_SELF);
LISTBASE_FOREACH (CollectionObject *, cob, &collection->gobject) {
Object *cob_ob_old = cob->ob;
BKE_LIB_FOREACHID_PROCESS_IDSUPER(data, cob->ob, IDWALK_CB_USER);
ideasman42 marked this conversation as resolved
Review

I would move the checks into the collection_gobject_hash_update_object. At leats the cob_ob_old != cob->ob.

I would move the checks into the `collection_gobject_hash_update_object`. At leats the `cob_ob_old != cob->ob`.
if (collection->runtime.gobject_hash && (cob_ob_old != cob->ob)) {
collection_gobject_hash_replace_object_pair(collection, cob_ob_old, cob->ob, cob);
}
}
LISTBASE_FOREACH (CollectionChild *, child, &collection->children) {
BKE_LIB_FOREACHID_PROCESS_IDSUPER(
@ -288,6 +318,7 @@ static void collection_blend_read_data(BlendDataReader *reader, ID *id)
static void lib_link_collection_data(BlendLibReader *reader, Library *lib, Collection *collection)
{
BLI_assert(collection->runtime.gobject_hash == NULL);
LISTBASE_FOREACH_MUTABLE (CollectionObject *, cob, &collection->gobject) {
BLO_read_id_address(reader, lib, &cob->ob);
@ -354,6 +385,7 @@ void BKE_collection_compat_blend_read_expand(struct BlendExpander *expander,
void BKE_collection_blend_read_expand(BlendExpander *expander, Collection *collection)
{
BLI_assert(collection->runtime.gobject_hash == NULL);
LISTBASE_FOREACH (CollectionObject *, cob, &collection->gobject) {
BLO_expand(expander, cob->ob);
}
@ -518,6 +550,15 @@ bool BKE_collection_delete(Main *bmain, Collection *collection, bool hierarchy)
return false;
}
/* This is being deleted, no need to handle each item.
ideasman42 marked this conversation as resolved
Review

Something seems to be missing here. The collection_object_remove ensures that the hash exists (the first thing it does is the collection_gobject_hash_ensure(collection)).

Something seems to be missing here. The `collection_object_remove` ensures that the hash exists (the first thing it does is the `collection_gobject_hash_ensure(collection)`).
Review

That's bad. I've split out the remove function so low level code that already knows the CollectionObject doesn't need to do a lookup or ensure the hash exists.

That's bad. I've split out the remove function so low level code that already knows the `CollectionObject` doesn't need to do a lookup or ensure the hash exists.
* NOTE: While it might seem an advantage to use the hash instead of the list-lookup
* it is in fact slower because the items are removed in-order,
* so the list-lookup succeeds on the first test. */
if (collection->runtime.gobject_hash) {
BLI_ghash_free(collection->runtime.gobject_hash, NULL, NULL);
collection->runtime.gobject_hash = NULL;
}
if (hierarchy) {
/* Remove child objects. */
CollectionObject *cob = collection->gobject.first;
@ -938,7 +979,10 @@ bool BKE_collection_has_object(Collection *collection, const Object *ob)
if (ELEM(NULL, collection, ob)) {
return false;
}
ideasman42 marked this conversation as resolved
Review

Would say this should also use collection_gobject_hash_ensure? While the overhead of initialization may be significant, this is often called in a collection manipulation context, where other calls (like object adding) will ensure the hash anyway... And then code is simpler, call to BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID can be removed, etc.

Would say this should also use `collection_gobject_hash_ensure`? While the overhead of initialization may be significant, this is often called in a collection manipulation context, where other calls (like object adding) will ensure the hash anyway... And then code is simpler, call to `BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID` can be removed, etc.
BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID(collection);
if (collection->runtime.gobject_hash) {
return BLI_ghash_lookup(collection->runtime.gobject_hash, ob);
}
return BLI_findptr(&collection->gobject, ob, offsetof(CollectionObject, ob));
}
@ -1062,6 +1106,8 @@ static Collection *collection_parent_editable_find_recursive(const ViewLayer *vi
static bool collection_object_add(
Main *bmain, Collection *collection, Object *ob, int flag, const bool add_us)
{
ideasman42 marked this conversation as resolved
Review

This call can be removed (see comment in collection_gobject_hash_ensure).

This call can be removed (see comment in `collection_gobject_hash_ensure`).
BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID(collection);
if (ob->instance_collection) {
/* Cyclic dependency check. */
if ((ob->instance_collection == collection) ||
@ -1070,13 +1116,17 @@ static bool collection_object_add(
}
}
CollectionObject *cob = BLI_findptr(&collection->gobject, ob, offsetof(CollectionObject, ob));
collection_gobject_hash_ensure(collection);
ideasman42 marked this conversation as resolved
Review

You can save a lookup here by using BLI_ghash_ensure_p instead... Since you know you'll always end up with a valid cob for the given ob key in that function.

You can save a lookup here by using `BLI_ghash_ensure_p` instead... Since you know you'll always end up with a valid `cob` for the given `ob` key in that function.
BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID(collection);
ideasman42 marked this conversation as resolved
Review

Why do we need BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID after the collection_gobject_hash_ensure? And why is it needed here but not in the other use of the collection_gobject_hash_ensure some lines below?

Why do we need `BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID` after the `collection_gobject_hash_ensure`? And why is it needed here but not in the other use of the `collection_gobject_hash_ensure` some lines below?
Review

I would actually move this check as part of collection_gobject_hash_ensure itself? Would be much less verbose, and make more sense to me?

I would actually move this check as part of `collection_gobject_hash_ensure` itself? Would be much less verbose, and make more sense to me?
Review

Makes sense, moved into collection_gobject_hash_ensure.

Makes sense, moved into `collection_gobject_hash_ensure`.
CollectionObject *cob = BLI_ghash_lookup(collection->runtime.gobject_hash, ob);
BLI_assert(cob == BLI_findptr(&collection->gobject, ob, offsetof(CollectionObject, ob)));
ideasman42 marked this conversation as resolved
Review

These asserts remove almost all benefit of using the mapping for debug builds... and are totally redundant if BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID is actually checking things? Same for the others below.

So not sure we should keep them here.

These asserts remove almost all benefit of using the mapping for debug builds... and are totally redundant if `BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID` is actually checking things? Same for the others below. So not sure we should keep them here.
Review

Agree, at the time of writing I was eager to flag up any issues early, but this adds too much overhead, even for debug builds, removing the BLI_assert.

Agree, at the time of writing I was eager to flag up any issues early, but this adds too much overhead, even for debug builds, removing the `BLI_assert`.
if (cob) {
return false;
}
cob = MEM_callocN(sizeof(CollectionObject), __func__);
cob->ob = ob;
BLI_ghash_insert(collection->runtime.gobject_hash, ob, cob);
BLI_addtail(&collection->gobject, cob);
BKE_collection_object_cache_free(collection);
@ -1100,11 +1150,17 @@ static bool collection_object_remove(Main *bmain,
Object *ob,
const bool free_us)
ideasman42 marked this conversation as resolved
Review

This call can be removed (see comment in collection_gobject_hash_ensure).

This call can be removed (see comment in `collection_gobject_hash_ensure`).
{
CollectionObject *cob = BLI_findptr(&collection->gobject, ob, offsetof(CollectionObject, ob));
BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID(collection);
collection_gobject_hash_ensure(collection);
CollectionObject *cob = BLI_ghash_lookup(collection->runtime.gobject_hash, ob);
ideasman42 marked this conversation as resolved
Review

Couldn't this use instead cob = BLI_ghash_popkey(collection->runtime.gobject_hash, ob, NULL);? Then there is no need for an extra lookup below in the call to collection_gobject_hash_remove_object(collection, ob);

Couldn't this use instead `cob = BLI_ghash_popkey(collection->runtime.gobject_hash, ob, NULL);`? Then there is no need for an extra lookup below in the call to `collection_gobject_hash_remove_object(collection, ob);`
Review

Yep, good idea.

Yep, good idea.
BLI_assert(cob == BLI_findptr(&collection->gobject, ob, offsetof(CollectionObject, ob)));
if (cob == NULL) {
return false;
}
collection_gobject_hash_remove_object(collection, ob);
BLI_freelinkN(&collection->gobject, cob);
BKE_collection_object_cache_free(collection);
@ -1219,8 +1275,15 @@ bool BKE_collection_object_replace(Main *bmain,
Object *ob_old,
Object *ob_new)
{
CollectionObject *cob = BLI_findptr(
&collection->gobject, ob_old, offsetof(CollectionObject, ob));
BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID(collection);
const bool use_hash_exists = collection->runtime.gobject_hash != NULL;
CollectionObject *cob;
if (use_hash_exists) {
cob = BLI_ghash_popkey(collection->runtime.gobject_hash, ob_old, NULL);
}
else {
cob = BLI_findptr(&collection->gobject, ob_old, offsetof(CollectionObject, ob));
}
if (cob == NULL) {
return false;
}
@ -1229,6 +1292,10 @@ bool BKE_collection_object_replace(Main *bmain,
cob->ob = ob_new;
id_us_plus(&cob->ob->id);
if (use_hash_exists) {
BLI_ghash_insert(collection->runtime.gobject_hash, cob->ob, cob);
}
if (BKE_collection_is_in_scene(collection)) {
BKE_main_collection_sync(bmain);
}
@ -1313,9 +1380,14 @@ void BKE_collections_object_remove_nulls(Main *bmain)
static void collection_object_remove_duplicates(Collection *collection)
{
bool changed = false;
BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID(collection);
const bool use_hash_exists = (collection->runtime.gobject_hash != NULL);
LISTBASE_FOREACH_MUTABLE (CollectionObject *, cob, &collection->gobject) {
if (cob->ob->runtime.collection_management) {
if (use_hash_exists) {
collection_gobject_hash_remove_object(collection, cob->ob);
}
BLI_freelinkN(&collection->gobject, cob);
changed = true;
continue;
@ -1578,6 +1650,72 @@ static CollectionParent *collection_find_parent(Collection *child, Collection *c
return BLI_findptr(&child->runtime.parents, collection, offsetof(CollectionParent, collection));
}
ideasman42 marked this conversation as resolved
Review

Should also call BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID here.

Should also call `BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID` here.
static void collection_gobject_hash_ensure(Collection *collection)
{
if (collection->runtime.gobject_hash) {
return;
}
GHash *gobject_hash = BLI_ghash_ptr_new_ex(__func__, BLI_listbase_count(&collection->gobject));
LISTBASE_FOREACH (CollectionObject *, cob, &collection->gobject) {
BLI_ghash_insert(gobject_hash, cob->ob, cob);
}
collection->runtime.gobject_hash = gobject_hash;
}
static void collection_gobject_hash_remove_object(Collection *collection, const Object *ob)
{
const bool found = BLI_ghash_remove(collection->runtime.gobject_hash, ob, NULL, NULL);
BLI_assert(found);
UNUSED_VARS_NDEBUG(found);
}
/**
* Update the collections object hash the old reference to `ob_old` uses `cob->ob` as the key.
ideasman42 marked this conversation as resolved
Review

This is not a proper English sentence... ;)

This is not a proper English sentence... ;)
*
* \param ob_old: The existing key to `cob` in the hash, not removed when NULL.
* \param ob_new: The new key to `cob` in the hash, ignored when NULL.
*/
static void collection_gobject_hash_replace_object_pair(Collection *collection,
Object *ob_old,
Object *ob_new,
CollectionObject *cob)
{
BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID(collection);
if (ob_old) {
ideasman42 marked this conversation as resolved
Review

This comment does not reall bring information. The fact that it may be NULL is clear from the NULL-pointer check.

The more interesting for a reader information would be to know why CollectionObject can be pointing to NULL. Intuitively those are malformed and can/should simply be removed. If it is allowed to be NULL, an explanation is needed (together with a comment in the typedef struct CollectionObject, and a lot of checks all over the code base are to be added.

The confusing part is that the cob->ob is checked for NULL in SOME cases, inconsistently.

P.S. Generally, avoid over-commenting, as it actually hurts readability.

This comment does not reall bring information. The fact that it may be NULL is clear from the NULL-pointer check. The more interesting for a reader information would be to know why `CollectionObject` can be pointing to NULL. Intuitively those are malformed and can/should simply be removed. If it is allowed to be NULL, an explanation is needed (together with a comment in the `typedef struct CollectionObject`, and a lot of checks all over the code base are to be added. The confusing part is that the `cob->ob` is checked for NULL in SOME cases, inconsistently. P.S. Generally, avoid over-commenting, as it actually hurts readability.
Review

Yep, expanded the comment to note this isn't simply a paranoid null check.

  /* The object may be set to NULL if the ID is being cleared from #collection_foreach_id,
   * generally `cob->ob` is not expected to be NULL. */
Yep, expanded the comment to note this isn't simply a paranoid null check. ``` /* The object may be set to NULL if the ID is being cleared from #collection_foreach_id, * generally `cob->ob` is not expected to be NULL. */ ```
collection_gobject_hash_remove_object(collection, ob_old);
}
/* This may be set to NULL. */
if (ob_new) {
ideasman42 marked this conversation as resolved
Review

This may leak in case ob_new is NULL but cob is not, depending on calling code...

You could make it clear in doc that this function takes 'ownership' of the given cob if any, and may free it if it cannot use it.

But TBH, since this is addressing such a specific use-case, and with a very specific behavior, I would not even make a separate function, and would just add the code directly into collection_foreach_id. Or at the very least move this function above its caller, and rename it to something like collection_foreach_id_gobject_hash_update. I have a hard time seeing this very specific behavior used anywhere else than in the remapping use-case.

This may leak in case `ob_new` is NULL but `cob` is not, depending on calling code... You could make it clear in doc that this function takes 'ownership' of the given `cob` if any, and may free it if it cannot use it. But TBH, since this is addressing such a specific use-case, and with a very specific behavior, I would not even make a separate function, and would just add the code directly into `collection_foreach_id`. Or at the very least move this function above its caller, and rename it to something like `collection_foreach_id_gobject_hash_update`. I have a hard time seeing this very specific behavior used anywhere else than in the remapping use-case.
Review

It's awkward either way, Sergey requested to move this into a utility function, while it's nice to split out in some ways - it's rather spesific functionality which is only used in one place, renamed the function and removed the ob_new argument so cob->ob is always used.

It's awkward either way, Sergey requested to move this into a utility function, while it's nice to split out in some ways - it's rather spesific functionality which is only used in one place, renamed the function and removed the `ob_new` argument so `cob->ob` is always used.
BLI_ghash_insert(collection->runtime.gobject_hash, ob_new, cob);
}
}
/**
* Should only be called by: #BLI_ASSERT_COLLECION_GOBJECT_HASH_IS_VALID macro,
* this is an expensive operation intended only to be used for debugging.
*/
static void collection_gobject_hash_assert_internal_consistency(Collection *collection)
{
#ifdef USE_DEBUG_EXTRA_GOBJECT_ASSERT
if (collection->runtime.gobject_hash == NULL) {
return;
}
GHash *gobject_hash = collection->runtime.gobject_hash;
int gobject_count = 0;
LISTBASE_FOREACH (CollectionObject *, cob, &collection->gobject) {
CollectionObject *cob_test = BLI_ghash_lookup(gobject_hash, cob->ob);
BLI_assert(cob == cob_test);
gobject_count += 1;
}
const int gobject_hash_count = BLI_ghash_len(gobject_hash);
BLI_assert(gobject_count == gobject_hash_count);
#else
UNUSED_VARS(collection);
#endif /* USE_DEBUG_EXTRA_GOBJECT_ASSERT */
}
static bool collection_child_add(Collection *parent,
Collection *collection,
const int flag,

View File

@ -19,6 +19,7 @@ extern "C" {
struct Collection;
struct Object;
struct GHash;
typedef struct CollectionObject {
struct CollectionObject *next, *prev;
@ -61,6 +62,9 @@ typedef struct Collection_Runtime {
/** List of collections that are a parent of this data-block. */
ListBase parents;
/** An optional map for faster lookups on #Collection.gobject */
struct GHash *gobject_hash;
uint8_t tag;
char _pad0[7];