Smallhash: refactor and fixes

- BLI_smallhash_remove didnt decrement total entries. - rename vars to match closer to ghash. - smallhash_lookup returns NULL when no entry found. - using a zero value key wasn't supported. - no need to memset or calloc bucket arrays - add asserts for unsupported conditions. - added BLI_smallhash_lookup_p
2014-01-30 20:51:44 +11:00
parent ab6157a06a
commit 1f64371ec0
2 changed files with 168 additions and 126 deletions
--- a/source/blender/blenlib/BLI_smallhash.h
+++ b/source/blender/blenlib/BLI_smallhash.h
@@ -42,29 +42,31 @@ typedef struct {
 /*how much stack space to use before dynamically allocating memory*/
 #define SMSTACKSIZE 64
 typedef struct SmallHash {
-	SmallHashEntry *table;
-	SmallHashEntry _stacktable[SMSTACKSIZE];
-	SmallHashEntry _copytable[SMSTACKSIZE];
-	SmallHashEntry *stacktable, *copytable;
-	unsigned int used;
-	unsigned int curhash;
-	unsigned int size;
+	SmallHashEntry *buckets;
+	SmallHashEntry *buckets_stack;
+	SmallHashEntry *buckets_copy;
+	SmallHashEntry _buckets_stack[SMSTACKSIZE];
+	SmallHashEntry _buckets_copy[SMSTACKSIZE];
+	unsigned int nbuckets;
+	unsigned int nentries;
+	unsigned int cursize;
 } SmallHash;

 typedef struct {
-	SmallHash *hash;
+	SmallHash *sh;
 	unsigned int i;
 } SmallHashIter;

-void    BLI_smallhash_init(SmallHash *hash) ATTR_NONNULL(1);
-void    BLI_smallhash_release(SmallHash *hash) ATTR_NONNULL(1);
-void    BLI_smallhash_insert(SmallHash *hash, uintptr_t key, void *item) ATTR_NONNULL(1);
-void    BLI_smallhash_remove(SmallHash *hash, uintptr_t key) ATTR_NONNULL(1);
-void   *BLI_smallhash_lookup(SmallHash *hash, uintptr_t key) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT;
-bool    BLI_smallhash_haskey(SmallHash *hash, uintptr_t key) ATTR_NONNULL(1);
-int     BLI_smallhash_count(SmallHash *hash)  ATTR_NONNULL(1);
+void    BLI_smallhash_init(SmallHash *sh) ATTR_NONNULL(1);
+void    BLI_smallhash_release(SmallHash *sh) ATTR_NONNULL(1);
+void    BLI_smallhash_insert(SmallHash *sh, uintptr_t key, void *item) ATTR_NONNULL(1);
+bool    BLI_smallhash_remove(SmallHash *sh, uintptr_t key) ATTR_NONNULL(1);
+void   *BLI_smallhash_lookup(SmallHash *sh, uintptr_t key) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT;
+void  **BLI_smallhash_lookup_p(SmallHash *sh, uintptr_t key) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT;
+bool    BLI_smallhash_haskey(SmallHash *sh, uintptr_t key) ATTR_NONNULL(1);
+int     BLI_smallhash_count(SmallHash *sh)  ATTR_NONNULL(1);
 void   *BLI_smallhash_iternext(SmallHashIter *iter, uintptr_t *key)  ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT;
-void   *BLI_smallhash_iternew(SmallHash *hash, SmallHashIter *iter, uintptr_t *key) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT;
-/* void BLI_smallhash_print(SmallHash *hash); */ /* UNUSED */
+void   *BLI_smallhash_iternew(SmallHash *sh, SmallHashIter *iter, uintptr_t *key) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT;
+/* void BLI_smallhash_print(SmallHash *sh); */ /* UNUSED */

 #endif /* __BLI_SMALLHASH_H__ */
--- a/source/blender/blenlib/intern/smallhash.c
+++ b/source/blender/blenlib/intern/smallhash.c
@@ -35,6 +35,7 @@
 */

 #include <string.h>
+#include <stdlib.h>

 #include "MEM_guardedalloc.h"
 #include "BLI_utildefines.h"
@@ -53,6 +54,7 @@
 */
 #define SMHASH_CELL_UNUSED  ((void *)0x7FFFFFFF)
 #define SMHASH_CELL_FREE    ((void *)0x7FFFFFFD)
+#define SMHASH_KEY_UNUSED ((uintptr_t)-1)

 /* typically this re-assigns 'h' */
 #define SMHASH_NEXT(h, hoff)  ( \
@@ -63,152 +65,190 @@

 extern const unsigned int hashsizes[];

-void BLI_smallhash_init(SmallHash *hash)
+/**
+ * Check if the number of items in the smallhash is large enough to require more buckets.
+ */
+BLI_INLINE bool smallhash_test_expand_buckets(const unsigned int nentries, const unsigned int nbuckets)
+{
+	return nentries * 3 > nbuckets;
+}
+
+BLI_INLINE void smallhash_init_empty(SmallHash *sh)
 {
 	unsigned int i;

-	memset(hash, 0, sizeof(*hash));
-
-	hash->table = hash->_stacktable;
-	hash->curhash = 2;
-	hash->size = hashsizes[hash->curhash];
-
-	hash->copytable = hash->_copytable;
-	hash->stacktable = hash->_stacktable;
-
-	for (i = 0; i < hash->size; i++) {
-		hash->table[i].val = SMHASH_CELL_FREE;
+	for (i = 0; i < sh->nbuckets; i++) {
+		sh->buckets[i].key = SMHASH_KEY_UNUSED;
+		sh->buckets[i].val = SMHASH_CELL_FREE;
 	}
 }

-/*NOTE: does *not* free *hash itself!  only the direct data!*/
-void BLI_smallhash_release(SmallHash *hash)
+BLI_INLINE SmallHashEntry *smallhash_lookup(SmallHash *sh, const uintptr_t key)
 {
-	if (hash->table != hash->stacktable) {
-		MEM_freeN(hash->table);
-	}
-}
-
-BLI_INLINE SmallHashEntry *smallhash_lookup_first_free(SmallHash *hash, uintptr_t key)
-{
-	SmallHashEntry *entry;
+	SmallHashEntry *e;
 	unsigned int h = (unsigned int)key;
 	unsigned int hoff = 1;

-	for (entry = &hash->table[h % hash->size];
-	     !ELEM(entry->val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE);
-	     h = SMHASH_NEXT(h, hoff), entry = &hash->table[h % hash->size])
+	BLI_assert(key != SMHASH_KEY_UNUSED);
+
+	/* note: there are always more buckets then entries,
+	 * so we know there will always be a free bucket if the key isn't found. */
+	for (e = &sh->buckets[h % sh->nbuckets];
+	     e->val != SMHASH_CELL_FREE;
+	     h = SMHASH_NEXT(h, hoff), e = &sh->buckets[h % sh->nbuckets])
 	{
-		/* Nothing else to do! */
-	}
-
-	return entry;
-}
-
-void BLI_smallhash_insert(SmallHash *hash, uintptr_t key, void *item)
-{
-	SmallHashEntry *entry;
-
-	if (hash->size < hash->used * 3) {
-		unsigned int newsize = hashsizes[++hash->curhash];
-		SmallHashEntry *tmp;
-		unsigned int i = 0;
-
-		if (hash->table != hash->stacktable || newsize > SMSTACKSIZE) {
-			tmp = MEM_callocN(sizeof(*hash->table) * newsize, __func__);
-		}
-		else {
-			SWAP(SmallHashEntry *, hash->stacktable, hash->copytable);
-			tmp = hash->stacktable;
-		}
-
-		SWAP(SmallHashEntry *, tmp, hash->table);
-
-		hash->size = newsize;
-
-		for (i = 0; i < hash->size; i++) {
-			hash->table[i].val = SMHASH_CELL_FREE;
-		}
-
-		for (i = 0; i < hashsizes[hash->curhash - 1]; i++) {
-			if (ELEM(tmp[i].val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE)) {
-				continue;
-			}
-
-			entry = smallhash_lookup_first_free(hash, tmp[i].key);
-			entry->key = tmp[i].key;
-			entry->val = tmp[i].val;
-		}
-
-		if (tmp != hash->stacktable && tmp != hash->copytable) {
-			MEM_freeN(tmp);
+		if (e->key == key) {
+			/* should never happen because unused keys are zero'd */
+			BLI_assert(e->val != SMHASH_CELL_UNUSED);
+			return e;
 		}
 	}

-	entry = smallhash_lookup_first_free(hash, key);
-	entry->key = key;
-	entry->val = item;
-
-	hash->used++;
+	return NULL;
 }

-BLI_INLINE SmallHashEntry *smallhash_lookup(SmallHash *hash, uintptr_t key)
+BLI_INLINE SmallHashEntry *smallhash_lookup_first_free(SmallHash *sh, const uintptr_t key)
 {
-	SmallHashEntry *entry;
+	SmallHashEntry *e;
 	unsigned int h = (unsigned int)key;
 	unsigned int hoff = 1;

-	for (entry = &hash->table[h % hash->size];
-	     ((entry->key != key) || (entry->val == SMHASH_CELL_UNUSED)) && (entry->val != SMHASH_CELL_FREE);
-	     h = SMHASH_NEXT(h, hoff), entry = &hash->table[h % hash->size])
+	for (e = &sh->buckets[h % sh->nbuckets];
+	     !ELEM(e->val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE);
+	     h = SMHASH_NEXT(h, hoff), e = &sh->buckets[h % sh->nbuckets])
 	{
-		/* Nothing else to do! */
+		/* pass */
 	}

-	return entry;
+	return e;
 }

-void BLI_smallhash_remove(SmallHash *hash, uintptr_t key)
+BLI_INLINE void smallhash_resize_buckets(SmallHash *sh, const unsigned int nbuckets)
 {
-	SmallHashEntry *entry = smallhash_lookup(hash, key);
+	SmallHashEntry *buckets_old = sh->buckets;
+	const unsigned int nbuckets_old = sh->nbuckets;
+	unsigned int i = 0;

-	if (entry->val != SMHASH_CELL_FREE) {
-		entry->key = 0;
-		entry->val = SMHASH_CELL_UNUSED;
+	BLI_assert(sh->nbuckets != nbuckets);
+
+	if (buckets_old == sh->buckets_stack && nbuckets <= SMSTACKSIZE) {
+		SWAP(SmallHashEntry *, sh->buckets_stack, sh->buckets_copy);
+		sh->buckets = sh->buckets_stack;
+	}
+	else {
+		sh->buckets = MEM_mallocN(sizeof(*sh->buckets) * nbuckets, __func__);
+	}
+
+	sh->nbuckets = nbuckets;
+
+	smallhash_init_empty(sh);
+
+	for (i = 0; i < nbuckets_old; i++) {
+		if (!ELEM(buckets_old[i].val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE)) {
+			SmallHashEntry *e = smallhash_lookup_first_free(sh, buckets_old[i].key);
+			e->key = buckets_old[i].key;
+			e->val = buckets_old[i].val;
+		}
+	}
+
+	if (buckets_old != sh->buckets_stack && buckets_old != sh->buckets_copy) {
+		MEM_freeN(buckets_old);
 	}
 }

-void *BLI_smallhash_lookup(SmallHash *hash, uintptr_t key)
+void BLI_smallhash_init(SmallHash *sh)
 {
-	SmallHashEntry *entry = smallhash_lookup(hash, key);
+	/* assume 'sh' is uninitialized */

-	return ELEM(entry->val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE) ? NULL : entry->val;
+	sh->nentries = 0;
+	sh->cursize = 2;
+	sh->nbuckets = hashsizes[sh->cursize];
+
+	sh->buckets         = sh->_buckets_stack;
+	sh->buckets_copy    = sh->_buckets_copy;
+	sh->buckets_stack   = sh->_buckets_stack;
+
+	smallhash_init_empty(sh);
 }

-
-bool BLI_smallhash_haskey(SmallHash *hash, uintptr_t key)
+/*NOTE: does *not* free *sh itself!  only the direct data!*/
+void BLI_smallhash_release(SmallHash *sh)
 {
-	SmallHashEntry *entry = smallhash_lookup(hash, key);
-
-	return !ELEM(entry->val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE);
+	if (sh->buckets != sh->buckets_stack) {
+		MEM_freeN(sh->buckets);
+	}
 }

-int BLI_smallhash_count(SmallHash *hash)
+void BLI_smallhash_insert(SmallHash *sh, uintptr_t key, void *val)
 {
-	return (int)hash->used;
+	SmallHashEntry *e;
+
+	BLI_assert(key != SMHASH_KEY_UNUSED);
+	BLI_assert(!ELEM(val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE));
+	BLI_assert(BLI_smallhash_haskey(sh, key) == false);
+
+	if (UNLIKELY(smallhash_test_expand_buckets(++sh->nentries, sh->nbuckets))) {
+		smallhash_resize_buckets(sh, hashsizes[++sh->cursize]);
+	}
+
+	e = smallhash_lookup_first_free(sh, key);
+	e->key = key;
+	e->val = val;
+}
+
+bool BLI_smallhash_remove(SmallHash *sh, uintptr_t key)
+{
+	SmallHashEntry *e = smallhash_lookup(sh, key);
+
+	if (e) {
+		e->key = SMHASH_KEY_UNUSED;
+		e->val = SMHASH_CELL_UNUSED;
+		sh->nentries--;
+
+		return true;
+	}
+	else {
+		return false;
+	}
+}
+
+void *BLI_smallhash_lookup(SmallHash *sh, uintptr_t key)
+{
+	SmallHashEntry *e = smallhash_lookup(sh, key);
+
+	return e ? e->val : NULL;
+}
+
+void **BLI_smallhash_lookup_p(SmallHash *sh, uintptr_t key)
+{
+	SmallHashEntry *e = smallhash_lookup(sh, key);
+
+	return e ? &e->val : NULL;
+}
+
+bool BLI_smallhash_haskey(SmallHash *sh, uintptr_t key)
+{
+	SmallHashEntry *e = smallhash_lookup(sh, key);
+
+	return (e != NULL);
+}
+
+int BLI_smallhash_count(SmallHash *sh)
+{
+	return (int)sh->nentries;
 }

 void *BLI_smallhash_iternext(SmallHashIter *iter, uintptr_t *key)
 {
-	while (iter->i < iter->hash->size) {
-		if ((iter->hash->table[iter->i].val != SMHASH_CELL_UNUSED) &&
-		    (iter->hash->table[iter->i].val != SMHASH_CELL_FREE))
+	while (iter->i < iter->sh->nbuckets) {
+		if ((iter->sh->buckets[iter->i].val != SMHASH_CELL_UNUSED) &&
+		    (iter->sh->buckets[iter->i].val != SMHASH_CELL_FREE))
 		{
 			if (key) {
-				*key = iter->hash->table[iter->i].key;
+				*key = iter->sh->buckets[iter->i].key;
 			}

-			return iter->hash->table[iter->i++].val;
+			return iter->sh->buckets[iter->i++].val;
 		}

 		iter->i++;
@@ -217,9 +257,9 @@ void *BLI_smallhash_iternext(SmallHashIter *iter, uintptr_t *key)
 	return NULL;
 }

-void *BLI_smallhash_iternew(SmallHash *hash, SmallHashIter *iter, uintptr_t *key)
+void *BLI_smallhash_iternew(SmallHash *sh, SmallHashIter *iter, uintptr_t *key)
 {
-	iter->hash = hash;
+	iter->sh = sh;
 	iter->i = 0;

 	return BLI_smallhash_iternext(iter, key);
@@ -228,23 +268,23 @@ void *BLI_smallhash_iternew(SmallHash *hash, SmallHashIter *iter, uintptr_t *key
 /* note, this was called _print_smhash in knifetool.c
 * it may not be intended for general use - campbell */
 #if 0
-void BLI_smallhash_print(SmallHash *hash)
+void BLI_smallhash_print(SmallHash *sh)
 {
-	int i, linecol = 79, c = 0;
+	unsigned int i, linecol = 79, c = 0;

 	printf("{");
-	for (i = 0; i < hash->size; i++) {
-		if (hash->table[i].val == SMHASH_CELL_UNUSED) {
+	for (i = 0; i < sh->nbuckets; i++) {
+		if (sh->buckets[i].val == SMHASH_CELL_UNUSED) {
 			printf("--u-");
 		}
-		else if (hash->table[i].val == SMHASH_CELL_FREE) {
+		else if (sh->buckets[i].val == SMHASH_CELL_FREE) {
 			printf("--f-");
 		}
 		else {
-			printf("%2x", (unsigned int)hash->table[i].key);
+			printf("%2x", (unsigned int)sh->buckets[i].key);
 		}

-		if (i != hash->size - 1)
+		if (i != sh->nbuckets - 1)
 			printf(", ");

 		c += 6;