This repository has been archived on 2023-10-09. You can view files and clone it, but cannot push or open issues or pull requests.
Files
blender-archive/source/blender/blenlib/intern/smallhash.c

427 lines
10 KiB
C

/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* The Original Code is Copyright (C) 2008 Blender Foundation.
* All rights reserved.
*/
/** \file
* \ingroup bli
*
* A light stack-friendly hash library, it uses stack space for relatively small,
* fixed size hash tables but falls back to heap memory once the stack limit is reached
* (#SMSTACKSIZE).
*
* Based on a double hashing approach (non-chaining) which uses more buckets than entries,
* stepping over buckets when two keys share the same hash so any key can find a free bucket.
*
* See: https://en.wikipedia.org/wiki/Double_hashing
*
* \warning This should _only_ be used for small hashes
* where allocating a hash every time is unacceptable.
* Otherwise #GHash should be used instead.
*
* #SmallHashEntry.key
* - ``SMHASH_KEY_UNUSED`` means the key in the cell has not been initialized.
*
* #SmallHashEntry.val
* - ``SMHASH_CELL_UNUSED`` means this cell is inside a key series.
* - ``SMHASH_CELL_FREE`` means this cell terminates a key series.
*
* Note that the values and keys are often pointers or index values,
* use the maximum values to avoid real pointers colliding with magic numbers.
*/
#include <string.h>
#include <stdlib.h>
#include "BLI_sys_types.h"
#include "MEM_guardedalloc.h"
#include "BLI_utildefines.h"
#include "BLI_smallhash.h"
#include "BLI_strict_flags.h"
/* Sentinel values, taken from the top of the `uintptr_t` range so they are unlikely
 * to collide with real pointers or index values stored in the hash. */
/* Key of a cell that does not hold an entry. */
#define SMHASH_KEY_UNUSED ((uintptr_t)(UINTPTR_MAX - 0))
/* Value of a cell that terminates a key series (lookups stop here). */
#define SMHASH_CELL_FREE ((void *)(UINTPTR_MAX - 1))
/* Value of a cell inside a key series that holds no entry (lookups step over it). */
#define SMHASH_CELL_UNUSED ((void *)(UINTPTR_MAX - 2))
/* Advance a probe sequence.
 *
 * Updates the offset 'hoff' in place to `hoff * 2 + 1` (starting from 1 this gives 3, 7, 15, ...)
 * and evaluates to 'h' plus the new offset. 'h' itself is not modified by the macro,
 * callers typically re-assign it: `h = SMHASH_NEXT(h, hoff)`.
 *
 * Both arguments must be `uint` lvalues
 * (presumably enforced at compile time by the CHECK_TYPE_INLINE calls). */
#define SMHASH_NEXT(h, hoff) \
(CHECK_TYPE_INLINE(&(h), uint *), \
CHECK_TYPE_INLINE(&(hoff), uint *), \
((h) + (((hoff) = ((hoff)*2) + 1), (hoff))))
/* nothing uses BLI_smallhash_remove yet */
// #define USE_REMOVE
/**
 * Check if \a val is a real stored value rather than one of the cell sentinels.
 *
 * Without `USE_REMOVE` no cell is ever marked #SMHASH_CELL_UNUSED,
 * so only #SMHASH_CELL_FREE needs to be tested.
 */
BLI_INLINE bool smallhash_val_is_used(const void *val)
{
#ifdef USE_REMOVE
  if (val == SMHASH_CELL_FREE || val == SMHASH_CELL_UNUSED) {
    return false;
  }
  return true;
#else
  return !(val == SMHASH_CELL_FREE);
#endif
}
/* Table of bucket counts, defined elsewhere (presumably shared with #GHash, going by the name).
 * Indexed by #SmallHash.cursize to get the bucket count for each growth step. */
extern const uint BLI_ghash_hash_sizes[];
#define hashsizes BLI_ghash_hash_sizes
/**
 * Hash function: the key is used as its own hash, narrowed to `uint`.
 */
BLI_INLINE uint smallhash_key(const uintptr_t key)
{
  const uint hash = (uint)key;
  return hash;
}
/**
 * Test if \a nentries items are too many for \a nbuckets buckets.
 *
 * \return true when roughly 1.5 times the entry count exceeds the bucket count.
 */
BLI_INLINE bool smallhash_test_expand_buckets(const uint nentries, const uint nbuckets)
{
  /* Entries plus half again (integer division). */
  const uint nbuckets_wanted = nentries + (nentries / 2);
  return nbuckets < nbuckets_wanted;
}
/**
 * Mark every bucket of \a sh as empty: key unused, value free.
 * Expects `sh->buckets` and `sh->nbuckets` to be set already.
 */
BLI_INLINE void smallhash_init_empty(SmallHash *sh)
{
  SmallHashEntry *cell = sh->buckets;
  SmallHashEntry *const cell_end = cell + sh->nbuckets;

  for (; cell != cell_end; cell++) {
    cell->key = SMHASH_KEY_UNUSED;
    cell->val = SMHASH_CELL_FREE;
  }
}
/**
 * Step `sh->cursize` / `sh->nbuckets` up through the size table
 * until \a nentries_reserve entries fit without needing to expand.
 *
 * Only the size fields are updated, the bucket storage itself is not touched.
 */
BLI_INLINE void smallhash_buckets_reserve(SmallHash *sh, const uint nentries_reserve)
{
  for (;;) {
    if (!smallhash_test_expand_buckets(nentries_reserve, sh->nbuckets)) {
      break;
    }
    sh->cursize++;
    sh->nbuckets = hashsizes[sh->cursize];
  }
}
/**
 * Find the cell holding \a key by walking its probe sequence.
 *
 * \return the matching cell, or NULL if a free cell is reached first.
 */
BLI_INLINE SmallHashEntry *smallhash_lookup(const SmallHash *sh, const uintptr_t key)
{
  uint hash = smallhash_key(key);
  uint step = 1;
  SmallHashEntry *cell = &sh->buckets[hash % sh->nbuckets];

  BLI_assert(key != SMHASH_KEY_UNUSED);

  /* There are always more buckets than entries,
   * so a free cell is guaranteed to end the search when the key is absent. */
  while (cell->val != SMHASH_CELL_FREE) {
    if (cell->key == key) {
      /* Removed cells have their key reset to #SMHASH_KEY_UNUSED,
       * which never equals a valid key, so a match can't be a removed cell. */
      BLI_assert(cell->val != SMHASH_CELL_UNUSED);
      return cell;
    }
    hash = SMHASH_NEXT(hash, step);
    cell = &sh->buckets[hash % sh->nbuckets];
  }
  return NULL;
}
/**
 * Walk the probe sequence of \a key and return the first cell whose value is not in use
 * (see #smallhash_val_is_used). The cell is returned as-is, the caller fills it in.
 */
BLI_INLINE SmallHashEntry *smallhash_lookup_first_free(SmallHash *sh, const uintptr_t key)
{
  uint hash = smallhash_key(key);
  uint step = 1;
  SmallHashEntry *cell = &sh->buckets[hash % sh->nbuckets];

  while (smallhash_val_is_used(cell->val)) {
    hash = SMHASH_NEXT(hash, step);
    cell = &sh->buckets[hash % sh->nbuckets];
  }
  return cell;
}
/**
 * Rebuild the table of \a sh with \a nbuckets buckets, re-inserting every used entry.
 *
 * The new buckets use `sh->buckets_stack` when \a nbuckets is no larger than #SMSTACKSIZE,
 * otherwise they are heap allocated. Previously heap allocated buckets are freed.
 */
BLI_INLINE void smallhash_resize_buckets(SmallHash *sh, const uint nbuckets)
{
  SmallHashEntry *buckets_old = sh->buckets;
  const uint nbuckets_old = sh->nbuckets;
  const bool was_alloc = (buckets_old != sh->buckets_stack);
  uint i;

  BLI_assert(sh->nbuckets != nbuckets);

  if (nbuckets <= SMSTACKSIZE) {
    if (!was_alloc) {
      /* The old entries live in the stack buffer that is about to be re-initialized,
       * move them aside so they can still be read while re-inserting. */
      const size_t size = sizeof(*buckets_old) * nbuckets_old;
      buckets_old = alloca(size);
      memcpy(buckets_old, sh->buckets, size);
    }
    /* Otherwise the old buckets are a separate heap block: read from it directly
     * and keep the pointer so the block is freed below.
     * Copying it with `alloca` would leak it and pass stack memory to #MEM_freeN. */
    sh->buckets = sh->buckets_stack;
  }
  else {
    sh->buckets = MEM_mallocN(sizeof(*sh->buckets) * nbuckets, __func__);
  }

  sh->nbuckets = nbuckets;
  smallhash_init_empty(sh);

  /* Re-insert, the bucket count changed so every entry gets a new position. */
  for (i = 0; i < nbuckets_old; i++) {
    if (smallhash_val_is_used(buckets_old[i].val)) {
      SmallHashEntry *e = smallhash_lookup_first_free(sh, buckets_old[i].key);
      e->key = buckets_old[i].key;
      e->val = buckets_old[i].val;
    }
  }

  if (was_alloc) {
    MEM_freeN(buckets_old);
  }
}
/**
 * Initialize \a sh, optionally sizing it up-front for \a nentries_reserve entries.
 *
 * \param sh: Assumed to be uninitialized, any previous contents are ignored (not freed).
 * \param nentries_reserve: Expected number of entries, 0 to use the initial size.
 */
void BLI_smallhash_init_ex(SmallHash *sh, const uint nentries_reserve)
{
  /* Start out using the `buckets_stack` member at the initial size. */
  sh->buckets = sh->buckets_stack;
  sh->nentries = 0;
  sh->cursize = 2;
  sh->nbuckets = hashsizes[sh->cursize];

  if (nentries_reserve != 0) {
    smallhash_buckets_reserve(sh, nentries_reserve);
    /* The reserved size may no longer fit the stack buffer. */
    if (sh->nbuckets > SMSTACKSIZE) {
      sh->buckets = MEM_mallocN(sizeof(*sh->buckets) * sh->nbuckets, __func__);
    }
  }

  smallhash_init_empty(sh);
}
/**
 * Initialize \a sh at the initial size, without reserving space for entries.
 */
void BLI_smallhash_init(SmallHash *sh)
{
  const uint nentries_reserve = 0;
  BLI_smallhash_init_ex(sh, nentries_reserve);
}
/**
 * Free the heap allocated buckets of \a sh, if it has any.
 *
 * \note Only the data owned by \a sh is freed, never `*sh` itself.
 */
void BLI_smallhash_release(SmallHash *sh)
{
  if (sh->buckets == sh->buckets_stack) {
    /* Nothing was heap allocated. */
    return;
  }
  MEM_freeN(sh->buckets);
}
/**
 * Add a new entry, growing the table first when it becomes too full.
 *
 * \param key: Must not be #SMHASH_KEY_UNUSED and must not already be in \a sh (asserted).
 * \param val: Must not be one of the cell sentinel values (asserted).
 */
void BLI_smallhash_insert(SmallHash *sh, uintptr_t key, void *val)
{
  SmallHashEntry *cell;

  BLI_assert(key != SMHASH_KEY_UNUSED);
  BLI_assert(smallhash_val_is_used(val));
  BLI_assert(BLI_smallhash_haskey(sh, key) == false);

  /* Count the new entry before testing, so the table is sized to include it. */
  sh->nentries++;
  if (UNLIKELY(smallhash_test_expand_buckets(sh->nentries, sh->nbuckets))) {
    sh->cursize++;
    smallhash_resize_buckets(sh, hashsizes[sh->cursize]);
  }

  cell = smallhash_lookup_first_free(sh, key);
  cell->key = key;
  cell->val = val;
}
/**
 * Set the value of a key that may already be in the smallhash:
 * an existing entry has its value replaced, otherwise a new entry is inserted.
 *
 * Avoids #BLI_smallhash_remove, #BLI_smallhash_insert calls (double lookups)
 *
 * \returns true if a new key has been added.
 */
bool BLI_smallhash_reinsert(SmallHash *sh, uintptr_t key, void *item)
{
  SmallHashEntry *cell = smallhash_lookup(sh, key);

  if (cell == NULL) {
    BLI_smallhash_insert(sh, key, item);
    return true;
  }

  cell->val = item;
  return false;
}
#ifdef USE_REMOVE
/**
 * Remove the entry for \a key.
 *
 * The cell is marked #SMHASH_CELL_UNUSED rather than #SMHASH_CELL_FREE,
 * so lookups of other keys whose probe sequence passes through it don't stop early.
 *
 * \return true if the key was found and removed.
 */
bool BLI_smallhash_remove(SmallHash *sh, uintptr_t key)
{
  SmallHashEntry *cell = smallhash_lookup(sh, key);

  if (cell == NULL) {
    return false;
  }

  cell->key = SMHASH_KEY_UNUSED;
  cell->val = SMHASH_CELL_UNUSED;
  sh->nentries--;
  return true;
}
#endif
/**
 * \return the value stored for \a key, or NULL when the key is not in \a sh.
 *
 * \note A stored NULL value can't be told apart from a missing key here,
 * use #BLI_smallhash_haskey or #BLI_smallhash_lookup_p for that.
 */
void *BLI_smallhash_lookup(const SmallHash *sh, uintptr_t key)
{
  SmallHashEntry *cell = smallhash_lookup(sh, key);

  if (cell == NULL) {
    return NULL;
  }
  return cell->val;
}
/**
 * \return a pointer to the value slot stored for \a key (so the value can be changed in place),
 * or NULL when the key is not in \a sh.
 */
void **BLI_smallhash_lookup_p(const SmallHash *sh, uintptr_t key)
{
  SmallHashEntry *cell = smallhash_lookup(sh, key);

  if (cell == NULL) {
    return NULL;
  }
  return &cell->val;
}
/**
 * \return true when \a key has an entry in \a sh.
 */
bool BLI_smallhash_haskey(const SmallHash *sh, uintptr_t key)
{
  return smallhash_lookup(sh, key) != NULL;
}
/**
 * \return the number of entries in \a sh (the `nentries` counter converted to `int`).
 */
int BLI_smallhash_len(const SmallHash *sh)
{
return (int)sh->nentries;
}
/**
 * Step \a iter to the next used cell.
 *
 * \param key: Optional, when not NULL it receives the key of the returned cell.
 * \return the next used cell, or NULL once all buckets have been visited.
 */
BLI_INLINE SmallHashEntry *smallhash_iternext(SmallHashIter *iter, uintptr_t *key)
{
  for (; iter->i < iter->sh->nbuckets; iter->i++) {
    SmallHashEntry *cell = &iter->sh->buckets[iter->i];
    if (smallhash_val_is_used(cell->val)) {
      if (key != NULL) {
        *key = cell->key;
      }
      /* Step past the returned cell so the next call continues after it. */
      iter->i++;
      return cell;
    }
  }
  return NULL;
}
/**
 * Step the iterator.
 *
 * \param key: Optional, receives the key of the entry when not NULL.
 * \return the value of the next entry, or NULL when there are no more entries.
 *
 * \note A stored NULL value also returns NULL,
 * use #BLI_smallhash_iternext_p when NULL values may be stored.
 */
void *BLI_smallhash_iternext(SmallHashIter *iter, uintptr_t *key)
{
  SmallHashEntry *cell = smallhash_iternext(iter, key);

  if (cell == NULL) {
    return NULL;
  }
  return cell->val;
}
/**
 * Step the iterator.
 *
 * \param key: Optional, receives the key of the entry when not NULL.
 * \return a pointer to the value slot of the next entry,
 * or NULL when there are no more entries.
 */
void **BLI_smallhash_iternext_p(SmallHashIter *iter, uintptr_t *key)
{
  SmallHashEntry *cell = smallhash_iternext(iter, key);

  if (cell == NULL) {
    return NULL;
  }
  return &cell->val;
}
/**
 * Start iterating over \a sh: rewind \a iter to the first bucket,
 * then return the first entry exactly as #BLI_smallhash_iternext does.
 */
void *BLI_smallhash_iternew(const SmallHash *sh, SmallHashIter *iter, uintptr_t *key)
{
  iter->i = 0;
  iter->sh = sh;

  return BLI_smallhash_iternext(iter, key);
}
/**
 * Start iterating over \a sh: rewind \a iter to the first bucket,
 * then return the first entry exactly as #BLI_smallhash_iternext_p does.
 */
void **BLI_smallhash_iternew_p(const SmallHash *sh, SmallHashIter *iter, uintptr_t *key)
{
  iter->i = 0;
  iter->sh = sh;

  return BLI_smallhash_iternext_p(iter, key);
}
/** \name Debugging & Introspection
* \{ */
/* note, this was called _print_smhash in knifetool.c
* it may not be intended for general use - campbell */
#if 0
/**
 * Debug helper: print one item per bucket of \a sh to `stdout`, wrapped in braces.
 *
 * - `--u-` for a cell marked #SMHASH_CELL_UNUSED.
 * - `--f-` for a cell marked #SMHASH_CELL_FREE.
 * - Otherwise the key (narrowed to `uint`) in hexadecimal.
 */
void BLI_smallhash_print(SmallHash *sh)
{
  uint i, linecol = 79, c = 0;

  printf("{");
  for (i = 0; i < sh->nbuckets; i++) {
    if (sh->buckets[i].val == SMHASH_CELL_UNUSED) {
      printf("--u-");
    }
    else if (sh->buckets[i].val == SMHASH_CELL_FREE) {
      printf("--f-");
    }
    else {
      printf("%2x", (uint)sh->buckets[i].key);
    }

    if (i != sh->nbuckets - 1) {
      printf(", ");
    }

    /* Each item is counted as 6 columns, wrap once the line is full. */
    c += 6;
    if (c >= linecol) {
      printf("\n ");
      c = 0;
    }
  }
  /* Close the brace opened above and end the line,
   * otherwise the output is left unterminated. */
  printf("}\n");

  fflush(stdout);
}
#endif
#ifdef DEBUG
/**
 * Measure how well the hash function performs,
 * as the average number of cells visited to reach an entry
 * (1.0 is perfect - no stepping needed).
 *
 * Smaller is better!
 *
 * \return the quality, or -1.0 when \a sh has no entries.
 */
double BLI_smallhash_calc_quality(SmallHash *sh)
{
  uint64_t steps_total = 0;
  uint i;

  if (sh->nentries == 0) {
    return -1.0;
  }

  for (i = 0; i < sh->nbuckets; i++) {
    SmallHashEntry *cell_final = &sh->buckets[i];
    SmallHashEntry *cell;
    uint hash;
    uint step;

    if (cell_final->key == SMHASH_KEY_UNUSED) {
      continue;
    }

    /* Replay the probe sequence of this key, counting the steps needed to reach its cell. */
    hash = smallhash_key(cell_final->key);
    step = 1;
    cell = &sh->buckets[hash % sh->nbuckets];
    while (cell != cell_final) {
      steps_total += 1;
      hash = SMHASH_NEXT(hash, step);
      cell = &sh->buckets[hash % sh->nbuckets];
    }
  }

  return ((double)(sh->nentries + steps_total) / (double)sh->nentries);
}
#endif
/** \} */