This repository has been archived on 2023-10-09. You can view files and clone it, but cannot push or open issues or pull requests.
Files
blender-archive/source/blender/blenlib/BLI_string_map.hh

541 lines
15 KiB
C++

/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __BLI_STRING_MAP_HH__
#define __BLI_STRING_MAP_HH__
/** \file
* \ingroup bli
*
* This tries to solve the issue that a normal map with std::string as key might do many
* allocations when the keys are longer than 16 bytes (the usual small string optimization size).
*
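* The keys are not stored as separate std::string instances. Instead, the characters of all keys
* are appended to a single shared buffer, so that inserting a key does not require an allocation
* of its own. Having this abstraction in place also makes it easier to optimize the implementation
* further later on, and a special map with string keys can be quite handy in any case. */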
#include <cstring>

#include "BLI_map.hh"
#include "BLI_optional.hh"
#include "BLI_string_ref.hh"
#include "BLI_vector.hh"
namespace BLI {
// clang-format off
/**
 * Opens the probing loop that is shared by the lookup and insertion functions of StringMap.
 *
 * HASH: Hash of the key. It determines the sequence in which slots are visited.
 * ARRAY: Array to iterate over. It has to provide `slot_mask()` and `item(index)`.
 * OPTIONAL_CONST: Either `const` or empty. Qualifies the reference to the current item.
 * R_ITEM: Name of the reference to the current `Item`, declared by this macro.
 * R_OFFSET: Name of the variable holding the current slot offset inside of that item, declared
 *   by this macro.
 *
 * The masked hash is split into two parts: the lowest bits (OFFSET_MASK) select the slot inside of
 * an item at which the iteration starts, the remaining bits (shifted by OFFSET_SHIFT) select the
 * item. The inner loop visits every slot of that item once, wrapping around until it is back at
 * the initial offset.
 *
 * The outer loop has no exit condition, so the code between the two macros has to leave the
 * function with a `return`. Do not use `continue` in that code: it would jump directly to the
 * condition of the inner loop and skip the offset increment in ITER_SLOTS_END.
 *
 * The macro declares `hash_copy` and `perturb` in the enclosing scope as well as `item_index` and
 * `initial_offset` inside of the loop. Those names must not be used by the surrounding code.
 */
#define ITER_SLOTS_BEGIN(HASH, ARRAY, OPTIONAL_CONST, R_ITEM, R_OFFSET) \
uint32_t hash_copy = HASH; \
uint32_t perturb = HASH; \
while (true) { \
uint32_t item_index = (hash_copy & ARRAY.slot_mask()) >> OFFSET_SHIFT; \
uint8_t R_OFFSET = hash_copy & OFFSET_MASK; \
uint8_t initial_offset = R_OFFSET; \
OPTIONAL_CONST Item &R_ITEM = ARRAY.item(item_index); \
do {
/**
 * Closes the loops opened by ITER_SLOTS_BEGIN. R_OFFSET has to be the name that has been passed
 * to ITER_SLOTS_BEGIN.
 *
 * Advances to the next slot of the current item. Once all slots of the item have been visited,
 * `perturb` is shifted down and mixed into `hash_copy` to determine the next item to check.
 * The trailing `((void)0)` allows the macro invocation to be followed by a semicolon.
 */
#define ITER_SLOTS_END(R_OFFSET) \
R_OFFSET = (R_OFFSET + 1) & OFFSET_MASK; \
} while (R_OFFSET != initial_offset); \
perturb >>= 5; \
hash_copy = hash_copy * 5 + 1 + perturb; \
} ((void)0)
// clang-format on
/**
 * A map from string keys to values of type T.
 * The Allocator is forwarded to the OpenAddressingArray that stores the slots.
 */
template<typename T, typename Allocator = GuardedAllocator> class StringMap {
private:
/* Mask that extracts the slot offset inside of an item from a hash (used by ITER_SLOTS_*). */
static constexpr uint32_t OFFSET_MASK = 3;
/* Number of bits the masked hash is shifted to the right to get the index of an item. */
static constexpr uint32_t OFFSET_SHIFT = 2;
class Item {
private:
static constexpr int32_t IS_EMPTY = -1;
uint32_t m_hashes[4];
int32_t m_indices[4];
char m_values[sizeof(T) * 4];
public:
static constexpr uint slots_per_item = 4;
Item()
{
for (uint offset = 0; offset < 4; offset++) {
m_indices[offset] = IS_EMPTY;
}
}
~Item()
{
for (uint offset = 0; offset < 4; offset++) {
if (this->is_set(offset)) {
destruct(this->value(offset));
}
}
}
Item(const Item &other)
{
for (uint offset = 0; offset < 4; offset++) {
m_indices[offset] = other.m_indices[offset];
if (other.is_set(offset)) {
m_hashes[offset] = other.m_hashes[offset];
new (this->value(offset)) T(*other.value(offset));
}
}
}
Item(Item &&other) noexcept
{
for (uint offset = 0; offset < 4; offset++) {
m_indices[offset] = other.m_indices[offset];
if (other.is_set(offset)) {
m_hashes[offset] = other.m_hashes[offset];
new (this->value(offset)) T(std::move(*other.value(offset)));
}
}
}
uint32_t index(uint offset) const
{
return m_indices[offset];
}
uint32_t hash(uint offset) const
{
return m_hashes[offset];
}
T *value(uint offset) const
{
return (T *)POINTER_OFFSET(m_values, offset * sizeof(T));
}
bool is_set(uint offset) const
{
return m_indices[offset] >= 0;
}
bool is_empty(uint offset) const
{
return m_indices[offset] == IS_EMPTY;
}
bool has_hash(uint offset, uint32_t hash) const
{
BLI_assert(this->is_set(offset));
return m_hashes[offset] == hash;
}
bool has_exact_key(uint offset, StringRef key, const Vector<char> &chars) const
{
return key == this->get_key(offset, chars);
}
StringRefNull get_key(uint offset, const Vector<char> &chars) const
{
const char *ptr = chars.begin() + m_indices[offset];
uint length = *(uint *)ptr;
const char *start = ptr + sizeof(uint);
return StringRefNull(start, length);
}
template<typename ForwardT>
void store(uint offset, uint32_t hash, uint32_t index, ForwardT &&value)
{
this->store_without_value(offset, hash, index);
new (this->value(offset)) T(std::forward<ForwardT>(value));
}
void store_without_value(uint offset, uint32_t hash, uint32_t index)
{
BLI_assert(!this->is_set(offset));
m_hashes[offset] = hash;
m_indices[offset] = index;
}
};
/* Storage of the hash table. Every element of the array is an Item, i.e. a group of four slots. */
using ArrayType = OpenAddressingArray<Item, 1, Allocator>;
ArrayType m_array;
/* Buffer that contains all keys. Every key is stored as its length (uint), followed by its
 * characters and a terminating null character (see save_key_in_array). */
Vector<char> m_chars;
public:
/* Default construct the map. */
StringMap() = default;
/**
 * Return how many key-value pairs are currently stored in the map.
 */
uint size() const
{
  const uint pair_count = m_array.slots_set();
  return pair_count;
}
/**
 * Insert a key-value pair into the map. The caller has to make sure that the key is not in the
 * map already (this is only checked by an assert).
 *
 * This overload copies the value into the map.
 */
void add_new(StringRef key, const T &value)
{
  add_new__impl(key, value);
}
/**
 * Same as above, but the value is moved into the map.
 */
void add_new(StringRef key, T &&value)
{
  add_new__impl(key, std::move(value));
}
/**
 * Insert a key-value pair into the map, unless the key is in the map already. In that case the
 * map is not changed.
 *
 * This overload copies the value into the map.
 */
void add(StringRef key, const T &value)
{
  add__impl(key, value);
}
/**
 * Same as above, but the value is moved into the map when it is inserted.
 */
void add(StringRef key, T &&value)
{
  add__impl(key, std::move(value));
}
/**
 * First, checks if the key exists in the map.
 * If it does exist, call the modify function with a pointer to the corresponding value.
 * If it does not exist, call the create function with a pointer to where the value should be
 * created.
 *
 * Returns whatever is returned from one of the callback functions. Both callbacks have to return
 * the same type (this is enforced by a static assert).
 *
 * CreateValueF: Takes a pointer to where the value should be created. The memory it points to is
 *   uninitialized and the slot is already marked as set when the callback is called, so the
 *   callback has to construct a value at that address.
 * ModifyValueF: Takes a pointer to the value that should be modified.
 */
template<typename CreateValueF, typename ModifyValueF>
auto add_or_modify(StringRef key,
const CreateValueF &create_value,
const ModifyValueF &modify_value) -> decltype(create_value(nullptr))
{
using CreateReturnT = decltype(create_value(nullptr));
using ModifyReturnT = decltype(modify_value(nullptr));
BLI_STATIC_ASSERT((std::is_same<CreateReturnT, ModifyReturnT>::value),
"Both callbacks should return the same type.");
/* Grow before searching for a slot, so that the slot found below is in the final array. */
this->ensure_can_add();
uint32_t hash = this->compute_string_hash(key);
ITER_SLOTS_BEGIN (hash, m_array, , item, offset) {
if (item.is_empty(offset)) {
/* The key does not exist yet. Reserve the slot and store the key, but leave the construction
 * of the value to the callback. */
m_array.update__empty_to_set();
uint32_t index = this->save_key_in_array(key);
item.store_without_value(offset, hash, index);
T *value_ptr = item.value(offset);
return create_value(value_ptr);
}
else if (item.has_hash(offset, hash) && item.has_exact_key(offset, key, m_chars)) {
/* The key exists already, so pass the existing value to the callback. */
T *value_ptr = item.value(offset);
return modify_value(value_ptr);
}
}
ITER_SLOTS_END(offset);
}
/**
 * Check whether there is an entry for the given key in the map.
 * Returns true when the key has been found and false otherwise.
 */
bool contains(StringRef key) const
{
  const uint32_t key_hash = this->compute_string_hash(key);
  ITER_SLOTS_BEGIN (key_hash, m_array, const, group, slot) {
    /* Reaching an empty slot means that the key is not stored in the map. */
    if (group.is_empty(slot)) {
      return false;
    }
    /* Compare the hash first and only compare the strings when the hashes are equal. */
    if (group.has_hash(slot, key_hash) && group.has_exact_key(slot, key, m_chars)) {
      return true;
    }
  }
  ITER_SLOTS_END(slot);
}
/**
 * Get a reference to the value corresponding to a key. It is assumed that the key does exist.
 * This is only checked by an assert. When the key does not exist, the function may dereference a
 * null pointer or return the value of a different key that has the same hash.
 *
 * To avoid string comparisons in the common case, the key of the first slot with a matching hash
 * is not compared. Because the key is known to exist, it is either in that first slot or in a
 * later slot with the same hash, which is then detected by an exact comparison.
 */
const T &lookup(StringRef key) const
{
BLI_assert(this->contains(key));
/* Value of the first slot whose hash matched. */
T *found_value = nullptr;
uint32_t hash = this->compute_string_hash(key);
ITER_SLOTS_BEGIN (hash, m_array, const, item, offset) {
if (item.is_empty(offset)) {
/* No later slot contained the exact key, so the first candidate has to be the right one. */
return *found_value;
}
else if (item.has_hash(offset, hash)) {
if (found_value == nullptr) {
/* Common case: the first slot with the correct hash contains the key.
 * However, still need to iterate until the next empty slot to make sure there is no
 * other key with the exact same hash. */
/* TODO: Check if we can guarantee that every hash only exists once in some cases. */
found_value = item.value(offset);
}
else if (item.has_exact_key(offset, key, m_chars)) {
/* Found the hash more than once, now check for actual string equality. */
return *item.value(offset);
}
}
}
ITER_SLOTS_END(offset);
}
/**
 * Non-const version of the lookup above. It forwards to the const version and casts the constness
 * of the result away.
 */
T &lookup(StringRef key)
{
return const_cast<T &>(const_cast<const StringMap *>(this)->lookup(key));
}
/**
 * Find the value that is stored for the given key.
 * Returns a pointer to the value, or nullptr when the key is not in the map.
 */
const T *lookup_ptr(StringRef key) const
{
  const uint32_t key_hash = this->compute_string_hash(key);
  ITER_SLOTS_BEGIN (key_hash, m_array, const, group, slot) {
    /* Reaching an empty slot means that the key is not stored in the map. */
    if (group.is_empty(slot)) {
      return nullptr;
    }
    if (group.has_hash(slot, key_hash) && group.has_exact_key(slot, key, m_chars)) {
      return group.value(slot);
    }
  }
  ITER_SLOTS_END(slot);
}
/**
 * Non-const version of the function above. It forwards to the const version and removes the
 * constness from the returned pointer.
 */
T *lookup_ptr(StringRef key)
{
  const StringMap *const_self = this;
  const T *value_ptr = const_self->lookup_ptr(key);
  return const_cast<T *>(value_ptr);
}
/**
 * Look up the value for the given key and wrap the result in an Optional. The Optional is built
 * from the pointer returned by lookup_ptr, which is nullptr when the key does not exist.
 */
Optional<T> try_lookup(StringRef key) const
{
  const T *value_ptr = this->lookup_ptr(key);
  return Optional<T>::FromPointer(value_ptr);
}
/**
 * Return a copy of the value that is stored for the given key. When the key is not in the map, a
 * copy of the given default value is returned instead.
 */
T lookup_default(StringRef key, const T &default_value) const
{
  const T *stored_value = this->lookup_ptr(key);
  return (stored_value == nullptr) ? default_value : *stored_value;
}
/**
 * Get a reference to the value that belongs to the given key.
 * When the key is not in the map yet, `create_value` is called without arguments and its result
 * is inserted for that key first.
 */
template<typename CreateValueF> T &lookup_or_add(StringRef key, const CreateValueF &create_value)
{
  /* Constructs the new value directly in the storage of the slot. */
  auto construct_new = [&](T *storage) { return new (storage) T(create_value()); };
  /* An existing value is returned unchanged. */
  auto reuse_existing = [](T *existing) { return existing; };
  T *value_ptr = this->add_or_modify(key, construct_new, reuse_existing);
  return *value_ptr;
}
/**
 * Get a reference to the value that belongs to the given key.
 * When the key is not in the map yet, a value-initialized T is inserted for it first.
 */
T &lookup_or_add_default(StringRef key)
{
  auto make_default = []() { return T(); };
  return this->lookup_or_add(key, make_default);
}
/**
 * Search all slots linearly for one whose value compares equal to the given value and return its
 * key. It is expected that such a value exists: when none is found, an assert fails and a default
 * constructed StringRefNull is returned.
 */
StringRefNull find_key_for_value(const T &value) const
{
  for (const Item &group : m_array) {
    for (uint slot = 0; slot < Item::slots_per_item; slot++) {
      if (!group.is_set(slot)) {
        continue;
      }
      if (value == *group.value(slot)) {
        return group.get_key(slot, m_chars);
      }
    }
  }
  BLI_assert(false);
  return {};
}
/**
* Run a function for every value in the map.
*/
template<typename FuncT> void foreach_value(const FuncT &func)
{
for (Item &item : m_array) {
for (uint offset = 0; offset < 4; offset++) {
if (item.is_set(offset)) {
func(*item.value(offset));
}
}
}
}
/**
* Run a function for every key in the map.
*/
template<typename FuncT> void foreach_key(const FuncT &func)
{
for (Item &item : m_array) {
for (uint offset = 0; offset < 4; offset++) {
if (item.is_set(offset)) {
StringRefNull key = item.get_key(offset, m_chars);
func(key);
}
}
}
}
/**
* Run a function for every key-value-pair in the map.
*/
template<typename FuncT> void foreach_item(const FuncT &func)
{
for (Item &item : m_array) {
for (uint offset = 0; offset < 4; offset++) {
if (item.is_set(offset)) {
StringRefNull key = item.get_key(offset, m_chars);
T &value = *item.value(offset);
func(key, value);
}
}
}
}
template<typename FuncT> void foreach_item(const FuncT &func) const
{
for (const Item &item : m_array) {
for (uint offset = 0; offset < 4; offset++) {
if (item.is_set(offset)) {
StringRefNull key = item.get_key(offset, m_chars);
const T &value = *item.value(offset);
func(key, value);
}
}
}
}
private:
/**
 * Compute the hash of a key. Starting with 5381, the hash is multiplied by 33 for every character
 * and the character is added to it.
 */
uint32_t compute_string_hash(StringRef key) const
{
  /* TODO: check if this can be optimized more because we know the key length already. */
  uint32_t accumulated = 5381;
  for (char character : key) {
    accumulated *= 33;
    accumulated += character;
  }
  return accumulated;
}
/**
 * Append a record for the key to the character buffer and return the position at which the
 * record starts. A record consists of the length of the key (the bytes of a uint), the characters
 * of the key and a terminating null character.
 */
uint32_t save_key_in_array(StringRef key)
{
  const uint record_start = m_chars.size();
  uint key_length = key.size();
  /* Store the raw bytes of the length in front of the characters. */
  ArrayRef<char> length_bytes((char *)&key_length, sizeof(uint));
  m_chars.extend(length_bytes);
  m_chars.extend(key);
  m_chars.append('\0');
  return record_start;
}
/**
 * Get the key whose record starts at the given index in the character buffer. The index has to
 * be a value that has been returned by save_key_in_array.
 */
StringRefNull key_from_index(uint32_t index) const
{
  const char *ptr = m_chars.begin() + index;
  /* The record starts at an arbitrary byte offset in the buffer, so the length must not be read
   * through a `uint` pointer. Copy the bytes instead. */
  uint length;
  std::memcpy(&length, ptr, sizeof(uint));
  const char *start = ptr + sizeof(uint);
  return StringRefNull(start, length);
}
void ensure_can_add()
{
if (UNLIKELY(m_array.should_grow())) {
this->grow(this->size() + 1);
}
}
/**
 * Replace the array with a new one that has been reserved for at least the given number of
 * slots. All existing entries are inserted into the new array with their stored hash and key
 * index, so the character buffer is not touched.
 */
BLI_NOINLINE void grow(uint min_usable_slots)
{
  ArrayType larger_array = m_array.init_reserved(min_usable_slots);
  for (Item &source_item : m_array) {
    for (uint slot = 0; slot < Item::slots_per_item; slot++) {
      if (!source_item.is_set(slot)) {
        continue;
      }
      T &stored_value = *source_item.value(slot);
      const uint32_t stored_hash = source_item.hash(slot);
      const uint32_t key_index = source_item.index(slot);
      this->add_after_grow(stored_value, stored_hash, key_index, larger_array);
    }
  }
  m_array = std::move(larger_array);
}
/**
 * Insert an existing entry into the first empty slot of its probing sequence in the new array.
 * The value is moved into the new slot. Keys are not compared and the character buffer is not
 * modified.
 */
void add_after_grow(T &value, uint32_t hash, uint32_t index, ArrayType &new_array)
{
  ITER_SLOTS_BEGIN (hash, new_array, , target_item, slot) {
    const bool slot_is_free = target_item.is_empty(slot);
    if (slot_is_free) {
      target_item.store(slot, hash, index, std::move(value));
      return;
    }
  }
  ITER_SLOTS_END(slot);
}
/**
 * Shared implementation of the `add` overloads.
 * Returns true when the key-value pair has been inserted and false when the key existed already.
 */
template<typename ForwardT> bool add__impl(StringRef key, ForwardT &&value)
{
  this->ensure_can_add();
  const uint32_t key_hash = this->compute_string_hash(key);
  ITER_SLOTS_BEGIN (key_hash, m_array, , group, slot) {
    if (group.is_empty(slot)) {
      /* The key is not in the map yet, so store it in the first empty slot. */
      const uint32_t key_index = this->save_key_in_array(key);
      group.store(slot, key_hash, key_index, std::forward<ForwardT>(value));
      m_array.update__empty_to_set();
      return true;
    }
    if (group.has_hash(slot, key_hash) && group.has_exact_key(slot, key, m_chars)) {
      return false;
    }
  }
  ITER_SLOTS_END(slot);
}
/**
 * Shared implementation of the `add_new` overloads. The key must not be in the map already (only
 * checked by an assert), therefore the stored keys are not compared here.
 */
template<typename ForwardT> void add_new__impl(StringRef key, ForwardT &&value)
{
  BLI_assert(!this->contains(key));
  this->ensure_can_add();
  const uint32_t key_hash = this->compute_string_hash(key);
  ITER_SLOTS_BEGIN (key_hash, m_array, , group, slot) {
    const bool slot_is_free = group.is_empty(slot);
    if (slot_is_free) {
      const uint32_t key_index = this->save_key_in_array(key);
      group.store(slot, key_hash, key_index, std::forward<ForwardT>(value));
      m_array.update__empty_to_set();
      return;
    }
  }
  ITER_SLOTS_END(slot);
}
};
#undef ITER_SLOTS_BEGIN
#undef ITER_SLOTS_END
} // namespace BLI
#endif /* __BLI_STRING_MAP_HH__ */