Speedup for guarded allocator

- Re-arrange locks, so no actual memory allocation
  (which is relatively slow) happens from inside
  the lock. The operating system will take care of
  any locks which might be needed there on its own.

- Use a spin lock instead of a mutex: only list
  operations happen inside the lock, so there is
  no need for a mutex here.

- Use atomic operations for memory in use and total
  used blocks counters.

This makes the guarded allocator almost as fast as
the non-guarded one on files from the Tube project.

There is still MemHead/MemTail overhead, which might
be bad for CPU cache utilization.
This commit is contained in:
2013-08-19 10:51:40 +00:00
parent efa836531e
commit c0f8e15295
12 changed files with 42 additions and 32 deletions

View File

@@ -44,7 +44,8 @@
# endif # endif
#endif #endif
#if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__) /* TODO(sergey): check on other 64bit platforms. */
#if defined(_M_X64) || defined(_M_AMD64) || defined(__amd64__) || defined(__x86_64__)
# define LG_SIZEOF_PTR 3 # define LG_SIZEOF_PTR 3
# define LG_SIZEOF_INT 3 # define LG_SIZEOF_INT 3
#else #else

View File

@@ -25,6 +25,7 @@
set(INC set(INC
. .
../atomic
) )
set(INC_SYS set(INC_SYS

View File

@@ -38,6 +38,6 @@ if env['WITH_BF_CXX_GUARDEDALLOC']:
sources.append('cpp/mallocn.cpp') sources.append('cpp/mallocn.cpp')
defs.append('WITH_CXX_GUARDEDALLOC') defs.append('WITH_CXX_GUARDEDALLOC')
incs = '.' incs = '. ../atomic'
env.BlenderLib ('bf_intern_guardedalloc', sources, Split(incs), defs, libtype=['intern','player'], priority = [5,150] ) env.BlenderLib ('bf_intern_guardedalloc', sources, Split(incs), defs, libtype=['intern','player'], priority = [5,150] )

View File

@@ -53,6 +53,8 @@
/* should always be defined except for experimental cases */ /* should always be defined except for experimental cases */
#ifdef WITH_GUARDEDALLOC #ifdef WITH_GUARDEDALLOC
#include "atomic_ops.h"
/* Blame Microsoft for LLP64 and no inttypes.h, quick workaround needed: */ /* Blame Microsoft for LLP64 and no inttypes.h, quick workaround needed: */
#if defined(WIN64) #if defined(WIN64)
# define SIZET_FORMAT "%I64u" # define SIZET_FORMAT "%I64u"
@@ -210,8 +212,8 @@ static const char *check_memlist(MemHead *memh);
/* --------------------------------------------------------------------- */ /* --------------------------------------------------------------------- */
static volatile int totblock = 0; static unsigned int totblock = 0;
static volatile uintptr_t mem_in_use = 0, mmap_in_use = 0, peak_mem = 0; static size_t mem_in_use = 0, mmap_in_use = 0, peak_mem = 0;
static volatile struct localListBase _membase; static volatile struct localListBase _membase;
static volatile struct localListBase *membase = &_membase; static volatile struct localListBase *membase = &_membase;
@@ -494,30 +496,28 @@ static void make_memhead_header(MemHead *memh, size_t len, const char *str)
memt = (MemTail *)(((char *) memh) + sizeof(MemHead) + len); memt = (MemTail *)(((char *) memh) + sizeof(MemHead) + len);
memt->tag3 = MEMTAG3; memt->tag3 = MEMTAG3;
atomic_add_u(&totblock, 1);
atomic_add_z(&mem_in_use, len);
mem_lock_thread();
addtail(membase, &memh->next); addtail(membase, &memh->next);
if (memh->next) { if (memh->next) {
memh->nextname = MEMNEXT(memh->next)->name; memh->nextname = MEMNEXT(memh->next)->name;
} }
totblock++;
mem_in_use += len;
peak_mem = mem_in_use > peak_mem ? mem_in_use : peak_mem; peak_mem = mem_in_use > peak_mem ? mem_in_use : peak_mem;
mem_unlock_thread();
} }
void *MEM_mallocN(size_t len, const char *str) void *MEM_mallocN(size_t len, const char *str)
{ {
MemHead *memh; MemHead *memh;
mem_lock_thread();
len = (len + 3) & ~3; /* allocate in units of 4 */ len = (len + 3) & ~3; /* allocate in units of 4 */
memh = (MemHead *)malloc(len + sizeof(MemHead) + sizeof(MemTail)); memh = (MemHead *)malloc(len + sizeof(MemHead) + sizeof(MemTail));
if (memh) { if (memh) {
make_memhead_header(memh, len, str); make_memhead_header(memh, len, str);
mem_unlock_thread();
if (malloc_debug_memset && len) if (malloc_debug_memset && len)
memset(memh + 1, 255, len); memset(memh + 1, 255, len);
@@ -528,7 +528,6 @@ void *MEM_mallocN(size_t len, const char *str)
#endif #endif
return (++memh); return (++memh);
} }
mem_unlock_thread();
print_error("Malloc returns null: len=" SIZET_FORMAT " in %s, total %u\n", print_error("Malloc returns null: len=" SIZET_FORMAT " in %s, total %u\n",
SIZET_ARG(len), str, (unsigned int) mem_in_use); SIZET_ARG(len), str, (unsigned int) mem_in_use);
return NULL; return NULL;
@@ -538,15 +537,12 @@ void *MEM_callocN(size_t len, const char *str)
{ {
MemHead *memh; MemHead *memh;
mem_lock_thread();
len = (len + 3) & ~3; /* allocate in units of 4 */ len = (len + 3) & ~3; /* allocate in units of 4 */
memh = (MemHead *)calloc(len + sizeof(MemHead) + sizeof(MemTail), 1); memh = (MemHead *)calloc(len + sizeof(MemHead) + sizeof(MemTail), 1);
if (memh) { if (memh) {
make_memhead_header(memh, len, str); make_memhead_header(memh, len, str);
mem_unlock_thread();
#ifdef DEBUG_MEMCOUNTER #ifdef DEBUG_MEMCOUNTER
if (_mallocn_count == DEBUG_MEMCOUNTER_ERROR_VAL) if (_mallocn_count == DEBUG_MEMCOUNTER_ERROR_VAL)
memcount_raise(__func__); memcount_raise(__func__);
@@ -554,7 +550,6 @@ void *MEM_callocN(size_t len, const char *str)
#endif #endif
return (++memh); return (++memh);
} }
mem_unlock_thread();
print_error("Calloc returns null: len=" SIZET_FORMAT " in %s, total %u\n", print_error("Calloc returns null: len=" SIZET_FORMAT " in %s, total %u\n",
SIZET_ARG(len), str, (unsigned int) mem_in_use); SIZET_ARG(len), str, (unsigned int) mem_in_use);
return NULL; return NULL;
@@ -565,8 +560,6 @@ void *MEM_mapallocN(size_t len, const char *str)
{ {
MemHead *memh; MemHead *memh;
mem_lock_thread();
len = (len + 3) & ~3; /* allocate in units of 4 */ len = (len + 3) & ~3; /* allocate in units of 4 */
memh = mmap(NULL, len + sizeof(MemHead) + sizeof(MemTail), memh = mmap(NULL, len + sizeof(MemHead) + sizeof(MemTail),
@@ -575,7 +568,8 @@ void *MEM_mapallocN(size_t len, const char *str)
if (memh != (MemHead *)-1) { if (memh != (MemHead *)-1) {
make_memhead_header(memh, len, str); make_memhead_header(memh, len, str);
memh->mmap = 1; memh->mmap = 1;
mmap_in_use += len; atomic_add_z(&mmap_in_use, len);
mem_lock_thread();
peak_mem = mmap_in_use > peak_mem ? mmap_in_use : peak_mem; peak_mem = mmap_in_use > peak_mem ? mmap_in_use : peak_mem;
mem_unlock_thread(); mem_unlock_thread();
#ifdef DEBUG_MEMCOUNTER #ifdef DEBUG_MEMCOUNTER
@@ -586,7 +580,6 @@ void *MEM_mapallocN(size_t len, const char *str)
return (++memh); return (++memh);
} }
else { else {
mem_unlock_thread();
print_error("Mapalloc returns null, fallback to regular malloc: " print_error("Mapalloc returns null, fallback to regular malloc: "
"len=" SIZET_FORMAT " in %s, total %u\n", "len=" SIZET_FORMAT " in %s, total %u\n",
SIZET_ARG(len), str, (unsigned int) mmap_in_use); SIZET_ARG(len), str, (unsigned int) mmap_in_use);
@@ -844,7 +837,6 @@ void MEM_freeN(void *vmemh)
return; return;
} }
mem_lock_thread();
if ((memh->tag1 == MEMTAG1) && if ((memh->tag1 == MEMTAG1) &&
(memh->tag2 == MEMTAG2) && (memh->tag2 == MEMTAG2) &&
((memh->len & 0x3) == 0)) ((memh->len & 0x3) == 0))
@@ -858,8 +850,6 @@ void MEM_freeN(void *vmemh)
/* after tags !!! */ /* after tags !!! */
rem_memblock(memh); rem_memblock(memh);
mem_unlock_thread();
return; return;
} }
MemorY_ErroR(memh->name, "end corrupt"); MemorY_ErroR(memh->name, "end corrupt");
@@ -869,7 +859,9 @@ void MEM_freeN(void *vmemh)
} }
} }
else { else {
mem_lock_thread();
name = check_memlist(memh); name = check_memlist(memh);
mem_unlock_thread();
if (name == NULL) if (name == NULL)
MemorY_ErroR("free", "pointer not in memlist"); MemorY_ErroR("free", "pointer not in memlist");
else else
@@ -879,8 +871,6 @@ void MEM_freeN(void *vmemh)
totblock--; totblock--;
/* here a DUMP should happen */ /* here a DUMP should happen */
mem_unlock_thread();
return; return;
} }
@@ -927,6 +917,7 @@ static void remlink(volatile localListBase *listbase, void *vlink)
static void rem_memblock(MemHead *memh) static void rem_memblock(MemHead *memh)
{ {
mem_lock_thread();
remlink(membase, &memh->next); remlink(membase, &memh->next);
if (memh->prev) { if (memh->prev) {
if (memh->next) if (memh->next)
@@ -934,9 +925,10 @@ static void rem_memblock(MemHead *memh)
else else
MEMNEXT(memh->prev)->nextname = NULL; MEMNEXT(memh->prev)->nextname = NULL;
} }
mem_unlock_thread();
totblock--; atomic_sub_u(&totblock, 1);
mem_in_use -= memh->len; atomic_sub_z(&mem_in_use, memh->len);
#ifdef DEBUG_MEMDUPLINAME #ifdef DEBUG_MEMDUPLINAME
if (memh->need_free_name) if (memh->need_free_name)
@@ -944,7 +936,7 @@ static void rem_memblock(MemHead *memh)
#endif #endif
if (memh->mmap) { if (memh->mmap) {
mmap_in_use -= memh->len; atomic_sub_z(&mmap_in_use, memh->len);
if (munmap(memh, memh->len + sizeof(MemHead) + sizeof(MemTail))) if (munmap(memh, memh->len + sizeof(MemHead) + sizeof(MemTail)))
printf("Couldn't unmap memory %s\n", memh->name); printf("Couldn't unmap memory %s\n", memh->name);
} }

View File

@@ -50,6 +50,7 @@ struct ListBase;
/*this is run once at startup*/ /*this is run once at startup*/
void BLI_threadapi_init(void); void BLI_threadapi_init(void);
void BLI_threadapi_exit(void);
void BLI_init_threads(struct ListBase *threadbase, void *(*do_thread)(void *), int tot); void BLI_init_threads(struct ListBase *threadbase, void *(*do_thread)(void *), int tot);
int BLI_available_threads(struct ListBase *threadbase); int BLI_available_threads(struct ListBase *threadbase);

View File

@@ -107,7 +107,7 @@ static void *thread_tls_data;
* BLI_end_threads(&lb); * BLI_end_threads(&lb);
* *
************************************************ */ ************************************************ */
static pthread_mutex_t _malloc_lock = PTHREAD_MUTEX_INITIALIZER; static SpinLock _malloc_lock;
static pthread_mutex_t _image_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t _image_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _image_draw_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t _image_draw_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _viewer_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t _viewer_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -134,17 +134,24 @@ typedef struct ThreadSlot {
static void BLI_lock_malloc_thread(void) static void BLI_lock_malloc_thread(void)
{ {
pthread_mutex_lock(&_malloc_lock); BLI_spin_lock(&_malloc_lock);
} }
static void BLI_unlock_malloc_thread(void) static void BLI_unlock_malloc_thread(void)
{ {
pthread_mutex_unlock(&_malloc_lock); BLI_spin_unlock(&_malloc_lock);
} }
void BLI_threadapi_init(void) void BLI_threadapi_init(void)
{ {
mainid = pthread_self(); mainid = pthread_self();
BLI_spin_init(&_malloc_lock);
}
void BLI_threadapi_exit(void)
{
BLI_spin_end(&_malloc_lock);
} }
/* tot = 0 only initializes malloc mutex in a safe way (see sequence.c) /* tot = 0 only initializes malloc mutex in a safe way (see sequence.c)

View File

@@ -29,6 +29,7 @@ add_definitions(-DWITH_DNA_GHASH)
blender_include_dirs( blender_include_dirs(
../../../../intern/guardedalloc ../../../../intern/guardedalloc
../../../../intern/atomic
../../blenlib ../../blenlib
.. ..
) )

View File

@@ -46,6 +46,7 @@ dna = env.Clone()
makesdna_tool.Append(CCFLAGS = '-DBASE_HEADER="\\"source/blender/makesdna/\\"" ') makesdna_tool.Append(CCFLAGS = '-DBASE_HEADER="\\"source/blender/makesdna/\\"" ')
makesdna_tool.Append (CPPPATH = ['#/intern/guardedalloc', makesdna_tool.Append (CPPPATH = ['#/intern/guardedalloc',
'#/intern/atomic',
'../../makesdna', '../../bmesh']) '../../makesdna', '../../bmesh'])
if env['OURPLATFORM'] == 'linuxcross': if env['OURPLATFORM'] == 'linuxcross':

View File

@@ -36,6 +36,7 @@ incs = [
'.', '.',
'./intern', './intern',
'#/intern/guardedalloc', '#/intern/guardedalloc',
'#/intern/atomic',
'#/intern/memutil', '#/intern/memutil',
'#/extern/glew/include', '#/extern/glew/include',
'#/intern/audaspace/intern', '#/intern/audaspace/intern',

View File

@@ -276,6 +276,7 @@ blender_include_dirs(
../../../../intern/audaspace/intern ../../../../intern/audaspace/intern
../../../../intern/cycles/blender ../../../../intern/cycles/blender
../../../../intern/guardedalloc ../../../../intern/guardedalloc
../../../../intern/atomic
../../../../intern/memutil ../../../../intern/memutil
../../../../intern/smoke/extern ../../../../intern/smoke/extern
) )

View File

@@ -51,6 +51,7 @@
#include "BLI_listbase.h" #include "BLI_listbase.h"
#include "BLI_path_util.h" #include "BLI_path_util.h"
#include "BLI_string.h" #include "BLI_string.h"
#include "BLI_threads.h"
#include "BLI_utildefines.h" #include "BLI_utildefines.h"
#include "BKE_blender.h" #include "BKE_blender.h"
@@ -510,6 +511,8 @@ void WM_exit_ext(bContext *C, const short do_python)
GHOST_DisposeSystemPaths(); GHOST_DisposeSystemPaths();
BLI_threadapi_exit();
if (MEM_get_memory_blocks_in_use() != 0) { if (MEM_get_memory_blocks_in_use() != 0) {
printf("Error: Not freed memory blocks: %d\n", MEM_get_memory_blocks_in_use()); printf("Error: Not freed memory blocks: %d\n", MEM_get_memory_blocks_in_use());
MEM_printmemlist(); MEM_printmemlist();

View File

@@ -58,6 +58,7 @@ extern "C"
#endif // __cplusplus #endif // __cplusplus
#include "MEM_guardedalloc.h" #include "MEM_guardedalloc.h"
#include "BKE_blender.h" #include "BKE_blender.h"
#include "BKE_depsgraph.h"
#include "BKE_global.h" #include "BKE_global.h"
#include "BKE_icons.h" #include "BKE_icons.h"
#include "BKE_image.h" #include "BKE_image.h"