author     Will Newton <will.newton@linaro.org>    2013-07-11 12:37:17 +0100
committer  Will Newton <will.newton@linaro.org>    2013-07-11 12:37:17 +0100
commit     18e703c46e36dbeaeed1ac12fad345dbb1bb402c (patch)
tree       971f9032633f449d91380f231f1ec4bfb4fcf5ef
parent     2fa8fd4d7bd6a6b976fadccc18bd1b65e704ba16 (diff)
Add ssmalloc reference code.
-rw-r--r--  Makefile.am                                        43
-rw-r--r--  configure.ac                                       20
-rw-r--r--  reference/ssmalloc/include-x86_64/atomic.h         99
-rw-r--r--  reference/ssmalloc/include-x86_64/bitops.h         64
-rw-r--r--  reference/ssmalloc/include-x86_64/cpu.h            30
-rw-r--r--  reference/ssmalloc/include-x86_64/double-list.h    64
-rw-r--r--  reference/ssmalloc/include-x86_64/queue.h         192
-rw-r--r--  reference/ssmalloc/ssmalloc.c                     785
-rw-r--r--  reference/ssmalloc/ssmalloc.h                     177
9 files changed, 1461 insertions(+), 13 deletions(-)
diff --git a/Makefile.am b/Makefile.am
index d7a813c..4137d9e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -63,19 +63,6 @@ tests_tst_valloc_CFLAGS = $(tests_cflags)
TESTS = $(check_PROGRAMS)
-## Benchmarks
-noinst_PROGRAMS = \
- try-none \
- try-this \
- try-glibc \
- try-jemalloc
-
-# Benchmark harness
-noinst_LIBRARIES = \
- libmallocbench.a \
- libglibc.a \
- libjemalloc.a
-
libmallocbench_a_SOURCES = \
benchmarks/mallocbench.c
@@ -126,6 +113,36 @@ try_glibc_LDADD = libmallocbench.a libglibc.a -lrt -lpthread
try_jemalloc_SOURCES =
try_jemalloc_LDADD = libmallocbench.a libjemalloc.a -lrt -lpthread
+if HOST_X86_64
+libssmalloc_a_SOURCES = \
+ reference/ssmalloc/ssmalloc.c
+
+libssmalloc_a_CFLAGS = \
+ -O3 -Wstrict-prototypes -fomit-frame-pointer -g -Wall \
+ -Ireference/ssmalloc/include-x86_64
+
+try_ssmalloc_SOURCES =
+try_ssmalloc_LDADD = libmallocbench.a libssmalloc.a -lrt -lpthread
+
+x86_64_programs = try-ssmalloc
+x86_64_libraries = libssmalloc.a
+endif
+
+## Benchmarks
+noinst_PROGRAMS = \
+ try-none \
+ try-this \
+ try-glibc \
+ try-jemalloc \
+ $(x86_64_programs)
+
+# Benchmark harness
+noinst_LIBRARIES = \
+ libmallocbench.a \
+ libglibc.a \
+ libjemalloc.a \
+ $(x86_64_libraries)
+
# Main library
libcortex_malloc_la_SOURCES = \
src/malloc.c
diff --git a/configure.ac b/configure.ac
index 1a4b75a..381cbcb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -32,4 +32,24 @@ AM_PROG_AS
AC_PROG_CC
AC_PROG_LIBTOOL
+case $host in
+aarch64*-*-*)
+ arch=aarch64
+ ;;
+arm*-*-*)
+ arch=aarch32
+ ;;
+x86_64-*-*-*)
+ arch=x86_64
+ ;;
+*)
+ arch=generic
+ ;;
+esac
+
+AM_CONDITIONAL([HOST_AARCH32], [test x$arch = xaarch32])
+AM_CONDITIONAL([HOST_AARCH64], [test x$arch = xaarch64])
+AM_CONDITIONAL([HOST_X86_64], [test x$arch = xx86_64])
+AM_CONDITIONAL([HOST_GENERIC], [test x$arch = xgeneric])
+
AC_OUTPUT
diff --git a/reference/ssmalloc/include-x86_64/atomic.h b/reference/ssmalloc/include-x86_64/atomic.h
new file mode 100644
index 0000000..2431300
--- /dev/null
+++ b/reference/ssmalloc/include-x86_64/atomic.h
@@ -0,0 +1,99 @@
+#ifndef __SYNCHRO_ATOMIC_H__
+#define __SYNCHRO_ATOMIC_H__
+
+#define mb() asm volatile ("mfence" : : : "memory")
+#define LOCK_PREFIX "lock ; "
+
+static inline unsigned long fetch_and_store(volatile unsigned int *address, unsigned int value)
+{
+ asm volatile("xchgl %k0,%1"
+ : "=r" (value)
+ : "m" (*address), "0" (value)
+ : "memory");
+
+ return value;
+}
+
+static inline int atmc_fetch_and_add(volatile unsigned int *address, int value)
+{
+ int prev = value;
+
+ asm volatile(
+ LOCK_PREFIX "xaddl %0, %1"
+ : "+r" (value), "+m" (*address)
+ : : "memory");
+
+ return prev + value;
+}
+
+static inline long long atmc_fetch_and_add64(volatile unsigned long long *address, long long value)
+{
+    long long prev = value;
+
+ asm volatile(
+ LOCK_PREFIX "xaddq %0, %1"
+ : "+r" (value), "+m" (*address)
+ : : "memory");
+
+ return prev + value;
+}
+
+static inline void atmc_add32(volatile unsigned int* address, int value)
+{
+ asm volatile(
+ LOCK_PREFIX "addl %1,%0"
+ : "=m" (*address)
+ : "ir" (value), "m" (*address));
+}
+
+static inline void atmc_add64(volatile unsigned long long* address, unsigned long long value)
+{
+ asm volatile(
+ LOCK_PREFIX "addq %1,%0"
+ : "=m" (*address)
+ : "ir" (value), "m" (*address));
+}
+
+static inline unsigned int compare_and_swap32(volatile unsigned int *address, unsigned int old_value, unsigned int new_value)
+{
+ unsigned long prev = 0;
+
+ asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2"
+ : "=a"(prev)
+ : "r"(new_value), "m"(*address), "0"(old_value)
+ : "memory");
+
+ return prev == old_value;
+}
+
+static inline unsigned int compare_and_swap64(volatile unsigned long long *address, unsigned long old_value, unsigned long new_value)
+{
+ unsigned long prev = 0;
+
+ asm volatile(LOCK_PREFIX "cmpxchgq %1,%2"
+ : "=a"(prev)
+ : "r"(new_value), "m"(*address), "0"(old_value)
+ : "memory");
+
+ return prev == old_value;
+}
+
+static inline unsigned long compare_and_swap64_out(volatile unsigned long long *address, unsigned long old_value, unsigned long new_value)
+{
+ unsigned long prev = 0;
+
+ asm volatile(LOCK_PREFIX "cmpxchgq %1,%2"
+ : "=a"(prev)
+ : "r"(new_value), "m"(*address), "0"(old_value)
+ : "memory");
+
+ return prev;
+}
+
+static inline unsigned long compare_and_swap_ptr(volatile void *address, void* old_ptr, void* new_ptr)
+{
+ return compare_and_swap64((volatile unsigned long long *)address, (unsigned long)old_ptr, (unsigned long)new_ptr);
+}
+
+#endif
+
diff --git a/reference/ssmalloc/include-x86_64/bitops.h b/reference/ssmalloc/include-x86_64/bitops.h
new file mode 100644
index 0000000..879f237
--- /dev/null
+++ b/reference/ssmalloc/include-x86_64/bitops.h
@@ -0,0 +1,64 @@
+#ifndef __X86_64_BITOPS_H_
+#define __X86_64_BITOPS_H_
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ */
+
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
+/* Technically wrong, but this avoids compilation errors on some gcc
+ versions. */
+#define ADDR "=m" (*(volatile long *) addr)
+#else
+#define ADDR "+m" (*(volatile long *) addr)
+#endif
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to change
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __change_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__(
+ "btcl %1,%0"
+ :ADDR
+ :"dIr" (nr));
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static __inline__ int __test_and_change_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__(
+ "btcl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),ADDR
+ :"dIr" (nr) : "memory");
+ return oldbit;
+}
+
+static inline unsigned long __fls(unsigned long word)
+{
+ asm("bsr %1,%0"
+ : "=r" (word)
+ : "rm" (word));
+ return word;
+}
+
+static __inline__ unsigned int __get_size_class(unsigned int word) {
+ asm("dec %1\n"
+ "shr $2,%1\n"
+ "bsr %1,%0\n"
+ "cmovz %2,%0\n"
+ : "=r" (word)
+ : "rm" (word), "r" (0));
+ return word;
+}
+
+#endif /* _X86_64_BITOPS_H */
+
diff --git a/reference/ssmalloc/include-x86_64/cpu.h b/reference/ssmalloc/include-x86_64/cpu.h
new file mode 100644
index 0000000..1efce67
--- /dev/null
+++ b/reference/ssmalloc/include-x86_64/cpu.h
@@ -0,0 +1,30 @@
+#ifndef __CPU_H_
+#define __CPU_H_
+
+/* Machine related macros*/
+#define PAGE_SIZE (4096)
+#define SUPER_PAGE_SIZE (4*1024*1024)
+#define CACHE_LINE_SIZE (64)
+#define DEFAULT_BLOCK_CLASS (100)
+#define MAX_CORE_ID (8)
+
+static inline int get_core_id(void) {
+    /* Note: this early return pins every thread to core 0; the cpuid-based
+     * lookup below is compiled in but never reached. */
+    return 0;
+    int result;
+ __asm__ __volatile__ (
+ "mov $1, %%eax\n"
+ "cpuid\n"
+ :"=b"(result)
+ :
+ :"eax","ecx","edx");
+ return (result>>24)%8;
+}
+
+static inline unsigned long read_tsc(void)
+{
+ unsigned a, d;
+ __asm __volatile("rdtsc":"=a"(a), "=d"(d));
+ return ((unsigned long)a) | (((unsigned long) d) << 32);
+}
+
+#endif
diff --git a/reference/ssmalloc/include-x86_64/double-list.h b/reference/ssmalloc/include-x86_64/double-list.h
new file mode 100644
index 0000000..9edd49c
--- /dev/null
+++ b/reference/ssmalloc/include-x86_64/double-list.h
@@ -0,0 +1,64 @@
+#ifndef __DOUBLE_LIST_H_
+#define __DOUBLE_LIST_H_
+
+typedef struct double_list_elem double_list_elem_t;
+typedef struct double_list double_list_t;
+
+struct double_list_elem {
+ void* __padding;
+ struct double_list_elem* next;
+ struct double_list_elem* prev;
+};
+
+struct double_list {
+ struct double_list_elem* head;
+ struct double_list_elem* tail;
+};
+
+
+/* Places new_node at the front of the list. */
+static void double_list_insert_front(void* new_node, double_list_t* list)
+{
+ double_list_elem_t* elem_new = (double_list_elem_t*)new_node;
+ double_list_elem_t* old_head = list->head;
+
+ if (old_head == NULL) {
+ list->tail = elem_new;
+ }
+ else {
+ old_head->prev = elem_new;
+ }
+
+ elem_new->next = old_head;
+ elem_new->prev = NULL;
+ list->head = elem_new;
+}
+
+/* Removes node from the list. */
+static void double_list_remove(void* node, double_list_t* list)
+{
+ double_list_elem_t* elem_node = (double_list_elem_t*)node;
+
+ if (elem_node->prev != NULL) {
+ elem_node->prev->next = elem_node->next;
+ }
+ else {
+ list->head = elem_node->next;
+ }
+
+ if (elem_node->next != NULL) {
+ elem_node->next->prev = elem_node->prev;
+ }
+ else {
+ list->tail = elem_node->prev;
+ }
+
+ if (list->head != NULL && list->head->next == NULL) {
+ list->tail = list->head;
+ }
+ else if (list->tail != NULL && list->tail->prev == NULL) {
+ list->head = list->tail;
+ }
+}
+
+#endif
diff --git a/reference/ssmalloc/include-x86_64/queue.h b/reference/ssmalloc/include-x86_64/queue.h
new file mode 100644
index 0000000..666a47c
--- /dev/null
+++ b/reference/ssmalloc/include-x86_64/queue.h
@@ -0,0 +1,192 @@
+#ifndef __QUEUE_H_
+#define __QUEUE_H_
+
+#include "atomic.h"
+#include <stdint.h>
+#include <stdlib.h>
+
+#define CACHE_LINE_SIZE (64)
+#define CACHE_ALIGN __attribute__ ((aligned (CACHE_LINE_SIZE)))
+
+#define ABA_ADDR_BIT (48)
+#define ABA_ADDR_MASK ((1L<<ABA_ADDR_BIT)-1)
+#define ABA_COUNT_MASK (~ABA_ADDR_MASK)
+#define ABA_COUNT_ONE (1L<<ABA_ADDR_BIT)
+#define ABA_ADDR(e) ((void*)((unsigned long long)(e) & ABA_ADDR_MASK))
+#define ABA_COUNT(e) ((unsigned long long)(e) & ABA_COUNT_MASK)
+
+typedef struct {
+ CACHE_ALIGN volatile unsigned long long head;
+} queue_head_t;
+
+typedef void *seq_queue_head_t;
+
+/* Multi-Consumer LIFO Queue */
+
+static inline void mc_queue_init(queue_head_t *queue)
+{
+ queue->head = 0;
+}
+
+static inline void mc_enqueue(queue_head_t *queue, void *element)
+{
+ unsigned long long old_head;
+ unsigned long long new_head;
+
+ while(1) {
+ old_head = queue->head;
+ *(unsigned long long*)element = (unsigned long long )ABA_ADDR(old_head);
+ new_head = (unsigned long long)element;
+ new_head |= ABA_COUNT(old_head) + ABA_COUNT_ONE;
+ if (compare_and_swap64(&queue->head, old_head, new_head)) {
+ return;
+ }
+ }
+}
+
+static inline void *mc_dequeue(queue_head_t *queue)
+{
+ unsigned long long old_head;
+ unsigned long long new_head;
+ void* old_addr;
+
+ while(1) {
+ old_head = queue->head;
+ old_addr = ABA_ADDR(old_head);
+ if(old_addr == NULL) {
+ return NULL;
+ }
+ new_head = *(unsigned long long*)old_addr;
+ new_head |= ABA_COUNT(old_head) + ABA_COUNT_ONE;
+ if (compare_and_swap64(&queue->head, old_head, new_head)) {
+ return old_addr;
+ }
+ }
+}
+
+/* Single-Consumer LIFO Queue */
+
+static inline void sc_queue_init(queue_head_t *queue)
+{
+ queue->head = 0;
+}
+
+static inline void sc_enqueue(queue_head_t *queue, void *element)
+{
+ unsigned long long old_head;
+ unsigned long long new_head;
+
+ while(1) {
+ old_head = queue->head;
+ *(unsigned long long*)element = old_head;
+ new_head = (unsigned long long)element;
+ if (compare_and_swap64(&queue->head, old_head, new_head)) {
+ return;
+ }
+ }
+}
+
+static inline void *sc_dequeue(queue_head_t *queue)
+{
+ unsigned long long old_head;
+ unsigned long long new_head;
+
+ while(1) {
+ old_head = queue->head;
+ if(old_head == 0) {
+ return NULL;
+ }
+ new_head = *(unsigned long long*)old_head;
+ if (compare_and_swap64(&queue->head, old_head, new_head)) {
+ return (void*)old_head;
+ }
+ }
+}
+
+static inline void *sc_chain_dequeue(queue_head_t *queue)
+{
+ unsigned long long old_head;
+ while(1) {
+ old_head = queue->head;
+ if(old_head == 0) {
+ return NULL;
+ }
+ if (compare_and_swap64(&queue->head, old_head, 0)) {
+ return (void*)old_head;
+ }
+ }
+}
+
+/* Sequential LIFO Queue */
+
+static inline void seq_queue_init(seq_queue_head_t *queue)
+{
+ *queue = NULL;
+}
+
+static inline void seq_enqueue(seq_queue_head_t *queue, void *element)
+{
+ *(void**)element = *queue;
+ *queue = element;
+}
+
+static inline void *seq_dequeue(seq_queue_head_t *queue)
+{
+ void* old_head = *queue;
+ if(old_head == NULL) {
+ return NULL;
+ }
+ *queue = *(void**)old_head;
+ return old_head;
+}
+
+#define seq_head(queue) (queue)
+
+/* Counted Queue */
+static inline void* counted_enqueue(queue_head_t *queue, void* elem) {
+ unsigned long long old_head, new_head, prev;
+ do {
+ old_head = queue->head;
+ *(unsigned long long*)elem = (unsigned long long)ABA_ADDR(old_head);
+ new_head = (unsigned long long)elem;
+ new_head |= ABA_COUNT(old_head) + ABA_COUNT_ONE;
+
+ } while((prev=compare_and_swap64_out (
+ &queue->head,
+ old_head,
+ new_head
+ ))!=old_head);
+
+ return (void*)prev;
+}
+
+static inline void* counted_chain_enqueue(queue_head_t *queue, void* elems, void* tail, int cnt) {
+ unsigned long long old_head, new_head, prev;
+ do {
+ old_head = queue->head;
+ *(unsigned long long*)tail = (unsigned long long)ABA_ADDR(old_head);
+ new_head = (unsigned long long)elems;
+ new_head |= ABA_COUNT(old_head) + ABA_COUNT_ONE * cnt;
+
+ } while((prev=compare_and_swap64_out (
+ &queue->head,
+ old_head,
+ new_head
+ ))!=old_head);
+
+ return (void*)prev;
+}
+
+static inline void* counted_chain_dequeue(queue_head_t* queue, uint32_t *count) {
+ unsigned long long old_head;
+ while(1) {
+        old_head = queue->head;
+ if (old_head == 0)
+ return(NULL);
+ if (compare_and_swap64(&queue->head, old_head, 0)) {
+ *count = ABA_COUNT(old_head) >> ABA_ADDR_BIT;
+ return(ABA_ADDR(old_head));
+ }
+ }
+}
+
+#endif
diff --git a/reference/ssmalloc/ssmalloc.c b/reference/ssmalloc/ssmalloc.c
new file mode 100644
index 0000000..df79a89
--- /dev/null
+++ b/reference/ssmalloc/ssmalloc.c
@@ -0,0 +1,785 @@
+#define _GNU_SOURCE
+#include "ssmalloc.h"
+
+/* Global metadata */
+init_state global_state = UNINITIALIZED;
+gpool_t global_pool;
+pthread_key_t destructor;
+pthread_once_t init_once = PTHREAD_ONCE_INIT;
+
+/* Mappings */
+CACHE_ALIGN int cls2size[128];
+char sizemap[256];
+char sizemap2[128];
+
+/* Private metadata */
+THREAD_LOCAL init_state thread_state = UNINITIALIZED;
+THREAD_LOCAL lheap_t *local_heap = NULL;
+
+/* System init functions */
+static void maps_init(void);
+static void thread_init(void);
+static void thread_exit(void *dummy);
+static void global_init(void);
+inline static void check_init(void);
+
+/* Global pool management functions */
+inline static void gpool_check_size(void *target);
+static int gpool_grow(void);
+static void gpool_init(void);
+static void *gpool_make_raw_chunk(void);
+inline static chunk_t *gpool_acquire_chunk(void);
+inline static void gpool_release_chunk(dchunk_t *dc);
+static lheap_t *gpool_acquire_lheap(void);
+static void gpool_release_lheap(lheap_t *lh);
+
+/* Local heap management functions */
+inline static void lheap_init(lheap_t *lh);
+inline static void lheap_replace_foreground(lheap_t *lh, int size_cls);
+
+/* Data chunk management functions */
+inline static void dchunk_change_cls(dchunk_t *dc, int size_cls);
+inline static void dchunk_init(dchunk_t *dc, int size_cls);
+inline static void dchunk_collect_garbage(dchunk_t *dc);
+inline static void *dchunk_alloc_obj(dchunk_t *dc);
+inline static dchunk_t* dchunk_extract(void *ptr);
+
+/* Object buffer management functions */
+inline static void obj_buf_flush(obj_buf_t *obuf);
+inline static void obj_buf_flush_all(lheap_t *lh);
+inline static void obj_buf_put(obj_buf_t *bbuf, dchunk_t * dc, void *ptr);
+
+/* Allocator helpers */
+inline static void *large_malloc(size_t size);
+inline static void *small_malloc(int size_cls);
+inline static void large_free(void *ptr);
+inline static void local_free(lheap_t *lh, dchunk_t *dc, void *ptr);
+inline static void remote_free(lheap_t *lh, dchunk_t *dc, void *ptr);
+static void *large_memalign(size_t boundary, size_t size);
+
+/* Misc functions */
+static void* page_alloc(void *pos, size_t size);
+static void page_free(void *pos, size_t size);
+static void touch_memory_range(void *start, size_t len);
+inline static int size2cls(size_t size);
+
+#ifdef DEBUG
+static void handler(int sig);
+#endif
+
+/* Interface */
+void *malloc(size_t size);
+void free(void* ptr);
+void *realloc(void *ptr, size_t size);
+void *calloc(size_t nmemb, size_t size);
+void *memalign(size_t boundary, size_t size);
+int posix_memalign(void **memptr, size_t alignment, size_t size);
+void *valloc(size_t size);
+void *pvalloc(size_t size);
+
+#ifdef RETURN_MEMORY
+pthread_t gpool_gc_thread;
+static void* gpool_gc(void* arg)
+{
+ pthread_detach(pthread_self());
+ char *ptr = NULL;
+
+ /* sleeptime = 100 ms */
+ struct timespec sleeptime = {0, 10000000};
+
+ while(1) {
+ nanosleep(&sleeptime, NULL);
+ ptr = (char*) queue_fetch(&global_pool.free_dc_head[get_core_id()]);
+ if(ptr) {
+ void *ptr_end = PAGE_ROUNDDOWN(ptr + CHUNK_SIZE);
+ void *ptr_start = PAGE_ROUNDUP(ptr);
+ madvise(ptr_start, (uintptr_t)ptr_end - (uintptr_t)ptr_start, MADV_DONTNEED);
+ queue_put(&global_pool.released_dc_head[get_core_id()], ptr);
+ }
+ }
+}
+#endif
+
+static void maps_init()
+{
+ int size;
+ int class;
+
+    /* sizes 8 to 64, in steps of 4 */
+ for (size = 8, class = 0; size <= 64; size += 4, class++) {
+ cls2size[class] = size;
+ }
+
+    /* sizes 80 to 128, in steps of 16 */
+ for (size = 64 + 16; size <= 128; size += 16, class++) {
+ cls2size[class] = size;
+ }
+
+    /* sizes 160 to 256, in steps of 32 */
+ for (size = 128 + 32; size <= 256; size += 32, class++) {
+ cls2size[class] = size;
+ }
+
+ for (size = 256; size < 65536; size <<= 1) {
+ cls2size[class++] = size + (size >> 1);
+ cls2size[class++] = size << 1;
+ }
+
+ int cur_class = 0;
+ int cur_size = 0;
+
+ /* init sizemap */
+ for (cur_size = 4; cur_size <= 1024; cur_size += 4) {
+ if (cur_size > cls2size[cur_class])
+ cur_class++;
+ sizemap[(cur_size - 1) >> 2] = cur_class;
+ }
+
+ /* init sizemap2 */
+ for (cur_size = 1024; cur_size <= 65536; cur_size += 512) {
+ if (cur_size > cls2size[cur_class])
+ cur_class++;
+ sizemap2[(cur_size - 1) >> 9] = cur_class;
+ }
+}
+
+static void thread_init()
+{
+ /* Register the destructor */
+ pthread_setspecific(destructor, ACTIVE);
+
+ /* Initialize thread pool */
+ local_heap = gpool_acquire_lheap();
+ thread_state = READY;
+}
+
+static void thread_exit(void *dummy)
+{
+ gpool_release_lheap(local_heap);
+}
+
+static void global_init()
+{
+#ifdef DEBUG
+ /* Register the signal handler for backtrace*/
+ signal(SIGSEGV, handler);
+#endif
+
+ pthread_key_create(&destructor, thread_exit);
+
+ /* Initialize global data */
+ gpool_init();
+ maps_init();
+ global_state = READY;
+
+#ifdef RETURN_MEMORY
+ /* Create the gc thread */
+ pthread_create(&gpool_gc_thread, NULL, gpool_gc, NULL);
+#endif
+}
+
+inline static void check_init()
+{
+ if (unlikely(thread_state != READY)) {
+ if (global_state != READY) {
+ pthread_once(&init_once, global_init);
+ }
+ thread_init();
+ }
+}
+
+inline static void gpool_check_size(void *target)
+{
+ if (global_pool.pool_end <= (char *)target) {
+ /* Global Pool Full */
+ pthread_mutex_lock(&global_pool.lock);
+ while (global_pool.pool_end <= (char *)target) {
+ gpool_grow();
+ }
+ pthread_mutex_unlock(&global_pool.lock);
+ }
+}
+
+static int gpool_grow()
+{
+ /* Enlarge the raw memory pool */
+ static int last_alloc = 8;
+ int alloc_size = ALLOC_UNIT * last_alloc;
+ if (last_alloc < 32) {
+ last_alloc *= 2;
+ }
+
+ void *mem = page_alloc((void *)global_pool.pool_end, alloc_size);
+ if (mem == MAP_FAILED) {
+ exit(-1);
+ return -1;
+ }
+
+ /* Increase the global pool size */
+ global_pool.pool_end += alloc_size;
+ return 0;
+}
+
+/* Initialize the global memory pool */
+static void gpool_init()
+{
+ global_pool.pool_start = RAW_POOL_START;
+ global_pool.pool_end = RAW_POOL_START;
+ global_pool.free_start = RAW_POOL_START;
+ //queue_init(&global_pool.free_dc_head);
+ pthread_mutex_init(&global_pool.lock, NULL);
+ gpool_grow();
+}
+
+inline static chunk_t *gpool_acquire_chunk()
+{
+ void *ptr = NULL;
+
+ /* Try to alloc a freed chunk from the free list */
+ ptr = queue_fetch(&global_pool.free_dc_head[get_core_id()]);
+ if (ptr) {
+ return (chunk_t *) ptr;
+ }
+
+#ifdef RETURN_MEMORY
+ ptr = queue_fetch(&global_pool.released_dc_head[get_core_id()]);
+ if (ptr) {
+ // XXX: Fix me
+ ((chunk_t *) ptr)->numa_node = get_core_id();
+ touch_memory_range(ptr, CHUNK_SIZE);
+ return (chunk_t *) ptr;
+ }
+#endif
+
+ /* Or just alloc a new chunk */
+ ptr = gpool_make_raw_chunk();
+ gpool_check_size(ptr);
+ ptr -= CHUNK_SIZE;
+ ((chunk_t *) ptr)->numa_node = get_core_id();
+ touch_memory_range(ptr, CHUNK_SIZE);
+ return (chunk_t *) ptr;
+}
+
+static void *gpool_make_raw_chunk()
+{
+    /* Atomically advance the free pointer of the global pool */
+ void *ret = (void *)(atmc_fetch_and_add64((unsigned long long *)
+ &global_pool.free_start,
+ CHUNK_SIZE));
+ return ret;
+}
+
+inline static void gpool_release_chunk(dchunk_t *dc)
+{
+ queue_put(&global_pool.free_dc_head[dc->numa_node], dc);
+}
+
+static lheap_t *gpool_acquire_lheap()
+{
+ lheap_t *lh;
+
+ /* Try to alloc a freed thread pool from the list */
+ lh = queue_fetch(&global_pool.free_lh_head[get_core_id()]);
+
+ /* Alloc a new one */
+ if (!lh) {
+ lh = (lheap_t *) gpool_acquire_chunk();
+ lheap_init(lh);
+ }
+ return lh;
+}
+
+static void gpool_release_lheap(lheap_t *lh)
+{
+ queue_put(&global_pool.free_lh_head[local_heap->numa_node], lh);
+}
+
+inline static void lheap_init(lheap_t * lh)
+{
+    /* Zero sizeof(lheap_t) bytes starting at free_head: this clears everything
+     * after the chunk header fields; the overshoot past the end of the struct
+     * stays within the chunk. */
+    memset(&lh->free_head, 0, sizeof(lheap_t));
+
+ int size_cls;
+ lh->dummy_chunk.size_cls = DUMMY_CLASS;
+ lh->dummy_chunk.free_blk_cnt = 1;
+
+ for (size_cls = 0; size_cls < DEFAULT_BLOCK_CLASS; size_cls++) {
+ /* Install the dummy chunk */
+ lh->foreground[size_cls] = &lh->dummy_chunk;
+ }
+}
+
+inline static void lheap_replace_foreground
+ (lheap_t * lh, int size_cls) {
+ dchunk_t *dc;
+
+ /* Try to acquire the block from background list */
+ dc = (dchunk_t *) lh->background[size_cls].head;
+ if (dc != NULL) {
+ double_list_remove(dc, &lh->background[size_cls]);
+ goto finish;
+ }
+
+ /* Try to acquire a block in the remote freed list */
+ dc = fast_queue_fetch(&lh->need_gc[size_cls]);
+ if (dc != NULL) {
+ dchunk_collect_garbage(dc);
+ goto finish;
+ }
+
+ /* Try to acquire the chunk from local pool */
+ dc = (dchunk_t *) seq_queue_fetch(&lh->free_head);
+ if (dc != NULL) {
+ lh->free_cnt--;
+ dchunk_change_cls(dc, size_cls);
+ goto finish;
+ }
+
+ /* Acquire the chunk from global pool */
+
+ dc = (dchunk_t *) gpool_acquire_chunk();
+ dc->owner = lh;
+ fast_queue_init((FastQueue *) & (dc->remote_free_head));
+ dchunk_init(dc, size_cls);
+
+ finish:
+ /* Set the foreground chunk */
+ lh->foreground[size_cls] = dc;
+ dc->state = FOREGROUND;
+}
+
+inline static void dchunk_change_cls(dchunk_t * dc, int size_cls)
+{
+ int size = cls2size[size_cls];
+ int data_offset = DCH;
+ dc->blk_cnt = (CHUNK_SIZE - data_offset) / size;
+ dc->free_blk_cnt = dc->blk_cnt;
+ dc->block_size = size;
+ dc->free_mem = (char *)dc + data_offset;
+ dc->size_cls = size_cls;
+ seq_queue_init(&dc->free_head);
+}
+
+inline static void dchunk_init(dchunk_t * dc, int size_cls)
+{
+ dc->active_link.next = NULL;
+ dc->active_link.prev = NULL;
+ dchunk_change_cls(dc, size_cls);
+}
+
+inline static void dchunk_collect_garbage(dchunk_t * dc)
+{
+ seq_head(dc->free_head) =
+ counted_chain_dequeue(&dc->remote_free_head, &dc->free_blk_cnt);
+}
+
+inline static void *dchunk_alloc_obj(dchunk_t * dc)
+{
+ void *ret;
+
+    /* Dirty implementation of dequeue that avoids one branch */
+ ret = seq_head(dc->free_head);
+
+ if (unlikely(!ret)) {
+ ret = dc->free_mem;
+ dc->free_mem += dc->block_size;
+ } else {
+ seq_head(dc->free_head) = *(void**)ret;
+ }
+
+#if 0
+ /* A clearer implementation with one more branch*/
+ ret = seq_lifo_dequeue(&dc->free_head);
+ if (unlikely(!ret)) {
+ ret = dc->free_mem;
+ dc->free_mem += dc->block_size;
+ }
+#endif
+
+ return ret;
+}
+
+inline static dchunk_t *dchunk_extract(void *ptr)
+{
+ return (dchunk_t *) ((uintptr_t)ptr - ((uintptr_t)ptr % CHUNK_SIZE));
+}
+
+inline static void obj_buf_flush(obj_buf_t * bbuf)
+{
+ void *prev;
+
+ dchunk_t *dc = bbuf->dc;
+ lheap_t *lh = dc->owner;
+
+ prev = counted_chain_enqueue(&(dc->remote_free_head),
+ seq_head(bbuf->free_head), bbuf->first, bbuf->count);
+ bbuf->count = 0;
+ bbuf->dc = NULL;
+ bbuf->first = NULL;
+ seq_head(bbuf->free_head) = NULL;
+
+    /* If I am the first thread to remote-free into this memory chunk */
+ if ((unsigned long long)prev == 0L) {
+ fast_queue_put(&(lh->need_gc[dc->size_cls]), dc);
+ }
+ return;
+}
+
+inline static void obj_buf_flush_all(lheap_t *lh) {
+ int i;
+ for (i = 0; i < BLOCK_BUF_CNT; i++) {
+ obj_buf_t *buf = &lh->block_bufs[i];
+ if (buf->count == 0)
+ continue;
+ obj_buf_flush(buf);
+ buf->dc = NULL;
+ }
+}
+
+inline static void obj_buf_put(obj_buf_t *bbuf, dchunk_t * dc, void *ptr) {
+ if (unlikely(bbuf->dc != dc)) {
+ if (bbuf->dc != NULL) {
+ obj_buf_flush(bbuf);
+ }
+ bbuf->dc = dc;
+ bbuf->first = ptr;
+ bbuf->count = 0;
+ seq_head(bbuf->free_head) = NULL;
+ }
+
+ seq_queue_put(&bbuf->free_head, ptr);
+ bbuf->count++;
+}
+
+inline static void *large_malloc(size_t size)
+{
+ size_t alloc_size = PAGE_ROUNDUP(size + CHUNK_SIZE);
+ void *mem = page_alloc(NULL, alloc_size);
+ void *mem_start = (char*)mem + CHUNK_SIZE - CACHE_LINE_SIZE;
+ large_header_t *header = (large_header_t *)dchunk_extract(mem_start);
+
+    /* If there is enough space for the header of a large block */
+ intptr_t distance = (intptr_t)mem_start - (intptr_t)header;
+ if (distance >= sizeof(large_header_t)) {
+ header->alloc_size = alloc_size;
+ header->mem = mem;
+ header->owner = LARGE_OWNER;
+ return mem_start;
+ }
+
+    /* If not, retry the allocation */
+ void *ret = large_malloc(size);
+ page_free(mem, alloc_size);
+ return ret;
+}
+
+inline static void *small_malloc(int size_cls)
+{
+ lheap_t *lh = local_heap;
+ dchunk_t *dc;
+ void *ret;
+ retry:
+ dc = lh->foreground[size_cls];
+ ret = dchunk_alloc_obj(dc);
+
+ /* Check if the datachunk is full */
+ if (unlikely(--dc->free_blk_cnt == 0)) {
+ dc->state = FULL;
+ lheap_replace_foreground(lh, size_cls);
+ if (unlikely(dc->size_cls == DUMMY_CLASS)) {
+ /* A dummy chunk */
+ dc->free_blk_cnt = 1;
+ goto retry;
+ }
+ }
+
+ return ret;
+}
+
+inline static void large_free(void *ptr)
+{
+ large_header_t *header = (large_header_t*)dchunk_extract(ptr);
+ page_free(header->mem, header->alloc_size);
+}
+
+inline static void local_free(lheap_t * lh, dchunk_t * dc, void *ptr)
+{
+ unsigned int free_blk_cnt = ++dc->free_blk_cnt;
+ seq_queue_put(&dc->free_head, ptr);
+
+ switch (dc->state) {
+ case FULL:
+ double_list_insert_front(dc, &lh->background[dc->size_cls]);
+ dc->state = BACKGROUND;
+ break;
+ case BACKGROUND:
+ if (unlikely(free_blk_cnt == dc->blk_cnt)) {
+ int free_cnt = lh->free_cnt;
+ double_list_remove(dc, &lh->background[dc->size_cls]);
+
+ if (free_cnt >= MAX_FREE_CHUNK) {
+ gpool_release_chunk(dc);
+ } else {
+ seq_queue_put(&lh->free_head, dc);
+ lh->free_cnt = free_cnt + 1;
+ }
+ }
+ break;
+ case FOREGROUND:
+ /* Tada.. */
+ break;
+ }
+}
+
+THREAD_LOCAL int buf_cnt;
+inline static void remote_free(lheap_t * lh, dchunk_t * dc, void *ptr)
+{
+ /* Put the object in a local buffer rather than return it to owner */
+ int tag = ((unsigned long long)dc / CHUNK_SIZE) % BLOCK_BUF_CNT;
+ obj_buf_t *bbuf = &lh->block_bufs[tag];
+ obj_buf_put(bbuf, dc, ptr);
+
+ /* Periodically flush buffered remote objects */
+ if ((buf_cnt++ & 0xFFFF) == 0) {
+ obj_buf_flush_all(lh);
+ }
+}
+
+static void touch_memory_range(void *addr, size_t len)
+{
+ char *ptr = (char *)addr;
+ char *end = ptr + len;
+
+ for (; ptr < end; ptr += PAGE_SIZE) {
+ *ptr = 0;
+ }
+}
+
+static void *large_memalign(size_t boundary, size_t size) {
+ /* Alloc a large enough memory block */
+ size_t padding = boundary + CHUNK_SIZE;
+ size_t alloc_size = PAGE_ROUNDUP(size + padding);
+ void *mem = page_alloc(NULL, alloc_size);
+
+ /* Align up the address to boundary */
+ void *mem_start =
+ (void*)((uintptr_t)((char*)mem + padding) & ~(boundary - 1));
+
+    /* Extract space for a header */
+ large_header_t *header =
+ (large_header_t *)dchunk_extract(mem_start);
+
+    /* If there is enough space for the header of a large block */
+ intptr_t distance = (intptr_t)mem_start - (intptr_t)header;
+ if (distance >= sizeof(large_header_t)) {
+ header->alloc_size = alloc_size;
+ header->mem = mem;
+ header->owner = LARGE_OWNER;
+ return mem_start;
+ }
+
+ /* If not, retry allocation */
+ void *ret = NULL;
+
+    /* Avoid an infinite loop if the application calls memalign(CHUNK_SIZE, size),
+     * although that is actually illegal
+     */
+ if (boundary % CHUNK_SIZE != 0) {
+ ret = large_memalign(boundary, size);
+ }
+ page_free(mem, alloc_size);
+ return ret;
+}
+
+#ifdef DEBUG
+/* Signal handler for debugging use */
+static void handler(int sig)
+{
+ void *array[10];
+ size_t size;
+
+ /* get void*'s for all entries on the stack */
+ size = backtrace(array, 10);
+
+ /* print out all the frames to stderr */
+ fprintf(stderr, "Error: signal %d:\n", sig);
+ backtrace_symbols_fd(array, size, 2);
+ exit(1);
+}
+#endif
+
+static void *page_alloc(void *pos, size_t size)
+{
+ return mmap(pos,
+ size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+}
+
+static void page_free(void *pos, size_t size)
+{
+ munmap(pos, size);
+}
+
+inline static int size2cls(size_t size)
+{
+ int ret;
+ if (likely(size <= 1024)) {
+ ret = sizemap[(size - 1) >> 2];
+ } else if (size <= 65536) {
+ ret = sizemap2[(size - 1) >> 9];
+ } else {
+ ret = LARGE_CLASS;
+ }
+ return ret;
+}
+
+void *malloc(size_t size)
+{
+ void *ret = NULL;
+
+ /* Initialize the allocator */
+ check_init();
+
+ /* Deal with zero-size allocation */
+ size += (size == 0);
+
+#if 0
+ /* The expression above is equivalent to the code below */
+ if (unlikely(size == 0)) {
+ size = 1;
+ }
+#endif
+
+ int size_cls = size2cls(size);
+ if (likely(size_cls < DEFAULT_BLOCK_CLASS)) {
+ ret = small_malloc(size_cls);
+ } else {
+ ret = large_malloc(size);
+ }
+ return ret;
+}
+
+void free(void *ptr)
+{
+ if(ptr == NULL) {
+ return;
+ }
+
+ dchunk_t *dc = dchunk_extract(ptr);
+ lheap_t *lh = local_heap;
+ lheap_t *target_lh = dc->owner;
+
+ if (likely(target_lh == lh)) {
+ local_free(lh, dc, ptr);
+ } else if(likely(target_lh != LARGE_OWNER)){
+ check_init();
+ lh = local_heap;
+ remote_free(lh, dc, ptr);
+ } else {
+ large_free(ptr);
+ }
+}
+
+void *realloc(void* ptr, size_t size)
+{
+ /* Handle special cases */
+ if (ptr == NULL) {
+ void *ret = malloc(size);
+ return ret;
+ }
+
+    if (size == 0) {
+        free(ptr);
+        return NULL;
+    }
+
+ dchunk_t *dc = dchunk_extract(ptr);
+ if (dc->owner != LARGE_OWNER) {
+ int old_size = cls2size[dc->size_cls];
+
+        /* Does not exceed the current size; return the same pointer */
+ if (size <= old_size) {
+ return ptr;
+ }
+
+ /* Alloc a new block */
+ void *new_ptr = malloc(size);
+ memcpy(new_ptr, ptr, old_size);
+ free(ptr);
+ return new_ptr;
+ } else {
+ large_header_t *header = (large_header_t *)dc;
+ size_t alloc_size = header->alloc_size;
+ void* mem = header->mem;
+ size_t offset = (uintptr_t)ptr - (uintptr_t)mem;
+ size_t old_size = alloc_size - offset;
+
+        /* Does not exceed the current size; return the same pointer */
+ if(size <= old_size) {
+ return ptr;
+ }
+
+ /* Try to do mremap */
+ int new_size = PAGE_ROUNDUP(size + CHUNK_SIZE);
+ mem = mremap(mem, alloc_size, new_size, MREMAP_MAYMOVE);
+ void* mem_start = (void*)((uintptr_t)mem + offset);
+ header = (large_header_t*)dchunk_extract(mem_start);
+
+ intptr_t distance = (intptr_t)mem_start - (intptr_t)header;
+ if (distance >= sizeof(large_header_t)) {
+ header->alloc_size = new_size;
+ header->mem = mem;
+ header->owner = LARGE_OWNER;
+ return mem_start;
+ }
+
+ void* new_ptr = large_malloc(size);
+ memcpy(new_ptr, mem_start, old_size);
+ free(mem);
+ return new_ptr;
+ }
+}
+
+void *calloc(size_t nmemb, size_t size)
+{
+ void *ptr;
+ ptr = malloc(nmemb * size);
+ if (!ptr) {
+ return NULL;
+ }
+ return memset(ptr, 0, nmemb * size);
+}
+
+void *memalign(size_t boundary, size_t size) {
+    /* Initialize the allocator: memalign() may be the first allocation call
+     * a thread makes */
+    check_init();
+
+    /* Deal with zero-size allocation */
+    size += (size == 0);
+ if(boundary <= 256 && size <= 65536) {
+ /* In this case, we handle it as small allocations */
+ int boundary_cls = size2cls(boundary);
+ int size_cls = size2cls(size);
+ int alloc_cls = max(boundary_cls, size_cls);
+ return small_malloc(alloc_cls);
+ } else {
+ /* Handle it as a special large allocation */
+ return large_memalign(boundary, size);
+ }
+}
+
+int posix_memalign(void **memptr, size_t alignment, size_t size)
+{
+ *memptr = memalign(alignment, size);
+ if (*memptr) {
+ return 0;
+ } else {
+        /* Strictly we should return EINVAL or ENOMEM according to the actual error */
+ return -1;
+ }
+}
+
+void *valloc(size_t size)
+{
+ return memalign(PAGE_SIZE, size);
+}
+
+void *pvalloc(size_t size)
+{
+ fprintf(stderr, "pvalloc() called. Not implemented! Exiting.\n");
+ exit(1);
+}
diff --git a/reference/ssmalloc/ssmalloc.h b/reference/ssmalloc/ssmalloc.h
new file mode 100644
index 0000000..fb10597
--- /dev/null
+++ b/reference/ssmalloc/ssmalloc.h
@@ -0,0 +1,177 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sched.h>
+#include <time.h>
+
+#include <assert.h>
+#include <execinfo.h>
+#include <signal.h>
+
+#include "atomic.h"
+#include "bitops.h"
+#include "queue.h"
+#include "double-list.h"
+#include "cpu.h"
+
+/* Configurations */
+#define CHUNK_DATA_SIZE (16*PAGE_SIZE)
+#define ALLOC_UNIT (4*1024*1024)
+#define MAX_FREE_SIZE (4*1024*1024)
+#define RAW_POOL_START ((void*)((0x600000000000/CHUNK_SIZE+1)*CHUNK_SIZE))
+#define BLOCK_BUF_CNT (16)
+
+// #define RETURN_MEMORY
+// #define DEBUG
+
+/* Other */
+#define CHUNK_SIZE (CHUNK_DATA_SIZE+sizeof(dchunk_t))
+#define CHUNK_MASK (~(CHUNK_SIZE-1))
+#define LARGE_CLASS (100)
+#define DUMMY_CLASS (101)
+#define DCH (sizeof(dchunk_t))
+#define MAX_FREE_CHUNK (MAX_FREE_SIZE/CHUNK_SIZE)
+#define LARGE_OWNER ((void*)0xDEAD)
+#define ACTIVE ((void*)1)
+
+/* Utility Macros */
+#define ROUNDUP(x,n) ((x+n-1)&(~(n-1)))
+#define ROUNDDOWN(x,n) ((x)&(~(n-1)))
+#define PAGE_ROUNDUP(x) (ROUNDUP((uintptr_t)x,PAGE_SIZE))
+#define PAGE_ROUNDDOWN(x) (ROUNDDOWN((uintptr_t)x,PAGE_SIZE))
+#define CACHE_ALIGN __attribute__ ((aligned (CACHE_LINE_SIZE)))
+#define THREAD_LOCAL __attribute__ ((tls_model ("initial-exec"))) __thread
+#define likely(x) __builtin_expect(!!(x),1)
+#define unlikely(x) __builtin_expect(!!(x),0)
+
+/* Multi consumer queue */
+#define queue_init(head)\
+ mc_queue_init(head)
+#define queue_put(head,elem)\
+ mc_enqueue(head,elem)
+#define queue_fetch(head)\
+ mc_dequeue(head)
+typedef queue_head_t Queue;
+
+/* Single consumer queue */
+#define fast_queue_init(head)\
+ sc_queue_init(head)
+#define fast_queue_put(head,elem)\
+ sc_enqueue(head,elem)
+#define fast_queue_fetch(head)\
+ sc_dequeue(head)
+#define fast_queue_chain_fetch(head)\
+ sc_chain_dequeue(head)
+typedef queue_head_t FastQueue;
+
+/* Sequential queue */
+#define seq_queue_init(head)\
+ seq_queue_init(head)
+#define seq_queue_put(head,elem)\
+ seq_enqueue(head,elem)
+#define seq_queue_fetch(head)\
+ seq_dequeue(head)
+#define fast_queue_chain_put(head)\
+ seq_chain_enqueue(head)
+typedef seq_queue_head_t SeqQueue;
+
+/* Type definitions */
+typedef enum {
+ UNINITIALIZED,
+ READY
+} init_state;
+
+typedef enum {
+ FOREGROUND,
+ BACKGROUND,
+ FULL
+} dchunk_state;
+
+typedef struct lheap_s lheap_t;
+typedef struct gpool_s gpool_t;
+typedef struct dchunk_s dchunk_t;
+typedef struct chunk_s chunk_t;
+typedef struct obj_buf_s obj_buf_t;
+typedef struct large_header_s large_header_t;
+
+typedef double_list_t LinkedList;
+typedef double_list_elem_t LinkedListElem;
+
+struct large_header_s {
+ CACHE_ALIGN size_t alloc_size;
+ void* mem;
+ CACHE_ALIGN lheap_t *owner;
+};
+
+struct chunk_s {
+ CACHE_ALIGN LinkedListElem active_link;
+ uint32_t numa_node;
+};
+
+/* Data chunk header */
+struct dchunk_s {
+ /* Local Area */
+ CACHE_ALIGN LinkedListElem active_link;
+ uint32_t numa_node;
+
+ /* Read Area */
+ CACHE_ALIGN lheap_t * owner;
+ uint32_t size_cls;
+
+ /* Local Write Area */
+ CACHE_ALIGN dchunk_state state;
+ uint32_t free_blk_cnt;
+ uint32_t blk_cnt;
+ SeqQueue free_head;
+ uint32_t block_size;
+ char *free_mem;
+
+ /* Remote Write Area */
+ CACHE_ALIGN FastQueue remote_free_head;
+};
+
+struct gpool_s {
+ pthread_mutex_t lock;
+ volatile char *pool_start;
+ volatile char *pool_end;
+ volatile char *free_start;
+ Queue free_dc_head[MAX_CORE_ID];
+ Queue free_lh_head[MAX_CORE_ID];
+ Queue released_dc_head[MAX_CORE_ID];
+};
+
+struct obj_buf_s {
+ void *dc;
+ void *first;
+ SeqQueue free_head;
+ int count;
+};
+
+/* Per-thread data chunk pool */
+struct lheap_s {
+ CACHE_ALIGN LinkedListElem active_link;
+ uint32_t numa_node;
+ SeqQueue free_head;
+ uint32_t free_cnt;
+
+ dchunk_t *foreground[DEFAULT_BLOCK_CLASS];
+ LinkedList background[DEFAULT_BLOCK_CLASS];
+ dchunk_t dummy_chunk;
+ obj_buf_t block_bufs[BLOCK_BUF_CNT];
+
+ CACHE_ALIGN FastQueue need_gc[DEFAULT_BLOCK_CLASS];
+};
+
+static inline int max(int a, int b)
+{
+ return (a > b) ? a : b;
+}
+
+void *malloc(size_t __size);
+void *realloc(void *__ptr, size_t __size);
+void free(void *__ptr);