aboutsummaryrefslogtreecommitdiff
path: root/boehm-gc/mark.c
diff options
context:
space:
mode:
Diffstat (limited to 'boehm-gc/mark.c')
-rw-r--r--boehm-gc/mark.c126
1 files changed, 105 insertions, 21 deletions
diff --git a/boehm-gc/mark.c b/boehm-gc/mark.c
index 67085fbcc29..827b219018d 100644
--- a/boehm-gc/mark.c
+++ b/boehm-gc/mark.c
@@ -38,7 +38,7 @@ word x;
/* mark_proc GC_mark_procs[MAX_MARK_PROCS] = {0} -- declared in gc_priv.h */
-word GC_n_mark_procs = 0;
+word GC_n_mark_procs = GC_RESERVED_MARK_PROCS;
/* Initialize GC_obj_kinds properly and standard free lists properly. */
/* This must be done statically since they may be accessed before */
@@ -365,20 +365,20 @@ GC_bool GC_mark_stack_empty()
/* with IGNORE_OFF_PAGE set. */
/*ARGSUSED*/
# ifdef PRINT_BLACK_LIST
- word GC_find_start(current, hhdr, source)
+ ptr_t GC_find_start(current, hhdr, source)
word source;
# else
- word GC_find_start(current, hhdr)
+ ptr_t GC_find_start(current, hhdr)
# define source 0
# endif
-register word current;
+register ptr_t current;
register hdr * hhdr;
{
# ifdef ALL_INTERIOR_POINTERS
if (hhdr != 0) {
- register word orig = current;
+ register ptr_t orig = current;
- current = (word)HBLKPTR(current) + HDR_BYTES;
+ current = (ptr_t)HBLKPTR(current) + HDR_BYTES;
do {
current = current - HBLKSIZE*(word)hhdr;
hhdr = HDR(current);
@@ -429,6 +429,12 @@ mse * msp;
* is never 0. A mark stack entry never has size 0.
* We try to traverse on the order of a hblk of memory before we return.
* Caller is responsible for calling this until the mark stack is empty.
+ * Note that this is the most performance critical routine in the
+ * collector. Hence it contains all sorts of ugly hacks to speed
+ * things up. In particular, we avoid procedure calls on the common
+ * path, we take advantage of peculiarities of the mark descriptor
+ * encoding, we optionally maintain a cache for the block address to
+ * header mapping, we prefetch when an object is "grayed", etc.
*/
void GC_mark_from_mark_stack()
{
@@ -443,9 +449,12 @@ void GC_mark_from_mark_stack()
register word descr;
register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
register ptr_t least_ha = GC_least_plausible_heap_addr;
+ DECLARE_HDR_CACHE;
+
# define SPLIT_RANGE_WORDS 128 /* Must be power of 2. */
GC_objects_are_marked = TRUE;
+ INIT_HDR_CACHE;
# ifdef OS2 /* Use untweaked version to circumvent compiler problem */
while (GC_mark_stack_top_reg >= GC_mark_stack_reg && credit >= 0) {
# else
@@ -453,8 +462,13 @@ void GC_mark_from_mark_stack()
>= 0) {
# endif
current_p = GC_mark_stack_top_reg -> mse_start;
- retry:
descr = GC_mark_stack_top_reg -> mse_descr;
+ retry:
+ /* current_p and descr describe the current object. */
+ /* *GC_mark_stack_top_reg is vacant. */
+ /* The following is 0 only for small objects described by a simple */
+ /* length descriptor. For many applications this is the common */
+ /* case, so we try to detect it quickly. */
if (descr & ((~(WORDS_TO_BYTES(SPLIT_RANGE_WORDS) - 1)) | DS_TAGS)) {
word tag = descr & DS_TAGS;
@@ -465,8 +479,8 @@ void GC_mark_from_mark_stack()
/* stack. */
GC_mark_stack_top_reg -> mse_start =
limit = current_p + SPLIT_RANGE_WORDS-1;
- GC_mark_stack_top_reg -> mse_descr -=
- WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
+ GC_mark_stack_top_reg -> mse_descr =
+ descr - WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
/* Make sure that pointers overlapping the two ranges are */
/* considered. */
limit = (word *)((char *)limit + sizeof(word) - ALIGNMENT);
@@ -479,8 +493,8 @@ void GC_mark_from_mark_stack()
if ((signed_word)descr < 0) {
current = *current_p;
if ((ptr_t)current >= least_ha && (ptr_t)current < greatest_ha) {
- PUSH_CONTENTS(current, GC_mark_stack_top_reg, mark_stack_limit,
- current_p, exit1);
+ PUSH_CONTENTS((ptr_t)current, GC_mark_stack_top_reg,
+ mark_stack_limit, current_p, exit1);
}
}
descr <<= 1;
@@ -496,24 +510,94 @@ void GC_mark_from_mark_stack()
mark_stack_limit, ENV(descr));
continue;
case DS_PER_OBJECT:
- GC_mark_stack_top_reg -> mse_descr =
- *(word *)((ptr_t)current_p + descr - tag);
+ if ((signed_word)descr >= 0) {
+ /* Descriptor is in the object. */
+ descr = *(word *)((ptr_t)current_p + descr - DS_PER_OBJECT);
+ } else {
+ /* Descriptor is in type descriptor pointed to by first */
+ /* word in object. */
+ ptr_t type_descr = *(ptr_t *)current_p;
+ /* type_descr is either a valid pointer to the descriptor */
+ /* structure, or this object was on a free list. If it */
+ /* it was anything but the last object on the free list, */
+ /* we will misinterpret the next object on the free list as */
+ /* the type descriptor, and get a 0 GC descriptor, which */
+ /* is ideal. Unfortunately, we need to check for the last */
+ /* object case explicitly. */
+ if (0 == type_descr) {
+ /* Rarely executed. */
+ GC_mark_stack_top_reg--;
+ continue;
+ }
+ descr = *(word *)(type_descr
+ - (descr - (DS_PER_OBJECT - INDIR_PER_OBJ_BIAS)));
+ }
goto retry;
}
- } else {
+ } else /* Small object with length descriptor */ {
GC_mark_stack_top_reg--;
limit = (word *)(((ptr_t)current_p) + (word)descr);
}
/* The simple case in which we're scanning a range. */
credit -= (ptr_t)limit - (ptr_t)current_p;
limit -= 1;
- while (current_p <= limit) {
- current = *current_p;
- if ((ptr_t)current >= least_ha && (ptr_t)current < greatest_ha) {
- PUSH_CONTENTS(current, GC_mark_stack_top_reg,
- mark_stack_limit, current_p, exit2);
+ {
+# define PREF_DIST 4
+
+# ifndef SMALL_CONFIG
+ word deferred;
+
+ /* Try to prefetch the next pointer to be examined asap. */
+ /* Empirically, this also seems to help slightly without */
+ /* prefetches, at least on linux/X86. Presumably this loop */
+ /* ends up with less register pressure, and gcc thus ends up */
+ /* generating slightly better code. Overall gcc code quality */
+ /* for this loop is still not great. */
+ for(;;) {
+ PREFETCH((ptr_t)limit - PREF_DIST*CACHE_LINE_SIZE);
+ deferred = *limit;
+ limit = (word *)((char *)limit - ALIGNMENT);
+ if ((ptr_t)deferred >= least_ha && (ptr_t)deferred < greatest_ha) {
+ PREFETCH(deferred);
+ break;
+ }
+ if (current_p > limit) goto next_object;
+ /* Unroll once, so we don't do too many of the prefetches */
+ /* based on limit. */
+ deferred = *limit;
+ limit = (word *)((char *)limit - ALIGNMENT);
+ if ((ptr_t)deferred >= least_ha && (ptr_t)deferred < greatest_ha) {
+ PREFETCH(deferred);
+ break;
+ }
+ if (current_p > limit) goto next_object;
+ }
+# endif
+
+ while (current_p <= limit) {
+ /* Empirically, unrolling this loop doesn't help a lot. */
+ /* Since HC_PUSH_CONTENTS expands to a lot of code, */
+ /* we don't. */
+ current = *current_p;
+ PREFETCH((ptr_t)current_p + PREF_DIST*CACHE_LINE_SIZE);
+ if ((ptr_t)current >= least_ha && (ptr_t)current < greatest_ha) {
+ /* Prefetch the contents of the object we just pushed. It's */
+ /* likely we will need them soon. */
+ PREFETCH(current);
+ HC_PUSH_CONTENTS((ptr_t)current, GC_mark_stack_top_reg,
+ mark_stack_limit, current_p, exit2);
+ }
+ current_p = (word *)((char *)current_p + ALIGNMENT);
}
- current_p = (word *)((char *)current_p + ALIGNMENT);
+
+# ifndef SMALL_CONFIG
+ /* We still need to mark the entry we previously prefetched. */
+ /* We alrady know that it passes the preliminary pointer */
+ /* validity test. */
+ HC_PUSH_CONTENTS((ptr_t)deferred, GC_mark_stack_top_reg,
+ mark_stack_limit, current_p, exit4);
+ next_object:;
+# endif
}
}
GC_mark_stack_top = GC_mark_stack_top_reg;
@@ -686,7 +770,7 @@ word p;
return;
}
# endif
- GC_PUSH_ONE_STACK(p, 0);
+ GC_PUSH_ONE_STACK(p, MARKED_FROM_REGISTER);
}
# ifdef __STDC__