Diffstat (limited to 'libgo/go/runtime/mprof.go')
-rw-r--r-- | libgo/go/runtime/mprof.go | 221
1 file changed, 164 insertions, 57 deletions
diff --git a/libgo/go/runtime/mprof.go b/libgo/go/runtime/mprof.go
index 87f84a72acd..f31c88c5cae 100644
--- a/libgo/go/runtime/mprof.go
+++ b/libgo/go/runtime/mprof.go
@@ -64,27 +64,70 @@ type memRecord struct {
 	// come only after a GC during concurrent sweeping. So if we would
 	// naively count them, we would get a skew toward mallocs.
 	//
-	// Mallocs are accounted in recent stats.
-	// Explicit frees are accounted in recent stats.
-	// GC frees are accounted in prev stats.
-	// After GC prev stats are added to final stats and
-	// recent stats are moved into prev stats.
-	allocs      uintptr
-	frees       uintptr
-	alloc_bytes uintptr
-	free_bytes  uintptr
-
-	// changes between next-to-last GC and last GC
-	prev_allocs      uintptr
-	prev_frees       uintptr
-	prev_alloc_bytes uintptr
-	prev_free_bytes  uintptr
-
-	// changes since last GC
-	recent_allocs      uintptr
-	recent_frees       uintptr
-	recent_alloc_bytes uintptr
-	recent_free_bytes  uintptr
+	// Hence, we delay information to get consistent snapshots as
+	// of mark termination. Allocations count toward the next mark
+	// termination's snapshot, while sweep frees count toward the
+	// previous mark termination's snapshot:
+	//
+	//              MT          MT          MT          MT
+	//             .·|         .·|         .·|         .·|
+	//          .·˙  |      .·˙  |      .·˙  |      .·˙  |
+	//       .·˙     |   .·˙     |   .·˙     |   .·˙     |
+	//    .·˙        |.·˙        |.·˙        |.·˙        |
+	//
+	//       alloc → ▲ ← free
+	//               ┠┅┅┅┅┅┅┅┅┅┅┅P
+	//       C+2     →    C+1    →  C
+	//
+	//                   alloc → ▲ ← free
+	//                           ┠┅┅┅┅┅┅┅┅┅┅┅P
+	//                   C+2     →    C+1    →  C
+	//
+	// Since we can't publish a consistent snapshot until all of
+	// the sweep frees are accounted for, we wait until the next
+	// mark termination ("MT" above) to publish the previous mark
+	// termination's snapshot ("P" above). To do this, allocation
+	// and free events are accounted to *future* heap profile
+	// cycles ("C+n" above) and we only publish a cycle once all
+	// of the events from that cycle must be done. Specifically:
+	//
+	// Mallocs are accounted to cycle C+2.
+	// Explicit frees are accounted to cycle C+2.
+	// GC frees (done during sweeping) are accounted to cycle C+1.
+	//
+	// After mark termination, we increment the global heap
+	// profile cycle counter and accumulate the stats from cycle C
+	// into the active profile.

+	// active is the currently published profile. A profiling
+	// cycle can be accumulated into active once its complete.
+	active memRecordCycle
+
+	// future records the profile events we're counting for cycles
+	// that have not yet been published. This is ring buffer
+	// indexed by the global heap profile cycle C and stores
+	// cycles C, C+1, and C+2. Unlike active, these counts are
+	// only for a single cycle; they are not cumulative across
+	// cycles.
+	//
+	// We store cycle C here because there's a window between when
+	// C becomes the active cycle and when we've flushed it to
+	// active.
+	future [3]memRecordCycle
+}
+
+// memRecordCycle
+type memRecordCycle struct {
+	allocs, frees           uintptr
+	alloc_bytes, free_bytes uintptr
+}
+
+// add accumulates b into a. It does not zero b.
+func (a *memRecordCycle) add(b *memRecordCycle) {
+	a.allocs += b.allocs
+	a.frees += b.frees
+	a.alloc_bytes += b.alloc_bytes
+	a.free_bytes += b.free_bytes
 }
 
 // A blockRecord is the bucket data for a bucket of type blockProfile,
@@ -100,8 +143,21 @@ var (
 	xbuckets  *bucket // mutex profile buckets
 	buckhash  *[179999]*bucket
 	bucketmem uintptr
+
+	mProf struct {
+		// All fields in mProf are protected by proflock.
+
+		// cycle is the global heap profile cycle. This wraps
+		// at mProfCycleWrap.
+		cycle uint32
+		// flushed indicates that future[cycle] in all buckets
+		// has been flushed to the active profile.
+		flushed bool
+	}
 )
 
+const mProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24)
+
 // newBucket allocates a bucket with the given type and number of stack entries.
 func newBucket(typ bucketType, nstk int) *bucket {
 	size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(location{})
@@ -212,30 +268,71 @@ func eqslice(x, y []location) bool {
 	}
 	return true
 }
 
-func mprof_GC() {
+// mProf_NextCycle publishes the next heap profile cycle and creates a
+// fresh heap profile cycle. This operation is fast and can be done
+// during STW. The caller must call mProf_Flush before calling
+// mProf_NextCycle again.
+//
+// This is called by mark termination during STW so allocations and
+// frees after the world is started again count towards a new heap
+// profiling cycle.
+func mProf_NextCycle() {
+	lock(&proflock)
+	// We explicitly wrap mProf.cycle rather than depending on
+	// uint wraparound because the memRecord.future ring does not
+	// itself wrap at a power of two.
+	mProf.cycle = (mProf.cycle + 1) % mProfCycleWrap
+	mProf.flushed = false
+	unlock(&proflock)
+}
+
+// mProf_Flush flushes the events from the current heap profiling
+// cycle into the active profile. After this it is safe to start a new
+// heap profiling cycle with mProf_NextCycle.
+//
+// This is called by GC after mark termination starts the world. In
+// contrast with mProf_NextCycle, this is somewhat expensive, but safe
+// to do concurrently.
+func mProf_Flush() {
+	lock(&proflock)
+	if !mProf.flushed {
+		mProf_FlushLocked()
+		mProf.flushed = true
+	}
+	unlock(&proflock)
+}
+
+func mProf_FlushLocked() {
+	c := mProf.cycle
 	for b := mbuckets; b != nil; b = b.allnext {
 		mp := b.mp()
-		mp.allocs += mp.prev_allocs
-		mp.frees += mp.prev_frees
-		mp.alloc_bytes += mp.prev_alloc_bytes
-		mp.free_bytes += mp.prev_free_bytes
-		mp.prev_allocs = mp.recent_allocs
-		mp.prev_frees = mp.recent_frees
-		mp.prev_alloc_bytes = mp.recent_alloc_bytes
-		mp.prev_free_bytes = mp.recent_free_bytes
-
-		mp.recent_allocs = 0
-		mp.recent_frees = 0
-		mp.recent_alloc_bytes = 0
-		mp.recent_free_bytes = 0
+		// Flush cycle C into the published profile and clear
+		// it for reuse.
+		mpc := &mp.future[c%uint32(len(mp.future))]
+		mp.active.add(mpc)
+		*mpc = memRecordCycle{}
 	}
 }
 
-// Record that a gc just happened: all the 'recent' statistics are now real.
-func mProf_GC() {
+// mProf_PostSweep records that all sweep frees for this GC cycle have
+// completed. This has the effect of publishing the heap profile
+// snapshot as of the last mark termination without advancing the heap
+// profile cycle.
+func mProf_PostSweep() {
 	lock(&proflock)
-	mprof_GC()
+	// Flush cycle C+1 to the active profile so everything as of
+	// the last mark termination becomes visible. *Don't* advance
+	// the cycle, since we're still accumulating allocs in cycle
+	// C+2, which have to become C+1 in the next mark termination
+	// and so on.
+	c := mProf.cycle
+	for b := mbuckets; b != nil; b = b.allnext {
+		mp := b.mp()
+		mpc := &mp.future[(c+1)%uint32(len(mp.future))]
+		mp.active.add(mpc)
+		*mpc = memRecordCycle{}
+	}
 	unlock(&proflock)
 }
 
@@ -245,9 +342,11 @@ func mProf_Malloc(p unsafe.Pointer, size uintptr) {
 	nstk := callers(4, stk[:])
 	lock(&proflock)
 	b := stkbucket(memProfile, size, stk[:nstk], true)
+	c := mProf.cycle
 	mp := b.mp()
-	mp.recent_allocs++
-	mp.recent_alloc_bytes += size
+	mpc := &mp.future[(c+2)%uint32(len(mp.future))]
+	mpc.allocs++
+	mpc.alloc_bytes += size
 	unlock(&proflock)
 
 	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
@@ -262,9 +361,11 @@ func mProf_Malloc(p unsafe.Pointer, size uintptr) {
 // Called when freeing a profiled block.
 func mProf_Free(b *bucket, size uintptr) {
 	lock(&proflock)
+	c := mProf.cycle
 	mp := b.mp()
-	mp.prev_frees++
-	mp.prev_free_bytes += size
+	mpc := &mp.future[(c+1)%uint32(len(mp.future))]
+	mpc.frees++
+	mpc.free_bytes += size
 	unlock(&proflock)
 }
 
@@ -298,7 +399,7 @@ func blockevent(cycles int64, skip int) {
 		cycles = 1
 	}
 	if blocksampled(cycles) {
-		saveblockevent(cycles, skip+1, blockProfile, &blockprofilerate)
+		saveblockevent(cycles, skip+1, blockProfile)
 	}
 }
 
@@ -310,7 +411,7 @@ func blocksampled(cycles int64) bool {
 	return true
 }
 
-func saveblockevent(cycles int64, skip int, which bucketType, ratep *uint64) {
+func saveblockevent(cycles int64, skip int, which bucketType) {
 	gp := getg()
 	var nstk int
 	var stk [maxStack]location
@@ -355,7 +456,7 @@ func mutexevent(cycles int64, skip int) {
 	// TODO(pjw): measure impact of always calling fastrand vs using something
 	// like malloc.go:nextSample()
 	if rate > 0 && int64(fastrand())%rate == 0 {
-		saveblockevent(cycles, skip+1, mutexProfile, &mutexprofilerate)
+		saveblockevent(cycles, skip+1, mutexProfile)
 	}
 }
 
@@ -443,13 +544,17 @@ func (r *MemProfileRecord) Stack() []uintptr {
 // of calling MemProfile directly.
 func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
 	lock(&proflock)
+	// If we're between mProf_NextCycle and mProf_Flush, take care
+	// of flushing to the active profile so we only have to look
+	// at the active profile below.
+	mProf_FlushLocked()
 	clear := true
 	for b := mbuckets; b != nil; b = b.allnext {
 		mp := b.mp()
-		if inuseZero || mp.alloc_bytes != mp.free_bytes {
+		if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
 			n++
 		}
-		if mp.allocs != 0 || mp.frees != 0 {
+		if mp.active.allocs != 0 || mp.active.frees != 0 {
 			clear = false
 		}
 	}
@@ -457,13 +562,15 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
 		// Absolutely no data, suggesting that a garbage collection
 		// has not yet happened. In order to allow profiling when
 		// garbage collection is disabled from the beginning of execution,
-		// accumulate stats as if a GC just happened, and recount buckets.
-		mprof_GC()
-		mprof_GC()
+		// accumulate all of the cycles, and recount buckets.
 		n = 0
 		for b := mbuckets; b != nil; b = b.allnext {
 			mp := b.mp()
-			if inuseZero || mp.alloc_bytes != mp.free_bytes {
+			for c := range mp.future {
+				mp.active.add(&mp.future[c])
+				mp.future[c] = memRecordCycle{}
+			}
+			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
 				n++
 			}
 		}
@@ -473,7 +580,7 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
 		idx := 0
 		for b := mbuckets; b != nil; b = b.allnext {
 			mp := b.mp()
-			if inuseZero || mp.alloc_bytes != mp.free_bytes {
+			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
 				record(&p[idx], b)
 				idx++
 			}
@@ -486,10 +593,10 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
 // Write b's data to r.
 func record(r *MemProfileRecord, b *bucket) {
 	mp := b.mp()
-	r.AllocBytes = int64(mp.alloc_bytes)
-	r.FreeBytes = int64(mp.free_bytes)
-	r.AllocObjects = int64(mp.allocs)
-	r.FreeObjects = int64(mp.frees)
+	r.AllocBytes = int64(mp.active.alloc_bytes)
+	r.FreeBytes = int64(mp.active.free_bytes)
+	r.AllocObjects = int64(mp.active.allocs)
+	r.FreeObjects = int64(mp.active.frees)
 	for i, loc := range b.stk() {
 		if i >= len(r.Stack0) {
 			break
@@ -505,7 +612,7 @@ func iterate_memprof(fn func(*bucket, uintptr, *location, uintptr, uintptr, uint
 	lock(&proflock)
 	for b := mbuckets; b != nil; b = b.allnext {
 		mp := b.mp()
-		fn(b, b.nstk, &b.stk()[0], b.size, mp.allocs, mp.frees)
+		fn(b, b.nstk, &b.stk()[0], b.size, mp.active.allocs, mp.active.frees)
 	}
 	unlock(&proflock)
 }
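To make the delayed-publication bookkeeping in this patch easier to follow, here is a small, self-contained sketch of the same idea outside the runtime. The names (cycleStats, record, recordAlloc, recordFree, nextCycle) are illustrative only, not the runtime's API; the sketch collapses mProf_NextCycle and mProf_Flush into one step and omits locking, per-bucket hashing, mProf_PostSweep, and the cycle-counter wrap.

// Illustrative sketch only; not part of the patch.
package main

import "fmt"

// cycleStats mirrors memRecordCycle: counts for a single heap profile cycle.
type cycleStats struct {
	allocs, frees         uint64
	allocBytes, freeBytes uint64
}

// add accumulates b into a, like memRecordCycle.add.
func (a *cycleStats) add(b *cycleStats) {
	a.allocs += b.allocs
	a.frees += b.frees
	a.allocBytes += b.allocBytes
	a.freeBytes += b.freeBytes
}

// record mirrors one bucket's memRecord: a published (active) total plus a
// three-slot ring of not-yet-published future cycles.
type record struct {
	active cycleStats
	future [3]cycleStats
}

var (
	cycle uint32 // global heap profile cycle C (mProf.cycle in the patch)
	rec   record // a single profiling bucket, for illustration
)

// recordAlloc books a malloc against cycle C+2, as mProf_Malloc does.
func recordAlloc(size uint64) {
	s := &rec.future[(cycle+2)%3]
	s.allocs++
	s.allocBytes += size
}

// recordFree books a sweep free against cycle C+1, as mProf_Free does.
func recordFree(size uint64) {
	s := &rec.future[(cycle+1)%3]
	s.frees++
	s.freeBytes += size
}

// nextCycle models mProf_NextCycle followed by mProf_Flush: advance C,
// then fold the cycle that just became complete into the active profile.
func nextCycle() {
	cycle++
	done := &rec.future[cycle%3]
	rec.active.add(done)
	*done = cycleStats{}
}

func main() {
	// Cycle 0: an object is allocated; the event targets cycle 0+2 = 2.
	recordAlloc(100)

	// Mark termination: advance to cycle 1 and publish it (still empty),
	// so the in-flight allocation is not yet visible.
	nextCycle()
	fmt.Println(rec.active) // {0 0 0 0}

	// Cycle 1: concurrent sweep frees the object; the event targets
	// cycle 1+1 = 2, the same cycle as its allocation.
	recordFree(100)

	// Mark termination: advance to cycle 2 and publish it. The alloc and
	// its free become visible together, avoiding the malloc-only skew the
	// patch comment describes.
	nextCycle()
	fmt.Println(rec.active) // {1 1 100 100}
}

On the wrap constant: as the patch's own comment notes, mProf.cycle is wrapped explicitly at mProfCycleWrap, a multiple of len(memRecord{}.future), rather than being left to overflow, because 2^32 is not a multiple of 3 and natural uint32 wraparound would make cycle%3 jump and misalign the future ring.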