path: root/libgo/go/runtime/mprof.go
Diffstat (limited to 'libgo/go/runtime/mprof.go')
-rw-r--r--  libgo/go/runtime/mprof.go  221
1 file changed, 164 insertions(+), 57 deletions(-)
diff --git a/libgo/go/runtime/mprof.go b/libgo/go/runtime/mprof.go
index 87f84a72acd..f31c88c5cae 100644
--- a/libgo/go/runtime/mprof.go
+++ b/libgo/go/runtime/mprof.go
@@ -64,27 +64,70 @@ type memRecord struct {
// come only after a GC during concurrent sweeping. So if we would
// naively count them, we would get a skew toward mallocs.
//
- // Mallocs are accounted in recent stats.
- // Explicit frees are accounted in recent stats.
- // GC frees are accounted in prev stats.
- // After GC prev stats are added to final stats and
- // recent stats are moved into prev stats.
- allocs uintptr
- frees uintptr
- alloc_bytes uintptr
- free_bytes uintptr
-
- // changes between next-to-last GC and last GC
- prev_allocs uintptr
- prev_frees uintptr
- prev_alloc_bytes uintptr
- prev_free_bytes uintptr
-
- // changes since last GC
- recent_allocs uintptr
- recent_frees uintptr
- recent_alloc_bytes uintptr
- recent_free_bytes uintptr
+ // Hence, we delay information to get consistent snapshots as
+ // of mark termination. Allocations count toward the next mark
+ // termination's snapshot, while sweep frees count toward the
+ // previous mark termination's snapshot:
+ //
+ //              MT          MT          MT          MT
+ //             .·|         .·|         .·|         .·|
+ //          .·˙  |      .·˙  |      .·˙  |      .·˙  |
+ //       .·˙     |   .·˙     |   .·˙     |   .·˙     |
+ //    .·˙        |.·˙        |.·˙        |.·˙        |
+ //
+ //       alloc → ▲ ← free
+ //               ┠┅┅┅┅┅┅┅┅┅┅┅P
+ //       C+2     →    C+1    →  C
+ //
+ //                   alloc → ▲ ← free
+ //                           ┠┅┅┅┅┅┅┅┅┅┅┅P
+ //                   C+2     →    C+1    →  C
+ //
+ // Since we can't publish a consistent snapshot until all of
+ // the sweep frees are accounted for, we wait until the next
+ // mark termination ("MT" above) to publish the previous mark
+ // termination's snapshot ("P" above). To do this, allocation
+ // and free events are accounted to *future* heap profile
+ // cycles ("C+n" above), and we only publish a cycle once all
+ // of the events from that cycle are known to be done. Specifically:
+ //
+ // Mallocs are accounted to cycle C+2.
+ // Explicit frees are accounted to cycle C+2.
+ // GC frees (done during sweeping) are accounted to cycle C+1.
+ //
+ // After mark termination, we increment the global heap
+ // profile cycle counter and accumulate the stats from cycle C
+ // into the active profile.
+
+ // active is the currently published profile. A profiling
+ // cycle can be accumulated into active once it is complete.
+ active memRecordCycle
+
+ // future records the profile events we're counting for cycles
+ // that have not yet been published. This is a ring buffer
+ // indexed by the global heap profile cycle C and stores
+ // cycles C, C+1, and C+2. Unlike active, these counts are
+ // only for a single cycle; they are not cumulative across
+ // cycles.
+ //
+ // We store cycle C here because there's a window between when
+ // C becomes the active cycle and when we've flushed it to
+ // active.
+ future [3]memRecordCycle
+}
+
+// memRecordCycle holds the allocation and free counts for a single heap profile cycle.
+type memRecordCycle struct {
+ allocs, frees uintptr
+ alloc_bytes, free_bytes uintptr
+}
+
+// add accumulates b into a. It does not zero b.
+func (a *memRecordCycle) add(b *memRecordCycle) {
+ a.allocs += b.allocs
+ a.frees += b.frees
+ a.alloc_bytes += b.alloc_bytes
+ a.free_bytes += b.free_bytes
}
// A blockRecord is the bucket data for a bucket of type blockProfile,
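To make the new bookkeeping concrete, here is a small self-contained sketch of the scheme the comment above describes: a per-bucket record with a cumulative published total plus a three-slot ring of not-yet-published cycles. All names here (cycleStats, record, flush) are hypothetical illustrations, not the runtime's own code.

    package main

    import "fmt"

    // cycleStats plays the role of memRecordCycle: event counts for one
    // heap profile cycle only.
    type cycleStats struct {
        allocs, frees         uint64
        allocBytes, freeBytes uint64
    }

    // record plays the role of memRecord: a published total plus a
    // three-slot ring holding the unpublished cycles C, C+1, and C+2.
    type record struct {
        active cycleStats
        future [3]cycleStats
    }

    // flush folds cycle c into the published total and clears the slot,
    // the way mProf_FlushLocked does for every bucket.
    func (r *record) flush(c uint32) {
        f := &r.future[c%3]
        r.active.allocs += f.allocs
        r.active.frees += f.frees
        r.active.allocBytes += f.allocBytes
        r.active.freeBytes += f.freeBytes
        *f = cycleStats{}
    }

    func main() {
        var r record
        c := uint32(0)

        // A malloc observed while cycle C is current is credited to C+2,
        // so it stays invisible for two mark terminations.
        r.future[(c+2)%3].allocs++
        r.future[(c+2)%3].allocBytes += 4096

        r.flush(c) // publish cycle C: the alloc is not visible yet
        c++        // first mark termination
        r.flush(c) // publish cycle C+1: still not visible
        c++        // second mark termination
        r.flush(c) // publish cycle C+2: now it appears
        fmt.Printf("published: %+v\n", r.active)
    }

The point of the delay is that by the time a cycle is flushed, every event that could belong to it has already been recorded, so the published totals always describe a consistent snapshot as of a mark termination.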
@@ -100,8 +143,21 @@ var (
xbuckets *bucket // mutex profile buckets
buckhash *[179999]*bucket
bucketmem uintptr
+
+ mProf struct {
+ // All fields in mProf are protected by proflock.
+
+ // cycle is the global heap profile cycle. This wraps
+ // at mProfCycleWrap.
+ cycle uint32
+ // flushed indicates that future[cycle] in all buckets
+ // has been flushed to the active profile.
+ flushed bool
+ }
)
+const mProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24)
+
// newBucket allocates a bucket with the given type and number of stack entries.
func newBucket(typ bucketType, nstk int) *bucket {
size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(location{})
@@ -212,30 +268,71 @@ func eqslice(x, y []location) bool {
return true
}
-func mprof_GC() {
+// mProf_NextCycle publishes the next heap profile cycle and creates a
+// fresh heap profile cycle. This operation is fast and can be done
+// during STW. The caller must call mProf_Flush before calling
+// mProf_NextCycle again.
+//
+// This is called by mark termination during STW so allocations and
+// frees after the world is started again count towards a new heap
+// profiling cycle.
+func mProf_NextCycle() {
+ lock(&proflock)
+ // We explicitly wrap mProf.cycle rather than depending on
+ // uint wraparound because the memRecord.future ring does not
+ // itself wrap at a power of two.
+ mProf.cycle = (mProf.cycle + 1) % mProfCycleWrap
+ mProf.flushed = false
+ unlock(&proflock)
+}
+
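A quick illustration of why the wrap is explicit rather than relying on uint32 overflow: the future ring has three slots and 2^32 is not a multiple of 3, so a natural overflow would make cycle%3 repeat a slot instead of advancing. The constant below only mirrors the shape of mProfCycleWrap; the program is just a demonstration of the modular arithmetic.

    package main

    import "fmt"

    func main() {
        const ringLen = 3
        // Same shape as mProfCycleWrap: a large multiple of the ring length.
        const wrap = uint32(ringLen) * (2 << 24)

        // Explicit wrap: the slot index keeps advancing by one step.
        fmt.Println((wrap-1)%ringLen, ((wrap-1+1)%wrap)%ringLen) // 2 0

        // Natural uint32 overflow: 2^32 % 3 == 1, so the index would
        // repeat slot 0 across the wrap instead of advancing.
        fmt.Println((^uint32(0))%ringLen, uint32(0)%ringLen) // 0 0
    }
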
+// mProf_Flush flushes the events from the current heap profiling
+// cycle into the active profile. After this it is safe to start a new
+// heap profiling cycle with mProf_NextCycle.
+//
+// This is called by GC after mark termination starts the world. In
+// contrast with mProf_NextCycle, this is somewhat expensive, but safe
+// to do concurrently.
+func mProf_Flush() {
+ lock(&proflock)
+ if !mProf.flushed {
+ mProf_FlushLocked()
+ mProf.flushed = true
+ }
+ unlock(&proflock)
+}
+
+func mProf_FlushLocked() {
+ c := mProf.cycle
for b := mbuckets; b != nil; b = b.allnext {
mp := b.mp()
- mp.allocs += mp.prev_allocs
- mp.frees += mp.prev_frees
- mp.alloc_bytes += mp.prev_alloc_bytes
- mp.free_bytes += mp.prev_free_bytes
- mp.prev_allocs = mp.recent_allocs
- mp.prev_frees = mp.recent_frees
- mp.prev_alloc_bytes = mp.recent_alloc_bytes
- mp.prev_free_bytes = mp.recent_free_bytes
-
- mp.recent_allocs = 0
- mp.recent_frees = 0
- mp.recent_alloc_bytes = 0
- mp.recent_free_bytes = 0
+ // Flush cycle C into the published profile and clear
+ // it for reuse.
+ mpc := &mp.future[c%uint32(len(mp.future))]
+ mp.active.add(mpc)
+ *mpc = memRecordCycle{}
}
}
-// Record that a gc just happened: all the 'recent' statistics are now real.
-func mProf_GC() {
+// mProf_PostSweep records that all sweep frees for this GC cycle have
+// completed. This has the effect of publishing the heap profile
+// snapshot as of the last mark termination without advancing the heap
+// profile cycle.
+func mProf_PostSweep() {
lock(&proflock)
- mprof_GC()
+ // Flush cycle C+1 to the active profile so everything as of
+ // the last mark termination becomes visible. *Don't* advance
+ // the cycle, since we're still accumulating allocs in cycle
+ // C+2, which have to become C+1 in the next mark termination
+ // and so on.
+ c := mProf.cycle
+ for b := mbuckets; b != nil; b = b.allnext {
+ mp := b.mp()
+ mpc := &mp.future[(c+1)%uint32(len(mp.future))]
+ mp.active.add(mpc)
+ *mpc = memRecordCycle{}
+ }
unlock(&proflock)
}
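Putting the three entry points together, the per-GC sequence described in the comments above is: mProf_NextCycle during mark-termination STW, mProf_Flush once the world is running again, and mProf_PostSweep when sweeping finishes. The toy program below (hypothetical names, not the runtime code) walks one object through that sequence and shows that its allocation and its sweep free are published in the same snapshot.

    package main

    import "fmt"

    type cycleStats struct{ allocs, frees uint64 }

    // profiler is a toy stand-in for the mProf state plus a single bucket.
    type profiler struct {
        cycle   uint32
        flushed bool
        active  cycleStats
        future  [3]cycleStats
    }

    func (p *profiler) malloc()    { p.future[(p.cycle+2)%3].allocs++ } // like mProf_Malloc: cycle C+2
    func (p *profiler) sweepFree() { p.future[(p.cycle+1)%3].frees++ }  // like mProf_Free: cycle C+1

    // nextCycle corresponds to mProf_NextCycle (mark termination, STW).
    func (p *profiler) nextCycle() { p.cycle++; p.flushed = false }

    func (p *profiler) flushSlot(c uint32) {
        f := &p.future[c%3]
        p.active.allocs += f.allocs
        p.active.frees += f.frees
        *f = cycleStats{}
    }

    // flush corresponds to mProf_Flush (after the world restarts).
    func (p *profiler) flush() {
        if !p.flushed {
            p.flushSlot(p.cycle)
            p.flushed = true
        }
    }

    // postSweep corresponds to mProf_PostSweep (all sweep frees done).
    func (p *profiler) postSweep() { p.flushSlot(p.cycle + 1) }

    func main() {
        var p profiler
        p.malloc() // allocated while cycle C is current → credited to C+2

        // One full GC observes the object's death during sweep.
        p.nextCycle() // STW mark termination: C becomes C+1
        p.flush()     // world restarted: publish the now-complete cycle
        p.sweepFree() // sweep credits the free to the same cycle as the alloc
        p.postSweep() // sweep finished: publish that snapshot
        fmt.Printf("%+v\n", p.active) // {allocs:1 frees:1}: both sides appear together
    }

The same walk with an older object (allocation published in an earlier cycle) would add only the free at postSweep, so in-use numbers drop only once the corresponding sweep has completed.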
@@ -245,9 +342,11 @@ func mProf_Malloc(p unsafe.Pointer, size uintptr) {
nstk := callers(4, stk[:])
lock(&proflock)
b := stkbucket(memProfile, size, stk[:nstk], true)
+ c := mProf.cycle
mp := b.mp()
- mp.recent_allocs++
- mp.recent_alloc_bytes += size
+ mpc := &mp.future[(c+2)%uint32(len(mp.future))]
+ mpc.allocs++
+ mpc.alloc_bytes += size
unlock(&proflock)
// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
@@ -262,9 +361,11 @@ func mProf_Malloc(p unsafe.Pointer, size uintptr) {
// Called when freeing a profiled block.
func mProf_Free(b *bucket, size uintptr) {
lock(&proflock)
+ c := mProf.cycle
mp := b.mp()
- mp.prev_frees++
- mp.prev_free_bytes += size
+ mpc := &mp.future[(c+1)%uint32(len(mp.future))]
+ mpc.frees++
+ mpc.free_bytes += size
unlock(&proflock)
}
@@ -298,7 +399,7 @@ func blockevent(cycles int64, skip int) {
cycles = 1
}
if blocksampled(cycles) {
- saveblockevent(cycles, skip+1, blockProfile, &blockprofilerate)
+ saveblockevent(cycles, skip+1, blockProfile)
}
}
@@ -310,7 +411,7 @@ func blocksampled(cycles int64) bool {
return true
}
-func saveblockevent(cycles int64, skip int, which bucketType, ratep *uint64) {
+func saveblockevent(cycles int64, skip int, which bucketType) {
gp := getg()
var nstk int
var stk [maxStack]location
@@ -355,7 +456,7 @@ func mutexevent(cycles int64, skip int) {
// TODO(pjw): measure impact of always calling fastrand vs using something
// like malloc.go:nextSample()
if rate > 0 && int64(fastrand())%rate == 0 {
- saveblockevent(cycles, skip+1, mutexProfile, &mutexprofilerate)
+ saveblockevent(cycles, skip+1, mutexProfile)
}
}
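The sampling checks above are driven by the user-visible rate knobs in the runtime package. As a reminder of how these profiles are switched on from application code (standard, pre-existing API, not something this patch adds):

    package main

    import (
        "os"
        "runtime"
        "runtime/pprof"
    )

    func main() {
        // Record every blocking event (rate 1) and, on average,
        // 1 in 100 mutex contention events.
        runtime.SetBlockProfileRate(1)
        runtime.SetMutexProfileFraction(100)

        // ... run the workload ...

        // Dump the profiles; these are built from the buckets that
        // saveblockevent fills in.
        pprof.Lookup("block").WriteTo(os.Stdout, 0)
        pprof.Lookup("mutex").WriteTo(os.Stdout, 0)
    }
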
@@ -443,13 +544,17 @@ func (r *MemProfileRecord) Stack() []uintptr {
// of calling MemProfile directly.
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
lock(&proflock)
+ // If we're between mProf_NextCycle and mProf_Flush, take care
+ // of flushing to the active profile so we only have to look
+ // at the active profile below.
+ mProf_FlushLocked()
clear := true
for b := mbuckets; b != nil; b = b.allnext {
mp := b.mp()
- if inuseZero || mp.alloc_bytes != mp.free_bytes {
+ if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
n++
}
- if mp.allocs != 0 || mp.frees != 0 {
+ if mp.active.allocs != 0 || mp.active.frees != 0 {
clear = false
}
}
@@ -457,13 +562,15 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
// Absolutely no data, suggesting that a garbage collection
// has not yet happened. In order to allow profiling when
// garbage collection is disabled from the beginning of execution,
- // accumulate stats as if a GC just happened, and recount buckets.
- mprof_GC()
- mprof_GC()
+ // accumulate all of the cycles, and recount buckets.
n = 0
for b := mbuckets; b != nil; b = b.allnext {
mp := b.mp()
- if inuseZero || mp.alloc_bytes != mp.free_bytes {
+ for c := range mp.future {
+ mp.active.add(&mp.future[c])
+ mp.future[c] = memRecordCycle{}
+ }
+ if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
n++
}
}
@@ -473,7 +580,7 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
idx := 0
for b := mbuckets; b != nil; b = b.allnext {
mp := b.mp()
- if inuseZero || mp.alloc_bytes != mp.free_bytes {
+ if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
record(&p[idx], b)
idx++
}
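For reference, callers that do use MemProfile directly follow the grow-and-retry idiom from its documentation (most programs should go through runtime/pprof instead); a minimal sketch:

    package main

    import (
        "fmt"
        "runtime"
    )

    func main() {
        // First call with nil to learn how many records exist, then
        // retry with some slack until the profile fits.
        n, _ := runtime.MemProfile(nil, true)
        var p []runtime.MemProfileRecord
        for {
            p = make([]runtime.MemProfileRecord, n+50)
            var ok bool
            n, ok = runtime.MemProfile(p, true)
            if ok {
                p = p[:n]
                break
            }
        }

        var inUse int64
        for i := range p {
            inUse += p[i].InUseBytes()
        }
        fmt.Println("records:", len(p), "in-use bytes:", inUse)
    }
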
@@ -486,10 +593,10 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
// Write b's data to r.
func record(r *MemProfileRecord, b *bucket) {
mp := b.mp()
- r.AllocBytes = int64(mp.alloc_bytes)
- r.FreeBytes = int64(mp.free_bytes)
- r.AllocObjects = int64(mp.allocs)
- r.FreeObjects = int64(mp.frees)
+ r.AllocBytes = int64(mp.active.alloc_bytes)
+ r.FreeBytes = int64(mp.active.free_bytes)
+ r.AllocObjects = int64(mp.active.allocs)
+ r.FreeObjects = int64(mp.active.frees)
for i, loc := range b.stk() {
if i >= len(r.Stack0) {
break
@@ -505,7 +612,7 @@ func iterate_memprof(fn func(*bucket, uintptr, *location, uintptr, uintptr, uint
lock(&proflock)
for b := mbuckets; b != nil; b = b.allnext {
mp := b.mp()
- fn(b, b.nstk, &b.stk()[0], b.size, mp.allocs, mp.frees)
+ fn(b, b.nstk, &b.stk()[0], b.size, mp.active.allocs, mp.active.frees)
}
unlock(&proflock)
}