Diffstat (limited to 'libgo/go/runtime/mprof.go')
-rw-r--r-- | libgo/go/runtime/mprof.go | 221
1 file changed, 164 insertions, 57 deletions
diff --git a/libgo/go/runtime/mprof.go b/libgo/go/runtime/mprof.go
index 87f84a72acd..f31c88c5cae 100644
--- a/libgo/go/runtime/mprof.go
+++ b/libgo/go/runtime/mprof.go
@@ -64,27 +64,70 @@ type memRecord struct {
 	// come only after a GC during concurrent sweeping. So if we would
 	// naively count them, we would get a skew toward mallocs.
 	//
-	// Mallocs are accounted in recent stats.
-	// Explicit frees are accounted in recent stats.
-	// GC frees are accounted in prev stats.
-	// After GC prev stats are added to final stats and
-	// recent stats are moved into prev stats.
-	allocs      uintptr
-	frees       uintptr
-	alloc_bytes uintptr
-	free_bytes  uintptr
-
-	// changes between next-to-last GC and last GC
-	prev_allocs      uintptr
-	prev_frees       uintptr
-	prev_alloc_bytes uintptr
-	prev_free_bytes  uintptr
-
-	// changes since last GC
-	recent_allocs      uintptr
-	recent_frees       uintptr
-	recent_alloc_bytes uintptr
-	recent_free_bytes  uintptr
+	// Hence, we delay information to get consistent snapshots as
+	// of mark termination. Allocations count toward the next mark
+	// termination's snapshot, while sweep frees count toward the
+	// previous mark termination's snapshot:
+	//
+	//              MT          MT          MT          MT
+	//             .·|         .·|         .·|         .·|
+	//          .·˙  |      .·˙  |      .·˙  |      .·˙  |
+	//       .·˙     |   .·˙     |   .·˙     |   .·˙     |
+	//    .·˙        |.·˙        |.·˙        |.·˙        |
+	//
+	//       alloc → ▲ ← free
+	//               ┠┅┅┅┅┅┅┅┅┅┅┅P
+	//       C+2     →    C+1    →  C
+	//
+	//                   alloc → ▲ ← free
+	//                           ┠┅┅┅┅┅┅┅┅┅┅┅P
+	//                   C+2     →    C+1    →  C
+	//
+	// Since we can't publish a consistent snapshot until all of
+	// the sweep frees are accounted for, we wait until the next
+	// mark termination ("MT" above) to publish the previous mark
+	// termination's snapshot ("P" above). To do this, allocation
+	// and free events are accounted to *future* heap profile
+	// cycles ("C+n" above) and we only publish a cycle once all
+	// of the events from that cycle must be done. Specifically:
+	//
+	// Mallocs are accounted to cycle C+2.
+	// Explicit frees are accounted to cycle C+2.
+	// GC frees (done during sweeping) are accounted to cycle C+1.
+	//
+	// After mark termination, we increment the global heap
+	// profile cycle counter and accumulate the stats from cycle C
+	// into the active profile.

+	// active is the currently published profile. A profiling
+	// cycle can be accumulated into active once its complete.
+	active memRecordCycle
+
+	// future records the profile events we're counting for cycles
+	// that have not yet been published. This is ring buffer
+	// indexed by the global heap profile cycle C and stores
+	// cycles C, C+1, and C+2. Unlike active, these counts are
+	// only for a single cycle; they are not cumulative across
+	// cycles.
+	//
+	// We store cycle C here because there's a window between when
+	// C becomes the active cycle and when we've flushed it to
+	// active.
+	future [3]memRecordCycle
+}
+
+// memRecordCycle
+type memRecordCycle struct {
+	allocs, frees           uintptr
+	alloc_bytes, free_bytes uintptr
+}
+
+// add accumulates b into a. It does not zero b.
+func (a *memRecordCycle) add(b *memRecordCycle) {
+	a.allocs += b.allocs
+	a.frees += b.frees
+	a.alloc_bytes += b.alloc_bytes
+	a.free_bytes += b.free_bytes
 }
 
 // A blockRecord is the bucket data for a bucket of type blockProfile,
@@ -100,8 +143,21 @@ var (
 	xbuckets  *bucket // mutex profile buckets
 	buckhash  *[179999]*bucket
 	bucketmem uintptr
+
+	mProf struct {
+		// All fields in mProf are protected by proflock.
+
+		// cycle is the global heap profile cycle. This wraps
+		// at mProfCycleWrap.
+		cycle uint32
+		// flushed indicates that future[cycle] in all buckets
+		// has been flushed to the active profile.
+		flushed bool
+	}
 )
 
+const mProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24)
+
 // newBucket allocates a bucket with the given type and number of stack entries.
 func newBucket(typ bucketType, nstk int) *bucket {
 	size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(location{})
@@ -212,30 +268,71 @@ func eqslice(x, y []location) bool {
 	}
 	return true
 }
 
-func mprof_GC() {
+// mProf_NextCycle publishes the next heap profile cycle and creates a
+// fresh heap profile cycle. This operation is fast and can be done
+// during STW. The caller must call mProf_Flush before calling
+// mProf_NextCycle again.
+//
+// This is called by mark termination during STW so allocations and
+// frees after the world is started again count towards a new heap
+// profiling cycle.
+func mProf_NextCycle() {
+	lock(&proflock)
+	// We explicitly wrap mProf.cycle rather than depending on
+	// uint wraparound because the memRecord.future ring does not
+	// itself wrap at a power of two.
+	mProf.cycle = (mProf.cycle + 1) % mProfCycleWrap
+	mProf.flushed = false
+	unlock(&proflock)
+}
+
+// mProf_Flush flushes the events from the current heap profiling
+// cycle into the active profile. After this it is safe to start a new
+// heap profiling cycle with mProf_NextCycle.
+//
+// This is called by GC after mark termination starts the world. In
+// contrast with mProf_NextCycle, this is somewhat expensive, but safe
+// to do concurrently.
+func mProf_Flush() {
+	lock(&proflock)
+	if !mProf.flushed {
+		mProf_FlushLocked()
+		mProf.flushed = true
+	}
+	unlock(&proflock)
+}
+
+func mProf_FlushLocked() {
+	c := mProf.cycle
 	for b := mbuckets; b != nil; b = b.allnext {
 		mp := b.mp()
-		mp.allocs += mp.prev_allocs
-		mp.frees += mp.prev_frees
-		mp.alloc_bytes += mp.prev_alloc_bytes
-		mp.free_bytes += mp.prev_free_bytes
-		mp.prev_allocs = mp.recent_allocs
-		mp.prev_frees = mp.recent_frees
-		mp.prev_alloc_bytes = mp.recent_alloc_bytes
-		mp.prev_free_bytes = mp.recent_free_bytes
-
-		mp.recent_allocs = 0
-		mp.recent_frees = 0
-		mp.recent_alloc_bytes = 0
-		mp.recent_free_bytes = 0
+		// Flush cycle C into the published profile and clear
+		// it for reuse.
+		mpc := &mp.future[c%uint32(len(mp.future))]
+		mp.active.add(mpc)
+		*mpc = memRecordCycle{}
 	}
 }
 
-// Record that a gc just happened: all the 'recent' statistics are now real.
-func mProf_GC() {
+// mProf_PostSweep records that all sweep frees for this GC cycle have
+// completed. This has the effect of publishing the heap profile
+// snapshot as of the last mark termination without advancing the heap
+// profile cycle.
+func mProf_PostSweep() {
 	lock(&proflock)
-	mprof_GC()
+	// Flush cycle C+1 to the active profile so everything as of
+	// the last mark termination becomes visible. *Don't* advance
+	// the cycle, since we're still accumulating allocs in cycle
+	// C+2, which have to become C+1 in the next mark termination
+	// and so on.
+	c := mProf.cycle
+	for b := mbuckets; b != nil; b = b.allnext {
+		mp := b.mp()
+		mpc := &mp.future[(c+1)%uint32(len(mp.future))]
+		mp.active.add(mpc)
+		*mpc = memRecordCycle{}
+	}
 	unlock(&proflock)
 }
 
@@ -245,9 +342,11 @@ func mProf_Malloc(p unsafe.Pointer, size uintptr) {
 	nstk := callers(4, stk[:])
 	lock(&proflock)
 	b := stkbucket(memProfile, size, stk[:nstk], true)
+	c := mProf.cycle
 	mp := b.mp()
-	mp.recent_allocs++
-	mp.recent_alloc_bytes += size
+	mpc := &mp.future[(c+2)%uint32(len(mp.future))]
+	mpc.allocs++
+	mpc.alloc_bytes += size
 	unlock(&proflock)
 
 	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
@@ -262,9 +361,11 @@ func mProf_Malloc(p unsafe.Pointer, size uintptr) {
 // Called when freeing a profiled block.
 func mProf_Free(b *bucket, size uintptr) {
 	lock(&proflock)
+	c := mProf.cycle
 	mp := b.mp()
-	mp.prev_frees++
-	mp.prev_free_bytes += size
+	mpc := &mp.future[(c+1)%uint32(len(mp.future))]
+	mpc.frees++
+	mpc.free_bytes += size
 	unlock(&proflock)
 }
 
@@ -298,7 +399,7 @@ func blockevent(cycles int64, skip int) {
 		cycles = 1
 	}
 	if blocksampled(cycles) {
-		saveblockevent(cycles, skip+1, blockProfile, &blockprofilerate)
+		saveblockevent(cycles, skip+1, blockProfile)
 	}
 }
 
@@ -310,7 +411,7 @@ func blocksampled(cycles int64) bool {
 	return true
 }
 
-func saveblockevent(cycles int64, skip int, which bucketType, ratep *uint64) {
+func saveblockevent(cycles int64, skip int, which bucketType) {
 	gp := getg()
 	var nstk int
 	var stk [maxStack]location
@@ -355,7 +456,7 @@ func mutexevent(cycles int64, skip int) {
 	// TODO(pjw): measure impact of always calling fastrand vs using something
 	// like malloc.go:nextSample()
 	if rate > 0 && int64(fastrand())%rate == 0 {
-		saveblockevent(cycles, skip+1, mutexProfile, &mutexprofilerate)
+		saveblockevent(cycles, skip+1, mutexProfile)
 	}
 }
 
@@ -443,13 +544,17 @@ func (r *MemProfileRecord) Stack() []uintptr {
 // of calling MemProfile directly.
 func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
 	lock(&proflock)
+	// If we're between mProf_NextCycle and mProf_Flush, take care
+	// of flushing to the active profile so we only have to look
+	// at the active profile below.
+	mProf_FlushLocked()
 	clear := true
 	for b := mbuckets; b != nil; b = b.allnext {
 		mp := b.mp()
-		if inuseZero || mp.alloc_bytes != mp.free_bytes {
+		if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
 			n++
 		}
-		if mp.allocs != 0 || mp.frees != 0 {
+		if mp.active.allocs != 0 || mp.active.frees != 0 {
 			clear = false
 		}
 	}
@@ -457,13 +562,15 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
 		// Absolutely no data, suggesting that a garbage collection
 		// has not yet happened. In order to allow profiling when
 		// garbage collection is disabled from the beginning of execution,
-		// accumulate stats as if a GC just happened, and recount buckets.
-		mprof_GC()
-		mprof_GC()
+		// accumulate all of the cycles, and recount buckets.
 		n = 0
 		for b := mbuckets; b != nil; b = b.allnext {
 			mp := b.mp()
-			if inuseZero || mp.alloc_bytes != mp.free_bytes {
+			for c := range mp.future {
+				mp.active.add(&mp.future[c])
+				mp.future[c] = memRecordCycle{}
+			}
+			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
 				n++
 			}
 		}
@@ -473,7 +580,7 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
 		idx := 0
 		for b := mbuckets; b != nil; b = b.allnext {
 			mp := b.mp()
-			if inuseZero || mp.alloc_bytes != mp.free_bytes {
+			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
 				record(&p[idx], b)
 				idx++
 			}
@@ -486,10 +593,10 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
 // Write b's data to r.
 func record(r *MemProfileRecord, b *bucket) {
 	mp := b.mp()
-	r.AllocBytes = int64(mp.alloc_bytes)
-	r.FreeBytes = int64(mp.free_bytes)
-	r.AllocObjects = int64(mp.allocs)
-	r.FreeObjects = int64(mp.frees)
+	r.AllocBytes = int64(mp.active.alloc_bytes)
+	r.FreeBytes = int64(mp.active.free_bytes)
+	r.AllocObjects = int64(mp.active.allocs)
+	r.FreeObjects = int64(mp.active.frees)
 	for i, loc := range b.stk() {
 		if i >= len(r.Stack0) {
 			break
@@ -505,7 +612,7 @@ func iterate_memprof(fn func(*bucket, uintptr, *location, uintptr, uintptr, uint
 	lock(&proflock)
 	for b := mbuckets; b != nil; b = b.allnext {
 		mp := b.mp()
-		fn(b, b.nstk, &b.stk()[0], b.size, mp.allocs, mp.frees)
+		fn(b, b.nstk, &b.stk()[0], b.size, mp.active.allocs, mp.active.frees)
 	}
 	unlock(&proflock)
 }
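To make the delayed-publication bookkeeping in this patch easier to follow, here is a small, self-contained sketch of the same idea outside the runtime. The names (cycleStats, record, recordAlloc, recordFree, nextCycle) are illustrative only, not the runtime's API; the sketch collapses mProf_NextCycle and mProf_Flush into one step and omits locking, per-bucket hashing, mProf_PostSweep, and the cycle-counter wrap.

// Illustrative sketch only; not part of the patch.
package main

import "fmt"

// cycleStats mirrors memRecordCycle: counts for a single heap profile cycle.
type cycleStats struct {
	allocs, frees         uint64
	allocBytes, freeBytes uint64
}

// add accumulates b into a, like memRecordCycle.add.
func (a *cycleStats) add(b *cycleStats) {
	a.allocs += b.allocs
	a.frees += b.frees
	a.allocBytes += b.allocBytes
	a.freeBytes += b.freeBytes
}

// record mirrors one bucket's memRecord: a published (active) total plus a
// three-slot ring of not-yet-published future cycles.
type record struct {
	active cycleStats
	future [3]cycleStats
}

var (
	cycle uint32 // global heap profile cycle C (mProf.cycle in the patch)
	rec   record // a single profiling bucket, for illustration
)

// recordAlloc books a malloc against cycle C+2, as mProf_Malloc does.
func recordAlloc(size uint64) {
	s := &rec.future[(cycle+2)%3]
	s.allocs++
	s.allocBytes += size
}

// recordFree books a sweep free against cycle C+1, as mProf_Free does.
func recordFree(size uint64) {
	s := &rec.future[(cycle+1)%3]
	s.frees++
	s.freeBytes += size
}

// nextCycle models mProf_NextCycle followed by mProf_Flush: advance C,
// then fold the cycle that just became complete into the active profile.
func nextCycle() {
	cycle++
	done := &rec.future[cycle%3]
	rec.active.add(done)
	*done = cycleStats{}
}

func main() {
	// Cycle 0: an object is allocated; the event targets cycle 0+2 = 2.
	recordAlloc(100)

	// Mark termination: advance to cycle 1 and publish it (still empty),
	// so the in-flight allocation is not yet visible.
	nextCycle()
	fmt.Println(rec.active) // {0 0 0 0}

	// Cycle 1: concurrent sweep frees the object; the event targets
	// cycle 1+1 = 2, the same cycle as its allocation.
	recordFree(100)

	// Mark termination: advance to cycle 2 and publish it. The alloc and
	// its free become visible together, avoiding the malloc-only skew the
	// patch comment describes.
	nextCycle()
	fmt.Println(rec.active) // {1 1 100 100}
}

On the wrap constant: as the patch's own comment notes, mProf.cycle is wrapped explicitly at mProfCycleWrap, a multiple of len(memRecord{}.future), rather than being left to overflow, because 2^32 is not a multiple of 3 and natural uint32 wraparound would make cycle%3 jump and misalign the future ring.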