Commit db7fd1c

runtime: increase GC concurrency.
Run the GC in its own background goroutine, making the caller runnable if resources are available. This is critical in single-goroutine applications.

Allow goroutines that allocate a lot to help out the GC, and in doing so throttle their own allocation.

Adjust the test so that it only detects whether a GC is run during init calls, not whether the GC is memory efficient. Memory efficiency work will happen later in 1.5.

Change-Id: I4306f5e377bb47c69bda1aedba66164f12b20c2b
Reviewed-on: https://go-review.googlesource.com/2349
Reviewed-by: Russ Cox <[email protected]>
Reviewed-by: Austin Clements <[email protected]>
1 parent f21ee1e commit db7fd1c

4 files changed: +166 additions, -28 deletions

src/runtime/malloc.go

Lines changed: 62 additions & 14 deletions
@@ -39,10 +39,27 @@ type pageID uintptr
 // base address for all 0-byte allocations
 var zerobase uintptr
 
+// Determine whether to initiate a GC.
+// Currently the primitive heuristic we use will start a new
+// concurrent GC when approximately half the available space
+// made available by the last GC cycle has been used.
+// If the GC is already working no need to trigger another one.
+// This should establish a feedback loop where if the GC does not
+// have sufficient time to complete then more memory will be
+// requested from the OS increasing heap size thus allow future
+// GCs more time to complete.
+// memstat.heap_alloc and memstat.next_gc reads have benign races
+// A false negative simple does not start a GC, a false positive
+// will start a GC needlessly. Neither have correctness issues.
+func shouldtriggergc() bool {
+    return memstats.heap_alloc+memstats.heap_alloc*3/4 >= memstats.next_gc && atomicloaduint(&bggc.working) == 0
+}
+
 // Allocate an object of size bytes.
 // Small objects are allocated from the per-P cache's free lists.
 // Large objects (> 32 kB) are allocated straight from the heap.
 func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
+    shouldhelpgc := false
     if size == 0 {
         return unsafe.Pointer(&zerobase)
     }
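The trigger condition above is equivalent to heap_alloc >= 4/7 of next_gc, roughly 57% of the goal, which is what the "approximately half" comment refers to. A minimal standalone sketch of that arithmetic, using hypothetical heapAlloc/nextGC values in place of the real memstats fields:

package main

import "fmt"

// wouldTrigger restates the heuristic outside the runtime (illustrative only).
func wouldTrigger(heapAlloc, nextGC uint64) bool {
    // heapAlloc + heapAlloc*3/4 >= nextGC  <=>  heapAlloc >= nextGC*4/7,
    // i.e. a new cycle starts once roughly 57% of the goal has been allocated.
    return heapAlloc+heapAlloc*3/4 >= nextGC
}

func main() {
    const nextGC = 8 << 20 // assume an 8 MB goal for the next cycle
    for _, alloc := range []uint64{3 << 20, 4 << 20, 5 << 20} {
        fmt.Printf("heap_alloc=%d MB trigger=%v\n", alloc>>20, wouldTrigger(alloc, nextGC))
    }
    // Prints false, false, true: with an 8 MB goal the cycle starts
    // between 4 MB and 5 MB of live allocation (4/7 of 8 MB is about 4.6 MB).
}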
@@ -144,6 +161,7 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
             systemstack(func() {
                 mCache_Refill(c, tinySizeClass)
             })
+            shouldhelpgc = true
             s = c.alloc[tinySizeClass]
             v = s.freelist
         }
@@ -174,6 +192,7 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
             systemstack(func() {
                 mCache_Refill(c, int32(sizeclass))
             })
+            shouldhelpgc = true
             s = c.alloc[sizeclass]
             v = s.freelist
         }
@@ -191,6 +210,7 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
         c.local_cachealloc += intptr(size)
     } else {
         var s *mspan
+        shouldhelpgc = true
         systemstack(func() {
             s = largeAlloc(size, uint32(flags))
         })
@@ -345,8 +365,15 @@ marked:
         }
     }
 
-    if memstats.heap_alloc >= memstats.next_gc/2 {
+    if shouldtriggergc() {
         gogc(0)
+    } else if shouldhelpgc && atomicloaduint(&bggc.working) == 1 {
+        // bggc.lock not taken since race on bggc.working is benign.
+        // At worse we don't call gchelpwork.
+        // Delay the gchelpwork until the epilogue so that it doesn't
+        // interfere with the inner working of malloc such as
+        // mcache refills that might happen while doing the gchelpwork
+        systemstack(gchelpwork)
     }
 
     return x
@@ -466,14 +493,25 @@ func gogc(force int32) {
     releasem(mp)
     mp = nil
 
-    semacquire(&worldsema, false)
-
-    if force == 0 && memstats.heap_alloc < memstats.next_gc {
-        // typically threads which lost the race to grab
-        // worldsema exit here when gc is done.
-        semrelease(&worldsema)
-        return
+    if force == 0 {
+        lock(&bggc.lock)
+        if !bggc.started {
+            bggc.working = 1
+            bggc.started = true
+            go backgroundgc()
+        } else if bggc.working == 0 {
+            bggc.working = 1
+            ready(bggc.g)
+        }
+        unlock(&bggc.lock)
+    } else {
+        gcwork(force)
     }
+}
+
+func gcwork(force int32) {
+
+    semacquire(&worldsema, false)
 
     // Pick up the remaining unswept/not being swept spans concurrently
     for gosweepone() != ^uintptr(0) {
@@ -482,14 +520,17 @@ func gogc(force int32) {
 
     // Ok, we're doing it!  Stop everybody else
 
-    startTime := nanotime()
-    mp = acquirem()
+    mp := acquirem()
     mp.gcing = 1
     releasem(mp)
     gctimer.count++
     if force == 0 {
         gctimer.cycle.sweepterm = nanotime()
     }
+    // Pick up the remaining unswept/not being swept spans before we STW
+    for gosweepone() != ^uintptr(0) {
+        sweep.nbgsweep++
+    }
     systemstack(stoptheworld)
     systemstack(finishsweep_m) // finish sweep before we start concurrent scan.
     if force == 0 { // Do as much work concurrently as possible
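The start-or-wake logic above relies on runtime internals (lock/unlock on bggc.lock, ready, and goparkunlock in backgroundgc, defined in mgc0.go below). A user-level analogy of the same pattern, sketched with sync.Mutex and sync.Cond standing in for those primitives and collectOnce standing in for gcwork(0); names here are invented for the sketch:

package main

import (
    "fmt"
    "sync"
    "time"
)

type bgCollector struct {
    mu      sync.Mutex
    cond    *sync.Cond
    started bool
    working bool
}

// trigger mirrors the force == 0 path of gogc: start the worker once,
// wake it if it is parked, and do nothing if a cycle is already running.
func (b *bgCollector) trigger(collectOnce func()) {
    b.mu.Lock()
    defer b.mu.Unlock()
    if !b.started {
        b.started = true
        b.working = true
        b.cond = sync.NewCond(&b.mu)
        go b.loop(collectOnce)
    } else if !b.working {
        b.working = true
        b.cond.Signal() // analogous to ready(bggc.g)
    }
}

// loop mirrors backgroundgc: run one cycle, mark itself idle, then park.
func (b *bgCollector) loop(collectOnce func()) {
    for {
        collectOnce()
        b.mu.Lock()
        b.working = false
        for !b.working {
            b.cond.Wait() // analogous to goparkunlock(&bggc.lock, ...)
        }
        b.mu.Unlock()
    }
}

func main() {
    var b bgCollector
    collect := func() { fmt.Println("collecting") }
    b.trigger(collect)
    time.Sleep(50 * time.Millisecond)
    b.trigger(collect) // worker is parked, so this wakes it for another cycle
    time.Sleep(50 * time.Millisecond)
}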
@@ -500,7 +541,7 @@
         systemstack(gcscan_m)
         gctimer.cycle.installmarkwb = nanotime()
         systemstack(stoptheworld)
-        gcinstallmarkwb()
+        systemstack(gcinstallmarkwb)
         systemstack(starttheworld)
         gctimer.cycle.mark = nanotime()
         systemstack(gcmark_m)
@@ -509,6 +550,7 @@
         systemstack(gcinstalloffwb_m)
     }
 
+    startTime := nanotime()
     if mp != acquirem() {
         throw("gogc: rescheduled")
     }
@@ -527,6 +569,7 @@
     eagersweep := force >= 2
     for i := 0; i < n; i++ {
         if i > 0 {
+            // refresh start time if doing a second GC
            startTime = nanotime()
         }
         // switch to g0, call gc, then switch back
@@ -579,8 +622,8 @@ func GCcheckmarkdisable() {
 // gctimes records the time in nanoseconds of each phase of the concurrent GC.
 type gctimes struct {
     sweepterm     int64 // stw
-    scan          int64 // stw
-    installmarkwb int64
+    scan          int64
+    installmarkwb int64 // stw
     mark          int64
     markterm      int64 // stw
     sweep         int64
@@ -601,7 +644,7 @@ type gcchronograph struct {
 
 var gctimer gcchronograph
 
-// GCstarttimes initializes the gc timess. All previous timess are lost.
+// GCstarttimes initializes the gc times. All previous times are lost.
 func GCstarttimes(verbose int64) {
     gctimer = gcchronograph{verbose: verbose}
 }
@@ -655,6 +698,11 @@ func calctimes() gctimes {
 // the information from the most recent Concurent GC cycle. Calls from the
 // application to runtime.GC() are ignored.
 func GCprinttimes() {
+    if gctimer.verbose == 0 {
+        println("GC timers not enabled")
+        return
+    }
+
     // Explicitly put times on the heap so printPhase can use it.
     times := new(gctimes)
     *times = calctimes()
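Based on the signatures shown in this diff (GCstarttimes and GCprinttimes are exported from package runtime at this commit), a usage sketch might look as follows. This assumes a toolchain built from this development branch; the allocation loop is only a hypothetical way to provoke a concurrent cycle before printing:

package main

import "runtime"

func main() {
    runtime.GCstarttimes(1) // enable the GC phase timers (verbose > 0)
    work := make([][]byte, 0, 1024)
    for i := 0; i < 1024; i++ {
        work = append(work, make([]byte, 64<<10)) // allocate enough to trigger a concurrent cycle
    }
    _ = work
    // With the new guard above, this prints a notice instead of garbage
    // if the timers were never enabled.
    runtime.GCprinttimes()
}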

src/runtime/mgc.go

Lines changed: 90 additions & 13 deletions
@@ -123,7 +123,7 @@ const (
     _DebugGCPtrs     = false // if true, print trace of every pointer load during GC
     _ConcurrentSweep = true
 
-    _WorkbufSize     = 4 * 1024
+    _WorkbufSize     = 4 * 256
     _FinBlockSize    = 4 * 1024
     _RootData        = 0
     _RootBss         = 1
@@ -191,9 +191,9 @@ var badblock [1024]uintptr
 var nbadblock int32
 
 type workdata struct {
-    full    uint64 // lock-free list of full blocks
-    empty   uint64 // lock-free list of empty blocks
-    partial uint64 // lock-free list of partially filled blocks
+    full    uint64 // lock-free list of full blocks workbuf
+    empty   uint64 // lock-free list of empty blocks workbuf
+    partial uint64 // lock-free list of partially filled blocks workbuf
     pad0    [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait
     nproc   uint32
     tstart  int64
@@ -587,6 +587,11 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8) {
     // base and extent.
     b := b0
     n := n0
+
+    // ptrmask can have 2 possible values:
+    // 1. nil - obtain pointer mask from GC bitmap.
+    // 2. pointer to a compact mask (for stacks and data).
+
     wbuf := getpartialorempty()
     if b != 0 {
         wbuf = scanobject(b, n, ptrmask, wbuf)
@@ -600,23 +605,23 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8) {
             return
         }
     }
-    if gcphase == _GCscan {
-        throw("scanblock: In GCscan phase but no b passed in.")
-    }
 
-    keepworking := b == 0
+    drainallwbufs := b == 0
+    drainworkbuf(wbuf, drainallwbufs)
+}
 
+// Scan objects in wbuf until wbuf is empty.
+// If drainallwbufs is true find all other available workbufs and repeat the process.
+//go:nowritebarrier
+func drainworkbuf(wbuf *workbuf, drainallwbufs bool) {
     if gcphase != _GCmark && gcphase != _GCmarktermination {
         println("gcphase", gcphase)
         throw("scanblock phase")
     }
 
-    // ptrmask can have 2 possible values:
-    // 1. nil - obtain pointer mask from GC bitmap.
-    // 2. pointer to a compact mask (for stacks and data).
     for {
         if wbuf.nobj == 0 {
-            if !keepworking {
+            if !drainallwbufs {
                 putempty(wbuf)
                 return
             }
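A simplified illustration of the drain loop factored out into drainworkbuf, using toy slice-based buffers instead of the runtime's workbuf, lock-free lists, and scanobject; the types and the getfull stand-in here are invented for the sketch:

package main

import "fmt"

type workbuf struct{ obj []int }

var full []*workbuf // stand-in for the work.full list

func getfull() *workbuf {
    if len(full) == 0 {
        return nil
    }
    wb := full[len(full)-1]
    full = full[:len(full)-1]
    return wb
}

// drainworkbuf empties wbuf; if drainallwbufs is set it keeps pulling further
// buffers until none remain, mirroring the control flow in the real function.
func drainworkbuf(wbuf *workbuf, drainallwbufs bool) {
    for {
        if len(wbuf.obj) == 0 {
            if !drainallwbufs {
                return
            }
            if wbuf = getfull(); wbuf == nil {
                return
            }
        }
        b := wbuf.obj[len(wbuf.obj)-1]
        wbuf.obj = wbuf.obj[:len(wbuf.obj)-1]
        fmt.Println("scan object", b) // scanobject would also enqueue pointers it finds
    }
}

func main() {
    full = []*workbuf{{obj: []int{3, 4}}, {obj: []int{5}}}
    drainworkbuf(&workbuf{obj: []int{1, 2}}, true)
}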
@@ -641,9 +646,30 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8) {
         //     PREFETCH(wbuf->obj[wbuf->nobj - 3];
         //  }
         wbuf.nobj--
-        b = wbuf.obj[wbuf.nobj]
+        b := wbuf.obj[wbuf.nobj]
+        wbuf = scanobject(b, mheap_.arena_used-b, nil, wbuf)
+    }
+}
+
+// Scan at most count objects in the wbuf.
+//go:nowritebarrier
+func drainobjects(wbuf *workbuf, count uintptr) {
+    for i := uintptr(0); i < count; i++ {
+        if wbuf.nobj == 0 {
+            putempty(wbuf)
+            return
+        }
+
+        // This might be a good place to add prefetch code...
+        // if(wbuf->nobj > 4) {
+        //     PREFETCH(wbuf->obj[wbuf->nobj - 3];
+        //  }
+        wbuf.nobj--
+        b := wbuf.obj[wbuf.nobj]
         wbuf = scanobject(b, mheap_.arena_used-b, nil, wbuf)
     }
+    putpartial(wbuf)
+    return
 }
 
 //go:nowritebarrier
@@ -809,6 +835,17 @@ func putpartial(b *workbuf) {
     }
 }
 
+// trygetfull tries to get a full or partially empty workbuffer.
+// if one is not immediately available return nil
+//go:nowritebarrier
+func trygetfull() *workbuf {
+    wbuf := (*workbuf)(lfstackpop(&work.full))
+    if wbuf == nil {
+        wbuf = (*workbuf)(lfstackpop(&work.partial))
+    }
+    return wbuf
+}
+
 // Get a full work buffer off the work.full or a partially
 // filled one off the work.partial list. If nothing is available
 // wait until all the other gc helpers have finished and then
@@ -1090,6 +1127,38 @@ func gcmarkwb_m(slot *uintptr, ptr uintptr) {
     }
 }
 
+// gchelpwork does a small bounded amount of gc work. The purpose is to
+// shorten the time (as measured by allocations) spent doing a concurrent GC.
+// The number of mutator calls is roughly propotional to the number of allocations
+// made by that mutator. This slows down the allocation while speeding up the GC.
+//go:nowritebarrier
+func gchelpwork() {
+    switch gcphase {
+    default:
+        throw("gcphasework in bad gcphase")
+    case _GCoff, _GCquiesce, _GCstw:
+        // No work.
+    case _GCsweep:
+        // We could help by calling sweepone to sweep a single span.
+        // _ = sweepone()
+    case _GCscan:
+        // scan the stack, mark the objects, put pointers in work buffers
+        // hanging off the P where this is being run.
+        // scanstack(gp)
+    case _GCmark:
+        // Get a full work buffer and empty it.
+        var wbuf *workbuf
+        wbuf = trygetfull()
+        if wbuf != nil {
+            drainobjects(wbuf, uintptr(len(wbuf.obj))) // drain upto one buffer's worth of objects
+        }
+    case _GCmarktermination:
+        // We should never be here since the world is stopped.
+        // All available mark work will be emptied before returning.
+        throw("gcphasework in bad gcphase")
+    }
+}
+
 // The gp has been moved to a GC safepoint. GC phase specific
 // work is done here.
 //go:nowritebarrier
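A generic sketch of the assist idea the comment above describes, not the runtime's implementation: goroutines that produce work also drain a small bounded batch from a shared queue, so heavy producers throttle themselves and shrink the backlog faster; the queue type and batch size here are arbitrary:

package main

import (
    "fmt"
    "sync"
)

type workQueue struct {
    mu    sync.Mutex
    items []int
}

func (q *workQueue) push(v int) {
    q.mu.Lock()
    q.items = append(q.items, v)
    q.mu.Unlock()
}

// helpWork drains at most limit items, mirroring gchelpwork's bounded batch.
func (q *workQueue) helpWork(limit int) {
    q.mu.Lock()
    defer q.mu.Unlock()
    for i := 0; i < limit && len(q.items) > 0; i++ {
        q.items = q.items[:len(q.items)-1] // "scan" one item
    }
}

func main() {
    q := &workQueue{}
    for i := 0; i < 100; i++ {
        q.push(i)     // the producer creates work (allocates)...
        q.helpWork(2) // ...and pays for it by draining a bounded amount
    }
    fmt.Println("backlog:", len(q.items))
}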
@@ -1425,6 +1494,14 @@ type sweepdata struct {
 
 var sweep sweepdata
 
+// State of the background concurrent GC goroutine.
+var bggc struct {
+    lock    mutex
+    g       *g
+    working uint
+    started bool
+}
+
 // sweeps one span
 // returns number of pages returned to heap, or ^uintptr(0) if there is nothing to sweep
 //go:nowritebarrier

src/runtime/mgc0.go

Lines changed: 13 additions & 0 deletions
@@ -78,6 +78,19 @@ func clearpools() {
     }
 }
 
+// backgroundgc is running in a goroutine and does the concurrent GC work.
+// bggc holds the state of the backgroundgc.
+func backgroundgc() {
+    bggc.g = getg()
+    bggc.g.issystem = true
+    for {
+        gcwork(0)
+        lock(&bggc.lock)
+        bggc.working = 0
+        goparkunlock(&bggc.lock, "Concurrent GC wait")
+    }
+}
+
 func bgsweep() {
     sweep.g = getg()
     getg().issystem = true

test/init1.go

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@ func init() {
     }
     runtime.ReadMemStats(memstats)
     sys1 := memstats.Sys
-    if sys1-sys > chunk*50 {
+    if sys1-sys > chunk*500 {
         println("allocated 1000 chunks of", chunk, "and used ", sys1-sys, "memory")
         panic("init1")
     }
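For context, a standalone sketch of the kind of check this test performs, written against the public runtime API; the chunk size is hypothetical and the 500x bound mirrors the relaxed threshold above, which only verifies that some GC happens during init rather than how memory efficient it is:

package main

import "runtime"

const chunk = 1 << 20 // hypothetical 1 MB chunk size for this sketch

func main() {
    memstats := new(runtime.MemStats)
    runtime.ReadMemStats(memstats)
    sys := memstats.Sys

    var x []byte
    for i := 0; i < 1000; i++ {
        x = make([]byte, chunk) // keep only the latest chunk live, as the test does
    }
    _ = x

    runtime.ReadMemStats(memstats)
    sys1 := memstats.Sys
    if sys1-sys > chunk*500 {
        println("allocated 1000 chunks of", chunk, "and used", sys1-sys, "memory")
        panic("too much memory retained")
    }
}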
