From b270dd5d3dc1cef48866a9b29d26a3745e5dee67 Mon Sep 17 00:00:00 2001
From: jinye
Date: Wed, 6 Aug 2025 17:20:28 +0800
Subject: [PATCH] runtime: make gcBackgroundUtilization configurable via GOGCRATIO

---
 src/runtime/export_test.go   |  9 ++--
 src/runtime/mgc.go           |  8 ++--
 src/runtime/mgclimit.go      |  2 +-
 src/runtime/mgclimit_test.go | 12 +++---
 src/runtime/mgcpacer.go      | 80 +++++++++++++++++++++++-------------
 src/runtime/mgcpacer_test.go | 24 ++++++++++-
 6 files changed, 90 insertions(+), 45 deletions(-)

diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 79d83b3a..29341dd2 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -1324,25 +1324,26 @@ func GCTestPointerClass(p unsafe.Pointer) string {
 const Raceenabled = raceenabled
 
 const (
-	GCBackgroundUtilization            = gcBackgroundUtilization
-	GCGoalUtilization                  = gcGoalUtilization
 	DefaultHeapMinimum                 = defaultHeapMinimum
 	MemoryLimitHeapGoalHeadroomPercent = memoryLimitHeapGoalHeadroomPercent
 	MemoryLimitMinHeapGoalHeadroom     = memoryLimitMinHeapGoalHeadroom
 )
 
+var GCBackgroundUtilization = gcController.gcRatio
+var GCGoalUtilization = gcGoalUtilization
+
 type GCController struct {
 	gcControllerState
 }
 
-func NewGCController(gcPercent int, memoryLimit int64) *GCController {
+func NewGCController(gcPercent int, memoryLimit int64, gcRatio float64) *GCController {
 	// Force the controller to escape. We're going to
 	// do 64-bit atomics on it, and if it gets stack-allocated
 	// on a 32-bit architecture, it may get allocated unaligned
 	// space.
 	g := Escape(new(GCController))
 	g.gcControllerState.test = true // Mark it as a test copy.
-	g.init(int32(gcPercent), memoryLimit)
+	g.init(int32(gcPercent), memoryLimit, gcRatio)
 	return g
 }
 
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 48001cfd..b8646679 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -184,7 +184,7 @@ func gcinit() {
 	// Initialize GC pacer state.
 	// Use the environment variable GOGC for the initial gcPercent value.
 	// Use the environment variable GOMEMLIMIT for the initial memoryLimit value.
-	gcController.init(readGOGC(), readGOMEMLIMIT())
+	gcController.init(readGOGC(), readGOMEMLIMIT(), readGOGCRATIO())
 
 	work.startSema = 1
 	work.markDoneSema = 1
@@ -270,12 +270,12 @@ const (
 
 	// gcMarkWorkerFractionalMode indicates that a P is currently
 	// running the "fractional" mark worker. The fractional worker
-	// is necessary when GOMAXPROCS*gcBackgroundUtilization is not
+	// is necessary when GOMAXPROCS*gcController.gcRatio is not
 	// an integer and using only dedicated workers would result in
-	// utilization too far from the target of gcBackgroundUtilization.
+	// utilization too far from the target of gcController.gcRatio.
 	// The fractional worker should run until it is preempted and
 	// will be scheduled to pick up the fractional part of
-	// GOMAXPROCS*gcBackgroundUtilization.
+	// GOMAXPROCS*gcController.gcRatio.
 	gcMarkWorkerFractionalMode
 
 	// gcMarkWorkerIdleMode indicates that a P is running the mark
diff --git a/src/runtime/mgclimit.go b/src/runtime/mgclimit.go
index ad86fbd6..ae35ee8d 100644
--- a/src/runtime/mgclimit.go
+++ b/src/runtime/mgclimit.go
@@ -230,7 +230,7 @@ func (l *gcCPULimiterState) updateLocked(now int64) {
 	// Compute total GC time.
 	windowGCTime := assistTime
 	if l.gcEnabled {
-		windowGCTime += int64(float64(windowTotalTime) * gcBackgroundUtilization)
+		windowGCTime += int64(float64(windowTotalTime) * gcController.gcRatio)
 	}
 
 	// Subtract out all idle time from the total time. Do this after computing
diff --git a/src/runtime/mgclimit_test.go b/src/runtime/mgclimit_test.go
index 124da03e..8d3eaa6b 100644
--- a/src/runtime/mgclimit_test.go
+++ b/src/runtime/mgclimit_test.go
@@ -76,13 +76,13 @@ func TestGCCPULimiter(t *testing.T) {
 		// Test passing time without assists during a GC. Specifically, just enough to drain the bucket to
 		// exactly procs nanoseconds (easier to get to because of rounding).
 		//
-		// The window we need to drain the bucket is 1/(1-2*gcBackgroundUtilization) times the current fill:
+		// The window we need to drain the bucket is 1/(1-2*gcController.gcRatio) times the current fill:
 		//
-		//   fill + (window * procs * gcBackgroundUtilization - window * procs * (1-gcBackgroundUtilization)) = n
-		//   fill = n - (window * procs * gcBackgroundUtilization - window * procs * (1-gcBackgroundUtilization))
-		//   fill = n + window * procs * ((1-gcBackgroundUtilization) - gcBackgroundUtilization)
-		//   fill = n + window * procs * (1-2*gcBackgroundUtilization)
-		//   window = (fill - n) / (procs * (1-2*gcBackgroundUtilization)))
+		//   fill + (window * procs * gcController.gcRatio - window * procs * (1-gcController.gcRatio)) = n
+		//   fill = n - (window * procs * gcController.gcRatio - window * procs * (1-gcController.gcRatio))
+		//   fill = n + window * procs * ((1-gcController.gcRatio) - gcController.gcRatio)
+		//   fill = n + window * procs * (1-2*gcController.gcRatio)
+		//   window = (fill - n) / (procs * (1-2*gcController.gcRatio)))
 		//
 		// And here we want n=procs:
 		factor := (1 / (1 - 2*GCBackgroundUtilization))
diff --git a/src/runtime/mgcpacer.go b/src/runtime/mgcpacer.go
index 3e80fae4..20630c3f 100644
--- a/src/runtime/mgcpacer.go
+++ b/src/runtime/mgcpacer.go
@@ -12,30 +12,6 @@ import (
 )
 
 const (
-	// gcGoalUtilization is the goal CPU utilization for
-	// marking as a fraction of GOMAXPROCS.
-	//
-	// Increasing the goal utilization will shorten GC cycles as the GC
-	// has more resources behind it, lessening costs from the write barrier,
-	// but comes at the cost of increasing mutator latency.
-	gcGoalUtilization = gcBackgroundUtilization
-
-	// gcBackgroundUtilization is the fixed CPU utilization for background
-	// marking. It must be <= gcGoalUtilization. The difference between
-	// gcGoalUtilization and gcBackgroundUtilization will be made up by
-	// mark assists. The scheduler will aim to use within 50% of this
-	// goal.
-	//
-	// As a general rule, there's little reason to set gcBackgroundUtilization
-	// < gcGoalUtilization. One reason might be in mostly idle applications,
-	// where goroutines are unlikely to assist at all, so the actual
-	// utilization will be lower than the goal. But this is moot point
-	// because the idle mark workers already soak up idle CPU resources.
-	// These two values are still kept separate however because they are
-	// distinct conceptually, and in previous iterations of the pacer the
-	// distinction was more important.
-	gcBackgroundUtilization = 0.25
-
 	// gcCreditSlack is the amount of scan work credit that can
 	// accumulate locally before updating gcController.heapScanWork and,
 	// optionally, gcController.bgScanCredit. Lower values give a more
@@ -72,6 +48,15 @@ const (
 	// to maintain the memory limit.
 	memoryLimitHeapGoalHeadroomPercent = 3
 )
+
+// gcGoalUtilization is the goal CPU utilization for
+// marking as a fraction of GOMAXPROCS.
+//
+// Increasing the goal utilization will shorten GC cycles as the GC
+// has more resources behind it, lessening costs from the write barrier,
+// but comes at the cost of increasing mutator latency.
+// It is captured from gcController.gcRatio once, at package initialization.
+var gcGoalUtilization = gcController.gcRatio
 
 // gcController implements the GC pacing controller that determines
 // when to trigger concurrent garbage collection and how much marking
@@ -88,6 +73,11 @@ const (
 var gcController gcControllerState
 
 type gcControllerState struct {
+	// gcRatio is the background mark CPU utilization goal, as a
+	// fraction of GOMAXPROCS. Initialized from GOGCRATIO (a percentage
+	// clamped to [1, 99], default 25) divided by 100.
+	gcRatio float64
+
 	// Initialized from GOGC. GOGC=off means no GC.
 	gcPercent atomic.Int32
 
@@ -366,11 +356,12 @@ type gcControllerState struct {
 	_ cpu.CacheLinePad
 }
 
-func (c *gcControllerState) init(gcPercent int32, memoryLimit int64) {
+func (c *gcControllerState) init(gcPercent int32, memoryLimit int64, gcRatio float64) {
 	c.heapMinimum = defaultHeapMinimum
 	c.triggered = ^uint64(0)
 	c.setGCPercent(gcPercent)
 	c.setMemoryLimit(memoryLimit)
+	c.setGOGCRatio(gcRatio)
 	c.commit(true) // No sweep phase in the first GC cycle.
 	// N.B. Don't bother calling traceHeapGoal. Tracing is never enabled at
 	// initialization time.
@@ -398,13 +389,13 @@ func (c *gcControllerState) startCycle(markStartTime int64, procs int, trigger g
 	// dedicated workers so that the utilization is closest to
 	// 25%. For small GOMAXPROCS, this would introduce too much
 	// error, so we add fractional workers in that case.
-	totalUtilizationGoal := float64(procs) * gcBackgroundUtilization
+	totalUtilizationGoal := float64(procs) * c.gcRatio
 	dedicatedMarkWorkersNeeded := int64(totalUtilizationGoal + 0.5)
 	utilError := float64(dedicatedMarkWorkersNeeded)/totalUtilizationGoal - 1
 	const maxUtilError = 0.3
 	if utilError < -maxUtilError || utilError > maxUtilError {
 		// Rounding put us more than 30% off our goal. With
-		// gcBackgroundUtilization of 25%, this happens for
+		// a gcRatio of 25%, this happens for
 		// GOMAXPROCS<=3 or GOMAXPROCS=6. Enable fractional
 		// workers to compensate.
 		if float64(dedicatedMarkWorkersNeeded) > totalUtilizationGoal {
@@ -604,7 +595,7 @@ func (c *gcControllerState) endCycle(now int64, procs int, userForced bool) {
 	assistDuration := now - c.markStartTime
 
 	// Assume background mark hit its utilization goal.
-	utilization := gcBackgroundUtilization
+	utilization := c.gcRatio
 	// Add assist utilization; avoid divide by zero.
 	if assistDuration > 0 {
 		utilization += float64(c.assistTime.Load()) / float64(assistDuration*int64(procs))
@@ -1344,6 +1335,39 @@ func readGOMEMLIMIT() int64 {
 	return n
 }
 
+// setGOGCRatio sets the background mark utilization ratio and returns
+// the previous value. Unless c is a test copy, the world must be
+// stopped or mheap_.lock must be held.
+func (c *gcControllerState) setGOGCRatio(in float64) float64 {
+	if !c.test {
+		assertWorldStoppedOrLockHeld(&mheap_.lock)
+	}
+	out := c.gcRatio
+	c.gcRatio = in
+	return out
+}
+
+// readGOGCRATIO parses GOGCRATIO, an integer percentage, into a fraction.
+// Unset yields the default 0.25; values outside [1, 99] are clamped;
+// a non-integer value is a fatal error.
+func readGOGCRATIO() float64 {
+	p := gogetenv("GOGCRATIO")
+	if p == "" {
+		return 0.25
+	}
+	n, ok := atoi(p)
+	if !ok {
+		print("GOGCRATIO=", p, "\n")
+		throw("malformed GOGCRATIO; expected an integer percentage")
+	}
+	if n < 1 {
+		n = 1
+	} else if n > 99 {
+		n = 99
+	}
+	return float64(n) / 100.0
+}
+
 // addIdleMarkWorker attempts to add a new idle mark worker.
 //
 // If this returns true, the caller must become an idle mark worker unless
diff --git a/src/runtime/mgcpacer_test.go b/src/runtime/mgcpacer_test.go
index ef1483d6..32d4943c 100644
--- a/src/runtime/mgcpacer_test.go
+++ b/src/runtime/mgcpacer_test.go
@@ -24,6 +24,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "Steady",
 			gcPercent:     100,
 			memoryLimit:   math.MaxInt64,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     constant(33.0),
@@ -49,6 +50,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "SteadyBigStacks",
 			gcPercent:     100,
 			memoryLimit:   math.MaxInt64,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     constant(132.0),
@@ -77,6 +79,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "SteadyBigGlobals",
 			gcPercent:     100,
 			memoryLimit:   math.MaxInt64,
+			gcRatio:       0.25,
 			globalsBytes:  128 << 20,
 			nCores:        8,
 			allocRate:     constant(132.0),
@@ -105,6 +108,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "StepAlloc",
 			gcPercent:     100,
 			memoryLimit:   math.MaxInt64,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     constant(33.0).sum(ramp(66.0, 1).delay(50)),
@@ -128,6 +132,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "HeavyStepAlloc",
 			gcPercent:     100,
 			memoryLimit:   math.MaxInt64,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     constant(33).sum(ramp(330, 1).delay(50)),
@@ -151,6 +156,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "StepScannableFrac",
 			gcPercent:     100,
 			memoryLimit:   math.MaxInt64,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     constant(128.0),
@@ -176,6 +182,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "HighGOGC",
 			gcPercent:     1500,
 			memoryLimit:   math.MaxInt64,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     random(7, 0x53).offset(165),
@@ -217,6 +224,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "OscAlloc",
 			gcPercent:     100,
 			memoryLimit:   math.MaxInt64,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     oscillate(13, 0, 8).offset(67),
@@ -241,6 +249,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "JitterAlloc",
 			gcPercent:     100,
 			memoryLimit:   math.MaxInt64,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     random(13, 0xf).offset(132),
@@ -266,6 +275,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "HeavyJitterAlloc",
 			gcPercent:     100,
 			memoryLimit:   math.MaxInt64,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     random(33.0, 0x0).offset(330),
@@ -295,6 +305,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "SmallHeapSlowAlloc",
 			gcPercent:     100,
 			memoryLimit:   math.MaxInt64,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     constant(1.0),
@@ -332,6 +343,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "MediumHeapSlowAlloc",
 			gcPercent:     100,
 			memoryLimit:   math.MaxInt64,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     constant(1.0),
@@ -369,6 +381,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "LargeHeapSlowAlloc",
 			gcPercent:     100,
 			memoryLimit:   math.MaxInt64,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     constant(1.0),
@@ -407,6 +420,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "SteadyMemoryLimit",
 			gcPercent:     100,
 			memoryLimit:   512 << 20,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     constant(33.0),
@@ -436,6 +450,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "SteadyMemoryLimitNoGCPercent",
 			gcPercent:     -1,
 			memoryLimit:   512 << 20,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     constant(33.0),
@@ -465,6 +480,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "ExceedMemoryLimit",
 			gcPercent:     100,
 			memoryLimit:   512 << 20,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     constant(33.0),
@@ -499,6 +515,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "ExceedMemoryLimitNoGCPercent",
 			gcPercent:     -1,
 			memoryLimit:   512 << 20,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     constant(33.0),
@@ -538,6 +555,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "MaintainMemoryLimit",
 			gcPercent:     100,
 			memoryLimit:   512 << 20,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     constant(33.0),
@@ -571,6 +589,7 @@ func TestGcPacer(t *testing.T) {
 			name:          "MaintainMemoryLimitNoGCPercent",
 			gcPercent:     -1,
 			memoryLimit:   512 << 20,
+			gcRatio:       0.25,
 			globalsBytes:  32 << 10,
 			nCores:        8,
 			allocRate:     constant(33.0),
@@ -607,7 +626,7 @@ func TestGcPacer(t *testing.T) {
 		t.Run(e.name, func(t *testing.T) {
 			t.Parallel()
 
-			c := NewGCController(e.gcPercent, e.memoryLimit)
+			c := NewGCController(e.gcPercent, e.memoryLimit, e.gcRatio)
 			var bytesAllocatedBlackLast int64
 			results := make([]gcCycleResult, 0, e.length)
 			for i := 0; i < e.length; i++ {
@@ -762,6 +781,7 @@ type gcExecTest struct {
 
 	gcPercent    int
 	memoryLimit  int64
+	gcRatio      float64
 	globalsBytes uint64
 	nCores       int
 
@@ -1034,7 +1054,7 @@ func applyMemoryLimitHeapGoalHeadroom(goal uint64) uint64 {
 
 func TestIdleMarkWorkerCount(t *testing.T) {
 	const workers = 10
-	c := NewGCController(100, math.MaxInt64)
+	c := NewGCController(100, math.MaxInt64, 0.25)
 	c.SetMaxIdleMarkWorkers(workers)
 	for i := 0; i < workers; i++ {
 		if !c.NeedIdleMarkWorker() {
-- 
2.33.0