lib/cgroup: support reading cpu/memory limits from systemd slices

cgroup v2 supports slices (i.e. a path hierarchy) for resource limits. Slices are mostly used by systemd
and by container runtimes built on top of it.

This commit reads the cgroup sub-path of the process for systemd slices and traverses it up to the root, using the minimal limit value found along the way.

Related docs:
https://docs.oracle.com/en/operating-systems/oracle-linux/9/systemd/SystemdMngCgroupsV2.html#SlicesServicesScopesHierarchy
https://www.freedesktop.org/software/systemd/man/latest/systemd.slice.html

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10635
This commit is contained in:
andriibeee
2026-04-22 11:18:03 +03:00
committed by GitHub
parent 0785d16711
commit a3df0f890b
13 changed files with 97 additions and 11 deletions

View File

@@ -26,6 +26,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
## tip
* FEATURE: all VictoriaMetrics components: add support for reading cpu/memory limits configured via [systemd slices](https://www.freedesktop.org/software/systemd/man/latest/systemd.slice.html). Previously, only limits set directly on the process's own cgroup were detected. See [#10635](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10635). Thanks to @andriibeee for the contribution.
* FEATURE: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): now `Run query` link on the Alerting Rules page correctly propagates the alerts interval and evaluation time. See [#10366](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10366).
* FEATURE: [alerts](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules): add new `MetricNameStatsCacheUtilizationIsTooHigh` alerting rule to track overutilization of [Metric names usage stats tracker](https://docs.victoriametrics.com/victoriametrics/#track-ingested-metrics-usage) (used in [Cardinality Explorer](https://docs.victoriametrics.com/victoriametrics/#cardinality-explorer)). See [#10840](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10840).
* FEATURE: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): add `vm_streamaggr_counter_resets_total` metric for `total*`, `increase*` and `rate*` outputs that is useful for aggregation behaviour tracking. These metrics help to identify issues described in [Troubleshooting: counter resets](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#counter-resets).

View File

@@ -3,6 +3,7 @@ package cgroup
import (
"fmt"
"os"
"path"
"runtime"
"strconv"
"strings"
@@ -100,17 +101,31 @@ func getOnlineCPUCount() float64 {
return n
}
func getCPUQuotaV2(sysPrefix, cgroupPath string) (float64, error) {
data, err := getFileContents("cpu.max", sysPrefix, cgroupPath, "")
// See https://www.freedesktop.org/software/systemd/man/latest/systemd.slice.html
func getCPUQuotaV2(sysfsPrefix, cgroupPath string) (float64, error) {
subPath, err := readCgroupV2SubPath(cgroupPath)
if err != nil {
return 0, err
subPath = "/"
}
data = strings.TrimSpace(data)
n, err := parseCPUMax(data)
if err != nil {
return 0, fmt.Errorf("cannot parse cpu.max file contents: %w", err)
var minQuota float64 = -1
for {
// traverse the sub-path hierarchy and use the minimal value found for the stat
data, err := os.ReadFile(path.Join(sysfsPrefix, subPath, "cpu.max"))
if err == nil {
quota, err := parseCPUMax(strings.TrimSpace(string(data)))
if err != nil {
return 0, fmt.Errorf("cannot parse cpu.max at %s: %w", subPath, err)
}
if quota > 0 && (minQuota < 0 || quota < minQuota) {
minQuota = quota
}
}
if subPath == "/" || subPath == "." {
break
}
subPath = path.Dir(subPath)
}
return n, nil
return minQuota, nil
}
// See https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#cpu

View File

@@ -37,4 +37,7 @@ func TestGetCPUQuotaV2(t *testing.T) {
f("testdata/cgroup", "testdata/self/cgroupv2", 2)
f("testdata/cgroup/cpu_unset", "", -1)
f("testdata/cgroup/cpu_onlymax", "", 2)
// systemd slice
f("testdata/v2slice", "testdata/self/cgroupv2_slice", 2)
}

View File

@@ -1,9 +1,12 @@
package cgroup
import (
"fmt"
"os"
"path"
"runtime/debug"
"strconv"
"strings"
)
// GetGOGC returns GOGC value for the currently running process.
@@ -42,15 +45,44 @@ func GetMemoryLimit() int64 {
return n
}
n, err = getMemStatV2("memory.max")
if err != nil {
if err != nil || n <= 0 {
return 0
}
return n
}
func getMemStatV2(statName string) (int64, error) {
// See https: //www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files
return getStatGeneric(statName, "/sys/fs/cgroup", "/proc/self/cgroup", "")
// See https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files
return getMemLimitV2("/sys/fs/cgroup", "/proc/self/cgroup", statName)
}
// getMemLimitV2 returns the smallest positive value of statName found in the
// cgroup v2 directory of the process and in every parent directory of it.
//
// The cgroup sub-path is obtained via readCgroupV2SubPath(cgroupPath); if that
// fails, only the top-level directory under sysfsPrefix is inspected.
// Levels where the stat file cannot be read, or where it contains "max",
// are skipped. -1 is returned when no level provides a positive value.
//
// A non-nil error is returned only when a readable stat file contains
// something other than "max" or a base-10 integer.
func getMemLimitV2(sysfsPrefix, cgroupPath, statName string) (int64, error) {
	dir, err := readCgroupV2SubPath(cgroupPath)
	if err != nil {
		// Best effort: fall back to the root of the hierarchy.
		dir = "/"
	}

	lowest := int64(-1)
	// Walk from the process's own cgroup towards the root. The current level
	// is always inspected before the loop decides whether to stop, so the
	// root ("/" for rooted sub-paths, "." for relative ones) is included.
	for atTop := false; !atTop; dir = path.Dir(dir) {
		atTop = dir == "/" || dir == "."

		raw, readErr := os.ReadFile(path.Join(sysfsPrefix, dir, statName))
		if readErr != nil {
			// The stat file may be missing at this level; keep climbing.
			continue
		}
		value := strings.TrimSpace(string(raw))
		if value == "max" {
			// "max" means no limit at this level.
			continue
		}
		n, parseErr := strconv.ParseInt(value, 10, 64)
		if parseErr != nil {
			return 0, fmt.Errorf("cannot parse %s at %s: %w", statName, dir, parseErr)
		}
		if n <= 0 {
			continue
		}
		if lowest < 0 || n < lowest {
			lowest = n
		}
	}
	return lowest, nil
}
func getMemStat(statName string) (int64, error) {

View File

@@ -19,6 +19,22 @@ func TestGetHierarchicalMemoryLimitSuccess(t *testing.T) {
f("testdata/cgroup", "testdata/self/cgroup", 120)
}
// TestGetMemLimitV2 checks the value getMemLimitV2 reports for "memory.max"
// against fixtures under testdata, including a systemd slice layout.
func TestGetMemLimitV2(t *testing.T) {
	check := func(sysPrefix, cgroupPath string, want int64) {
		t.Helper()
		switch got, err := getMemLimitV2(sysPrefix, cgroupPath, "memory.max"); {
		case err != nil:
			t.Fatalf("unexpected error: %s", err)
		case got != want:
			t.Fatalf("unexpected result, got: %d, want %d", got, want)
		}
	}

	check("testdata/cgroup", "testdata/self/cgroupv2", 523372036854771712)

	// systemd slice
	check("testdata/v2slice", "testdata/self/cgroupv2_slice", 1073741824)
}
func TestGetHierarchicalMemoryLimitFailure(t *testing.T) {
f := func(sysPath, cgroupPath string) {
t.Helper()

View File

@@ -0,0 +1 @@
0::/vm.slice/vmagent.service

1
lib/cgroup/testdata/v2slice/cpu.max vendored Normal file
View File

@@ -0,0 +1 @@
max 100000

View File

@@ -0,0 +1 @@
max

View File

@@ -0,0 +1 @@
200000 100000

View File

@@ -0,0 +1 @@
1073741824

View File

@@ -0,0 +1 @@
max 100000

View File

@@ -0,0 +1 @@
max

View File

@@ -43,6 +43,18 @@ func getFileContents(statName, sysfsPrefix, cgroupPath, cgroupGrepLine string) (
return string(data), nil
}
// readCgroupV2SubPath reads the file at cgroupPath and extracts the cgroup v2
// sub-path of the process from it.
//
// For example, a line such as
//
//	0::/user.slice/user-1000.slice/session-5.scope
//
// is expected to yield /user.slice/user-1000.slice/session-5.scope.
// The extraction is delegated to grepFirstMatch with an empty match string,
// item index 2 and ":" as the delimiter.
//
// The error from reading the file is returned as is.
//
// See https://www.freedesktop.org/software/systemd/man/latest/systemd.slice.html
// and https://docs.oracle.com/en/operating-systems/oracle-linux/9/systemd/SystemdMngCgroupsV2.html#SystemdScopes
func readCgroupV2SubPath(cgroupPath string) (string, error) {
	contents, err := os.ReadFile(cgroupPath)
	if err == nil {
		return grepFirstMatch(string(contents), "", 2, ":")
	}
	return "", err
}
// grepFirstMatch searches match line at data and returns item from it by index with given delimiter.
func grepFirstMatch(data string, match string, index int, delimiter string) (string, error) {
lines := strings.Split(string(data), "\n")