From 1fc548b63a1078e7b51b3faccb43cfab20ceace8 Mon Sep 17 00:00:00 2001 From: Nikolay Date: Tue, 27 Jan 2026 20:29:01 +0100 Subject: [PATCH] lib/fs: add fs.disableMincore flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This flag allows disabling the mincore() syscall introduced in 50fc48ac476b3de8166752e67adfcf2ffde0b129. On older ZFS filesystems, mincore() may trigger a bug related to ZFS's own in-memory cache. Mixing reads from mmap()ed files and direct disk reads can corrupt the ZFS ARC cache and lead to data read corruption. Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10327 --- docs/victoriametrics/Troubleshooting.md | 16 ++++++++++++++++ docs/victoriametrics/changelog/CHANGELOG.md | 1 + .../victoria_metrics_common_flags.md | 2 ++ docs/victoriametrics/vmagent_common_flags.md | 2 ++ docs/victoriametrics/vmalert_common_flags.md | 2 ++ docs/victoriametrics/vmbackup.md | 2 ++ docs/victoriametrics/vmbackupmanager.md | 2 ++ docs/victoriametrics/vmgateway.md | 2 ++ docs/victoriametrics/vmrestore.md | 2 ++ docs/victoriametrics/vmselect_common_flags.md | 2 ++ docs/victoriametrics/vmstorage_common_flags.md | 2 ++ lib/fs/mincore_linux.go | 3 +-- lib/fs/mincore_other.go | 2 +- lib/fs/reader_at.go | 9 +++++++++ 14 files changed, 46 insertions(+), 3 deletions(-) diff --git a/docs/victoriametrics/Troubleshooting.md b/docs/victoriametrics/Troubleshooting.md index fb32623caf..a443eb9096 100644 --- a/docs/victoriametrics/Troubleshooting.md +++ b/docs/victoriametrics/Troubleshooting.md @@ -493,3 +493,19 @@ for VictoriaMetrics components will notify about issues and provide recommendati Internally, we heavily rely both on dashboards and alerts, and constantly improve them. It is important to stay up to date with such changes. 
+ + +## Filesystem read corruption on ZFS + +On older ZFS filesystems (below 2.1.5), mixing reads from memory-mapped files (`mmap`) with usage of the `mincore()` syscall can trigger a bug in the ZFS in-memory cache (ARC), potentially resulting in **data read corruption** in VictoriaMetrics processes. This scenario has been observed when VictoriaMetrics instances access data directories on ZFS. + +Symptoms: +- Unexpected read errors when accessing data on ZFS. +- Corrupted or inconsistent query results. +- Crashes or panics in storage/query components when reading from ZFS. + +The issue can be mitigated by passing the `--fs.disableMincore` flag: + +```text +./bin/victoria-metrics --storageDataPath /path/to/zfs/data --fs.disableMincore +``` diff --git a/docs/victoriametrics/changelog/CHANGELOG.md b/docs/victoriametrics/changelog/CHANGELOG.md index ea971cfd69..27414d45de 100644 --- a/docs/victoriametrics/changelog/CHANGELOG.md +++ b/docs/victoriametrics/changelog/CHANGELOG.md @@ -32,6 +32,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): support configuring different `-remoteWrite.queues` per remoteWrite url. This allows setting `-remoteWrite.queues=1` for backends that do not support out-of-order ingestion (e.g. Mimir), while keeping higher queue counts for other backends such as VictoriaMetrics. Previously, this required running multiple vmagent instances with different queue settings. See [#10270](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10270). * FEATURE: [alerts](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules): add new alerting rules `PersistentQueueRunsOutOfSpaceIn12Hours` and `PersistentQueueRunsOutOfSpaceIn4Hours` for `vmagent` persistent queue capacity. These alerts help users to take proactive actions before `vmagent` starts dropping metrics due to insufficient persistent queue space. 
See [#10193](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10193) * FEATURE: All VictoriaMetrics components: add build version information to the home page for consistency with other projects. See [#10249](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10249). +* FEATURE: All VictoriaMetrics components: add `-fs.disableMincore` command-line flag, which allows disabling the `mincore()` syscall. See [#10327](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10327). * BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): stop backend health checks for URL prefixes defined in `url_map` during configuration reloads. Previously, stale backends kept being health-checked and produced repeated warning logs after reloads. See [#10334](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10334). * BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly return [/api/v1/status/tsdb](https://docs.victoriametrics.com/victoriametrics/#tsdb-stats) response for time range outside [partition index](https://docs.victoriametrics.com/victoriametrics/#indexdb). See [#10315](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10315). diff --git a/docs/victoriametrics/victoria_metrics_common_flags.md b/docs/victoriametrics/victoria_metrics_common_flags.md index 216ab54bac..313c1d8068 100644 --- a/docs/victoriametrics/victoria_metrics_common_flags.md +++ b/docs/victoriametrics/victoria_metrics_common_flags.md @@ -68,6 +68,8 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/ authKey, which must be passed in query string to /internal/force_merge pages. It overrides -httpAuth.* Flag value can be read from the given file when using -forceMergeAuthKey=file:///abs/path/to/file or -forceMergeAuthKey=file://./relative/path/to/file. 
Flag value can be read from the given http/https url when using -forceMergeAuthKey=http://host/path or -forceMergeAuthKey=https://host/path + -fs.disableMincore + Whether to disable the mincore() syscall for checking mmap()ed files. By default, mincore() is used to detect whether mmap()ed file pages are resident in memory. Disabling mincore() may be needed on older ZFS filesystems (below 2.1.5), since it may trigger ZFS bug. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10327 for details. -fs.disableMmap Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread() -fs.maxConcurrency int diff --git a/docs/victoriametrics/vmagent_common_flags.md b/docs/victoriametrics/vmagent_common_flags.md index 973f315e2d..cc24d76718 100644 --- a/docs/victoriametrics/vmagent_common_flags.md +++ b/docs/victoriametrics/vmagent_common_flags.md @@ -48,6 +48,8 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/vmagent/ . Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides -httpAuth.* Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file. Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path + -fs.disableMincore + Whether to disable the mincore() syscall for checking mmap()ed files. By default, mincore() is used to detect whether mmap()ed file pages are resident in memory. Disabling mincore() may be needed on older ZFS filesystems (below 2.1.5), since it may trigger ZFS bug. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10327 for details. -fs.disableMmap Whether to use pread() instead of mmap() for reading data files. 
By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread() -fs.maxConcurrency int diff --git a/docs/victoriametrics/vmalert_common_flags.md b/docs/victoriametrics/vmalert_common_flags.md index fe963e283b..6ec57d6f30 100644 --- a/docs/victoriametrics/vmalert_common_flags.md +++ b/docs/victoriametrics/vmalert_common_flags.md @@ -95,6 +95,8 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/vmalert/ . Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides -httpAuth.* Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file. Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path + -fs.disableMincore + Whether to disable the mincore() syscall for checking mmap()ed files. By default, mincore() is used to detect whether mmap()ed file pages are resident in memory. Disabling mincore() may be needed on older ZFS filesystems (below 2.1.5), since it may trigger ZFS bug. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10327 for details. -fs.disableMmap Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread() -fs.maxConcurrency int diff --git a/docs/victoriametrics/vmbackup.md b/docs/victoriametrics/vmbackup.md index a8158b8d47..e1a926bf65 100644 --- a/docs/victoriametrics/vmbackup.md +++ b/docs/victoriametrics/vmbackup.md @@ -380,6 +380,8 @@ Run `vmbackup -help` in order to see all the available options: -flagsAuthKey value Auth key for /flags endpoint. 
It must be passed via authKey query arg. It overrides -httpAuth.* Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path + -fs.disableMincore + Whether to disable the mincore() syscall for checking mmap()ed files. By default, mincore() is used to detect whether mmap()ed file pages are resident in memory. Disabling mincore() may be needed on older ZFS filesystems (below 2.1.5), since it may trigger ZFS bug. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10327 for details. -fs.disableMmap Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread() -fs.maxConcurrency int diff --git a/docs/victoriametrics/vmbackupmanager.md b/docs/victoriametrics/vmbackupmanager.md index 11ffd2134f..53ff267195 100644 --- a/docs/victoriametrics/vmbackupmanager.md +++ b/docs/victoriametrics/vmbackupmanager.md @@ -555,6 +555,8 @@ command-line flags: -flagsAuthKey value Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides -httpAuth.* Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path + -fs.disableMincore + Whether to disable the mincore() syscall for checking mmap()ed files. By default, mincore() is used to detect whether mmap()ed file pages are resident in memory. Disabling mincore() may be needed on older ZFS filesystems (below 2.1.5), since it may trigger ZFS bug. 
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10327 for details. -fs.disableMmap Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread() -fs.maxConcurrency int diff --git a/docs/victoriametrics/vmgateway.md b/docs/victoriametrics/vmgateway.md index 68f387389d..ba9efbbb33 100644 --- a/docs/victoriametrics/vmgateway.md +++ b/docs/victoriametrics/vmgateway.md @@ -380,6 +380,8 @@ Below is the list of configuration flags (it can be viewed by running `./vmgatew -flagsAuthKey value Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides -httpAuth.* Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path + -fs.disableMincore + Whether to disable the mincore() syscall for checking mmap()ed files. By default, mincore() is used to detect whether mmap()ed file pages are resident in memory. Disabling mincore() may be needed on older ZFS filesystems (below 2.1.5), since it may trigger ZFS bug. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10327 for details. -fs.disableMmap Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. 
mmap() is usually faster for reading small data chunks than pread() -fs.maxConcurrency int diff --git a/docs/victoriametrics/vmrestore.md b/docs/victoriametrics/vmrestore.md index 54feac10cd..a7a0d54ef9 100644 --- a/docs/victoriametrics/vmrestore.md +++ b/docs/victoriametrics/vmrestore.md @@ -82,6 +82,8 @@ Run `vmrestore -help` in order to see all the available options: -flagsAuthKey value Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides -httpAuth.* Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path + -fs.disableMincore + Whether to disable the mincore() syscall for checking mmap()ed files. By default, mincore() is used to detect whether mmap()ed file pages are resident in memory. Disabling mincore() may be needed on older ZFS filesystems (below 2.1.5), since it may trigger ZFS bug. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10327 for details. -fs.disableMmap Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread() -fs.maxConcurrency int diff --git a/docs/victoriametrics/vmselect_common_flags.md b/docs/victoriametrics/vmselect_common_flags.md index c29169d965..ab31784f22 100644 --- a/docs/victoriametrics/vmselect_common_flags.md +++ b/docs/victoriametrics/vmselect_common_flags.md @@ -53,6 +53,8 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/cluster-victori Auth key for /flags endpoint. It must be passed via authKey query arg. 
It overrides -httpAuth.* Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file. Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path + -fs.disableMincore + Whether to disable the mincore() syscall for checking mmap()ed files. By default, mincore() is used to detect whether mmap()ed file pages are resident in memory. Disabling mincore() may be needed on older ZFS filesystems (below 2.1.5), since it may trigger ZFS bug. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10327 for details. -fs.disableMmap Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread() -fs.maxConcurrency int diff --git a/docs/victoriametrics/vmstorage_common_flags.md b/docs/victoriametrics/vmstorage_common_flags.md index 547a484c2d..74852aad13 100644 --- a/docs/victoriametrics/vmstorage_common_flags.md +++ b/docs/victoriametrics/vmstorage_common_flags.md @@ -49,6 +49,8 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/cluster-victori authKey, which must be passed in query string to /internal/force_merge pages Flag value can be read from the given file when using -forceMergeAuthKey=file:///abs/path/to/file or -forceMergeAuthKey=file://./relative/path/to/file. Flag value can be read from the given http/https url when using -forceMergeAuthKey=http://host/path or -forceMergeAuthKey=https://host/path + -fs.disableMincore + Whether to disable the mincore() syscall for checking mmap()ed files. By default, mincore() is used to detect whether mmap()ed file pages are resident in memory. Disabling mincore() may be needed on older ZFS filesystems (below 2.1.5), since it may trigger ZFS bug. 
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10327 for details. -fs.disableMmap Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread() -fs.maxConcurrency int diff --git a/lib/fs/mincore_linux.go b/lib/fs/mincore_linux.go index c776cf3f6e..617f2b2428 100644 --- a/lib/fs/mincore_linux.go +++ b/lib/fs/mincore_linux.go @@ -10,10 +10,9 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" ) -func hasMincore() bool { +func supportsMincore() bool { return true } - func mincore(ptr *byte) bool { var result [1]byte _, _, err := unix.Syscall(unix.SYS_MINCORE, uintptr(unsafe.Pointer(ptr)), 1, uintptr(unsafe.Pointer(&result[0]))) diff --git a/lib/fs/mincore_other.go b/lib/fs/mincore_other.go index ba868e2030..14af98bf01 100644 --- a/lib/fs/mincore_other.go +++ b/lib/fs/mincore_other.go @@ -6,7 +6,7 @@ import ( "fmt" ) -func hasMincore() bool { +func supportsMincore() bool { return false } diff --git a/lib/fs/reader_at.go b/lib/fs/reader_at.go index b927211c76..85cde7d35d 100644 --- a/lib/fs/reader_at.go +++ b/lib/fs/reader_at.go @@ -17,6 +17,11 @@ var disableMmap = flag.Bool("fs.disableMmap", is32BitPtr, "Whether to use pread( "By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. "+ "mmap() is usually faster for reading small data chunks than pread()") +var disableMincore = flag.Bool("fs.disableMincore", false, "Whether to disable the mincore() syscall for checking mmap()ed files. "+ + "By default, mincore() is used to detect whether mmap()ed file pages are resident in memory. "+ + "Disabling mincore() may be needed on older ZFS filesystems (below 2.1.5), since it may trigger ZFS bug. 
"+ + "See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10327 for details.") + // Disable mmap for architectures with 32-bit pointers in order to be able to work with files exceeding 2^32 bytes. const is32BitPtr = (^uintptr(0) >> 32) == 0 @@ -344,3 +349,7 @@ func mmapFile(f *os.File, size int64) ([]byte, error) { } var mmappedFiles = metrics.NewCounter("vm_mmapped_files") + +func hasMincore() bool { + return supportsMincore() && !*disableMincore +}