mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-17 00:26:36 +03:00
lib/fs: restore async deletion of NFS folders
Commit 83da33d8cf
removed NFS directory delete retries. It was made on assumption, that
only directory rename could cause such issues. However, both rename and
unlink uses the same "silly rename" logic
https://linux-nfs.org/wiki/index.php/Server-side_silly_rename
and linux kernel - `fs/nfs/dir.c` `nfs_unlink` and `nfs_rename`.
And NFS client may treat file still open, even if it
was properly closed by application. Most probably it could be triggered, because VictoriaMetrics may
open the same file multiple times ( data read and background merges).
There is no issue with VictoriaMetrics itself, it properly closes files. But NFS-client may have delays
or cache metadata information for the files. So it could trigger silly rename behavior.
This commit restores original behavior with deletion retries and brings
back metrics for unsuccessful delete operations.
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9842
This commit is contained in:
@@ -319,6 +319,7 @@ func Stop() {
|
||||
Storage.MustClose()
|
||||
logger.Infof("successfully closed the storage in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
fs.MustStopDirRemover()
|
||||
logger.Infof("the storage has been stopped")
|
||||
}
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): properly handle JWKS keys per [RFC 7517](https://datatracker.ietf.org/doc/html/rfc7517#section-4.2) during [OIDC discovery](https://docs.victoriametrics.com/victoriametrics/vmauth/#oidc-discovery): skip keys with `use=enc`, reject `use=sig` keys with unsupported `alg`, and warn-skip keys with empty `use` that have unsupported `alg`. See [#10663](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10663). Thanks to @andriibeee for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): enforce `datasource_type=prometheus` when [proxying](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert) Grafana requests to [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/). Grafana supports only `prometheus` and `loki` alerts. Without this fix, Grafana shows `Error loading alerts` when non-Prometheus alert types are returned. See [victoriametrics-datasource#329](https://github.com/VictoriaMetrics/victoriametrics-datasource/issues/329).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): stop logging `error`-level messages when scraping targets that expose OpenMetrics `info`, `gaugehistogram`, `stateset`, or `unknown` metric types. These are valid [OpenMetrics](https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md) types and should be parsed without error. See [#10685](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10685). Thanks to @tsarna for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): prevent panic during directory deletion on `NFS`-based mounts. The bug was introduced in [83da33d8](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/83da33d8cfe8352fd0022d05a8b6346ebb48420d) and included in [v1.123.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/victoriametrics/changelog/CHANGELOG_2025.md#v11230). See [#9842](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9842).
|
||||
|
||||
## [v1.138.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.138.0)
|
||||
|
||||
|
||||
@@ -1,11 +1,17 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// directories with this filename are scheduled to be removed by MustRemoveDir().
|
||||
@@ -48,45 +54,29 @@ func MustRemoveDir(dirPath string) {
|
||||
// on high-latency storage systems such as NFS.
|
||||
// Directories for VitoriaLogs parts may contain big number of items when wide events are stored there.
|
||||
// Also the number of parts in a partition may be quite big.
|
||||
des := MustReadDir(dirPath)
|
||||
var wg sync.WaitGroup
|
||||
concurrencyCh := make(chan struct{}, min(32, len(des)-1))
|
||||
for _, de := range des {
|
||||
name := de.Name()
|
||||
if name == deleteDirFilename {
|
||||
continue
|
||||
}
|
||||
dirEntryPath := filepath.Join(dirPath, name)
|
||||
|
||||
concurrencyCh <- struct{}{}
|
||||
wg.Go(func() {
|
||||
if err := os.RemoveAll(dirEntryPath); err != nil {
|
||||
logger.Panicf("FATAL: cannot remove %q: %s", dirEntryPath, err)
|
||||
}
|
||||
<-concurrencyCh
|
||||
})
|
||||
if tryRemoveDir(dirPath) {
|
||||
return
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
// Make sure the deleted names are properly synced to the dirPath,
|
||||
// so they are no longer visible after unclean shutdown.
|
||||
MustSyncPath(dirPath)
|
||||
|
||||
// Remove the deleteDirFilename file, since there are no other entries left in the directory.
|
||||
MustRemovePath(deleteFilePath)
|
||||
|
||||
// Sync the directory after the removing deletDirFilename file in order to make sure
|
||||
// all the metadata files are removed at some exotic filesystems such as OSSFS2.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaLogs/issues/649
|
||||
// and https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9709
|
||||
MustSyncPath(dirPath)
|
||||
|
||||
// Remove the dirPath itself
|
||||
MustRemovePath(dirPath)
|
||||
|
||||
// Do not sync the parent directory for the dirPath - the caller can do this if needed.
|
||||
// It is OK if the dirPath will remain undeleted after unclean shutdown - it will be deleted
|
||||
// on the next startup.
|
||||
// schedule NFS background dir removal.
|
||||
// NFS may perform "silly rename" before deletion, if client detects more than 1 file reference.
|
||||
// Silly rename is async operation and client may take an additional time before
|
||||
// unlink operation will succeed and could be actually deleted.
|
||||
select {
|
||||
case removeDirConcurrencyCh <- struct{}{}:
|
||||
default:
|
||||
logger.Panicf("FATAL: cannot schedule %s for removal, since the removal queue is full (%d entries)", dirPath, cap(removeDirConcurrencyCh))
|
||||
}
|
||||
dirRemoverWG.Go(func() {
|
||||
defer func() { <-removeDirConcurrencyCh }()
|
||||
for {
|
||||
if tryRemoveDir(dirPath) {
|
||||
return
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// IsPartiallyRemovedDir returns true if dirPath is partially removed because of unclean shutdown during the MustRemoveDir() call.
|
||||
@@ -141,3 +131,152 @@ func MustRemoveDirContents(dir string) {
|
||||
}
|
||||
MustSyncPath(dir)
|
||||
}
|
||||
|
||||
// tryRemoveDir attemps to remove a directory and returns true if the removal
|
||||
// succeeded.
|
||||
//
|
||||
// The function returns false if:
|
||||
//
|
||||
// 1. it detects .nfsXXXX files in inside dirPath. Since the creation of such
|
||||
// files is a valid NFS behavior, the function does not attempt to delete
|
||||
// them and lets the NFS do it gracefully (i.e. remove them when they are no
|
||||
// longer needed). See
|
||||
// https://wiki.linux-nfs.org/wiki/index.php/Server-side_silly_rename
|
||||
// 2. OR there has been a temprorary NFS error while deleting a dir entry.
|
||||
//
|
||||
// Returning false indicates that the caller should retry.
|
||||
//
|
||||
// Finally, the function panics in case of any other fs operation failure.
|
||||
func tryRemoveDir(dirPath string) bool {
|
||||
des := MustReadDir(dirPath)
|
||||
var wg sync.WaitGroup
|
||||
var mustRetry atomic.Bool
|
||||
workerCount := max(1, min(32, len(des)))
|
||||
concurrencyCh := make(chan struct{}, workerCount)
|
||||
for _, de := range des {
|
||||
name := de.Name()
|
||||
if name == deleteDirFilename {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(name, ".nfs") {
|
||||
mustRetry.Store(true)
|
||||
continue
|
||||
}
|
||||
dirEntryPath := filepath.Join(dirPath, name)
|
||||
concurrencyCh <- struct{}{}
|
||||
wg.Add(1)
|
||||
go func(dirEntryPath string) {
|
||||
defer func() {
|
||||
wg.Done()
|
||||
<-concurrencyCh
|
||||
}()
|
||||
// os.RemoveAll may create stale NFS files with .nfs prefix after
|
||||
// dirEntry removal. So it's required to perform an additional check
|
||||
// later with hasStaleNFSFiles(). It could happen if NFS client
|
||||
// detects multiple file descriptors that point to the same inode.
|
||||
//
|
||||
// While it's expected for storage process to open a file multiple
|
||||
// times simultaneously and properly close it, fs caching may still
|
||||
// confuse NFS client.
|
||||
if err := os.RemoveAll(dirEntryPath); err != nil {
|
||||
if !isTemporaryNFSError(err) {
|
||||
logger.Fatalf("FATAL: cannot remove %q: %s", dirEntryPath, err)
|
||||
}
|
||||
mustRetry.Store(true)
|
||||
}
|
||||
}(dirEntryPath)
|
||||
}
|
||||
wg.Wait()
|
||||
if mustRetry.Load() {
|
||||
nfsDirRemoveFailedAttempts.Inc()
|
||||
return false
|
||||
}
|
||||
// Make sure the deleted names are properly synced to the dirPath,
|
||||
// so they are no longer visible after unclean shutdown.
|
||||
MustSyncPath(dirPath)
|
||||
|
||||
// New stale NFS files may have appeared since the loop
|
||||
if hasStaleNFSFiles(dirPath) {
|
||||
nfsDirRemoveFailedAttempts.Inc()
|
||||
return false
|
||||
}
|
||||
|
||||
deleteFilePath := filepath.Join(dirPath, deleteDirFilename)
|
||||
// Remove the deleteDirFilename file, since there are no other entries left in the directory.
|
||||
MustRemovePath(deleteFilePath)
|
||||
|
||||
// Sync the directory after the removing deletDirFilename file in order to make sure
|
||||
// all the metadata files are removed at some exotic filesystems such as OSSFS2.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaLogs/issues/649
|
||||
// and https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9709
|
||||
MustSyncPath(dirPath)
|
||||
|
||||
// Remove the dirPath itself
|
||||
MustRemovePath(dirPath)
|
||||
|
||||
// Do not sync the parent directory for the dirPath - the caller can do this if needed.
|
||||
// It is OK if the dirPath will remain undeleted after unclean shutdown - it will be deleted
|
||||
// on the next startup.
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
var (
|
||||
dirRemoverWG sync.WaitGroup
|
||||
nfsDirRemoveFailedAttempts = metrics.NewCounter(`vm_nfs_dir_remove_failed_attempts_total`)
|
||||
_ = metrics.NewGauge(`vm_nfs_pending_dirs_to_remove`, func() float64 {
|
||||
return float64(len(removeDirConcurrencyCh))
|
||||
})
|
||||
)
|
||||
|
||||
var removeDirConcurrencyCh = make(chan struct{}, 1024)
|
||||
|
||||
// MustStopDirRemover must be called in the end of graceful shutdown
|
||||
// in order to wait for removing the remaining directories from removeDirConcurrencyCh.
|
||||
//
|
||||
// It is expected that nobody calls MustRemoveAll when MustStopDirRemover is called.
|
||||
func MustStopDirRemover() {
|
||||
doneCh := make(chan struct{})
|
||||
go func() {
|
||||
dirRemoverWG.Wait()
|
||||
close(doneCh)
|
||||
}()
|
||||
const maxWaitTime = 10 * time.Second
|
||||
select {
|
||||
case <-doneCh:
|
||||
return
|
||||
case <-time.After(maxWaitTime):
|
||||
logger.Errorf("cannot stop dirRemover in %s; the remaining partially deleted directories should be automatically removed on the next startup", maxWaitTime)
|
||||
}
|
||||
}
|
||||
|
||||
func isTemporaryNFSError(err error) bool {
|
||||
var pathErr *os.PathError
|
||||
if errors.As(err, &pathErr) {
|
||||
// Some NFS implementations return EEXIST instead of ENOTEMPTY
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6398
|
||||
if errors.Is(pathErr.Err, syscall.EEXIST) || errors.Is(pathErr.Err, syscall.ENOTEMPTY) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
// Do not check for NFS file handle error, usually it means that other client has opened file without proper lock
|
||||
// in this scenario it's better to panic.
|
||||
// User must configure proper locking options for the NFS client to prevent such error.
|
||||
// It must never have "nolock" or "local_lock=all" options to be set.
|
||||
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 for details.
|
||||
errStr := err.Error()
|
||||
return strings.Contains(errStr, "directory not empty") || strings.Contains(errStr, "device or resource busy")
|
||||
}
|
||||
|
||||
func hasStaleNFSFiles(dirPath string) bool {
|
||||
des := MustReadDir(dirPath)
|
||||
for _, de := range des {
|
||||
name := de.Name()
|
||||
if strings.HasPrefix(name, ".nfs") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
@@ -32,3 +33,50 @@ func TestIsPartiallyRemovedDir(t *testing.T) {
|
||||
f("empty_dir", "", true)
|
||||
f("regular_dir", "index.bin", false)
|
||||
}
|
||||
|
||||
func TestTryRemoveDir(t *testing.T) {
|
||||
f := func(setup func(t *testing.T, wd string), want bool) {
|
||||
t.Helper()
|
||||
d := t.TempDir()
|
||||
setup(t, d)
|
||||
got := tryRemoveDir(d)
|
||||
if got != want {
|
||||
t.Fatalf("unexpected error: (-%v;+%v)", want, got)
|
||||
}
|
||||
}
|
||||
|
||||
writeEmptyFile := func(t *testing.T, filePath string) {
|
||||
t.Helper()
|
||||
err := os.WriteFile(filePath, []byte("empty"), os.ModePerm)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot write file: %q: %s", filePath, err)
|
||||
}
|
||||
}
|
||||
// regular delete
|
||||
setup := func(t *testing.T, wd string) {
|
||||
writeEmptyFile(t, filepath.Join(wd, "metadata.bin"))
|
||||
writeEmptyFile(t, filepath.Join(wd, deleteDirFilename))
|
||||
}
|
||||
f(setup, true)
|
||||
|
||||
// has stale nfs file
|
||||
setup = func(t *testing.T, wd string) {
|
||||
writeEmptyFile(t, filepath.Join(wd, ".nfs0000"))
|
||||
writeEmptyFile(t, filepath.Join(wd, deleteDirFilename))
|
||||
}
|
||||
f(setup, false)
|
||||
|
||||
// empty dir
|
||||
f(func(t *testing.T, wd string) {
|
||||
writeEmptyFile(t, filepath.Join(wd, deleteDirFilename))
|
||||
}, true)
|
||||
|
||||
// delete many files concurrent
|
||||
setup = func(t *testing.T, wd string) {
|
||||
for i := range 60 {
|
||||
writeEmptyFile(t, filepath.Join(wd, fmt.Sprintf("metadata_%d.bin", i)))
|
||||
}
|
||||
writeEmptyFile(t, filepath.Join(wd, deleteDirFilename))
|
||||
}
|
||||
f(setup, true)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user