lib/fs: simplify the code for directory removal and make it compatible with object storage (S3) and NFS

- Drop the code needed for asynchronous removal of the directory on NFS shares.
  This code was needed when VictoriaMetrics could keep open files after their deletion
  or renaming. This is no longer the case after the commit 43b24164ef .
  Now files are deleted only after all the readers close them.
  This updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61

- Unify MustRemoveAll() and MustRemoveDirAtomic() into MustRemoveDir() and MustRemovePath()
  functions:

  - The MustRemoveDir() deletes the given directory with all its contents, in an "atomic" way:
    it creates a special `.delete-this-dir` file in the directory, then removes all its contents
    except of this file, and later removes the `.delete-this-dir` file together with the directory
    itself. This makes possible easily determining whether the given directory needs to be deleted
    after unclean shutdown - if it contains the `.delete-this-dir` file or if it is empty, it must be deleted.
    Add IsPartiallyRemovedDir() function, which can be used for detecting whether the given directory must be removed
    at starup.

    Previously the MustRemoveDirAtomic() was using a "trick" for atomic directory removal: it was "atomically" renaming
    the directory to a temporary directory with '.must-remove.' marker in the directory name, and after that it
    was removing the renamed directory. On startup all the directories with the `.must-remove.` marker were deleted
    if they are left after unclean shutdown. This "trick" doesn't work for NFS and object storage such as S3,
    since these storage systems do not support atomic renaming of directories with multiple entries inside.
    The new MustRemoveDir() function doesn't use this "trick", so it can be safely used in NFS and S3-like storage systems.

    This is based on the pull request from @func25 - https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9486/files .

  - The MustRemovePath() deletes the given file or an empty directory.

- Delete the existing parts and partitions at startup if they were partially deleted.

- Consistently use fs.MustRemoveDir() and fs.MustRemovePath() instead of os.RemoveAll() across the codebase.
  This reduces the amounts of bolierplate code related to error handling.

- Consistently use fs.MustWriteSync() instead of os.WriteFile() across the codebase.
This commit is contained in:
Aliaksandr Valialkin
2025-07-25 18:41:17 +02:00
parent e8c622766b
commit 83da33d8cf
53 changed files with 403 additions and 594 deletions

View File

@@ -5,21 +5,23 @@ import (
"sync"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
)
func TestFastQueueOpenClose(_ *testing.T) {
path := "fast-queue-open-close"
mustDeleteDir(path)
fs.MustRemoveDir(path)
for i := 0; i < 10; i++ {
fq := MustOpenFastQueue(path, "foobar", 100, 0, false)
fq.MustClose()
}
mustDeleteDir(path)
fs.MustRemoveDir(path)
}
func TestFastQueueWriteReadInmemory(t *testing.T) {
path := "fast-queue-write-read-inmemory"
mustDeleteDir(path)
fs.MustRemoveDir(path)
capacity := 100
fq := MustOpenFastQueue(path, "foobar", capacity, 0, false)
@@ -47,12 +49,12 @@ func TestFastQueueWriteReadInmemory(t *testing.T) {
}
}
fq.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}
func TestFastQueueWriteReadMixed(t *testing.T) {
path := "fast-queue-write-read-mixed"
mustDeleteDir(path)
fs.MustRemoveDir(path)
capacity := 100
fq := MustOpenFastQueue(path, "foobar", capacity, 0, false)
@@ -83,12 +85,12 @@ func TestFastQueueWriteReadMixed(t *testing.T) {
t.Fatalf("the number of pending bytes must be 0; got %d", n)
}
fq.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}
func TestFastQueueWriteReadWithCloses(t *testing.T) {
path := "fast-queue-write-read-with-closes"
mustDeleteDir(path)
fs.MustRemoveDir(path)
capacity := 100
fq := MustOpenFastQueue(path, "foobar", capacity, 0, false)
@@ -124,12 +126,12 @@ func TestFastQueueWriteReadWithCloses(t *testing.T) {
t.Fatalf("the number of pending bytes must be 0; got %d", n)
}
fq.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}
func TestFastQueueReadUnblockByClose(t *testing.T) {
path := "fast-queue-read-unblock-by-close"
mustDeleteDir(path)
fs.MustRemoveDir(path)
fq := MustOpenFastQueue(path, "foorbar", 123, 0, false)
resultCh := make(chan error)
@@ -154,12 +156,12 @@ func TestFastQueueReadUnblockByClose(t *testing.T) {
case <-time.After(time.Second):
t.Fatalf("timeout")
}
mustDeleteDir(path)
fs.MustRemoveDir(path)
}
func TestFastQueueReadUnblockByWrite(t *testing.T) {
path := "fast-queue-read-unblock-by-write"
mustDeleteDir(path)
fs.MustRemoveDir(path)
fq := MustOpenFastQueue(path, "foobar", 13, 0, false)
block := "foodsafdsaf sdf"
@@ -188,12 +190,12 @@ func TestFastQueueReadUnblockByWrite(t *testing.T) {
t.Fatalf("timeout")
}
fq.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}
func TestFastQueueReadWriteConcurrent(t *testing.T) {
path := "fast-queue-read-write-concurrent"
mustDeleteDir(path)
fs.MustRemoveDir(path)
fq := MustOpenFastQueue(path, "foobar", 5, 0, false)
@@ -287,12 +289,12 @@ func TestFastQueueReadWriteConcurrent(t *testing.T) {
t.Fatalf("timeout")
}
fq.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}
func TestFastQueueWriteReadWithDisabledPQ(t *testing.T) {
path := "fast-queue-write-read-inmemory-disabled-pq"
mustDeleteDir(path)
fs.MustRemoveDir(path)
capacity := 20
fq := MustOpenFastQueue(path, "foobar", capacity, 0, true)
@@ -323,12 +325,12 @@ func TestFastQueueWriteReadWithDisabledPQ(t *testing.T) {
}
}
fq.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}
func TestFastQueueWriteReadWithIgnoreDisabledPQ(t *testing.T) {
path := "fast-queue-write-read-inmemory-disabled-pq-force-write"
mustDeleteDir(path)
fs.MustRemoveDir(path)
capacity := 20
fq := MustOpenFastQueue(path, "foobar", capacity, 0, true)
@@ -364,5 +366,5 @@ func TestFastQueueWriteReadWithIgnoreDisabledPQ(t *testing.T) {
}
}
fq.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}

View File

@@ -5,6 +5,7 @@ import (
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
)
func BenchmarkFastQueueThroughputSerial(b *testing.B) {
@@ -15,11 +16,11 @@ func BenchmarkFastQueueThroughputSerial(b *testing.B) {
b.ReportAllocs()
b.SetBytes(int64(blockSize) * iterationsCount)
path := fmt.Sprintf("bench-fast-queue-throughput-serial-%d", blockSize)
mustDeleteDir(path)
fs.MustRemoveDir(path)
fq := MustOpenFastQueue(path, "foobar", iterationsCount*2, 0, false)
defer func() {
fq.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}()
for i := 0; i < b.N; i++ {
writeReadIterationFastQueue(fq, block, iterationsCount)
@@ -36,11 +37,11 @@ func BenchmarkFastQueueThroughputConcurrent(b *testing.B) {
b.ReportAllocs()
b.SetBytes(int64(blockSize) * iterationsCount)
path := fmt.Sprintf("bench-fast-queue-throughput-concurrent-%d", blockSize)
mustDeleteDir(path)
fs.MustRemoveDir(path)
fq := MustOpenFastQueue(path, "foobar", iterationsCount*cgroup.AvailableCPUs()*2, 0, false)
defer func() {
fq.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {

View File

@@ -84,7 +84,7 @@ func (q *queue) mustResetFiles() {
}
q.reader.MustClose()
q.writer.MustClose()
fs.MustRemoveAll(q.readerPath)
fs.MustRemovePath(q.readerPath)
q.writerOffset = 0
q.writerLocalOffset = 0
@@ -136,7 +136,7 @@ func mustOpenInternal(path, name string, chunkFileSize, maxBlockSize, maxPending
q, err := tryOpeningQueue(path, name, chunkFileSize, maxBlockSize, maxPendingBytes)
if err != nil {
logger.Errorf("cannot open persistent queue at %q: %s; cleaning it up and trying again", path, err)
fs.RemoveDirContents(path)
fs.MustRemoveDirContents(path)
q, err = tryOpeningQueue(path, name, chunkFileSize, maxBlockSize, maxPendingBytes)
if err != nil {
logger.Panicf("FATAL: %s", err)
@@ -189,7 +189,7 @@ func tryOpeningQueue(path, name string, chunkFileSize, maxBlockSize, maxPendingB
// path contents is broken or missing. Re-create it from scratch.
fs.MustClose(q.flockF)
fs.RemoveDirContents(path)
fs.MustRemoveDirContents(path)
q.flockF = fs.MustCreateFlockFile(path)
mi.Reset()
mi.Name = q.name
@@ -229,17 +229,17 @@ func tryOpeningQueue(path, name string, chunkFileSize, maxBlockSize, maxPendingB
}
if offset%q.chunkFileSize != 0 {
logger.Errorf("unexpected offset for chunk file %q: %d; it must be multiple of %d; removing the file", filepath, offset, q.chunkFileSize)
fs.MustRemoveAll(filepath)
fs.MustRemovePath(filepath)
continue
}
if mi.ReaderOffset >= offset+q.chunkFileSize {
logger.Errorf("unexpected chunk file found from the past: %q; removing it", filepath)
fs.MustRemoveAll(filepath)
fs.MustRemovePath(filepath)
continue
}
if mi.WriterOffset < offset {
logger.Errorf("unexpected chunk file found from the future: %q; removing it", filepath)
fs.MustRemoveAll(filepath)
fs.MustRemovePath(filepath)
continue
}
if mi.ReaderOffset >= offset && mi.ReaderOffset < offset+q.chunkFileSize {
@@ -254,13 +254,13 @@ func tryOpeningQueue(path, name string, chunkFileSize, maxBlockSize, maxPendingB
if fileSize := fs.MustFileSize(q.readerPath); fileSize < q.readerLocalOffset {
logger.Errorf("chunk file %q size is too small for the given reader offset; file size %d bytes; reader offset: %d bytes; removing the file",
q.readerPath, fileSize, q.readerLocalOffset)
fs.MustRemoveAll(q.readerPath)
fs.MustRemovePath(q.readerPath)
continue
}
r, err := filestream.OpenReaderAt(q.readerPath, int64(q.readerLocalOffset), true)
if err != nil {
logger.Errorf("cannot open %q for reading at offset %d: %s; removing this file", q.readerPath, q.readerLocalOffset, err)
fs.MustRemoveAll(filepath)
fs.MustRemovePath(filepath)
continue
}
q.reader = r
@@ -279,7 +279,7 @@ func tryOpeningQueue(path, name string, chunkFileSize, maxBlockSize, maxPendingB
if fileSize < q.writerLocalOffset {
logger.Errorf("%q size (%d bytes) is smaller than the writer offset (%d bytes); removing the file",
q.writerPath, fileSize, q.writerLocalOffset)
fs.MustRemoveAll(q.writerPath)
fs.MustRemovePath(q.writerPath)
continue
}
logger.Warnf("%q size (%d bytes) is bigger than writer offset (%d bytes); "+
@@ -289,7 +289,7 @@ func tryOpeningQueue(path, name string, chunkFileSize, maxBlockSize, maxPendingB
w, err := filestream.OpenWriterAt(q.writerPath, int64(q.writerLocalOffset), false)
if err != nil {
logger.Errorf("cannot open %q for writing at offset %d: %s; removing this file", q.writerPath, q.writerLocalOffset, err)
fs.MustRemoveAll(filepath)
fs.MustRemovePath(filepath)
continue
}
q.writer = w
@@ -528,7 +528,7 @@ var errEmptyQueue = fmt.Errorf("the queue is empty")
func (q *queue) nextChunkFileForRead() error {
// Remove the current chunk and go to the next chunk.
q.reader.MustClose()
fs.MustRemoveAll(q.readerPath)
fs.MustRemovePath(q.readerPath)
if n := q.readerOffset % q.chunkFileSize; n > 0 {
q.readerOffset += q.chunkFileSize - n
}
@@ -642,9 +642,7 @@ func (mi *metainfo) WriteToFile(path string) error {
if err != nil {
return fmt.Errorf("cannot marshal persistent queue metainfo %#v: %w", mi, err)
}
if err := os.WriteFile(path, data, 0600); err != nil {
return fmt.Errorf("cannot write persistent queue metainfo to %q: %w", path, err)
}
fs.MustWriteSync(path, data)
fs.MustSyncPath(path)
return nil
}

View File

@@ -6,11 +6,13 @@ import (
"path/filepath"
"strconv"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
)
func TestQueueOpenClose(t *testing.T) {
path := "queue-open-close"
mustDeleteDir(path)
fs.MustRemoveDir(path)
for i := 0; i < 3; i++ {
q := mustOpen(path, "foobar", 0)
if n := q.GetPendingBytes(); n > 0 {
@@ -18,7 +20,7 @@ func TestQueueOpenClose(t *testing.T) {
}
q.MustClose()
}
mustDeleteDir(path)
fs.MustRemoveDir(path)
}
func TestQueueOpen(t *testing.T) {
@@ -28,7 +30,7 @@ func TestQueueOpen(t *testing.T) {
mustCreateFile(filepath.Join(path, metainfoFilename), "foobarbaz")
q := mustOpen(path, "foobar", 0)
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
})
t.Run("junk-files-and-dirs", func(_ *testing.T) {
path := "queue-open-junk-files-and-dir"
@@ -38,7 +40,7 @@ func TestQueueOpen(t *testing.T) {
mustCreateDir(filepath.Join(path, "junk-dir"))
q := mustOpen(path, "foobar", 0)
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
})
t.Run("invalid-chunk-offset", func(_ *testing.T) {
path := "queue-open-invalid-chunk-offset"
@@ -47,7 +49,7 @@ func TestQueueOpen(t *testing.T) {
mustCreateFile(filepath.Join(path, fmt.Sprintf("%016X", 1234)), "qwere")
q := mustOpen(path, "foobar", 0)
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
})
t.Run("too-new-chunk", func(_ *testing.T) {
path := "queue-open-too-new-chunk"
@@ -56,7 +58,7 @@ func TestQueueOpen(t *testing.T) {
mustCreateFile(filepath.Join(path, fmt.Sprintf("%016X", 100*uint64(DefaultChunkFileSize))), "asdf")
q := mustOpen(path, "foobar", 0)
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
})
t.Run("too-old-chunk", func(t *testing.T) {
path := "queue-open-too-old-chunk"
@@ -72,7 +74,7 @@ func TestQueueOpen(t *testing.T) {
mustCreateFile(filepath.Join(path, fmt.Sprintf("%016X", 0)), "adfsfd")
q := mustOpen(path, mi.Name, 0)
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
})
t.Run("too-big-reader-offset", func(t *testing.T) {
path := "queue-open-too-big-reader-offset"
@@ -86,7 +88,7 @@ func TestQueueOpen(t *testing.T) {
}
q := mustOpen(path, mi.Name, 0)
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
})
t.Run("metainfo-dir", func(_ *testing.T) {
path := "queue-open-metainfo-dir"
@@ -94,7 +96,7 @@ func TestQueueOpen(t *testing.T) {
mustCreateDir(filepath.Join(path, metainfoFilename))
q := mustOpen(path, "foobar", 0)
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
})
t.Run("too-small-reader-file", func(t *testing.T) {
path := "too-small-reader-file"
@@ -110,7 +112,7 @@ func TestQueueOpen(t *testing.T) {
mustCreateFile(filepath.Join(path, fmt.Sprintf("%016X", 0)), "sdf")
q := mustOpen(path, mi.Name, 0)
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
})
t.Run("invalid-writer-file-size", func(_ *testing.T) {
path := "too-small-reader-file"
@@ -119,7 +121,7 @@ func TestQueueOpen(t *testing.T) {
mustCreateFile(filepath.Join(path, fmt.Sprintf("%016X", 0)), "sdfdsf")
q := mustOpen(path, "foobar", 0)
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
})
t.Run("invalid-queue-name", func(t *testing.T) {
path := "invalid-queue-name"
@@ -133,17 +135,17 @@ func TestQueueOpen(t *testing.T) {
mustCreateFile(filepath.Join(path, fmt.Sprintf("%016X", 0)), "sdf")
q := mustOpen(path, "baz", 0)
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
})
}
func TestQueueResetIfEmpty(t *testing.T) {
path := "queue-reset-if-empty"
mustDeleteDir(path)
fs.MustRemoveDir(path)
q := mustOpen(path, "foobar", 0)
defer func() {
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}()
block := make([]byte, 1024*1024)
@@ -170,11 +172,11 @@ func TestQueueResetIfEmpty(t *testing.T) {
func TestQueueWriteRead(t *testing.T) {
path := "queue-write-read"
mustDeleteDir(path)
fs.MustRemoveDir(path)
q := mustOpen(path, "foobar", 0)
defer func() {
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}()
for j := 0; j < 5; j++ {
@@ -206,11 +208,11 @@ func TestQueueWriteRead(t *testing.T) {
func TestQueueWriteCloseRead(t *testing.T) {
path := "queue-write-close-read"
mustDeleteDir(path)
fs.MustRemoveDir(path)
q := mustOpen(path, "foobar", 0)
defer func() {
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}()
for j := 0; j < 5; j++ {
@@ -247,11 +249,11 @@ func TestQueueWriteCloseRead(t *testing.T) {
func TestQueueChunkManagementSimple(t *testing.T) {
path := "queue-chunk-management-simple"
mustDeleteDir(path)
fs.MustRemoveDir(path)
const chunkFileSize = 100
const maxBlockSize = 20
q := mustOpenInternal(path, "foobar", chunkFileSize, maxBlockSize, 0)
defer mustDeleteDir(path)
defer fs.MustRemoveDir(path)
defer q.MustClose()
var blocks []string
for i := 0; i < 100; i++ {
@@ -278,13 +280,13 @@ func TestQueueChunkManagementSimple(t *testing.T) {
func TestQueueChunkManagementPeriodicClose(t *testing.T) {
path := "queue-chunk-management-periodic-close"
mustDeleteDir(path)
fs.MustRemoveDir(path)
const chunkFileSize = 100
const maxBlockSize = 20
q := mustOpenInternal(path, "foobar", chunkFileSize, maxBlockSize, 0)
defer func() {
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}()
var blocks []string
for i := 0; i < 100; i++ {
@@ -316,11 +318,11 @@ func TestQueueChunkManagementPeriodicClose(t *testing.T) {
func TestQueueLimitedSize(t *testing.T) {
const maxPendingBytes = 1000
path := "queue-limited-size"
mustDeleteDir(path)
fs.MustRemoveDir(path)
q := mustOpen(path, "foobar", maxPendingBytes)
defer func() {
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}()
// Check that small blocks are successfully buffered and read
@@ -371,24 +373,16 @@ func TestQueueLimitedSize(t *testing.T) {
}
func mustCreateFile(path, contents string) {
if err := os.WriteFile(path, []byte(contents), 0600); err != nil {
panic(fmt.Errorf("cannot create file %q with %d bytes contents: %w", path, len(contents), err))
}
fs.MustWriteSync(path, []byte(contents))
}
func mustCreateDir(path string) {
mustDeleteDir(path)
fs.MustRemoveDir(path)
if err := os.MkdirAll(path, 0700); err != nil {
panic(fmt.Errorf("cannot create dir %q: %w", path, err))
}
}
func mustDeleteDir(path string) {
if err := os.RemoveAll(path); err != nil {
panic(fmt.Errorf("cannot remove dir %q: %w", path, err))
}
}
func mustCreateEmptyMetainfo(path, name string) {
var mi metainfo
mi.Name = name

View File

@@ -6,6 +6,7 @@ import (
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
)
func BenchmarkQueueThroughputSerial(b *testing.B) {
@@ -16,11 +17,11 @@ func BenchmarkQueueThroughputSerial(b *testing.B) {
b.ReportAllocs()
b.SetBytes(int64(blockSize) * iterationsCount)
path := fmt.Sprintf("bench-queue-throughput-serial-%d", blockSize)
mustDeleteDir(path)
fs.MustRemoveDir(path)
q := mustOpen(path, "foobar", 0)
defer func() {
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}()
for i := 0; i < b.N; i++ {
writeReadIteration(q, block, iterationsCount)
@@ -37,12 +38,12 @@ func BenchmarkQueueThroughputConcurrent(b *testing.B) {
b.ReportAllocs()
b.SetBytes(int64(blockSize) * iterationsCount)
path := fmt.Sprintf("bench-queue-throughput-concurrent-%d", blockSize)
mustDeleteDir(path)
fs.MustRemoveDir(path)
q := mustOpen(path, "foobar", 0)
var qLock sync.Mutex
defer func() {
q.MustClose()
mustDeleteDir(path)
fs.MustRemoveDir(path)
}()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {