Files
VictoriaMetrics/lib/chunkedbuffer/buffer.go
Aliaksandr Valialkin 5b87aff830 lib/promscrape: use chunkedbuffer.Buffer instead of bytesutil.ByteBuffer for reading response body from scrape targets
This reduces memory usage when reading large response bodies because the underlying buffer
doesn't need to be re-allocated during the read of large response body in the buffer.

Also decompress response body under the processScrapedDataConcurrencyLimitCh .
This reduces CPU usage and RAM usage a bit when scraping thousands of targets.
2025-04-01 20:30:39 +02:00

266 lines
5.6 KiB
Go

package chunkedbuffer
import (
"bytes"
"errors"
"fmt"
"io"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
const chunkSize = 4 * 1024
// Get returns Buffer from the pool.
//
// Return back the Buffer to the pool via Put() call when it is no longer needed.
func Get() *Buffer {
v := cbPool.Get()
if v == nil {
return &Buffer{}
}
return v.(*Buffer)
}
// Put returns cb to the pool, so it could be re-used via Get() call.
//
// The cb cannot be used after Put() call.
func Put(cb *Buffer) {
cb.Reset()
cbPool.Put(cb)
}
var cbPool sync.Pool
// Buffer provides in-memory buffer optimized for storing big bytes volumes.
//
// It stores the data in chunks of fixed size. This reduces memory fragmentation
// and memory waste comparing to the contiguous slices of bytes.
type Buffer struct {
chunks []*[chunkSize]byte
// offset is the offset in the last chunk to write data to.
offset int
}
// Reset resets the cb, so it can be reused for writing new data into it.
//
// Reset frees up memory chunks allocated for cb, so they could be reused by other Buffer instances.
func (cb *Buffer) Reset() {
for _, chunk := range cb.chunks {
putChunk(chunk)
}
clear(cb.chunks)
cb.chunks = cb.chunks[:0]
cb.offset = 0
}
// SizeBytes returns the number of bytes occupied by the cb.
func (cb *Buffer) SizeBytes() int {
return len(cb.chunks) * chunkSize
}
// Len returns the length of the data stored at cb.
func (cb *Buffer) Len() int {
if len(cb.chunks) == 0 {
return 0
}
return (len(cb.chunks)-1)*chunkSize + cb.offset
}
// MustWrite writes p to cb.
func (cb *Buffer) MustWrite(p []byte) {
for len(p) > 0 {
if len(cb.chunks) == 0 || cb.offset == chunkSize {
chunk := getChunk()
cb.chunks = append(cb.chunks, chunk)
cb.offset = 0
}
dst := cb.chunks[len(cb.chunks)-1]
n := copy(dst[cb.offset:], p)
cb.offset += n
p = p[n:]
}
}
// Write implements io.Writer interface for cb.
func (cb *Buffer) Write(p []byte) (int, error) {
cb.MustWrite(p)
return len(p), nil
}
// MustReadAt reads len(p) bytes from cb at the offset off.
func (cb *Buffer) MustReadAt(p []byte, off int64) {
if len(p) == 0 {
return
}
chunkIdx := off / chunkSize
offset := off % chunkSize
chunk := cb.chunks[chunkIdx]
n := copy(p, chunk[offset:])
p = p[n:]
for len(p) > 0 {
chunkIdx++
chunk := cb.chunks[chunkIdx]
n := copy(p, chunk[:])
p = p[n:]
}
}
// ReadFrom reads all the data from r and appends it to cb.
func (cb *Buffer) ReadFrom(r io.Reader) (int64, error) {
v := copyBufPool.Get()
if v == nil {
v = new([16 * 1024]byte)
}
b := (v.(*[16 * 1024]byte))[:]
bytesRead := int64(0)
for {
n, err := r.Read(b)
cb.MustWrite(b[:n])
bytesRead += int64(n)
if err != nil {
copyBufPool.Put(v)
if errors.Is(err, io.EOF) {
return bytesRead, nil
}
return bytesRead, err
}
}
}
var copyBufPool sync.Pool
// WriteTo writes cb data to w.
func (cb *Buffer) WriteTo(w io.Writer) (int64, error) {
bLen := cb.Len()
if bLen == 0 {
return 0, nil
}
switch t := w.(type) {
case *bytesutil.ByteBuffer:
t.Grow(bLen)
case *bytes.Buffer:
t.Grow(bLen)
}
nTotal := 0
// Write all the chunks except the last one, which may be incomplete.
for _, chunk := range cb.chunks[:len(cb.chunks)-1] {
n, err := w.Write(chunk[:])
nTotal += n
if err != nil {
return int64(nTotal), err
}
if n != chunkSize {
return int64(nTotal), fmt.Errorf("unexpected number of bytes written; got %d; want %d", n, chunkSize)
}
}
// Write the last chunk
chunk := cb.chunks[len(cb.chunks)-1]
n, err := w.Write(chunk[:cb.offset])
nTotal += n
if err != nil {
return int64(nTotal), err
}
if n != cb.offset {
return int64(nTotal), fmt.Errorf("unexpected number of bytes written; got %d; want %d", n, cb.offset)
}
return int64(nTotal), nil
}
// MustWriteTo writes cb contents w.
//
// Use this function only if w cannot return errors. For example, if w is bytes.Buffer of bytesutil.ByteBuffer.
// If w can return errors, then use WriteTo function instead.
func (cb *Buffer) MustWriteTo(w io.Writer) {
if _, err := cb.WriteTo(w); err != nil {
logger.Panicf("BUG: unexpected error writing Buffer data to the provided writer: %s", err)
}
}
// Path returns cb path.
func (cb *Buffer) Path() string {
return fmt.Sprintf("Buffer/%p/mem", cb)
}
// MustClose closes cb for subsequent reuse.
func (cb *Buffer) MustClose() {
// Do nothing, since certain code rely on cb reading after MustClose call.
}
// NewReader returns a reader for reading the data stored in cb.
func (cb *Buffer) NewReader() filestream.ReadCloser {
return &reader{
cb: cb,
}
}
type reader struct {
cb *Buffer
// offset is the offset at cb to read the next data at Read call.
offset int
}
func (r *reader) Path() string {
return r.cb.Path()
}
func (r *reader) Read(p []byte) (int, error) {
chunkIdx := r.offset / chunkSize
offset := r.offset % chunkSize
if chunkIdx == len(r.cb.chunks) {
if offset != 0 {
panic(fmt.Errorf("BUG: offset must be 0; got %d", offset))
}
return 0, io.EOF
}
chunk := r.cb.chunks[chunkIdx]
if chunkIdx == len(r.cb.chunks)-1 {
// read the last chunk
n := copy(p, chunk[offset:r.cb.offset])
r.offset += n
if offset+n == r.cb.offset {
return n, io.EOF
}
return n, nil
}
n := copy(p, chunk[offset:])
r.offset += n
return n, nil
}
func (r *reader) MustClose() {
r.cb = nil
r.offset = 0
}
func getChunk() *[chunkSize]byte {
v := chunkPool.Get()
if v == nil {
return new([chunkSize]byte)
}
return v.(*[chunkSize]byte)
}
func putChunk(chunk *[chunkSize]byte) {
chunkPool.Put(chunk)
}
var chunkPool sync.Pool