mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-23 03:36:31 +03:00
Compare commits
40 Commits
v1.110.26
...
fs-paralle
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
305f1c91f8 | ||
|
|
74b03c93a6 | ||
|
|
0e9bb5a42d | ||
|
|
f1a88e57cf | ||
|
|
76176ac1d3 | ||
|
|
c08adb31bb | ||
|
|
b49b0471ef | ||
|
|
13102045a7 | ||
|
|
d226e5b95f | ||
|
|
30bbb5660b | ||
|
|
1792b6bd9a | ||
|
|
f97f627f79 | ||
|
|
785c1fd053 | ||
|
|
697bfd5cee | ||
|
|
f0ac6d9ac9 | ||
|
|
f0b251d967 | ||
|
|
c3346ae8fd | ||
|
|
0ffb3fdfce | ||
|
|
4e234ccbd1 | ||
|
|
943589ca31 | ||
|
|
c9596a0364 | ||
|
|
e7b0a00493 | ||
|
|
be0fe546e5 | ||
|
|
13911db316 | ||
|
|
0cb90f91fc | ||
|
|
bdf65dde88 | ||
|
|
4d9b69b5a6 | ||
|
|
692a9be5fa | ||
|
|
c8742ab120 | ||
|
|
b6f8128273 | ||
|
|
bed7cbd0a4 | ||
|
|
d9c07dbc0b | ||
|
|
20ad9cd395 | ||
|
|
8b3fe9cdec | ||
|
|
e1e367b3cb | ||
|
|
f40c6fcad1 | ||
|
|
b6bc186013 | ||
|
|
9bc7a17d80 | ||
|
|
9ce548dcb5 | ||
|
|
82e583338d |
2
.github/ISSUE_TEMPLATE/question.yml
vendored
2
.github/ISSUE_TEMPLATE/question.yml
vendored
@@ -5,7 +5,7 @@ body:
|
||||
- type: textarea
|
||||
id: describe-the-component
|
||||
attributes:
|
||||
label: Is your question request related to a specific component?
|
||||
label: Is your question related to a specific component?
|
||||
placeholder: |
|
||||
VictoriaMetrics, vmagent, vmalert, vmui, etc...
|
||||
validations:
|
||||
|
||||
20
Makefile
20
Makefile
@@ -17,7 +17,7 @@ EXTRA_GO_BUILD_TAGS ?=
|
||||
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
|
||||
TAR_OWNERSHIP ?= --owner=1000 --group=1000
|
||||
|
||||
GOLANGCI_LINT_VERSION := 2.4.0
|
||||
GOLANGCI_LINT_VERSION := 2.7.2
|
||||
|
||||
.PHONY: $(MAKECMDGOALS)
|
||||
|
||||
@@ -471,7 +471,23 @@ integration-test:
|
||||
|
||||
apptest:
|
||||
$(MAKE) victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore
|
||||
go test ./apptest/... -skip="^TestCluster.*"
|
||||
go test ./apptest/... -skip="^Test(Cluster|Legacy).*"
|
||||
|
||||
integration-test-legacy: victoria-metrics vmbackup vmrestore
|
||||
OS=$$(uname | tr '[:upper:]' '[:lower:]'); \
|
||||
ARCH=$$(uname -m | tr '[:upper:]' '[:lower:]' | sed 's/x86_64/amd64/'); \
|
||||
VERSION=v1.132.0; \
|
||||
VMSINGLE=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}.tar.gz; \
|
||||
VMCLUSTER=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}-cluster.tar.gz; \
|
||||
URL=https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/$${VERSION}; \
|
||||
DIR=/tmp/$${VERSION}; \
|
||||
test -d $${DIR} || (mkdir $${DIR} && \
|
||||
curl --output-dir /tmp -LO $${URL}/$${VMSINGLE} && tar xzf /tmp/$${VMSINGLE} -C $${DIR} && \
|
||||
curl --output-dir /tmp -LO $${URL}/$${VMCLUSTER} && tar xzf /tmp/$${VMCLUSTER} -C $${DIR} \
|
||||
); \
|
||||
VM_LEGACY_VMSINGLE_PATH=$${DIR}/victoria-metrics-prod \
|
||||
VM_LEGACY_VMSTORAGE_PATH=$${DIR}/vmstorage-prod \
|
||||
go test ./apptest/tests -run="^TestLegacySingle.*"
|
||||
|
||||
benchmark:
|
||||
GOEXPERIMENT=synctest go test -bench=. ./lib/...
|
||||
|
||||
@@ -10,9 +10,11 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
||||
)
|
||||
|
||||
@@ -48,6 +50,7 @@ func selfScraper(scrapeInterval time.Duration) {
|
||||
|
||||
var bb bytesutil.ByteBuffer
|
||||
var rows prometheus.Rows
|
||||
var metadataRows prometheus.MetadataRows
|
||||
var mrs []storage.MetricRow
|
||||
var labels []prompb.Label
|
||||
t := time.NewTicker(scrapeInterval)
|
||||
@@ -57,8 +60,12 @@ func selfScraper(scrapeInterval time.Duration) {
|
||||
appmetrics.WritePrometheusMetrics(&bb)
|
||||
s := bytesutil.ToUnsafeString(bb.B)
|
||||
rows.Reset()
|
||||
// VictoriaMetrics components don't expose metadata yet, only need to parse samples
|
||||
rows.UnmarshalWithErrLogger(s, nil)
|
||||
// Parse metrics and optionally metadata when enabled
|
||||
if prommetadata.IsEnabled() {
|
||||
rows, metadataRows = prometheus.UnmarshalWithMetadata(rows, metadataRows, s, nil)
|
||||
} else {
|
||||
rows.UnmarshalWithErrLogger(s, nil)
|
||||
}
|
||||
mrs = mrs[:0]
|
||||
for i := range rows.Rows {
|
||||
r := &rows.Rows[i]
|
||||
@@ -91,6 +98,19 @@ func selfScraper(scrapeInterval time.Duration) {
|
||||
if err := vmstorage.AddRows(mrs); err != nil {
|
||||
logger.Errorf("cannot store self-scraped metrics: %s", err)
|
||||
}
|
||||
if len(metadataRows.Rows) > 0 {
|
||||
mms := make([]metricsmetadata.Row, 0, len(metadataRows.Rows))
|
||||
for _, mm := range metadataRows.Rows {
|
||||
mms = append(mms, metricsmetadata.Row{
|
||||
MetricFamilyName: bytesutil.ToUnsafeBytes(mm.Metric),
|
||||
Help: bytesutil.ToUnsafeBytes(mm.Help),
|
||||
Type: mm.Type,
|
||||
})
|
||||
}
|
||||
if err := vmstorage.AddMetadataRows(mms); err != nil {
|
||||
logger.Errorf("cannot store self-scraped metrics metadata: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
for {
|
||||
select {
|
||||
|
||||
@@ -78,7 +78,7 @@ func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompb.Label)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(len(rows))
|
||||
rowsInserted.Add(samplesCount)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(samplesCount)
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ var (
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
|
||||
)
|
||||
|
||||
// InsertHandler processes metrics from given reader.
|
||||
// InsertHandlerForReader processes metrics from given reader.
|
||||
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
|
||||
return stream.ParseStream(r, encoding, nil, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
|
||||
return insertRows(at, tss, mms, nil)
|
||||
|
||||
@@ -15,7 +15,6 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
@@ -554,9 +553,9 @@ func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.D
|
||||
// For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417
|
||||
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
|
||||
plainBlock := make([]byte, 0, len(zstdBlock)*2)
|
||||
plainBlock, err := zstd.Decompress(plainBlock, zstdBlock)
|
||||
plainBlock, err := encoding.DecompressZSTD(plainBlock, zstdBlock)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("zstd: decompress: %s", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return snappy.Encode(nil, plainBlock), nil
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"math"
|
||||
@@ -94,6 +95,8 @@ type UserInfo struct {
|
||||
rt http.RoundTripper
|
||||
|
||||
requests *metrics.Counter
|
||||
requestErrors *metrics.Counter
|
||||
backendRequests *metrics.Counter
|
||||
backendErrors *metrics.Counter
|
||||
requestsDuration *metrics.Summary
|
||||
}
|
||||
@@ -105,13 +108,29 @@ type HeadersConf struct {
|
||||
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
|
||||
}
|
||||
|
||||
func (ui *UserInfo) beginConcurrencyLimit() error {
|
||||
func (ui *UserInfo) beginConcurrencyLimit(ctx context.Context) error {
|
||||
select {
|
||||
case ui.concurrencyLimitCh <- struct{}{}:
|
||||
return nil
|
||||
default:
|
||||
ui.concurrencyLimitReached.Inc()
|
||||
return fmt.Errorf("cannot handle more than %d concurrent requests from user %s", ui.getMaxConcurrentRequests(), ui.name())
|
||||
|
||||
// The per-user limit for the number of concurrent requests is reached.
|
||||
// Wait until the currently executed requests are finished, so the current request could be executed.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
|
||||
select {
|
||||
case ui.concurrencyLimitCh <- struct{}{}:
|
||||
return nil
|
||||
case <-ctx.Done():
|
||||
err := ctx.Err()
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because %d concurrent requests from the user %s are executed",
|
||||
*maxQueueDuration, ui.getMaxConcurrentRequests(), ui.name())
|
||||
}
|
||||
|
||||
return fmt.Errorf("cannot start executing the request because %d concurrent requests from the user %s are executed: %w",
|
||||
ui.getMaxConcurrentRequests(), ui.name(), err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -135,12 +154,8 @@ func (ui *UserInfo) stopHealthChecks() {
|
||||
return
|
||||
}
|
||||
|
||||
pbus := ui.URLPrefix.bus.Load()
|
||||
if *pbus != nil {
|
||||
for _, bu := range *pbus {
|
||||
bu.stopHealthCheck()
|
||||
}
|
||||
}
|
||||
bus := ui.URLPrefix.bus.Load()
|
||||
bus.stopHealthChecks()
|
||||
}
|
||||
|
||||
// Header is `Name: Value` http header, which must be added to the proxied request.
|
||||
@@ -278,7 +293,7 @@ type URLPrefix struct {
|
||||
// the list of backend urls
|
||||
//
|
||||
// the list can be dynamically updated if `discover_backend_ips` option is set.
|
||||
bus atomic.Pointer[[]*backendURL]
|
||||
bus atomic.Pointer[backendURLs]
|
||||
|
||||
// if this option is set, then backend ips for busOriginal are periodically re-discovered and put to bus.
|
||||
discoverBackendIPs bool
|
||||
@@ -302,10 +317,40 @@ func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
|
||||
}
|
||||
}
|
||||
|
||||
type backendURLs struct {
|
||||
healthChecksContext context.Context
|
||||
healthChecksCancel func()
|
||||
healthChecksWG sync.WaitGroup
|
||||
|
||||
bus []*backendURL
|
||||
}
|
||||
|
||||
func newBackendURLs() *backendURLs {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
return &backendURLs{
|
||||
healthChecksContext: ctx,
|
||||
healthChecksCancel: cancel,
|
||||
}
|
||||
}
|
||||
|
||||
func (bus *backendURLs) add(u *url.URL) {
|
||||
bus.bus = append(bus.bus, &backendURL{
|
||||
url: u,
|
||||
healthCheckContext: bus.healthChecksContext,
|
||||
healthCheckWG: &bus.healthChecksWG,
|
||||
})
|
||||
}
|
||||
|
||||
func (bus *backendURLs) stopHealthChecks() {
|
||||
bus.healthChecksCancel()
|
||||
bus.healthChecksWG.Wait()
|
||||
}
|
||||
|
||||
type backendURL struct {
|
||||
broken atomic.Bool
|
||||
stopHealthCheckCh chan struct{}
|
||||
stopHealthCheckOnce sync.Once
|
||||
broken atomic.Bool
|
||||
|
||||
healthCheckContext context.Context
|
||||
healthCheckWG *sync.WaitGroup
|
||||
|
||||
concurrentRequests atomic.Int32
|
||||
|
||||
@@ -317,55 +362,48 @@ func (bu *backendURL) isBroken() bool {
|
||||
}
|
||||
|
||||
func (bu *backendURL) setBroken() {
|
||||
if !bu.broken.Load() && bu.broken.CompareAndSwap(false, true) {
|
||||
bu.startHealthCheck()
|
||||
if bu.broken.CompareAndSwap(false, true) {
|
||||
bu.healthCheckWG.Add(1)
|
||||
go func() {
|
||||
defer bu.healthCheckWG.Done()
|
||||
bu.runHealthCheck()
|
||||
bu.broken.Store(false)
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
func (bu *backendURL) startHealthCheck() {
|
||||
go func() {
|
||||
port := bu.url.Port()
|
||||
if port == "" {
|
||||
port = "80"
|
||||
}
|
||||
addr := net.JoinHostPort(bu.url.Hostname(), port)
|
||||
func (bu *backendURL) runHealthCheck() {
|
||||
port := bu.url.Port()
|
||||
if port == "" {
|
||||
port = "80"
|
||||
}
|
||||
addr := net.JoinHostPort(bu.url.Hostname(), port)
|
||||
|
||||
t := time.NewTimer(*failTimeout)
|
||||
for {
|
||||
select {
|
||||
case <-t.C:
|
||||
// Do not perform tcp probe for https urls as
|
||||
// - the network unavailability is less probable for https urls
|
||||
// - it will pollute logs on server side with SSL handshake errors.
|
||||
if bu.url.Scheme == "https" {
|
||||
bu.broken.Store(false)
|
||||
t := time.NewTicker(*failTimeout)
|
||||
defer t.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-t.C:
|
||||
// Verify network connectivity via TCP dial before marking backend healthy.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9997
|
||||
ctx, cancel := context.WithTimeout(bu.healthCheckContext, time.Second)
|
||||
c, err := netutil.Dialer.DialContext(ctx, "tcp", addr)
|
||||
cancel()
|
||||
if err != nil {
|
||||
if errors.Is(bu.healthCheckContext.Err(), context.Canceled) {
|
||||
return
|
||||
}
|
||||
|
||||
// Verify network connectivity via TCP dial before marking backend healthy.
|
||||
// Previously, backends were auto-restored after failTimeout without validation,
|
||||
// causing requests to repeatedly hang on unreachable backends.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9890
|
||||
c, err := net.DialTimeout(`tcp`, addr, time.Second)
|
||||
if err != nil {
|
||||
t.Reset(*failTimeout)
|
||||
continue
|
||||
}
|
||||
_ = c.Close()
|
||||
bu.broken.Store(false)
|
||||
return
|
||||
case <-bu.stopHealthCheckCh:
|
||||
t.Stop()
|
||||
return
|
||||
logger.Warnf("ignoring the backend at %s for %s becasue of dial error: %s", addr, *failTimeout, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func (bu *backendURL) stopHealthCheck() {
|
||||
bu.stopHealthCheckOnce.Do(func() {
|
||||
close(bu.stopHealthCheckCh)
|
||||
})
|
||||
_ = c.Close()
|
||||
return
|
||||
case <-bu.healthCheckContext.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (bu *backendURL) get() {
|
||||
@@ -377,8 +415,8 @@ func (bu *backendURL) put() {
|
||||
}
|
||||
|
||||
func (up *URLPrefix) getBackendsCount() int {
|
||||
pbus := up.bus.Load()
|
||||
return len(*pbus)
|
||||
bus := up.bus.Load()
|
||||
return len(bus.bus)
|
||||
}
|
||||
|
||||
// getBackendURL returns the backendURL depending on the load balance policy.
|
||||
@@ -389,16 +427,15 @@ func (up *URLPrefix) getBackendsCount() int {
|
||||
func (up *URLPrefix) getBackendURL() *backendURL {
|
||||
up.discoverBackendAddrsIfNeeded()
|
||||
|
||||
pbus := up.bus.Load()
|
||||
bus := *pbus
|
||||
if len(bus) == 0 {
|
||||
bus := up.bus.Load()
|
||||
if len(bus.bus) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if up.loadBalancingPolicy == "first_available" {
|
||||
return getFirstAvailableBackendURL(bus)
|
||||
return getFirstAvailableBackendURL(bus.bus)
|
||||
}
|
||||
return getLeastLoadedBackendURL(bus, &up.n)
|
||||
return getLeastLoadedBackendURL(bus.bus, &up.n)
|
||||
}
|
||||
|
||||
func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
|
||||
@@ -472,32 +509,24 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
|
||||
cancel()
|
||||
|
||||
// generate new backendURLs for the resolved IPs
|
||||
var busNew []*backendURL
|
||||
busNew := newBackendURLs()
|
||||
for _, bu := range up.busOriginal {
|
||||
host := bu.Hostname()
|
||||
for _, addr := range hostToAddrs[host] {
|
||||
buCopy := *bu
|
||||
buCopy.Host = addr
|
||||
busNew = append(busNew, &backendURL{
|
||||
url: &buCopy,
|
||||
stopHealthCheckCh: make(chan struct{}),
|
||||
})
|
||||
busNew.add(&buCopy)
|
||||
}
|
||||
}
|
||||
|
||||
pbus := up.bus.Load()
|
||||
if areEqualBackendURLs(*pbus, busNew) {
|
||||
bus := up.bus.Load()
|
||||
if areEqualBackendURLs(bus.bus, busNew.bus) {
|
||||
return
|
||||
}
|
||||
|
||||
// Store new backend urls
|
||||
up.bus.Store(&busNew)
|
||||
|
||||
if *pbus != nil {
|
||||
for _, bu := range *pbus {
|
||||
bu.stopHealthCheck()
|
||||
}
|
||||
}
|
||||
up.bus.Store(busNew)
|
||||
bus.stopHealthChecks()
|
||||
}
|
||||
|
||||
func areEqualBackendURLs(a, b []*backendURL) bool {
|
||||
@@ -528,20 +557,23 @@ func getFirstAvailableBackendURL(bus []*backendURL) *backendURL {
|
||||
for i := 1; i < len(bus); i++ {
|
||||
if !bus[i].isBroken() {
|
||||
bu = bus[i]
|
||||
break
|
||||
bu.get()
|
||||
return bu
|
||||
}
|
||||
}
|
||||
bu.get()
|
||||
return bu
|
||||
return nil
|
||||
}
|
||||
|
||||
// getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests.
|
||||
// getLeastLoadedBackendURL returns a non-broken backendURL with the lowest number of concurrent requests.
|
||||
//
|
||||
// backendURL.put() must be called on the returned backendURL after the request is complete.
|
||||
func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *backendURL {
|
||||
if len(bus) == 1 {
|
||||
// Fast path - return the only backend url.
|
||||
bu := bus[0]
|
||||
if bu.isBroken() {
|
||||
return nil
|
||||
}
|
||||
bu.get()
|
||||
return bu
|
||||
}
|
||||
@@ -566,7 +598,7 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
|
||||
// Slow path - return the backend with the minimum number of concurrently executed requests.
|
||||
buMinIdx := n % uint32(len(bus))
|
||||
minRequests := bus[buMinIdx].concurrentRequests.Load()
|
||||
for i := uint32(0); i < uint32(len(bus)); i++ {
|
||||
for i := uint32(1); i < uint32(len(bus)); i++ {
|
||||
idx := (n + i) % uint32(len(bus))
|
||||
bu := bus[idx]
|
||||
if bu.isBroken() {
|
||||
@@ -580,6 +612,9 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
|
||||
}
|
||||
}
|
||||
buMin := bus[buMinIdx]
|
||||
if buMin.isBroken() {
|
||||
return nil
|
||||
}
|
||||
buMin.get()
|
||||
atomicCounter.CompareAndSwap(n+1, buMinIdx+1)
|
||||
return buMin
|
||||
@@ -855,6 +890,8 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
||||
return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err)
|
||||
}
|
||||
ui.requests = ac.ms.NewCounter(`vmauth_unauthorized_user_requests_total` + metricLabels)
|
||||
ui.requestErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_errors_total` + metricLabels)
|
||||
ui.backendRequests = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_requests_total` + metricLabels)
|
||||
ui.backendErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels)
|
||||
ui.requestsDuration = ac.ms.NewSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels)
|
||||
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
|
||||
@@ -903,6 +940,8 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
||||
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
||||
}
|
||||
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
||||
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
|
||||
ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
|
||||
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
|
||||
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
|
||||
mcr := ui.getMaxConcurrentRequests()
|
||||
@@ -1137,14 +1176,11 @@ func (up *URLPrefix) sanitizeAndInitialize() error {
|
||||
}
|
||||
|
||||
// Initialize up.bus
|
||||
bus := make([]*backendURL, len(up.busOriginal))
|
||||
for i, bu := range up.busOriginal {
|
||||
bus[i] = &backendURL{
|
||||
url: bu,
|
||||
stopHealthCheckCh: make(chan struct{}),
|
||||
}
|
||||
bus := newBackendURLs()
|
||||
for _, bu := range up.busOriginal {
|
||||
bus.add(bu)
|
||||
}
|
||||
up.bus.Store(&bus)
|
||||
up.bus.Store(bus)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -753,7 +753,7 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
|
||||
up.loadBalancingPolicy = "least_loaded"
|
||||
|
||||
pbus := up.bus.Load()
|
||||
bus := *pbus
|
||||
bus := pbus.bus
|
||||
|
||||
fn := func(ns ...int) {
|
||||
t.Helper()
|
||||
@@ -825,7 +825,7 @@ func TestBrokenBackend(t *testing.T) {
|
||||
})
|
||||
up.loadBalancingPolicy = "least_loaded"
|
||||
pbus := up.bus.Load()
|
||||
bus := *pbus
|
||||
bus := pbus.bus
|
||||
|
||||
// explicitly mark one of the backends as broken
|
||||
bus[1].setBroken()
|
||||
@@ -848,7 +848,7 @@ func TestDiscoverBackendIPsWithIPV6(t *testing.T) {
|
||||
|
||||
up.discoverBackendAddrsIfNeeded()
|
||||
pbus := up.bus.Load()
|
||||
bus := *pbus
|
||||
bus := pbus.bus
|
||||
|
||||
if len(bus) != 1 {
|
||||
t.Fatalf("expected url list to be of size 1; got %d instead", len(bus))
|
||||
@@ -942,17 +942,14 @@ func mustParseURL(u string) *URLPrefix {
|
||||
}
|
||||
|
||||
func mustParseURLs(us []string) *URLPrefix {
|
||||
bus := make([]*backendURL, len(us))
|
||||
bus := newBackendURLs()
|
||||
urls := make([]*url.URL, len(us))
|
||||
for i, u := range us {
|
||||
pu, err := url.Parse(u)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("BUG: cannot parse %q: %w", u, err))
|
||||
}
|
||||
bus[i] = &backendURL{
|
||||
url: pu,
|
||||
stopHealthCheckCh: make(chan struct{}),
|
||||
}
|
||||
bus.add(pu)
|
||||
urls[i] = pu
|
||||
}
|
||||
up := &URLPrefix{}
|
||||
@@ -961,7 +958,7 @@ func mustParseURLs(us []string) *URLPrefix {
|
||||
} else {
|
||||
up.vOriginal = us
|
||||
}
|
||||
up.bus.Store(&bus)
|
||||
up.bus.Store(bus)
|
||||
up.busOriginal = urls
|
||||
return up
|
||||
}
|
||||
|
||||
@@ -44,12 +44,17 @@ var (
|
||||
"See also -maxConcurrentRequests")
|
||||
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+
|
||||
"It is recommended setting this value to values smaller than -http.idleConnTimeout set at backend services")
|
||||
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
|
||||
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
|
||||
|
||||
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process. Other requests are rejected with "+
|
||||
"'429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options")
|
||||
"'429 Too Many Requests' http status code. See also -maxQueueDuration, -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options")
|
||||
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 300, "The maximum number of concurrent requests vmauth can process per each configured user. "+
|
||||
"Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentRequests command-line option and max_concurrent_requests option "+
|
||||
"in per-user config")
|
||||
"Other requests are rejected with '429 Too Many Requests' http status code. See also -maxQueueDuration and -maxConcurrentRequests command-line options "+
|
||||
"and max_concurrent_requests option in per-user config")
|
||||
maxQueueDuration = flag.Duration("maxQueueDuration", 10*time.Second, "The maximum duration the request waits for execution when the number of concurrently executed "+
|
||||
"requests reach -maxConcurrentRequests or -maxConcurrentPerUserRequests before returning '429 Too Many Requests' error. "+
|
||||
"This allows graceful handling of short spikes in the number of concurrent requests")
|
||||
|
||||
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
|
||||
`Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page`)
|
||||
@@ -151,7 +156,6 @@ func requestHandlerWithInternalRoutes(w http.ResponseWriter, r *http.Request) bo
|
||||
}
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
|
||||
ats := getAuthTokensFromRequest(r)
|
||||
if len(ats) == 0 {
|
||||
// Process requests for unauthorized users
|
||||
@@ -208,20 +212,45 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
|
||||
ui.requests.Inc()
|
||||
|
||||
ctx, cancel := context.WithTimeout(r.Context(), *maxQueueDuration)
|
||||
defer cancel()
|
||||
|
||||
// Limit the concurrency of requests to backends
|
||||
concurrencyLimitOnce.Do(concurrencyLimitInit)
|
||||
select {
|
||||
case concurrencyLimitCh <- struct{}{}:
|
||||
if err := ui.beginConcurrencyLimit(); err != nil {
|
||||
if err := ui.beginConcurrencyLimit(ctx); err != nil {
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
<-concurrencyLimitCh
|
||||
return
|
||||
}
|
||||
default:
|
||||
concurrentRequestsLimitReached.Inc()
|
||||
err := fmt.Errorf("cannot serve more than -maxConcurrentRequests=%d concurrent requests", cap(concurrencyLimitCh))
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
return
|
||||
// The -maxConcurrentRequests are executed. Wait until some of the requests are finished,
|
||||
// so the current request could be executed.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
|
||||
select {
|
||||
case concurrencyLimitCh <- struct{}{}:
|
||||
if err := ui.beginConcurrencyLimit(ctx); err != nil {
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
<-concurrencyLimitCh
|
||||
return
|
||||
}
|
||||
case <-ctx.Done():
|
||||
err := ctx.Err()
|
||||
|
||||
concurrentRequestsLimitReached.Inc()
|
||||
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
err = fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because -maxConcurrentRequests=%d concurrent requests are executed",
|
||||
*maxQueueDuration, cap(concurrencyLimitCh))
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
return
|
||||
}
|
||||
|
||||
err = fmt.Errorf("cannot start executing the request because -maxConcurrentRequests=%d concurrent requests are executed: %w", cap(concurrencyLimitCh), err)
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
processRequest(w, r, ui)
|
||||
ui.endConcurrencyLimit()
|
||||
@@ -285,16 +314,18 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
return
|
||||
}
|
||||
bu.setBroken()
|
||||
ui.backendErrors.Inc()
|
||||
}
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("all the %d backends for the user %q are unavailable", up.getBackendsCount(), ui.name()),
|
||||
StatusCode: http.StatusBadGateway,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
ui.backendErrors.Inc()
|
||||
ui.requestErrors.Inc()
|
||||
}
|
||||
|
||||
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo) (bool, bool) {
|
||||
ui.backendRequests.Inc()
|
||||
req := sanitizeRequestHeaders(r)
|
||||
|
||||
req.URL = targetURL
|
||||
@@ -325,7 +356,6 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
// Timed out request must be counted as errors, since this usually means that the backend is slow.
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; timeout while proxying the response from %s: %s", remoteAddr, requestURI, targetURL, err)
|
||||
ui.backendErrors.Inc()
|
||||
}
|
||||
return false, false
|
||||
}
|
||||
@@ -337,6 +367,7 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
ui.backendErrors.Inc()
|
||||
ui.requestErrors.Inc()
|
||||
return true, false
|
||||
}
|
||||
if netutil.IsTrivialNetworkError(err) {
|
||||
@@ -344,11 +375,11 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
return false, true
|
||||
}
|
||||
|
||||
// Retry the request if its body wasn't read yet. This usually means that the backend isn't reachable.
|
||||
// Request body wasn't read yet, this usually means that the backend isn't reachable; retry the request at another backend
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
// NOTE: do not use httpserver.GetRequestURI
|
||||
// it explicitly reads request body, which may fail retries.
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because of response error: %s", remoteAddr, req.URL, targetURL, err)
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed: %s, retrying the request at another backend", remoteAddr, req.URL, targetURL, err)
|
||||
return false, false
|
||||
}
|
||||
if slices.Contains(retryStatusCodes, res.StatusCode) {
|
||||
@@ -357,12 +388,13 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
// If we get an error from the retry_status_codes list, but cannot execute retry,
|
||||
// we consider such a request an error as well.
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request on another backend, because the request has been already consumed",
|
||||
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request at another backend, because the request has been already consumed",
|
||||
res.StatusCode, targetURL),
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
ui.backendErrors.Inc()
|
||||
ui.requestErrors.Inc()
|
||||
return true, false
|
||||
}
|
||||
// Retry requests at other backends if it matches retryStatusCodes.
|
||||
@@ -370,7 +402,7 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
// NOTE: do not use httpserver.GetRequestURI
|
||||
// it explicitly reads request body, which may fail retries.
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because response status code=%d belongs to retry_status_codes=%d",
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed, retrying the request at another backend because response status code=%d belongs to retry_status_codes=%d",
|
||||
remoteAddr, req.URL, targetURL, res.StatusCode, retryStatusCodes)
|
||||
return false, false
|
||||
}
|
||||
@@ -386,6 +418,7 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
|
||||
ui.requestErrors.Inc()
|
||||
return true, false
|
||||
}
|
||||
return true, false
|
||||
@@ -596,6 +629,13 @@ func handleMissingAuthorizationError(w http.ResponseWriter) {
|
||||
}
|
||||
|
||||
func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err error) {
|
||||
ctx := r.Context()
|
||||
if errors.Is(ctx.Err(), context.Canceled) {
|
||||
// Do not return any response for the request canceled by the client,
|
||||
// since the connection to the client is already closed.
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Add("Retry-After", "10")
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
Err: err,
|
||||
@@ -652,6 +692,7 @@ type zeroReader struct{}
|
||||
func (r *zeroReader) Read(_ []byte) (int, error) {
|
||||
return 0, io.EOF
|
||||
}
|
||||
|
||||
func (r *zeroReader) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -4,13 +4,15 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="promscrape"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="promscrape"}`)
|
||||
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="promscrape"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="promscrape"}`)
|
||||
metadataRowsInserted = metrics.NewCounter(`vm_metadata_rows_inserted_total{type="promscrape"}`)
|
||||
)
|
||||
|
||||
const maxRowsPerBlock = 10000
|
||||
@@ -41,6 +43,13 @@ func Push(wr *prompb.WriteRequest) {
|
||||
}
|
||||
push(ctx, tssBlock)
|
||||
}
|
||||
if prommetadata.IsEnabled() {
|
||||
if err := ctx.WriteMetadata(wr.Metadata); err != nil {
|
||||
logger.Errorf("cannot write promscrape metrics metadata to storage: %s", err)
|
||||
} else {
|
||||
metadataRowsInserted.Add(len(wr.Metadata))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func push(ctx *common.InsertCtx, tss []prompb.TimeSeries) {
|
||||
|
||||
@@ -75,8 +75,6 @@ var (
|
||||
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning")
|
||||
cacheSizeIndexDBTagFilters = flagutil.NewBytes("storage.cacheSizeIndexDBTagFilters", 0, "Overrides max size for indexdb/tagFiltersToMetricIDs cache. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning")
|
||||
cacheSizeIndexDBDateMetricID = flagutil.NewBytes("storage.cacheSizeIndexDBDateMetricID", 0, "Overrides max size for indexdb/date_metricID cache. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning")
|
||||
|
||||
disablePerDayIndex = flag.Bool("disablePerDayIndex", false, "Disable per-day index and use global index for all searches. "+
|
||||
"This may improve performance and decrease disk space usage for the use cases with fixed set of timeseries scattered across a "+
|
||||
@@ -120,14 +118,13 @@ func Init(resetCacheIfNeeded func(mrs []storage.MetricRow)) {
|
||||
}
|
||||
|
||||
resetResponseCacheIfNeeded = resetCacheIfNeeded
|
||||
storage.SetRetentionTimezoneOffset(*retentionTimezoneOffset)
|
||||
storage.LegacySetRetentionTimezoneOffset(*retentionTimezoneOffset)
|
||||
storage.SetFreeDiskSpaceLimit(minFreeDiskSpaceBytes.N)
|
||||
storage.SetTSIDCacheSize(cacheSizeStorageTSID.IntN())
|
||||
storage.SetTagFiltersCacheSize(cacheSizeIndexDBTagFilters.IntN())
|
||||
storage.SetMetricNamesStatsCacheSize(cacheSizeMetricNamesStats.IntN())
|
||||
storage.SetMetricNameCacheSize(cacheSizeStorageMetricName.IntN())
|
||||
storage.SetMetadataStorageSize(metadataStorageSize.IntN())
|
||||
storage.SetDateMetricIDCacheSize(cacheSizeIndexDBDateMetricID.IntN())
|
||||
mergeset.SetIndexBlocksCacheSize(cacheSizeIndexDBIndexBlocks.IntN())
|
||||
mergeset.SetDataBlocksCacheSize(cacheSizeIndexDBDataBlocks.IntN())
|
||||
mergeset.SetDataBlocksSparseCacheSize(cacheSizeIndexDBDataBlocksSparse.IntN())
|
||||
@@ -503,7 +500,7 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
var m storage.Metrics
|
||||
strg.UpdateMetrics(&m)
|
||||
tm := &m.TableMetrics
|
||||
idbm := &m.IndexDBMetrics
|
||||
idbm := &m.TableMetrics.IndexDBMetrics
|
||||
|
||||
metrics.WriteGaugeUint64(w, fmt.Sprintf(`vm_free_disk_space_bytes{path=%q}`, *DataPath), fs.MustGetFreeSpace(*DataPath))
|
||||
metrics.WriteGaugeUint64(w, fmt.Sprintf(`vm_free_disk_space_limit_bytes{path=%q}`, *DataPath), uint64(minFreeDiskSpaceBytes.N))
|
||||
@@ -642,6 +639,7 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_entries{type="indexdb/dataBlocks"}`, idbm.DataBlocksCacheSize)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_entries{type="indexdb/dataBlocksSparse"}`, idbm.DataBlocksSparseCacheSize)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_entries{type="indexdb/indexBlocks"}`, idbm.IndexBlocksCacheSize)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_entries{type="indexdb/metricID"}`, idbm.MetricIDCacheSize)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_entries{type="indexdb/date_metricID"}`, idbm.DateMetricIDCacheSize)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_entries{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheSize)
|
||||
|
||||
@@ -653,6 +651,7 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/next_day_metric_ids"}`, m.NextDayMetricIDCacheSizeBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/regexps"}`, storage.RegexpCacheSizeBytes())
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/regexpPrefixes"}`, storage.RegexpPrefixesCacheSizeBytes())
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="indexdb/metricID"}`, idbm.MetricIDCacheSizeBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="indexdb/date_metricID"}`, idbm.DateMetricIDCacheSizeBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="indexdb/dataBlocks"}`, idbm.DataBlocksCacheSizeBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="indexdb/dataBlocksSparse"}`, idbm.DataBlocksSparseCacheSizeBytes)
|
||||
@@ -668,7 +667,6 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="indexdb/dataBlocks"}`, idbm.DataBlocksCacheSizeMaxBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="indexdb/dataBlocksSparse"}`, idbm.DataBlocksSparseCacheSizeMaxBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="indexdb/indexBlocks"}`, idbm.IndexBlocksCacheSizeMaxBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="indexdb/date_metricID"}`, idbm.DateMetricIDCacheSizeMaxBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheSizeMaxBytes)
|
||||
|
||||
metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="storage/indexBlocks"}`, tm.IndexBlocksCacheRequests)
|
||||
@@ -693,14 +691,17 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="indexdb/indexBlocks"}`, idbm.IndexBlocksCacheMisses)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheMisses)
|
||||
|
||||
metrics.WriteCounterUint64(w, `vm_cache_resets_total{type="indexdb/date_metricID"}`, idbm.DateMetricIDCacheResetsCount)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_resets_total{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheResets)
|
||||
|
||||
metrics.WriteCounterUint64(w, `vm_cache_collisions_total{type="storage/tsid"}`, m.TSIDCacheCollisions)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_collisions_total{type="storage/metricName"}`, m.MetricNameCacheCollisions)
|
||||
|
||||
metrics.WriteCounterUint64(w, `vm_cache_syncs_total{type="indexdb/metricID"}`, idbm.MetricIDCacheSyncsCount)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_syncs_total{type="indexdb/date_metricID"}`, idbm.DateMetricIDCacheSyncsCount)
|
||||
|
||||
metrics.WriteCounterUint64(w, `vm_cache_rotations_total{type="indexdb/metricID"}`, idbm.MetricIDCacheRotationsCount)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_rotations_total{type="indexdb/date_metricID"}`, idbm.DateMetricIDCacheRotationsCount)
|
||||
|
||||
metrics.WriteCounterUint64(w, `vm_deleted_metrics_total{type="indexdb"}`, m.DeletedMetricsCount)
|
||||
|
||||
metrics.WriteGaugeUint64(w, `vm_next_retention_seconds`, m.NextRetentionSeconds)
|
||||
|
||||
898
apptest/tests/legacy_indexdb_test.go
Normal file
898
apptest/tests/legacy_indexdb_test.go
Normal file
@@ -0,0 +1,898 @@
|
||||
package tests
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
at "github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
)
|
||||
|
||||
// legacyVmsinglePath is read from the VM_LEGACY_VMSINGLE_PATH environment
// variable. The tests below pass it to MustStartVmsingleAt as the binary of
// the legacy vmsingle.
var legacyVmsinglePath = os.Getenv("VM_LEGACY_VMSINGLE_PATH")

// legacyVmstoragePath is read from the VM_LEGACY_VMSTORAGE_PATH environment
// variable. The tests below use it as the vmstorage binary of legacy clusters.
var legacyVmstoragePath = os.Getenv("VM_LEGACY_VMSTORAGE_PATH")
|
||||
|
||||
type testLegacyDeleteSeriesOpts struct {
|
||||
startLegacySUT func() at.PrometheusWriteQuerier
|
||||
startNewSUT func() at.PrometheusWriteQuerier
|
||||
stopLegacySUT func()
|
||||
stopNewSUT func()
|
||||
}
|
||||
|
||||
func TestLegacySingleDeleteSeries(t *testing.T) {
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
storageDataPath := filepath.Join(tc.Dir(), "vmsingle")
|
||||
|
||||
opts := testLegacyDeleteSeriesOpts{
|
||||
startLegacySUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartVmsingleAt("vmsingle-legacy", legacyVmsinglePath, []string{
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
"-retentionPeriod=100y",
|
||||
"-search.maxStalenessInterval=1m",
|
||||
})
|
||||
},
|
||||
startNewSUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartVmsingle("vmsingle-new", []string{
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
"-retentionPeriod=100y",
|
||||
"-search.maxStalenessInterval=1m",
|
||||
})
|
||||
},
|
||||
stopLegacySUT: func() {
|
||||
tc.StopApp("vmsingle-legacy")
|
||||
},
|
||||
stopNewSUT: func() {
|
||||
tc.StopApp("vmsingle-new")
|
||||
},
|
||||
}
|
||||
|
||||
testLegacyDeleteSeries(tc, opts)
|
||||
}
|
||||
|
||||
func TestLegacyClusterDeleteSeries(t *testing.T) {
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
storage1DataPath := filepath.Join(tc.Dir(), "vmstorage1")
|
||||
storage2DataPath := filepath.Join(tc.Dir(), "vmstorage2")
|
||||
|
||||
opts := testLegacyDeleteSeriesOpts{
|
||||
startLegacySUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartCluster(&at.ClusterOptions{
|
||||
Vmstorage1Instance: "vmstorage1-legacy",
|
||||
Vmstorage1Binary: legacyVmstoragePath,
|
||||
Vmstorage1Flags: []string{
|
||||
"-storageDataPath=" + storage1DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
Vmstorage2Instance: "vmstorage2-legacy",
|
||||
Vmstorage2Binary: legacyVmstoragePath,
|
||||
Vmstorage2Flags: []string{
|
||||
"-storageDataPath=" + storage2DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
VminsertInstance: "vminsert",
|
||||
VminsertFlags: []string{},
|
||||
VmselectInstance: "vmselect",
|
||||
VmselectFlags: []string{
|
||||
"-search.maxStalenessInterval=1m",
|
||||
},
|
||||
})
|
||||
},
|
||||
startNewSUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartCluster(&at.ClusterOptions{
|
||||
Vmstorage1Instance: "vmstorage1-new",
|
||||
Vmstorage1Flags: []string{
|
||||
"-storageDataPath=" + storage1DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
Vmstorage2Instance: "vmstorage2-new",
|
||||
Vmstorage2Flags: []string{
|
||||
"-storageDataPath=" + storage2DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
VminsertInstance: "vminsert",
|
||||
VminsertFlags: []string{},
|
||||
VmselectInstance: "vmselect",
|
||||
VmselectFlags: []string{
|
||||
"-search.maxStalenessInterval=1m",
|
||||
},
|
||||
})
|
||||
},
|
||||
stopLegacySUT: func() {
|
||||
tc.StopApp("vminsert")
|
||||
tc.StopApp("vmselect")
|
||||
tc.StopApp("vmstorage1-legacy")
|
||||
tc.StopApp("vmstorage2-legacy")
|
||||
},
|
||||
stopNewSUT: func() {
|
||||
tc.StopApp("vminsert")
|
||||
tc.StopApp("vmselect")
|
||||
tc.StopApp("vmstorage1-new")
|
||||
tc.StopApp("vmstorage2-new")
|
||||
},
|
||||
}
|
||||
|
||||
testLegacyDeleteSeries(tc, opts)
|
||||
}
|
||||
|
||||
func testLegacyDeleteSeries(tc *at.TestCase, opts testLegacyDeleteSeriesOpts) {
|
||||
t := tc.T()
|
||||
|
||||
type want struct {
|
||||
series []map[string]string
|
||||
queryResults []*at.QueryResult
|
||||
}
|
||||
|
||||
genData := func(prefix string, start, end, step int64, value float64) (recs []string, w *want) {
|
||||
count := (end - start) / step
|
||||
recs = make([]string, count)
|
||||
w = &want{
|
||||
series: make([]map[string]string, count),
|
||||
queryResults: make([]*at.QueryResult, count),
|
||||
}
|
||||
for i := range count {
|
||||
name := fmt.Sprintf("%s_%03d", prefix, i)
|
||||
timestamp := start + int64(i)*step
|
||||
|
||||
recs[i] = fmt.Sprintf("%s %f %d", name, value, timestamp)
|
||||
w.series[i] = map[string]string{"__name__": name}
|
||||
w.queryResults[i] = &at.QueryResult{
|
||||
Metric: map[string]string{"__name__": name},
|
||||
Samples: []*at.Sample{{Timestamp: timestamp, Value: value}},
|
||||
}
|
||||
}
|
||||
return recs, w
|
||||
}
|
||||
|
||||
assertSearchResults := func(app at.PrometheusQuerier, query string, start, end int64, step string, want *want) {
|
||||
t.Helper()
|
||||
|
||||
tc.Assert(&at.AssertOptions{
|
||||
Msg: "unexpected /api/v1/series response",
|
||||
Got: func() any {
|
||||
return app.PrometheusAPIV1Series(t, query, at.QueryOpts{
|
||||
Start: fmt.Sprintf("%d", start),
|
||||
End: fmt.Sprintf("%d", end),
|
||||
}).Sort()
|
||||
},
|
||||
Want: &at.PrometheusAPIV1SeriesResponse{
|
||||
Status: "success",
|
||||
Data: want.series,
|
||||
},
|
||||
FailNow: true,
|
||||
})
|
||||
|
||||
tc.Assert(&at.AssertOptions{
|
||||
Msg: "unexpected /api/v1/query_range response",
|
||||
Got: func() any {
|
||||
return app.PrometheusAPIV1QueryRange(t, query, at.QueryOpts{
|
||||
Start: fmt.Sprintf("%d", start),
|
||||
End: fmt.Sprintf("%d", end),
|
||||
Step: step,
|
||||
})
|
||||
},
|
||||
Want: &at.PrometheusAPIV1QueryResponse{
|
||||
Status: "success",
|
||||
Data: &at.QueryData{
|
||||
ResultType: "matrix",
|
||||
Result: want.queryResults,
|
||||
},
|
||||
},
|
||||
FailNow: true,
|
||||
})
|
||||
}
|
||||
|
||||
// - start legacy vmsingle
|
||||
// - insert data1
|
||||
// - confirm that metric names and samples are searcheable
|
||||
// - stop legacy vmsingle
|
||||
const step = 24 * 3600 * 1000 // 24h
|
||||
start1 := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli()
|
||||
end1 := time.Date(2000, 1, 10, 0, 0, 0, 0, time.UTC).UnixMilli()
|
||||
data1, want1 := genData("metric", start1, end1, step, 1)
|
||||
legacySUT := opts.startLegacySUT()
|
||||
legacySUT.PrometheusAPIV1ImportPrometheus(t, data1, at.QueryOpts{})
|
||||
legacySUT.ForceFlush(t)
|
||||
assertSearchResults(legacySUT, `{__name__=~".*"}`, start1, end1, "1d", want1)
|
||||
opts.stopLegacySUT()
|
||||
|
||||
// - start new vmsingle
|
||||
// - confirm that data1 metric names and samples are searcheable
|
||||
// - delete data1
|
||||
// - confirm that data1 metric names and samples are not searcheable anymore
|
||||
// - insert data2 (same metric names, different dates)
|
||||
// - confirm that metric names become searcheable again
|
||||
// - confirm that data1 samples are not searchable and data2 samples are searcheable
|
||||
|
||||
newSUT := opts.startNewSUT()
|
||||
assertSearchResults(newSUT, `{__name__=~".*"}`, start1, end1, "1d", want1)
|
||||
|
||||
newSUT.APIV1AdminTSDBDeleteSeries(t, `{__name__=~".*"}`, at.QueryOpts{})
|
||||
wantNoResults := &want{
|
||||
series: []map[string]string{},
|
||||
queryResults: []*at.QueryResult{},
|
||||
}
|
||||
assertSearchResults(newSUT, `{__name__=~".*"}`, start1, end1, "1d", wantNoResults)
|
||||
|
||||
start2 := time.Date(2000, 1, 11, 0, 0, 0, 0, time.UTC).UnixMilli()
|
||||
end2 := time.Date(2000, 1, 20, 0, 0, 0, 0, time.UTC).UnixMilli()
|
||||
data2, want2 := genData("metric", start2, end2, step, 2)
|
||||
newSUT.PrometheusAPIV1ImportPrometheus(t, data2, at.QueryOpts{})
|
||||
newSUT.ForceFlush(t)
|
||||
assertSearchResults(newSUT, `{__name__=~".*"}`, start1, end2, "1d", want2)
|
||||
|
||||
// - restart new vmsingle
|
||||
// - confirm that metric names still searchable, data1 samples are not
|
||||
// searchable, and data2 samples are searcheable
|
||||
|
||||
opts.stopNewSUT()
|
||||
newSUT = opts.startNewSUT()
|
||||
assertSearchResults(newSUT, `{__name__=~".*"}`, start1, end2, "1d", want2)
|
||||
opts.stopNewSUT()
|
||||
}
|
||||
|
||||
type testLegacyBackupRestoreOpts struct {
|
||||
startLegacySUT func() at.PrometheusWriteQuerier
|
||||
startNewSUT func() at.PrometheusWriteQuerier
|
||||
stopLegacySUT func()
|
||||
stopNewSUT func()
|
||||
storageDataPaths []string
|
||||
snapshotCreateURLs func(at.PrometheusWriteQuerier) []string
|
||||
}
|
||||
|
||||
func TestLegacySingleBackupRestore(t *testing.T) {
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
storageDataPath := filepath.Join(tc.Dir(), "vmsingle")
|
||||
|
||||
opts := testLegacyBackupRestoreOpts{
|
||||
startLegacySUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartVmsingleAt("vmsingle-legacy", legacyVmsinglePath, []string{
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
"-retentionPeriod=100y",
|
||||
"-search.disableCache=true",
|
||||
"-search.maxStalenessInterval=1m",
|
||||
})
|
||||
},
|
||||
startNewSUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartVmsingle("vmsingle-new", []string{
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
"-retentionPeriod=100y",
|
||||
"-search.disableCache=true",
|
||||
"-search.maxStalenessInterval=1m",
|
||||
})
|
||||
},
|
||||
stopLegacySUT: func() {
|
||||
tc.StopApp("vmsingle-legacy")
|
||||
},
|
||||
stopNewSUT: func() {
|
||||
tc.StopApp("vmsingle-new")
|
||||
},
|
||||
storageDataPaths: []string{
|
||||
storageDataPath,
|
||||
},
|
||||
snapshotCreateURLs: func(sut at.PrometheusWriteQuerier) []string {
|
||||
return []string{
|
||||
sut.(*at.Vmsingle).SnapshotCreateURL(),
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
testLegacyBackupRestore(tc, opts)
|
||||
}
|
||||
|
||||
func TestLegacyClusterBackupRestore(t *testing.T) {
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
storage1DataPath := filepath.Join(tc.Dir(), "vmstorage1")
|
||||
storage2DataPath := filepath.Join(tc.Dir(), "vmstorage2")
|
||||
|
||||
opts := testLegacyBackupRestoreOpts{
|
||||
startLegacySUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartCluster(&at.ClusterOptions{
|
||||
Vmstorage1Instance: "vmstorage1-legacy",
|
||||
Vmstorage1Binary: legacyVmstoragePath,
|
||||
Vmstorage1Flags: []string{
|
||||
"-storageDataPath=" + storage1DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
Vmstorage2Instance: "vmstorage2-legacy",
|
||||
Vmstorage2Binary: legacyVmstoragePath,
|
||||
Vmstorage2Flags: []string{
|
||||
"-storageDataPath=" + storage2DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
VminsertInstance: "vminsert",
|
||||
VminsertFlags: []string{},
|
||||
VmselectInstance: "vmselect",
|
||||
VmselectFlags: []string{
|
||||
"-search.disableCache=true",
|
||||
"-search.maxStalenessInterval=1m",
|
||||
},
|
||||
})
|
||||
},
|
||||
startNewSUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartCluster(&at.ClusterOptions{
|
||||
Vmstorage1Instance: "vmstorage1-new",
|
||||
Vmstorage1Flags: []string{
|
||||
"-storageDataPath=" + storage1DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
Vmstorage2Instance: "vmstorage2-new",
|
||||
Vmstorage2Flags: []string{
|
||||
"-storageDataPath=" + storage2DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
VminsertInstance: "vminsert",
|
||||
VmselectInstance: "vmselect",
|
||||
VmselectFlags: []string{
|
||||
"-search.disableCache=true",
|
||||
"-search.maxStalenessInterval=1m",
|
||||
},
|
||||
})
|
||||
},
|
||||
stopLegacySUT: func() {
|
||||
tc.StopApp("vminsert")
|
||||
tc.StopApp("vmselect")
|
||||
tc.StopApp("vmstorage1-legacy")
|
||||
tc.StopApp("vmstorage2-legacy")
|
||||
},
|
||||
stopNewSUT: func() {
|
||||
tc.StopApp("vminsert")
|
||||
tc.StopApp("vmselect")
|
||||
tc.StopApp("vmstorage1-new")
|
||||
tc.StopApp("vmstorage2-new")
|
||||
},
|
||||
storageDataPaths: []string{
|
||||
storage1DataPath,
|
||||
storage2DataPath,
|
||||
},
|
||||
snapshotCreateURLs: func(sut at.PrometheusWriteQuerier) []string {
|
||||
c := sut.(*at.Vmcluster)
|
||||
return []string{
|
||||
c.Vmstorages[0].SnapshotCreateURL(),
|
||||
c.Vmstorages[1].SnapshotCreateURL(),
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
testLegacyBackupRestore(tc, opts)
|
||||
}
|
||||
|
||||
func testLegacyBackupRestore(tc *at.TestCase, opts testLegacyBackupRestoreOpts) {
|
||||
t := tc.T()
|
||||
|
||||
const msecPerMinute = 60 * 1000
|
||||
// Use the same number of metrics and time range for all the data ingestions
|
||||
// below.
|
||||
const numMetrics = 1000
|
||||
start := time.Date(2025, 3, 1, 10, 0, 0, 0, time.UTC).Add(-numMetrics * time.Minute).UnixMilli()
|
||||
end := time.Date(2025, 3, 1, 10, 0, 0, 0, time.UTC).UnixMilli()
|
||||
genData := func(prefix string) (recs []string, wantSeries []map[string]string, wantQueryResults []*at.QueryResult) {
|
||||
recs = make([]string, numMetrics)
|
||||
wantSeries = make([]map[string]string, numMetrics)
|
||||
wantQueryResults = make([]*at.QueryResult, numMetrics)
|
||||
for i := range numMetrics {
|
||||
name := fmt.Sprintf("%s_%03d", prefix, i)
|
||||
value := float64(i)
|
||||
timestamp := start + int64(i)*msecPerMinute
|
||||
|
||||
recs[i] = fmt.Sprintf("%s %f %d", name, value, timestamp)
|
||||
wantSeries[i] = map[string]string{"__name__": name}
|
||||
wantQueryResults[i] = &at.QueryResult{
|
||||
Metric: map[string]string{"__name__": name},
|
||||
Samples: []*at.Sample{{Timestamp: timestamp, Value: value}},
|
||||
}
|
||||
}
|
||||
return recs, wantSeries, wantQueryResults
|
||||
}
|
||||
|
||||
backupBaseDir, err := filepath.Abs(filepath.Join(tc.Dir(), "backups"))
|
||||
if err != nil {
|
||||
t.Fatalf("could not get absolute path for the backup base dir")
|
||||
}
|
||||
|
||||
// assertSeries issues various queries to the app and compares the query
|
||||
// results with the expected ones.
|
||||
assertQueries := func(app at.PrometheusQuerier, query string, wantSeries []map[string]string, wantQueryResults []*at.QueryResult) {
|
||||
t.Helper()
|
||||
tc.Assert(&at.AssertOptions{
|
||||
Msg: "unexpected /api/v1/series response",
|
||||
Got: func() any {
|
||||
return app.PrometheusAPIV1Series(t, query, at.QueryOpts{
|
||||
Start: fmt.Sprintf("%d", start),
|
||||
End: fmt.Sprintf("%d", end),
|
||||
}).Sort()
|
||||
},
|
||||
Want: &at.PrometheusAPIV1SeriesResponse{
|
||||
Status: "success",
|
||||
Data: wantSeries,
|
||||
},
|
||||
FailNow: true,
|
||||
})
|
||||
|
||||
tc.Assert(&at.AssertOptions{
|
||||
Msg: "unexpected /api/v1/query_range response",
|
||||
Got: func() any {
|
||||
return app.PrometheusAPIV1QueryRange(t, query, at.QueryOpts{
|
||||
Start: fmt.Sprintf("%d", start),
|
||||
End: fmt.Sprintf("%d", end),
|
||||
Step: "60s",
|
||||
})
|
||||
},
|
||||
Want: &at.PrometheusAPIV1QueryResponse{
|
||||
Status: "success",
|
||||
Data: &at.QueryData{
|
||||
ResultType: "matrix",
|
||||
Result: wantQueryResults,
|
||||
},
|
||||
},
|
||||
Retries: 300,
|
||||
FailNow: true,
|
||||
})
|
||||
}
|
||||
|
||||
createBackup := func(sut at.PrometheusWriteQuerier, name string) {
|
||||
t.Helper()
|
||||
for i, storageDataPath := range opts.storageDataPaths {
|
||||
replica := fmt.Sprintf("replica-%d", i)
|
||||
instance := fmt.Sprintf("vmbackup-%s-%s", name, replica)
|
||||
snapshotCreateURL := opts.snapshotCreateURLs(sut)[i]
|
||||
backupPath := "fs://" + filepath.Join(backupBaseDir, name, replica)
|
||||
tc.MustStartVmbackup(instance, storageDataPath, snapshotCreateURL, backupPath)
|
||||
}
|
||||
}
|
||||
|
||||
restoreFromBackup := func(name string) {
|
||||
t.Helper()
|
||||
for i, storageDataPath := range opts.storageDataPaths {
|
||||
replica := fmt.Sprintf("replica-%d", i)
|
||||
instance := fmt.Sprintf("vmrestore-%s-%s", name, replica)
|
||||
backupPath := "fs://" + filepath.Join(backupBaseDir, name, replica)
|
||||
tc.MustStartVmrestore(instance, backupPath, storageDataPath)
|
||||
}
|
||||
}
|
||||
|
||||
legacy1Data, wantLegacy1Series, wantLegacy1QueryResults := genData("legacy1")
|
||||
legacy2Data, wantLegacy2Series, wantLegacy2QueryResults := genData("legacy2")
|
||||
new1Data, wantNew1Series, wantNew1QueryResults := genData("new1")
|
||||
new2Data, wantNew2Series, wantNew2QueryResults := genData("new2")
|
||||
wantLegacy12Series := slices.Concat(wantLegacy1Series, wantLegacy2Series)
|
||||
wantLegacy12QueryResults := slices.Concat(wantLegacy1QueryResults, wantLegacy2QueryResults)
|
||||
wantLegacy1New1Series := slices.Concat(wantLegacy1Series, wantNew1Series)
|
||||
wantLegacy1New1QueryResults := slices.Concat(wantLegacy1QueryResults, wantNew1QueryResults)
|
||||
wantLegacy1New12Series := slices.Concat(wantLegacy1New1Series, wantNew2Series)
|
||||
wantLegacy1New12QueryResults := slices.Concat(wantLegacy1New1QueryResults, wantNew2QueryResults)
|
||||
var legacySUT, newSUT at.PrometheusWriteQuerier
|
||||
|
||||
// Verify backup/restore with legacy SUT.
|
||||
|
||||
// Start legacy SUT with empty storage data dir.
|
||||
legacySUT = opts.startLegacySUT()
|
||||
|
||||
// Ingest legacy1 records, ensure the queries return legacy1, and create
|
||||
// legacy1 backup.
|
||||
legacySUT.PrometheusAPIV1ImportPrometheus(t, legacy1Data, at.QueryOpts{})
|
||||
legacySUT.ForceFlush(t)
|
||||
assertQueries(legacySUT, `{__name__=~".*"}`, wantLegacy1Series, wantLegacy1QueryResults)
|
||||
createBackup(legacySUT, "legacy1")
|
||||
|
||||
// Ingest legacy2 records, ensure the queries return legacy1+legacy2, and
|
||||
// create legacy1+legacy2 backup.
|
||||
legacySUT.PrometheusAPIV1ImportPrometheus(t, legacy2Data, at.QueryOpts{})
|
||||
legacySUT.ForceFlush(t)
|
||||
assertQueries(legacySUT, `{__name__=~"legacy.*"}`, wantLegacy12Series, wantLegacy12QueryResults)
|
||||
createBackup(legacySUT, "legacy12")
|
||||
|
||||
// Stop legacy SUT and restore legacy1 data.
|
||||
// Start legacy SUT and ensure the queries return legacy1.
|
||||
opts.stopLegacySUT()
|
||||
restoreFromBackup("legacy1")
|
||||
legacySUT = opts.startLegacySUT()
|
||||
assertQueries(legacySUT, `{__name__=~".*"}`, wantLegacy1Series, wantLegacy1QueryResults)
|
||||
|
||||
opts.stopLegacySUT()
|
||||
|
||||
// Verify backup/restore with new SUT.
|
||||
|
||||
// Start new SUT (with partition indexDBs) with storage containing legacy1
|
||||
// data and Ensure that queries return legacy1 data.
|
||||
newSUT = opts.startNewSUT()
|
||||
assertQueries(newSUT, `{__name__=~".*"}`, wantLegacy1Series, wantLegacy1QueryResults)
|
||||
|
||||
// Ingest new1 records, ensure that queries now return legacy1+new1, and
|
||||
// create the legacy1+new1 backup.
|
||||
newSUT.PrometheusAPIV1ImportPrometheus(t, new1Data, at.QueryOpts{})
|
||||
newSUT.ForceFlush(t)
|
||||
assertQueries(newSUT, `{__name__=~"(legacy|new).*"}`, wantLegacy1New1Series, wantLegacy1New1QueryResults)
|
||||
createBackup(newSUT, "legacy1-new1")
|
||||
|
||||
// Ingest new2 records, ensure that queries now return legacy1+new1+new2,
|
||||
// and create the legacy1+new1+new2 backup.
|
||||
newSUT.PrometheusAPIV1ImportPrometheus(t, new2Data, at.QueryOpts{})
|
||||
newSUT.ForceFlush(t)
|
||||
assertQueries(newSUT, `{__name__=~"(legacy|new1|new2).*"}`, wantLegacy1New12Series, wantLegacy1New12QueryResults)
|
||||
createBackup(newSUT, "legacy1-new12")
|
||||
|
||||
// Stop new SUT and restore legacy1+new1 data.
|
||||
// Start new SUT and ensure queries return legacy1+new1 data.
|
||||
opts.stopNewSUT()
|
||||
restoreFromBackup("legacy1-new1")
|
||||
newSUT = opts.startNewSUT()
|
||||
assertQueries(newSUT, `{__name__=~".*"}`, wantLegacy1New1Series, wantLegacy1New1QueryResults)
|
||||
|
||||
opts.stopNewSUT()
|
||||
|
||||
// Verify backup/restore with legacy SUT again.
|
||||
|
||||
// Start legacy SUT with storage containing legacy1+new1 data.
|
||||
//
|
||||
// Ensure that the /series and /query_range queries return legacy1 data only.
|
||||
// new1 data is not returned because legacy vmsingle does not know about
|
||||
// partition indexDBs.
|
||||
legacySUT = opts.startLegacySUT()
|
||||
assertQueries(legacySUT, `{__name__=~".*"}`, wantLegacy1Series, wantLegacy1QueryResults)
|
||||
|
||||
// Stop legacy SUT and restore legacy1+legacy2 data.
|
||||
// Start legacy SUT and ensure that queries now return legacy1+legacy2 data.
|
||||
opts.stopLegacySUT()
|
||||
restoreFromBackup("legacy12")
|
||||
legacySUT = opts.startLegacySUT()
|
||||
assertQueries(legacySUT, `{__name__=~".*"}`, wantLegacy12Series, wantLegacy12QueryResults)
|
||||
|
||||
opts.stopLegacySUT()
|
||||
|
||||
// Verify backup/restore with new vmsingle again.
|
||||
|
||||
// Start new vmsingle with storage containing legacy1+legacy2 data and
|
||||
// ensure that queries return legacy1+legacy2 data.
|
||||
newSUT = opts.startNewSUT()
|
||||
assertQueries(newSUT, `{__name__=~".*"}`, wantLegacy12Series, wantLegacy12QueryResults)
|
||||
|
||||
// Stop new SUT and restore legacy1+new1+new2 data.
|
||||
// Start new SUT and ensure that queries return legacy1+new1+new2 data.
|
||||
opts.stopNewSUT()
|
||||
restoreFromBackup("legacy1-new12")
|
||||
newSUT = opts.startNewSUT()
|
||||
assertQueries(newSUT, `{__name__=~"(legacy|new).*"}`, wantLegacy1New12Series, wantLegacy1New12QueryResults)
|
||||
|
||||
opts.stopNewSUT()
|
||||
}
|
||||
|
||||
type testLegacyDowngradeOpts struct {
|
||||
startLegacySUT func() at.PrometheusWriteQuerier
|
||||
startNewSUT func() at.PrometheusWriteQuerier
|
||||
stopLegacySUT func()
|
||||
stopNewSUT func()
|
||||
}
|
||||
|
||||
func TestLegacySingleDowngrade(t *testing.T) {
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
storageDataPath := filepath.Join(tc.Dir(), "vmsingle")
|
||||
|
||||
opts := testLegacyDowngradeOpts{
|
||||
startLegacySUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartVmsingleAt("vmsingle-legacy", legacyVmsinglePath, []string{
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
"-retentionPeriod=100y",
|
||||
"-search.disableCache=true",
|
||||
"-search.maxStalenessInterval=1m",
|
||||
})
|
||||
},
|
||||
startNewSUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartVmsingle("vmsingle-new", []string{
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
"-retentionPeriod=100y",
|
||||
"-search.disableCache=true",
|
||||
"-search.maxStalenessInterval=1m",
|
||||
})
|
||||
},
|
||||
stopLegacySUT: func() {
|
||||
tc.StopApp("vmsingle-legacy")
|
||||
},
|
||||
stopNewSUT: func() {
|
||||
tc.StopApp("vmsingle-new")
|
||||
},
|
||||
}
|
||||
|
||||
testLegacyDowngrade(tc, opts)
|
||||
}
|
||||
|
||||
func TestLegacyClusterDowngrade(t *testing.T) {
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
storage1DataPath := filepath.Join(tc.Dir(), "vmstorage1")
|
||||
storage2DataPath := filepath.Join(tc.Dir(), "vmstorage2")
|
||||
|
||||
opts := testLegacyDowngradeOpts{
|
||||
startLegacySUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartCluster(&at.ClusterOptions{
|
||||
Vmstorage1Instance: "vmstorage1-legacy",
|
||||
Vmstorage1Binary: legacyVmstoragePath,
|
||||
Vmstorage1Flags: []string{
|
||||
"-storageDataPath=" + storage1DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
Vmstorage2Instance: "vmstorage2-legacy",
|
||||
Vmstorage2Binary: legacyVmstoragePath,
|
||||
Vmstorage2Flags: []string{
|
||||
"-storageDataPath=" + storage2DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
VminsertInstance: "vminsert",
|
||||
VminsertFlags: []string{},
|
||||
VmselectInstance: "vmselect",
|
||||
VmselectFlags: []string{
|
||||
"-search.disableCache=true",
|
||||
"-search.maxStalenessInterval=1m",
|
||||
},
|
||||
})
|
||||
},
|
||||
startNewSUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartCluster(&at.ClusterOptions{
|
||||
Vmstorage1Instance: "vmstorage1-new",
|
||||
Vmstorage1Flags: []string{
|
||||
"-storageDataPath=" + storage1DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
Vmstorage2Instance: "vmstorage2-new",
|
||||
Vmstorage2Flags: []string{
|
||||
"-storageDataPath=" + storage2DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
VminsertInstance: "vminsert",
|
||||
VminsertFlags: []string{},
|
||||
VmselectInstance: "vmselect",
|
||||
VmselectFlags: []string{
|
||||
"-search.disableCache=true",
|
||||
"-search.maxStalenessInterval=1m",
|
||||
},
|
||||
})
|
||||
},
|
||||
stopLegacySUT: func() {
|
||||
tc.StopApp("vminsert")
|
||||
tc.StopApp("vmselect")
|
||||
tc.StopApp("vmstorage1-legacy")
|
||||
tc.StopApp("vmstorage2-legacy")
|
||||
},
|
||||
stopNewSUT: func() {
|
||||
tc.StopApp("vminsert")
|
||||
tc.StopApp("vmselect")
|
||||
tc.StopApp("vmstorage1-new")
|
||||
tc.StopApp("vmstorage2-new")
|
||||
},
|
||||
}
|
||||
|
||||
testLegacyDowngrade(tc, opts)
|
||||
}
|
||||
|
||||
func testLegacyDowngrade(tc *at.TestCase, opts testLegacyDowngradeOpts) {
|
||||
t := tc.T()
|
||||
|
||||
type want struct {
|
||||
series []map[string]string
|
||||
labels []string
|
||||
labelValues []string
|
||||
queryResults []*at.QueryResult
|
||||
queryRangeResults []*at.QueryResult
|
||||
}
|
||||
|
||||
uniq := func(s []string) []string {
|
||||
slices.Sort(s)
|
||||
return slices.Compact(s)
|
||||
}
|
||||
|
||||
mergeWant := func(want1, want2 want) want {
|
||||
var result want
|
||||
result.series = slices.Concat(want1.series, want2.series)
|
||||
result.labels = uniq(slices.Concat(want1.labels, want2.labels))
|
||||
result.labelValues = slices.Concat(want1.labelValues, want2.labelValues)
|
||||
result.queryResults = slices.Concat(want1.queryResults, want2.queryResults)
|
||||
result.queryRangeResults = slices.Concat(want1.queryRangeResults, want2.queryRangeResults)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// Use the same number of metrics and time range for all the data batches below.
|
||||
const numMetrics = 1000
|
||||
const labelName = "prefix"
|
||||
start := time.Date(2025, 3, 1, 10, 0, 0, 0, time.UTC).UnixMilli()
|
||||
end := start
|
||||
genData := func(prefix string) (recs []string, want want) {
|
||||
labelValue := prefix
|
||||
recs = make([]string, numMetrics)
|
||||
want.series = make([]map[string]string, numMetrics)
|
||||
want.labels = []string{"__name__", labelName}
|
||||
want.labelValues = []string{labelValue}
|
||||
want.queryResults = make([]*at.QueryResult, numMetrics)
|
||||
want.queryRangeResults = make([]*at.QueryResult, numMetrics)
|
||||
for i := range numMetrics {
|
||||
name := fmt.Sprintf("%s_%03d", prefix, i)
|
||||
value := float64(i)
|
||||
timestamp := start
|
||||
|
||||
recs[i] = fmt.Sprintf("%s{%s=\"%s\"} %f %d", name, labelName, labelValue, value, timestamp)
|
||||
want.series[i] = map[string]string{"__name__": name, labelName: labelValue}
|
||||
want.queryResults[i] = &at.QueryResult{
|
||||
Metric: map[string]string{"__name__": name, labelName: labelValue},
|
||||
Sample: &at.Sample{Timestamp: timestamp, Value: value},
|
||||
}
|
||||
want.queryRangeResults[i] = &at.QueryResult{
|
||||
Metric: map[string]string{"__name__": name, labelName: labelValue},
|
||||
Samples: []*at.Sample{{Timestamp: timestamp, Value: value}},
|
||||
}
|
||||
}
|
||||
return recs, want
|
||||
}
|
||||
|
||||
// assertSeries issues various queries to the app and compares the query
|
||||
// results with the expected ones.
|
||||
assertQueries := func(app at.PrometheusQuerier, query string, want want, wantSeriesCount uint64) {
|
||||
t.Helper()
|
||||
tc.Assert(&at.AssertOptions{
|
||||
Msg: "unexpected /api/v1/series response",
|
||||
Got: func() any {
|
||||
return app.PrometheusAPIV1Series(t, query, at.QueryOpts{
|
||||
Start: fmt.Sprintf("%d", start),
|
||||
End: fmt.Sprintf("%d", end),
|
||||
}).Sort()
|
||||
},
|
||||
Want: &at.PrometheusAPIV1SeriesResponse{
|
||||
Status: "success",
|
||||
Data: want.series,
|
||||
},
|
||||
FailNow: true,
|
||||
})
|
||||
|
||||
tc.Assert(&at.AssertOptions{
|
||||
Msg: "unexpected /api/v1/series/count response",
|
||||
Got: func() any {
|
||||
return app.PrometheusAPIV1SeriesCount(t, at.QueryOpts{
|
||||
Start: fmt.Sprintf("%d", start),
|
||||
End: fmt.Sprintf("%d", end),
|
||||
})
|
||||
},
|
||||
Want: &at.PrometheusAPIV1SeriesCountResponse{
|
||||
Status: "success",
|
||||
Data: []uint64{wantSeriesCount},
|
||||
},
|
||||
FailNow: true,
|
||||
})
|
||||
|
||||
tc.Assert(&at.AssertOptions{
|
||||
Msg: "unexpected /api/v1/labels response",
|
||||
Got: func() any {
|
||||
return app.PrometheusAPIV1Labels(t, query, at.QueryOpts{
|
||||
Start: fmt.Sprintf("%d", start),
|
||||
End: fmt.Sprintf("%d", end),
|
||||
})
|
||||
},
|
||||
Want: &at.PrometheusAPIV1LabelsResponse{
|
||||
Status: "success",
|
||||
Data: want.labels,
|
||||
},
|
||||
FailNow: true,
|
||||
})
|
||||
|
||||
tc.Assert(&at.AssertOptions{
|
||||
Msg: "unexpected /api/v1/label/../values response",
|
||||
Got: func() any {
|
||||
return app.PrometheusAPIV1LabelValues(t, labelName, query, at.QueryOpts{
|
||||
Start: fmt.Sprintf("%d", start),
|
||||
End: fmt.Sprintf("%d", end),
|
||||
})
|
||||
},
|
||||
Want: &at.PrometheusAPIV1LabelValuesResponse{
|
||||
Status: "success",
|
||||
Data: want.labelValues,
|
||||
},
|
||||
FailNow: true,
|
||||
})
|
||||
|
||||
tc.Assert(&at.AssertOptions{
|
||||
Msg: "unexpected /api/v1/query response",
|
||||
Got: func() any {
|
||||
return app.PrometheusAPIV1Query(t, query, at.QueryOpts{
|
||||
Time: fmt.Sprintf("%d", start),
|
||||
Step: "10m",
|
||||
})
|
||||
},
|
||||
Want: &at.PrometheusAPIV1QueryResponse{
|
||||
Status: "success",
|
||||
Data: &at.QueryData{
|
||||
ResultType: "vector",
|
||||
Result: want.queryResults,
|
||||
},
|
||||
},
|
||||
Retries: 300,
|
||||
FailNow: true,
|
||||
})
|
||||
|
||||
tc.Assert(&at.AssertOptions{
|
||||
Msg: "unexpected /api/v1/query_range response",
|
||||
Got: func() any {
|
||||
return app.PrometheusAPIV1QueryRange(t, query, at.QueryOpts{
|
||||
Start: fmt.Sprintf("%d", start),
|
||||
End: fmt.Sprintf("%d", end),
|
||||
Step: "60s",
|
||||
})
|
||||
},
|
||||
Want: &at.PrometheusAPIV1QueryResponse{
|
||||
Status: "success",
|
||||
Data: &at.QueryData{
|
||||
ResultType: "matrix",
|
||||
Result: want.queryRangeResults,
|
||||
},
|
||||
},
|
||||
Retries: 300,
|
||||
FailNow: true,
|
||||
})
|
||||
}
|
||||
|
||||
wantEmpty := want{
|
||||
series: []map[string]string{},
|
||||
labels: []string{"__name__"},
|
||||
labelValues: []string{},
|
||||
queryResults: []*at.QueryResult{},
|
||||
queryRangeResults: []*at.QueryResult{},
|
||||
}
|
||||
|
||||
legacy1Data, wantLegacy1 := genData("legacy1")
|
||||
legacy2Data, wantLegacy2 := genData("legacy2")
|
||||
new1Data, wantNew1 := genData("new1")
|
||||
wantLegacy1New1 := mergeWant(wantLegacy1, wantNew1)
|
||||
wantLegacy2New1 := mergeWant(wantLegacy2, wantNew1)
|
||||
var legacySUT, newSUT at.PrometheusWriteQuerier
|
||||
|
||||
// Start legacy SUT with empty storage data dir.
|
||||
// Ingest legacy1 records, ensure the queries return legacy1
|
||||
legacySUT = opts.startLegacySUT()
|
||||
legacySUT.PrometheusAPIV1ImportPrometheus(t, legacy1Data, at.QueryOpts{})
|
||||
legacySUT.ForceFlush(t)
|
||||
assertQueries(legacySUT, `{__name__=~".*"}`, wantLegacy1, numMetrics)
|
||||
opts.stopLegacySUT()
|
||||
|
||||
// Start new SUT (with partition indexDBs) with storage containing legacy1
|
||||
// data and ensure that queries return new1 and legacy1 data.
|
||||
newSUT = opts.startNewSUT()
|
||||
newSUT.PrometheusAPIV1ImportPrometheus(t, new1Data, at.QueryOpts{})
|
||||
newSUT.ForceFlush(t)
|
||||
assertQueries(newSUT, `{__name__=~".*"}`, wantLegacy1New1, 2*numMetrics)
|
||||
opts.stopNewSUT()
|
||||
|
||||
// Downgrade to legacy SUT, ensure the queries return only legacy1.
|
||||
// Delete all series, ensure that queries return no series.
|
||||
// Ingest legacy2 records, ensure the queries return only legacy2.
|
||||
legacySUT = opts.startLegacySUT()
|
||||
assertQueries(legacySUT, `{__name__=~".*"}`, wantLegacy1, numMetrics)
|
||||
legacySUT.APIV1AdminTSDBDeleteSeries(t, `{__name__=~".*"}`, at.QueryOpts{})
|
||||
assertQueries(legacySUT, `{__name__=~".*"}`, wantEmpty, numMetrics)
|
||||
legacySUT.PrometheusAPIV1ImportPrometheus(t, legacy2Data, at.QueryOpts{})
|
||||
legacySUT.ForceFlush(t)
|
||||
// series count includes deleted metrics
|
||||
assertQueries(legacySUT, `{__name__=~".*"}`, wantLegacy2, 2*numMetrics)
|
||||
opts.stopLegacySUT()
|
||||
|
||||
// Upgrade to new SUT, ensure the queries return recently ingested legacy2 and new1
|
||||
// since legacy SUT cannot delete them.
|
||||
// Delete all series, ensure that queries return no series.
|
||||
newSUT = opts.startNewSUT()
|
||||
// series count includes deleted metrics
|
||||
assertQueries(newSUT, `{__name__=~".*"}`, wantLegacy2New1, 3*numMetrics)
|
||||
newSUT.APIV1AdminTSDBDeleteSeries(t, `{__name__=~".*"}`, at.QueryOpts{})
|
||||
// series count includes deleted metrics
|
||||
assertQueries(newSUT, `{__name__=~".*"}`, wantEmpty, 3*numMetrics)
|
||||
opts.stopNewSUT()
|
||||
}
|
||||
@@ -603,224 +603,13 @@
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 9
|
||||
"y": 8
|
||||
},
|
||||
"id": 13,
|
||||
"panels": [],
|
||||
"title": "Overview",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 10
|
||||
},
|
||||
"id": 11,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vmauth_user_requests_total{job=~\"$job\", instance=~\"$instance\", username=~\"$user\"}[$__rate_interval])) by(username)",
|
||||
"hide": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vmauth_unauthorized_user_requests_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
|
||||
"hide": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Requests rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows percent utilization of per concurrent requests capacity.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "dashed"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"max": 1,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 0.9
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 10
|
||||
},
|
||||
"id": 14,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true,
|
||||
"sortBy": "Mean",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "max(\nmax_over_time(vmauth_user_concurrent_requests_current{job=~\"$job\", instance=~\"$instance\", username=~\"$user\"}[$__rate_interval])\n/ \nvmauth_user_concurrent_requests_capacity{job=~\"$job\", instance=~\"$instance\", username=~\"$user\"}\n) by(username) > 0\n",
|
||||
"hide": false,
|
||||
"interval": "5m",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "User concurrent requests usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
@@ -839,6 +628,7 @@
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -855,6 +645,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -870,7 +661,7 @@
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -885,7 +676,7 @@
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 19
|
||||
"y": 9
|
||||
},
|
||||
"id": 16,
|
||||
"options": {
|
||||
@@ -896,10 +687,12 @@
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -935,6 +728,7 @@
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -951,6 +745,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -966,7 +761,7 @@
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -981,7 +776,7 @@
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 19
|
||||
"y": 9
|
||||
},
|
||||
"id": 10,
|
||||
"options": {
|
||||
@@ -992,10 +787,12 @@
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1042,7 +839,6 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows duration in seconds of user requests by quantile.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -1055,6 +851,7 @@
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -1071,6 +868,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -1086,7 +884,338 @@
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 18
|
||||
},
|
||||
"id": 11,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vmauth_user_requests_total{job=~\"$job\", instance=~\"$instance\", username=~\"$user\"}[$__rate_interval])) by(username)",
|
||||
"hide": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vmauth_unauthorized_user_requests_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
|
||||
"hide": false,
|
||||
"legendFormat": "unauthorized_user",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "User requests rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 18
|
||||
},
|
||||
"id": 37,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vmauth_user_request_errors_total{job=~\"$job\", instance=~\"$instance\", username=~\"$user\"}[$__rate_interval])) by (username) > 0",
|
||||
"hide": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vmauth_unauthorized_user_request_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) > 0",
|
||||
"hide": false,
|
||||
"legendFormat": "unauthorized_user",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "User requests error rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows percent utilization of per concurrent requests capacity.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "dashed"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"max": 1,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 0.9
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 27
|
||||
},
|
||||
"id": 14,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true,
|
||||
"sortBy": "Mean",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "max(\nmax_over_time(vmauth_user_concurrent_requests_current{job=~\"$job\", instance=~\"$instance\", username=~\"$user\"}[$__rate_interval])\n/ \nvmauth_user_concurrent_requests_capacity{job=~\"$job\", instance=~\"$instance\", username=~\"$user\"}\n) by(username) > 0\n",
|
||||
"hide": false,
|
||||
"interval": "5m",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "User concurrent requests usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows duration in seconds of user requests by quantile.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -1101,7 +1230,7 @@
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"x": 12,
|
||||
"y": 27
|
||||
},
|
||||
"id": 19,
|
||||
@@ -1118,10 +1247,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forwards them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.131.0
|
||||
image: victoriametrics/vmagent:v1.132.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -37,14 +37,14 @@ services:
|
||||
# vmstorage shards. Each shard receives 1/N of all metrics sent to vminserts,
|
||||
# where N is number of vmstorages (2 in this case).
|
||||
vmstorage-1:
|
||||
image: victoriametrics/vmstorage:v1.131.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.132.0-cluster
|
||||
volumes:
|
||||
- strgdata-1:/storage
|
||||
command:
|
||||
- "--storageDataPath=/storage"
|
||||
restart: always
|
||||
vmstorage-2:
|
||||
image: victoriametrics/vmstorage:v1.131.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.132.0-cluster
|
||||
volumes:
|
||||
- strgdata-2:/storage
|
||||
command:
|
||||
@@ -54,7 +54,7 @@ services:
|
||||
# vminsert is ingestion frontend. It receives metrics pushed by vmagent,
|
||||
# pre-process them and distributes across configured vmstorage shards.
|
||||
vminsert-1:
|
||||
image: victoriametrics/vminsert:v1.131.0-cluster
|
||||
image: victoriametrics/vminsert:v1.132.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -63,7 +63,7 @@ services:
|
||||
- "--storageNode=vmstorage-2:8400"
|
||||
restart: always
|
||||
vminsert-2:
|
||||
image: victoriametrics/vminsert:v1.131.0-cluster
|
||||
image: victoriametrics/vminsert:v1.132.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -75,7 +75,7 @@ services:
|
||||
# vmselect is a query frontend. It serves read queries in MetricsQL or PromQL.
|
||||
# vmselect collects results from configured `--storageNode` shards.
|
||||
vmselect-1:
|
||||
image: victoriametrics/vmselect:v1.131.0-cluster
|
||||
image: victoriametrics/vmselect:v1.132.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -85,7 +85,7 @@ services:
|
||||
- "--vmalert.proxyURL=http://vmalert:8880"
|
||||
restart: always
|
||||
vmselect-2:
|
||||
image: victoriametrics/vmselect:v1.131.0-cluster
|
||||
image: victoriametrics/vmselect:v1.132.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -100,7 +100,7 @@ services:
|
||||
# read requests from Grafana, vmui, vmalert among vmselects.
|
||||
# It can be used as an authentication proxy.
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.131.0
|
||||
image: victoriametrics/vmauth:v1.132.0
|
||||
depends_on:
|
||||
- "vmselect-1"
|
||||
- "vmselect-2"
|
||||
@@ -114,7 +114,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.131.0
|
||||
image: victoriametrics/vmalert:v1.132.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forwards them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.131.0
|
||||
image: victoriametrics/vmagent:v1.132.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
# VictoriaMetrics instance, a single process responsible for
|
||||
# storing metrics and serving read requests.
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.131.0
|
||||
image: victoriametrics/victoria-metrics:v1.132.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
- 8089:8089
|
||||
@@ -54,7 +54,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.131.0
|
||||
image: victoriametrics/vmalert:v1.132.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "alertmanager"
|
||||
|
||||
@@ -11,6 +11,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/nbuo5Mr4k?viewPanel=10&var-instance={{ $labels.instance }}"
|
||||
summary: "vmauth ({{ $labels.instance }}) reached concurrent requests limit"
|
||||
description: "Possible solutions: increase the limit with flag: -maxConcurrentRequests,
|
||||
deploy additional vmauth replicas, check requests latency at backend service.
|
||||
@@ -21,7 +22,27 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/nbuo5Mr4k?viewPanel=10&var-instance={{ $labels.instance }}"
|
||||
summary: "vmauth has reached concurrent requests limit for username {{ $labels.username }}"
|
||||
description: "Possible solutions: increase limit with flag: -maxConcurrentPerUserRequests,
|
||||
deploy additional vmauth replicas, check requests latency at backend service."
|
||||
|
||||
- alert: UnauthorizedUserRequestErrors
|
||||
expr: increase(vmauth_unauthorized_user_request_errors_total[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/nbuo5Mr4k?viewPanel=37&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many errors served for unauthorized user (instance {{ $labels.instance }})"
|
||||
description: "Requests from unauthorized user are receiving errors.
|
||||
Please check the vmauth logs to verify that the configuration is correct and clients are sending valid requests."
|
||||
- alert: UserRequestErrors
|
||||
expr: increase(vmauth_user_request_errors_total[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/nbuo5Mr4k?viewPanel=37&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many errors served for user {{ $labels.username }} (instance {{ $labels.instance }})"
|
||||
description: "Requests from user {{ $labels.username }} are receiving errors.
|
||||
Please check the vmauth logs to verify that the configuration is correct and clients are sending valid requests."
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.131.0
|
||||
image: victoriametrics/vmagent:v1.132.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -14,7 +14,7 @@ services:
|
||||
restart: always
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.131.0
|
||||
image: victoriametrics/victoria-metrics:v1.132.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
restart: always
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.131.0
|
||||
image: victoriametrics/vmalert:v1.132.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -193,4 +193,17 @@ docs-update-flags:
|
||||
# remove after https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9680 implemented
|
||||
sed -i '/The maximum number of concurrent insert requests/ s/(default [0-9]\+)/(default 2*cgroup.AvailableCPUs())/' docs/victoriametrics/vmstorage_flags.md
|
||||
sed -i '/The maximum number of concurrent vmselect requests the vmstorage can process at./ s/(default [0-9]\+)/(default 2*cgroup.AvailableCPUs())/' docs/victoriametrics/vmstorage_flags.md
|
||||
sed -i '/The maximum number of concurrent goroutines to work with files;/ s/(default [0-9]\+)/(default fsutil.getDefaultConcurrency())/' docs/victoriametrics/vmstorage_flags.md
|
||||
sed -i '/The maximum number of concurrent goroutines to work with files;/ s/(default [0-9]\+)/(default fsutil.getDefaultConcurrency())/' docs/victoriametrics/vmstorage_flags.md
|
||||
|
||||
# ---- vmauth
|
||||
(cd /tmp/vm-enterprise-cluster && make vmauth)
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmauth_flags.md
|
||||
(cd /tmp/vm-enterprise-cluster && ./bin/vmauth -help 2>&1) >> docs/victoriametrics/vmauth_flags.md
|
||||
echo '```' >> docs/victoriametrics/vmauth_flags.md
|
||||
|
||||
# replace tabs in output with one space
|
||||
sed -i 's/\t/ /g' docs/victoriametrics/vmauth_flags.md
|
||||
|
||||
# adjust flags with dynamic default values
|
||||
# remove after https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9680 implemented
|
||||
sed -i '/The maximum number of concurrent goroutines to work with files;/ s/(default [0-9]\+)/(default fsutil.getDefaultConcurrency())/' docs/victoriametrics/vmauth_flags.md
|
||||
|
||||
@@ -10,9 +10,9 @@ sitemap:
|
||||
|
||||
- To use *vmanomaly*, part of the enterprise package, a license key is required. Obtain your key [here](https://victoriametrics.com/products/enterprise/trial/) for this tutorial or for enterprise use.
|
||||
- In the tutorial, we'll be using the following VictoriaMetrics components:
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.131.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.131.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.131.0)
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.132.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.132.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.132.0)
|
||||
- [Grafana](https://grafana.com/) (v10.2.1)
|
||||
- [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/)
|
||||
- [Node exporter](https://github.com/prometheus/node_exporter#node-exporter) (v1.7.0) and [Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) (v0.27.0)
|
||||
@@ -323,7 +323,7 @@ Let's wrap it all up together into the `docker-compose.yml` file.
|
||||
services:
|
||||
vmagent:
|
||||
container_name: vmagent
|
||||
image: victoriametrics/vmagent:v1.131.0
|
||||
image: victoriametrics/vmagent:v1.132.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -340,7 +340,7 @@ services:
|
||||
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.131.0
|
||||
image: victoriametrics/victoria-metrics:v1.132.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -373,7 +373,7 @@ services:
|
||||
|
||||
vmalert:
|
||||
container_name: vmalert
|
||||
image: victoriametrics/vmalert:v1.131.0
|
||||
image: victoriametrics/vmalert:v1.132.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -249,27 +249,27 @@ services:
|
||||
- grafana_data:/var/lib/grafana/
|
||||
|
||||
vmsingle:
|
||||
image: victoriametrics/victoria-metrics:v1.131.0
|
||||
image: victoriametrics/victoria-metrics:v1.132.0
|
||||
command:
|
||||
- -httpListenAddr=0.0.0.0:8429
|
||||
|
||||
vmstorage:
|
||||
image: victoriametrics/vmstorage:v1.131.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.132.0-cluster
|
||||
|
||||
vminsert:
|
||||
image: victoriametrics/vminsert:v1.131.0-cluster
|
||||
image: victoriametrics/vminsert:v1.132.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8400
|
||||
- -httpListenAddr=0.0.0.0:8480
|
||||
|
||||
vmselect:
|
||||
image: victoriametrics/vmselect:v1.131.0-cluster
|
||||
image: victoriametrics/vmselect:v1.132.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8401
|
||||
- -httpListenAddr=0.0.0.0:8481
|
||||
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.131.0
|
||||
image: victoriametrics/vmagent:v1.132.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
command:
|
||||
@@ -278,7 +278,7 @@ services:
|
||||
- -remoteWrite.url=http://vmsingle:8429/api/v1/write
|
||||
|
||||
vmgateway-cluster:
|
||||
image: victoriametrics/vmgateway:v1.131.0-enterprise
|
||||
image: victoriametrics/vmgateway:v1.132.0-enterprise
|
||||
ports:
|
||||
- 8431:8431
|
||||
volumes:
|
||||
@@ -294,7 +294,7 @@ services:
|
||||
- -auth.oidcDiscoveryEndpoints=http://keycloak:8080/realms/master/.well-known/openid-configuration
|
||||
|
||||
vmgateway-single:
|
||||
image: victoriametrics/vmgateway:v1.131.0-enterprise
|
||||
image: victoriametrics/vmgateway:v1.132.0-enterprise
|
||||
ports:
|
||||
- 8432:8431
|
||||
volumes:
|
||||
@@ -405,7 +405,7 @@ Once iDP configuration is done, vmagent configuration needs to be updated to use
|
||||
|
||||
```yaml
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.131.0
|
||||
image: victoriametrics/vmagent:v1.132.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
- ./vmagent-client-secret:/etc/vmagent/oauth2-client-secret
|
||||
|
||||
@@ -105,6 +105,7 @@ See also [case studies](https://docs.victoriametrics.com/victoriametrics/casestu
|
||||
* [How We Eliminated $10K+/Year in AWS Cross-Zone Data Transfer Costs with Zone-Aware Kubernetes Monitoring](https://medium.com/@vijayrauniyar1818/how-we-eliminated-10k-year-in-aws-cross-zone-data-transfer-costs-with-zone-aware-kubernetes-09fff0c2435b)
|
||||
* [Why I Switched to VictoriaMetrics: Scaling from Small Business to Enterprise](https://blackmetalz.github.io/why-i-switched-to-victoriametrics-scaling-from-small-business-to-enterprise.html)
|
||||
* [Backing up VictoriaMetrics Data: A Complete Guide](https://medium.com/@kanakaraju896/backing-up-victoriametrics-data-a-complete-guide-24473c74450f)
|
||||
* [Unlocking the Power of VictoriaMetrics: A Prometheus Alternative](https://developer-friendly.blog/blog/2024/06/17/unlocking-the-power-of-victoriametrics-a-prometheus-alternative/)
|
||||
|
||||
## Third-party articles and slides about VictoriaLogs
|
||||
|
||||
|
||||
@@ -27,5 +27,5 @@ to [the latest available releases](https://docs.victoriametrics.com/victoriametr
|
||||
|
||||
## Currently supported LTS release lines
|
||||
|
||||
- v1.122.x - the latest one is [v1.122.10 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.10)
|
||||
- v1.110.x - the latest one is [v1.110.25 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.110.25)
|
||||
- v1.122.x - the latest one is [v1.122.11 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.11)
|
||||
- v1.110.x - the latest one is [v1.110.26 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.110.26)
|
||||
|
||||
@@ -58,9 +58,9 @@ Download the newest available [VictoriaMetrics release](https://docs.victoriamet
|
||||
from [DockerHub](https://hub.docker.com/r/victoriametrics/victoria-metrics) or [Quay](https://quay.io/repository/victoriametrics/victoria-metrics?tab=tags):
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/victoria-metrics:v1.131.0
|
||||
docker pull victoriametrics/victoria-metrics:v1.132.0
|
||||
docker run -it --rm -v `pwd`/victoria-metrics-data:/victoria-metrics-data -p 8428:8428 \
|
||||
victoriametrics/victoria-metrics:v1.131.0 --selfScrapeInterval=5s -storageDataPath=victoria-metrics-data
|
||||
victoriametrics/victoria-metrics:v1.132.0 --selfScrapeInterval=5s -storageDataPath=victoria-metrics-data
|
||||
```
|
||||
|
||||
_For Enterprise images see [this link](https://docs.victoriametrics.com/victoriametrics/enterprise/#docker-images)._
|
||||
|
||||
@@ -26,13 +26,35 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
|
||||
## tip
|
||||
|
||||
**Update Note 1:** [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): Upgrading to per-partition index requires registering all active time series. Expect a slowdown of data ingestion and queries during upgrade roll-out. This is a one-time operation. Additionally, for users with retention periods shorter than 1 month, the disk usage may increase.
|
||||
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): add support for global `sampleLimit` setting. This allows users to efficiently limit the number of samples accepted per scrape target. This also ensures target-level `sample_limit` can correctly override the global setting. See [#10145](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10145). Thanks to @kobylyanskiy for the contribution.
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): expose `vmauth_user_request_backend_requests_total` and `vmauth_unauthorized_user_request_backend_requests_total` [metrics](https://docs.victoriametrics.com/victoriametrics/vmauth/#monitoring), which track the number of requests sent to backends. These counts may exceed `vmauth_user_requests_total` and `vmauth_unauthorized_user_requests_total` when requests are retried across multiple backends. See [#10171](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10171).
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): expose `vmauth_user_request_errors_total` and `vmauth_unauthorized_user_request_errors_total` [metrics](https://docs.victoriametrics.com/victoriametrics/vmauth/#monitoring), which track the number of user request errors. See [#10188](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10188).
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): add `-maxQueueDuration` command-line flag for waiting until the incoming request can be executed if the `-maxConcurrentRequests` limit is already reached. This should help with graceful handling of a short spike in the number of concurrent requests without leading to a retry storm. See [#10078](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078).
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): introduce per-partition index. This should reduce disk space occupied by indexDBs as they get deleted along with the corresponding partitions once those partitions become outside the retention window. Read more about the motivation behind this feature at [#7599](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7599) and what to expect at [#8134](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8134).
|
||||
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): properly add `exported_` prefix to scraped metrics that have the same names as [auto-generated metrics](https://docs.victoriametrics.com/victoriametrics/vmagent/#automatically-generated-metrics). Previously, some auto-generated metrics were not recognized, so scraped metrics with the same names could overwrite them. See [#10197](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10197). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): fix `vmagent_rows_inserted_total{type="newrelic"}` metric to correctly count samples. See [#10191](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10191). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): fix `vmauth_user_request_backend_errors_total` and `vmauth_unauthorized_user_request_backend_errors_total` to only reflect backend request errors. Previously, these counters could be overcounted because they also included user request errors. See [#10177](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10177).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): rotate `dateMetricIDCache` instead of resetting it. This should make the eviction less aggressive. Since the cache does not have fixed max size anymore the `-storage.cacheSizeIndexDBDateMetricID` flag has been removed. See [#10064](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10064) and PR [#10169](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10169).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): properly add metrics metadata scraped with `promscrape.config` and `selfScrapeInterval`. See [#10175](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10175).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix stats collection for `indexdb/tagFiltersToMetricIDs`, `indexdb/metricID`, and `indexdb/date_metricID` caches. As per PR [#10131](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10131), the stats are collected for the most utilized instance only, but if the size of all instances is 0 then the stats won't be collected at all. This may result in max cache size alternating between the actual value and 0. See [#10204](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10204).
|
||||
|
||||
## [v1.132.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.132.0)
|
||||
|
||||
Released at 2025-12-12
|
||||
|
||||
**Known issue: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/), [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vminsert` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): may leak memory when ingesting data via the [OpenTelemetry protocol](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#sending-data-via-opentelemetry).
|
||||
The problem was introduced in [293d809](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/293d80910ce14c247e943c63cd19467df5767c3c) and is already fixed in commits [fastjson#18c81211](https://github.com/valyala/fastjson/commit/18c812114b638d460f0fc6d8e2b86b719e171389) and [19009836](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/19009836c704a75a295c11b5d55a171c206646bd).
|
||||
If you rely on OpenTelemetry ingestion, skip this version or [build from master](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-build-from-sources) to avoid the leak.
|
||||
Read [VictoriaLogs#869](https://github.com/VictoriaMetrics/VictoriaLogs/issues/869) for more details.**
|
||||
|
||||
**Known issue: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): may shut down ungracefully (data, indexes, and caches are not flushed to disk) in Kubernetes deployments if the number of `vminserts` is > 1 and `terminationGracePeriodSeconds` < 60s (30s by default). The issue was introduced in [9487](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9487) and [10136](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10136) and was fixed in [10224](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10224). The fix will be included in `v1.133.0`. Even with this fix, ungraceful shutdowns are possible if flushing in-memory contents takes more than 5s. In this case decrease `-storage.vminsertConnsShutdownDuration` and/or increase [terminationGracePeriodSeconds](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-termination-flow).**
|
||||
|
||||
* SECURITY: upgrade Go builder from Go1.25.4 to Go1.25.5. See [the list of issues addressed in Go1.25.5](https://github.com/golang/go/issues?q=milestone%3AGo1.25.5%20label%3ACherryPickApproved).
|
||||
|
||||
* FEATURE: [dashboards/operator](https://grafana.com/grafana/dashboards/17869-victoriametrics-operator/): add panels for flags and configuration parameters values. See [#1341-operator](https://github.com/VictoriaMetrics/operator/issues/1341).
|
||||
* FEATURE: [dashboards/operator](https://grafana.com/grafana/dashboards/17869-victoriametrics-operator/): add panels for flags and configuration parameters values. See [operator#1341](https://github.com/VictoriaMetrics/operator/issues/1341).
|
||||
* FEATURE: [dashboards/single](https://grafana.com/grafana/dashboards/10229), [dashboards/cluster](https://grafana.com/grafana/dashboards/11176): add `Memory usage breakdown` panels to `Drilldown` section. These panels help analyze overall memory distribution and diagnose anomalies or leaks. See [#10139](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10139).
|
||||
* FEATURE: [dashboards/single](https://grafana.com/grafana/dashboards/10229), [dashboards/cluster](https://grafana.com/grafana/dashboards/11176): add `Major page faults rate` panels to `Troubleshooting` and `Drilldown` sections. See [#9974](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9974).
|
||||
* FEATURE: [Influx line protocol data ingestion](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb/): reduce CPU and memory usage when parsing Influx lines with escaped chars - `,`, `\\`, `=` and ` `. See [#10053](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10053).
|
||||
@@ -263,6 +285,22 @@ Released at 2025-08-01
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): do not configure `-httpListenAddr.useProxyProtocol` for `-httpInternalListenAddr`. See this issue [#9515](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9515) for details.
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): always display the tenant selector if the list of tenants is not empty. See [#9396](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9396).
|
||||
|
||||
## [v1.122.11](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.11)
|
||||
|
||||
Released at 2025-12-12
|
||||
|
||||
**v1.122.x is a line of [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/). It contains important up-to-date bugfixes for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
All these fixes are also included in [the latest community release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
The v1.122.x line will be supported for at least 12 months since [v1.122.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v11220) release**
|
||||
|
||||
* BUGFIX: all VictoriaMetrics components: properly validate remaining system memory limit. Previously it could have negative values. See this issue [#10083](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10083) for details.
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): don't increase `vmalert_alerting_rules_errors_total`/`vmalert_recording_rules_errors_total` for request context cancellation, which may occur during graceful shutdown or group configuration update. See [#10128](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10128). Thanks to @tIGO for the contribution.
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): verify backend network reachability with a TCP dial before marking it healthy. Previously, backends were auto-restored after `-failTimeout` even if the network was still unreachable, causing requests to hang repeatedly. `vmauth` now performs a 1s TCP dial check before returning a backend to the healthy pool. See [#9997](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9997).
|
||||
* BUGFIX: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): properly handle process termination during prompt confirmation. Previously, termination signal was ignored and process was still waiting for user input. See [#10104](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10104).
|
||||
* BUGFIX: [vmgateway](https://docs.victoriametrics.com/victoriametrics/vmgateway/): properly recover from proxy requests errors. Previously, vmgateway could return an empty response.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix potential loss of a datapoint in the response when querying at the last millisecond of the day. See issue [#9804](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9804) for details.
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): always add `/prometheus` suffix while generating backend URL. See [#10097](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10097).
|
||||
|
||||
## [v1.122.10](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.10)
|
||||
|
||||
Released at 2025-11-28
|
||||
@@ -768,6 +806,20 @@ Released at 2025-02-10
|
||||
* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [vmselect](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix discrepancies when using `or` binary operator. See [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7759) and [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7640) issues for details.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly update number of unique series for [cardinality limiter](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-limiter) on ingestion. Previously, limit could undercount the real number of the ingested unique series.
|
||||
|
||||
## [v1.110.26](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.110.26)
|
||||
|
||||
Released at 2025-12-12
|
||||
|
||||
**v1.110.x is a line of [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/). It contains important up-to-date bugfixes for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
All these fixes are also included in [the latest community release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
The v1.110.x line will be supported for at least 12 months since [v1.110.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v11100) release**
|
||||
|
||||
* BUGFIX: all VictoriaMetrics components: properly validate remaining system memory limit. Previously it could have negative values. See this issue [#10083](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10083) for details.
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): don't increase `vmalert_alerting_rules_errors_total`/`vmalert_recording_rules_errors_total` for request context cancellation, which may occur during graceful shutdown or group configuration update. See [#10128](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10128). Thanks to @tIGO for the contribution.
|
||||
* BUGFIX: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): properly handle process termination during prompt confirmation. Previously, termination signal was ignored and process was still waiting for user input. See [#10104](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10104).
|
||||
* BUGFIX: [vmgateway](https://docs.victoriametrics.com/victoriametrics/vmgateway/): properly recover from proxy requests errors. Previously, vmgateway could return an empty response.
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): verify backend network reachability with a TCP dial before marking it healthy. Previously, backends were auto-restored after `-failTimeout` even if the network was still unreachable, causing requests to hang repeatedly. `vmauth` now performs a 1s TCP dial check before returning a backend to the healthy pool. See [#9997](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9997).
|
||||
|
||||
## [v1.110.25](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.110.25)
|
||||
|
||||
Released at 2025-11-28
|
||||
|
||||
@@ -117,7 +117,7 @@ It is allowed to run VictoriaMetrics and VictoriaLogs Enterprise components in [
|
||||
|
||||
Binary releases of Enterprise components are available at [the releases page for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
|
||||
and [the releases page for VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/releases/latest).
|
||||
Enterprise binaries and packages have `enterprise` suffix in their names. For example, `victoria-metrics-linux-amd64-v1.131.0-enterprise.tar.gz`.
|
||||
Enterprise binaries and packages have `enterprise` suffix in their names. For example, `victoria-metrics-linux-amd64-v1.132.0-enterprise.tar.gz`.
|
||||
|
||||
In order to run binary release of Enterprise component, please download the `*-enterprise.tar.gz` archive for your OS and architecture
|
||||
from the corresponding releases page and unpack it. Then run the unpacked binary.
|
||||
@@ -135,8 +135,8 @@ For example, the following command runs VictoriaMetrics Enterprise binary with t
|
||||
obtained at [this page](https://victoriametrics.com/products/enterprise/trial/):
|
||||
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.131.0/victoria-metrics-linux-amd64-v1.131.0-enterprise.tar.gz
|
||||
tar -xzf victoria-metrics-linux-amd64-v1.131.0-enterprise.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.132.0/victoria-metrics-linux-amd64-v1.132.0-enterprise.tar.gz
|
||||
tar -xzf victoria-metrics-linux-amd64-v1.132.0-enterprise.tar.gz
|
||||
./victoria-metrics-prod -license=BASE64_ENCODED_LICENSE_KEY
|
||||
```
|
||||
|
||||
@@ -151,7 +151,7 @@ Alternatively, VictoriaMetrics Enterprise license can be stored in the file and
|
||||
It is allowed to run VictoriaMetrics and VictoriaLogs Enterprise components in [cases listed here](#valid-cases-for-victoriametrics-enterprise).
|
||||
|
||||
Docker images for Enterprise components are available at [VictoriaMetrics Docker Hub](https://hub.docker.com/u/victoriametrics) and [VictoriaMetrics Quay](https://quay.io/organization/victoriametrics).
|
||||
Enterprise docker images have `enterprise` suffix in their names. For example, `victoriametrics/victoria-metrics:v1.131.0-enterprise`.
|
||||
Enterprise docker images have `enterprise` suffix in their names. For example, `victoriametrics/victoria-metrics:v1.132.0-enterprise`.
|
||||
|
||||
In order to run Docker image of VictoriaMetrics Enterprise component, it is required to provide the license key via the command-line
|
||||
flag as described in the [binary-releases](#binary-releases) section.
|
||||
@@ -161,13 +161,13 @@ Enterprise license key can be obtained at [this page](https://victoriametrics.co
|
||||
For example, the following command runs VictoriaMetrics Enterprise Docker image with the specified license key:
|
||||
|
||||
```sh
|
||||
docker run --name=victoria-metrics victoriametrics/victoria-metrics:v1.131.0-enterprise -license=BASE64_ENCODED_LICENSE_KEY
|
||||
docker run --name=victoria-metrics victoriametrics/victoria-metrics:v1.132.0-enterprise -license=BASE64_ENCODED_LICENSE_KEY
|
||||
```
|
||||
|
||||
Alternatively, the license code can be stored in the file and then referred via `-licenseFile` command-line flag:
|
||||
|
||||
```sh
|
||||
docker run --name=victoria-metrics -v /vm-license:/vm-license victoriametrics/victoria-metrics:v1.131.0-enterprise -licenseFile=/path/to/vm-license
|
||||
docker run --name=victoria-metrics -v /vm-license:/vm-license victoriametrics/victoria-metrics:v1.132.0-enterprise -licenseFile=/path/to/vm-license
|
||||
```
|
||||
|
||||
Example docker-compose configuration:
|
||||
@@ -177,7 +177,7 @@ version: "3.5"
|
||||
services:
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.131.0
|
||||
image: victoriametrics/victoria-metrics:v1.132.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -209,7 +209,7 @@ is used to provide the license key in plain-text:
|
||||
```yaml
|
||||
server:
|
||||
image:
|
||||
tag: v1.131.0-enterprise
|
||||
tag: v1.132.0-enterprise
|
||||
|
||||
license:
|
||||
key: {BASE64_ENCODED_LICENSE_KEY}
|
||||
@@ -220,7 +220,7 @@ In order to provide the license key via existing secret, the following values fi
|
||||
```yaml
|
||||
server:
|
||||
image:
|
||||
tag: v1.131.0-enterprise
|
||||
tag: v1.132.0-enterprise
|
||||
|
||||
license:
|
||||
secret:
|
||||
@@ -246,7 +246,7 @@ Or create secret via `kubectl`:
|
||||
kubectl create secret generic vm-license --from-literal=license={BASE64_ENCODED_LICENSE_KEY}
|
||||
```
|
||||
|
||||
Note that license key provided by using secret is mounted in a file. This allows to perform updates of the license without the need to restart the pod.
|
||||
Note that the license key provided by using a secret is mounted as a file. This allows updates to the license without the need to restart the pod.
|
||||
|
||||
### Kubernetes operator
|
||||
|
||||
@@ -270,10 +270,10 @@ spec:
|
||||
license:
|
||||
key: {BASE64_ENCODED_LICENSE_KEY}
|
||||
image:
|
||||
tag: v1.131.0-enterprise
|
||||
tag: v1.132.0-enterprise
|
||||
```
|
||||
|
||||
In order to provide the license key via existing secret, the following custom resource is used:
|
||||
In order to provide the license key via an existing secret, the following custom resource is used:
|
||||
|
||||
```yaml
|
||||
apiVersion: operator.victoriametrics.com/v1beta1
|
||||
@@ -287,7 +287,7 @@ spec:
|
||||
name: vm-license
|
||||
key: license
|
||||
image:
|
||||
tag: v1.131.0-enterprise
|
||||
tag: v1.132.0-enterprise
|
||||
```
|
||||
|
||||
Example secret with license key:
|
||||
@@ -308,8 +308,26 @@ Or create secret via `kubectl`:
|
||||
kubectl create secret generic vm-license --from-literal=license={BASE64_ENCODED_LICENSE_KEY}
|
||||
```
|
||||
|
||||
Note that license key provided by using secret is mounted in a file. This allows to perform updates of the license without the need to restart the pod.
|
||||
See full list of the CRD specifications in the [Operator API](https://docs.victoriametrics.com/operator/api/).
|
||||
Note that the license key provided by using a secret is mounted as a file. This allows updates to the license without the need to restart the pod.
|
||||
See the full list of the CRD specifications in the [Operator API](https://docs.victoriametrics.com/operator/api/).
|
||||
|
||||
### Updating the license key
|
||||
|
||||
Updating the license key for VictoriaMetrics and VictoriaLogs Enterprise components depends on the way
|
||||
the license key is provided to the component:
|
||||
- If the license key is provided via `-license` command-line flag, then the component should be restarted
|
||||
with the new license key.
|
||||
- If the license key is provided via `-licenseFile` command-line flag, then the license file should be updated
|
||||
with the new license key. The component will automatically reload the license file at the interval specified
|
||||
via `-licenseFile.reloadInterval` command-line flag (1 hour by default) and apply the new license key without the need to restart the component.
|
||||
- If the license key is provided via Kubernetes secret, then the secret should be updated
|
||||
with the new license key. The component will automatically reload the license file at the interval specified
|
||||
via `-licenseFile.reloadInterval` command-line flag (1 hour by default) and apply the new license key without the need to restart the component.
|
||||
- If the license key is provided via Helm chart value, then the corresponding `values.yaml` file
|
||||
should be updated with the new license key and then the Helm chart should be upgraded via `helm upgrade` command.
|
||||
This will restart the component with the new license key.
|
||||
- If the license key is provided via Kubernetes operator custom resource, then the corresponding custom resource
|
||||
should be updated with the new license key. This will restart the component with the new license key.
|
||||
|
||||
### FIPS Compatibility
|
||||
|
||||
@@ -320,7 +338,7 @@ Builds are available for amd64 and arm64 architectures.
|
||||
|
||||
Example archive:
|
||||
|
||||
`victoria-metrics-linux-amd64-v1.131.0-enterprise.tar.gz`
|
||||
`victoria-metrics-linux-amd64-v1.132.0-enterprise.tar.gz`
|
||||
|
||||
Includes:
|
||||
|
||||
@@ -329,7 +347,7 @@ Includes:
|
||||
|
||||
Example Docker image:
|
||||
|
||||
`victoriametrics/victoria-metrics:v1.131.0-enterprise-fips` – uses the FIPS-compatible binary and based on `scratch` image.
|
||||
`victoriametrics/victoria-metrics:v1.132.0-enterprise-fips` – uses the FIPS-compatible binary and is based on the `scratch` image.
|
||||
|
||||
## Monitoring license expiration
|
||||
|
||||
|
||||
@@ -35,8 +35,8 @@ scrape_configs:
|
||||
After you created the `scrape.yaml` file, download and unpack [single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) to the same directory:
|
||||
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.131.0/victoria-metrics-linux-amd64-v1.131.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.131.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.132.0/victoria-metrics-linux-amd64-v1.132.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.132.0.tar.gz
|
||||
```
|
||||
|
||||
Then start VictoriaMetrics and instruct it to scrape targets defined in `scrape.yaml` and save scraped metrics
|
||||
@@ -150,8 +150,8 @@ Then start [single-node VictoriaMetrics](https://docs.victoriametrics.com/victor
|
||||
|
||||
```yaml
|
||||
# Download and unpack single-node VictoriaMetrics
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.131.0/victoria-metrics-linux-amd64-v1.131.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.131.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.132.0/victoria-metrics-linux-amd64-v1.132.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.132.0.tar.gz
|
||||
|
||||
# Run single-node VictoriaMetrics with the given scrape.yaml
|
||||
./victoria-metrics-prod -promscrape.config=scrape.yaml
|
||||
|
||||
@@ -1981,6 +1981,7 @@ scrape_configs:
|
||||
# If more than this number of samples are present after metric relabeling
|
||||
# the entire scrape will be treated as failed.
|
||||
# By default, the limit is disabled.
|
||||
# The `global` sample_limit sets a default limit for all scrape targets. Available starting from v1.133.0.
|
||||
# The sample_limit can be set on a per-target basis by specifying `__sample_limit__`
|
||||
# label during target relabeling phase. Available starting from v1.103.0.
|
||||
# See https://docs.victoriametrics.com/victoriametrics/relabeling/
|
||||
|
||||
@@ -555,9 +555,6 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/
|
||||
-storage.cacheSizeIndexDBDataBlocksSparse size
|
||||
Overrides max size for indexdb/dataBlocksSparse cache. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-storage.cacheSizeIndexDBDateMetricID size
|
||||
Overrides max size for indexdb/date_metricID cache. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-storage.cacheSizeIndexDBIndexBlocks size
|
||||
Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
@@ -647,4 +644,13 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/
|
||||
Optional path to vmui dashboards. See https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/app/vmui/packages/vmui/public/dashboards
|
||||
-vmui.defaultTimezone string
|
||||
The default timezone to be used in vmui. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local
|
||||
-zabbixconnector.addDuplicateTagsSeparator string
|
||||
If set to non-empty value, enables merging of duplicate Zabbix tag values and set a separator for the values of these labels.
|
||||
-zabbixconnector.addEmptyTagsValue string
|
||||
If set to non-empty value, enables adding Zabbix tags without values to labels and set value for these labels.
|
||||
-zabbixconnector.addGroupsValue string
|
||||
If set to non-empty value, enables adding Zabbix host groups to labels and set value for these labels.
|
||||
-zabbixconnector.maxLineLen size
|
||||
The maximum length in bytes of a single line accepted by /zabbixconnector/api/v1/history
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
|
||||
```
|
||||
@@ -717,4 +717,13 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/vmagent/ .
|
||||
Whether to replace characters unsupported by Prometheus with underscores in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
-zabbixconnector.addDuplicateTagsSeparator string
|
||||
If set to non-empty value, enables merging of duplicate Zabbix tag values and set a separator for the values of these labels.
|
||||
-zabbixconnector.addEmptyTagsValue string
|
||||
If set to non-empty value, enables adding Zabbix tags without values to labels and set value for these labels.
|
||||
-zabbixconnector.addGroupsValue string
|
||||
If set to non-empty value, enables adding Zabbix host groups to labels and set value for these labels.
|
||||
-zabbixconnector.maxLineLen size
|
||||
The maximum length in bytes of a single line accepted by /zabbixconnector/api/v1/history
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
|
||||
```
|
||||
|
||||
@@ -414,6 +414,8 @@ expr: <string>
|
||||
|
||||
# Labels to add or overwrite before storing the result.
|
||||
# In case of conflicts, original labels are kept with prefix `exported_`.
|
||||
#
|
||||
# Templating (see https://docs.victoriametrics.com/victoriametrics/vmalert/#templating) is not supported in labels due to cardinality concerns. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8171.
|
||||
labels:
|
||||
[ <labelname>: <labelvalue> ]
|
||||
|
||||
|
||||
@@ -704,7 +704,8 @@ unauthorized_user:
|
||||
* `-maxConcurrentRequests` limits the global number of concurrent requests `vmauth` can serve across all the configured users.
|
||||
* `-maxConcurrentPerUserRequests` limits the number of concurrent requests `vmauth` can serve per each configured user.
|
||||
|
||||
It is also possible to set individual limits on the number of concurrent requests per each user with the `max_concurrent_requests` option. For example, the following [`-auth.config`](#auth-config) limits the number of concurrent requests from the user `foo` to 10:
|
||||
It is also possible to set individual limits on the number of concurrent requests per each user with the `max_concurrent_requests` option.
|
||||
For example, the following [`-auth.config`](#auth-config) limits the number of concurrent requests from the user `foo` to 10:
|
||||
|
||||
```yaml
|
||||
users:
|
||||
@@ -714,7 +715,8 @@ users:
|
||||
max_concurrent_requests: 10
|
||||
```
|
||||
|
||||
`vmauth` responds with `429 Too Many Requests` HTTP error when the number of concurrent requests exceeds the configured limits.
|
||||
`vmauth` responds with `429 Too Many Requests` HTTP error when the number of concurrent requests exceeds the configured limits for a duration
|
||||
exceeding the `-maxQueueDuration` command-line flag value.
|
||||
|
||||
The following [metrics](#monitoring) related to concurrency limits are exposed by `vmauth`:
|
||||
|
||||
@@ -725,10 +727,12 @@ The following [metrics](#monitoring) related to concurrency limits are exposed b
|
||||
because the global concurrency limit has been reached.
|
||||
* `vmauth_user_concurrent_requests_capacity{username="..."}` - the limit on the number of concurrent requests for the given `username`.
|
||||
* `vmauth_user_concurrent_requests_current{username="..."}` - the current number of concurrent requests for the given `username`.
|
||||
* `vmauth_user_concurrent_requests_limit_reached_total{username="..."}` - the number of requests rejected with `429 Too Many Requests` error because of the concurrency limit has been reached for the given `username`.
|
||||
* `vmauth_user_concurrent_requests_limit_reached_total{username="..."}` - the number of requests rejected with `429 Too Many Requests` error
|
||||
because the concurrency limit has been reached for the given `username`.
|
||||
* `vmauth_unauthorized_user_concurrent_requests_capacity` - the limit on the number of concurrent requests for unauthorized users (if `unauthorized_user` section is used).
|
||||
* `vmauth_unauthorized_user_concurrent_requests_current` - the current number of concurrent requests for unauthorized users (if `unauthorized_user` section is used).
|
||||
* `vmauth_unauthorized_user_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error because of the concurrency limit has been reached for unauthorized users (if `unauthorized_user` section is used).
|
||||
* `vmauth_unauthorized_user_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error
|
||||
because the concurrency limit has been reached for unauthorized users (if `unauthorized_user` section is used).
|
||||
|
||||
## Backend TLS setup
|
||||
|
||||
@@ -1106,7 +1110,9 @@ See [these docs](https://cloud.google.com/stackdriver/docs/managed-prometheus/tr
|
||||
`vmauth` exports the following metrics per each defined user in [`-auth.config`](#auth-config):
|
||||
|
||||
* `vmauth_user_requests_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of requests served for the given `username`
|
||||
* `vmauth_user_request_backend_errors_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of request errors for the given `username`
|
||||
* `vmauth_user_request_errors_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of request errors for the given `username`
|
||||
* `vmauth_user_request_backend_requests_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of backend requests for the given `username`
|
||||
* `vmauth_user_request_backend_errors_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of backend request errors for the given `username`
|
||||
* `vmauth_user_request_duration_seconds` [summary](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#summary) - the duration of requests for the given `username`
|
||||
* `vmauth_user_concurrent_requests_limit_reached_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of failed requests
|
||||
for the given `username` because of exceeded [concurrency limits](#concurrency-limiting)
|
||||
@@ -1139,12 +1145,14 @@ users:
|
||||
|
||||
`vmauth` exports the following metrics if `unauthorized_user` section is defined in [`-auth.config`](#auth-config):
|
||||
|
||||
* `vmauth_unauthorized_user_requests_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of unauthorized requests served
|
||||
* `vmauth_unauthorized_user_request_backend_errors_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of unauthorized request errors
|
||||
* `vmauth_unauthorized_user_request_duration_seconds` [summary](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#summary) - the duration of unauthorized requests
|
||||
* `vmauth_unauthorized_user_concurrent_requests_limit_reached_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of failed unauthorized requests because of exceeded [concurrency limits](#concurrency-limiting)
|
||||
* `vmauth_unauthorized_user_concurrent_requests_capacity` [gauge](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#gauge) - the maximum number of [concurrent unauthorized requests](#concurrency-limiting)
|
||||
* `vmauth_unauthorized_user_concurrent_requests_current` [gauge](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#gauge) - the current number of [concurrent unauthorized requests](#concurrency-limiting)
|
||||
* `vmauth_unauthorized_user_requests_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of requests served for unauthorized user
|
||||
* `vmauth_unauthorized_user_request_errors_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of request errors for unauthorized user
|
||||
* `vmauth_unauthorized_user_request_backend_requests_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of backend requests for unauthorized user
|
||||
* `vmauth_unauthorized_user_request_backend_errors_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of backend request errors for unauthorized user
|
||||
* `vmauth_unauthorized_user_request_duration_seconds` [summary](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#summary) - the duration of requests for unauthorized user
|
||||
* `vmauth_unauthorized_user_concurrent_requests_limit_reached_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of failed requests because of exceeded [concurrency limits](#concurrency-limiting) for unauthorized user
|
||||
* `vmauth_unauthorized_user_concurrent_requests_capacity` [gauge](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#gauge) - the maximum number of [concurrent requests](#concurrency-limiting) for unauthorized user
|
||||
* `vmauth_unauthorized_user_concurrent_requests_current` [gauge](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#gauge) - the current number of [concurrent requests](#concurrency-limiting) for unauthorized user
|
||||
|
||||
## How to build from sources
|
||||
|
||||
@@ -1199,220 +1207,4 @@ It is safe to share the collected profiles from security point of view, since th
|
||||
|
||||
Pass `-help` command-line arg to `vmauth` in order to see all the configuration options:
|
||||
|
||||
```shellhelp
|
||||
./vmauth -help
|
||||
|
||||
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics.
|
||||
|
||||
See the docs at https://docs.victoriametrics.com/victoriametrics/vmauth/ .
|
||||
|
||||
-auth.config string
|
||||
Path to auth config. It can point either to local file or to http url. See https://docs.victoriametrics.com/victoriametrics/vmauth/ for details on the format of this auth config
|
||||
-backend.TLSCAFile string
|
||||
Optional path to TLS root CA file, which is used for TLS verification when connecting to backends over HTTPS. See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup
|
||||
-backend.TLSCertFile string
|
||||
Optional path to TLS client certificate file, which must be sent to HTTPS backend. See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup
|
||||
-backend.TLSKeyFile string
|
||||
Optional path to TLS client key file, which must be sent to HTTPS backend. See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup
|
||||
-backend.TLSServerName string
|
||||
Optional TLS ServerName, which must be sent to HTTPS backend. See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup
|
||||
-backend.tlsInsecureSkipVerify
|
||||
Whether to skip TLS verification when connecting to backends over HTTPS. See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup
|
||||
-configCheckInterval duration
|
||||
interval for config file re-read. Zero value disables config re-reading. By default, refreshing is disabled, send SIGHUP for config refresh.
|
||||
-discoverBackendIPs
|
||||
Whether to discover backend IPs via periodic DNS queries to hostnames specified in url_prefix. This may be useful when url_prefix points to a hostname with dynamically scaled instances behind it. See https://docs.victoriametrics.com/victoriametrics/vmauth/#discovering-backend-ips
|
||||
-discoverBackendIPsInterval duration
|
||||
The interval for re-discovering backend IPs if -discoverBackendIPs command-line flag is set. Too low value may lead to DNS errors (default 10s)
|
||||
-dryRun
|
||||
Whether to check only config files without running vmauth. The auth configuration file is validated. The -auth.config flag must be specified.
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP are used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables in addition to the command line. Command line flag values have priority over values from environment vars. Flags are read only from the command line if this flag isn't set. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#environment-variables for more details
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-eula
|
||||
Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
-failTimeout duration
|
||||
Sets a delay period for load balancing to skip a malfunctioning backend (default 3s)
|
||||
-filestream.disableFadvise
|
||||
Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
|
||||
-flagsAuthKey value
|
||||
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path
|
||||
-http.connTimeout duration
|
||||
Incoming connections to -httpListenAddr are closed after the configured timeout. This may help evenly spreading load among a cluster of services behind TCP-level load balancer. Zero value disables closing of incoming connections (default 2m0s)
|
||||
-http.disableCORS
|
||||
Disable CORS for all origins (*)
|
||||
-http.disableKeepAlive
|
||||
Whether to disable HTTP keep-alive for incoming connections at -httpListenAddr
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header, recommended: "default-src 'self'"
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header, recommended: 'max-age=31536000; includeSubDomains'
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password value
|
||||
Password for HTTP server's Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
Flag value can be read from the given file when using -httpAuth.password=file:///abs/path/to/file or -httpAuth.password=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -httpAuth.password=http://host/path or -httpAuth.password=https://host/path
|
||||
-httpAuth.username string
|
||||
Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpAuthHeader array
|
||||
HTTP request header to use for obtaining authorization tokens. By default auth tokens are read from Authorization request header
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-httpInternalListenAddr array
|
||||
TCP address to listen for incoming internal API http requests. Such as /health, /-/reload, /debug/pprof, etc. If flag is set, vmauth no longer serves internal API at -httpListenAddr.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-httpListenAddr array
|
||||
TCP address to listen for incoming http requests. By default, serves internal API and proxy requests. See also -tls, -httpListenAddr.useProxyProtocol and -httpInternalListenAddr.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-httpListenAddr.useProxyProtocol array
|
||||
Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-httpRealIPHeader string
|
||||
HTTP request header to use for obtaining IP address of client for applying 'ip_filters'. By default vmauth uses IP address of the TCP client. Useful if vmauth is behind reverse-proxy
|
||||
-idleConnTimeout duration
|
||||
The timeout for HTTP keep-alive connections to backend services. It is recommended setting this value to values smaller than -http.idleConnTimeout set at backend services (default 50s)
|
||||
-internStringCacheExpireDuration duration
|
||||
The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
|
||||
-internStringDisableCache
|
||||
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
|
||||
-license string
|
||||
License key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
|
||||
-license.forceOffline
|
||||
Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is available only in Enterprise binaries
|
||||
-licenseFile string
|
||||
Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
|
||||
-licenseFile.reloadInterval duration
|
||||
Interval for reloading the license file specified via -licenseFile. See https://victoriametrics.com/products/enterprise/ . This flag is available only in Enterprise binaries (default 1h0m0s)
|
||||
-loadBalancingPolicy string
|
||||
The default load balancing policy to use for backend urls specified inside url_prefix section. Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing (default "least_loaded")
|
||||
-logInvalidAuthTokens
|
||||
Whether to log requests with invalid auth tokens. Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerMaxArgLen int
|
||||
The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' is prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 5000)
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxConcurrentPerUserRequests int
|
||||
The maximum number of concurrent requests vmauth can process per each configured user. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentRequests command-line option and max_concurrent_requests option in per-user config (default 300)
|
||||
-maxConcurrentRequests int
|
||||
The maximum number of concurrent requests vmauth can process. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options (default 1000)
|
||||
-maxIdleConnsPerBackend int
|
||||
The maximum number of idle connections vmauth can open per each backend host. See also -maxConcurrentRequests (default 100)
|
||||
-maxRequestBodySizeToRetry size
|
||||
The maximum request body size, which can be cached and re-tried at other backends. Bigger values may require more memory. Zero or negative value disables caching of request body. This may be useful when proxying data ingestion requests
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16384)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
|
||||
-mergeQueryArgs array
|
||||
An optional list of client query arg names, which must be merged with args at backend urls. The rest of client query args are replaced by the corresponding query args from backend urls for security reasons; see https://docs.victoriametrics.com/victoriametrics/vmauth/#query-args-handling
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-metrics.exposeMetadata
|
||||
Whether to expose TYPE and HELP metadata at the /metrics page, which is exposed at -httpListenAddr . The metadata may be needed when the /metrics page is consumed by systems, which require this information. For example, Managed Prometheus in Google Cloud - https://cloud.google.com/stackdriver/docs/managed-prometheus/troubleshooting#missing-metric-type
|
||||
-metricsAuthKey value
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -metricsAuthKey=file:///abs/path/to/file or -metricsAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -metricsAuthKey=http://host/path or -metricsAuthKey=https://host/path
|
||||
-mtls array
|
||||
Whether to require valid client certificate for https requests to the corresponding -httpListenAddr . This flag works only if -tls flag is set. See also -mtlsCAFile . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-mtlsCAFile array
|
||||
Optional path to TLS Root CA for verifying client certificates at the corresponding -httpListenAddr when -mtls is enabled. By default the host system TLS Root CA is used for client certificate verification. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-pprofAuthKey value
|
||||
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -pprofAuthKey=file:///abs/path/to/file or -pprofAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -pprofAuthKey=http://host/path or -pprofAuthKey=https://host/path
|
||||
-pushmetrics.disableCompression
|
||||
Whether to disable request body compression when pushing metrics to every -pushmetrics.url
|
||||
-pushmetrics.extraLabel array
|
||||
Optional labels to add to metrics pushed to every -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to every -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-pushmetrics.header array
|
||||
Optional HTTP request header to send to every -pushmetrics.url . For example, -pushmetrics.header='Authorization: Basic foobar' adds 'Authorization: Basic foobar' header to every request to every -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-pushmetrics.interval duration
|
||||
Interval for pushing metrics to every -pushmetrics.url (default 10s)
|
||||
-pushmetrics.url array
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-reloadAuthKey value
|
||||
Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -reloadAuthKey=file:///abs/path/to/file or -reloadAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -reloadAuthKey=http://host/path or -reloadAuthKey=https://host/path
|
||||
-removeXFFHTTPHeaderValue
|
||||
Whether to remove the X-Forwarded-For HTTP header value from client requests before forwarding them to the backend. Recommended when vmauth is exposed to the internet.
|
||||
-responseTimeout duration
|
||||
The timeout for receiving a response from backend (default 5m0s)
|
||||
-retryStatusCodes array
|
||||
Comma-separated list of default HTTP response status codes when vmauth re-tries the request on other backends. See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing for details (default 0)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-tls array
|
||||
Whether to enable TLS for incoming HTTP requests at the given -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set. See also -mtls
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-tlsAutocertCacheDir string
|
||||
Directory to store TLS certificates issued via Let's Encrypt. Certificates are lost on restarts if this flag isn't set. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
-tlsAutocertEmail string
|
||||
Contact email for the issued Let's Encrypt TLS certificates. See also -tlsAutocertHosts and -tlsAutocertCacheDir . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
-tlsAutocertHosts array
|
||||
Optional hostnames for automatic issuing of Let's Encrypt TLS certificates. These hostnames must be reachable at -httpListenAddr . The -httpListenAddr must listen tcp port 443 . The -tlsAutocertHosts overrides -tlsCertFile and -tlsKeyFile . See also -tlsAutocertEmail and -tlsAutocertCacheDir . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsCertFile array
|
||||
Path to file with TLS certificate for the corresponding -httpListenAddr if -tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated. See also -tlsAutocertHosts
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsCipherSuites array
|
||||
Optional list of TLS cipher suites for incoming requests over HTTPS if -tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsKeyFile array
|
||||
Path to file with TLS key for the corresponding -httpListenAddr if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated. See also -tlsAutocertHosts
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsMinVersion array
|
||||
Optional minimum TLS version to use for the corresponding -httpListenAddr if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
{{% content "vmauth_flags.md" %}}
|
||||
|
||||
238
docs/victoriametrics/vmauth_flags.md
Normal file
238
docs/victoriametrics/vmauth_flags.md
Normal file
@@ -0,0 +1,238 @@
|
||||
---
|
||||
build:
|
||||
list: never
|
||||
publishResources: false
|
||||
render: never
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
<!-- The file has to be manually updated during feature work in PR; the make docs-update-flags command can be used periodically to ensure the flags are in sync. -->
|
||||
```shellhelp
|
||||
|
||||
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics.
|
||||
|
||||
See the docs at https://docs.victoriametrics.com/victoriametrics/vmauth/ .
|
||||
|
||||
-auth.config string
|
||||
Path to auth config. It can point either to local file or to http url. See https://docs.victoriametrics.com/victoriametrics/vmauth/ for details on the format of this auth config
|
||||
-backend.TLSCAFile string
|
||||
Optional path to TLS root CA file, which is used for TLS verification when connecting to backends over HTTPS. See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup
|
||||
-backend.TLSCertFile string
|
||||
Optional path to TLS client certificate file, which must be sent to HTTPS backend. See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup
|
||||
-backend.TLSKeyFile string
|
||||
Optional path to TLS client key file, which must be sent to HTTPS backend. See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup
|
||||
-backend.TLSServerName string
|
||||
Optional TLS ServerName, which must be sent to HTTPS backend. See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup
|
||||
-backend.tlsInsecureSkipVerify
|
||||
Whether to skip TLS verification when connecting to backends over HTTPS. See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup
|
||||
-configCheckInterval duration
|
||||
Interval for config file re-read. Zero value disables config re-reading. By default, refreshing is disabled; send SIGHUP for config refresh.
|
||||
-discoverBackendIPs
|
||||
Whether to discover backend IPs via periodic DNS queries to hostnames specified in url_prefix. This may be useful when url_prefix points to a hostname with dynamically scaled instances behind it. See https://docs.victoriametrics.com/victoriametrics/vmauth/#discovering-backend-ips
|
||||
-discoverBackendIPsInterval duration
|
||||
The interval for re-discovering backend IPs if -discoverBackendIPs command-line flag is set. Too low value may lead to DNS errors (default 10s)
|
||||
-dryRun
|
||||
Whether to check only config files without running vmauth. The auth configuration file is validated. The -auth.config flag must be specified.
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP are used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables in addition to the command line. Command line flag values have priority over values from environment vars. Flags are read only from the command line if this flag isn't set. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#environment-variables for more details
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-eula
|
||||
Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
-failTimeout duration
|
||||
Sets a delay period for load balancing to skip a malfunctioning backend (default 3s)
|
||||
-filestream.disableFadvise
|
||||
Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
|
||||
-flagsAuthKey value
|
||||
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file.
|
||||
Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path
|
||||
-fs.maxConcurrency int
|
||||
The maximum number of concurrent goroutines to work with files; smaller values may help reducing Go scheduling latency on systems with small number of CPU cores; higher values may help reducing data ingestion latency on systems with high-latency storage such as NFS or Ceph (default fsutil.getDefaultConcurrency())
|
||||
-http.connTimeout duration
|
||||
Incoming connections to -httpListenAddr are closed after the configured timeout. This may help evenly spreading load among a cluster of services behind TCP-level load balancer. Zero value disables closing of incoming connections (default 2m0s)
|
||||
-http.disableCORS
|
||||
Disable CORS for all origins (*)
|
||||
-http.disableKeepAlive
|
||||
Whether to disable HTTP keep-alive for incoming connections at -httpListenAddr
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header, recommended: "default-src 'self'"
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header, recommended: 'max-age=31536000; includeSubDomains'
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password value
|
||||
Password for HTTP server's Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
Flag value can be read from the given file when using -httpAuth.password=file:///abs/path/to/file or -httpAuth.password=file://./relative/path/to/file.
|
||||
Flag value can be read from the given http/https url when using -httpAuth.password=http://host/path or -httpAuth.password=https://host/path
|
||||
-httpAuth.username string
|
||||
Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpAuthHeader array
|
||||
HTTP request header to use for obtaining authorization tokens. By default auth tokens are read from Authorization request header
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-httpInternalListenAddr array
|
||||
TCP address to listen for incoming internal API http requests. Such as /health, /-/reload, /debug/pprof, etc. If flag is set, vmauth no longer serves internal API at -httpListenAddr.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-httpListenAddr array
|
||||
TCP address to listen for incoming http requests. By default, serves internal API and proxy requests. See also -tls, -httpListenAddr.useProxyProtocol and -httpInternalListenAddr.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-httpListenAddr.useProxyProtocol array
|
||||
Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-httpRealIPHeader string
|
||||
HTTP request header to use for obtaining IP address of client for applying 'ip_filters'. By default vmauth uses the IP address of the TCP client. Useful if vmauth is behind reverse-proxy
|
||||
-idleConnTimeout duration
|
||||
The timeout for HTTP keep-alive connections to backend services. It is recommended setting this value to values smaller than -http.idleConnTimeout set at backend services (default 50s)
|
||||
-internStringCacheExpireDuration duration
|
||||
The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
|
||||
-internStringDisableCache
|
||||
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
|
||||
-license string
|
||||
License key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
|
||||
-license.forceOffline
|
||||
Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is available only in Enterprise binaries
|
||||
-licenseFile string
|
||||
Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
|
||||
-licenseFile.reloadInterval duration
|
||||
Interval for reloading the license file specified via -licenseFile. See https://victoriametrics.com/products/enterprise/ . This flag is available only in Enterprise binaries (default 1h0m0s)
|
||||
-loadBalancingPolicy string
|
||||
The default load balancing policy to use for backend urls specified inside url_prefix section. Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing (default "least_loaded")
|
||||
-logInvalidAuthTokens
|
||||
Whether to log requests with invalid auth tokens. Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerMaxArgLen int
|
||||
The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' are the prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 5000)
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxConcurrentPerUserRequests int
|
||||
The maximum number of concurrent requests vmauth can process per each configured user. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxQueueDuration and -maxConcurrentRequests command-line options and max_concurrent_requests option in per-user config (default 300)
|
||||
-maxConcurrentRequests int
|
||||
The maximum number of concurrent requests vmauth can process. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxQueueDuration, -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options (default 1000)
|
||||
-maxIdleConnsPerBackend int
|
||||
The maximum number of idle connections vmauth can open per each backend host. See also -maxConcurrentRequests (default 100)
|
||||
-maxQueueDuration duration
|
||||
The maximum duration the request waits for execution when the number of concurrently executed requests reaches -maxConcurrentRequests or -maxConcurrentPerUserRequests before returning '429 Too Many Requests' error. This allows graceful handling of short spikes in the number of concurrent requests (default 10s)
|
||||
-maxRequestBodySizeToRetry size
|
||||
The maximum request body size, which can be cached and re-tried at other backends. Bigger values may require more memory. Zero or negative value disables caching of request body. This may be useful when proxying data ingestion requests
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16384)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
|
||||
-mergeQueryArgs array
|
||||
An optional list of client query arg names, which must be merged with args at backend urls. The rest of client query args are replaced by the corresponding query args from backend urls for security reasons; see https://docs.victoriametrics.com/victoriametrics/vmauth/#query-args-handling
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-metrics.exposeMetadata
|
||||
Whether to expose TYPE and HELP metadata at the /metrics page, which is exposed at -httpListenAddr . The metadata may be needed when the /metrics page is consumed by systems, which require this information. For example, Managed Prometheus in Google Cloud - https://cloud.google.com/stackdriver/docs/managed-prometheus/troubleshooting#missing-metric-type
|
||||
-metricsAuthKey value
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -metricsAuthKey=file:///abs/path/to/file or -metricsAuthKey=file://./relative/path/to/file.
|
||||
Flag value can be read from the given http/https url when using -metricsAuthKey=http://host/path or -metricsAuthKey=https://host/path
|
||||
-mtls array
|
||||
Whether to require valid client certificate for https requests to the corresponding -httpListenAddr . This flag works only if -tls flag is set. See also -mtlsCAFile . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-mtlsCAFile array
|
||||
Optional path to TLS Root CA for verifying client certificates at the corresponding -httpListenAddr when -mtls is enabled. By default the host system TLS Root CA is used for client certificate verification. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-pprofAuthKey value
|
||||
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -pprofAuthKey=file:///abs/path/to/file or -pprofAuthKey=file://./relative/path/to/file.
|
||||
Flag value can be read from the given http/https url when using -pprofAuthKey=http://host/path or -pprofAuthKey=https://host/path
|
||||
-pushmetrics.disableCompression
|
||||
Whether to disable request body compression when pushing metrics to every -pushmetrics.url
|
||||
-pushmetrics.extraLabel array
|
||||
Optional labels to add to metrics pushed to every -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to every -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-pushmetrics.header array
|
||||
Optional HTTP request header to send to every -pushmetrics.url . For example, -pushmetrics.header='Authorization: Basic foobar' adds 'Authorization: Basic foobar' header to every request to every -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-pushmetrics.interval duration
|
||||
Interval for pushing metrics to every -pushmetrics.url (default 10s)
|
||||
-pushmetrics.url array
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-reloadAuthKey value
|
||||
Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -reloadAuthKey=file:///abs/path/to/file or -reloadAuthKey=file://./relative/path/to/file.
|
||||
Flag value can be read from the given http/https url when using -reloadAuthKey=http://host/path or -reloadAuthKey=https://host/path
|
||||
-removeXFFHTTPHeaderValue
|
||||
Whether to remove the X-Forwarded-For HTTP header value from client requests before forwarding them to the backend. Recommended when vmauth is exposed to the internet.
|
||||
-responseTimeout duration
|
||||
The timeout for receiving a response from backend (default 5m0s)
|
||||
-retryStatusCodes array
|
||||
Comma-separated list of default HTTP response status codes when vmauth re-tries the request on other backends. See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing for details (default 0)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-secret.flags array
|
||||
Comma-separated list of flag names with secret values. Values for these flags are hidden in logs and on /metrics page
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tls array
|
||||
Whether to enable TLS for incoming HTTP requests at the given -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set. See also -mtls
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-tlsAutocertCacheDir string
|
||||
Directory to store TLS certificates issued via Let's Encrypt. Certificates are lost on restarts if this flag isn't set. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
-tlsAutocertEmail string
|
||||
Contact email for the issued Let's Encrypt TLS certificates. See also -tlsAutocertHosts and -tlsAutocertCacheDir . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
-tlsAutocertHosts array
|
||||
Optional hostnames for automatic issuing of Let's Encrypt TLS certificates. These hostnames must be reachable at -httpListenAddr . The -httpListenAddr must listen tcp port 443 . The -tlsAutocertHosts overrides -tlsCertFile and -tlsKeyFile . See also -tlsAutocertEmail and -tlsAutocertCacheDir . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsCertFile array
|
||||
Path to file with TLS certificate for the corresponding -httpListenAddr if -tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated. See also -tlsAutocertHosts
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsCipherSuites array
|
||||
Optional list of TLS cipher suites for incoming requests over HTTPS if -tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsKeyFile array
|
||||
Path to file with TLS key for the corresponding -httpListenAddr if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated. See also -tlsAutocertHosts
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsMinVersion array
|
||||
Optional minimum TLS version to use for the corresponding -httpListenAddr if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
@@ -34,9 +34,9 @@ vmctl command-line tool is available as:
|
||||
|
||||
Download and unpack vmctl:
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.131.0/vmutils-darwin-arm64-v1.131.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.132.0/vmutils-darwin-arm64-v1.132.0.tar.gz
|
||||
|
||||
tar xzf vmutils-darwin-arm64-v1.131.0.tar.gz
|
||||
tar xzf vmutils-darwin-arm64-v1.132.0.tar.gz
|
||||
```
|
||||
|
||||
Once binary is unpacked, see the full list of supported modes by running the following command:
|
||||
|
||||
@@ -312,4 +312,13 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/cluster-victori
|
||||
Timeout for establishing RPC connections from vminsert to vmstorage. See also -vmstorageUserTimeout (default 3s)
|
||||
-vmstorageUserTimeout duration
|
||||
Network timeout for RPC connections from vminsert to vmstorage (Linux only). Lower values speed up re-routing recovery when some of vmstorage nodes become unavailable because of networking issues. Read more about TCP_USER_TIMEOUT at https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die/ . See also -vmstorageDialTimeout (default 3s)
|
||||
-zabbixconnector.addDuplicateTagsSeparator string
|
||||
If set to non-empty value, enables merging of duplicate Zabbix tag values and sets a separator for the values of these labels.
|
||||
-zabbixconnector.addEmptyTagsValue string
|
||||
If set to non-empty value, enables adding Zabbix tags without values to labels and sets the value for these labels.
|
||||
-zabbixconnector.addGroupsValue string
|
||||
If set to non-empty value, enables adding Zabbix host groups to labels and sets the value for these labels.
|
||||
-zabbixconnector.maxLineLen size
|
||||
The maximum length in bytes of a single line accepted by /zabbixconnector/api/v1/history
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
|
||||
```
|
||||
|
||||
@@ -242,9 +242,6 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/cluster-victori
|
||||
-storage.cacheSizeIndexDBDataBlocksSparse size
|
||||
Overrides max size for indexdb/dataBlocksSparse cache. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-storage.cacheSizeIndexDBDateMetricID size
|
||||
Overrides max size for indexdb/date_metricID cache. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-storage.cacheSizeIndexDBIndexBlocks size
|
||||
Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
|
||||
@@ -7,8 +7,7 @@ import (
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// CompressZSTDLevel appends compressed src to dst and returns
|
||||
// the appended dst.
|
||||
// CompressZSTDLevel appends compressed src to dst and returns the appended dst.
|
||||
//
|
||||
// The given compressLevel is used for the compression.
|
||||
func CompressZSTDLevel(dst, src []byte, compressLevel int) []byte {
|
||||
@@ -20,15 +19,27 @@ func CompressZSTDLevel(dst, src []byte, compressLevel int) []byte {
|
||||
return dst
|
||||
}
|
||||
|
||||
// DecompressZSTD decompresses src, appends the result to dst and returns
|
||||
// the appended dst.
|
||||
// DecompressZSTD decompresses src, appends the result to dst and returns the appended dst.
|
||||
//
|
||||
// This function must be called only for the trusted src.
|
||||
// Use DecompressZSTDLimited for untrusted src.
|
||||
func DecompressZSTD(dst, src []byte) ([]byte, error) {
|
||||
decompressCalls.Inc()
|
||||
b, err := zstd.Decompress(dst, src)
|
||||
if err != nil {
|
||||
return b, fmt.Errorf("cannot decompress zstd block with len=%d to a buffer with len=%d: %w; block data (hex): %X", len(src), len(dst), err, src)
|
||||
return b, fmt.Errorf("cannot decompress zstd block with len=%d: %w; block data (hex): %X", len(src), err, src)
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// DecompressZSTDLimited decompresses src, appends the result to dst and returns the appended dst.
|
||||
//
|
||||
// If the decompressed result exceeds maxDataSizeBytes, then error is returned.
|
||||
func DecompressZSTDLimited(dst, src []byte, maxDataSizeBytes int) ([]byte, error) {
|
||||
decompressCalls.Inc()
|
||||
b, err := zstd.DecompressLimited(dst, src, maxDataSizeBytes)
|
||||
if err != nil {
|
||||
return b, fmt.Errorf("cannot decompress zstd block with len=%d and maxDataSizeBytes=%d: %w", len(src), maxDataSizeBytes, err)
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
@@ -9,11 +9,14 @@ import (
|
||||
// Decompress appends decompressed src to dst and returns the result.
|
||||
//
|
||||
// This function must be called only for the trusted src.
|
||||
// Otherwise use DecompressLimited function.
|
||||
func Decompress(dst, src []byte) ([]byte, error) {
|
||||
return gozstd.Decompress(dst, src)
|
||||
}
|
||||
|
||||
// DecompressLimited appends decompressed src to dst and returns the result.
|
||||
//
|
||||
// If the decompressed result exceeds maxDataSizeBytes, then error is returned.
|
||||
func DecompressLimited(dst, src []byte, maxDataSizeBytes int) ([]byte, error) {
|
||||
return gozstd.DecompressLimited(dst, src, maxDataSizeBytes)
|
||||
}
|
||||
|
||||
@@ -37,12 +37,16 @@ func init() {
|
||||
// Decompress appends decompressed src to dst and returns the result.
|
||||
//
|
||||
// This function must be called only for the trusted src.
|
||||
//
|
||||
// Otherwise use DecompressLimited function.
|
||||
func Decompress(dst, src []byte) ([]byte, error) {
|
||||
d := getDecoder(0)
|
||||
return d.DecodeAll(src, dst)
|
||||
}
|
||||
|
||||
// DecompressLimited appends decompressed src to dst and returns the result.
|
||||
//
|
||||
// If the decompressed result exceeds maxDataSizeBytes, then error is returned.
|
||||
func DecompressLimited(dst, src []byte, maxDataSizeBytes int) ([]byte, error) {
|
||||
d := getDecoder(maxDataSizeBytes)
|
||||
return d.DecodeAll(src, dst)
|
||||
|
||||
@@ -2,148 +2,73 @@ package filestream
|
||||
|
||||
import (
|
||||
"io"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// ParallelFileCreator is used for parallel creating of files for the given dstPath.
|
||||
//
|
||||
// ParallelFileCreator is needed for speeding up creating many files on high-latency
|
||||
// storage systems such as NFS or Ceph.
|
||||
type ParallelFileCreator struct {
|
||||
tasks []parallelFileCreatorTask
|
||||
}
|
||||
|
||||
type parallelFileCreatorTask struct {
|
||||
dstPath string
|
||||
// FileCreatorTask is a task for creating the file at the given path and assigning it to *wc.
|
||||
type FileCreatorTask struct {
|
||||
path string
|
||||
wc *WriteCloser
|
||||
nocache bool
|
||||
}
|
||||
|
||||
// Add registers a task for creating the file at dstPath and assigning it to *wc.
|
||||
//
|
||||
// Tasks are executed in parallel on Run() call.
|
||||
func (pfc *ParallelFileCreator) Add(dstPath string, wc *WriteCloser, nocache bool) {
|
||||
pfc.tasks = append(pfc.tasks, parallelFileCreatorTask{
|
||||
dstPath: dstPath,
|
||||
// NewFileCreatorTask creates a new task for creating the file at the given path and assigning it to *wc.
|
||||
func NewFileCreatorTask(path string, wc *WriteCloser, nocache bool) *FileCreatorTask {
|
||||
return &FileCreatorTask{
|
||||
path: path,
|
||||
wc: wc,
|
||||
nocache: nocache,
|
||||
})
|
||||
}
|
||||
|
||||
// Run runs all the registered tasks for creating files in parallel.
|
||||
func (pfc *ParallelFileCreator) Run() {
|
||||
var wg sync.WaitGroup
|
||||
concurrencyCh := fsutil.GetConcurrencyCh()
|
||||
for _, task := range pfc.tasks {
|
||||
concurrencyCh <- struct{}{}
|
||||
wg.Add(1)
|
||||
|
||||
go func(dstPath string, wc *WriteCloser, nocache bool) {
|
||||
defer func() {
|
||||
wg.Done()
|
||||
<-concurrencyCh
|
||||
}()
|
||||
|
||||
*wc = MustCreate(dstPath, nocache)
|
||||
}(task.dstPath, task.wc, task.nocache)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
// ParallelFileOpener is used for parallel opening of files at the given dstPath.
|
||||
//
|
||||
// ParallelFileOpener is needed for speeding up opening many files on high-latency
|
||||
// storage systems such as NFS or Ceph.
|
||||
type ParallelFileOpener struct {
|
||||
tasks []parallelFileOpenerTask
|
||||
// Run executes file creating task
|
||||
func (t *FileCreatorTask) Run() {
|
||||
*t.wc = MustCreate(t.path, t.nocache)
|
||||
}
|
||||
|
||||
type parallelFileOpenerTask struct {
|
||||
// FileOpenerTask is a task for opening the file at the given path and assigning it to *rc.
|
||||
type FileOpenerTask struct {
|
||||
path string
|
||||
rc *ReadCloser
|
||||
nocache bool
|
||||
}
|
||||
|
||||
// Add registers a task for opening the file ath the given path and assigning it to *rc.
|
||||
//
|
||||
// Tasks are executed in parallel on Run() call.
|
||||
func (pfo *ParallelFileOpener) Add(path string, rc *ReadCloser, nocache bool) {
|
||||
pfo.tasks = append(pfo.tasks, parallelFileOpenerTask{
|
||||
// NewFileOpenerTask creates a new task for opening the file at the given path and assigning it to *rc.
|
||||
func NewFileOpenerTask(path string, rc *ReadCloser, nocache bool) *FileOpenerTask {
|
||||
return &FileOpenerTask{
|
||||
path: path,
|
||||
rc: rc,
|
||||
nocache: nocache,
|
||||
})
|
||||
}
|
||||
|
||||
// Run runs all the registered tasks for opening files in parallel.
|
||||
func (pfo *ParallelFileOpener) Run() {
|
||||
var wg sync.WaitGroup
|
||||
concurrencyCh := fsutil.GetConcurrencyCh()
|
||||
for _, task := range pfo.tasks {
|
||||
concurrencyCh <- struct{}{}
|
||||
wg.Add(1)
|
||||
|
||||
go func(path string, rc *ReadCloser, nocache bool) {
|
||||
defer func() {
|
||||
wg.Done()
|
||||
<-concurrencyCh
|
||||
}()
|
||||
|
||||
*rc = MustOpen(path, nocache)
|
||||
}(task.path, task.rc, task.nocache)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
// ParallelStreamWriter is used for parallel writing of data from io.WriterTo to the given dstPath files.
|
||||
//
|
||||
// ParallelStreamWriter is needed for speeding up writing data to many files on high-latency
|
||||
// storage systems such as NFS or Ceph.
|
||||
type ParallelStreamWriter struct {
|
||||
tasks []parallelStreamWriterTask
|
||||
// Run executes file opening task
|
||||
func (t *FileOpenerTask) Run() {
|
||||
*t.rc = MustOpen(t.path, t.nocache)
|
||||
}
|
||||
|
||||
type parallelStreamWriterTask struct {
|
||||
dstPath string
|
||||
src io.WriterTo
|
||||
// StreamWriterTask is a task for writing the data from src to the file at the given path.
|
||||
type StreamWriterTask struct {
|
||||
path string
|
||||
src io.WriterTo
|
||||
}
|
||||
|
||||
// Add adds a task to execute in parallel - to write the data from src to the dstPath.
|
||||
//
|
||||
// Tasks are executed in parallel on Run() call.
|
||||
func (psw *ParallelStreamWriter) Add(dstPath string, src io.WriterTo) {
|
||||
psw.tasks = append(psw.tasks, parallelStreamWriterTask{
|
||||
dstPath: dstPath,
|
||||
src: src,
|
||||
})
|
||||
}
|
||||
|
||||
// Run executes all the tasks added via Add() call in parallel.
|
||||
func (psw *ParallelStreamWriter) Run() {
|
||||
var wg sync.WaitGroup
|
||||
concurrencyCh := fsutil.GetConcurrencyCh()
|
||||
for _, task := range psw.tasks {
|
||||
concurrencyCh <- struct{}{}
|
||||
wg.Add(1)
|
||||
|
||||
go func(dstPath string, src io.WriterTo) {
|
||||
defer func() {
|
||||
wg.Done()
|
||||
<-concurrencyCh
|
||||
}()
|
||||
|
||||
f := MustCreate(dstPath, false)
|
||||
if _, err := src.WriteTo(f); err != nil {
|
||||
f.MustClose()
|
||||
// Do not call MustRemovePath(path), so the user could inspect
|
||||
// the file contents during investigation of the issue.
|
||||
logger.Panicf("FATAL: cannot write data to %q: %s", dstPath, err)
|
||||
}
|
||||
f.MustClose()
|
||||
}(task.dstPath, task.src)
|
||||
// NewStreamWriterTask creates new task for writing the data from src to the path
|
||||
func NewStreamWriterTask(path string, src io.WriterTo) *StreamWriterTask {
|
||||
return &StreamWriterTask{
|
||||
path: path,
|
||||
src: src,
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func (t *StreamWriterTask) Run() {
|
||||
f := MustCreate(t.path, false)
|
||||
if _, err := t.src.WriteTo(f); err != nil {
|
||||
f.MustClose()
|
||||
// Do not call MustRemovePath(path), so the user could inspect
|
||||
// the file contents during investigation of the issue.
|
||||
logger.Panicf("FATAL: cannot write data to %q: %s", t.path, err)
|
||||
}
|
||||
f.MustClose()
|
||||
}
|
||||
|
||||
@@ -18,8 +18,8 @@ func getDefaultConcurrency() int {
|
||||
return n
|
||||
}
|
||||
|
||||
// GetConcurrencyCh returns a channel for limiting the concurrency of operations with files.
|
||||
func GetConcurrencyCh() chan struct{} {
|
||||
// getConcurrencyCh returns a channel for limiting the concurrency of operations with files.
|
||||
func getConcurrencyCh() chan struct{} {
|
||||
concurrencyChOnce.Do(initConcurrencyCh)
|
||||
return concurrencyCh
|
||||
}
|
||||
@@ -30,3 +30,39 @@ func initConcurrencyCh() {
|
||||
|
||||
var concurrencyChOnce sync.Once
|
||||
var concurrencyCh chan struct{}
|
||||
|
||||
type parallelTask interface {
|
||||
Run()
|
||||
}
|
||||
|
||||
// ParallelExecutor is used for executing file operations in parallel.
|
||||
//
|
||||
// ParallelExecutor is needed for speeding up file operations on high-latency storage systems such as NFS or Ceph.
|
||||
type ParallelExecutor struct {
|
||||
tasks []parallelTask
|
||||
}
|
||||
|
||||
// Add registers a task for parallel file operations
|
||||
//
|
||||
// Tasks are executed in parallel on Run() call.
|
||||
func (pe *ParallelExecutor) Add(task parallelTask) {
|
||||
pe.tasks = append(pe.tasks, task)
|
||||
}
|
||||
|
||||
func (pe *ParallelExecutor) Run() {
|
||||
var wg sync.WaitGroup
|
||||
concurrencyCh := getConcurrencyCh()
|
||||
for _, task := range pe.tasks {
|
||||
concurrencyCh <- struct{}{}
|
||||
wg.Add(1)
|
||||
|
||||
go func(task parallelTask) {
|
||||
defer func() {
|
||||
wg.Done()
|
||||
<-concurrencyCh
|
||||
}()
|
||||
task.Run()
|
||||
}(task)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
@@ -1,55 +1,27 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fsutil"
|
||||
)
|
||||
|
||||
// ParallelReaderAtOpener opens ReaderAt files in parallel.
|
||||
//
|
||||
// ParallelReaderAtOpener speeds up opening multiple ReaderAt files on high-latency
|
||||
// storage systems such as NFS or Ceph.
|
||||
type ParallelReaderAtOpener struct {
|
||||
tasks []parallelReaderAtOpenerTask
|
||||
}
|
||||
|
||||
type parallelReaderAtOpenerTask struct {
|
||||
// ReaderAtOpenerTask is a task for opening the ReaderAt file at the given path and reading its size.
|
||||
type ReaderAtOpenerTask struct {
|
||||
path string
|
||||
rc *MustReadAtCloser
|
||||
fileSize *uint64
|
||||
}
|
||||
|
||||
// Add adds a task for opening the file at the given path and storing it to *r, while storing the file size into *fileSize.
|
||||
// NewReaderAtOpenerTask creates a new task for opening the file at the given path and storing it to *rc, while storing the file size into *fileSize.
|
||||
//
|
||||
// Call Run() for running all the registered tasks in parallel.
|
||||
func (pro *ParallelReaderAtOpener) Add(path string, rc *MustReadAtCloser, fileSize *uint64) {
|
||||
pro.tasks = append(pro.tasks, parallelReaderAtOpenerTask{
|
||||
// Running such tasks in parallel speeds up opening multiple ReaderAt files on high-latency
|
||||
// storage systems such as NFS or Ceph.
|
||||
func NewReaderAtOpenerTask(path string, rc *MustReadAtCloser, fileSize *uint64) *ReaderAtOpenerTask {
|
||||
return &ReaderAtOpenerTask{
|
||||
path: path,
|
||||
rc: rc,
|
||||
fileSize: fileSize,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Run executes all the registered tasks in parallel.
|
||||
func (pro *ParallelReaderAtOpener) Run() {
|
||||
var wg sync.WaitGroup
|
||||
concurrencyCh := fsutil.GetConcurrencyCh()
|
||||
for _, task := range pro.tasks {
|
||||
concurrencyCh <- struct{}{}
|
||||
wg.Add(1)
|
||||
|
||||
go func(path string, rc *MustReadAtCloser, fileSize *uint64) {
|
||||
defer func() {
|
||||
wg.Done()
|
||||
<-concurrencyCh
|
||||
}()
|
||||
|
||||
*rc = MustOpenReaderAt(path)
|
||||
*fileSize = MustFileSize(path)
|
||||
}(task.path, task.rc, task.fileSize)
|
||||
}
|
||||
wg.Wait()
|
||||
func (t *ReaderAtOpenerTask) Run() {
|
||||
*t.rc = OpenReaderAt(t.path)
|
||||
*t.fileSize = MustFileSize(t.path)
|
||||
}
|
||||
|
||||
// MustCloser must implement MustClose() function.
|
||||
@@ -57,23 +29,24 @@ type MustCloser interface {
|
||||
MustClose()
|
||||
}
|
||||
|
||||
// MustCloseParallel closes all the cs in parallel.
|
||||
// CloserTask is a task for closing the given MustCloser.
|
||||
//
|
||||
// Parallel closing reduces the time needed to flush the data to the underlying files on close
|
||||
// on high-latency storage systems such as NFS or Ceph.
|
||||
func MustCloseParallel(cs []MustCloser) {
|
||||
var wg sync.WaitGroup
|
||||
concurrencyCh := fsutil.GetConcurrencyCh()
|
||||
for _, c := range cs {
|
||||
concurrencyCh <- struct{}{}
|
||||
wg.Add(1)
|
||||
go func(c MustCloser) {
|
||||
defer func() {
|
||||
wg.Done()
|
||||
<-concurrencyCh
|
||||
}()
|
||||
c.MustClose()
|
||||
}(c)
|
||||
}
|
||||
wg.Wait()
|
||||
type CloserTask struct {
|
||||
c MustCloser
|
||||
}
|
||||
|
||||
// NewCloserTask creates a new task for closing c.
|
||||
//
|
||||
// Running such tasks in parallel speeds up closing multiple files on high-latency
|
||||
// storage systems such as NFS or Ceph.
|
||||
func NewCloserTask(c MustCloser) *CloserTask {
|
||||
return &CloserTask{
|
||||
c: c,
|
||||
}
|
||||
}
|
||||
|
||||
func (t *CloserTask) Run() {
|
||||
t.c.MustClose()
|
||||
}
|
||||
|
||||
@@ -148,10 +148,10 @@ func (r *ReaderAt) MustFadviseSequentialRead(prefetch bool) {
|
||||
}
|
||||
}
|
||||
|
||||
// MustOpenReaderAt opens ReaderAt for reading from the file located at path.
|
||||
// OpenReaderAt opens ReaderAt for reading from the file located at path.
|
||||
//
|
||||
// MustClose must be called on the returned ReaderAt when it is no longer needed.
|
||||
func MustOpenReaderAt(path string) *ReaderAt {
|
||||
func OpenReaderAt(path string) *ReaderAt {
|
||||
var r ReaderAt
|
||||
r.path = path
|
||||
return &r
|
||||
|
||||
@@ -19,7 +19,7 @@ func testReaderAt(t *testing.T, bufSize int) {
|
||||
data := make([]byte, fileSize)
|
||||
MustWriteSync(path, data)
|
||||
defer MustRemovePath(path)
|
||||
r := MustOpenReaderAt(path)
|
||||
r := OpenReaderAt(path)
|
||||
defer r.MustClose()
|
||||
|
||||
buf := make([]byte, bufSize)
|
||||
|
||||
@@ -26,7 +26,7 @@ func benchmarkReaderAtMustReadAt(b *testing.B, isMmap bool) {
|
||||
data := make([]byte, fileSize)
|
||||
MustWriteSync(path, data)
|
||||
defer MustRemovePath(path)
|
||||
r := MustOpenReaderAt(path)
|
||||
r := OpenReaderAt(path)
|
||||
defer r.MustClose()
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
@@ -94,7 +94,7 @@ func (cm *ConnsMap) CloseAll(shutdownDuration time.Duration) {
|
||||
shutdownInterval := shutdownDuration / time.Duration(len(conns)-1)
|
||||
startTime := time.Now()
|
||||
logger.Infof("closing %d %s connections with %dms interval between them", len(conns), cm.clientName, shutdownInterval.Milliseconds())
|
||||
_ = conns[0].closeAll
|
||||
conns[0].closeAll()
|
||||
for _, c := range conns[1:] {
|
||||
time.Sleep(shutdownInterval)
|
||||
c.closeAll()
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
@@ -160,17 +161,17 @@ func (bsr *blockStreamReader) MustInitFromFilePart(path string) {
|
||||
// Open part files in parallel in order to speed up this process
|
||||
// on high-latency storage systems such as NFS or Ceph.
|
||||
|
||||
var pfo filestream.ParallelFileOpener
|
||||
var pe fsutil.ParallelExecutor
|
||||
|
||||
indexPath := filepath.Join(path, indexFilename)
|
||||
itemsPath := filepath.Join(path, itemsFilename)
|
||||
lensPath := filepath.Join(path, lensFilename)
|
||||
|
||||
pfo.Add(indexPath, &bsr.indexReader, true)
|
||||
pfo.Add(itemsPath, &bsr.itemsReader, true)
|
||||
pfo.Add(lensPath, &bsr.lensReader, true)
|
||||
pe.Add(filestream.NewFileOpenerTask(indexPath, &bsr.indexReader, true))
|
||||
pe.Add(filestream.NewFileOpenerTask(itemsPath, &bsr.itemsReader, true))
|
||||
pe.Add(filestream.NewFileOpenerTask(lensPath, &bsr.lensReader, true))
|
||||
|
||||
pfo.Run()
|
||||
pe.Run()
|
||||
}
|
||||
|
||||
// MustClose closes the bsr.
|
||||
@@ -180,12 +181,11 @@ func (bsr *blockStreamReader) MustClose() {
|
||||
if !bsr.isInmemoryBlock {
|
||||
// Close files in parallel in order to speed up this process on storage systems with high latency
|
||||
// such as NFS or Ceph.
|
||||
cs := []fs.MustCloser{
|
||||
bsr.indexReader,
|
||||
bsr.itemsReader,
|
||||
bsr.lensReader,
|
||||
}
|
||||
fs.MustCloseParallel(cs)
|
||||
var pe fsutil.ParallelExecutor
|
||||
pe.Add(fs.NewCloserTask(bsr.indexReader))
|
||||
pe.Add(fs.NewCloserTask(bsr.itemsReader))
|
||||
pe.Add(fs.NewCloserTask(bsr.lensReader))
|
||||
pe.Run()
|
||||
}
|
||||
bsr.reset()
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fsutil"
|
||||
)
|
||||
|
||||
type blockStreamWriter struct {
|
||||
@@ -85,22 +86,22 @@ func (bsw *blockStreamWriter) MustInitFromFilePart(path string, nocache bool, co
|
||||
// Create part files in the directory in parallel in order to speedup the process
|
||||
// on high-latency storage systems such as NFS or Ceph.
|
||||
|
||||
var pfc filestream.ParallelFileCreator
|
||||
var pe fsutil.ParallelExecutor
|
||||
|
||||
indexPath := filepath.Join(path, indexFilename)
|
||||
itemsPath := filepath.Join(path, itemsFilename)
|
||||
lensPath := filepath.Join(path, lensFilename)
|
||||
metaindexPath := filepath.Join(path, metaindexFilename)
|
||||
|
||||
pfc.Add(indexPath, &bsw.indexWriter, nocache)
|
||||
pfc.Add(itemsPath, &bsw.itemsWriter, nocache)
|
||||
pfc.Add(lensPath, &bsw.lensWriter, nocache)
|
||||
pe.Add(filestream.NewFileCreatorTask(indexPath, &bsw.indexWriter, nocache))
|
||||
pe.Add(filestream.NewFileCreatorTask(itemsPath, &bsw.itemsWriter, nocache))
|
||||
pe.Add(filestream.NewFileCreatorTask(lensPath, &bsw.lensWriter, nocache))
|
||||
|
||||
// Always cache metaindex file in OS page cache, since it is immediately
|
||||
// read after the merge.
|
||||
pfc.Add(metaindexPath, &bsw.metaindexWriter, false)
|
||||
pe.Add(filestream.NewFileCreatorTask(metaindexPath, &bsw.metaindexWriter, false))
|
||||
|
||||
pfc.Run()
|
||||
pe.Run()
|
||||
}
|
||||
|
||||
// MustClose closes the bsw.
|
||||
@@ -116,13 +117,12 @@ func (bsw *blockStreamWriter) MustClose() {
|
||||
|
||||
// Close writers in parallel in order to reduce the time needed for closing them
|
||||
// on high-latency storage systems such as NFS or Ceph.
|
||||
cs := []fs.MustCloser{
|
||||
bsw.metaindexWriter,
|
||||
bsw.indexWriter,
|
||||
bsw.itemsWriter,
|
||||
bsw.lensWriter,
|
||||
}
|
||||
fs.MustCloseParallel(cs)
|
||||
var pe fsutil.ParallelExecutor
|
||||
pe.Add(fs.NewCloserTask(bsw.metaindexWriter))
|
||||
pe.Add(fs.NewCloserTask(bsw.indexWriter))
|
||||
pe.Add(fs.NewCloserTask(bsw.itemsWriter))
|
||||
pe.Add(fs.NewCloserTask(bsw.lensWriter))
|
||||
pe.Run()
|
||||
|
||||
bsw.reset()
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
@@ -42,12 +43,12 @@ func (mp *inmemoryPart) MustStoreToDisk(path string) {
|
||||
itemsPath := filepath.Join(path, itemsFilename)
|
||||
lensPath := filepath.Join(path, lensFilename)
|
||||
|
||||
var psw filestream.ParallelStreamWriter
|
||||
psw.Add(metaindexPath, &mp.metaindexData)
|
||||
psw.Add(indexPath, &mp.indexData)
|
||||
psw.Add(itemsPath, &mp.itemsData)
|
||||
psw.Add(lensPath, &mp.lensData)
|
||||
psw.Run()
|
||||
var pe fsutil.ParallelExecutor
|
||||
pe.Add(filestream.NewStreamWriterTask(metaindexPath, &mp.metaindexData))
|
||||
pe.Add(filestream.NewStreamWriterTask(indexPath, &mp.indexData))
|
||||
pe.Add(filestream.NewStreamWriterTask(itemsPath, &mp.itemsData))
|
||||
pe.Add(filestream.NewStreamWriterTask(lensPath, &mp.lensData))
|
||||
pe.Run()
|
||||
|
||||
mp.ph.MustWriteMetadata(path)
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/blockcache"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
)
|
||||
@@ -96,7 +97,7 @@ func mustOpenFilePart(path string) *part {
|
||||
// Open part files in parallel in order to speed up this process
|
||||
// on high-latency storage systems such as NFS or Ceph.
|
||||
|
||||
var pro fs.ParallelReaderAtOpener
|
||||
var pe fsutil.ParallelExecutor
|
||||
|
||||
indexPath := filepath.Join(path, indexFilename)
|
||||
itemsPath := filepath.Join(path, itemsFilename)
|
||||
@@ -104,17 +105,17 @@ func mustOpenFilePart(path string) *part {
|
||||
|
||||
var indexFile fs.MustReadAtCloser
|
||||
var indexSize uint64
|
||||
pro.Add(indexPath, &indexFile, &indexSize)
|
||||
pe.Add(fs.NewReaderAtOpenerTask(indexPath, &indexFile, &indexSize))
|
||||
|
||||
var itemsFile fs.MustReadAtCloser
|
||||
var itemsSize uint64
|
||||
pro.Add(itemsPath, &itemsFile, &itemsSize)
|
||||
pe.Add(fs.NewReaderAtOpenerTask(itemsPath, &itemsFile, &itemsSize))
|
||||
|
||||
var lensFile fs.MustReadAtCloser
|
||||
var lensSize uint64
|
||||
pro.Add(lensPath, &lensFile, &lensSize)
|
||||
pe.Add(fs.NewReaderAtOpenerTask(lensPath, &lensFile, &lensSize))
|
||||
|
||||
pro.Run()
|
||||
pe.Run()
|
||||
|
||||
size := metaindexSize + indexSize + itemsSize + lensSize
|
||||
return newPart(&ph, path, size, metaindexFile, indexFile, itemsFile, lensFile)
|
||||
@@ -143,12 +144,11 @@ func newPart(ph *partHeader, path string, size uint64, metaindexReader filestrea
|
||||
func (p *part) MustClose() {
|
||||
// Close files in parallel in order to speed up this process on storage systems with high latency
|
||||
// such as NFS or Ceph.
|
||||
cs := []fs.MustCloser{
|
||||
p.indexFile,
|
||||
p.itemsFile,
|
||||
p.lensFile,
|
||||
}
|
||||
fs.MustCloseParallel(cs)
|
||||
var pe fsutil.ParallelExecutor
|
||||
pe.Add(fs.NewCloserTask(p.indexFile))
|
||||
pe.Add(fs.NewCloserTask(p.itemsFile))
|
||||
pe.Add(fs.NewCloserTask(p.lensFile))
|
||||
pe.Run()
|
||||
|
||||
idxbCache.RemoveBlocksForPart(p)
|
||||
ibCache.RemoveBlocksForPart(p)
|
||||
|
||||
@@ -1561,9 +1561,6 @@ func mustOpenParts(path string) []*partWrapper {
|
||||
// or a problem with the underlying file system (such as insufficient
|
||||
// permissions).
|
||||
func (tb *Table) MustCreateSnapshotAt(dstDir string) {
|
||||
logger.Infof("creating Table snapshot of %q...", tb.path)
|
||||
startTime := time.Now()
|
||||
|
||||
var err error
|
||||
srcDir := tb.path
|
||||
srcDir, err = filepath.Abs(srcDir)
|
||||
@@ -1602,8 +1599,6 @@ func (tb *Table) MustCreateSnapshotAt(dstDir string) {
|
||||
}
|
||||
|
||||
fs.MustSyncPathAndParentDir(dstDir)
|
||||
|
||||
logger.Infof("created Table snapshot of %q at %q in %.3f seconds", srcDir, dstDir, time.Since(startTime).Seconds())
|
||||
}
|
||||
|
||||
func mustWritePartNames(pws []*partWrapper, dstDir string) {
|
||||
|
||||
15
lib/mergeset/table_legacy.go
Normal file
15
lib/mergeset/table_legacy.go
Normal file
@@ -0,0 +1,15 @@
|
||||
package mergeset
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// LegacyMustCreateSnapshotAt is used for creating snapshots for legacy IndexDBs.
|
||||
func (tb *Table) LegacyMustCreateSnapshotAt(dstDir string) {
|
||||
logger.Infof("creating legacy IndexDB snapshot of %q...", tb.path)
|
||||
startTime := time.Now()
|
||||
tb.MustCreateSnapshotAt(dstDir)
|
||||
logger.Infof("created legacy IndexDB snapshot of %q at %q in %.3f seconds", tb.path, dstDir, time.Since(startTime).Seconds())
|
||||
}
|
||||
@@ -262,6 +262,7 @@ func (cfg *Config) getJobNames() []string {
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/
|
||||
type GlobalConfig struct {
|
||||
LabelLimit int `yaml:"label_limit,omitempty"`
|
||||
SampleLimit int `yaml:"sample_limit,omitempty"`
|
||||
ScrapeInterval *promutil.Duration `yaml:"scrape_interval,omitempty"`
|
||||
ScrapeTimeout *promutil.Duration `yaml:"scrape_timeout,omitempty"`
|
||||
ExternalLabels *promutil.Labels `yaml:"external_labels,omitempty"`
|
||||
@@ -955,6 +956,10 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse `metric_relabel_configs` for `job_name` %q: %w", jobName, err)
|
||||
}
|
||||
sampleLimit := sc.SampleLimit
|
||||
if sampleLimit <= 0 {
|
||||
sampleLimit = globalCfg.SampleLimit
|
||||
}
|
||||
externalLabels := globalCfg.ExternalLabels
|
||||
noStaleTracking := *noStaleMarkers
|
||||
if sc.NoStaleMarkers != nil {
|
||||
@@ -987,7 +992,7 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
|
||||
externalLabels: externalLabels,
|
||||
relabelConfigs: relabelConfigs,
|
||||
metricRelabelConfigs: metricRelabelConfigs,
|
||||
sampleLimit: sc.SampleLimit,
|
||||
sampleLimit: sampleLimit,
|
||||
labelLimit: labelLimit,
|
||||
disableCompression: disableCompression,
|
||||
disableKeepAlive: sc.DisableKeepAlive,
|
||||
|
||||
@@ -1135,6 +1135,67 @@ scrape_configs:
|
||||
},
|
||||
})
|
||||
|
||||
// Test different precedence levels for sample_limit:
|
||||
// - Job 'a' uses the global limit (100).
|
||||
// - Job 'b' overrides the global limit with its own job-level limit (200).
|
||||
// - Job 'c' uses relabeling to set the final limit via the __sample_limit__ label (500), overriding the job-level limit.
|
||||
f(`
|
||||
global:
|
||||
sample_limit: 100
|
||||
scrape_configs:
|
||||
- job_name: a
|
||||
static_configs:
|
||||
- targets: ["foo.a:1234"]
|
||||
- job_name: b
|
||||
sample_limit: 200
|
||||
static_configs:
|
||||
- targets: ["foo.b:1234"]
|
||||
- job_name: c
|
||||
sample_limit: 100
|
||||
static_configs:
|
||||
- targets: ["foo.c:1234"]
|
||||
relabel_configs:
|
||||
- target_label: __sample_limit__
|
||||
replacement: 500
|
||||
`, []*ScrapeWork{
|
||||
{
|
||||
ScrapeURL: "http://foo.a:1234/metrics",
|
||||
ScrapeInterval: defaultScrapeInterval,
|
||||
ScrapeTimeout: defaultScrapeTimeout,
|
||||
MaxScrapeSize: maxScrapeSize.N,
|
||||
SampleLimit: 100,
|
||||
Labels: promutil.NewLabelsFromMap(map[string]string{
|
||||
"instance": "foo.a:1234",
|
||||
"job": "a",
|
||||
}),
|
||||
jobNameOriginal: "a",
|
||||
},
|
||||
{
|
||||
ScrapeURL: "http://foo.b:1234/metrics",
|
||||
ScrapeInterval: defaultScrapeInterval,
|
||||
ScrapeTimeout: defaultScrapeTimeout,
|
||||
MaxScrapeSize: maxScrapeSize.N,
|
||||
SampleLimit: 200,
|
||||
Labels: promutil.NewLabelsFromMap(map[string]string{
|
||||
"instance": "foo.b:1234",
|
||||
"job": "b",
|
||||
}),
|
||||
jobNameOriginal: "b",
|
||||
},
|
||||
{
|
||||
ScrapeURL: "http://foo.c:1234/metrics",
|
||||
ScrapeInterval: defaultScrapeInterval,
|
||||
ScrapeTimeout: defaultScrapeTimeout,
|
||||
MaxScrapeSize: maxScrapeSize.N,
|
||||
SampleLimit: 500,
|
||||
Labels: promutil.NewLabelsFromMap(map[string]string{
|
||||
"instance": "foo.c:1234",
|
||||
"job": "c",
|
||||
}),
|
||||
jobNameOriginal: "c",
|
||||
},
|
||||
})
|
||||
|
||||
f(`
|
||||
scrape_configs:
|
||||
- job_name: 'snmp'
|
||||
|
||||
@@ -961,8 +961,9 @@ func isAutoMetric(s string) bool {
|
||||
"scrape_samples_scraped",
|
||||
"scrape_series_added",
|
||||
"scrape_series_current",
|
||||
"scrape_series_limit",
|
||||
"scrape_series_limit_samples_dropped",
|
||||
"scrape_series_limit",
|
||||
"scrape_labels_limit",
|
||||
"scrape_timeout_seconds":
|
||||
return true
|
||||
default:
|
||||
@@ -977,7 +978,7 @@ func isAutoMetric(s string) bool {
|
||||
// sw is used as read-only config source.
|
||||
func (wc *writeRequestCtx) addAutoMetrics(sw *scrapeWork, am *autoMetrics, timestamp int64) {
|
||||
rows := getAutoRows()
|
||||
dst := slicesutil.SetLength(rows.Rows, 11)[:0]
|
||||
dst := slicesutil.SetLength(rows.Rows, 12)[:0]
|
||||
|
||||
dst = appendRow(dst, "scrape_duration_seconds", am.scrapeDurationSeconds, timestamp)
|
||||
dst = appendRow(dst, "scrape_response_size_bytes", float64(am.scrapeResponseSize), timestamp)
|
||||
|
||||
@@ -29,14 +29,16 @@ func TestIsAutoMetric(t *testing.T) {
|
||||
}
|
||||
f("up", true)
|
||||
f("scrape_duration_seconds", true)
|
||||
f("scrape_samples_scraped", true)
|
||||
f("scrape_samples_post_metric_relabeling", true)
|
||||
f("scrape_series_added", true)
|
||||
f("scrape_timeout_seconds", true)
|
||||
f("scrape_response_size_bytes", true)
|
||||
f("scrape_samples_limit", true)
|
||||
f("scrape_samples_post_metric_relabeling", true)
|
||||
f("scrape_samples_scraped", true)
|
||||
f("scrape_series_added", true)
|
||||
f("scrape_series_current", true)
|
||||
f("scrape_series_limit_samples_dropped", true)
|
||||
f("scrape_series_limit", true)
|
||||
f("scrape_series_current", true)
|
||||
f("scrape_labels_limit", true)
|
||||
f("scrape_timeout_seconds", true)
|
||||
|
||||
f("foobar", false)
|
||||
f("exported_up", false)
|
||||
@@ -234,7 +236,7 @@ func testScrapeWorkScrapeInternalSuccess(t *testing.T, streamParse bool) {
|
||||
scrape_timeout_seconds 42 123
|
||||
`, []prompb.MetricMetadata{})
|
||||
f(`
|
||||
# HELP foo This is test metric.
|
||||
# HELP foo This is test metric.
|
||||
# TYPE foo gauge
|
||||
foo{bar="baz",empty_label=""} 34.45 3
|
||||
abc -2
|
||||
@@ -246,7 +248,7 @@ func testScrapeWorkScrapeInternalSuccess(t *testing.T, streamParse bool) {
|
||||
abc -2 123
|
||||
up 1 123
|
||||
scrape_samples_scraped 2 123
|
||||
scrape_response_size_bytes 107 123
|
||||
scrape_response_size_bytes 104 123
|
||||
scrape_duration_seconds 0 123
|
||||
scrape_samples_post_metric_relabeling 2 123
|
||||
scrape_series_added 2 123
|
||||
@@ -532,23 +534,23 @@ func testScrapeWorkScrapeInternalSuccess(t *testing.T, streamParse bool) {
|
||||
`, []prompb.MetricMetadata{})
|
||||
// Scrape failure because of the exceeded LabelLimit
|
||||
f(`
|
||||
foo{bar="baz"} 34.44
|
||||
bar{a="b",c="d",e="f"} -3e4
|
||||
`, &ScrapeWork{
|
||||
foo{bar="baz"} 34.44
|
||||
bar{a="b",c="d",e="f"} -3e4
|
||||
`, &ScrapeWork{
|
||||
StreamParse: streamParse,
|
||||
ScrapeTimeout: time.Second * 42,
|
||||
HonorLabels: true,
|
||||
LabelLimit: 2,
|
||||
}, `
|
||||
up 0 123
|
||||
scrape_samples_scraped 2 123
|
||||
scrape_response_size_bytes 0 123
|
||||
scrape_duration_seconds 0 123
|
||||
scrape_samples_post_metric_relabeling 0 123
|
||||
scrape_series_added 0 123
|
||||
scrape_timeout_seconds 42 123
|
||||
up 0 123
|
||||
scrape_samples_scraped 2 123
|
||||
scrape_response_size_bytes 0 123
|
||||
scrape_duration_seconds 0 123
|
||||
scrape_samples_post_metric_relabeling 0 123
|
||||
scrape_series_added 0 123
|
||||
scrape_timeout_seconds 42 123
|
||||
scrape_labels_limit 2 123
|
||||
`, []prompb.MetricMetadata{})
|
||||
`, []prompb.MetricMetadata{})
|
||||
// Scrape success with the given SeriesLimit.
|
||||
f(`
|
||||
foo{bar="baz"} 34.44
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
)
|
||||
|
||||
// MetricPusher must implement the ability to push the parsed samples and metric metadata to the underlying storage.
|
||||
// MetricPusher must push the parsed samples and metric metadata to the underlying storage.
|
||||
type MetricPusher interface {
|
||||
// PushSample must store a sample with the given args.
|
||||
//
|
||||
@@ -23,7 +23,7 @@ type MetricPusher interface {
|
||||
|
||||
// PushMetricMetadata must store mm.
|
||||
//
|
||||
// The PushMetricMetadata must copy mm contents, wince it becomes invalid after returning from the func.
|
||||
// The PushMetricMetadata must copy mm contents, since it becomes invalid after returning from the func.
|
||||
PushMetricMetadata(mm *MetricMetadata)
|
||||
}
|
||||
|
||||
|
||||
@@ -7,8 +7,8 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/snappy"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ioutil"
|
||||
@@ -40,7 +40,7 @@ func Parse(r io.Reader, isVMRemoteWrite bool, callback func(tss []prompb.TimeSer
|
||||
defer bodyBufferPool.Put(bb)
|
||||
var err error
|
||||
if isVMRemoteWrite {
|
||||
bb.B, err = zstd.DecompressLimited(bb.B[:0], ctx.reqBuf.B, maxInsertRequestSize.IntN())
|
||||
bb.B, err = encoding.DecompressZSTDLimited(bb.B[:0], ctx.reqBuf.B, maxInsertRequestSize.IntN())
|
||||
if err != nil {
|
||||
// Fall back to Snappy decompression, since vmagent may send snappy-encoded messages
|
||||
// with 'Content-Encoding: zstd' header if they were put into persistent queue before vmagent restart.
|
||||
@@ -66,7 +66,7 @@ func Parse(r io.Reader, isVMRemoteWrite bool, callback func(tss []prompb.TimeSer
|
||||
// The logic is preserved for backwards compatibility.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8650
|
||||
snappyErr := err
|
||||
bb.B, err = zstd.DecompressLimited(bb.B[:0], ctx.reqBuf.B, maxInsertRequestSize.IntN())
|
||||
bb.B, err = encoding.DecompressZSTDLimited(bb.B[:0], ctx.reqBuf.B, maxInsertRequestSize.IntN())
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot decompress snappy-encoded request with length %d: %w", len(ctx.reqBuf.B), snappyErr)
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"github.com/klauspost/compress/zlib"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/snappy"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
@@ -29,18 +30,18 @@ const maxSnappyBlockSize = 56_000_000
|
||||
// The maxDataSize limits the maximum data size, which can be read from r.
|
||||
//
|
||||
// The callback must not hold references to the data after returning.
|
||||
func ReadUncompressedData(r io.Reader, encoding string, maxDataSize *flagutil.Bytes, callback func(data []byte) error) error {
|
||||
func ReadUncompressedData(r io.Reader, contentType string, maxDataSize *flagutil.Bytes, callback func(data []byte) error) error {
|
||||
wcr := writeconcurrencylimiter.GetReader(r)
|
||||
defer writeconcurrencylimiter.PutReader(wcr)
|
||||
|
||||
if encoding == "zstd" {
|
||||
// Fast path for zstd encoding - read the data in full and then decompress it by a single call.
|
||||
if contentType == "zstd" {
|
||||
// Fast path for zstd contentType - read the data in full and then decompress it by a single call.
|
||||
dcompress := func(dst, src []byte) ([]byte, error) {
|
||||
return zstd.DecompressLimited(dst, src, maxDataSize.IntN())
|
||||
return encoding.DecompressZSTDLimited(dst, src, maxDataSize.IntN())
|
||||
}
|
||||
return readUncompressedData(wcr, maxDataSize, dcompress, callback)
|
||||
}
|
||||
if encoding == "snappy" {
|
||||
if contentType == "snappy" {
|
||||
// Special case for snappy. The snappy data must be read in full and then decompressed,
|
||||
// since streaming snappy encoding is incompatible with block snappy encoding.
|
||||
decompress := func(dst, src []byte) ([]byte, error) {
|
||||
@@ -50,7 +51,7 @@ func ReadUncompressedData(r io.Reader, encoding string, maxDataSize *flagutil.By
|
||||
}
|
||||
|
||||
// Slow path for other supported protocol encoders.
|
||||
reader, err := GetUncompressedReader(wcr, encoding)
|
||||
reader, err := GetUncompressedReader(wcr, contentType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -114,11 +115,11 @@ var (
|
||||
decompressedBufPool bytesutil.ByteBufferPool
|
||||
)
|
||||
|
||||
// GetUncompressedReader returns uncompressed reader for r and the given encoding
|
||||
// GetUncompressedReader returns uncompressed reader for r and the given contentType
|
||||
//
|
||||
// The returned reader must be passed to PutUncompressedReader when no longer needed.
|
||||
func GetUncompressedReader(r io.Reader, encoding string) (io.Reader, error) {
|
||||
switch encoding {
|
||||
func GetUncompressedReader(r io.Reader, contentType string) (io.Reader, error) {
|
||||
switch contentType {
|
||||
case "zstd":
|
||||
return zstd.GetReader(r), nil
|
||||
case "snappy":
|
||||
@@ -132,7 +133,7 @@ func GetUncompressedReader(r io.Reader, encoding string) (io.Reader, error) {
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8649
|
||||
return getPlainReader(r), nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported encoding: %s", encoding)
|
||||
return nil, fmt.Errorf("unsupported contentType: %s", contentType)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
@@ -146,17 +147,17 @@ func (bsr *blockStreamReader) MustInitFromFilePart(path string) {
|
||||
// Open part files in parallel in order to speed up this operation
|
||||
// on high-latency storage systems such as NFS or Ceph.
|
||||
|
||||
var pfo filestream.ParallelFileOpener
|
||||
var pe fsutil.ParallelExecutor
|
||||
|
||||
timestampsPath := filepath.Join(path, timestampsFilename)
|
||||
valuesPath := filepath.Join(path, valuesFilename)
|
||||
indexPath := filepath.Join(path, indexFilename)
|
||||
|
||||
pfo.Add(timestampsPath, &bsr.timestampsReader, true)
|
||||
pfo.Add(valuesPath, &bsr.valuesReader, true)
|
||||
pfo.Add(indexPath, &bsr.indexReader, true)
|
||||
pe.Add(filestream.NewFileOpenerTask(timestampsPath, &bsr.timestampsReader, true))
|
||||
pe.Add(filestream.NewFileOpenerTask(valuesPath, &bsr.valuesReader, true))
|
||||
pe.Add(filestream.NewFileOpenerTask(indexPath, &bsr.indexReader, true))
|
||||
|
||||
pfo.Run()
|
||||
pe.Run()
|
||||
}
|
||||
|
||||
// MustClose closes the bsr.
|
||||
@@ -165,12 +166,11 @@ func (bsr *blockStreamReader) MustInitFromFilePart(path string) {
|
||||
func (bsr *blockStreamReader) MustClose() {
|
||||
// Close files in parallel in order to speed up this process on storage systems with high latency
|
||||
// such as NFS or Ceph.
|
||||
cs := []fs.MustCloser{
|
||||
bsr.timestampsReader,
|
||||
bsr.valuesReader,
|
||||
bsr.indexReader,
|
||||
}
|
||||
fs.MustCloseParallel(cs)
|
||||
var pe fsutil.ParallelExecutor
|
||||
pe.Add(fs.NewCloserTask(bsr.timestampsReader))
|
||||
pe.Add(fs.NewCloserTask(bsr.valuesReader))
|
||||
pe.Add(fs.NewCloserTask(bsr.indexReader))
|
||||
pe.Run()
|
||||
|
||||
bsr.reset()
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
@@ -92,22 +93,22 @@ func (bsw *blockStreamWriter) MustInitFromFilePart(path string, nocache bool, co
|
||||
// Create part files in the directory in parallel in order to reduce the duration
|
||||
// of the operation on high-latency storage systems such as NFS and Ceph.
|
||||
|
||||
var pfc filestream.ParallelFileCreator
|
||||
var pe fsutil.ParallelExecutor
|
||||
|
||||
timestampsPath := filepath.Join(path, timestampsFilename)
|
||||
valuesPath := filepath.Join(path, valuesFilename)
|
||||
indexPath := filepath.Join(path, indexFilename)
|
||||
metaindexPath := filepath.Join(path, metaindexFilename)
|
||||
|
||||
pfc.Add(timestampsPath, &bsw.timestampsWriter, nocache)
|
||||
pfc.Add(valuesPath, &bsw.valuesWriter, nocache)
|
||||
pfc.Add(indexPath, &bsw.indexWriter, nocache)
|
||||
pe.Add(filestream.NewFileCreatorTask(timestampsPath, &bsw.timestampsWriter, nocache))
|
||||
pe.Add(filestream.NewFileCreatorTask(valuesPath, &bsw.valuesWriter, nocache))
|
||||
pe.Add(filestream.NewFileCreatorTask(indexPath, &bsw.indexWriter, nocache))
|
||||
|
||||
// Always cache metaindex file in OS page cache, since it is immediately
|
||||
// read after the merge.
|
||||
pfc.Add(metaindexPath, &bsw.metaindexWriter, false)
|
||||
pe.Add(filestream.NewFileCreatorTask(metaindexPath, &bsw.metaindexWriter, false))
|
||||
|
||||
pfc.Run()
|
||||
pe.Run()
|
||||
}
|
||||
|
||||
// MustClose closes the bsw.
|
||||
@@ -123,13 +124,12 @@ func (bsw *blockStreamWriter) MustClose() {
|
||||
|
||||
// Close writers in parallel in order to reduce the time needed for closing them
|
||||
// on high-latency storage systems such as NFS or Ceph.
|
||||
cs := []fs.MustCloser{
|
||||
bsw.timestampsWriter,
|
||||
bsw.valuesWriter,
|
||||
bsw.indexWriter,
|
||||
bsw.metaindexWriter,
|
||||
}
|
||||
fs.MustCloseParallel(cs)
|
||||
var pe fsutil.ParallelExecutor
|
||||
pe.Add(fs.NewCloserTask(bsw.timestampsWriter))
|
||||
pe.Add(fs.NewCloserTask(bsw.valuesWriter))
|
||||
pe.Add(fs.NewCloserTask(bsw.indexWriter))
|
||||
pe.Add(fs.NewCloserTask(bsw.metaindexWriter))
|
||||
pe.Run()
|
||||
|
||||
bsw.reset()
|
||||
}
|
||||
|
||||
@@ -4,97 +4,104 @@ import (
|
||||
"sort"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/uint64set"
|
||||
)
|
||||
|
||||
var maxDateMetricIDCacheSize uint64
|
||||
|
||||
// SetDateMetricIDCacheSize overrides the default size of dateMetricIDCache
|
||||
func SetDateMetricIDCacheSize(size int) {
|
||||
maxDateMetricIDCacheSize = uint64(size)
|
||||
}
|
||||
|
||||
func getDateMetricIDCacheSize() uint64 {
|
||||
if maxDateMetricIDCacheSize <= 0 {
|
||||
return uint64(float64(memory.Allowed()) / 256)
|
||||
}
|
||||
return maxDateMetricIDCacheSize
|
||||
}
|
||||
|
||||
// dateMetricIDCache is fast cache for holding (date, metricID) entries.
|
||||
// dateMetricIDCache stores (date, metricIDs) entries that have been added to
|
||||
// the index. It is used during data ingestion to decide whether a new entry
|
||||
// needs to be added to the per-day index.
|
||||
//
|
||||
// It should be faster than map[date]*uint64set.Set on multicore systems.
|
||||
type dateMetricIDCache struct {
|
||||
// Contains immutable map
|
||||
byDate atomic.Pointer[byDateMetricIDMap]
|
||||
// Contains immutable (date, metricIDs) entries.
|
||||
curr atomic.Pointer[byDateMetricIDMap]
|
||||
|
||||
// Contains mutable map protected by mu
|
||||
byDateMutable *byDateMetricIDMap
|
||||
|
||||
// Contains the number of slow accesses to byDateMutable.
|
||||
// Is used for deciding when to merge byDateMutable to byDate.
|
||||
// Contains immutable (date, metricIDs) entries that used to be current
|
||||
// before cache rotation. It is used to implement periodic cache clean-up.
|
||||
// Protected by mu.
|
||||
prev *byDateMetricIDMap
|
||||
|
||||
// Contains mutable (date, metricIDs) entries that either have been added to
|
||||
// the cache recently or migrated from prev. Protected by mu.
|
||||
next *byDateMetricIDMap
|
||||
|
||||
// Contains the number of slow accesses to next. Is used for deciding when
|
||||
// to merge next to curr. Protected by mu.
|
||||
slowHits int
|
||||
|
||||
// Protected by mu.
|
||||
// Contains the number of times next was merged into curr. Protected by mu.
|
||||
syncsCount uint64
|
||||
|
||||
// Protected by mu.
|
||||
resetsCount uint64
|
||||
// Contains the number of times the cache has been rotated. Protected by mu.
|
||||
rotationsCount uint64
|
||||
|
||||
mu sync.Mutex
|
||||
|
||||
stopCh chan struct{}
|
||||
rotationStoppedCh chan struct{}
|
||||
}
|
||||
|
||||
func newDateMetricIDCache() *dateMetricIDCache {
|
||||
var dmc dateMetricIDCache
|
||||
dmc.resetLocked()
|
||||
dmc := dateMetricIDCache{
|
||||
prev: newByDateMetricIDMap(),
|
||||
next: newByDateMetricIDMap(),
|
||||
stopCh: make(chan struct{}),
|
||||
rotationStoppedCh: make(chan struct{}),
|
||||
}
|
||||
dmc.curr.Store(newByDateMetricIDMap())
|
||||
go dmc.startRotation()
|
||||
return &dmc
|
||||
}
|
||||
|
||||
func (dmc *dateMetricIDCache) resetLocked() {
|
||||
// Do not reset syncsCount and resetsCount
|
||||
dmc.byDate.Store(newByDateMetricIDMap())
|
||||
dmc.byDateMutable = newByDateMetricIDMap()
|
||||
dmc.slowHits = 0
|
||||
|
||||
dmc.resetsCount++
|
||||
func (dmc *dateMetricIDCache) MustStop() {
|
||||
close(dmc.stopCh)
|
||||
<-dmc.rotationStoppedCh
|
||||
}
|
||||
|
||||
type dateMetricIDCacheStats struct {
|
||||
Size uint64
|
||||
SizeBytes uint64
|
||||
SizeMaxBytes uint64
|
||||
ResetsCount uint64
|
||||
SyncsCount uint64
|
||||
Size uint64
|
||||
SizeBytes uint64
|
||||
SyncsCount uint64
|
||||
RotationsCount uint64
|
||||
}
|
||||
|
||||
func (dmc *dateMetricIDCache) Stats() dateMetricIDCacheStats {
|
||||
s := dateMetricIDCacheStats{
|
||||
SizeMaxBytes: getDateMetricIDCacheSize(),
|
||||
}
|
||||
|
||||
dmc.mu.Lock()
|
||||
defer dmc.mu.Unlock()
|
||||
|
||||
for _, metricIDs := range dmc.byDate.Load().m {
|
||||
s.Size += uint64(metricIDs.Len())
|
||||
s.SizeBytes += metricIDs.SizeBytes()
|
||||
var s dateMetricIDCacheStats
|
||||
for _, metricIDs := range dmc.curr.Load().m {
|
||||
if metricIDs.Len() > 0 {
|
||||
// empty uint64set.Set still occupies a few bytes. Ignore them.
|
||||
s.Size += uint64(metricIDs.Len())
|
||||
s.SizeBytes += metricIDs.SizeBytes()
|
||||
}
|
||||
}
|
||||
for _, metricIDs := range dmc.byDateMutable.m {
|
||||
s.Size += uint64(metricIDs.Len())
|
||||
s.SizeBytes += metricIDs.SizeBytes()
|
||||
for _, metricIDs := range dmc.prev.m {
|
||||
if metricIDs.Len() > 0 {
|
||||
s.Size += uint64(metricIDs.Len())
|
||||
s.SizeBytes += metricIDs.SizeBytes()
|
||||
}
|
||||
}
|
||||
for _, metricIDs := range dmc.next.m {
|
||||
if metricIDs.Len() > 0 {
|
||||
s.Size += uint64(metricIDs.Len())
|
||||
s.SizeBytes += metricIDs.SizeBytes()
|
||||
}
|
||||
}
|
||||
|
||||
s.ResetsCount = dmc.resetsCount
|
||||
s.SyncsCount = dmc.syncsCount
|
||||
s.RotationsCount = dmc.rotationsCount
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
func (dmc *dateMetricIDCache) Has(date, metricID uint64) bool {
|
||||
if byDate := dmc.byDate.Load(); byDate.get(date).Has(metricID) {
|
||||
curr := dmc.curr.Load()
|
||||
vCurr := curr.get(date)
|
||||
if vCurr.Has(metricID) {
|
||||
// Fast path. The majority of calls must go here.
|
||||
return true
|
||||
}
|
||||
@@ -109,19 +116,29 @@ func (dmc *dateMetricIDCache) hasSlow(date, metricID uint64) bool {
|
||||
|
||||
// First, check immutable map again because the entry may have been moved to
|
||||
// the immutable map by the time the caller acquires the lock.
|
||||
byDate := dmc.byDate.Load()
|
||||
v := byDate.get(date)
|
||||
if v.Has(metricID) {
|
||||
curr := dmc.curr.Load()
|
||||
vCurr := curr.get(date)
|
||||
if vCurr.Has(metricID) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Then check mutable map.
|
||||
vMutable := dmc.byDateMutable.get(date)
|
||||
ok := vMutable.Has(metricID)
|
||||
// Then check next and prev.
|
||||
vNext := dmc.next.getOrCreate(date)
|
||||
ok := vNext.Has(metricID)
|
||||
if !ok {
|
||||
vPrev := dmc.prev.get(date)
|
||||
ok = vPrev.Has(metricID)
|
||||
if ok {
|
||||
// The metricID is in prev but is still in use. Migrate it to next.
|
||||
vNext.Add(metricID)
|
||||
}
|
||||
}
|
||||
|
||||
if ok {
|
||||
dmc.slowHits++
|
||||
if dmc.slowHits > (v.Len()+vMutable.Len())/2 {
|
||||
// It is cheaper to merge byDateMutable into byDate than to pay inter-cpu sync costs when accessing vMutable.
|
||||
if dmc.slowHits > (vCurr.Len()+vNext.Len())/2 {
|
||||
// It is cheaper to merge next into curr than to pay inter-cpu sync
|
||||
// costs when accessing next.
|
||||
dmc.syncLocked()
|
||||
dmc.slowHits = 0
|
||||
}
|
||||
@@ -131,48 +148,50 @@ func (dmc *dateMetricIDCache) hasSlow(date, metricID uint64) bool {
|
||||
|
||||
func (dmc *dateMetricIDCache) Set(date, metricID uint64) {
|
||||
dmc.mu.Lock()
|
||||
v := dmc.byDateMutable.getOrCreate(date)
|
||||
v := dmc.next.getOrCreate(date)
|
||||
v.Add(metricID)
|
||||
dmc.mu.Unlock()
|
||||
}
|
||||
|
||||
func (dmc *dateMetricIDCache) syncLocked() {
|
||||
if len(dmc.byDateMutable.m) == 0 {
|
||||
if len(dmc.next.m) == 0 {
|
||||
// Nothing to sync.
|
||||
return
|
||||
}
|
||||
|
||||
// Merge data from byDate into byDateMutable and then atomically replace byDate with the merged data.
|
||||
byDate := dmc.byDate.Load()
|
||||
byDateMutable := dmc.byDateMutable
|
||||
byDateMutable.hotEntry.Store(nil)
|
||||
// Merge data from curr into next and then atomically replace curr with the
|
||||
// merged data.
|
||||
curr := dmc.curr.Load()
|
||||
next := dmc.next
|
||||
next.hotEntry.Store(nil)
|
||||
|
||||
keepDatesMap := make(map[uint64]struct{}, len(byDateMutable.m))
|
||||
for date, metricIDsMutable := range byDateMutable.m {
|
||||
keepDatesMap := make(map[uint64]struct{}, len(next.m))
|
||||
for date, vNext := range next.m {
|
||||
keepDatesMap[date] = struct{}{}
|
||||
metricIDs := byDate.get(date)
|
||||
if metricIDs == nil {
|
||||
vCurr := curr.get(date)
|
||||
if vCurr == nil {
|
||||
// Nothing to merge
|
||||
continue
|
||||
}
|
||||
metricIDs = metricIDs.Clone()
|
||||
metricIDs.Union(metricIDsMutable)
|
||||
byDateMutable.m[date] = metricIDs
|
||||
vCurr = vCurr.Clone()
|
||||
vCurr.Union(vNext)
|
||||
next.m[date] = vCurr
|
||||
}
|
||||
|
||||
// Copy entries from byDate, which are missing in byDateMutable
|
||||
allDatesMap := make(map[uint64]struct{}, len(byDate.m))
|
||||
for date, metricIDs := range byDate.m {
|
||||
// Copy entries from curr, which are missing in next
|
||||
allDatesMap := make(map[uint64]struct{}, len(curr.m))
|
||||
for date, vCurr := range curr.m {
|
||||
allDatesMap[date] = struct{}{}
|
||||
v := byDateMutable.get(date)
|
||||
if v != nil {
|
||||
vNext := next.get(date)
|
||||
if vNext != nil {
|
||||
continue
|
||||
}
|
||||
byDateMutable.m[date] = metricIDs
|
||||
next.m[date] = vCurr
|
||||
}
|
||||
|
||||
if len(byDateMutable.m) > 2 {
|
||||
// Keep only entries for the last two dates from allDatesMap plus all the entries for byDateMutable.
|
||||
if len(next.m) > 2 {
|
||||
// Keep only entries for the last two dates from allDatesMap plus all
|
||||
// the entries for next.
|
||||
dates := make([]uint64, 0, len(allDatesMap))
|
||||
for date := range allDatesMap {
|
||||
dates = append(dates, date)
|
||||
@@ -186,29 +205,46 @@ func (dmc *dateMetricIDCache) syncLocked() {
|
||||
for _, date := range dates {
|
||||
keepDatesMap[date] = struct{}{}
|
||||
}
|
||||
for date := range byDateMutable.m {
|
||||
for date := range next.m {
|
||||
if _, ok := keepDatesMap[date]; !ok {
|
||||
delete(byDateMutable.m, date)
|
||||
delete(next.m, date)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var sizeBytes uint64
|
||||
for _, v := range dmc.byDateMutable.m {
|
||||
sizeBytes += v.SizeBytes()
|
||||
}
|
||||
|
||||
// Atomically replace byDate with byDateMutable
|
||||
dmc.byDate.Store(dmc.byDateMutable)
|
||||
dmc.byDateMutable = newByDateMetricIDMap()
|
||||
// Atomically replace curr with next.
|
||||
dmc.curr.Store(dmc.next)
|
||||
dmc.next = newByDateMetricIDMap()
|
||||
|
||||
dmc.syncsCount++
|
||||
}
|
||||
|
||||
if sizeBytes > getDateMetricIDCacheSize() {
|
||||
dmc.resetLocked()
|
||||
func (dmc *dateMetricIDCache) startRotation() {
|
||||
d := timeutil.AddJitterToDuration(10 * time.Minute)
|
||||
ticker := time.NewTicker(d)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-dmc.stopCh:
|
||||
close(dmc.rotationStoppedCh)
|
||||
return
|
||||
case <-ticker.C:
|
||||
dmc.rotate()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// rotate atomically rotates next, curr, and prev cache parts.
|
||||
func (dmc *dateMetricIDCache) rotate() {
|
||||
dmc.mu.Lock()
|
||||
defer dmc.mu.Unlock()
|
||||
curr := dmc.curr.Load()
|
||||
dmc.prev = curr
|
||||
dmc.curr.Store(dmc.next)
|
||||
dmc.next = newByDateMetricIDMap()
|
||||
dmc.rotationsCount++
|
||||
}
|
||||
|
||||
// dateMetricIDs holds the date and corresponding metricIDs together and is used
|
||||
// for implementing hot entry fast path in byDateMetricIDMap.
|
||||
type dateMetricIDs struct {
|
||||
|
||||
@@ -6,12 +6,12 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/uint64set"
|
||||
)
|
||||
|
||||
func TestDateMetricIDCacheSerial(t *testing.T) {
|
||||
c := newDateMetricIDCache()
|
||||
defer c.MustStop()
|
||||
if err := testDateMetricIDCache(c, false); err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
@@ -19,6 +19,7 @@ func TestDateMetricIDCacheSerial(t *testing.T) {
|
||||
|
||||
func TestDateMetricIDCacheConcurrent(t *testing.T) {
|
||||
c := newDateMetricIDCache()
|
||||
defer c.MustStop()
|
||||
ch := make(chan error, 5)
|
||||
for i := 0; i < 5; i++ {
|
||||
go func() {
|
||||
@@ -63,9 +64,9 @@ func testDateMetricIDCache(c *dateMetricIDCache, concurrent bool) error {
|
||||
c.mu.Unlock()
|
||||
}
|
||||
if i%34323 == 0 {
|
||||
c.mu.Lock()
|
||||
c.resetLocked()
|
||||
c.mu.Unlock()
|
||||
// Two rotations are needed to clear the cache.
|
||||
c.rotate()
|
||||
c.rotate()
|
||||
m = make(map[dmk]bool)
|
||||
}
|
||||
}
|
||||
@@ -87,13 +88,15 @@ func testDateMetricIDCache(c *dateMetricIDCache, concurrent bool) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Verify c.Reset
|
||||
// Verify that cache becomes empty after two rotations.
|
||||
if n := c.Stats().Size; !concurrent && n < 123 {
|
||||
return fmt.Errorf("c.EntriesCount must return at least 123; returned %d", n)
|
||||
}
|
||||
c.mu.Lock()
|
||||
c.resetLocked()
|
||||
c.mu.Unlock()
|
||||
c.rotate()
|
||||
if n := c.Stats().Size; !concurrent && n < 123 {
|
||||
return fmt.Errorf("c.EntriesCount must return at least 123; returned %d", n)
|
||||
}
|
||||
c.rotate()
|
||||
if n := c.Stats().Size; !concurrent && n > 0 {
|
||||
return fmt.Errorf("c.EntriesCount must return 0 after reset; returned %d", n)
|
||||
}
|
||||
@@ -108,6 +111,7 @@ func TestDateMetricIDCacheIsConsistent(_ *testing.T) {
|
||||
numMetrics = 100000
|
||||
)
|
||||
dmc := newDateMetricIDCache()
|
||||
defer dmc.MustStop()
|
||||
var wg sync.WaitGroup
|
||||
for i := range concurrency {
|
||||
wg.Add(1)
|
||||
@@ -124,39 +128,9 @@ func TestDateMetricIDCacheIsConsistent(_ *testing.T) {
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func TestDateMetricIDCache_SizeMaxBytes(t *testing.T) {
|
||||
defer SetDateMetricIDCacheSize(0)
|
||||
|
||||
assertSizeMaxBytes := func(dmc *dateMetricIDCache, want uint64) {
|
||||
t.Helper()
|
||||
if got := dmc.Stats().SizeMaxBytes; got != want {
|
||||
t.Fatalf("unexpected sizeMaxBytes: got %d, want %d", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
defaultSizeMaxBytes := uint64(float64(memory.Allowed()) / 256)
|
||||
var dmc *dateMetricIDCache
|
||||
|
||||
// Default.
|
||||
dmc = newDateMetricIDCache()
|
||||
assertSizeMaxBytes(dmc, defaultSizeMaxBytes)
|
||||
|
||||
// Overriden.
|
||||
SetDateMetricIDCacheSize(1024)
|
||||
dmc = newDateMetricIDCache()
|
||||
assertSizeMaxBytes(dmc, 1024)
|
||||
|
||||
// Overriden at runtime.
|
||||
SetDateMetricIDCacheSize(2048)
|
||||
assertSizeMaxBytes(dmc, 2048)
|
||||
|
||||
// Reset to default at runtime.
|
||||
SetDateMetricIDCacheSize(0)
|
||||
assertSizeMaxBytes(dmc, defaultSizeMaxBytes)
|
||||
}
|
||||
|
||||
func TestDateMetricIDCache_Size(t *testing.T) {
|
||||
dmc := newDateMetricIDCache()
|
||||
defer dmc.MustStop()
|
||||
for i := range 100_000 {
|
||||
date := 12345 + uint64(i%30)
|
||||
metricID := uint64(i)
|
||||
@@ -182,6 +156,7 @@ func TestDateMetricIDCache_Size(t *testing.T) {
|
||||
|
||||
func TestDateMetricIDCache_SizeBytes(t *testing.T) {
|
||||
dmc := newDateMetricIDCache()
|
||||
defer dmc.MustStop()
|
||||
metricIDs := &uint64set.Set{}
|
||||
for i := range 100_000 {
|
||||
date := uint64(123)
|
||||
|
||||
@@ -14,9 +14,9 @@ const (
|
||||
tsidCacheFilename = "metricName_tsid"
|
||||
metricIDCacheFilename = "metricID_tsid"
|
||||
metricNameCacheFilename = "metricID_metricName"
|
||||
prevHourMetricIDsFilename = "prev_hour_metric_ids"
|
||||
currHourMetricIDsFilename = "curr_hour_metric_ids"
|
||||
nextDayMetricIDsFilename = "next_day_metric_ids_v2"
|
||||
prevHourMetricIDsFilename = "prev_hour_metric_ids_v2"
|
||||
currHourMetricIDsFilename = "curr_hour_metric_ids_v2"
|
||||
nextDayMetricIDsFilename = "next_day_metric_ids_v3"
|
||||
metricNameTrackerFilename = "metric_usage_tracker"
|
||||
)
|
||||
|
||||
|
||||
@@ -6,10 +6,8 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
@@ -23,7 +21,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/lrucache"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
@@ -74,12 +71,6 @@ const (
|
||||
|
||||
// indexDB represents an index db.
|
||||
type indexDB struct {
|
||||
// The number of references to indexDB struct.
|
||||
refCount atomic.Int32
|
||||
|
||||
// if the mustDrop is set to true, then the indexDB must be dropped after refCount reaches zero.
|
||||
mustDrop atomic.Bool
|
||||
|
||||
// The number of calls for date range searches.
|
||||
dateRangeSearchCalls atomic.Uint64
|
||||
|
||||
@@ -98,21 +89,24 @@ type indexDB struct {
|
||||
// The db must be automatically recovered after that.
|
||||
missingMetricNamesForMetricID atomic.Uint64
|
||||
|
||||
// minMissingTimestampByKey holds the minimum timestamps by index search key,
|
||||
// legacyMinMissingTimestampByKey holds the minimum timestamps by index search key,
|
||||
// which is missing in the given indexDB.
|
||||
// Key must be formed with marshalCommonPrefix function.
|
||||
//
|
||||
// This field is used at containsTimeRange() function only for the previous indexDB,
|
||||
// since this indexDB is readonly.
|
||||
// This field cannot be used for the current indexDB, since it may receive data
|
||||
// This field is used at legacyContainsTimeRange() function only for the
|
||||
// legacy indexDBs, since these indexDBs are readonly.
|
||||
// This field cannot be used for the partition indexDBs, since they may receive data
|
||||
// with bigger timestamps at any time.
|
||||
minMissingTimestampByKey map[string]int64
|
||||
// protects minMissingTimestampByKey
|
||||
minMissingTimestampByKeyLock sync.Mutex
|
||||
legacyMinMissingTimestampByKey map[string]int64
|
||||
// protects legacyMinMissingTimestampByKey
|
||||
legacyMinMissingTimestampByKeyLock sync.Mutex
|
||||
|
||||
// generation identifies the index generation ID
|
||||
// and is used for syncing items from different indexDBs
|
||||
generation uint64
|
||||
// id identifies the indexDB. It is used for in various caches to know which
|
||||
// indexDB contains a metricID and which does not.
|
||||
id uint64
|
||||
|
||||
// Time range covered by this IndexDB.
|
||||
tr TimeRange
|
||||
|
||||
name string
|
||||
tb *mergeset.Table
|
||||
@@ -123,10 +117,6 @@ type indexDB struct {
|
||||
|
||||
// noRegisterNewSeries indicates whether the indexDB receives new entries or
|
||||
// not.
|
||||
//
|
||||
// Note that setting this field to true won't disable registering new
|
||||
// index entries (should they arrive). It is solely used to decide whether
|
||||
// the containsTimeRange() optimization can be applied to the indexDB.
|
||||
noRegisterNewSeries atomic.Bool
|
||||
|
||||
// Cache for fast TagFilters -> MetricIDs lookup.
|
||||
@@ -136,11 +126,25 @@ type indexDB struct {
|
||||
// the amount of work when matching a set of filters.
|
||||
loopsPerDateTagFilterCache *lrucache.Cache
|
||||
|
||||
// A cache that stores metricIDs that have been added to the index.
|
||||
// The cache is not populated on startup nor does it store a complete set of
|
||||
// metricIDs. A metricID is added to the cache either when a new entry is
|
||||
// added to the global index or when the global index is searched for
|
||||
// existing metricID (see is.createGlobalIndexes() and is.hasMetricID()).
|
||||
//
|
||||
// The cache is used solely for creating new index entries during the data
|
||||
// ingestion (see Storage.RegisterMetricNames() and Storage.add())
|
||||
metricIDCache *metricIDCache
|
||||
|
||||
// dateMetricIDCache is (date, metricID) cache that is used to speed up the
|
||||
// data ingestion by storing the is.hasDateMetricID() search results in
|
||||
// memory.
|
||||
dateMetricIDCache *dateMetricIDCache
|
||||
|
||||
// An inmemory set of deleted metricIDs.
|
||||
deletedMetricIDs atomic.Pointer[uint64set.Set]
|
||||
deletedMetricIDsUpdateLock sync.Mutex
|
||||
|
||||
indexSearchPool sync.Pool
|
||||
}
|
||||
|
||||
@@ -162,36 +166,27 @@ func getTagFiltersLoopsCacheSize() uint64 {
|
||||
return uint64(float64(memory.Allowed()) / 128)
|
||||
}
|
||||
|
||||
// mustOpenIndexDB opens index db from the given path.
|
||||
//
|
||||
// The last segment of the path should contain unique hex value which
|
||||
// will be then used as indexDB.generation
|
||||
func mustOpenIndexDB(path string, s *Storage, isReadOnly *atomic.Bool, noRegisterNewSeries bool) *indexDB {
|
||||
func mustOpenIndexDB(id uint64, tr TimeRange, name, path string, s *Storage, isReadOnly *atomic.Bool, noRegisterNewSeries bool) *indexDB {
|
||||
if s == nil {
|
||||
logger.Panicf("BUG: Storage must be non-nil")
|
||||
}
|
||||
|
||||
name := filepath.Base(path)
|
||||
gen, err := strconv.ParseUint(name, 16, 64)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot parse indexdb path %q: %s", path, err)
|
||||
logger.Panicf("BUG: Storage must not be nil")
|
||||
}
|
||||
|
||||
tfssCache := lrucache.NewCache(getTagFiltersCacheSize)
|
||||
tb := mergeset.MustOpenTable(path, dataFlushInterval, tfssCache.Reset, mergeTagToMetricIDsRows, isReadOnly)
|
||||
db := &indexDB{
|
||||
generation: gen,
|
||||
tb: tb,
|
||||
name: name,
|
||||
|
||||
minMissingTimestampByKey: make(map[string]int64),
|
||||
tagFiltersToMetricIDsCache: tfssCache,
|
||||
s: s,
|
||||
loopsPerDateTagFilterCache: lrucache.NewCache(getTagFiltersLoopsCacheSize),
|
||||
dateMetricIDCache: newDateMetricIDCache(),
|
||||
legacyMinMissingTimestampByKey: make(map[string]int64),
|
||||
id: id,
|
||||
tr: tr,
|
||||
name: name,
|
||||
tb: tb,
|
||||
s: s,
|
||||
tagFiltersToMetricIDsCache: tfssCache,
|
||||
loopsPerDateTagFilterCache: lrucache.NewCache(getTagFiltersLoopsCacheSize),
|
||||
metricIDCache: newMetricIDCache(),
|
||||
dateMetricIDCache: newDateMetricIDCache(),
|
||||
}
|
||||
db.noRegisterNewSeries.Store(noRegisterNewSeries)
|
||||
db.incRef()
|
||||
db.mustLoadDeletedMetricIDs()
|
||||
return db
|
||||
}
|
||||
|
||||
@@ -206,12 +201,18 @@ type IndexDBMetrics struct {
|
||||
TagFiltersToMetricIDsCacheMisses uint64
|
||||
TagFiltersToMetricIDsCacheResets uint64
|
||||
|
||||
DateMetricIDCacheSize uint64
|
||||
DateMetricIDCacheSizeBytes uint64
|
||||
DateMetricIDCacheSizeMaxBytes uint64
|
||||
DateMetricIDCacheSyncsCount uint64
|
||||
DateMetricIDCacheResetsCount uint64
|
||||
MetricIDCacheSize uint64
|
||||
MetricIDCacheSizeBytes uint64
|
||||
MetricIDCacheSyncsCount uint64
|
||||
MetricIDCacheRotationsCount uint64
|
||||
|
||||
DateMetricIDCacheSize uint64
|
||||
DateMetricIDCacheSizeBytes uint64
|
||||
DateMetricIDCacheSyncsCount uint64
|
||||
DateMetricIDCacheRotationsCount uint64
|
||||
|
||||
// Used by legacy indexDBs only.
|
||||
// See UpdateMetrics() in index_db_legacy.go
|
||||
IndexDBRefCount uint64
|
||||
|
||||
RecentHourMetricIDsSearchCalls uint64
|
||||
@@ -234,10 +235,6 @@ type IndexDBMetrics struct {
|
||||
mergeset.TableMetrics
|
||||
}
|
||||
|
||||
func (db *indexDB) scheduleToDrop() {
|
||||
db.mustDrop.Store(true)
|
||||
}
|
||||
|
||||
// UpdateMetrics updates m with metrics from the db.
|
||||
func (db *indexDB) UpdateMetrics(m *IndexDBMetrics) {
|
||||
// global index metrics
|
||||
@@ -250,7 +247,7 @@ func (db *indexDB) UpdateMetrics(m *IndexDBMetrics) {
|
||||
|
||||
// Report only once and for an indexDB instance whose tagFiltersCache is
|
||||
// utilized the most.
|
||||
if db.tagFiltersToMetricIDsCache.SizeBytes() > m.TagFiltersToMetricIDsCacheSizeBytes {
|
||||
if m.TagFiltersToMetricIDsCacheSizeBytes == 0 || db.tagFiltersToMetricIDsCache.SizeBytes() > m.TagFiltersToMetricIDsCacheSizeBytes {
|
||||
m.TagFiltersToMetricIDsCacheSize = uint64(db.tagFiltersToMetricIDsCache.Len())
|
||||
m.TagFiltersToMetricIDsCacheSizeBytes = db.tagFiltersToMetricIDsCache.SizeBytes()
|
||||
m.TagFiltersToMetricIDsCacheSizeMaxBytes = db.tagFiltersToMetricIDsCache.SizeMaxBytes()
|
||||
@@ -259,17 +256,25 @@ func (db *indexDB) UpdateMetrics(m *IndexDBMetrics) {
|
||||
m.TagFiltersToMetricIDsCacheResets = db.tagFiltersToMetricIDsCache.Resets()
|
||||
}
|
||||
|
||||
// Report only once and for an indexDB instance whose metricIDCache is
|
||||
// utilized the most.
|
||||
mcs := db.metricIDCache.Stats()
|
||||
if m.MetricIDCacheSizeBytes == 0 || mcs.SizeBytes > m.MetricIDCacheSizeBytes {
|
||||
m.MetricIDCacheSize = mcs.Size
|
||||
m.MetricIDCacheSizeBytes = mcs.SizeBytes
|
||||
m.MetricIDCacheSyncsCount = mcs.SyncsCount
|
||||
m.MetricIDCacheRotationsCount = mcs.RotationsCount
|
||||
}
|
||||
|
||||
// Report only once and for an indexDB instance whose dateMetricIDCache is
|
||||
// utilized the most.
|
||||
dmcs := db.dateMetricIDCache.Stats()
|
||||
if dmcs.SizeBytes > m.DateMetricIDCacheSizeBytes {
|
||||
if m.DateMetricIDCacheSizeBytes == 0 || dmcs.SizeBytes > m.DateMetricIDCacheSizeBytes {
|
||||
m.DateMetricIDCacheSize = dmcs.Size
|
||||
m.DateMetricIDCacheSizeBytes = dmcs.SizeBytes
|
||||
m.DateMetricIDCacheSizeMaxBytes = dmcs.SizeMaxBytes
|
||||
m.DateMetricIDCacheSyncsCount = dmcs.SyncsCount
|
||||
m.DateMetricIDCacheResetsCount = dmcs.ResetsCount
|
||||
m.DateMetricIDCacheRotationsCount = dmcs.RotationsCount
|
||||
}
|
||||
m.IndexDBRefCount += uint64(db.refCount.Load())
|
||||
|
||||
m.DateRangeSearchCalls += db.dateRangeSearchCalls.Load()
|
||||
m.DateRangeSearchHits += db.dateRangeSearchHits.Load()
|
||||
@@ -283,23 +288,6 @@ func (db *indexDB) UpdateMetrics(m *IndexDBMetrics) {
|
||||
|
||||
// MustClose closes db.
|
||||
func (db *indexDB) MustClose() {
|
||||
db.decRef()
|
||||
}
|
||||
|
||||
func (db *indexDB) incRef() {
|
||||
db.refCount.Add(1)
|
||||
}
|
||||
|
||||
func (db *indexDB) decRef() {
|
||||
n := db.refCount.Add(-1)
|
||||
if n < 0 {
|
||||
logger.Panicf("BUG: %q negative refCount: %d", db.name, n)
|
||||
}
|
||||
if n > 0 {
|
||||
return
|
||||
}
|
||||
|
||||
tbPath := db.tb.Path()
|
||||
db.tb.MustClose()
|
||||
db.tb = nil
|
||||
db.s = nil
|
||||
@@ -307,17 +295,13 @@ func (db *indexDB) decRef() {
|
||||
// Free space occupied by caches owned by db.
|
||||
db.tagFiltersToMetricIDsCache.MustStop()
|
||||
db.loopsPerDateTagFilterCache.MustStop()
|
||||
db.metricIDCache.MustStop()
|
||||
db.dateMetricIDCache.MustStop()
|
||||
|
||||
db.tagFiltersToMetricIDsCache = nil
|
||||
db.loopsPerDateTagFilterCache = nil
|
||||
|
||||
if !db.mustDrop.Load() {
|
||||
return
|
||||
}
|
||||
|
||||
logger.Infof("dropping indexDB %q", tbPath)
|
||||
fs.MustRemoveDir(tbPath)
|
||||
logger.Infof("indexDB %q has been dropped", tbPath)
|
||||
db.metricIDCache = nil
|
||||
db.dateMetricIDCache = nil
|
||||
}
|
||||
|
||||
// getMetricIDsFromTagFiltersCache retrieves the set of metricIDs that
|
||||
@@ -442,6 +426,13 @@ func generateTSID(dst *TSID, mn *MetricName) {
|
||||
}
|
||||
|
||||
func (db *indexDB) createGlobalIndexes(tsid *TSID, mn *MetricName) {
|
||||
if db.noRegisterNewSeries.Load() {
|
||||
logger.Panicf("BUG: registration of new series is disabled for indexDB %q", db.name)
|
||||
}
|
||||
|
||||
// Add new metricID to cache.
|
||||
db.metricIDCache.Set(tsid.MetricID)
|
||||
|
||||
ii := getIndexItems()
|
||||
defer putIndexItems(ii)
|
||||
|
||||
@@ -598,7 +589,7 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer,
|
||||
ts := &is.ts
|
||||
kb := &is.kb
|
||||
mp := &is.mp
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
loopsPaceLimiter := 0
|
||||
underscoreNameSeen := false
|
||||
nsPrefixExpected := byte(nsPrefixDateTagToMetricIDs)
|
||||
@@ -681,7 +672,7 @@ func (is *indexSearch) getLabelNamesForMetricIDs(qt *querytracer.Tracer, metricI
|
||||
lns["__name__"] = struct{}{}
|
||||
}
|
||||
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
|
||||
var mn MetricName
|
||||
foundLabelNames := 0
|
||||
@@ -874,7 +865,7 @@ func (is *indexSearch) searchLabelValuesOnDate(qt *querytracer.Tracer, labelName
|
||||
ts := &is.ts
|
||||
kb := &is.kb
|
||||
mp := &is.mp
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
loopsPaceLimiter := 0
|
||||
nsPrefixExpected := byte(nsPrefixDateTagToMetricIDs)
|
||||
if date == globalIndexDate {
|
||||
@@ -928,8 +919,7 @@ func (is *indexSearch) getLabelValuesForMetricIDs(qt *querytracer.Tracer, labelN
|
||||
}
|
||||
|
||||
lvs := make(map[string]struct{})
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
var mn MetricName
|
||||
foundLabelValues := 0
|
||||
var buf []byte
|
||||
@@ -1055,7 +1045,7 @@ func (is *indexSearch) searchTagValueSuffixesForPrefix(nsPrefix byte, prefix []b
|
||||
kb := &is.kb
|
||||
ts := &is.ts
|
||||
mp := &is.mp
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
loopsPaceLimiter := 0
|
||||
ts.Seek(prefix)
|
||||
tvss := make(map[string]struct{})
|
||||
@@ -1315,7 +1305,7 @@ func (is *indexSearch) getTSDBStatus(qt *querytracer.Tracer, tfss []*TagFilters,
|
||||
ts := &is.ts
|
||||
kb := &is.kb
|
||||
mp := &is.mp
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
thSeriesCountByMetricName := newTopHeap(topN)
|
||||
thSeriesCountByLabelName := newTopHeap(topN)
|
||||
thSeriesCountByFocusLabelValue := newTopHeap(topN)
|
||||
@@ -1580,10 +1570,10 @@ func (db *indexDB) saveDeletedMetricIDs(metricIDs *uint64set.Set) {
|
||||
}
|
||||
|
||||
// atomically add deleted metricIDs to an inmemory map.
|
||||
db.s.updateDeletedMetricIDs(metricIDs)
|
||||
db.updateDeletedMetricIDs(metricIDs)
|
||||
|
||||
// Reset MetricName -> TSID cache, since it may contain deleted TSIDs.
|
||||
db.s.resetAndSaveTSIDCache()
|
||||
// Do not reset tsidCache (MetricName -> TSID), since a given TSID can be
|
||||
// deleted in one indexDB but still be used in another indexDB.
|
||||
|
||||
// Do not reset Storage's metricIDCache (MetricID -> TSID) and
|
||||
// metricNameCache (MetricID -> MetricName) since they must be used only
|
||||
@@ -1615,10 +1605,11 @@ func (db *indexDB) saveDeletedMetricIDs(metricIDs *uint64set.Set) {
|
||||
// required to search metricIDs, but it is used at the stage when it does
|
||||
// not matter whether a metricID is deleted or not.
|
||||
|
||||
// Do not reset dateMetricIDCache. The cache is used during data ingestion
|
||||
// to decide whether a metricID needs to be added to the per-day index and
|
||||
// index records must not be created for deleted metricIDs. But presence of
|
||||
// deleted metricID in this cache will not lead to an index record creation.
|
||||
// Do not reset metricIDCache and dateMetricIDCache. These caches are used
|
||||
// during data ingestion to decide whether a metricID needs to be added to
|
||||
// the per-day index and index records must not be created for deleted
|
||||
// metricIDs. But presence of deleted metricID in these caches will not lead
|
||||
// to an index record creation.
|
||||
|
||||
// Store the metricIDs as deleted.
|
||||
items := getIndexItems()
|
||||
@@ -1635,14 +1626,32 @@ func (db *indexDB) saveDeletedMetricIDs(metricIDs *uint64set.Set) {
|
||||
putIndexItems(items)
|
||||
}
|
||||
|
||||
func (db *indexDB) loadDeletedMetricIDs() (*uint64set.Set, error) {
|
||||
func (db *indexDB) getDeletedMetricIDs() *uint64set.Set {
|
||||
return db.deletedMetricIDs.Load()
|
||||
}
|
||||
|
||||
func (db *indexDB) setDeletedMetricIDs(dmis *uint64set.Set) {
|
||||
db.deletedMetricIDs.Store(dmis)
|
||||
}
|
||||
|
||||
func (db *indexDB) updateDeletedMetricIDs(metricIDs *uint64set.Set) {
|
||||
db.deletedMetricIDsUpdateLock.Lock()
|
||||
dmisOld := db.getDeletedMetricIDs()
|
||||
dmisNew := dmisOld.Clone()
|
||||
dmisNew.Union(metricIDs)
|
||||
db.setDeletedMetricIDs(dmisNew)
|
||||
db.deletedMetricIDsUpdateLock.Unlock()
|
||||
}
|
||||
|
||||
func (db *indexDB) mustLoadDeletedMetricIDs() {
|
||||
is := db.getIndexSearch(noDeadline)
|
||||
dmis, err := is.loadDeletedMetricIDs()
|
||||
db.putIndexSearch(is)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
logger.Panicf("FATAL: cannot load deleted metricIDs for indexDB %q: %v", db.name, err)
|
||||
return
|
||||
}
|
||||
return dmis, nil
|
||||
db.setDeletedMetricIDs(dmis)
|
||||
}
|
||||
|
||||
func (is *indexSearch) loadDeletedMetricIDs() (*uint64set.Set, error) {
|
||||
@@ -1854,8 +1863,9 @@ func (db *indexDB) SearchMetricNames(qt *querytracer.Tracer, tfss []*TagFilters,
|
||||
|
||||
var tagFiltersKeyBufPool bytesutil.ByteBufferPool
|
||||
|
||||
func (is *indexSearch) getTSIDByMetricName(dst *generationTSID, metricName []byte, date uint64) bool {
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
func (is *indexSearch) getTSIDByMetricName(dst *TSID, metricName []byte, date uint64) bool {
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
|
||||
ts := &is.ts
|
||||
kb := &is.kb
|
||||
|
||||
@@ -1874,18 +1884,17 @@ func (is *indexSearch) getTSIDByMetricName(dst *generationTSID, metricName []byt
|
||||
return false
|
||||
}
|
||||
v := ts.Item[len(kb.B):]
|
||||
tail, err := dst.TSID.Unmarshal(v)
|
||||
tail, err := dst.Unmarshal(v)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot unmarshal TSID: %s", err)
|
||||
}
|
||||
if len(tail) > 0 {
|
||||
logger.Panicf("FATAL: unexpected non-empty tail left after unmarshaling TSID: %X", tail)
|
||||
}
|
||||
if dmis.Has(dst.TSID.MetricID) {
|
||||
if dmis.Has(dst.MetricID) {
|
||||
// The dst is deleted. Continue searching.
|
||||
continue
|
||||
}
|
||||
dst.generation = is.db.generation
|
||||
// Found valid dst.
|
||||
return true
|
||||
}
|
||||
@@ -1928,73 +1937,6 @@ func (is *indexSearch) searchMetricName(dst []byte, metricID uint64) ([]byte, bo
|
||||
return dst, true
|
||||
}
|
||||
|
||||
func (is *indexSearch) containsTimeRange(tr TimeRange) bool {
|
||||
if tr == globalIndexTimeRange {
|
||||
return true
|
||||
}
|
||||
|
||||
db := is.db
|
||||
if !db.noRegisterNewSeries.Load() {
|
||||
// indexDB could register new time series - it is not safe to cache minMissingTimestamp
|
||||
return true
|
||||
}
|
||||
// The db corresponds to the previous indexDB, which is readonly.
|
||||
// So it is safe caching the minimum timestamp, which isn't covered by the db.
|
||||
|
||||
// use common prefix as a key for minMissingTimestamp
|
||||
// it's needed to properly track timestamps for cluster version
|
||||
// which uses tenant labels for the index search
|
||||
kb := &is.kb
|
||||
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixDateToMetricID)
|
||||
key := kb.B
|
||||
|
||||
db.minMissingTimestampByKeyLock.Lock()
|
||||
minMissingTimestamp, ok := db.minMissingTimestampByKey[string(key)]
|
||||
db.minMissingTimestampByKeyLock.Unlock()
|
||||
|
||||
if ok && tr.MinTimestamp >= minMissingTimestamp {
|
||||
return false
|
||||
}
|
||||
if is.containsTimeRangeSlowForPrefixBuf(kb, tr) {
|
||||
return true
|
||||
}
|
||||
|
||||
db.minMissingTimestampByKeyLock.Lock()
|
||||
minMissingTimestamp, ok = db.minMissingTimestampByKey[string(key)]
|
||||
if !ok || tr.MinTimestamp < minMissingTimestamp {
|
||||
db.minMissingTimestampByKey[string(key)] = tr.MinTimestamp
|
||||
}
|
||||
db.minMissingTimestampByKeyLock.Unlock()
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (is *indexSearch) containsTimeRangeSlowForPrefixBuf(prefixBuf *bytesutil.ByteBuffer, tr TimeRange) bool {
|
||||
ts := &is.ts
|
||||
|
||||
// Verify whether the tr.MinTimestamp is included into `ts` or is smaller than the minimum date stored in `ts`.
|
||||
// Do not check whether tr.MaxTimestamp is included into `ts` or is bigger than the max date stored in `ts` for performance reasons.
|
||||
// This means that containsTimeRangeSlow() can return true if `tr` is located below the min date stored in `ts`.
|
||||
// This is OK, since this case isn't encountered too much in practice.
|
||||
// The main practical case allows skipping searching in prev indexdb (`ts`) when `tr`
|
||||
// is located above the max date stored there.
|
||||
minDate := uint64(tr.MinTimestamp) / msecPerDay
|
||||
prefix := prefixBuf.B
|
||||
prefixBuf.B = encoding.MarshalUint64(prefixBuf.B, minDate)
|
||||
ts.Seek(prefixBuf.B)
|
||||
if !ts.NextItem() {
|
||||
if err := ts.Error(); err != nil {
|
||||
logger.Panicf("FATAL: error when searching for minDate=%d, prefix %q: %w", minDate, prefixBuf.B, err)
|
||||
}
|
||||
return false
|
||||
}
|
||||
if !bytes.HasPrefix(ts.Item, prefix) {
|
||||
// minDate exceeds max date from ts.
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (is *indexSearch) getTSIDByMetricID(dst *TSID, metricID uint64) bool {
|
||||
// There is no need in checking for deleted metricIDs here, since they
|
||||
// must be checked by the caller.
|
||||
@@ -2265,7 +2207,7 @@ func (is *indexSearch) searchMetricIDs(qt *querytracer.Tracer, tfss []*TagFilter
|
||||
}
|
||||
|
||||
// Filter out deleted metricIDs.
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
metricIDs.Subtract(dmis)
|
||||
|
||||
return metricIDs, nil
|
||||
@@ -2284,9 +2226,7 @@ func (is *indexSearch) searchMetricIDsInternal(qt *querytracer.Tracer, tfss []*T
|
||||
|
||||
metricIDs := &uint64set.Set{}
|
||||
|
||||
// Always returns (true, nil) for zero time range used to indicate global
|
||||
// index search.
|
||||
if !is.containsTimeRange(tr) {
|
||||
if !is.legacyContainsTimeRange(tr) {
|
||||
qt.Printf("indexdb doesn't contain data for the given timeRange=%s", &tr)
|
||||
return metricIDs, nil
|
||||
}
|
||||
@@ -2778,18 +2718,16 @@ const (
|
||||
)
|
||||
|
||||
func (db *indexDB) createPerDayIndexes(date uint64, tsid *TSID, mn *MetricName) {
|
||||
// Note that even if per-day indexes are disabled (i.e.
|
||||
// db.s.disablePerDayIndex == true), we still need to add the entry to this
|
||||
// cache because Storage.prefillNextIndexDB() relies on
|
||||
// indexDB.hasDateMetricID() to decide whether the index records given
|
||||
// metricID need to be created and without this cache the next indexDB
|
||||
// prefill will be significantly slower when per-day indexes are disabled.
|
||||
db.dateMetricIDCache.Set(date, tsid.MetricID)
|
||||
if db.noRegisterNewSeries.Load() {
|
||||
logger.Panicf("BUG: registration of new series is disabled for indexDB %q", db.name)
|
||||
}
|
||||
|
||||
if db.s.disablePerDayIndex {
|
||||
return
|
||||
}
|
||||
|
||||
db.dateMetricIDCache.Set(date, tsid.MetricID)
|
||||
|
||||
ii := getIndexItems()
|
||||
defer putIndexItems(ii)
|
||||
|
||||
@@ -2929,13 +2867,7 @@ func (is *indexSearch) hasDateMetricID(date, metricID uint64) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
var ok bool
|
||||
if date == globalIndexDate {
|
||||
ok = is.hasMetricID(metricID)
|
||||
} else {
|
||||
ok = is.hasDateMetricIDSlow(date, metricID)
|
||||
}
|
||||
|
||||
ok := is.hasDateMetricIDSlow(date, metricID)
|
||||
if ok {
|
||||
is.db.dateMetricIDCache.Set(date, metricID)
|
||||
}
|
||||
@@ -2963,6 +2895,18 @@ func (is *indexSearch) hasDateMetricIDSlow(date, metricID uint64) bool {
|
||||
}
|
||||
|
||||
func (is *indexSearch) hasMetricID(metricID uint64) bool {
|
||||
if is.db.metricIDCache.Has(metricID) {
|
||||
return true
|
||||
}
|
||||
|
||||
ok := is.hasMetricIDSlow(metricID)
|
||||
if ok {
|
||||
is.db.metricIDCache.Set(metricID)
|
||||
}
|
||||
return ok
|
||||
}
|
||||
|
||||
func (is *indexSearch) hasMetricIDSlow(metricID uint64) bool {
|
||||
ts := &is.ts
|
||||
kb := &is.kb
|
||||
kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixMetricIDToTSID)
|
||||
@@ -2971,7 +2915,7 @@ func (is *indexSearch) hasMetricID(metricID uint64) bool {
|
||||
if err == io.EOF {
|
||||
return false
|
||||
}
|
||||
logger.Panicf("FATAL: error when for metricID=%d; searchPrefix %q: %s", metricID, kb.B, err)
|
||||
logger.Panicf("FATAL: error when searching for metricID=%d; searchPrefix %q: %s", metricID, kb.B, err)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
153
lib/storage/index_db_legacy.go
Normal file
153
lib/storage/index_db_legacy.go
Normal file
@@ -0,0 +1,153 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// legacyIndexDB is a wrapper around indexDB that provides reference counting
|
||||
type legacyIndexDB struct {
|
||||
// The number of references to legacyIndexDB struct.
|
||||
refCount atomic.Int32
|
||||
|
||||
// if the mustDrop is set to true, then the legacyIndexDB must be dropped after refCount reaches zero.
|
||||
mustDrop atomic.Bool
|
||||
|
||||
idb *indexDB
|
||||
}
|
||||
|
||||
func (db *legacyIndexDB) incRef() {
|
||||
db.refCount.Add(1)
|
||||
}
|
||||
|
||||
func (db *legacyIndexDB) decRef() {
|
||||
n := db.refCount.Add(-1)
|
||||
if n < 0 {
|
||||
logger.Panicf("BUG: %q negative refCount: %d", db.idb.name, n)
|
||||
}
|
||||
if n > 0 {
|
||||
return
|
||||
}
|
||||
|
||||
tbPath := db.idb.tb.Path()
|
||||
db.idb.MustClose()
|
||||
|
||||
if !db.mustDrop.Load() {
|
||||
return
|
||||
}
|
||||
|
||||
logger.Infof("dropping indexDB %q", tbPath)
|
||||
fs.MustRemoveDir(tbPath)
|
||||
logger.Infof("indexDB %q has been dropped", tbPath)
|
||||
}
|
||||
|
||||
func (db *legacyIndexDB) scheduleToDrop() {
|
||||
db.mustDrop.Store(true)
|
||||
}
|
||||
|
||||
func (db *legacyIndexDB) MustClose() {
|
||||
rc := db.refCount.Load()
|
||||
if rc != 1 {
|
||||
logger.Fatalf("BUG: %q unexpected legacy indexDB refCount: %d", db.idb.name, rc)
|
||||
}
|
||||
db.decRef()
|
||||
}
|
||||
|
||||
func (db *legacyIndexDB) UpdateMetrics(m *IndexDBMetrics) {
|
||||
db.idb.UpdateMetrics(m)
|
||||
m.IndexDBRefCount += uint64(db.refCount.Load())
|
||||
}
|
||||
|
||||
// mustOpenLegacyIndexDB opens legacy index db from the given path.
|
||||
//
|
||||
// The last segment of the path should contain unique hex value which
|
||||
// will be then used as indexDB.generation
|
||||
func mustOpenLegacyIndexDB(path string, s *Storage) *legacyIndexDB {
|
||||
name := filepath.Base(path)
|
||||
id, err := strconv.ParseUint(name, 16, 64)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot parse indexdb path %q: %s", path, err)
|
||||
}
|
||||
|
||||
tr := TimeRange{
|
||||
MinTimestamp: 0,
|
||||
MaxTimestamp: math.MaxInt64,
|
||||
}
|
||||
idb := mustOpenIndexDB(id, tr, name, path, s, &s.isReadOnly, true)
|
||||
legacyIDB := &legacyIndexDB{idb: idb}
|
||||
legacyIDB.incRef()
|
||||
return legacyIDB
|
||||
}
|
||||
|
||||
func (is *indexSearch) legacyContainsTimeRange(tr TimeRange) bool {
|
||||
if tr == globalIndexTimeRange {
|
||||
return true
|
||||
}
|
||||
|
||||
db := is.db
|
||||
if !db.noRegisterNewSeries.Load() {
|
||||
// indexDB could register new time series - it is not safe to cache minMissingTimestamp
|
||||
return true
|
||||
}
|
||||
|
||||
// use common prefix as a key for minMissingTimestamp
|
||||
// it's needed to properly track timestamps for cluster version
|
||||
// which uses tenant labels for the index search
|
||||
kb := &is.kb
|
||||
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixDateToMetricID)
|
||||
key := kb.B
|
||||
|
||||
db.legacyMinMissingTimestampByKeyLock.Lock()
|
||||
minMissingTimestamp, ok := db.legacyMinMissingTimestampByKey[string(key)]
|
||||
db.legacyMinMissingTimestampByKeyLock.Unlock()
|
||||
|
||||
if ok && tr.MinTimestamp >= minMissingTimestamp {
|
||||
return false
|
||||
}
|
||||
if is.legacyContainsTimeRangeSlow(kb, tr) {
|
||||
return true
|
||||
}
|
||||
|
||||
db.legacyMinMissingTimestampByKeyLock.Lock()
|
||||
minMissingTimestamp, ok = db.legacyMinMissingTimestampByKey[string(key)]
|
||||
if !ok || tr.MinTimestamp < minMissingTimestamp {
|
||||
db.legacyMinMissingTimestampByKey[string(key)] = tr.MinTimestamp
|
||||
}
|
||||
db.legacyMinMissingTimestampByKeyLock.Unlock()
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (is *indexSearch) legacyContainsTimeRangeSlow(prefixBuf *bytesutil.ByteBuffer, tr TimeRange) bool {
|
||||
ts := &is.ts
|
||||
|
||||
// Verify whether the tr.MinTimestamp is included into `ts` or is smaller than the minimum date stored in `ts`.
|
||||
// Do not check whether tr.MaxTimestamp is included into `ts` or is bigger than the max date stored in `ts` for performance reasons.
|
||||
// This means that this func can return true if `tr` is located below the min date stored in `ts`.
|
||||
// This is OK, since this case isn't encountered too much in practice.
|
||||
// The main practical case allows skipping searching in prev indexdb (`ts`) when `tr`
|
||||
// is located above the max date stored there.
|
||||
minDate := uint64(tr.MinTimestamp) / msecPerDay
|
||||
prefix := prefixBuf.B
|
||||
prefixBuf.B = encoding.MarshalUint64(prefixBuf.B, minDate)
|
||||
ts.Seek(prefixBuf.B)
|
||||
if !ts.NextItem() {
|
||||
if err := ts.Error(); err != nil {
|
||||
logger.Panicf("FATAL: error when searching for minDate=%d, prefix %q: %w", minDate, prefixBuf.B, err)
|
||||
}
|
||||
return false
|
||||
}
|
||||
if !bytes.HasPrefix(ts.Item, prefix) {
|
||||
// minDate exceeds max date from ts.
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
136
lib/storage/index_db_legacy_test.go
Normal file
136
lib/storage/index_db_legacy_test.go
Normal file
@@ -0,0 +1,136 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestLegacyContainsTimeRange(t *testing.T) {
|
||||
defer testRemoveAll(t)
|
||||
|
||||
rng := rand.New(rand.NewSource(1))
|
||||
const numMetrics = 10000
|
||||
trPrev := TimeRange{
|
||||
MinTimestamp: time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(2025, 1, 15, 23, 59, 59, 999_999_999, time.UTC).UnixMilli(),
|
||||
}
|
||||
trCurr := TimeRange{
|
||||
MinTimestamp: time.Date(2025, 1, 16, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(2025, 1, 31, 23, 59, 59, 999_999_999, time.UTC).UnixMilli(),
|
||||
}
|
||||
trPt := TimeRange{
|
||||
MinTimestamp: time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(2025, 1, 31, 23, 59, 59, 999_999_999, time.UTC).UnixMilli(),
|
||||
}
|
||||
mrsPrev := testGenerateMetricRowsWithPrefix(rng, numMetrics, "legacy_prev", trPrev)
|
||||
mrsCurr := testGenerateMetricRowsWithPrefix(rng, numMetrics, "legacy_curr", trCurr)
|
||||
mrsPt := testGenerateMetricRowsWithPrefix(rng, numMetrics, "pt", trPt)
|
||||
|
||||
f := func(idb *indexDB, tr TimeRange, want bool) {
|
||||
t.Helper()
|
||||
is := idb.getIndexSearch(noDeadline)
|
||||
defer idb.putIndexSearch(is)
|
||||
|
||||
got := is.legacyContainsTimeRange(tr)
|
||||
|
||||
if got != want {
|
||||
t.Fatalf("legacyContainsTimeRange(%s) for index db %s returns unexpected result: got %t, want %t", tr.String(), idb.name, got, want)
|
||||
}
|
||||
}
|
||||
|
||||
// fill legacy index with data
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{})
|
||||
s.AddRows(mrsPrev, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
s = mustConvertToLegacy(s)
|
||||
s.AddRows(mrsCurr, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
s = mustConvertToLegacy(s)
|
||||
// fill partitioned index with data
|
||||
s.AddRows(mrsPt, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
defer s.MustClose()
|
||||
|
||||
legacyIDBs := s.getLegacyIndexDBs()
|
||||
defer s.putLegacyIndexDBs(legacyIDBs)
|
||||
|
||||
ptws := s.tb.GetPartitions(trPt)
|
||||
defer s.tb.PutPartitions(ptws)
|
||||
if len(ptws) != 1 {
|
||||
t.Fatalf("unexpected number of partitions for one month time range %v: got %d, want 1", &trPt, len(ptws))
|
||||
}
|
||||
idb := ptws[0].pt.idb
|
||||
|
||||
var tr TimeRange
|
||||
|
||||
// Global index time range.
|
||||
tr = globalIndexTimeRange
|
||||
f(legacyIDBs.getIDBPrev(), tr, true)
|
||||
f(legacyIDBs.getIDBCurr(), tr, true)
|
||||
f(idb, tr, true)
|
||||
|
||||
// Fully before trPrev, trCurr, and trPt.
|
||||
tr = TimeRange{
|
||||
MinTimestamp: time.Date(2024, 12, 1, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(2024, 12, 31, 23, 59, 59, 999_999_999, time.UTC).UnixMilli(),
|
||||
}
|
||||
f(legacyIDBs.getIDBPrev(), tr, true)
|
||||
f(legacyIDBs.getIDBCurr(), tr, true)
|
||||
f(idb, tr, true)
|
||||
|
||||
// Overlaps with trPrev and trPt on the left side, fully before trCurr.
|
||||
tr = TimeRange{
|
||||
MinTimestamp: time.Date(2024, 12, 1, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(2025, 1, 7, 23, 59, 59, 999_999_999, time.UTC).UnixMilli(),
|
||||
}
|
||||
f(legacyIDBs.getIDBPrev(), tr, true)
|
||||
f(legacyIDBs.getIDBCurr(), tr, true)
|
||||
f(idb, tr, true)
|
||||
|
||||
// Fully inside trPrev and trPt, fully before trCurr.
|
||||
tr = TimeRange{
|
||||
MinTimestamp: time.Date(2025, 1, 2, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(2025, 1, 7, 23, 59, 59, 999_999_999, time.UTC).UnixMilli(),
|
||||
}
|
||||
f(legacyIDBs.getIDBPrev(), tr, true)
|
||||
f(legacyIDBs.getIDBCurr(), tr, true)
|
||||
f(idb, tr, true)
|
||||
|
||||
// Fully inside trPt, overlaps with trPrev on the right side and trCurr on
|
||||
// the left side.
|
||||
tr = TimeRange{
|
||||
MinTimestamp: time.Date(2025, 1, 7, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(2025, 1, 21, 23, 59, 59, 999_999_999, time.UTC).UnixMilli(),
|
||||
}
|
||||
f(legacyIDBs.getIDBPrev(), tr, true)
|
||||
f(legacyIDBs.getIDBCurr(), tr, true)
|
||||
f(idb, tr, true)
|
||||
|
||||
// Fully inside trPt and trCurr, fully after trPrev.
|
||||
tr = TimeRange{
|
||||
MinTimestamp: time.Date(2025, 1, 18, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(2025, 1, 21, 23, 59, 59, 999_999_999, time.UTC).UnixMilli(),
|
||||
}
|
||||
f(legacyIDBs.getIDBPrev(), tr, false)
|
||||
f(legacyIDBs.getIDBCurr(), tr, true)
|
||||
f(idb, tr, true)
|
||||
|
||||
// Overlaps with trPt and trCurr on the right side, fully after trPrev.
|
||||
tr = TimeRange{
|
||||
MinTimestamp: time.Date(2025, 1, 21, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(2025, 2, 21, 23, 59, 59, 999_999_999, time.UTC).UnixMilli(),
|
||||
}
|
||||
f(legacyIDBs.getIDBPrev(), tr, false)
|
||||
f(legacyIDBs.getIDBCurr(), tr, true)
|
||||
f(idb, tr, true)
|
||||
|
||||
// fully after trPrev, trCurr, and trPt.
|
||||
tr = TimeRange{
|
||||
MinTimestamp: time.Date(2025, 2, 1, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(2025, 3, 31, 23, 59, 59, 999_999_999, time.UTC).UnixMilli(),
|
||||
}
|
||||
f(legacyIDBs.getIDBPrev(), tr, false)
|
||||
f(legacyIDBs.getIDBCurr(), tr, false)
|
||||
f(idb, tr, true)
|
||||
}
|
||||
@@ -20,7 +20,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/mergeset"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/uint64set"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
|
||||
"github.com/VictoriaMetrics/fastcache"
|
||||
)
|
||||
|
||||
func TestTagFiltersToMetricIDsCache(t *testing.T) {
|
||||
@@ -33,14 +32,15 @@ func TestTagFiltersToMetricIDsCache(t *testing.T) {
|
||||
s := MustOpenStorage(path, OpenOptions{})
|
||||
defer s.MustClose()
|
||||
|
||||
idbPrev, idbCurr, idbNext := s.getIndexDBs()
|
||||
defer s.putIndexDBs(idbPrev, idbCurr, idbNext)
|
||||
ptw := s.tb.MustGetPartition(time.Now().UnixMilli())
|
||||
idb := ptw.pt.idb
|
||||
defer s.tb.PutPartition(ptw)
|
||||
|
||||
key := []byte("key")
|
||||
wantSet := &uint64set.Set{}
|
||||
wantSet.AddMulti(want)
|
||||
idbCurr.putMetricIDsToTagFiltersCache(nil, wantSet, key)
|
||||
gotSet, ok := idbCurr.getMetricIDsFromTagFiltersCache(nil, key)
|
||||
idb.putMetricIDsToTagFiltersCache(nil, wantSet, key)
|
||||
gotSet, ok := idb.getMetricIDsFromTagFiltersCache(nil, key)
|
||||
if !ok {
|
||||
t.Fatalf("expected metricIDs to be found in cache but they weren't: %v", want)
|
||||
}
|
||||
@@ -62,12 +62,13 @@ func TestTagFiltersToMetricIDsCache_EmptyMetricIDList(t *testing.T) {
|
||||
defer fs.MustRemoveDir(path)
|
||||
s := MustOpenStorage(path, OpenOptions{})
|
||||
defer s.MustClose()
|
||||
idbPrev, idbCurr := s.getPrevAndCurrIndexDBs()
|
||||
defer s.putPrevAndCurrIndexDBs(idbPrev, idbCurr)
|
||||
ptw := s.tb.MustGetPartition(time.Now().UnixMilli())
|
||||
idb := ptw.pt.idb
|
||||
defer s.tb.PutPartition(ptw)
|
||||
|
||||
key := []byte("key")
|
||||
idbCurr.putMetricIDsToTagFiltersCache(nil, nil, key)
|
||||
got, ok := idbCurr.getMetricIDsFromTagFiltersCache(nil, key)
|
||||
idb.putMetricIDsToTagFiltersCache(nil, nil, key)
|
||||
got, ok := idb.getMetricIDsFromTagFiltersCache(nil, key)
|
||||
if !ok {
|
||||
t.Fatalf("expected empty metricID list to be found in cache but it wasn't")
|
||||
}
|
||||
@@ -471,14 +472,15 @@ func TestRemoveDuplicateMetricIDs(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestIndexDBOpenClose(t *testing.T) {
|
||||
defer testRemoveAll(t)
|
||||
|
||||
var s Storage
|
||||
tableName := nextIndexDBTableName()
|
||||
path := filepath.Join(t.Name(), "2025_01")
|
||||
for i := 0; i < 5; i++ {
|
||||
var isReadOnly atomic.Bool
|
||||
db := mustOpenIndexDB(tableName, &s, &isReadOnly, false)
|
||||
db := mustOpenIndexDB(123, TimeRange{}, "name", path, &s, &isReadOnly, false)
|
||||
db.MustClose()
|
||||
}
|
||||
fs.MustRemoveDir(tableName)
|
||||
}
|
||||
|
||||
func TestIndexDB(t *testing.T) {
|
||||
@@ -488,27 +490,28 @@ func TestIndexDB(t *testing.T) {
|
||||
t.Run("serial", func(t *testing.T) {
|
||||
const path = "TestIndexDB-serial"
|
||||
s := MustOpenStorage(path, OpenOptions{})
|
||||
|
||||
idbPrev, idbCurr := s.getPrevAndCurrIndexDBs()
|
||||
mns, tsids, err := testIndexDBGetOrCreateTSIDByName(idbCurr, metricGroups, timestamp)
|
||||
ptw := s.tb.MustGetPartition(timestamp)
|
||||
db := ptw.pt.idb
|
||||
mns, tsids, err := testIndexDBGetOrCreateTSIDByName(db, metricGroups, timestamp)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
if err := testIndexDBCheckTSIDByName(idbCurr, mns, tsids, timestamp, false); err != nil {
|
||||
if err := testIndexDBCheckTSIDByName(db, mns, tsids, timestamp, false); err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
// Re-open the storage and verify it works as expected.
|
||||
s.putPrevAndCurrIndexDBs(idbPrev, idbCurr)
|
||||
s.tb.PutPartition(ptw)
|
||||
s.MustClose()
|
||||
s = MustOpenStorage(path, OpenOptions{})
|
||||
|
||||
idbPrev, idbCurr = s.getPrevAndCurrIndexDBs()
|
||||
if err := testIndexDBCheckTSIDByName(idbCurr, mns, tsids, timestamp, false); err != nil {
|
||||
ptw = s.tb.MustGetPartition(timestamp)
|
||||
db = ptw.pt.idb
|
||||
if err := testIndexDBCheckTSIDByName(db, mns, tsids, timestamp, false); err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
s.putPrevAndCurrIndexDBs(idbPrev, idbCurr)
|
||||
s.tb.PutPartition(ptw)
|
||||
s.MustClose()
|
||||
fs.MustRemoveDir(path)
|
||||
})
|
||||
@@ -516,17 +519,18 @@ func TestIndexDB(t *testing.T) {
|
||||
t.Run("concurrent", func(t *testing.T) {
|
||||
const path = "TestIndexDB-concurrent"
|
||||
s := MustOpenStorage(path, OpenOptions{})
|
||||
idbPrev, idbCurr := s.getPrevAndCurrIndexDBs()
|
||||
ptw := s.tb.MustGetPartition(timestamp)
|
||||
db := ptw.pt.idb
|
||||
|
||||
ch := make(chan error, 3)
|
||||
for i := 0; i < cap(ch); i++ {
|
||||
go func() {
|
||||
mns, tsid, err := testIndexDBGetOrCreateTSIDByName(idbCurr, metricGroups, timestamp)
|
||||
mns, tsid, err := testIndexDBGetOrCreateTSIDByName(db, metricGroups, timestamp)
|
||||
if err != nil {
|
||||
ch <- err
|
||||
return
|
||||
}
|
||||
if err := testIndexDBCheckTSIDByName(idbCurr, mns, tsid, timestamp, true); err != nil {
|
||||
if err := testIndexDBCheckTSIDByName(db, mns, tsid, timestamp, true); err != nil {
|
||||
ch <- err
|
||||
return
|
||||
}
|
||||
@@ -545,7 +549,7 @@ func TestIndexDB(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
s.putPrevAndCurrIndexDBs(idbPrev, idbCurr)
|
||||
s.tb.PutPartition(ptw)
|
||||
s.MustClose()
|
||||
fs.MustRemoveDir(path)
|
||||
})
|
||||
@@ -579,26 +583,26 @@ func testIndexDBGetOrCreateTSIDByName(db *indexDB, metricGroups int, timestamp i
|
||||
metricNameBuf = mn.Marshal(metricNameBuf[:0])
|
||||
|
||||
// Create tsid for the metricName.
|
||||
var genTSID generationTSID
|
||||
if !is.getTSIDByMetricName(&genTSID, metricNameBuf, date) {
|
||||
generateTSID(&genTSID.TSID, &mn)
|
||||
createAllIndexesForMetricName(db, &mn, &genTSID.TSID, date)
|
||||
var tsid TSID
|
||||
if !is.getTSIDByMetricName(&tsid, metricNameBuf, date) {
|
||||
generateTSID(&tsid, &mn)
|
||||
createAllIndexesForMetricName(db, &mn, &tsid, date)
|
||||
}
|
||||
|
||||
mns = append(mns, mn)
|
||||
tsids = append(tsids, genTSID.TSID)
|
||||
tsids = append(tsids, tsid)
|
||||
}
|
||||
|
||||
db.putIndexSearch(is)
|
||||
|
||||
// Flush index to disk, so it becomes visible for search
|
||||
db.s.DebugFlush()
|
||||
db.tb.DebugFlush()
|
||||
|
||||
return mns, tsids, nil
|
||||
}
|
||||
|
||||
func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, timestamp int64, isConcurrent bool) error {
|
||||
timeseriesCounters := make(map[uint64]bool)
|
||||
var genTSID generationTSID
|
||||
var tsidLocal TSID
|
||||
var metricNameCopy []byte
|
||||
allLabelNames := make(map[string]bool)
|
||||
@@ -613,12 +617,11 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, tim
|
||||
metricName := mn.Marshal(nil)
|
||||
|
||||
is := db.getIndexSearch(noDeadline)
|
||||
if !is.getTSIDByMetricName(&genTSID, metricName, uint64(timestamp)/msecPerDay) {
|
||||
if !is.getTSIDByMetricName(&tsidLocal, metricName, uint64(timestamp)/msecPerDay) {
|
||||
return fmt.Errorf("cannot obtain tsid #%d for mn %s", i, mn)
|
||||
}
|
||||
db.putIndexSearch(is)
|
||||
|
||||
tsidLocal = genTSID.TSID
|
||||
if isConcurrent {
|
||||
// Copy tsid.MetricID, since multiple TSIDs may match
|
||||
// the same mn in concurrent mode.
|
||||
@@ -1419,103 +1422,8 @@ func TestMatchTagFilters(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndexDBRepopulateAfterRotation(t *testing.T) {
|
||||
r := rand.New(rand.NewSource(1))
|
||||
path := "TestIndexRepopulateAfterRotation"
|
||||
opts := OpenOptions{
|
||||
Retention: retention31Days,
|
||||
MaxHourlySeries: 1e5,
|
||||
MaxDailySeries: 1e5,
|
||||
}
|
||||
s := MustOpenStorage(path, opts)
|
||||
|
||||
idbPrev, idbCurr := s.getPrevAndCurrIndexDBs()
|
||||
if idbCurr.generation == 0 {
|
||||
t.Fatalf("expected indexDB generation to be not 0")
|
||||
}
|
||||
|
||||
const metricRowsN = 1000
|
||||
|
||||
currentDayTimestamp := (time.Now().UnixMilli() / msecPerDay) * msecPerDay
|
||||
timeMin := currentDayTimestamp - 24*3600*1000
|
||||
timeMax := currentDayTimestamp + 24*3600*1000
|
||||
mrs := testGenerateMetricRows(r, metricRowsN, timeMin, timeMax)
|
||||
s.AddRows(mrs, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
|
||||
// verify the storage contains rows.
|
||||
var m Metrics
|
||||
s.UpdateMetrics(&m)
|
||||
if rowsCount := m.TableMetrics.TotalRowsCount(); rowsCount < uint64(metricRowsN) {
|
||||
t.Fatalf("expecting at least %d rows in the table; got %d", metricRowsN, rowsCount)
|
||||
}
|
||||
|
||||
// check new series were registered in indexDB
|
||||
added := idbCurr.s.newTimeseriesCreated.Load()
|
||||
if added != metricRowsN {
|
||||
t.Fatalf("expected indexDB to contain %d rows; got %d", metricRowsN, added)
|
||||
}
|
||||
|
||||
// check new series were added to cache
|
||||
var cs fastcache.Stats
|
||||
s.tsidCache.UpdateStats(&cs)
|
||||
if cs.EntriesCount != metricRowsN {
|
||||
t.Fatalf("expected tsidCache to contain %d rows; got %d", metricRowsN, cs.EntriesCount)
|
||||
}
|
||||
|
||||
// check if cache entries do belong to current indexDB generation
|
||||
var genTSID generationTSID
|
||||
for _, mr := range mrs {
|
||||
s.getTSIDFromCache(&genTSID, mr.MetricNameRaw)
|
||||
if genTSID.generation != idbCurr.generation {
|
||||
t.Fatalf("expected all entries in tsidCache to have the same indexDB generation: %d;"+
|
||||
"got %d", idbCurr.generation, genTSID.generation)
|
||||
}
|
||||
}
|
||||
prevGeneration := idbCurr.generation
|
||||
s.putPrevAndCurrIndexDBs(idbPrev, idbCurr)
|
||||
|
||||
// force index rotation
|
||||
s.mustRotateIndexDB(time.Now())
|
||||
|
||||
// check tsidCache wasn't reset after the rotation
|
||||
var cs2 fastcache.Stats
|
||||
s.tsidCache.UpdateStats(&cs2)
|
||||
if cs.EntriesCount != metricRowsN {
|
||||
t.Fatalf("expected tsidCache after rotation to contain %d rows; got %d", metricRowsN, cs2.EntriesCount)
|
||||
}
|
||||
|
||||
idbPrev, idbCurr = s.getPrevAndCurrIndexDBs()
|
||||
if idbCurr.generation == 0 {
|
||||
t.Fatalf("expected new indexDB generation to be not 0")
|
||||
}
|
||||
if idbCurr.generation == prevGeneration {
|
||||
t.Fatalf("expected new indexDB generation %d to be different from prev indexDB", idbCurr.generation)
|
||||
}
|
||||
|
||||
// Re-insert rows again and verify that all the entries belong to new generation
|
||||
s.AddRows(mrs, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
|
||||
for _, mr := range mrs {
|
||||
s.getTSIDFromCache(&genTSID, mr.MetricNameRaw)
|
||||
if genTSID.generation != idbCurr.generation {
|
||||
t.Fatalf("unexpected generation for data after rotation; got %d; want %d", genTSID.generation, idbCurr.generation)
|
||||
}
|
||||
}
|
||||
|
||||
s.putPrevAndCurrIndexDBs(idbPrev, idbCurr)
|
||||
s.MustClose()
|
||||
fs.MustRemoveDir(path)
|
||||
}
|
||||
|
||||
func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
const path = "TestSearchTSIDWithTimeRange"
|
||||
s := MustOpenStorage(path, OpenOptions{})
|
||||
idbPrev, idbCurr := s.getPrevAndCurrIndexDBs()
|
||||
|
||||
is := idbCurr.getIndexSearch(noDeadline)
|
||||
|
||||
// Create a bunch of per-day time series
|
||||
const days = 5
|
||||
const metricsPerDay = 1000
|
||||
@@ -1554,29 +1462,35 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
mn.sortTags()
|
||||
return mn
|
||||
}
|
||||
|
||||
s := MustOpenStorage(path, OpenOptions{})
|
||||
ptw := s.tb.MustGetPartition(timestamp)
|
||||
db := ptw.pt.idb
|
||||
is := db.getIndexSearch(noDeadline)
|
||||
|
||||
for day := 0; day < days; day++ {
|
||||
date := baseDate - uint64(day)
|
||||
var metricIDs uint64set.Set
|
||||
for metric := 0; metric < metricsPerDay; metric++ {
|
||||
mn := newMN("testMetric", day, metric)
|
||||
metricNameBuf = mn.Marshal(metricNameBuf[:0])
|
||||
var genTSID generationTSID
|
||||
if !is.getTSIDByMetricName(&genTSID, metricNameBuf, date) {
|
||||
generateTSID(&genTSID.TSID, &mn)
|
||||
createAllIndexesForMetricName(idbCurr, &mn, &genTSID.TSID, date)
|
||||
var tsid TSID
|
||||
if !is.getTSIDByMetricName(&tsid, metricNameBuf, date) {
|
||||
generateTSID(&tsid, &mn)
|
||||
createAllIndexesForMetricName(db, &mn, &tsid, date)
|
||||
}
|
||||
metricIDs.Add(genTSID.TSID.MetricID)
|
||||
metricIDs.Add(tsid.MetricID)
|
||||
}
|
||||
|
||||
allMetricIDs.Union(&metricIDs)
|
||||
perDayMetricIDs[date] = &metricIDs
|
||||
}
|
||||
idbCurr.putIndexSearch(is)
|
||||
db.putIndexSearch(is)
|
||||
|
||||
// Flush index to disk, so it becomes visible for search
|
||||
s.DebugFlush()
|
||||
db.tb.DebugFlush()
|
||||
|
||||
is2 := idbCurr.getIndexSearch(noDeadline)
|
||||
is2 := db.getIndexSearch(noDeadline)
|
||||
|
||||
// Check that all the metrics are found for all the days.
|
||||
for date := baseDate - days + 1; date <= baseDate; date++ {
|
||||
@@ -1597,10 +1511,10 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
if !allMetricIDs.Equal(metricIDs) {
|
||||
t.Fatalf("unexpected metricIDs found;\ngot\n%d\nwant\n%d", metricIDs.AppendTo(nil), allMetricIDs.AppendTo(nil))
|
||||
}
|
||||
idbCurr.putIndexSearch(is2)
|
||||
db.putIndexSearch(is2)
|
||||
|
||||
// add a metric that will be deleted shortly
|
||||
is3 := idbCurr.getIndexSearch(noDeadline)
|
||||
is3 := db.getIndexSearch(noDeadline)
|
||||
day := days
|
||||
date := baseDate - uint64(day)
|
||||
mn := newMN("deletedMetric", day, 999)
|
||||
@@ -1610,24 +1524,24 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
)
|
||||
mn.sortTags()
|
||||
metricNameBuf = mn.Marshal(metricNameBuf[:0])
|
||||
var genTSID generationTSID
|
||||
if !is3.getTSIDByMetricName(&genTSID, metricNameBuf, date) {
|
||||
generateTSID(&genTSID.TSID, &mn)
|
||||
createAllIndexesForMetricName(idbCurr, &mn, &genTSID.TSID, date)
|
||||
var tsid TSID
|
||||
if !is3.getTSIDByMetricName(&tsid, metricNameBuf, date) {
|
||||
generateTSID(&tsid, &mn)
|
||||
createAllIndexesForMetricName(db, &mn, &tsid, date)
|
||||
}
|
||||
// delete the added metric. It is expected it won't be returned during searches
|
||||
deletedSet := &uint64set.Set{}
|
||||
deletedSet.Add(genTSID.TSID.MetricID)
|
||||
s.setDeletedMetricIDs(deletedSet)
|
||||
idbCurr.putIndexSearch(is3)
|
||||
s.DebugFlush()
|
||||
deletedSet.Add(tsid.MetricID)
|
||||
db.setDeletedMetricIDs(deletedSet)
|
||||
db.putIndexSearch(is3)
|
||||
db.tb.DebugFlush()
|
||||
|
||||
// Check SearchLabelNames with the specified time range.
|
||||
tr := TimeRange{
|
||||
MinTimestamp: int64(timestamp) - msecPerDay,
|
||||
MaxTimestamp: int64(timestamp),
|
||||
MinTimestamp: timestamp - msecPerDay,
|
||||
MaxTimestamp: timestamp,
|
||||
}
|
||||
lns, err := idbCurr.SearchLabelNames(nil, nil, tr, 10000, 1e9, noDeadline)
|
||||
lns, err := db.SearchLabelNames(nil, nil, tr, 10000, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in SearchLabelNames(timeRange=%s): %s", &tr, err)
|
||||
}
|
||||
@@ -1637,7 +1551,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check SearchLabelValues with the specified time range.
|
||||
lvs, err := idbCurr.SearchLabelValues(nil, "", nil, tr, 10000, 1e9, noDeadline)
|
||||
lvs, err := db.SearchLabelValues(nil, "", nil, tr, 10000, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in SearchLabelValues(timeRange=%s): %s", &tr, err)
|
||||
}
|
||||
@@ -1666,10 +1580,10 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
// Perform a search within a day.
|
||||
// This should return the metrics for the day
|
||||
tr = TimeRange{
|
||||
MinTimestamp: int64(timestamp - 2*msecPerHour - 1),
|
||||
MaxTimestamp: int64(timestamp),
|
||||
MinTimestamp: timestamp - 2*msecPerHour - 1,
|
||||
MaxTimestamp: timestamp,
|
||||
}
|
||||
matchedTSIDs, err := idbCurr.SearchTSIDs(nil, []*TagFilters{tfs}, tr, 1e5, noDeadline)
|
||||
matchedTSIDs, err := db.SearchTSIDs(nil, []*TagFilters{tfs}, tr, 1e5, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("error searching tsids: %v", err)
|
||||
}
|
||||
@@ -1678,7 +1592,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check SearchLabelNames with the specified filter.
|
||||
lns, err = idbCurr.SearchLabelNames(nil, []*TagFilters{tfs}, TimeRange{}, 10000, 1e9, noDeadline)
|
||||
lns, err = db.SearchLabelNames(nil, []*TagFilters{tfs}, TimeRange{}, 10000, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in SearchLabelNames(filters=%s): %s", tfs, err)
|
||||
}
|
||||
@@ -1688,7 +1602,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check SearchLabelNames with the specified filter and time range.
|
||||
lns, err = idbCurr.SearchLabelNames(nil, []*TagFilters{tfs}, tr, 10000, 1e9, noDeadline)
|
||||
lns, err = db.SearchLabelNames(nil, []*TagFilters{tfs}, tr, 10000, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in SearchLabelNames(filters=%s, timeRange=%s): %s", tfs, &tr, err)
|
||||
}
|
||||
@@ -1698,7 +1612,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check SearchLabelNames with filters on metric name and time range.
|
||||
lns, err = idbCurr.SearchLabelNames(nil, []*TagFilters{tfsMetricName}, tr, 10000, 1e9, noDeadline)
|
||||
lns, err = db.SearchLabelNames(nil, []*TagFilters{tfsMetricName}, tr, 10000, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in SearchLabelNames(filters=%s, timeRange=%s): %s", tfs, &tr, err)
|
||||
}
|
||||
@@ -1708,7 +1622,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check SearchLabelNames with filters on composite key and time range.
|
||||
lns, err = idbCurr.SearchLabelNames(nil, []*TagFilters{tfsComposite}, tr, 10000, 1e9, noDeadline)
|
||||
lns, err = db.SearchLabelNames(nil, []*TagFilters{tfsComposite}, tr, 10000, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in SearchLabelNames(filters=%s, timeRange=%s): %s", tfs, &tr, err)
|
||||
}
|
||||
@@ -1718,7 +1632,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check SearchLabelValues with the specified filter.
|
||||
lvs, err = idbCurr.SearchLabelValues(nil, "", []*TagFilters{tfs}, TimeRange{}, 10000, 1e9, noDeadline)
|
||||
lvs, err = db.SearchLabelValues(nil, "", []*TagFilters{tfs}, TimeRange{}, 10000, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in SearchLabelValues(filters=%s): %s", tfs, err)
|
||||
}
|
||||
@@ -1728,7 +1642,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check SearchLabelValues with the specified filter and time range.
|
||||
lvs, err = idbCurr.SearchLabelValues(nil, "", []*TagFilters{tfs}, tr, 10000, 1e9, noDeadline)
|
||||
lvs, err = db.SearchLabelValues(nil, "", []*TagFilters{tfs}, tr, 10000, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in SearchLabelValues(filters=%s, timeRange=%s): %s", tfs, &tr, err)
|
||||
}
|
||||
@@ -1738,7 +1652,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check SearchLabelValues with filters on metric name and time range.
|
||||
lvs, err = idbCurr.SearchLabelValues(nil, "", []*TagFilters{tfsMetricName}, tr, 10000, 1e9, noDeadline)
|
||||
lvs, err = db.SearchLabelValues(nil, "", []*TagFilters{tfsMetricName}, tr, 10000, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in SearchLabelValues(filters=%s, timeRange=%s): %s", tfs, &tr, err)
|
||||
}
|
||||
@@ -1748,7 +1662,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check SearchLabelValues with filters on composite key and time range.
|
||||
lvs, err = idbCurr.SearchLabelValues(nil, "constant", []*TagFilters{tfsComposite}, tr, 10000, 1e9, noDeadline)
|
||||
lvs, err = db.SearchLabelValues(nil, "constant", []*TagFilters{tfsComposite}, tr, 10000, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in SearchLabelValues(filters=%s, timeRange=%s): %s", tfs, &tr, err)
|
||||
}
|
||||
@@ -1760,11 +1674,11 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
|
||||
// Perform a search across all the days, should match all metrics
|
||||
tr = TimeRange{
|
||||
MinTimestamp: int64(timestamp - msecPerDay*days),
|
||||
MaxTimestamp: int64(timestamp),
|
||||
MinTimestamp: timestamp - msecPerDay*days,
|
||||
MaxTimestamp: timestamp,
|
||||
}
|
||||
|
||||
matchedTSIDs, err = idbCurr.SearchTSIDs(nil, []*TagFilters{tfs}, tr, 1e5, noDeadline)
|
||||
matchedTSIDs, err = db.SearchTSIDs(nil, []*TagFilters{tfs}, tr, 1e5, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("error searching tsids: %v", err)
|
||||
}
|
||||
@@ -1773,7 +1687,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check GetTSDBStatus with nil filters.
|
||||
status, err := idbCurr.GetTSDBStatus(nil, nil, baseDate, "day", 5, 1e6, noDeadline)
|
||||
status, err := db.GetTSDBStatus(nil, nil, baseDate, "day", 5, 1e6, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("error in GetTSDBStatus with nil filters: %s", err)
|
||||
}
|
||||
@@ -1887,7 +1801,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
if err := tfs.Add([]byte("day"), []byte("0"), false, false); err != nil {
|
||||
t.Fatalf("cannot add filter: %s", err)
|
||||
}
|
||||
status, err = idbCurr.GetTSDBStatus(nil, []*TagFilters{tfs}, baseDate, "", 5, 1e6, noDeadline)
|
||||
status, err = db.GetTSDBStatus(nil, []*TagFilters{tfs}, baseDate, "", 5, 1e6, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("error in GetTSDBStatus: %s", err)
|
||||
}
|
||||
@@ -1913,7 +1827,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check GetTSDBStatus, which matches all the series on a global time range
|
||||
status, err = idbCurr.GetTSDBStatus(nil, nil, 0, "day", 5, 1e6, noDeadline)
|
||||
status, err = db.GetTSDBStatus(nil, nil, 0, "day", 5, 1e6, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("error in GetTSDBStatus: %s", err)
|
||||
}
|
||||
@@ -1968,7 +1882,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
if err := tfs.Add([]byte("UniqueId"), []byte("0|1|3"), false, true); err != nil {
|
||||
t.Fatalf("cannot add filter: %s", err)
|
||||
}
|
||||
status, err = idbCurr.GetTSDBStatus(nil, []*TagFilters{tfs}, baseDate, "", 5, 1e6, noDeadline)
|
||||
status, err = db.GetTSDBStatus(nil, []*TagFilters{tfs}, baseDate, "", 5, 1e6, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("error in GetTSDBStatus: %s", err)
|
||||
}
|
||||
@@ -1994,7 +1908,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check GetTSDBStatus with non-nil filter on global time range, which matches only 15 series
|
||||
status, err = idbCurr.GetTSDBStatus(nil, []*TagFilters{tfs}, 0, "", 5, 1e6, noDeadline)
|
||||
status, err = db.GetTSDBStatus(nil, []*TagFilters{tfs}, 0, "", 5, 1e6, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("error in GetTSDBStatus: %s", err)
|
||||
}
|
||||
@@ -2019,7 +1933,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
t.Fatalf("unexpected TotalLabelValuePairs; got %d; want %d", status.TotalLabelValuePairs, expectedLabelValuePairs)
|
||||
}
|
||||
|
||||
s.putPrevAndCurrIndexDBs(idbPrev, idbCurr)
|
||||
s.tb.PutPartition(ptw)
|
||||
s.MustClose()
|
||||
fs.MustRemoveDir(path)
|
||||
}
|
||||
@@ -2041,7 +1955,6 @@ func newTestStorage() *Storage {
|
||||
tsidCache: workingsetcache.New(1234),
|
||||
retentionMsecs: retentionMax.Milliseconds(),
|
||||
}
|
||||
s.setDeletedMetricIDs(&uint64set.Set{})
|
||||
return s
|
||||
}
|
||||
|
||||
@@ -2052,135 +1965,6 @@ func stopTestStorage(s *Storage) {
|
||||
fs.MustRemoveDir(s.cachePath)
|
||||
}
|
||||
|
||||
func TestSearchContainsTimeRange(t *testing.T) {
|
||||
path := t.Name()
|
||||
fs.MustRemoveDir(path)
|
||||
s := MustOpenStorage(path, OpenOptions{})
|
||||
idbPrev, idbCurr := s.getPrevAndCurrIndexDBs()
|
||||
|
||||
is := idbCurr.getIndexSearch(noDeadline)
|
||||
|
||||
// Create a bunch of per-day time series
|
||||
const (
|
||||
days = 6
|
||||
tenant2IngestionDay = 8
|
||||
metricsPerDay = 1000
|
||||
)
|
||||
rotationDay := time.Date(2019, time.October, 15, 5, 1, 0, 0, time.UTC)
|
||||
rotationMillis := uint64(rotationDay.UnixMilli())
|
||||
rotationDate := rotationMillis / msecPerDay
|
||||
var metricNameBuf []byte
|
||||
perDayMetricIDs := make(map[uint64]*uint64set.Set)
|
||||
labelNames := []string{
|
||||
"__name__", "constant", "day", "UniqueId", "some_unique_id",
|
||||
}
|
||||
|
||||
sort.Strings(labelNames)
|
||||
|
||||
newMN := func(name string, day, metric int) MetricName {
|
||||
var mn MetricName
|
||||
mn.MetricGroup = []byte(name)
|
||||
mn.AddTag(
|
||||
"constant",
|
||||
"const",
|
||||
)
|
||||
mn.AddTag(
|
||||
"day",
|
||||
fmt.Sprintf("%v", day),
|
||||
)
|
||||
mn.AddTag(
|
||||
"UniqueId",
|
||||
fmt.Sprintf("%v", metric),
|
||||
)
|
||||
mn.AddTag(
|
||||
"some_unique_id",
|
||||
fmt.Sprintf("%v", day),
|
||||
)
|
||||
mn.sortTags()
|
||||
return mn
|
||||
}
|
||||
|
||||
// ingest metrics for tenant 0:0
|
||||
for day := 0; day < days; day++ {
|
||||
date := rotationDate - uint64(day)
|
||||
|
||||
var metricIDs uint64set.Set
|
||||
for metric := range metricsPerDay {
|
||||
mn := newMN("testMetric", day, metric)
|
||||
metricNameBuf = mn.Marshal(metricNameBuf[:0])
|
||||
var genTSID generationTSID
|
||||
if !is.getTSIDByMetricName(&genTSID, metricNameBuf, date) {
|
||||
generateTSID(&genTSID.TSID, &mn)
|
||||
createAllIndexesForMetricName(idbCurr, &mn, &genTSID.TSID, date)
|
||||
}
|
||||
metricIDs.Add(genTSID.TSID.MetricID)
|
||||
}
|
||||
|
||||
perDayMetricIDs[date] = &metricIDs
|
||||
}
|
||||
idbCurr.putIndexSearch(is)
|
||||
|
||||
// Flush index to disk, so it becomes visible for search
|
||||
s.DebugFlush()
|
||||
|
||||
is2 := idbCurr.getIndexSearch(noDeadline)
|
||||
|
||||
// Check that all the metrics are found for all the days.
|
||||
for date := rotationDate - days + 1; date <= rotationDate; date++ {
|
||||
|
||||
metricIDs, err := is2.getMetricIDsForDate(date, metricsPerDay)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
if !perDayMetricIDs[date].Equal(metricIDs) {
|
||||
t.Fatalf("unexpected metricIDs found;\ngot\n%d\nwant\n%d", metricIDs.AppendTo(nil), perDayMetricIDs[date].AppendTo(nil))
|
||||
}
|
||||
}
|
||||
|
||||
idbCurr.putIndexSearch(is2)
|
||||
s.putPrevAndCurrIndexDBs(idbPrev, idbCurr)
|
||||
|
||||
// rotate indexdb
|
||||
s.mustRotateIndexDB(rotationDay)
|
||||
idbPrev, idbNext := s.getPrevAndCurrIndexDBs()
|
||||
|
||||
// perform search for 0:0 tenant
|
||||
// results of previous search requests shouldn't affect it
|
||||
|
||||
isPrev := idbPrev.getIndexSearch(noDeadline)
|
||||
// search for range that covers prev indexDB for dates before ingestion
|
||||
tr := TimeRange{
|
||||
MinTimestamp: int64(rotationMillis - msecPerDay*(days)),
|
||||
MaxTimestamp: int64(rotationMillis),
|
||||
}
|
||||
if !isPrev.containsTimeRange(tr) {
|
||||
t.Fatalf("expected to have given time range at prev IndexDB")
|
||||
}
|
||||
|
||||
// search for range not exist at prev indexDB
|
||||
tr = TimeRange{
|
||||
MinTimestamp: int64(rotationMillis + msecPerDay*(days+4)),
|
||||
MaxTimestamp: int64(rotationMillis + msecPerDay*(days+2)),
|
||||
}
|
||||
if isPrev.containsTimeRange(tr) {
|
||||
t.Fatalf("not expected to have given time range at prev IndexDB")
|
||||
}
|
||||
key := isPrev.marshalCommonPrefix(nil, nsPrefixDateToMetricID)
|
||||
|
||||
idbPrev.minMissingTimestampByKeyLock.Lock()
|
||||
minMissingTimetamp := idbPrev.minMissingTimestampByKey[string(key)]
|
||||
idbPrev.minMissingTimestampByKeyLock.Unlock()
|
||||
|
||||
if minMissingTimetamp != tr.MinTimestamp {
|
||||
t.Fatalf("unexpected minMissingTimestamp for 0:0 tenant got %d, want %d", minMissingTimetamp, tr.MinTimestamp)
|
||||
}
|
||||
|
||||
idbPrev.putIndexSearch(isPrev)
|
||||
s.putPrevAndCurrIndexDBs(idbPrev, idbNext)
|
||||
s.MustClose()
|
||||
fs.MustRemoveDir(path)
|
||||
}
|
||||
|
||||
func sortedSlice(m map[string]struct{}) []string {
|
||||
s := make([]string, 0, len(m))
|
||||
for k := range m {
|
||||
@@ -2190,19 +1974,19 @@ func sortedSlice(m map[string]struct{}) []string {
|
||||
return s
|
||||
}
|
||||
|
||||
func TestIndexSearchContainsTimeRange_Concurrent(t *testing.T) {
|
||||
func TestIndexSearchLegacyContainsTimeRange_Concurrent(t *testing.T) {
|
||||
defer testRemoveAll(t)
|
||||
|
||||
// Create storage because indexDB depends on it.
|
||||
s := MustOpenStorage(filepath.Join(t.Name(), "storage"), OpenOptions{})
|
||||
defer s.MustClose()
|
||||
|
||||
idbName := nextIndexDBTableName()
|
||||
idbName := "test"
|
||||
idbPath := filepath.Join(t.Name(), indexdbDirname, idbName)
|
||||
var readOnly atomic.Bool
|
||||
readOnly.Store(true)
|
||||
noRegisterNewSeries := true
|
||||
idb := mustOpenIndexDB(idbPath, s, &readOnly, noRegisterNewSeries)
|
||||
idb := mustOpenIndexDB(123, TimeRange{}, idbName, idbPath, s, &readOnly, noRegisterNewSeries)
|
||||
defer idb.MustClose()
|
||||
|
||||
minTimestamp := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli()
|
||||
@@ -2212,7 +1996,7 @@ func TestIndexSearchContainsTimeRange_Concurrent(t *testing.T) {
|
||||
wg.Add(1)
|
||||
go func(ts int64) {
|
||||
is := idb.getIndexSearch(noDeadline)
|
||||
_ = is.containsTimeRange(TimeRange{ts, ts})
|
||||
_ = is.legacyContainsTimeRange(TimeRange{ts, ts})
|
||||
idb.putIndexSearch(is)
|
||||
wg.Done()
|
||||
}(minTimestamp + msecPerDay*i)
|
||||
@@ -2220,7 +2004,7 @@ func TestIndexSearchContainsTimeRange_Concurrent(t *testing.T) {
|
||||
wg.Wait()
|
||||
|
||||
key := marshalCommonPrefix(nil, nsPrefixDateToMetricID)
|
||||
if got, want := idb.minMissingTimestampByKey[string(key)], minTimestamp; got != want {
|
||||
if got, want := idb.legacyMinMissingTimestampByKey[string(key)], minTimestamp; got != want {
|
||||
t.Fatalf("unexpected min timestamp: got %v, want %v", time.UnixMilli(got).UTC(), time.UnixMilli(want).UTC())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,7 +42,8 @@ func BenchmarkIndexDBAddTSIDs(b *testing.B) {
|
||||
const path = "BenchmarkIndexDBAddTSIDs"
|
||||
timestamp := time.Date(2025, 3, 17, 0, 0, 0, 0, time.UTC).UnixMilli()
|
||||
s := MustOpenStorage(path, OpenOptions{})
|
||||
idbPrev, idbCurr := s.getPrevAndCurrIndexDBs()
|
||||
ptw := s.tb.MustGetPartition(timestamp)
|
||||
db := ptw.pt.idb
|
||||
|
||||
const recordsPerLoop = 1e3
|
||||
|
||||
@@ -51,7 +52,7 @@ func BenchmarkIndexDBAddTSIDs(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var mn MetricName
|
||||
var genTSID generationTSID
|
||||
var tsid TSID
|
||||
|
||||
// The most common tags.
|
||||
mn.Tags = []Tag{
|
||||
@@ -65,18 +66,18 @@ func BenchmarkIndexDBAddTSIDs(b *testing.B) {
|
||||
|
||||
startOffset := 0
|
||||
for pb.Next() {
|
||||
benchmarkIndexDBAddTSIDs(idbCurr, &genTSID, &mn, timestamp, startOffset, recordsPerLoop)
|
||||
benchmarkIndexDBAddTSIDs(db, &tsid, &mn, timestamp, startOffset, recordsPerLoop)
|
||||
startOffset += recordsPerLoop
|
||||
}
|
||||
})
|
||||
b.StopTimer()
|
||||
|
||||
s.putPrevAndCurrIndexDBs(idbPrev, idbCurr)
|
||||
s.tb.PutPartition(ptw)
|
||||
s.MustClose()
|
||||
fs.MustRemoveDir(path)
|
||||
}
|
||||
|
||||
func benchmarkIndexDBAddTSIDs(db *indexDB, genTSID *generationTSID, mn *MetricName, timestamp int64, startOffset, recordsPerLoop int) {
|
||||
func benchmarkIndexDBAddTSIDs(db *indexDB, tsid *TSID, mn *MetricName, timestamp int64, startOffset, recordsPerLoop int) {
|
||||
date := uint64(timestamp) / msecPerDay
|
||||
for i := 0; i < recordsPerLoop; i++ {
|
||||
mn.MetricGroup = strconv.AppendUint(mn.MetricGroup[:0], uint64(i+startOffset), 10)
|
||||
@@ -85,8 +86,8 @@ func benchmarkIndexDBAddTSIDs(db *indexDB, genTSID *generationTSID, mn *MetricNa
|
||||
}
|
||||
mn.sortTags()
|
||||
|
||||
generateTSID(&genTSID.TSID, mn)
|
||||
createAllIndexesForMetricName(db, mn, &genTSID.TSID, date)
|
||||
generateTSID(tsid, mn)
|
||||
createAllIndexesForMetricName(db, mn, tsid, date)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -96,11 +97,12 @@ func BenchmarkHeadPostingForMatchers(b *testing.B) {
|
||||
const path = "BenchmarkHeadPostingForMatchers"
|
||||
timestamp := int64(0)
|
||||
s := MustOpenStorage(path, OpenOptions{})
|
||||
idbPrev, idbCurr := s.getPrevAndCurrIndexDBs()
|
||||
ptw := s.tb.MustGetPartition(timestamp)
|
||||
db := ptw.pt.idb
|
||||
|
||||
// Fill the db with data as in https://github.com/prometheus/prometheus/blob/23c0299d85bfeb5d9b59e994861553a25ca578e5/tsdb/head_bench_test.go#L66
|
||||
var mn MetricName
|
||||
var genTSID generationTSID
|
||||
var tsid TSID
|
||||
date := uint64(timestamp) / msecPerDay
|
||||
addSeries := func(kvs ...string) {
|
||||
mn.Reset()
|
||||
@@ -108,8 +110,8 @@ func BenchmarkHeadPostingForMatchers(b *testing.B) {
|
||||
mn.AddTag(kvs[i], kvs[i+1])
|
||||
}
|
||||
mn.sortTags()
|
||||
generateTSID(&genTSID.TSID, &mn)
|
||||
createAllIndexesForMetricName(idbCurr, &mn, &genTSID.TSID, date)
|
||||
generateTSID(&tsid, &mn)
|
||||
createAllIndexesForMetricName(db, &mn, &tsid, date)
|
||||
}
|
||||
for n := 0; n < 10; n++ {
|
||||
ns := strconv.Itoa(n)
|
||||
@@ -134,9 +136,9 @@ func BenchmarkHeadPostingForMatchers(b *testing.B) {
|
||||
// index instead of per-day index.
|
||||
tr := globalIndexTimeRange
|
||||
for i := 0; i < b.N; i++ {
|
||||
is := idbCurr.getIndexSearch(noDeadline)
|
||||
is := db.getIndexSearch(noDeadline)
|
||||
metricIDs, err := is.searchMetricIDs(nil, tfss, tr, 2e9)
|
||||
idbCurr.putIndexSearch(is)
|
||||
db.putIndexSearch(is)
|
||||
if err != nil {
|
||||
b.Fatalf("unexpected error in searchMetricIDs: %s", err)
|
||||
}
|
||||
@@ -253,7 +255,7 @@ func BenchmarkHeadPostingForMatchers(b *testing.B) {
|
||||
benchSearch(b, tfs, 88889)
|
||||
})
|
||||
|
||||
s.putPrevAndCurrIndexDBs(idbPrev, idbCurr)
|
||||
s.tb.PutPartition(ptw)
|
||||
s.MustClose()
|
||||
fs.MustRemoveDir(path)
|
||||
}
|
||||
@@ -262,7 +264,8 @@ func BenchmarkIndexDBGetTSIDs(b *testing.B) {
|
||||
const path = "BenchmarkIndexDBGetTSIDs"
|
||||
timestamp := time.Date(2025, 3, 17, 0, 0, 0, 0, time.UTC).UnixMilli()
|
||||
s := MustOpenStorage(path, OpenOptions{})
|
||||
idbPrev, idbCurr := s.getPrevAndCurrIndexDBs()
|
||||
ptw := s.tb.MustGetPartition(timestamp)
|
||||
db := ptw.pt.idb
|
||||
|
||||
const recordsPerLoop = 1000
|
||||
const recordsCount = 1e5
|
||||
@@ -277,38 +280,38 @@ func BenchmarkIndexDBGetTSIDs(b *testing.B) {
|
||||
}
|
||||
mn.sortTags()
|
||||
|
||||
var genTSID generationTSID
|
||||
var tsid TSID
|
||||
date := uint64(timestamp) / msecPerDay
|
||||
|
||||
for i := 0; i < recordsCount; i++ {
|
||||
generateTSID(&genTSID.TSID, &mn)
|
||||
createAllIndexesForMetricName(idbCurr, &mn, &genTSID.TSID, date)
|
||||
generateTSID(&tsid, &mn)
|
||||
createAllIndexesForMetricName(db, &mn, &tsid, date)
|
||||
}
|
||||
idbCurr.s.DebugFlush()
|
||||
db.s.DebugFlush()
|
||||
|
||||
b.SetBytes(recordsPerLoop)
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var genTSIDLocal generationTSID
|
||||
var tsidLocal TSID
|
||||
var metricNameLocal []byte
|
||||
var mnLocal MetricName
|
||||
mnLocal.CopyFrom(&mn)
|
||||
mnLocal.sortTags()
|
||||
for pb.Next() {
|
||||
is := idbCurr.getIndexSearch(noDeadline)
|
||||
is := db.getIndexSearch(noDeadline)
|
||||
for i := 0; i < recordsPerLoop; i++ {
|
||||
metricNameLocal = mnLocal.Marshal(metricNameLocal[:0])
|
||||
if !is.getTSIDByMetricName(&genTSIDLocal, metricNameLocal, date) {
|
||||
if !is.getTSIDByMetricName(&tsidLocal, metricNameLocal, date) {
|
||||
panic(fmt.Errorf("cannot obtain tsid for row %d", i))
|
||||
}
|
||||
}
|
||||
idbCurr.putIndexSearch(is)
|
||||
db.putIndexSearch(is)
|
||||
}
|
||||
})
|
||||
b.StopTimer()
|
||||
|
||||
s.putPrevAndCurrIndexDBs(idbPrev, idbCurr)
|
||||
s.tb.PutPartition(ptw)
|
||||
s.MustClose()
|
||||
fs.MustRemoveDir(path)
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
@@ -44,12 +45,13 @@ func (mp *inmemoryPart) MustStoreToDisk(path string) {
|
||||
indexPath := filepath.Join(path, indexFilename)
|
||||
metaindexPath := filepath.Join(path, metaindexFilename)
|
||||
|
||||
var psw filestream.ParallelStreamWriter
|
||||
psw.Add(timestampsPath, &mp.timestampsData)
|
||||
psw.Add(valuesPath, &mp.valuesData)
|
||||
psw.Add(indexPath, &mp.indexData)
|
||||
psw.Add(metaindexPath, &mp.metaindexData)
|
||||
psw.Run()
|
||||
var pe fsutil.ParallelExecutor
|
||||
|
||||
pe.Add(filestream.NewStreamWriterTask(timestampsPath, &mp.timestampsData))
|
||||
pe.Add(filestream.NewStreamWriterTask(valuesPath, &mp.valuesData))
|
||||
pe.Add(filestream.NewStreamWriterTask(indexPath, &mp.indexData))
|
||||
pe.Add(filestream.NewStreamWriterTask(metaindexPath, &mp.metaindexData))
|
||||
pe.Run()
|
||||
|
||||
mp.ph.MustWriteMetadata(path)
|
||||
|
||||
|
||||
175
lib/storage/metric_id_cache.go
Normal file
175
lib/storage/metric_id_cache.go
Normal file
@@ -0,0 +1,175 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/uint64set"
|
||||
)
|
||||
|
||||
// metricIDCache stores metricIDs that have been added to the index. It is used
|
||||
// during data ingestion to decide whether a new entry needs to be added to the
|
||||
// global index.
|
||||
//
|
||||
// The cache avoids synchronization on the read path if possible to reduce
|
||||
// contention. Based on dateMetricIDCache ideas.
|
||||
type metricIDCache struct {
|
||||
// Contains immutable set of metricIDs.
|
||||
curr atomic.Pointer[uint64set.Set]
|
||||
|
||||
// Contains immutable set of metricIDs that used to be current before cache
|
||||
// rotation. It is used to implement periodic cache clean-up. Protected by
|
||||
// mu.
|
||||
prev *uint64set.Set
|
||||
|
||||
// Contains the mutable set of metricIDs that either have been added to the
|
||||
// cache recently or migrated from prev. Protected by mu.
|
||||
next *uint64set.Set
|
||||
|
||||
// Contains the number of slow accesses to next. Is used for deciding when
|
||||
// to merge next to curr. Protected by mu.
|
||||
slowHits int
|
||||
|
||||
// Contains the number of times the next was merged into curr. Protected by mu.
|
||||
syncsCount uint64
|
||||
|
||||
// Contains the number of times the cache has been rotated. Protected by mu.
|
||||
rotationsCount uint64
|
||||
|
||||
mu sync.Mutex
|
||||
|
||||
stopCh chan struct{}
|
||||
rotationStoppedCh chan struct{}
|
||||
}
|
||||
|
||||
func newMetricIDCache() *metricIDCache {
|
||||
c := metricIDCache{
|
||||
prev: &uint64set.Set{},
|
||||
next: &uint64set.Set{},
|
||||
stopCh: make(chan struct{}),
|
||||
rotationStoppedCh: make(chan struct{}),
|
||||
}
|
||||
c.curr.Store(&uint64set.Set{})
|
||||
go c.startRotation()
|
||||
return &c
|
||||
}
|
||||
|
||||
func (c *metricIDCache) MustStop() {
|
||||
close(c.stopCh)
|
||||
<-c.rotationStoppedCh
|
||||
}
|
||||
|
||||
type metricIDCacheStats struct {
|
||||
Size uint64
|
||||
SizeBytes uint64
|
||||
SyncsCount uint64
|
||||
RotationsCount uint64
|
||||
}
|
||||
|
||||
func (c *metricIDCache) Stats() metricIDCacheStats {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
var s metricIDCacheStats
|
||||
curr := c.curr.Load()
|
||||
s.Size = uint64(curr.Len() + c.prev.Len() + c.next.Len())
|
||||
if curr.Len() > 0 {
|
||||
// empty uint64set.Set still occupies a few bytes. Ignore them.
|
||||
s.SizeBytes = curr.SizeBytes()
|
||||
}
|
||||
if c.prev.Len() > 0 {
|
||||
s.SizeBytes += c.prev.SizeBytes()
|
||||
}
|
||||
if c.next.Len() > 0 {
|
||||
s.SizeBytes += c.next.SizeBytes()
|
||||
}
|
||||
s.SyncsCount = c.syncsCount
|
||||
s.RotationsCount = c.rotationsCount
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
func (c *metricIDCache) Has(metricID uint64) bool {
|
||||
if c.curr.Load().Has(metricID) {
|
||||
// Fast path. The majority of calls must go here.
|
||||
return true
|
||||
}
|
||||
// Slow path. Acquire the lock and search the curr again and then also
|
||||
// search prev and next.
|
||||
return c.hasSlow(metricID)
|
||||
}
|
||||
|
||||
func (c *metricIDCache) hasSlow(metricID uint64) bool {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
// First, check curr again because the entry may have been moved to curr by
|
||||
// the time the caller acquires the lock.
|
||||
curr := c.curr.Load()
|
||||
if curr.Has(metricID) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Then check next and prev sets.
|
||||
ok := c.next.Has(metricID)
|
||||
if !ok && c.prev.Has(metricID) {
|
||||
// The metricID is in prev but is still in use. Migrate it to next.
|
||||
c.next.Add(metricID)
|
||||
ok = true
|
||||
}
|
||||
|
||||
if ok {
|
||||
c.slowHits++
|
||||
if c.slowHits > (curr.Len()+c.next.Len())/2 {
|
||||
// It is cheaper to merge next into curr than to pay inter-cpu sync
|
||||
// costs when accessing next.
|
||||
c.syncLocked()
|
||||
c.slowHits = 0
|
||||
}
|
||||
}
|
||||
return ok
|
||||
}
|
||||
|
||||
func (c *metricIDCache) Set(metricID uint64) {
|
||||
c.mu.Lock()
|
||||
c.next.Add(metricID)
|
||||
c.mu.Unlock()
|
||||
}
|
||||
|
||||
// syncLocked merges data from curr into next and atomically replaces curr with
|
||||
// next.
|
||||
func (c *metricIDCache) syncLocked() {
|
||||
curr := c.curr.Load()
|
||||
c.next.Union(curr)
|
||||
c.curr.Store(c.next)
|
||||
c.next = &uint64set.Set{}
|
||||
c.syncsCount++
|
||||
}
|
||||
|
||||
func (c *metricIDCache) startRotation() {
|
||||
d := timeutil.AddJitterToDuration(10 * time.Minute)
|
||||
ticker := time.NewTicker(d)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
close(c.rotationStoppedCh)
|
||||
return
|
||||
case <-ticker.C:
|
||||
c.rotate()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// rotate atomically rotates next, curr, and prev cache parts.
|
||||
func (c *metricIDCache) rotate() {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
curr := c.curr.Load()
|
||||
c.prev = curr
|
||||
c.curr.Store(c.next)
|
||||
c.next = &uint64set.Set{}
|
||||
c.rotationsCount++
|
||||
}
|
||||
116
lib/storage/metric_id_cache_synctest_test.go
Normal file
116
lib/storage/metric_id_cache_synctest_test.go
Normal file
@@ -0,0 +1,116 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/uint64set"
|
||||
"github.com/google/go-cmp/cmp"
|
||||
)
|
||||
|
||||
func TestMetricIDCache_ClearedWhenUnused(t *testing.T) {
|
||||
// Entries that are added to the cache but then never retrieved will be
|
||||
// eventually removed from it.
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
c := newMetricIDCache()
|
||||
defer c.MustStop()
|
||||
c.Set(123)
|
||||
time.Sleep(15 * time.Minute)
|
||||
time.Sleep(15 * time.Minute)
|
||||
time.Sleep(15 * time.Minute)
|
||||
if c.Has(123) {
|
||||
t.Fatalf("entry is still in cache")
|
||||
}
|
||||
})
|
||||
|
||||
// Entries that are added to the cache and retrieved but then never
|
||||
// retrieved again will be eventually removed from it.
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
c := newMetricIDCache()
|
||||
defer c.MustStop()
|
||||
c.Set(123)
|
||||
time.Sleep(5 * time.Minute)
|
||||
if !c.Has(123) {
|
||||
t.Fatalf("entry not in cache")
|
||||
}
|
||||
time.Sleep(15 * time.Minute)
|
||||
time.Sleep(15 * time.Minute)
|
||||
if c.Has(123) {
|
||||
t.Fatalf("entry is still in cache")
|
||||
}
|
||||
})
|
||||
|
||||
// Entries that are added to the cache and then periodically retrieved,
|
||||
// will remain in cache indefinitely.
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
c := newMetricIDCache()
|
||||
defer c.MustStop()
|
||||
c.Set(123)
|
||||
for range 10_000 {
|
||||
time.Sleep(5 * time.Minute)
|
||||
if !c.Has(123) {
|
||||
t.Fatalf("entry not in cache")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestMetricIDCache_Stats(t *testing.T) {
|
||||
assertStats := func(t *testing.T, c *metricIDCache, want metricIDCacheStats) {
|
||||
if diff := cmp.Diff(want, c.Stats()); diff != "" {
|
||||
t.Fatalf("unexpected stats (-want, +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
c := newMetricIDCache()
|
||||
defer c.MustStop()
|
||||
|
||||
// Check stats right after the creation.
|
||||
assertStats(t, c, metricIDCacheStats{})
|
||||
|
||||
// Add metricIDs and check stats.
|
||||
// At this point, all metricIDs are in next.
|
||||
metricIDs := uint64set.Set{}
|
||||
for metricID := range uint64(100_000) {
|
||||
c.Set(metricID)
|
||||
metricIDs.Add(metricID)
|
||||
}
|
||||
assertStats(t, c, metricIDCacheStats{
|
||||
Size: 100_000,
|
||||
SizeBytes: metricIDs.SizeBytes(),
|
||||
})
|
||||
|
||||
// Get all metricIDs and check stats.
|
||||
// All metricIDs will be sync'ed from next to curr.
|
||||
for metricID := range uint64(100_000) {
|
||||
if !c.Has(metricID) {
|
||||
t.Fatalf("metricID not in cache: %d", metricID)
|
||||
}
|
||||
}
|
||||
assertStats(t, c, metricIDCacheStats{
|
||||
Size: 100_000,
|
||||
SizeBytes: metricIDs.SizeBytes(),
|
||||
SyncsCount: 1,
|
||||
})
|
||||
|
||||
// Wait until next rotation.
|
||||
// curr metricIDs will be moved to prev.
|
||||
time.Sleep(15 * time.Minute)
|
||||
assertStats(t, c, metricIDCacheStats{
|
||||
Size: 100_000,
|
||||
SizeBytes: metricIDs.SizeBytes(),
|
||||
SyncsCount: 1,
|
||||
RotationsCount: 1,
|
||||
})
|
||||
|
||||
// Wait until another rotation.
|
||||
// The cache now should be empty.
|
||||
time.Sleep(15 * time.Minute)
|
||||
assertStats(t, c, metricIDCacheStats{
|
||||
SyncsCount: 1,
|
||||
RotationsCount: 2,
|
||||
})
|
||||
})
|
||||
}
|
||||
69
lib/storage/metric_id_cache_test.go
Normal file
69
lib/storage/metric_id_cache_test.go
Normal file
@@ -0,0 +1,69 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestMetricIDCache_SetHas(t *testing.T) {
|
||||
c := newMetricIDCache()
|
||||
defer c.MustStop()
|
||||
|
||||
metricIDMin := uint64(time.Now().UnixNano())
|
||||
|
||||
for i := range uint64(1_000_000) {
|
||||
c.Set(metricIDMin + i)
|
||||
}
|
||||
|
||||
for i := range uint64(1_000_000) {
|
||||
metricID := metricIDMin + i
|
||||
if !c.Has(metricID) {
|
||||
t.Fatalf("metricID not found: %d", metricID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricIDCache_SetHas_Concurrent(t *testing.T) {
|
||||
c := newMetricIDCache()
|
||||
defer c.MustStop()
|
||||
|
||||
const (
|
||||
numMetricIDs = 1_000_000
|
||||
concurrency = 1000
|
||||
)
|
||||
|
||||
var writeWG, readWG sync.WaitGroup
|
||||
writeCh := make(chan uint64, concurrency)
|
||||
readCh := make(chan uint64, concurrency)
|
||||
for range concurrency {
|
||||
writeWG.Add(1)
|
||||
go func() {
|
||||
for metricID := range writeCh {
|
||||
c.Set(metricID)
|
||||
readCh <- metricID
|
||||
}
|
||||
writeWG.Done()
|
||||
}()
|
||||
|
||||
readWG.Add(1)
|
||||
go func() {
|
||||
for metricID := range readCh {
|
||||
if !c.Has(metricID) {
|
||||
panic(fmt.Sprintf("metricID not found: %d", metricID))
|
||||
}
|
||||
}
|
||||
readWG.Done()
|
||||
}()
|
||||
}
|
||||
|
||||
metricIDMin := uint64(time.Now().UnixNano())
|
||||
for i := range uint64(numMetricIDs) {
|
||||
writeCh <- metricIDMin + i
|
||||
}
|
||||
close(writeCh)
|
||||
writeWG.Wait()
|
||||
close(readCh)
|
||||
readWG.Wait()
|
||||
}
|
||||
73
lib/storage/metric_id_cache_timing_test.go
Normal file
73
lib/storage/metric_id_cache_timing_test.go
Normal file
@@ -0,0 +1,73 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func BenchmarkMetricIDCache_Has(b *testing.B) {
|
||||
f := func(b *testing.B, numMetricIDs, distance int64, hitsOnly, warmUp bool) {
|
||||
b.Helper()
|
||||
c := newMetricIDCache()
|
||||
defer c.MustStop()
|
||||
metricIDMin := time.Now().UnixNano()
|
||||
metricIDMax := metricIDMin + numMetricIDs*distance
|
||||
for metricID := metricIDMin; metricID <= metricIDMax; metricID += distance {
|
||||
c.Set(uint64(metricID))
|
||||
if warmUp && !c.Has(uint64(metricID)) {
|
||||
b.Fatalf("metricID not in cache: %d", metricID)
|
||||
}
|
||||
}
|
||||
b.ResetTimer()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
if hitsOnly {
|
||||
metricID := metricIDMin + rand.Int63n(numMetricIDs)*distance
|
||||
for pb.Next() {
|
||||
if !c.Has(uint64(metricID)) {
|
||||
b.Fatalf("metricID not in cache: %d", metricID)
|
||||
}
|
||||
metricID += distance
|
||||
if metricID > metricIDMax {
|
||||
metricID = metricIDMin
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// misses only
|
||||
metricID := metricIDMax + distance
|
||||
for pb.Next() {
|
||||
if c.Has(uint64(metricID)) {
|
||||
b.Fatalf("metricID is in cache: %d", metricID)
|
||||
}
|
||||
metricID += distance
|
||||
}
|
||||
}
|
||||
})
|
||||
b.ReportAllocs()
|
||||
}
|
||||
|
||||
subB := func(numMetricIDs, distance int64, hitsOnly, warmUp bool) {
|
||||
hitsOrMisses := "hitsss"
|
||||
if !hitsOnly {
|
||||
hitsOrMisses = "misses"
|
||||
}
|
||||
coldOrWarm := "cold"
|
||||
if warmUp {
|
||||
coldOrWarm = "warm"
|
||||
}
|
||||
name := fmt.Sprintf("%s/%s/n%d/d%d", hitsOrMisses, coldOrWarm, numMetricIDs, distance)
|
||||
b.Run(name, func(b *testing.B) {
|
||||
f(b, numMetricIDs, distance, hitsOnly, warmUp)
|
||||
})
|
||||
}
|
||||
for _, hitsOnly := range []bool{true, false} {
|
||||
for _, warmUp := range []bool{false, true} {
|
||||
for _, numMetricIDs := range []int64{100_000, 1_000_000, 10_000_000} {
|
||||
for _, distance := range []int64{1, 10, 100} {
|
||||
subB(numMetricIDs, distance, hitsOnly, warmUp)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4,14 +4,14 @@ import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// metricNameSearch is used for searching a metricName by a metricID in curr
|
||||
// and prev indexDBs. If useSparseCache is false the name is first searched in
|
||||
// metricNameCache and also stored in that cache when found in one of the
|
||||
// indexDBs.
|
||||
// metricNameSearch is used for searching a metricName by a metricID in
|
||||
// partition and legacy indexDBs. If useSparseCache is false the name is first
|
||||
// searched in metricNameCache and also stored in that cache when found in one
|
||||
// of the indexDBs.
|
||||
//
|
||||
// Most index search methods are invoked only once per API call. For example, one
|
||||
// request to /api/v1/series results in one invocation of
|
||||
// Storage.SearchMetricNames() method. However, searching is metricName by
|
||||
// Storage.SearchMetricNames() method. However, searching a metricName by
|
||||
// metricID is done multiple times per API call. For example, data search
|
||||
// performs the metricName search for each data block (see search.go).
|
||||
//
|
||||
@@ -28,8 +28,9 @@ import (
|
||||
// search method is invoked (due to mutex locks).
|
||||
type metricNameSearch struct {
|
||||
storage *Storage
|
||||
idbPrev *indexDB
|
||||
idbCurr *indexDB
|
||||
ptws []*partitionWrapper
|
||||
idbs []*indexDB
|
||||
legacyIDBs *legacyIndexDBs
|
||||
useSparseCache bool
|
||||
}
|
||||
|
||||
@@ -43,21 +44,40 @@ func (s *metricNameSearch) search(dst []byte, metricID uint64) ([]byte, bool) {
|
||||
}
|
||||
}
|
||||
|
||||
dst, found := s.idbCurr.searchMetricName(dst, metricID, s.useSparseCache)
|
||||
if found {
|
||||
if !s.useSparseCache {
|
||||
s.storage.putMetricNameToCache(metricID, dst)
|
||||
var found bool
|
||||
|
||||
// This will be just one idb most of the time since a typical time range
|
||||
// fits within a single month.
|
||||
for _, idb := range s.idbs {
|
||||
dst, found = idb.searchMetricName(dst, metricID, s.useSparseCache)
|
||||
if found {
|
||||
if !s.useSparseCache {
|
||||
s.storage.putMetricNameToCache(metricID, dst)
|
||||
}
|
||||
return dst, true
|
||||
}
|
||||
return dst, true
|
||||
}
|
||||
|
||||
// Fallback to previous indexDB.
|
||||
dst, found = s.idbPrev.searchMetricName(dst, metricID, s.useSparseCache)
|
||||
if found {
|
||||
if !s.useSparseCache {
|
||||
s.storage.putMetricNameToCache(metricID, dst)
|
||||
// Fallback to current legacy indexDB.
|
||||
if idb := s.legacyIDBs.getIDBCurr(); idb != nil {
|
||||
dst, found = idb.searchMetricName(dst, metricID, s.useSparseCache)
|
||||
if found {
|
||||
if !s.useSparseCache {
|
||||
s.storage.putMetricNameToCache(metricID, dst)
|
||||
}
|
||||
return dst, true
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to previous legacy indexDB.
|
||||
if idb := s.legacyIDBs.getIDBPrev(); idb != nil {
|
||||
dst, found = idb.searchMetricName(dst, metricID, s.useSparseCache)
|
||||
if found {
|
||||
if !s.useSparseCache {
|
||||
s.storage.putMetricNameToCache(metricID, dst)
|
||||
}
|
||||
return dst, true
|
||||
}
|
||||
return dst, true
|
||||
}
|
||||
|
||||
// Not deleting metricID if no corresponding metricName has been found
|
||||
@@ -74,19 +94,25 @@ var mnsPool = &sync.Pool{
|
||||
},
|
||||
}
|
||||
|
||||
func getMetricNameSearch(storage *Storage, useSparseCache bool) *metricNameSearch {
|
||||
func getMetricNameSearch(storage *Storage, tr TimeRange, useSparseCache bool) *metricNameSearch {
|
||||
s := mnsPool.Get().(*metricNameSearch)
|
||||
s.storage = storage
|
||||
s.idbPrev, s.idbCurr = storage.getPrevAndCurrIndexDBs()
|
||||
s.ptws = storage.tb.GetPartitions(tr)
|
||||
for _, ptw := range s.ptws {
|
||||
s.idbs = append(s.idbs, ptw.pt.idb)
|
||||
}
|
||||
s.legacyIDBs = storage.getLegacyIndexDBs()
|
||||
s.useSparseCache = useSparseCache
|
||||
return s
|
||||
}
|
||||
|
||||
func putMetricNameSearch(s *metricNameSearch) {
|
||||
s.storage.putPrevAndCurrIndexDBs(s.idbPrev, s.idbCurr)
|
||||
s.storage.tb.PutPartitions(s.ptws)
|
||||
s.storage.putLegacyIndexDBs(s.legacyIDBs)
|
||||
s.storage = nil
|
||||
s.idbPrev = nil
|
||||
s.idbCurr = nil
|
||||
s.ptws = nil
|
||||
s.idbs = nil
|
||||
s.legacyIDBs = nil
|
||||
s.useSparseCache = false
|
||||
mnsPool.Put(s)
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"sort"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
@@ -206,6 +207,9 @@ func (mt *Tracker) MustClose() {
|
||||
|
||||
// mustSaveLocked stores in-memory state of tracker on disk
|
||||
func (mt *Tracker) mustSaveLocked() {
|
||||
startTime := time.Now()
|
||||
logger.Infof("saving metric name usage stats to %s", mt.cachePath)
|
||||
|
||||
var bb bytes.Buffer
|
||||
zw := gzip.NewWriter(&bb)
|
||||
jw := json.NewEncoder(zw)
|
||||
@@ -242,6 +246,11 @@ func (mt *Tracker) mustSaveLocked() {
|
||||
// Atomically store the data at mt.cachePath.
|
||||
data := bb.Bytes()
|
||||
fs.MustWriteAtomic(mt.cachePath, data, true)
|
||||
|
||||
d := time.Since(startTime).Seconds()
|
||||
size := mt.currentItemsCount.Load()
|
||||
sizeBytes := mt.currentSizeBytes.Load()
|
||||
logger.Infof("metric name usage stats has been successfully saved to %s in %.3f seconds; entriesCount: %d, sizeBytes: %d", mt.cachePath, d, size, sizeBytes)
|
||||
}
|
||||
|
||||
// TrackerMetrics holds metrics to report
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/blockcache"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
)
|
||||
@@ -59,7 +60,7 @@ func mustOpenFilePart(path string) *part {
|
||||
// Open part files in parallel in order to speed up this process
|
||||
// on high-latency storage systems such as NFS and Ceph.
|
||||
|
||||
var pro fs.ParallelReaderAtOpener
|
||||
var pe fsutil.ParallelExecutor
|
||||
|
||||
timestampsPath := filepath.Join(path, timestampsFilename)
|
||||
valuesPath := filepath.Join(path, valuesFilename)
|
||||
@@ -67,17 +68,17 @@ func mustOpenFilePart(path string) *part {
|
||||
|
||||
var timestampsFile fs.MustReadAtCloser
|
||||
var timestampsSize uint64
|
||||
pro.Add(timestampsPath, &timestampsFile, &timestampsSize)
|
||||
pe.Add(fs.NewReaderAtOpenerTask(timestampsPath, &timestampsFile, &timestampsSize))
|
||||
|
||||
var valuesFile fs.MustReadAtCloser
|
||||
var valuesSize uint64
|
||||
pro.Add(valuesPath, &valuesFile, &valuesSize)
|
||||
pe.Add(fs.NewReaderAtOpenerTask(valuesPath, &valuesFile, &valuesSize))
|
||||
|
||||
var indexFile fs.MustReadAtCloser
|
||||
var indexSize uint64
|
||||
pro.Add(indexPath, &indexFile, &indexSize)
|
||||
pe.Add(fs.NewReaderAtOpenerTask(indexPath, &indexFile, &indexSize))
|
||||
|
||||
pro.Run()
|
||||
pe.Run()
|
||||
|
||||
size := timestampsSize + valuesSize + indexSize + metaindexSize
|
||||
return newPart(&ph, path, size, metaindexFile, timestampsFile, valuesFile, indexFile)
|
||||
@@ -118,12 +119,11 @@ func (p *part) String() string {
|
||||
func (p *part) MustClose() {
|
||||
// Close files in parallel in order to speed up this process on storage systems with high latency
|
||||
// such as NFS or Ceph.
|
||||
cs := []fs.MustCloser{
|
||||
p.timestampsFile,
|
||||
p.valuesFile,
|
||||
p.indexFile,
|
||||
}
|
||||
fs.MustCloseParallel(cs)
|
||||
var pe fsutil.ParallelExecutor
|
||||
pe.Add(fs.NewCloserTask(p.timestampsFile))
|
||||
pe.Add(fs.NewCloserTask(p.valuesFile))
|
||||
pe.Add(fs.NewCloserTask(p.indexFile))
|
||||
pe.Run()
|
||||
|
||||
ibCache.RemoveBlocksForPart(p)
|
||||
}
|
||||
|
||||
@@ -99,7 +99,11 @@ type partition struct {
|
||||
// the path to directory with bigParts.
|
||||
bigPartsPath string
|
||||
|
||||
// the path to directory with IndexDB parts.
|
||||
indexDBPartsPath string
|
||||
|
||||
// The parent storage.
|
||||
// TODO(@rtm0): Do not depend on Storage, pass only what is required.
|
||||
s *Storage
|
||||
|
||||
// Name is the name of the partition in the form YYYY_MM.
|
||||
@@ -127,6 +131,9 @@ type partition struct {
|
||||
// Contains file-based parts with big number of items, which are visible for search.
|
||||
bigParts []*partWrapper
|
||||
|
||||
// Contains the inverted index for the data stored in this partition.
|
||||
idb *indexDB
|
||||
|
||||
// stopCh is used for notifying all the background workers to stop.
|
||||
//
|
||||
// It must be closed under partsLock in order to prevent from calling wg.Add()
|
||||
@@ -193,14 +200,19 @@ func (pw *partWrapper) decRef() {
|
||||
}
|
||||
|
||||
// mustCreatePartition creates new partition for the given timestamp and the given paths to small and big partitions.
|
||||
func mustCreatePartition(timestamp int64, smallPartitionsPath, bigPartitionsPath string, s *Storage) *partition {
|
||||
func mustCreatePartition(timestamp int64, smallPartitionsPath, bigPartitionsPath, indexDBPath string, s *Storage) *partition {
|
||||
var tr TimeRange
|
||||
tr.fromPartitionTimestamp(timestamp)
|
||||
name := timestampToPartitionName(timestamp)
|
||||
|
||||
smallPartsPath := filepath.Join(filepath.Clean(smallPartitionsPath), name)
|
||||
bigPartsPath := filepath.Join(filepath.Clean(bigPartitionsPath), name)
|
||||
logger.Infof("creating a partition %q with smallPartsPath=%q, bigPartsPath=%q", name, smallPartsPath, bigPartsPath)
|
||||
indexDBPartsPath := filepath.Join(filepath.Clean(indexDBPath), name)
|
||||
logger.Infof("creating a partition %q with smallPartsPath=%q, bigPartsPath=%q, indexDBPartsPath=%q", name, smallPartsPath, bigPartsPath, indexDBPartsPath)
|
||||
|
||||
fs.MustMkdirFailIfExist(smallPartsPath)
|
||||
fs.MustMkdirFailIfExist(bigPartsPath)
|
||||
fs.MustMkdirFailIfExist(indexDBPartsPath)
|
||||
|
||||
// Create parts.json file. Since we are creating a new partition, there
|
||||
// will be no parts, i.e. the smallPartsPath and bigPartsPath dirs will be
|
||||
@@ -208,13 +220,11 @@ func mustCreatePartition(timestamp int64, smallPartitionsPath, bigPartitionsPath
|
||||
// there will be panic.
|
||||
mustWritePartNames(nil, nil, smallPartsPath)
|
||||
|
||||
var tr TimeRange
|
||||
tr.fromPartitionTimestamp(timestamp)
|
||||
|
||||
pt := newPartition(name, smallPartsPath, bigPartsPath, tr, s)
|
||||
pt := newPartition(name, smallPartsPath, bigPartsPath, indexDBPartsPath, tr, s)
|
||||
|
||||
fs.MustSyncPathAndParentDir(smallPartsPath)
|
||||
fs.MustSyncPathAndParentDir(bigPartsPath)
|
||||
fs.MustSyncPathAndParentDir(indexDBPartsPath)
|
||||
|
||||
pt.startBackgroundWorkers()
|
||||
|
||||
@@ -238,21 +248,24 @@ func (pt *partition) startBackgroundWorkers() {
|
||||
//
|
||||
// The pt must be detached from table before calling pt.Drop.
|
||||
func (pt *partition) Drop() {
|
||||
logger.Infof("dropping partition %q at smallPartsPath=%q, bigPartsPath=%q", pt.name, pt.smallPartsPath, pt.bigPartsPath)
|
||||
logger.Infof("dropping partition %q at smallPartsPath=%q, bigPartsPath=%q, indexDBPartsPath=%q", pt.name, pt.smallPartsPath, pt.bigPartsPath, pt.indexDBPartsPath)
|
||||
|
||||
fs.MustRemoveDir(pt.smallPartsPath)
|
||||
fs.MustRemoveDir(pt.bigPartsPath)
|
||||
fs.MustRemoveDir(pt.indexDBPartsPath)
|
||||
logger.Infof("partition %q has been dropped", pt.name)
|
||||
}
|
||||
|
||||
// mustOpenPartition opens the existing partition from the given paths.
|
||||
func mustOpenPartition(smallPartsPath, bigPartsPath string, s *Storage) *partition {
|
||||
func mustOpenPartition(smallPartsPath, bigPartsPath, indexDBPartsPath string, s *Storage) *partition {
|
||||
smallPartsPath = filepath.Clean(smallPartsPath)
|
||||
bigPartsPath = filepath.Clean(bigPartsPath)
|
||||
indexDBPartsPath = filepath.Clean(indexDBPartsPath)
|
||||
|
||||
// Create paths to parts if they are missing.
|
||||
fs.MustMkdirIfNotExist(smallPartsPath)
|
||||
fs.MustMkdirIfNotExist(bigPartsPath)
|
||||
fs.MustMkdirIfNotExist(indexDBPartsPath)
|
||||
|
||||
name := filepath.Base(smallPartsPath)
|
||||
var tr TimeRange
|
||||
@@ -262,6 +275,9 @@ func mustOpenPartition(smallPartsPath, bigPartsPath string, s *Storage) *partiti
|
||||
if !strings.HasSuffix(bigPartsPath, name) {
|
||||
logger.Panicf("FATAL: partition name in bigPartsPath %q doesn't match smallPartsPath %q; want %q", bigPartsPath, smallPartsPath, name)
|
||||
}
|
||||
if !strings.HasSuffix(indexDBPartsPath, name) {
|
||||
logger.Panicf("FATAL: partition name in indexDBPartsPath %q doesn't match smallPartsPath %q; want %q", indexDBPartsPath, smallPartsPath, name)
|
||||
}
|
||||
|
||||
partsFile := filepath.Join(smallPartsPath, partsFilename)
|
||||
partNamesSmall, partNamesBig := mustReadPartNames(partsFile, smallPartsPath, bigPartsPath)
|
||||
@@ -276,7 +292,7 @@ func mustOpenPartition(smallPartsPath, bigPartsPath string, s *Storage) *partiti
|
||||
mustWritePartNames(smallParts, bigParts, smallPartsPath)
|
||||
}
|
||||
|
||||
pt := newPartition(name, smallPartsPath, bigPartsPath, tr, s)
|
||||
pt := newPartition(name, smallPartsPath, bigPartsPath, indexDBPartsPath, tr, s)
|
||||
pt.smallParts = smallParts
|
||||
pt.bigParts = bigParts
|
||||
|
||||
@@ -288,17 +304,23 @@ func mustOpenPartition(smallPartsPath, bigPartsPath string, s *Storage) *partiti
|
||||
return pt
|
||||
}
|
||||
|
||||
func newPartition(name, smallPartsPath, bigPartsPath string, tr TimeRange, s *Storage) *partition {
|
||||
func newPartition(name, smallPartsPath, bigPartsPath, indexDBPartsPath string, tr TimeRange, s *Storage) *partition {
|
||||
id := uint64(tr.MinTimestamp)
|
||||
idb := mustOpenIndexDB(id, tr, name, indexDBPartsPath, s, &s.isReadOnly, false)
|
||||
|
||||
p := &partition{
|
||||
name: name,
|
||||
smallPartsPath: smallPartsPath,
|
||||
bigPartsPath: bigPartsPath,
|
||||
tr: tr,
|
||||
s: s,
|
||||
stopCh: make(chan struct{}),
|
||||
name: name,
|
||||
smallPartsPath: smallPartsPath,
|
||||
bigPartsPath: bigPartsPath,
|
||||
indexDBPartsPath: indexDBPartsPath,
|
||||
tr: tr,
|
||||
s: s,
|
||||
idb: idb,
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
p.mergeIdx.Store(uint64(time.Now().UnixNano()))
|
||||
p.rawRows.init()
|
||||
|
||||
return p
|
||||
}
|
||||
|
||||
@@ -350,6 +372,8 @@ type partitionMetrics struct {
|
||||
|
||||
ScheduledDownsamplingPartitions uint64
|
||||
ScheduledDownsamplingPartitionsSize uint64
|
||||
|
||||
IndexDBMetrics IndexDBMetrics
|
||||
}
|
||||
|
||||
// TotalRowsCount returns total number of rows in tm.
|
||||
@@ -426,6 +450,8 @@ func (pt *partition) UpdateMetrics(m *partitionMetrics) {
|
||||
m.InmemoryRowsDeleted += pt.inmemoryRowsDeleted.Load()
|
||||
m.SmallRowsDeleted += pt.smallRowsDeleted.Load()
|
||||
m.BigRowsDeleted += pt.bigRowsDeleted.Load()
|
||||
|
||||
pt.idb.UpdateMetrics(&m.IndexDBMetrics)
|
||||
}
|
||||
|
||||
// AddRows adds the given rows to the partition pt.
|
||||
@@ -634,6 +660,7 @@ func (pt *partition) NotifyReadWriteMode() {
|
||||
pt.startInmemoryPartsMergers()
|
||||
pt.startSmallPartsMergers()
|
||||
pt.startBigPartsMergers()
|
||||
pt.idb.tb.NotifyReadWriteMode()
|
||||
}
|
||||
|
||||
func (pt *partition) inmemoryPartsMerger() {
|
||||
@@ -962,13 +989,18 @@ func (pt *partition) MustClose() {
|
||||
logger.Panicf("BUG: unexpected non-zero refCount: %d", refCount)
|
||||
}
|
||||
}
|
||||
|
||||
idb := pt.idb
|
||||
pt.idb = nil
|
||||
idb.MustClose()
|
||||
}
|
||||
|
||||
// DebugFlush flushes pending raw data rows of this partition so they
|
||||
// DebugFlush flushes pending raw index and data rows of this partition so they
|
||||
// become visible to search.
|
||||
//
|
||||
// This function is for debug purposes only.
|
||||
func (pt *partition) DebugFlush() {
|
||||
pt.idb.tb.DebugFlush()
|
||||
pt.flushPendingRows(true)
|
||||
}
|
||||
|
||||
@@ -1600,7 +1632,7 @@ func (pt *partition) mergePartsInternal(dstPartPath string, bsw *blockStreamWrit
|
||||
retentionDeadline := currentTimestamp - pt.s.retentionMsecs
|
||||
activeMerges.Add(1)
|
||||
_ = useSparseCache // unused in OSS version.
|
||||
dmis := pt.s.getDeletedMetricIDs()
|
||||
dmis := pt.idb.getDeletedMetricIDs()
|
||||
err := mergeBlockStreams(&ph, bsw, bsrs, stopCh, dmis, retentionDeadline, rowsMerged, rowsDeleted)
|
||||
activeMerges.Add(-1)
|
||||
mergesCount.Add(1)
|
||||
@@ -1994,8 +2026,8 @@ func mustOpenParts(partsFile, path string, partNames []string) []*partWrapper {
|
||||
// MustCreateSnapshotAt creates pt snapshot at the given smallPath and bigPath dirs.
|
||||
//
|
||||
// Snapshot is created using linux hard links, so it is usually created very quickly.
|
||||
func (pt *partition) MustCreateSnapshotAt(smallPath, bigPath string) {
|
||||
logger.Infof("creating partition snapshot of %q and %q...", pt.smallPartsPath, pt.bigPartsPath)
|
||||
func (pt *partition) MustCreateSnapshotAt(smallPath, bigPath, indexDBPath string) {
|
||||
logger.Infof("creating partition snapshot of %q, %q, and %q...", pt.smallPartsPath, pt.bigPartsPath, pt.indexDBPartsPath)
|
||||
startTime := time.Now()
|
||||
|
||||
// Flush inmemory data to disk.
|
||||
@@ -2025,8 +2057,10 @@ func (pt *partition) MustCreateSnapshotAt(smallPath, bigPath string) {
|
||||
fs.MustSyncPathAndParentDir(smallPath)
|
||||
fs.MustSyncPathAndParentDir(bigPath)
|
||||
|
||||
logger.Infof("created partition snapshot of %q and %q at %q and %q in %.3f seconds",
|
||||
pt.smallPartsPath, pt.bigPartsPath, smallPath, bigPath, time.Since(startTime).Seconds())
|
||||
pt.idb.tb.MustCreateSnapshotAt(indexDBPath)
|
||||
|
||||
logger.Infof("created partition snapshot of %q, %q, and %q at %q, %q, and %q in %.3f seconds",
|
||||
pt.smallPartsPath, pt.bigPartsPath, pt.indexDBPartsPath, smallPath, bigPath, indexDBPath, time.Since(startTime).Seconds())
|
||||
}
|
||||
|
||||
// mustCreateSnapshot creates a snapshot from srcDir to dstDir.
|
||||
|
||||
@@ -81,12 +81,29 @@ func (pts *partitionSearch) Init(pt *partition, tsids []TSID, tr TimeRange) {
|
||||
return
|
||||
}
|
||||
|
||||
filteredTSIDs := tsids
|
||||
deletedMetricsIDs := pt.idb.getDeletedMetricIDs()
|
||||
if deletedMetricsIDs.Len() > 0 {
|
||||
filteredTSIDs = make([]TSID, 0, len(tsids))
|
||||
for _, tsid := range tsids {
|
||||
if !deletedMetricsIDs.Has(tsid.MetricID) {
|
||||
filteredTSIDs = append(filteredTSIDs, tsid)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(filteredTSIDs) == 0 {
|
||||
// Fast path - zero tsids.
|
||||
pts.err = io.EOF
|
||||
return
|
||||
}
|
||||
|
||||
pts.pws = pt.GetParts(pts.pws[:0], true)
|
||||
|
||||
// Initialize psPool.
|
||||
pts.psPool = slicesutil.SetLength(pts.psPool, len(pts.pws))
|
||||
for i, pw := range pts.pws {
|
||||
pts.psPool[i].Init(pw.p, tsids, tr)
|
||||
pts.psPool[i].Init(pw.p, filteredTSIDs, tr)
|
||||
}
|
||||
|
||||
// Initialize the psHeap.
|
||||
|
||||
@@ -173,6 +173,7 @@ func testPartitionSearchEx(t *testing.T, ptt int64, tr TimeRange, partsCount, ma
|
||||
pt := testCreatePartition(t, ptt, strg)
|
||||
smallPartsPath := pt.smallPartsPath
|
||||
bigPartsPath := pt.bigPartsPath
|
||||
indexDBPartsPath := pt.indexDBPartsPath
|
||||
for _, rows := range rowss {
|
||||
pt.AddRows(rows)
|
||||
|
||||
@@ -183,7 +184,7 @@ func testPartitionSearchEx(t *testing.T, ptt int64, tr TimeRange, partsCount, ma
|
||||
pt.MustClose()
|
||||
|
||||
// Open the created partition and test search on it.
|
||||
pt = mustOpenPartition(smallPartsPath, bigPartsPath, strg)
|
||||
pt = mustOpenPartition(smallPartsPath, bigPartsPath, indexDBPartsPath, strg)
|
||||
testPartitionSearch(t, pt, tsids, tr, rbsExpected, rowsCountExpected)
|
||||
pt.MustClose()
|
||||
stopTestStorage(strg)
|
||||
|
||||
@@ -199,7 +199,8 @@ func testCreatePartition(t *testing.T, timestamp int64, s *Storage) *partition {
|
||||
t.Helper()
|
||||
small := filepath.Join(t.Name(), smallDirname)
|
||||
big := filepath.Join(t.Name(), bigDirname)
|
||||
return mustCreatePartition(timestamp, small, big, s)
|
||||
indexdb := filepath.Join(t.Name(), indexdbDirname)
|
||||
return mustCreatePartition(timestamp, small, big, indexdb, s)
|
||||
}
|
||||
|
||||
func TestMustCreatePartition(t *testing.T) {
|
||||
@@ -214,9 +215,13 @@ func TestMustCreatePartition(t *testing.T) {
|
||||
if fs.IsPathExist(bigPath) {
|
||||
t.Errorf("big partition directory must not exist: %s", bigPath)
|
||||
}
|
||||
indexDBPath := filepath.Join(t.Name(), "indexdb")
|
||||
if fs.IsPathExist(indexDBPath) {
|
||||
t.Errorf("indexdb parition directory must not exist: %s", indexDBPath)
|
||||
}
|
||||
s := &Storage{}
|
||||
|
||||
got := mustCreatePartition(ts, smallPath, bigPath, s)
|
||||
got := mustCreatePartition(ts, smallPath, bigPath, indexDBPath, s)
|
||||
defer got.MustClose()
|
||||
|
||||
wantSmallPartsPath := filepath.Join(smallPath, "2025_03")
|
||||
@@ -233,6 +238,14 @@ func TestMustCreatePartition(t *testing.T) {
|
||||
if !fs.IsPathExist(wantBigPartsPath) {
|
||||
t.Errorf("big parts directory hasn't been created: %s", wantBigPartsPath)
|
||||
}
|
||||
wantIndexDBPartsPath := filepath.Join(indexDBPath, "2025_03")
|
||||
if got.indexDBPartsPath != wantIndexDBPartsPath {
|
||||
t.Errorf("unexpected indexDB parts path: got %s, want %s", got.indexDBPartsPath, wantIndexDBPartsPath)
|
||||
}
|
||||
if !fs.IsPathExist(wantIndexDBPartsPath) {
|
||||
t.Errorf("indexDB parts directory hasn't been created: %s", wantIndexDBPartsPath)
|
||||
}
|
||||
|
||||
wantStorage := s
|
||||
if got.s != wantStorage {
|
||||
t.Errorf("unexpected storage: got %v, want %v", got.s, wantStorage)
|
||||
@@ -248,7 +261,6 @@ func TestMustCreatePartition(t *testing.T) {
|
||||
if got.tr != wantTR {
|
||||
t.Errorf("unexpected time range: got %v, want %v", &got.tr, &wantTR)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestMustOpenPartition(t *testing.T) {
|
||||
@@ -256,10 +268,11 @@ func TestMustOpenPartition(t *testing.T) {
|
||||
|
||||
smallPartsPath := filepath.Join(t.Name(), "small", "2025_03")
|
||||
bigPartsPath := filepath.Join(t.Name(), "big", "2025_03")
|
||||
indexDBPartsPath := filepath.Join(t.Name(), "indexdb", "2025_03")
|
||||
|
||||
s := &Storage{}
|
||||
|
||||
got := mustOpenPartition(smallPartsPath, bigPartsPath, s)
|
||||
got := mustOpenPartition(smallPartsPath, bigPartsPath, indexDBPartsPath, s)
|
||||
defer got.MustClose()
|
||||
|
||||
if got.smallPartsPath != smallPartsPath {
|
||||
@@ -274,6 +287,12 @@ func TestMustOpenPartition(t *testing.T) {
|
||||
if !fs.IsPathExist(bigPartsPath) {
|
||||
t.Errorf("big parts directory hasn't been created: %s", bigPartsPath)
|
||||
}
|
||||
if got.indexDBPartsPath != indexDBPartsPath {
|
||||
t.Errorf("unexpected indexDB parts path: got %s, want %s", got.indexDBPartsPath, indexDBPartsPath)
|
||||
}
|
||||
if !fs.IsPathExist(indexDBPartsPath) {
|
||||
t.Errorf("indexDB parts directory hasn't been created: %s", indexDBPartsPath)
|
||||
}
|
||||
if got.s != s {
|
||||
t.Errorf("unexpected storage: got %v, want %v", got.s, s)
|
||||
}
|
||||
@@ -296,15 +315,16 @@ func TestMustOpenPartition_invalidPartitionName(t *testing.T) {
|
||||
|
||||
smallPartsPath := filepath.Join(t.Name(), "small", "2025_03_invalid")
|
||||
bigPartsPath := filepath.Join(t.Name(), "big", "2025_03_invalid")
|
||||
indexDBPartsPath := filepath.Join(t.Name(), "indexdb", "2025_03_invalid")
|
||||
|
||||
defer func() {
|
||||
if err := recover(); err == nil {
|
||||
t.Fatalf("expected panic on invalid partition name in smallPartsPath but it did not happen: %v", smallPartsPath)
|
||||
t.Fatalf("expected panic on invalid partition name in smallPartsPath but it did not happen: %q", smallPartsPath)
|
||||
}
|
||||
}()
|
||||
|
||||
s := &Storage{}
|
||||
_ = mustOpenPartition(smallPartsPath, bigPartsPath, s)
|
||||
_ = mustOpenPartition(smallPartsPath, bigPartsPath, indexDBPartsPath, s)
|
||||
|
||||
}
|
||||
|
||||
@@ -313,13 +333,14 @@ func TestMustOpenPartition_smallAndBigPartsPathsAreNotTheSame(t *testing.T) {
|
||||
|
||||
smallPartsPath := filepath.Join(t.Name(), "small", "2025_03")
|
||||
bigPartsPath := filepath.Join(t.Name(), "big", "2025_04")
|
||||
indexDBPartsPath := filepath.Join(t.Name(), "indexDB", "2025_04")
|
||||
|
||||
defer func() {
|
||||
if err := recover(); err == nil {
|
||||
t.Fatalf("expected panic on different partition name in smallPartsPath=%v and bigPartsPath=%v but it did not happen", smallPartsPath, bigPartsPath)
|
||||
t.Fatalf("expected panic on different partition name in smallPartsPath=%q and bigPartsPath=%q indexDBPartsPath=%q but it did not happen", smallPartsPath, bigPartsPath, indexDBPartsPath)
|
||||
}
|
||||
}()
|
||||
|
||||
s := &Storage{}
|
||||
_ = mustOpenPartition(smallPartsPath, bigPartsPath, s)
|
||||
|
||||
_ = mustOpenPartition(smallPartsPath, bigPartsPath, indexDBPartsPath, s)
|
||||
}
|
||||
|
||||
@@ -151,18 +151,16 @@ func (s *Search) reset() {
|
||||
//
|
||||
// Init returns the upper bound on the number of found time series.
|
||||
func (s *Search) Init(qt *querytracer.Tracer, storage *Storage, tfss []*TagFilters, tr TimeRange, maxMetrics int, deadline uint64) int {
|
||||
qt = qt.NewChild("init series search: filters=%s, timeRange=%s", tfss, &tr)
|
||||
qt = qt.NewChild("init series search: filters=%s, timeRange=%s, maxMetrics=%d", tfss, &tr, maxMetrics)
|
||||
defer qt.Done()
|
||||
|
||||
dataTR := tr
|
||||
|
||||
if s.needClosing {
|
||||
logger.Panicf("BUG: missing MustClose call before the next call to Init")
|
||||
}
|
||||
retentionDeadline := int64(fasttime.UnixTimestamp()*1e3) - storage.retentionMsecs
|
||||
|
||||
s.reset()
|
||||
s.mns = getMetricNameSearch(storage, false)
|
||||
s.mns = getMetricNameSearch(storage, tr, false)
|
||||
s.retentionDeadline = retentionDeadline
|
||||
s.metricsTracker = storage.metricsTracker
|
||||
s.tr = tr
|
||||
@@ -175,7 +173,7 @@ func (s *Search) Init(qt *querytracer.Tracer, storage *Storage, tfss []*TagFilte
|
||||
// It is ok to call Init on non-nil err.
|
||||
// Init must be called before returning because it will fail
|
||||
// on Search.MustClose otherwise.
|
||||
s.ts.Init(storage.tb, tsids, dataTR)
|
||||
s.ts.Init(storage.tb, tsids, tr)
|
||||
qt.Printf("search for parts with data for %d series", len(tsids))
|
||||
if err != nil {
|
||||
s.err = err
|
||||
|
||||
@@ -4,6 +4,7 @@ package storage
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"slices"
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
@@ -12,22 +13,37 @@ import (
|
||||
func TestSearch_metricNamesIndifferentIndexDBs(t *testing.T) {
|
||||
defer testRemoveAll(t)
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
const numMetrics = 10
|
||||
synctest.Run(func() {
|
||||
const numSeries = 10
|
||||
tr := TimeRange{
|
||||
MinTimestamp: time.Now().UnixMilli(),
|
||||
MaxTimestamp: time.Now().Add(23 * time.Hour).UnixMilli(),
|
||||
}
|
||||
rng := rand.New(rand.NewSource(1))
|
||||
mrs := testGenerateMetricRowsWithPrefix(rng, numMetrics, "metric", tr)
|
||||
mrsPrev := testGenerateMetricRowsWithPrefix(rng, numSeries, "legacy_prev", tr)
|
||||
mrsCurr := testGenerateMetricRowsWithPrefix(rng, numSeries, "legacy_curr", tr)
|
||||
mrsPt := testGenerateMetricRowsWithPrefix(rng, numSeries, "pt", tr)
|
||||
mrs := slices.Concat(mrsPrev, mrsCurr, mrsPt)
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{})
|
||||
defer s.MustClose()
|
||||
s.AddRows(mrs[:numMetrics/2], defaultPrecisionBits)
|
||||
// Rotate the indexDB to ensure that the index for the entire dataset is
|
||||
// split across prev and curr indexDBs.
|
||||
s.mustRotateIndexDB(time.Now())
|
||||
s.AddRows(mrs[numMetrics/2:], defaultPrecisionBits)
|
||||
s.AddRows(mrsPrev, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
// Advance the time a bit before converting to legacy so that the
|
||||
// storage could use a different timestamp for a legacy idb.
|
||||
time.Sleep(time.Second)
|
||||
s = mustConvertToLegacy(s)
|
||||
s.AddRows(mrsCurr, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
// Advance the time a bit before converting to legacy so that the
|
||||
// storage could use a different timestamp for a legacy idb.
|
||||
time.Sleep(time.Second)
|
||||
// Convert second time to have two legacy idbs (prev and curr)
|
||||
s = mustConvertToLegacy(s)
|
||||
// Advance the time a bit before adding the pt rows so that the
|
||||
// storage could use a different timestamp for data and pt index parts.
|
||||
time.Sleep(time.Second)
|
||||
s.AddRows(mrsPt, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
defer s.MustClose()
|
||||
|
||||
tfs := NewTagFilters()
|
||||
if err := tfs.Add(nil, []byte(".*"), false, true); err != nil {
|
||||
@@ -45,7 +61,7 @@ func TestSearch_metricNamesIndifferentIndexDBs(t *testing.T) {
|
||||
|
||||
var m Metrics
|
||||
s.UpdateMetrics(&m)
|
||||
if got, want := m.IndexDBMetrics.MissingTSIDsForMetricID, uint64(0); got != want {
|
||||
if got, want := m.TableMetrics.IndexDBMetrics.MissingTSIDsForMetricID, uint64(0); got != want {
|
||||
t.Fatalf("unexpected MissingTSIDsForMetricID count: got %d, want %d", got, want)
|
||||
}
|
||||
|
||||
@@ -63,7 +79,7 @@ func TestSearch_metricNamesIndifferentIndexDBs(t *testing.T) {
|
||||
}
|
||||
|
||||
s.UpdateMetrics(&m)
|
||||
if got, want := m.IndexDBMetrics.MissingTSIDsForMetricID, uint64(0); got != want {
|
||||
if got, want := m.TableMetrics.IndexDBMetrics.MissingTSIDsForMetricID, uint64(0); got != want {
|
||||
t.Fatalf("unexpected MissingTSIDsForMetricID count: got %d, want %d", got, want)
|
||||
}
|
||||
|
||||
@@ -77,7 +93,7 @@ func TestSearch_metricNamesIndifferentIndexDBs(t *testing.T) {
|
||||
}
|
||||
|
||||
s.UpdateMetrics(&m)
|
||||
if got, want := m.IndexDBMetrics.MissingTSIDsForMetricID, uint64(0); got != want {
|
||||
if got, want := m.TableMetrics.IndexDBMetrics.MissingTSIDsForMetricID, uint64(0); got != want {
|
||||
t.Fatalf("unexpected MissingTSIDsForMetricID count: got %d, want %d", got, want)
|
||||
}
|
||||
})
|
||||
|
||||
@@ -166,10 +166,7 @@ func TestSearch_VariousTimeRanges(t *testing.T) {
|
||||
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{})
|
||||
defer s.MustClose()
|
||||
s.AddRows(mrs[:numMetrics/2], defaultPrecisionBits)
|
||||
// Rotate the indexDB to ensure that the search operation covers both current and prev indexDBs.
|
||||
s.mustRotateIndexDB(time.Now())
|
||||
s.AddRows(mrs[numMetrics/2:], defaultPrecisionBits)
|
||||
s.AddRows(mrs, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
|
||||
if err := testSearchInternal(s, tr, mrs); err != nil {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
371
lib/storage/storage_legacy.go
Normal file
371
lib/storage/storage_legacy.go
Normal file
@@ -0,0 +1,371 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/uint64set"
|
||||
)
|
||||
|
||||
type legacyIndexDBs struct {
|
||||
idbPrev *legacyIndexDB
|
||||
idbCurr *legacyIndexDB
|
||||
}
|
||||
|
||||
func (dbs *legacyIndexDBs) incRef() {
|
||||
if dbs == nil {
|
||||
// No legacy indexDBs, nothing to increment reference count.
|
||||
return
|
||||
}
|
||||
|
||||
if dbs.idbPrev != nil {
|
||||
dbs.idbPrev.incRef()
|
||||
}
|
||||
if dbs.idbCurr != nil {
|
||||
dbs.idbCurr.incRef()
|
||||
}
|
||||
}
|
||||
|
||||
func (dbs *legacyIndexDBs) decRef() {
|
||||
if dbs == nil {
|
||||
// No legacy indexDBs, nothing to decrement reference count.
|
||||
return
|
||||
}
|
||||
|
||||
if dbs.idbPrev != nil {
|
||||
dbs.idbPrev.decRef()
|
||||
}
|
||||
if dbs.idbCurr != nil {
|
||||
dbs.idbCurr.decRef()
|
||||
}
|
||||
}
|
||||
|
||||
func (dbs *legacyIndexDBs) appendTo(dst []*indexDB) []*indexDB {
|
||||
if dbs == nil {
|
||||
// No legacy indexDBs, nothing to append.
|
||||
return dst
|
||||
}
|
||||
|
||||
if dbs.idbPrev != nil {
|
||||
dst = append(dst, dbs.idbPrev.idb)
|
||||
}
|
||||
if dbs.idbCurr != nil {
|
||||
dst = append(dst, dbs.idbCurr.idb)
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
func (dbs *legacyIndexDBs) getIDBPrev() *indexDB {
|
||||
if dbs == nil || dbs.idbPrev == nil {
|
||||
return nil
|
||||
}
|
||||
return dbs.idbPrev.idb
|
||||
}
|
||||
|
||||
func (dbs *legacyIndexDBs) getIDBCurr() *indexDB {
|
||||
if dbs == nil || dbs.idbCurr == nil {
|
||||
return nil
|
||||
}
|
||||
return dbs.idbCurr.idb
|
||||
}
|
||||
|
||||
func (s *Storage) mustOpenLegacyIndexDBTables(path string) *legacyIndexDBs {
|
||||
if !fs.IsPathExist(path) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Search for the two most recent tables: prev and curr.
|
||||
|
||||
// Placing the regexp inside the func in order to keep legacy code close to
|
||||
// each other and because this function is called only once on startup.
|
||||
indexDBTableNameRegexp := regexp.MustCompile("^[0-9A-F]{16}$")
|
||||
des := fs.MustReadDir(path)
|
||||
var tableNames []string
|
||||
for _, de := range des {
|
||||
if !fs.IsDirOrSymlink(de) {
|
||||
// Skip non-directories.
|
||||
continue
|
||||
}
|
||||
tableName := de.Name()
|
||||
if !indexDBTableNameRegexp.MatchString(tableName) {
|
||||
// Skip invalid directories.
|
||||
continue
|
||||
}
|
||||
tableDirPath := filepath.Join(path, tableName)
|
||||
if fs.IsPartiallyRemovedDir(tableDirPath) {
|
||||
// Finish the removal of partially deleted directory, which can occur
|
||||
// when the directory was removed during unclean shutdown.
|
||||
fs.MustRemoveDir(tableDirPath)
|
||||
continue
|
||||
}
|
||||
tableNames = append(tableNames, tableName)
|
||||
}
|
||||
sort.Slice(tableNames, func(i, j int) bool {
|
||||
return tableNames[i] < tableNames[j]
|
||||
})
|
||||
|
||||
if len(tableNames) > 3 {
|
||||
// Remove all the tables except the last three tables.
|
||||
for _, tn := range tableNames[:len(tableNames)-3] {
|
||||
pathToRemove := filepath.Join(path, tn)
|
||||
logger.Infof("removing obsolete indexdb dir %q...", pathToRemove)
|
||||
fs.MustRemoveDir(pathToRemove)
|
||||
logger.Infof("removed obsolete indexdb dir %q", pathToRemove)
|
||||
}
|
||||
fs.MustSyncPath(path)
|
||||
tableNames = tableNames[len(tableNames)-3:]
|
||||
}
|
||||
if len(tableNames) == 3 {
|
||||
// Also remove next idb.
|
||||
pathToRemove := filepath.Join(path, tableNames[2])
|
||||
logger.Infof("removing next indexdb dir %q...", pathToRemove)
|
||||
fs.MustRemoveDir(pathToRemove)
|
||||
logger.Infof("removed next indexdb dir %q", pathToRemove)
|
||||
fs.MustSyncPath(path)
|
||||
tableNames = tableNames[:2]
|
||||
}
|
||||
|
||||
numIDBs := len(tableNames)
|
||||
legacyIDBs := &legacyIndexDBs{}
|
||||
|
||||
if numIDBs == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if numIDBs > 1 {
|
||||
currPath := filepath.Join(path, tableNames[1])
|
||||
legacyIDBs.idbCurr = mustOpenLegacyIndexDB(currPath, s)
|
||||
}
|
||||
|
||||
if numIDBs > 0 {
|
||||
prevPath := filepath.Join(path, tableNames[0])
|
||||
legacyIDBs.idbPrev = mustOpenLegacyIndexDB(prevPath, s)
|
||||
}
|
||||
|
||||
return legacyIDBs
|
||||
}
|
||||
|
||||
func (s *Storage) hasLegacyIndexDBs() bool {
|
||||
return s.legacyIndexDBs.Load() != nil
|
||||
}
|
||||
|
||||
func (s *Storage) getLegacyIndexDBs() *legacyIndexDBs {
|
||||
legacyIDBs := s.legacyIndexDBs.Load()
|
||||
legacyIDBs.incRef()
|
||||
return legacyIDBs
|
||||
}
|
||||
|
||||
// putLegacyIndexDBs decrements the reference counts of legacyIDBs.
//
// legacyIDBs may be nil, since decRef accepts a nil receiver.
func (s *Storage) putLegacyIndexDBs(legacyIDBs *legacyIndexDBs) {
	legacyIDBs.decRef()
}
|
||||
|
||||
func (s *Storage) legacyNextRetentionSeconds() int64 {
|
||||
return s.legacyNextRotationTimestamp.Load() - int64(fasttime.UnixTimestamp())
|
||||
}
|
||||
|
||||
func (s *Storage) startLegacyRetentionWatcher() {
|
||||
if !s.hasLegacyIndexDBs() {
|
||||
return
|
||||
}
|
||||
s.legacyRetentionWatcherWG.Add(1)
|
||||
go func() {
|
||||
s.legacyRetentionWatcher()
|
||||
s.legacyRetentionWatcherWG.Done()
|
||||
}()
|
||||
}
|
||||
|
||||
func (s *Storage) legacyRetentionWatcher() {
|
||||
for {
|
||||
d := s.legacyNextRetentionSeconds()
|
||||
select {
|
||||
case <-s.stopCh:
|
||||
return
|
||||
case currentTime := <-time.After(time.Second * time.Duration(d)):
|
||||
s.legacyMustRotateIndexDB(currentTime)
|
||||
if !s.hasLegacyIndexDBs() {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// LegacySetRetentionTimezoneOffset stores the given offset, truncated to
// whole seconds, for use when calculating the legacy indexdb rotation time.
//
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2574
func LegacySetRetentionTimezoneOffset(offset time.Duration) {
	secs := offset.Seconds()
	legacyRetentionTimezoneOffsetSecs = int64(secs)
}

// legacyRetentionTimezoneOffsetSecs is the offset in seconds set by
// LegacySetRetentionTimezoneOffset.
var legacyRetentionTimezoneOffsetSecs int64
|
||||
|
||||
// legacyNextRetentionDeadlineSeconds returns the next retention deadline,
// in unix seconds, at or after atSecs.
//
// retentionSecs is rounded up to whole days so the per-day inverted index
// works as expected. The deadline is aligned to multiples of the rounded
// retention period and then shifted by offsetSecs minus 4 hours; the 4-hour
// shift exists for historical reasons, see
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/248
func legacyNextRetentionDeadlineSeconds(atSecs, retentionSecs, offsetSecs int64) int64 {
	const (
		secsPerHour = 3600
		secsPerDay  = 24 * secsPerHour
	)

	// Retention period rounded up to a whole number of days.
	days := (retentionSecs + secsPerDay - 1) / secsPerDay
	period := days * secsPerDay

	// Effective shift, kept within a single period.
	shift := (offsetSecs - 4*secsPerHour) % period

	// Round the shifted timestamp up to a multiple of the period. This makes
	// the deadline independent of where atSecs falls within the period.
	periods := (atSecs + shift + period - 1) / period

	// Undo the shift so the deadline is expressed in unshifted time.
	return periods*period - shift
}
|
||||
|
||||
func (s *Storage) legacyCreateSnapshot(snapshotName, srcDir, dstDir string) {
|
||||
legacyIDBs := s.getLegacyIndexDBs()
|
||||
defer s.putLegacyIndexDBs(legacyIDBs)
|
||||
|
||||
if legacyIDBs == nil {
|
||||
return
|
||||
}
|
||||
|
||||
idbSnapshot := filepath.Join(srcDir, indexdbDirname, snapshotsDirname, snapshotName)
|
||||
if idbPrev := legacyIDBs.getIDBPrev(); idbPrev != nil {
|
||||
prevSnapshot := filepath.Join(idbSnapshot, idbPrev.name)
|
||||
idbPrev.tb.LegacyMustCreateSnapshotAt(prevSnapshot)
|
||||
}
|
||||
if idbCurr := legacyIDBs.getIDBCurr(); idbCurr != nil {
|
||||
currSnapshot := filepath.Join(idbSnapshot, idbCurr.name)
|
||||
idbCurr.tb.LegacyMustCreateSnapshotAt(currSnapshot)
|
||||
}
|
||||
dstIdbDir := filepath.Join(dstDir, indexdbDirname)
|
||||
fs.MustSymlinkRelative(idbSnapshot, dstIdbDir)
|
||||
}
|
||||
|
||||
func (s *Storage) legacyMustRotateIndexDB(currentTime time.Time) {
|
||||
legacyIDBs := s.getLegacyIndexDBs()
|
||||
defer s.putLegacyIndexDBs(legacyIDBs)
|
||||
|
||||
if legacyIDBs == nil {
|
||||
// No legacy indexDBs, nothing to rotate.
|
||||
return
|
||||
}
|
||||
|
||||
legacyIDBs.idbPrev.scheduleToDrop()
|
||||
legacyIDBs.idbPrev.decRef()
|
||||
|
||||
var rotatedLegacyIDBs *legacyIndexDBs
|
||||
|
||||
if legacyIDBs.idbCurr != nil {
|
||||
rotatedLegacyIDBs = &legacyIndexDBs{
|
||||
idbPrev: legacyIDBs.idbCurr,
|
||||
}
|
||||
}
|
||||
s.legacyIndexDBs.Store(rotatedLegacyIDBs)
|
||||
|
||||
// Update nextRotationTimestamp
|
||||
nextRotationTimestamp := currentTime.Unix() + s.retentionMsecs/1000
|
||||
s.legacyNextRotationTimestamp.Store(nextRotationTimestamp)
|
||||
}
|
||||
|
||||
func (s *Storage) legacyDeleteSeries(qt *querytracer.Tracer, tfss []*TagFilters, maxMetrics int) (*uint64set.Set, error) {
|
||||
legacyIDBs := s.getLegacyIndexDBs()
|
||||
defer s.putLegacyIndexDBs(legacyIDBs)
|
||||
|
||||
if legacyIDBs == nil {
|
||||
// No legacy indexDBs, nothing to delete.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
all := &uint64set.Set{}
|
||||
|
||||
if idbPrev := legacyIDBs.getIDBPrev(); idbPrev != nil {
|
||||
qt.Printf("start deleting from previous legacy indexDB")
|
||||
dmis, err := idbPrev.DeleteSeries(qt, tfss, maxMetrics)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qt.Printf("deleted %d metricIDs from previous legacy indexDB", dmis.Len())
|
||||
all.UnionMayOwn(dmis)
|
||||
}
|
||||
|
||||
if idbCurr := legacyIDBs.getIDBCurr(); idbCurr != nil {
|
||||
qt.Printf("start deleting from current legacy indexDB")
|
||||
dmis, err := idbCurr.DeleteSeries(qt, tfss, maxMetrics)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qt.Printf("deleted %d metricIDs from current legacy indexDB", dmis.Len())
|
||||
all.UnionMayOwn(dmis)
|
||||
}
|
||||
|
||||
return all, nil
|
||||
}
|
||||
|
||||
func (s *Storage) legacyDebugFlush() {
|
||||
legacyIDBs := s.getLegacyIndexDBs()
|
||||
defer s.putLegacyIndexDBs(legacyIDBs)
|
||||
|
||||
if legacyIDBs == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if idbPrev := legacyIDBs.getIDBPrev(); idbPrev != nil {
|
||||
idbPrev.tb.DebugFlush()
|
||||
}
|
||||
if idbCurr := legacyIDBs.getIDBCurr(); idbCurr != nil {
|
||||
idbCurr.tb.DebugFlush()
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Storage) legacyNotifyReadWriteMode() {
|
||||
legacyIDBs := s.getLegacyIndexDBs()
|
||||
defer s.putLegacyIndexDBs(legacyIDBs)
|
||||
|
||||
if legacyIDBs == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if idbPrev := legacyIDBs.getIDBPrev(); idbPrev != nil {
|
||||
idbPrev.tb.NotifyReadWriteMode()
|
||||
}
|
||||
if idbCurr := legacyIDBs.getIDBCurr(); idbCurr != nil {
|
||||
idbCurr.tb.NotifyReadWriteMode()
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Storage) legacyUpdateMetrics(m *Metrics) {
|
||||
legacyIDBs := s.getLegacyIndexDBs()
|
||||
defer s.putLegacyIndexDBs(legacyIDBs)
|
||||
|
||||
if legacyIDBs == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if legacyIDBs.idbPrev != nil {
|
||||
legacyIDBs.idbPrev.UpdateMetrics(&m.TableMetrics.IndexDBMetrics)
|
||||
}
|
||||
if legacyIDBs.idbCurr != nil {
|
||||
legacyIDBs.idbCurr.UpdateMetrics(&m.TableMetrics.IndexDBMetrics)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Storage) legacyMustCloseIndexDBs() {
|
||||
legacyIDBs := s.legacyIndexDBs.Load()
|
||||
if legacyIDBs == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if legacyIDBs.idbPrev != nil {
|
||||
legacyIDBs.idbPrev.MustClose()
|
||||
}
|
||||
if legacyIDBs.idbCurr != nil {
|
||||
legacyIDBs.idbCurr.MustClose()
|
||||
}
|
||||
}
|
||||
1360
lib/storage/storage_legacy_test.go
Normal file
1360
lib/storage/storage_legacy_test.go
Normal file
File diff suppressed because it is too large
Load Diff
@@ -27,8 +27,9 @@ func TestStorageSearchTSIDs_CorruptedIndex(t *testing.T) {
|
||||
}
|
||||
const numMetrics = 10
|
||||
date := uint64(tr.MinTimestamp) / msecPerDay
|
||||
idbPrev, idbCurr := s.getPrevAndCurrIndexDBs()
|
||||
defer s.putPrevAndCurrIndexDBs(idbPrev, idbCurr)
|
||||
ptw := s.tb.MustGetPartition(tr.MinTimestamp)
|
||||
idb := ptw.pt.idb
|
||||
defer s.tb.PutPartition(ptw)
|
||||
var wantMetricIDs []uint64
|
||||
|
||||
// Simulate corrupted index by not creating nsPrefixMetricIDToTSID
|
||||
@@ -44,9 +45,9 @@ func TestStorageSearchTSIDs_CorruptedIndex(t *testing.T) {
|
||||
skipMetricIDToTSID: true,
|
||||
})
|
||||
|
||||
idbCurr.tb.AddItems(ii.Items)
|
||||
idb.tb.AddItems(ii.Items)
|
||||
}
|
||||
idbCurr.tb.DebugFlush()
|
||||
idb.tb.DebugFlush()
|
||||
|
||||
tfsAll := NewTagFilters()
|
||||
if err := tfsAll.Add([]byte("__name__"), []byte(".*"), false, true); err != nil {
|
||||
@@ -55,7 +56,7 @@ func TestStorageSearchTSIDs_CorruptedIndex(t *testing.T) {
|
||||
tfssAll := []*TagFilters{tfsAll}
|
||||
|
||||
searchMetricIDs := func() []uint64 {
|
||||
metricIDs, err := idbCurr.searchMetricIDs(nil, tfssAll, tr, 1e9, noDeadline)
|
||||
metricIDs, err := idb.searchMetricIDs(nil, tfssAll, tr, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("searchMetricIDs() failed unexpectedly: %v", err))
|
||||
}
|
||||
@@ -89,7 +90,7 @@ func TestStorageSearchTSIDs_CorruptedIndex(t *testing.T) {
|
||||
// is not incremented yet.
|
||||
var m Metrics
|
||||
s.UpdateMetrics(&m)
|
||||
if got, want := m.IndexDBMetrics.MissingTSIDsForMetricID, uint64(0); got != want {
|
||||
if got, want := m.TableMetrics.IndexDBMetrics.MissingTSIDsForMetricID, uint64(0); got != want {
|
||||
t.Fatalf("unexpected MissingTSIDsForMetricID: got %d, want %d", got, want)
|
||||
}
|
||||
|
||||
@@ -108,7 +109,7 @@ func TestStorageSearchTSIDs_CorruptedIndex(t *testing.T) {
|
||||
// Ensure the metric that counts metricIDs for which no TSIDs were found
|
||||
// is incremented after the metricID deletion.
|
||||
s.UpdateMetrics(&m)
|
||||
if got, want := m.IndexDBMetrics.MissingTSIDsForMetricID, uint64(numMetrics); got != want {
|
||||
if got, want := m.TableMetrics.IndexDBMetrics.MissingTSIDsForMetricID, uint64(numMetrics); got != want {
|
||||
t.Fatalf("unexpected MissingTSIDsForMetricID: got %d, want %d", got, want)
|
||||
}
|
||||
})
|
||||
@@ -128,8 +129,9 @@ func TestStorageSearchMetricNames_CorruptedIndex(t *testing.T) {
|
||||
}
|
||||
const numMetrics = 10
|
||||
date := uint64(tr.MinTimestamp) / msecPerDay
|
||||
idbPrev, idbCurr := s.getPrevAndCurrIndexDBs()
|
||||
defer s.putPrevAndCurrIndexDBs(idbPrev, idbCurr)
|
||||
ptw := s.tb.MustGetPartition(tr.MinTimestamp)
|
||||
idb := ptw.pt.idb
|
||||
defer s.tb.PutPartition(ptw)
|
||||
var wantMetricIDs []uint64
|
||||
|
||||
// Simulate corrupted index by not creating nsPrefixMetricIDToMetricName
|
||||
@@ -145,9 +147,9 @@ func TestStorageSearchMetricNames_CorruptedIndex(t *testing.T) {
|
||||
skipMetricIDToMetricName: true,
|
||||
})
|
||||
|
||||
idbCurr.tb.AddItems(ii.Items)
|
||||
idb.tb.AddItems(ii.Items)
|
||||
}
|
||||
idbCurr.tb.DebugFlush()
|
||||
idb.tb.DebugFlush()
|
||||
|
||||
tfsAll := NewTagFilters()
|
||||
if err := tfsAll.Add([]byte("__name__"), []byte(".*"), false, true); err != nil {
|
||||
@@ -156,7 +158,7 @@ func TestStorageSearchMetricNames_CorruptedIndex(t *testing.T) {
|
||||
tfssAll := []*TagFilters{tfsAll}
|
||||
|
||||
searchMetricIDs := func() []uint64 {
|
||||
metricIDs, err := idbCurr.searchMetricIDs(nil, tfssAll, tr, 1e9, noDeadline)
|
||||
metricIDs, err := idb.searchMetricIDs(nil, tfssAll, tr, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("searchMetricIDs() failed unexpectedly: %v", err))
|
||||
}
|
||||
@@ -190,7 +192,7 @@ func TestStorageSearchMetricNames_CorruptedIndex(t *testing.T) {
|
||||
// were found is not incremented yet.
|
||||
var m Metrics
|
||||
s.UpdateMetrics(&m)
|
||||
if got, want := m.IndexDBMetrics.MissingMetricNamesForMetricID, uint64(0); got != want {
|
||||
if got, want := m.TableMetrics.IndexDBMetrics.MissingMetricNamesForMetricID, uint64(0); got != want {
|
||||
t.Fatalf("unexpected MissingMetricNamesForMetricID: got %d, want %d", got, want)
|
||||
}
|
||||
|
||||
@@ -209,7 +211,7 @@ func TestStorageSearchMetricNames_CorruptedIndex(t *testing.T) {
|
||||
// Ensure the metric that counts metricIDs for which no metric names
|
||||
// were found is incremented after the metricID deletion.
|
||||
s.UpdateMetrics(&m)
|
||||
if got, want := m.IndexDBMetrics.MissingMetricNamesForMetricID, uint64(numMetrics); got != want {
|
||||
if got, want := m.TableMetrics.IndexDBMetrics.MissingMetricNamesForMetricID, uint64(numMetrics); got != want {
|
||||
t.Fatalf("unexpected MissingMetricNamesForMetricID: got %d, want %d", got, want)
|
||||
}
|
||||
})
|
||||
@@ -283,89 +285,99 @@ func testCreateIndexItems(date uint64, tsid *TSID, mn *MetricName, opts testInde
|
||||
}
|
||||
|
||||
func TestStorageRotateIndexDBPrefill(t *testing.T) {
|
||||
f := func(opts OpenOptions, prefillStart time.Duration) {
|
||||
defer testRemoveAll(t)
|
||||
defer testRemoveAll(t)
|
||||
f := func(t *testing.T, opts OpenOptions, prefillStart time.Duration) {
|
||||
t.Helper()
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
// Align start time to 05:00 in order to have 23h before the next rotation cycle at 04:00 next morning.
|
||||
time.Sleep(time.Hour * 5)
|
||||
|
||||
nextRotationTime := time.Now().Add(time.Hour * 23).Truncate(time.Hour)
|
||||
synctest.Run(func() {
|
||||
// Prefill of the next partition indexDB happens during the
|
||||
// (nextMonth-prefillStart, nextMonth] time interval.
|
||||
// Advance current time right before the beginning of that interval.
|
||||
ct := time.Now().UTC()
|
||||
nextMonth := time.Date(ct.Year(), ct.Month()+1, 1, 0, 0, 0, 0, time.UTC)
|
||||
time.Sleep(nextMonth.Sub(ct.Add(prefillStart)))
|
||||
|
||||
s := MustOpenStorage(t.Name(), opts)
|
||||
defer s.MustClose()
|
||||
// first rotation cycle in 4 hours due to synctest start time of 00:00:00
|
||||
rng := rand.New(rand.NewSource(1))
|
||||
ct := time.Now()
|
||||
tr := TimeRange{
|
||||
MinTimestamp: ct.Add(time.Hour).UnixMilli(),
|
||||
MaxTimestamp: ct.Add(time.Hour * 24).UnixMilli(),
|
||||
}
|
||||
|
||||
const numSeries = 1000
|
||||
|
||||
mrs := testGenerateMetricRowsWithPrefix(rng, numSeries, "metric.", tr)
|
||||
s.AddRows(mrs, 1)
|
||||
s.DebugFlush()
|
||||
createdSeries := s.newTimeseriesCreated.Load()
|
||||
if createdSeries != numSeries {
|
||||
t.Fatalf("unexpected number of created series (-%d;+%d)", numSeries, createdSeries)
|
||||
addRows := func() {
|
||||
t.Helper()
|
||||
rng := rand.New(rand.NewSource(1))
|
||||
ct := time.Now().UTC()
|
||||
tr := TimeRange{
|
||||
MinTimestamp: ct.Add(-prefillStart).UnixMilli(),
|
||||
MaxTimestamp: ct.UnixMilli(),
|
||||
}
|
||||
mrs := testGenerateMetricRowsWithPrefix(rng, numSeries, "metric.", tr)
|
||||
s.AddRows(mrs, 1)
|
||||
s.DebugFlush()
|
||||
}
|
||||
|
||||
// Sleep until a minute before the prefill start time,
|
||||
// then verify that no timeseries have been pre-created yet.
|
||||
time.Sleep(time.Hour*23 - prefillStart - 1*time.Minute)
|
||||
s.AddRows(mrs, 1)
|
||||
s.DebugFlush()
|
||||
preCreated := s.timeseriesPreCreated.Load()
|
||||
if preCreated != 0 {
|
||||
t.Fatalf("expected no timeseries to be re-created, got: %d", preCreated)
|
||||
// Insert metrics into the empty storage right before the prefill
|
||||
// interval starts.
|
||||
addRows()
|
||||
if got, want := s.newTimeseriesCreated.Load(), uint64(numSeries); got != want {
|
||||
t.Fatalf("unexpected number of new timeseries: got %d, want %d", got, want)
|
||||
}
|
||||
if got, want := s.timeseriesPreCreated.Load(), uint64(0); got != want {
|
||||
t.Fatalf("unexpected number of pre-created timeseries: got %d, want %d", got, want)
|
||||
}
|
||||
|
||||
// Sleep until half of the prefill rotation interval has elapsed,
|
||||
// Sleep until half of the prefill interval has elapsed,
|
||||
// then verify that some time series have been pre-created.
|
||||
time.Sleep(prefillStart / 2)
|
||||
s.AddRows(mrs, 1)
|
||||
s.DebugFlush()
|
||||
preCreated = s.timeseriesPreCreated.Load()
|
||||
if preCreated == 0 {
|
||||
t.Fatalf("expected some timeseries to be re-created, got: %d", preCreated)
|
||||
addRows()
|
||||
if got, want := s.timeseriesPreCreated.Load(), uint64(0); got <= want {
|
||||
t.Fatalf("unexpected number of pre-created timeseries: got %d, want > %d", got, want)
|
||||
}
|
||||
|
||||
// Sleep until a minute before the index rotation,
|
||||
// verify that almost all time series have been pre-created.
|
||||
time.Sleep(nextRotationTime.Sub(time.Now().Add(time.Minute)))
|
||||
s.AddRows(mrs, 1)
|
||||
s.DebugFlush()
|
||||
preCreated = s.timeseriesPreCreated.Load()
|
||||
if preCreated == 0 || preCreated < numSeries/2 {
|
||||
t.Fatalf("expected more than 50 percent of timeseries to be re-created, got: %d", preCreated)
|
||||
// Sleep until a minute before the next partition transition, verify
|
||||
// that almost all time series have been pre-created.
|
||||
ct = time.Now().UTC()
|
||||
time.Sleep(nextMonth.Sub(ct.Add(time.Minute)))
|
||||
addRows()
|
||||
if got, want := s.timeseriesPreCreated.Load(), uint64(numSeries/2); got <= want {
|
||||
t.Fatalf("unexpected number of pre-created timeseries: got %d, want > %d", got, want)
|
||||
}
|
||||
|
||||
// Sleep until the rotation is over, verify that the rest of time series have been re-created
|
||||
time.Sleep(time.Hour)
|
||||
s.AddRows(mrs, 1)
|
||||
s.DebugFlush()
|
||||
createdSeries, reCreated, rePopulated := s.newTimeseriesCreated.Load(), s.timeseriesPreCreated.Load(), s.timeseriesRepopulated.Load()
|
||||
if createdSeries != numSeries {
|
||||
t.Fatalf("unexpected number of created series (-%d;+%d)", numSeries, createdSeries)
|
||||
}
|
||||
if reCreated+rePopulated != numSeries {
|
||||
t.Fatalf("unexpected number of re-created=%d and re-populated=%d series, want sum to be equal to %d", numSeries, createdSeries, numSeries)
|
||||
// Align the time with the start of the next month.
|
||||
time.Sleep(time.Minute)
|
||||
// Sleep until the transition to the next partition is over, verify
|
||||
// that the rest of time series have been re-created
|
||||
time.Sleep(prefillStart)
|
||||
newCreated := s.newTimeseriesCreated.Load()
|
||||
addRows()
|
||||
newCreated = s.newTimeseriesCreated.Load() - newCreated
|
||||
// If jump in time is bigger than 1h, the tsidCache will be cleared
|
||||
// and therefore the metrics will not be repopulated. Instead, new
|
||||
// metrics will be created.
|
||||
preCreated, repopulated := s.timeseriesPreCreated.Load(), s.timeseriesRepopulated.Load()
|
||||
if preCreated+repopulated+newCreated != numSeries {
|
||||
t.Fatalf("unexpected number of pre-populated, repopulated, and new timeseries: got %d + %d + %d, want %d", preCreated, repopulated, newCreated, numSeries)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Test the default prefill start duration, see -storage.idbPrefillStart flag:
|
||||
// VictoriaMetrics starts prefill indexDB at 3 A.M UTC, while indexDB rotates at 4 A.M UTC.
|
||||
f(OpenOptions{Retention: time.Hour * 24, IDBPrefillStart: time.Hour}, time.Hour)
|
||||
|
||||
// Zero IDBPrefillStart option should fallback to 1 hour prefill start:
|
||||
f(OpenOptions{Retention: time.Hour * 24, IDBPrefillStart: 0}, time.Hour)
|
||||
|
||||
// Test a custom prefill duration: 2h:
|
||||
// VictoriaMetrics starts prefill indexDB at 2 A.M UTC, while indexDB rotates at 4 A.M UTC.
|
||||
f(OpenOptions{Retention: time.Hour * 24, IDBPrefillStart: 2 * time.Hour}, 2*time.Hour)
|
||||
// Verify an interval that is shorter than one hour.
|
||||
t.Run("30m", func(t *testing.T) {
|
||||
f(t, OpenOptions{IDBPrefillStart: 30 * time.Minute}, 30*time.Minute)
|
||||
})
|
||||
// Verify 1h interval (which is also the default).
|
||||
// tsidCache will be cleared because it will have two cache rotations (one
|
||||
// every 30 mins). This means that once the new month starts the timeseries
|
||||
// that weren't pre-populated will be re-created instead of being
|
||||
// re-populated.
|
||||
t.Run("default", func(t *testing.T) {
|
||||
f(t, OpenOptions{IDBPrefillStart: 0}, time.Hour)
|
||||
})
|
||||
t.Run("1h", func(t *testing.T) {
|
||||
f(t, OpenOptions{IDBPrefillStart: time.Hour}, time.Hour)
|
||||
})
|
||||
// Verify 2h interval. Same here, the tsidCache will be cleared.
|
||||
t.Run("2h", func(t *testing.T) {
|
||||
f(t, OpenOptions{IDBPrefillStart: 2 * time.Hour}, 2*time.Hour)
|
||||
})
|
||||
}
|
||||
|
||||
// TestStorageAddRows_nextDayIndexPrefill tests gradual creation of per-day
|
||||
@@ -538,11 +550,11 @@ func TestStorageAddRows_nextDayIndexPrefill(t *testing.T) {
|
||||
func TestStorageMustLoadNextDayMetricIDs(t *testing.T) {
|
||||
defer testRemoveAll(t)
|
||||
|
||||
assertNextDayMetricIDs := func(t *testing.T, gotNextDayMetricIDs *nextDayMetricIDs, wantGen, wantDate uint64, wantLen int) {
|
||||
assertNextDayMetricIDs := func(t *testing.T, gotNextDayMetricIDs *nextDayMetricIDs, wantIDBID, wantDate uint64, wantLen int) {
|
||||
t.Helper()
|
||||
|
||||
if got, want := gotNextDayMetricIDs.generation, wantGen; got != want {
|
||||
t.Fatalf("unexpected nextDayMetricIDs idb generation: got %d, want %d", got, want)
|
||||
if got, want := gotNextDayMetricIDs.idbID, wantIDBID; got != want {
|
||||
t.Fatalf("unexpected nextDayMetricIDs idb id: got %d, want %d", got, want)
|
||||
}
|
||||
if got, want := gotNextDayMetricIDs.date, wantDate; got != want {
|
||||
t.Fatalf("unexpected nextDayMetricIDs date: got %d, want %d", got, want)
|
||||
@@ -556,13 +568,13 @@ func TestStorageMustLoadNextDayMetricIDs(t *testing.T) {
|
||||
// synctest starts at 2000-01-01T00:00:00Z.
|
||||
// Advance time to 23:30 to enable next day prefill.
|
||||
time.Sleep(23*time.Hour + 30*time.Minute) // 2000-01-01T23:30:00Z
|
||||
date := uint64(time.Now().UnixMilli() / msecPerDay)
|
||||
date := uint64(time.Now().UnixMilli()) / msecPerDay
|
||||
|
||||
const numSeries = 1000
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{})
|
||||
idbPrev, idbCurr := s.getPrevAndCurrIndexDBs()
|
||||
genCurr := idbCurr.generation
|
||||
s.putPrevAndCurrIndexDBs(idbPrev, idbCurr)
|
||||
ptw := s.tb.MustGetPartition(time.Now().UnixMilli())
|
||||
idbID := ptw.pt.idb.id
|
||||
s.tb.PutPartition(ptw)
|
||||
|
||||
rng := rand.New(rand.NewSource(1))
|
||||
mrs := testGenerateMetricRowsWithPrefix(rng, numSeries, "metric", TimeRange{
|
||||
@@ -579,7 +591,7 @@ func TestStorageMustLoadNextDayMetricIDs(t *testing.T) {
|
||||
numNextDayMetricIDs := s.pendingNextDayMetricIDs.Len()
|
||||
// But not in the nextDayMetricIDs cache. The pending metrics will be
|
||||
// moved to it by a bg process a few seconds later.
|
||||
assertNextDayMetricIDs(t, s.nextDayMetricIDs.Load(), genCurr, date, 0)
|
||||
assertNextDayMetricIDs(t, s.nextDayMetricIDs.Load(), idbID, date, 0)
|
||||
|
||||
// Wait for nextDayMetricIDs cache to populate.
|
||||
time.Sleep(15 * time.Second)
|
||||
@@ -592,7 +604,7 @@ func TestStorageMustLoadNextDayMetricIDs(t *testing.T) {
|
||||
}
|
||||
// While the actual cache must contain the exact number of metricIDs
|
||||
// that once were pending.
|
||||
assertNextDayMetricIDs(t, s.nextDayMetricIDs.Load(), genCurr, date, numNextDayMetricIDs)
|
||||
assertNextDayMetricIDs(t, s.nextDayMetricIDs.Load(), idbID, date, numNextDayMetricIDs)
|
||||
|
||||
// Close the storage to persist nextDayMetricIDs cache to a file.
|
||||
s.MustClose()
|
||||
@@ -602,18 +614,19 @@ func TestStorageMustLoadNextDayMetricIDs(t *testing.T) {
|
||||
if got := s.pendingNextDayMetricIDs.Len(); got != 0 {
|
||||
t.Fatalf("unexpected pendingNextDayMetricIDs count: got %d, want 0", got)
|
||||
}
|
||||
assertNextDayMetricIDs(t, s.nextDayMetricIDs.Load(), genCurr, date, numNextDayMetricIDs)
|
||||
|
||||
// Try loading the cache file contents for a different indexDB.
|
||||
genOther := genCurr + 1
|
||||
gotNextDayMetricIDs := s.mustLoadNextDayMetricIDs(genOther, date)
|
||||
assertNextDayMetricIDs(t, gotNextDayMetricIDs, genOther, date, 0)
|
||||
|
||||
// Try loading the cache file contents for a different date.
|
||||
dateOther := date + 1
|
||||
gotNextDayMetricIDs = s.mustLoadNextDayMetricIDs(genCurr, dateOther)
|
||||
assertNextDayMetricIDs(t, gotNextDayMetricIDs, genCurr, dateOther, 0)
|
||||
assertNextDayMetricIDs(t, s.nextDayMetricIDs.Load(), idbID, date, numNextDayMetricIDs)
|
||||
s.MustClose()
|
||||
|
||||
// Advance the time by one day and open the storage.
|
||||
// Since the current date and the date in the cache file do not match,
|
||||
// nothing will be loaded into cache.
|
||||
time.Sleep(24 * time.Hour)
|
||||
date = uint64(time.Now().UnixMilli()) / msecPerDay
|
||||
s = MustOpenStorage(t.Name(), OpenOptions{})
|
||||
if got := s.pendingNextDayMetricIDs.Len(); got != 0 {
|
||||
t.Fatalf("unexpected pendingNextDayMetricIDs count: got %d, want 0", got)
|
||||
}
|
||||
assertNextDayMetricIDs(t, s.nextDayMetricIDs.Load(), idbID, date, 0)
|
||||
s.MustClose()
|
||||
})
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -273,7 +273,6 @@ func BenchmarkStorageInsertWithAndWithoutPerDayIndex(b *testing.B) {
|
||||
var (
|
||||
rowsAddedTotal int
|
||||
dataSize int64
|
||||
indexSize int64
|
||||
)
|
||||
|
||||
path := b.Name()
|
||||
@@ -295,7 +294,6 @@ func BenchmarkStorageInsertWithAndWithoutPerDayIndex(b *testing.B) {
|
||||
|
||||
rowsAddedTotal = numBatches * numRowsPerBatch
|
||||
dataSize = benchmarkDirSize(path + "/data")
|
||||
indexSize = benchmarkDirSize(path + "/indexdb")
|
||||
|
||||
s.MustClose()
|
||||
fs.MustRemoveDir(path)
|
||||
@@ -303,7 +301,6 @@ func BenchmarkStorageInsertWithAndWithoutPerDayIndex(b *testing.B) {
|
||||
|
||||
b.ReportMetric(float64(rowsAddedTotal)/float64(b.Elapsed().Seconds()), "rows/s")
|
||||
b.ReportMetric(float64(dataSize)/(1024*1024), "data-MiB")
|
||||
b.ReportMetric(float64(indexSize)/(1024*1024), "indexdb-MiB")
|
||||
}
|
||||
|
||||
b.Run("HighChurnRate/perDayIndexes", func(b *testing.B) {
|
||||
@@ -392,7 +389,7 @@ type dataConfig struct {
|
||||
type searchFunc func(b *testing.B, s *Storage, tr TimeRange, mrs []MetricRow)
|
||||
|
||||
// splitFunc splits the test data between prev, curr, and pt indexDBs.
|
||||
type splitFunc func(total dataConfig) (prev, curr dataConfig)
|
||||
type splitFunc func(total dataConfig) (prev, curr, pt dataConfig)
|
||||
|
||||
// benchmarkSearch implements the core logic of benchmark of a search operation.
|
||||
//
|
||||
@@ -455,11 +452,13 @@ func benchmarkSearch(b *testing.B, dataConfig dataConfig, split splitFunc, searc
|
||||
|
||||
}
|
||||
|
||||
cfgPrev, cfgCurr := split(dataConfig)
|
||||
cfgPrev, cfgCurr, cfgPt := split(dataConfig)
|
||||
mrsToDeletePrev := genRows(cfgPrev.numDeletedSeries, "prev", cfgPrev.tr)
|
||||
mrsToDeleteCurr := genRows(cfgCurr.numDeletedSeries, "curr", cfgCurr.tr)
|
||||
mrsToDeletePt := genRows(cfgPt.numDeletedSeries, "pt", cfgPt.tr)
|
||||
mrsPrev := genRows(cfgPrev.numSeries, "prev", cfgPrev.tr)
|
||||
mrsCurr := genRows(cfgCurr.numSeries, "curr", cfgCurr.tr)
|
||||
mrsPt := genRows(cfgPt.numSeries, "pt", cfgPt.tr)
|
||||
|
||||
s := MustOpenStorage(b.Name(), OpenOptions{})
|
||||
s.AddRows(mrsToDeletePrev, defaultPrecisionBits)
|
||||
@@ -468,8 +467,7 @@ func benchmarkSearch(b *testing.B, dataConfig dataConfig, split splitFunc, searc
|
||||
s.DebugFlush()
|
||||
s.AddRows(mrsPrev, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
|
||||
s.mustRotateIndexDB(time.Now())
|
||||
s = mustConvertToLegacy(s)
|
||||
|
||||
s.AddRows(mrsToDeleteCurr, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
@@ -477,8 +475,16 @@ func benchmarkSearch(b *testing.B, dataConfig dataConfig, split splitFunc, searc
|
||||
s.DebugFlush()
|
||||
s.AddRows(mrsCurr, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
s = mustConvertToLegacy(s)
|
||||
|
||||
mrs := slices.Concat(mrsPrev, mrsCurr)
|
||||
s.AddRows(mrsToDeletePt, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
deleteSeries(s, "pt", cfgPt.numDeletedSeries)
|
||||
s.DebugFlush()
|
||||
s.AddRows(mrsPt, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
|
||||
mrs := slices.Concat(mrsPrev, mrsCurr, mrsPt)
|
||||
search(b, s, dataConfig.tr, mrs)
|
||||
|
||||
s.MustClose()
|
||||
@@ -691,36 +697,38 @@ func isGraphite(op searchFunc) bool {
|
||||
|
||||
// indexConfigs holds the index configurations for which BenchmarkSearch() will
|
||||
// perform the measurements.
|
||||
var indexConfigs = []splitFunc{prevOnly, currOnly, prevCurr}
|
||||
var indexConfigNames = []string{"PrevOnly", "CurrOnly", "PrevCurr"}
|
||||
var indexConfigs = []splitFunc{prevOnly, currOnly, prevCurr, ptOnly, prevPt, currPt, prevCurrPt}
|
||||
var indexConfigNames = []string{"PrevOnly", "CurrOnly", "PrevCurr", "PtOnly", "PrevPt", "CurrPt", "PrevCurrPt"}
|
||||
|
||||
// prevOnly is an index config func that puts all index data into prev indexDB.
|
||||
// No index data goes to curr indexDB.
|
||||
// prevOnly is an index config func that puts all index data into legacy prev
|
||||
// indexDB. No index data goes to legacy curr indexDB or pt indexDBs.
|
||||
//
|
||||
// This config corresponds to a state when indexDBs have just been rotated.
|
||||
// I.e. most of the index entries are in the prev indexDB.
|
||||
func prevOnly(total dataConfig) (prev, curr dataConfig) {
|
||||
// This config corresponds to a state when the deployment has switched to pt
|
||||
// index right after legacy indexDBs have just been rotated. I.e. most of the
|
||||
// index entries are in the prev indexDB.
|
||||
func prevOnly(total dataConfig) (prev, curr, pt dataConfig) {
|
||||
prev = total
|
||||
return prev, curr
|
||||
return prev, curr, pt
|
||||
}
|
||||
|
||||
// currOnly is an index config func that puts all index data into curr
|
||||
// indexDB. No index data goes to prev indexDB.
|
||||
// currOnly is an index config func that puts all index data into legacy curr
|
||||
// indexDB. No index data goes to legacy prev indexDB or pt indexDBs.
|
||||
//
|
||||
// This config corresponds to a state when indexDBs haven't been rotated yet or
|
||||
// rotated long time ago. I.e. most of the index entries are in the curr
|
||||
// indexDB.
|
||||
func currOnly(total dataConfig) (prev, curr dataConfig) {
|
||||
// This config corresponds to a state when the deployment has switched to pt
|
||||
// index before legacy indexDB rotation or the rotation has happened long time
|
||||
// ago. I.e. most of the index entries are in the curr indexDB.
|
||||
func currOnly(total dataConfig) (prev, curr, pt dataConfig) {
|
||||
curr = total
|
||||
return prev, curr
|
||||
return prev, curr, pt
|
||||
}
|
||||
|
||||
// prevCurr is an index config func that splits index data evenly between
|
||||
// prev and curr indexDBs.
|
||||
// prev and curr legacy indexDBs. No data goes to pt indexDBs.
|
||||
//
|
||||
// This config corresponds to a state when the indexDB rotation has happened
|
||||
// some time ago. I.e. index entries are in both prev and curr indexDBs.
|
||||
func prevCurr(total dataConfig) (prev, curr dataConfig) {
|
||||
// This config corresponds to a state when the deployment has switched to pt
|
||||
// index some significant time after legacy indexDB rotation. I.e. index entries
|
||||
// are in both prev and curr legacy indexDBs.
|
||||
func prevCurr(total dataConfig) (prev, curr, pt dataConfig) {
|
||||
prev.numSeries = total.numSeries / 2
|
||||
prev.numDeletedSeries = total.numDeletedSeries / 2
|
||||
prev.tr.MinTimestamp = total.tr.MinTimestamp
|
||||
@@ -731,7 +739,82 @@ func prevCurr(total dataConfig) (prev, curr dataConfig) {
|
||||
curr.tr.MinTimestamp = prev.tr.MaxTimestamp + 1
|
||||
curr.tr.MaxTimestamp = total.tr.MaxTimestamp
|
||||
|
||||
return prev, curr
|
||||
return prev, curr, pt
|
||||
}
|
||||
|
||||
// ptOnly is an index config func that puts all index data into pt indexDBs. No
|
||||
// index data goes to prev or curr legacy indexDBs.
|
||||
//
|
||||
// This config corresponds to a state when a fresh deployment has started with
|
||||
// pt index right away.I.e. all the index entries are in the pt indexDBs.
|
||||
func ptOnly(total dataConfig) (prev, curr, pt dataConfig) {
|
||||
pt = total
|
||||
return prev, curr, pt
|
||||
}
|
||||
|
||||
// prevPt is an index config func that splits index data evenly between
|
||||
// prev legacy indexDB and pt indexDBs. No data goes to curr legacy indexDB.
|
||||
//
|
||||
// This config corresponds to a state when the the deployment has switched to pt
|
||||
// index right after legacy indexDB rotation and continued to work for some
|
||||
// time.
|
||||
func prevPt(total dataConfig) (prev, curr, pt dataConfig) {
|
||||
prev.numSeries = total.numSeries / 2
|
||||
prev.numDeletedSeries = total.numDeletedSeries / 2
|
||||
prev.tr.MinTimestamp = total.tr.MinTimestamp
|
||||
prev.tr.MaxTimestamp = total.tr.MinTimestamp + (total.tr.MaxTimestamp-total.tr.MinTimestamp)/2
|
||||
|
||||
pt.numSeries = total.numSeries - prev.numSeries
|
||||
pt.numDeletedSeries = total.numDeletedSeries - prev.numDeletedSeries
|
||||
pt.tr.MinTimestamp = prev.tr.MaxTimestamp + 1
|
||||
pt.tr.MaxTimestamp = total.tr.MaxTimestamp
|
||||
|
||||
return prev, curr, pt
|
||||
}
|
||||
|
||||
// currPt is an index config func that splits index data evenly between
|
||||
// curr legacy indexDB and pt indexDBs. No data goes to prev legacy indexDB.
|
||||
//
|
||||
// This config corresponds to a state when the the deployment has switched to pt
|
||||
// index right before legacy indexDB rotation and continued to work for some
|
||||
// time.
|
||||
func currPt(total dataConfig) (prev, curr, pt dataConfig) {
|
||||
curr.numSeries = total.numSeries / 2
|
||||
curr.numDeletedSeries = total.numDeletedSeries / 2
|
||||
curr.tr.MinTimestamp = total.tr.MinTimestamp
|
||||
curr.tr.MaxTimestamp = total.tr.MinTimestamp + (total.tr.MaxTimestamp-total.tr.MinTimestamp)/2
|
||||
|
||||
pt.numSeries = total.numSeries - curr.numSeries
|
||||
pt.numDeletedSeries = total.numDeletedSeries - curr.numDeletedSeries
|
||||
pt.tr.MinTimestamp = curr.tr.MaxTimestamp + 1
|
||||
pt.tr.MaxTimestamp = total.tr.MaxTimestamp
|
||||
|
||||
return prev, curr, pt
|
||||
}
|
||||
|
||||
// prevCurrPt is an index config func that splits index data evenly between
|
||||
// prev and curr legacy indexDBs and pt indexDBs.
|
||||
//
|
||||
// This config corresponds to a state when the the deployment has switched to pt
|
||||
// index right some time after legacy indexDB rotation and continued to work for
|
||||
// some time.
|
||||
func prevCurrPt(total dataConfig) (prev, curr, pt dataConfig) {
|
||||
prev.numSeries = total.numSeries / 3
|
||||
prev.numDeletedSeries = total.numDeletedSeries / 3
|
||||
prev.tr.MinTimestamp = total.tr.MinTimestamp
|
||||
prev.tr.MaxTimestamp = total.tr.MinTimestamp + (total.tr.MaxTimestamp-total.tr.MinTimestamp)/3
|
||||
|
||||
curr.numSeries = prev.numSeries
|
||||
curr.numDeletedSeries = prev.numDeletedSeries
|
||||
curr.tr.MinTimestamp = prev.tr.MaxTimestamp + 1
|
||||
curr.tr.MaxTimestamp = prev.tr.MaxTimestamp + (total.tr.MaxTimestamp-total.tr.MinTimestamp)/3
|
||||
|
||||
pt.numSeries = total.numSeries - prev.numSeries - curr.numSeries
|
||||
pt.numDeletedSeries = total.numDeletedSeries - prev.numDeletedSeries - curr.numDeletedSeries
|
||||
pt.tr.MinTimestamp = curr.tr.MaxTimestamp + 1
|
||||
pt.tr.MaxTimestamp = total.tr.MaxTimestamp
|
||||
|
||||
return prev, curr, pt
|
||||
}
|
||||
|
||||
// dataConfigFunc generates a collection of data configs. For example, various
|
||||
|
||||
@@ -28,7 +28,9 @@ type table struct {
|
||||
path string
|
||||
smallPartitionsPath string
|
||||
bigPartitionsPath string
|
||||
indexDBPath string
|
||||
|
||||
// TODO(@rtm0): Do not depend on Storage.
|
||||
s *Storage
|
||||
|
||||
ptws []*partitionWrapper
|
||||
@@ -105,8 +107,11 @@ func mustOpenTable(path string, s *Storage) *table {
|
||||
bigSnapshotsPath := filepath.Join(bigPartitionsPath, snapshotsDirname)
|
||||
fs.MustMkdirIfNotExist(bigSnapshotsPath)
|
||||
|
||||
indexDBPath := filepath.Join(path, indexdbDirname)
|
||||
fs.MustMkdirIfNotExist(indexDBPath)
|
||||
|
||||
// Open partitions.
|
||||
pts := mustOpenPartitions(smallPartitionsPath, bigPartitionsPath, s)
|
||||
pts := mustOpenPartitions(smallPartitionsPath, bigPartitionsPath, indexDBPath, s)
|
||||
|
||||
// Make sure all the directories inside the path are properly synced.
|
||||
fs.MustSyncPathAndParentDir(path)
|
||||
@@ -115,6 +120,7 @@ func mustOpenTable(path string, s *Storage) *table {
|
||||
path: path,
|
||||
smallPartitionsPath: smallPartitionsPath,
|
||||
bigPartitionsPath: bigPartitionsPath,
|
||||
indexDBPath: indexDBPath,
|
||||
s: s,
|
||||
|
||||
stopCh: make(chan struct{}),
|
||||
@@ -127,12 +133,13 @@ func mustOpenTable(path string, s *Storage) *table {
|
||||
return tb
|
||||
}
|
||||
|
||||
// MustCreateSnapshot creates tb snapshot and returns paths to small and big parts of it.
|
||||
func (tb *table) MustCreateSnapshot(snapshotName string) (string, string) {
|
||||
// MustCreateSnapshot creates tb snapshot and returns paths to small parts, big
|
||||
// parts, and indexdb.
|
||||
func (tb *table) MustCreateSnapshot(snapshotName string) (string, string, string) {
|
||||
logger.Infof("creating table snapshot of %q...", tb.path)
|
||||
startTime := time.Now()
|
||||
|
||||
ptws := tb.GetPartitions(nil)
|
||||
ptws := tb.GetAllPartitions(nil)
|
||||
defer tb.PutPartitions(ptws)
|
||||
|
||||
dstSmallDir := filepath.Join(tb.path, smallDirname, snapshotsDirname, snapshotName)
|
||||
@@ -141,17 +148,22 @@ func (tb *table) MustCreateSnapshot(snapshotName string) (string, string) {
|
||||
dstBigDir := filepath.Join(tb.path, bigDirname, snapshotsDirname, snapshotName)
|
||||
fs.MustMkdirFailIfExist(dstBigDir)
|
||||
|
||||
dstIndexDBDir := filepath.Join(tb.path, indexdbDirname, snapshotsDirname, snapshotName)
|
||||
fs.MustMkdirFailIfExist(dstIndexDBDir)
|
||||
|
||||
for _, ptw := range ptws {
|
||||
smallPath := filepath.Join(dstSmallDir, ptw.pt.name)
|
||||
bigPath := filepath.Join(dstBigDir, ptw.pt.name)
|
||||
ptw.pt.MustCreateSnapshotAt(smallPath, bigPath)
|
||||
indexDBPath := filepath.Join(dstIndexDBDir, ptw.pt.name)
|
||||
ptw.pt.MustCreateSnapshotAt(smallPath, bigPath, indexDBPath)
|
||||
}
|
||||
|
||||
fs.MustSyncPathAndParentDir(dstSmallDir)
|
||||
fs.MustSyncPathAndParentDir(dstBigDir)
|
||||
fs.MustSyncPathAndParentDir(dstIndexDBDir)
|
||||
|
||||
logger.Infof("created table snapshot for %q at (%q, %q) in %.3f seconds", tb.path, dstSmallDir, dstBigDir, time.Since(startTime).Seconds())
|
||||
return dstSmallDir, dstBigDir
|
||||
logger.Infof("created table snapshot for %q at (%q, %q, %q) in %.3f seconds", tb.path, dstSmallDir, dstBigDir, dstIndexDBDir, time.Since(startTime).Seconds())
|
||||
return dstSmallDir, dstBigDir, dstIndexDBDir
|
||||
}
|
||||
|
||||
// MustDeleteSnapshot deletes snapshot with the given snapshotName.
|
||||
@@ -160,14 +172,24 @@ func (tb *table) MustDeleteSnapshot(snapshotName string) {
|
||||
fs.MustRemoveDir(smallDir)
|
||||
bigDir := filepath.Join(tb.path, bigDirname, snapshotsDirname, snapshotName)
|
||||
fs.MustRemoveDir(bigDir)
|
||||
indexDBDir := filepath.Join(tb.path, indexdbDirname, snapshotsDirname, snapshotName)
|
||||
fs.MustRemoveDir(indexDBDir)
|
||||
}
|
||||
|
||||
func (tb *table) addPartitionLocked(pt *partition) {
|
||||
_ = tb.addPartitionWrapperLocked(pt)
|
||||
// It is expected that the caller of this method will eventually decrement
|
||||
// the pt refCount.
|
||||
}
|
||||
|
||||
func (tb *table) addPartitionWrapperLocked(pt *partition) *partitionWrapper {
|
||||
ptw := &partitionWrapper{
|
||||
pt: pt,
|
||||
}
|
||||
ptw.incRef()
|
||||
|
||||
tb.ptws = append(tb.ptws, ptw)
|
||||
return ptw
|
||||
}
|
||||
|
||||
// MustClose closes the table.
|
||||
@@ -194,12 +216,12 @@ func (tb *table) MustClose() {
|
||||
}
|
||||
}
|
||||
|
||||
// DebugFlush flushes all pending raw data rows, so they become
|
||||
// DebugFlush flushes all pending raw index and data rows, so they become
|
||||
// visible to search.
|
||||
//
|
||||
// This function is for debug purposes only.
|
||||
func (tb *table) DebugFlush() {
|
||||
ptws := tb.GetPartitions(nil)
|
||||
ptws := tb.GetAllPartitions(nil)
|
||||
defer tb.PutPartitions(ptws)
|
||||
|
||||
for _, ptw := range ptws {
|
||||
@@ -229,7 +251,7 @@ type TableMetrics struct {
|
||||
|
||||
// UpdateMetrics updates m with metrics from tb.
|
||||
func (tb *table) UpdateMetrics(m *TableMetrics) {
|
||||
ptws := tb.GetPartitions(nil)
|
||||
ptws := tb.GetAllPartitions(nil)
|
||||
defer tb.PutPartitions(ptws)
|
||||
|
||||
for _, ptw := range ptws {
|
||||
@@ -253,7 +275,7 @@ func (tb *table) UpdateMetrics(m *TableMetrics) {
|
||||
//
|
||||
// Partitions are merged sequentially in order to reduce load on the system.
|
||||
func (tb *table) ForceMergePartitions(partitionNamePrefix string) error {
|
||||
ptws := tb.GetPartitions(nil)
|
||||
ptws := tb.GetAllPartitions(nil)
|
||||
defer tb.PutPartitions(ptws)
|
||||
|
||||
tb.forceMergeWG.Add(1)
|
||||
@@ -283,7 +305,7 @@ func (tb *table) MustAddRows(rows []rawRow) {
|
||||
ptwsX := getPartitionWrappers()
|
||||
defer putPartitionWrappers(ptwsX)
|
||||
|
||||
ptwsX.a = tb.GetPartitions(ptwsX.a[:0])
|
||||
ptwsX.a = tb.GetAllPartitions(ptwsX.a[:0])
|
||||
ptws := ptwsX.a
|
||||
for i, ptw := range ptws {
|
||||
singlePt := true
|
||||
@@ -368,13 +390,22 @@ func (tb *table) MustAddRows(rows []rawRow) {
|
||||
continue
|
||||
}
|
||||
|
||||
pt := mustCreatePartition(r.Timestamp, tb.smallPartitionsPath, tb.bigPartitionsPath, tb.s)
|
||||
pt := mustCreatePartition(r.Timestamp, tb.smallPartitionsPath, tb.bigPartitionsPath, tb.indexDBPath, tb.s)
|
||||
pt.AddRows(missingRows[i : i+1])
|
||||
tb.addPartitionLocked(pt)
|
||||
}
|
||||
tb.ptwsLock.Unlock()
|
||||
}
|
||||
|
||||
// MustGetIndexDBIDByHour returns the id of the indexDB which contains the
|
||||
// provided hour. If the indexDB does not exist it will be created.
|
||||
func (tb *table) MustGetIndexDBIDByHour(hour uint64) uint64 {
|
||||
ts := int64(hour * msecPerHour)
|
||||
ptw := tb.MustGetPartition(ts)
|
||||
defer tb.PutPartition(ptw)
|
||||
return ptw.pt.idb.id
|
||||
}
|
||||
|
||||
func (tb *table) getMinMaxTimestamps() (int64, int64) {
|
||||
now := int64(fasttime.UnixTimestamp() * 1000)
|
||||
minTimestamp := now - tb.s.retentionMsecs
|
||||
@@ -452,7 +483,7 @@ func (tb *table) historicalMergeWatcher() {
|
||||
}
|
||||
|
||||
f := func() {
|
||||
ptws := tb.GetPartitions(nil)
|
||||
ptws := tb.GetAllPartitions(nil)
|
||||
defer tb.PutPartitions(ptws)
|
||||
timestamp := timestampFromTime(time.Now())
|
||||
currentPartitionName := timestampToPartitionName(timestamp)
|
||||
@@ -519,11 +550,54 @@ func (tb *table) historicalMergeWatcher() {
|
||||
}
|
||||
}
|
||||
|
||||
// GetPartitions appends tb's partitions snapshot to dst and returns the result.
|
||||
// MustGetPartition returns a partition that corresponds to the given timestamp.
|
||||
//
|
||||
// If the partition does not exist yet, it will be created.
|
||||
//
|
||||
// The function increments the ref counter for the found partition.
|
||||
// The returned partition must be passed to PutPartition when no longer needed.
|
||||
func (tb *table) MustGetPartition(timestamp int64) *partitionWrapper {
|
||||
tb.ptwsLock.Lock()
|
||||
defer tb.ptwsLock.Unlock()
|
||||
|
||||
ptw := tb.getPartitionLocked(timestamp)
|
||||
if ptw != nil {
|
||||
return ptw
|
||||
}
|
||||
|
||||
pt := mustCreatePartition(timestamp, tb.smallPartitionsPath, tb.bigPartitionsPath, tb.indexDBPath, tb.s)
|
||||
ptw = tb.addPartitionWrapperLocked(pt)
|
||||
ptw.incRef()
|
||||
return ptw
|
||||
}
|
||||
|
||||
// GetPartition returns a partition that corresponds to the given timestamp or
|
||||
// nil if such partition does not exist.
|
||||
//
|
||||
// If the partition is found, the function increments its ref counter. When no
|
||||
// longer needed, the returned partition must be passed to PutPartition to
|
||||
// decrement its ref counter.
|
||||
func (tb *table) GetPartition(timestamp int64) *partitionWrapper {
|
||||
tb.ptwsLock.Lock()
|
||||
defer tb.ptwsLock.Unlock()
|
||||
return tb.getPartitionLocked(timestamp)
|
||||
}
|
||||
|
||||
func (tb *table) getPartitionLocked(timestamp int64) *partitionWrapper {
|
||||
for _, ptw := range tb.ptws {
|
||||
if ptw.pt.HasTimestamp(timestamp) {
|
||||
ptw.incRef()
|
||||
return ptw
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetAllPartitions appends tb's partitions snapshot to dst and returns the result.
|
||||
//
|
||||
// The returned partitions must be passed to PutPartitions
|
||||
// when they are no longer needed.
|
||||
func (tb *table) GetPartitions(dst []*partitionWrapper) []*partitionWrapper {
|
||||
func (tb *table) GetAllPartitions(dst []*partitionWrapper) []*partitionWrapper {
|
||||
tb.ptwsLock.Lock()
|
||||
for _, ptw := range tb.ptws {
|
||||
ptw.incRef()
|
||||
@@ -534,19 +608,46 @@ func (tb *table) GetPartitions(dst []*partitionWrapper) []*partitionWrapper {
|
||||
return dst
|
||||
}
|
||||
|
||||
// PutPartitions deregisters ptws obtained via GetPartitions.
|
||||
// GetPartitions returns snapshot of partitions whose time ranges overlap with the
|
||||
// given time range.
|
||||
//
|
||||
// The returned partitions must be passed to PutPartitions
|
||||
// when they no longer needed.
|
||||
func (tb *table) GetPartitions(tr TimeRange) []*partitionWrapper {
|
||||
tb.ptwsLock.Lock()
|
||||
defer tb.ptwsLock.Unlock()
|
||||
|
||||
var ptws []*partitionWrapper
|
||||
|
||||
for _, ptw := range tb.ptws {
|
||||
if ptw.pt.tr.overlapsWith(tr) {
|
||||
ptw.incRef()
|
||||
ptws = append(ptws, ptw)
|
||||
}
|
||||
}
|
||||
|
||||
return ptws
|
||||
}
|
||||
|
||||
// PutPartition decrements the ref counter for the given partition.
//
// It is the counterpart of GetPartition and MustGetPartition, both of which
// increment the counter of the partition they return.
|
||||
func (tb *table) PutPartition(ptw *partitionWrapper) {
|
||||
ptw.decRef()
|
||||
}
|
||||
|
||||
// PutPartitions deregisters ptws obtained via GetAllPartitions or GetPartitions.
|
||||
func (tb *table) PutPartitions(ptws []*partitionWrapper) {
|
||||
for _, ptw := range ptws {
|
||||
ptw.decRef()
|
||||
tb.PutPartition(ptw)
|
||||
}
|
||||
}
|
||||
|
||||
func mustOpenPartitions(smallPartitionsPath, bigPartitionsPath string, s *Storage) []*partition {
|
||||
func mustOpenPartitions(smallPartitionsPath, bigPartitionsPath, indexDBPath string, s *Storage) []*partition {
|
||||
// Certain partition directories in either `big` or `small` dir may be missing
|
||||
// after restoring from backup. So populate partition names from both dirs.
|
||||
ptNames := make(map[string]bool)
|
||||
mustPopulatePartitionNames(smallPartitionsPath, ptNames)
|
||||
mustPopulatePartitionNames(bigPartitionsPath, ptNames)
|
||||
mustPopulatePartitionNames(indexDBPath, ptNames)
|
||||
var pts []*partition
|
||||
var ptsLock sync.Mutex
|
||||
|
||||
@@ -564,7 +665,8 @@ func mustOpenPartitions(smallPartitionsPath, bigPartitionsPath string, s *Storag
|
||||
|
||||
smallPartsPath := filepath.Join(smallPartitionsPath, ptName)
|
||||
bigPartsPath := filepath.Join(bigPartitionsPath, ptName)
|
||||
pt := mustOpenPartition(smallPartsPath, bigPartsPath, s)
|
||||
indexDBPartsPath := filepath.Join(indexDBPath, ptName)
|
||||
pt := mustOpenPartition(smallPartsPath, bigPartsPath, indexDBPartsPath, s)
|
||||
|
||||
ptsLock.Lock()
|
||||
pts = append(pts, pt)
|
||||
|
||||
@@ -82,7 +82,7 @@ func (ts *tableSearch) Init(tb *table, tsids []TSID, tr TimeRange) {
|
||||
return
|
||||
}
|
||||
|
||||
ts.ptws = tb.GetPartitions(ts.ptws[:0])
|
||||
ts.ptws = tb.GetAllPartitions(ts.ptws[:0])
|
||||
|
||||
// Initialize the ptsPool.
|
||||
ts.ptsPool = slicesutil.SetLength(ts.ptsPool, len(ts.ptws))
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
)
|
||||
@@ -29,3 +31,57 @@ func TestTableOpenClose(t *testing.T) {
|
||||
|
||||
stopTestStorage(strg)
|
||||
}
|
||||
|
||||
func TestGetPartition(t *testing.T) {
|
||||
defer testRemoveAll(t)
|
||||
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{})
|
||||
defer s.MustClose()
|
||||
|
||||
var ptw *partitionWrapper
|
||||
timestamp := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli()
|
||||
|
||||
ptw = s.tb.GetPartition(timestamp)
|
||||
if ptw != nil {
|
||||
name := ptw.pt.name
|
||||
s.tb.PutPartition(ptw)
|
||||
t.Fatalf("GetPartition() unexpectedly returned a partition that should not exist: %s", name)
|
||||
}
|
||||
|
||||
ptw = s.tb.MustGetPartition(timestamp)
|
||||
if ptw == nil {
|
||||
t.Fatalf("MustGetPartition() unexpectedly did not create a new partition")
|
||||
}
|
||||
s.tb.PutPartition(ptw)
|
||||
|
||||
ptw = s.tb.GetPartition(timestamp)
|
||||
if ptw == nil {
|
||||
t.Fatalf("GetPartition() unexpectedly did not find partition")
|
||||
}
|
||||
s.tb.PutPartition(ptw)
|
||||
}
|
||||
|
||||
func TestGetPartition_concurrent(t *testing.T) {
|
||||
defer testRemoveAll(t)
|
||||
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{})
|
||||
defer s.MustClose()
|
||||
|
||||
begin := time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli()
|
||||
limit := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli()
|
||||
for ts := begin; ts < limit; ts += msecPerDay {
|
||||
var wg sync.WaitGroup
|
||||
for range 100 {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
ptw := s.tb.MustGetPartition(ts)
|
||||
s.tb.PutPartition(ptw)
|
||||
|
||||
ptw = s.tb.GetPartition(ts)
|
||||
s.tb.PutPartition(ptw)
|
||||
wg.Done()
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
}
|
||||
|
||||
63
vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/block_stream_reader.go
generated
vendored
63
vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/block_stream_reader.go
generated
vendored
@@ -9,6 +9,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
|
||||
)
|
||||
@@ -95,10 +96,9 @@ func (r *bloomValuesReader) totalBytesRead() uint64 {
|
||||
return r.bloom.bytesRead + r.values.bytesRead
|
||||
}
|
||||
|
||||
func (r *bloomValuesReader) appendClosers(dst []fs.MustCloser) []fs.MustCloser {
|
||||
dst = append(dst, &r.bloom)
|
||||
dst = append(dst, &r.values)
|
||||
return dst
|
||||
func (r *bloomValuesReader) appendCloserTasks(pe *fsutil.ParallelExecutor) {
|
||||
pe.Add(fs.NewCloserTask(&r.bloom))
|
||||
pe.Add(fs.NewCloserTask(&r.values))
|
||||
}
|
||||
|
||||
type bloomValuesStreamReader struct {
|
||||
@@ -181,23 +181,22 @@ func (sr *streamReaders) totalBytesRead() uint64 {
|
||||
func (sr *streamReaders) MustClose() {
|
||||
// Close files in parallel in order to reduce the time needed for this operation
|
||||
// on high-latency storage systems such as NFS or Ceph.
|
||||
cs := []fs.MustCloser{
|
||||
&sr.columnNamesReader,
|
||||
&sr.columnIdxsReader,
|
||||
&sr.metaindexReader,
|
||||
&sr.indexReader,
|
||||
&sr.columnsHeaderIndexReader,
|
||||
&sr.columnsHeaderReader,
|
||||
&sr.timestampsReader,
|
||||
}
|
||||
var pe fsutil.ParallelExecutor
|
||||
pe.Add(fs.NewCloserTask(&sr.columnNamesReader))
|
||||
pe.Add(fs.NewCloserTask(&sr.columnIdxsReader))
|
||||
pe.Add(fs.NewCloserTask(&sr.metaindexReader))
|
||||
pe.Add(fs.NewCloserTask(&sr.indexReader))
|
||||
pe.Add(fs.NewCloserTask(&sr.columnsHeaderIndexReader))
|
||||
pe.Add(fs.NewCloserTask(&sr.columnsHeaderReader))
|
||||
pe.Add(fs.NewCloserTask(&sr.timestampsReader))
|
||||
|
||||
cs = sr.messageBloomValuesReader.appendClosers(cs)
|
||||
cs = sr.oldBloomValuesReader.appendClosers(cs)
|
||||
sr.messageBloomValuesReader.appendCloserTasks(&pe)
|
||||
sr.oldBloomValuesReader.appendCloserTasks(&pe)
|
||||
for i := range sr.bloomValuesShards {
|
||||
cs = sr.bloomValuesShards[i].appendClosers(cs)
|
||||
sr.bloomValuesShards[i].appendCloserTasks(&pe)
|
||||
}
|
||||
|
||||
fs.MustCloseParallel(cs)
|
||||
pe.Run()
|
||||
}
|
||||
|
||||
func (sr *streamReaders) getBloomValuesReaderForColumnName(name string) *bloomValuesReader {
|
||||
@@ -355,63 +354,63 @@ func (bsr *blockStreamReader) MustInitFromFilePart(path string) {
|
||||
// Open data readers in parallel in order to reduce the time for this operation
|
||||
// on high-latency storage systems such as NFS or Ceph.
|
||||
|
||||
var pfo filestream.ParallelFileOpener
|
||||
var pe fsutil.ParallelExecutor
|
||||
|
||||
var columnNamesReader filestream.ReadCloser
|
||||
if bsr.ph.FormatVersion >= 1 {
|
||||
pfo.Add(columnNamesPath, &columnNamesReader, nocache)
|
||||
pe.Add(filestream.NewFileOpenerTask(columnNamesPath, &columnNamesReader, nocache))
|
||||
}
|
||||
|
||||
var columnIdxsReader filestream.ReadCloser
|
||||
if bsr.ph.FormatVersion >= 3 {
|
||||
pfo.Add(columnIdxsPath, &columnIdxsReader, nocache)
|
||||
pe.Add(filestream.NewFileOpenerTask(columnIdxsPath, &columnIdxsReader, nocache))
|
||||
}
|
||||
|
||||
var metaindexReader filestream.ReadCloser
|
||||
pfo.Add(metaindexPath, &metaindexReader, nocache)
|
||||
pe.Add(filestream.NewFileOpenerTask(metaindexPath, &metaindexReader, nocache))
|
||||
|
||||
var indexReader filestream.ReadCloser
|
||||
pfo.Add(indexPath, &indexReader, nocache)
|
||||
pe.Add(filestream.NewFileOpenerTask(indexPath, &indexReader, nocache))
|
||||
|
||||
var columnsHeaderIndexReader filestream.ReadCloser
|
||||
if bsr.ph.FormatVersion >= 1 {
|
||||
pfo.Add(columnsHeaderIndexPath, &columnsHeaderIndexReader, nocache)
|
||||
pe.Add(filestream.NewFileOpenerTask(columnsHeaderIndexPath, &columnsHeaderIndexReader, nocache))
|
||||
}
|
||||
|
||||
var columnsHeaderReader filestream.ReadCloser
|
||||
pfo.Add(columnsHeaderPath, &columnsHeaderReader, nocache)
|
||||
pe.Add(filestream.NewFileOpenerTask(columnsHeaderPath, &columnsHeaderReader, nocache))
|
||||
|
||||
var timestampsReader filestream.ReadCloser
|
||||
pfo.Add(timestampsPath, ×tampsReader, nocache)
|
||||
pe.Add(filestream.NewFileOpenerTask(timestampsPath, ×tampsReader, nocache))
|
||||
|
||||
messageBloomFilterPath := filepath.Join(path, messageBloomFilename)
|
||||
messageValuesPath := filepath.Join(path, messageValuesFilename)
|
||||
var messageBloomValuesReader bloomValuesStreamReader
|
||||
pfo.Add(messageBloomFilterPath, &messageBloomValuesReader.bloom, nocache)
|
||||
pfo.Add(messageValuesPath, &messageBloomValuesReader.values, nocache)
|
||||
pe.Add(filestream.NewFileOpenerTask(messageBloomFilterPath, &messageBloomValuesReader.bloom, nocache))
|
||||
pe.Add(filestream.NewFileOpenerTask(messageValuesPath, &messageBloomValuesReader.values, nocache))
|
||||
|
||||
var oldBloomValuesReader bloomValuesStreamReader
|
||||
var bloomValuesShards []bloomValuesStreamReader
|
||||
if bsr.ph.FormatVersion < 1 {
|
||||
bloomPath := filepath.Join(path, oldBloomFilename)
|
||||
pfo.Add(bloomPath, &oldBloomValuesReader.bloom, nocache)
|
||||
pe.Add(filestream.NewFileOpenerTask(bloomPath, &oldBloomValuesReader.bloom, nocache))
|
||||
|
||||
valuesPath := filepath.Join(path, oldValuesFilename)
|
||||
pfo.Add(valuesPath, &oldBloomValuesReader.values, nocache)
|
||||
pe.Add(filestream.NewFileOpenerTask(valuesPath, &oldBloomValuesReader.values, nocache))
|
||||
} else {
|
||||
bloomValuesShards = make([]bloomValuesStreamReader, bsr.ph.BloomValuesShardsCount)
|
||||
for i := range bloomValuesShards {
|
||||
shard := &bloomValuesShards[i]
|
||||
|
||||
bloomPath := getBloomFilePath(path, uint64(i))
|
||||
pfo.Add(bloomPath, &shard.bloom, nocache)
|
||||
pe.Add(filestream.NewFileOpenerTask(bloomPath, &shard.bloom, nocache))
|
||||
|
||||
valuesPath := getValuesFilePath(path, uint64(i))
|
||||
pfo.Add(valuesPath, &shard.values, nocache)
|
||||
pe.Add(filestream.NewFileOpenerTask(valuesPath, &shard.values, nocache))
|
||||
}
|
||||
}
|
||||
|
||||
pfo.Run()
|
||||
pe.Run()
|
||||
|
||||
// Initialize streamReaders
|
||||
bsr.streamReaders.init(bsr.ph.FormatVersion, columnNamesReader, columnIdxsReader, metaindexReader, indexReader,
|
||||
|
||||
53
vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/block_stream_writer.go
generated
vendored
53
vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/block_stream_writer.go
generated
vendored
@@ -7,6 +7,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
|
||||
)
|
||||
@@ -84,10 +85,9 @@ func (w *bloomValuesWriter) totalBytesWritten() uint64 {
|
||||
return w.bloom.bytesWritten + w.values.bytesWritten
|
||||
}
|
||||
|
||||
func (w *bloomValuesWriter) appendClosers(dst []fs.MustCloser) []fs.MustCloser {
|
||||
dst = append(dst, &w.bloom)
|
||||
dst = append(dst, &w.values)
|
||||
return dst
|
||||
func (w *bloomValuesWriter) appendCloserTasks(pe *fsutil.ParallelExecutor) {
|
||||
pe.Add(fs.NewCloserTask(&w.bloom))
|
||||
pe.Add(fs.NewCloserTask(&w.values))
|
||||
}
|
||||
|
||||
type bloomValuesStreamWriter struct {
|
||||
@@ -158,22 +158,21 @@ func (sw *streamWriters) totalBytesWritten() uint64 {
|
||||
func (sw *streamWriters) MustClose() {
|
||||
// Flush and close files in parallel in order to reduce the time needed for this operation
|
||||
// on high-latency storage systems such as NFS or Ceph.
|
||||
cs := []fs.MustCloser{
|
||||
&sw.columnNamesWriter,
|
||||
&sw.columnIdxsWriter,
|
||||
&sw.metaindexWriter,
|
||||
&sw.indexWriter,
|
||||
&sw.columnsHeaderIndexWriter,
|
||||
&sw.columnsHeaderWriter,
|
||||
&sw.timestampsWriter,
|
||||
}
|
||||
var pe fsutil.ParallelExecutor
|
||||
pe.Add(fs.NewCloserTask(&sw.columnNamesWriter))
|
||||
pe.Add(fs.NewCloserTask(&sw.columnIdxsWriter))
|
||||
pe.Add(fs.NewCloserTask(&sw.metaindexWriter))
|
||||
pe.Add(fs.NewCloserTask(&sw.indexWriter))
|
||||
pe.Add(fs.NewCloserTask(&sw.columnsHeaderIndexWriter))
|
||||
pe.Add(fs.NewCloserTask(&sw.columnsHeaderWriter))
|
||||
pe.Add(fs.NewCloserTask(&sw.timestampsWriter))
|
||||
|
||||
cs = sw.messageBloomValuesWriter.appendClosers(cs)
|
||||
sw.messageBloomValuesWriter.appendCloserTasks(&pe)
|
||||
for i := range sw.bloomValuesShards {
|
||||
cs = sw.bloomValuesShards[i].appendClosers(cs)
|
||||
sw.bloomValuesShards[i].appendCloserTasks(&pe)
|
||||
}
|
||||
|
||||
fs.MustCloseParallel(cs)
|
||||
pe.Run()
|
||||
}
|
||||
|
||||
func (sw *streamWriters) getBloomValuesWriterForColumnName(name string) *bloomValuesWriter {
|
||||
@@ -312,39 +311,39 @@ func (bsw *blockStreamWriter) MustInitForFilePart(path string, nocache bool) {
|
||||
columnsHeaderPath := filepath.Join(path, columnsHeaderFilename)
|
||||
timestampsPath := filepath.Join(path, timestampsFilename)
|
||||
|
||||
var pfc filestream.ParallelFileCreator
|
||||
var pe fsutil.ParallelExecutor
|
||||
|
||||
// Always cache columnNames file, since it is re-read immediately after part creation
|
||||
var columnNamesWriter filestream.WriteCloser
|
||||
pfc.Add(columnNamesPath, &columnNamesWriter, false)
|
||||
pe.Add(filestream.NewFileCreatorTask(columnNamesPath, &columnNamesWriter, false))
|
||||
|
||||
// Always cache columnIdxs file, since it is re-read immediately after part creation
|
||||
var columnIdxsWriter filestream.WriteCloser
|
||||
pfc.Add(columnIdxsPath, &columnIdxsWriter, false)
|
||||
pe.Add(filestream.NewFileCreatorTask(columnIdxsPath, &columnIdxsWriter, false))
|
||||
|
||||
// Always cache metaindex file, since it is re-read immediately after part creation
|
||||
var metaindexWriter filestream.WriteCloser
|
||||
pfc.Add(metaindexPath, &metaindexWriter, false)
|
||||
pe.Add(filestream.NewFileCreatorTask(metaindexPath, &metaindexWriter, false))
|
||||
|
||||
var indexWriter filestream.WriteCloser
|
||||
pfc.Add(indexPath, &indexWriter, nocache)
|
||||
pe.Add(filestream.NewFileCreatorTask(indexPath, &indexWriter, nocache))
|
||||
|
||||
var columnsHeaderIndexWriter filestream.WriteCloser
|
||||
pfc.Add(columnsHeaderIndexPath, &columnsHeaderIndexWriter, nocache)
|
||||
pe.Add(filestream.NewFileCreatorTask(columnsHeaderIndexPath, &columnsHeaderIndexWriter, nocache))
|
||||
|
||||
var columnsHeaderWriter filestream.WriteCloser
|
||||
pfc.Add(columnsHeaderPath, &columnsHeaderWriter, nocache)
|
||||
pe.Add(filestream.NewFileCreatorTask(columnsHeaderPath, &columnsHeaderWriter, nocache))
|
||||
|
||||
var timestampsWriter filestream.WriteCloser
|
||||
pfc.Add(timestampsPath, ×tampsWriter, nocache)
|
||||
pe.Add(filestream.NewFileCreatorTask(timestampsPath, ×tampsWriter, nocache))
|
||||
|
||||
messageBloomFilterPath := filepath.Join(path, messageBloomFilename)
|
||||
messageValuesPath := filepath.Join(path, messageValuesFilename)
|
||||
var messageBloomValuesWriter bloomValuesStreamWriter
|
||||
pfc.Add(messageBloomFilterPath, &messageBloomValuesWriter.bloom, nocache)
|
||||
pfc.Add(messageValuesPath, &messageBloomValuesWriter.values, nocache)
|
||||
pe.Add(filestream.NewFileCreatorTask(messageBloomFilterPath, &messageBloomValuesWriter.bloom, nocache))
|
||||
pe.Add(filestream.NewFileCreatorTask(messageValuesPath, &messageBloomValuesWriter.values, nocache))
|
||||
|
||||
pfc.Run()
|
||||
pe.Run()
|
||||
|
||||
createBloomValuesWriter := func(shardIdx uint64) bloomValuesStreamWriter {
|
||||
bloomPath := getBloomFilePath(path, shardIdx)
|
||||
|
||||
27
vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/inmemory_part.go
generated
vendored
27
vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/inmemory_part.go
generated
vendored
@@ -8,6 +8,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/chunkedbuffer"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fsutil"
|
||||
)
|
||||
|
||||
// inmemoryPart is an in-memory part.
|
||||
@@ -120,26 +121,26 @@ func (mp *inmemoryPart) MustStoreToDisk(path string) {
|
||||
messageValuesPath := filepath.Join(path, messageValuesFilename)
|
||||
messageBloomFilterPath := filepath.Join(path, messageBloomFilename)
|
||||
|
||||
var psw filestream.ParallelStreamWriter
|
||||
var pe fsutil.ParallelExecutor
|
||||
|
||||
psw.Add(columnNamesPath, &mp.columnNames)
|
||||
psw.Add(columnIdxsPath, &mp.columnIdxs)
|
||||
psw.Add(metaindexPath, &mp.metaindex)
|
||||
psw.Add(indexPath, &mp.index)
|
||||
psw.Add(columnsHeaderIndexPath, &mp.columnsHeaderIndex)
|
||||
psw.Add(columnsHeaderPath, &mp.columnsHeader)
|
||||
psw.Add(timestampsPath, &mp.timestamps)
|
||||
pe.Add(filestream.NewStreamWriterTask(columnNamesPath, &mp.columnNames))
|
||||
pe.Add(filestream.NewStreamWriterTask(columnIdxsPath, &mp.columnIdxs))
|
||||
pe.Add(filestream.NewStreamWriterTask(metaindexPath, &mp.metaindex))
|
||||
pe.Add(filestream.NewStreamWriterTask(indexPath, &mp.index))
|
||||
pe.Add(filestream.NewStreamWriterTask(columnsHeaderIndexPath, &mp.columnsHeaderIndex))
|
||||
pe.Add(filestream.NewStreamWriterTask(columnsHeaderPath, &mp.columnsHeader))
|
||||
pe.Add(filestream.NewStreamWriterTask(timestampsPath, &mp.timestamps))
|
||||
|
||||
psw.Add(messageBloomFilterPath, &mp.messageBloomValues.bloom)
|
||||
psw.Add(messageValuesPath, &mp.messageBloomValues.values)
|
||||
pe.Add(filestream.NewStreamWriterTask(messageBloomFilterPath, &mp.messageBloomValues.bloom))
|
||||
pe.Add(filestream.NewStreamWriterTask(messageValuesPath, &mp.messageBloomValues.values))
|
||||
|
||||
bloomPath := getBloomFilePath(path, 0)
|
||||
psw.Add(bloomPath, &mp.fieldBloomValues.bloom)
|
||||
pe.Add(filestream.NewStreamWriterTask(bloomPath, &mp.fieldBloomValues.bloom))
|
||||
|
||||
valuesPath := getValuesFilePath(path, 0)
|
||||
psw.Add(valuesPath, &mp.fieldBloomValues.values)
|
||||
pe.Add(filestream.NewStreamWriterTask(valuesPath, &mp.fieldBloomValues.values))
|
||||
|
||||
psw.Run()
|
||||
pe.Run()
|
||||
|
||||
mp.ph.mustWriteMetadata(path)
|
||||
|
||||
|
||||
46
vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/part.go
generated
vendored
46
vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/part.go
generated
vendored
@@ -9,6 +9,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
@@ -54,10 +55,9 @@ type bloomValuesReaderAt struct {
|
||||
values fs.MustReadAtCloser
|
||||
}
|
||||
|
||||
func (r *bloomValuesReaderAt) appendClosers(dst []fs.MustCloser) []fs.MustCloser {
|
||||
dst = append(dst, r.bloom)
|
||||
dst = append(dst, r.values)
|
||||
return dst
|
||||
func (r *bloomValuesReaderAt) appendCloserTasks(pe *fsutil.ParallelExecutor) {
|
||||
pe.Add(fs.NewCloserTask(r.bloom))
|
||||
pe.Add(fs.NewCloserTask(r.values))
|
||||
}
|
||||
|
||||
func mustOpenInmemoryPart(pt *partition, mp *inmemoryPart) *part {
|
||||
@@ -137,36 +137,36 @@ func mustOpenFilePart(pt *partition, path string) *part {
|
||||
mrs.MustClose()
|
||||
|
||||
// Open data files
|
||||
p.indexFile = fs.MustOpenReaderAt(indexPath)
|
||||
p.indexFile = fs.OpenReaderAt(indexPath)
|
||||
if p.ph.FormatVersion >= 1 {
|
||||
p.columnsHeaderIndexFile = fs.MustOpenReaderAt(columnsHeaderIndexPath)
|
||||
p.columnsHeaderIndexFile = fs.OpenReaderAt(columnsHeaderIndexPath)
|
||||
}
|
||||
p.columnsHeaderFile = fs.MustOpenReaderAt(columnsHeaderPath)
|
||||
p.timestampsFile = fs.MustOpenReaderAt(timestampsPath)
|
||||
p.columnsHeaderFile = fs.OpenReaderAt(columnsHeaderPath)
|
||||
p.timestampsFile = fs.OpenReaderAt(timestampsPath)
|
||||
|
||||
// Open files with bloom filters and column values
|
||||
messageBloomFilterPath := filepath.Join(path, messageBloomFilename)
|
||||
p.messageBloomValues.bloom = fs.MustOpenReaderAt(messageBloomFilterPath)
|
||||
p.messageBloomValues.bloom = fs.OpenReaderAt(messageBloomFilterPath)
|
||||
|
||||
messageValuesPath := filepath.Join(path, messageValuesFilename)
|
||||
p.messageBloomValues.values = fs.MustOpenReaderAt(messageValuesPath)
|
||||
p.messageBloomValues.values = fs.OpenReaderAt(messageValuesPath)
|
||||
|
||||
if p.ph.FormatVersion < 1 {
|
||||
bloomPath := filepath.Join(path, oldBloomFilename)
|
||||
p.oldBloomValues.bloom = fs.MustOpenReaderAt(bloomPath)
|
||||
p.oldBloomValues.bloom = fs.OpenReaderAt(bloomPath)
|
||||
|
||||
valuesPath := filepath.Join(path, oldValuesFilename)
|
||||
p.oldBloomValues.values = fs.MustOpenReaderAt(valuesPath)
|
||||
p.oldBloomValues.values = fs.OpenReaderAt(valuesPath)
|
||||
} else {
|
||||
p.bloomValuesShards = make([]bloomValuesReaderAt, p.ph.BloomValuesShardsCount)
|
||||
for i := range p.bloomValuesShards {
|
||||
shard := &p.bloomValuesShards[i]
|
||||
|
||||
bloomPath := getBloomFilePath(path, uint64(i))
|
||||
shard.bloom = fs.MustOpenReaderAt(bloomPath)
|
||||
shard.bloom = fs.OpenReaderAt(bloomPath)
|
||||
|
||||
valuesPath := getValuesFilePath(path, uint64(i))
|
||||
shard.values = fs.MustOpenReaderAt(valuesPath)
|
||||
shard.values = fs.OpenReaderAt(valuesPath)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -176,25 +176,25 @@ func mustOpenFilePart(pt *partition, path string) *part {
|
||||
func mustClosePart(p *part) {
|
||||
// Close files in parallel in order to speed up this operation
|
||||
// on high-latency storage systems such as NFS and Ceph.
|
||||
var cs []fs.MustCloser
|
||||
var pe fsutil.ParallelExecutor
|
||||
|
||||
cs = append(cs, p.indexFile)
|
||||
pe.Add(fs.NewCloserTask(p.indexFile))
|
||||
if p.ph.FormatVersion >= 1 {
|
||||
cs = append(cs, p.columnsHeaderIndexFile)
|
||||
pe.Add(fs.NewCloserTask(p.columnsHeaderIndexFile))
|
||||
}
|
||||
cs = append(cs, p.columnsHeaderFile)
|
||||
cs = append(cs, p.timestampsFile)
|
||||
cs = p.messageBloomValues.appendClosers(cs)
|
||||
pe.Add(fs.NewCloserTask(p.columnsHeaderFile))
|
||||
pe.Add(fs.NewCloserTask(p.timestampsFile))
|
||||
p.messageBloomValues.appendCloserTasks(&pe)
|
||||
|
||||
if p.ph.FormatVersion < 1 {
|
||||
cs = p.oldBloomValues.appendClosers(cs)
|
||||
p.oldBloomValues.appendCloserTasks(&pe)
|
||||
} else {
|
||||
for i := range p.bloomValuesShards {
|
||||
cs = p.bloomValuesShards[i].appendClosers(cs)
|
||||
p.bloomValuesShards[i].appendCloserTasks(&pe)
|
||||
}
|
||||
}
|
||||
|
||||
fs.MustCloseParallel(cs)
|
||||
pe.Run()
|
||||
|
||||
p.pt = nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user