Compare commits
82 Commits
sort-order
...
v1.136.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7386a35942 | ||
|
|
6be2d89008 | ||
|
|
e5c8581bad | ||
|
|
14bc51554b | ||
|
|
7db81d062c | ||
|
|
ad62fe88ed | ||
|
|
40b85eb211 | ||
|
|
88b2464fe8 | ||
|
|
e4221f97a7 | ||
|
|
d40696a2f2 | ||
|
|
b2a74ec494 | ||
|
|
9774fe8df1 | ||
|
|
efd3b66609 | ||
|
|
785daff65d | ||
|
|
e3a57a3d80 | ||
|
|
161633158c | ||
|
|
7b708a8947 | ||
|
|
16d5f281fe | ||
|
|
6846ca09cb | ||
|
|
71997bc754 | ||
|
|
2ec6fafed0 | ||
|
|
a8ac5dfae5 | ||
|
|
6292d5fefa | ||
|
|
2a09f25f78 | ||
|
|
6824ade224 | ||
|
|
3a3c2084d3 | ||
|
|
3d6f353430 | ||
|
|
b1f333093b | ||
|
|
19403b9cd1 | ||
|
|
4edff7eae2 | ||
|
|
ce4b131816 | ||
|
|
cf69c56bb7 | ||
|
|
42ec981fe9 | ||
|
|
35e287d740 | ||
|
|
9df9a77169 | ||
|
|
17c514d2fa | ||
|
|
c12512bdd7 | ||
|
|
a108da8215 | ||
|
|
4e7606f669 | ||
|
|
060d7f6ed1 | ||
|
|
b3c1b00e4d | ||
|
|
a65f693649 | ||
|
|
6285bc4179 | ||
|
|
e89f131e34 | ||
|
|
493c1d410f | ||
|
|
b0029ee933 | ||
|
|
97e1308386 | ||
|
|
a279517034 | ||
|
|
f7ba76a59d | ||
|
|
60dbd5a97e | ||
|
|
32ddfa973b | ||
|
|
d9554a3a22 | ||
|
|
fbab6403dc | ||
|
|
07dd79608b | ||
|
|
5915c57b46 | ||
|
|
f36e1857c0 | ||
|
|
04f4a28cf4 | ||
|
|
7f3d370244 | ||
|
|
c89b7f7ad5 | ||
|
|
d9dabea303 | ||
|
|
09d2ce36e8 | ||
|
|
08755c838b | ||
|
|
d2e438ef41 | ||
|
|
e508fa5fe2 | ||
|
|
9a7deca207 | ||
|
|
60cadfbad1 | ||
|
|
b36c8b1110 | ||
|
|
90f0405b11 | ||
|
|
eac0a7ed86 | ||
|
|
a8a99105b1 | ||
|
|
c7f52992e7 | ||
|
|
5fe14e5479 | ||
|
|
c7ef079eba | ||
|
|
424d007a39 | ||
|
|
ad4562cd56 | ||
|
|
f9895d7e5e | ||
|
|
6bc809813b | ||
|
|
9b40fd00e0 | ||
|
|
9d59a31290 | ||
|
|
8391be18be | ||
|
|
8feb8c17aa | ||
|
|
634b4d035d |
3
.github/workflows/build.yml
vendored
@@ -71,7 +71,8 @@ jobs:
|
||||
go.sum
|
||||
Makefile
|
||||
app/**/Makefile
|
||||
go-version: stable
|
||||
go-version-file: 'go.mod'
|
||||
- run: go version
|
||||
|
||||
- name: Build victoria-metrics for ${{ matrix.os }}-${{ matrix.arch }}
|
||||
run: make victoria-metrics-${{ matrix.os }}-${{ matrix.arch }}
|
||||
|
||||
6
.github/workflows/check-licenses.yml
vendored
@@ -21,9 +21,11 @@ jobs:
|
||||
id: go
|
||||
uses: actions/setup-go@v6
|
||||
with:
|
||||
go-version: stable
|
||||
go-version-file: 'go.mod'
|
||||
cache: false
|
||||
|
||||
- run: go version
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
@@ -32,7 +34,7 @@ jobs:
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
|
||||
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-
|
||||
|
||||
- name: Check License
|
||||
run: make check-licenses
|
||||
|
||||
5
.github/workflows/codeql-analysis-go.yml
vendored
@@ -36,7 +36,8 @@ jobs:
|
||||
uses: actions/setup-go@v6
|
||||
with:
|
||||
cache: false
|
||||
go-version: stable
|
||||
go-version-file: 'go.mod'
|
||||
- run: go version
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v4
|
||||
@@ -46,7 +47,7 @@ jobs:
|
||||
~/go/bin
|
||||
~/go/pkg/mod
|
||||
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
|
||||
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v4
|
||||
|
||||
19
.github/workflows/test.yml
vendored
@@ -42,8 +42,9 @@ jobs:
|
||||
go.sum
|
||||
Makefile
|
||||
app/**/Makefile
|
||||
go-version: stable
|
||||
go-version-file: 'go.mod'
|
||||
|
||||
- run: go version
|
||||
|
||||
- name: Cache golangci-lint
|
||||
uses: actions/cache@v4
|
||||
@@ -51,7 +52,7 @@ jobs:
|
||||
path: |
|
||||
~/.cache/golangci-lint
|
||||
~/go/bin
|
||||
key: golangci-lint-${{ runner.os }}-${{ hashFiles('.golangci.yml') }}
|
||||
key: golangci-lint-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('.golangci.yml') }}
|
||||
|
||||
- name: Run check-all
|
||||
run: |
|
||||
@@ -81,7 +82,8 @@ jobs:
|
||||
go.sum
|
||||
Makefile
|
||||
app/**/Makefile
|
||||
go-version: stable
|
||||
go-version-file: 'go.mod'
|
||||
- run: go version
|
||||
|
||||
- name: Run tests
|
||||
run: GOGC=10 make ${{ matrix.scenario}}
|
||||
@@ -91,8 +93,8 @@ jobs:
|
||||
with:
|
||||
files: ./coverage.txt
|
||||
|
||||
integration:
|
||||
name: integration
|
||||
apptest:
|
||||
name: apptest
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
@@ -107,7 +109,8 @@ jobs:
|
||||
go.sum
|
||||
Makefile
|
||||
app/**/Makefile
|
||||
go-version: stable
|
||||
go-version-file: 'go.mod'
|
||||
- run: go version
|
||||
|
||||
- name: Run integration tests
|
||||
run: make integration-test
|
||||
- name: Run app tests
|
||||
run: make apptest
|
||||
|
||||
31
Makefile
@@ -17,7 +17,7 @@ EXTRA_GO_BUILD_TAGS ?=
|
||||
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
|
||||
TAR_OWNERSHIP ?= --owner=1000 --group=1000
|
||||
|
||||
GOLANGCI_LINT_VERSION := 2.7.2
|
||||
GOLANGCI_LINT_VERSION := 2.9.0
|
||||
|
||||
.PHONY: $(MAKECMDGOALS)
|
||||
|
||||
@@ -443,7 +443,7 @@ fmt:
|
||||
gofmt -l -w -s ./apptest
|
||||
|
||||
vet:
|
||||
GOEXPERIMENT=synctest go vet ./lib/...
|
||||
go vet -tags 'synctest' ./lib/...
|
||||
go vet ./app/...
|
||||
go vet ./apptest/...
|
||||
|
||||
@@ -452,28 +452,25 @@ check-all: fmt vet golangci-lint govulncheck
|
||||
clean-checkers: remove-golangci-lint remove-govulncheck
|
||||
|
||||
test:
|
||||
GOEXPERIMENT=synctest go test ./lib/... ./app/...
|
||||
go test -tags 'synctest' ./lib/... ./app/...
|
||||
|
||||
test-race:
|
||||
GOEXPERIMENT=synctest go test -race ./lib/... ./app/...
|
||||
go test -tags 'synctest' -race ./lib/... ./app/...
|
||||
|
||||
test-pure:
|
||||
GOEXPERIMENT=synctest CGO_ENABLED=0 go test ./lib/... ./app/...
|
||||
CGO_ENABLED=0 go test -tags 'synctest' ./lib/... ./app/...
|
||||
|
||||
test-full:
|
||||
GOEXPERIMENT=synctest go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
|
||||
test-full-386:
|
||||
GOEXPERIMENT=synctest GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
|
||||
integration-test:
|
||||
$(MAKE) apptest
|
||||
GOARCH=386 go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
|
||||
apptest:
|
||||
$(MAKE) victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore
|
||||
go test ./apptest/... -skip="^Test(Cluster|Legacy).*"
|
||||
|
||||
integration-test-legacy: victoria-metrics vmbackup vmrestore
|
||||
apptest-legacy: victoria-metrics vmbackup vmrestore
|
||||
OS=$$(uname | tr '[:upper:]' '[:lower:]'); \
|
||||
ARCH=$$(uname -m | tr '[:upper:]' '[:lower:]' | sed 's/x86_64/amd64/'); \
|
||||
VERSION=v1.132.0; \
|
||||
@@ -490,17 +487,17 @@ integration-test-legacy: victoria-metrics vmbackup vmrestore
|
||||
go test ./apptest/tests -run="^TestLegacySingle.*"
|
||||
|
||||
benchmark:
|
||||
GOEXPERIMENT=synctest go test -bench=. ./lib/...
|
||||
go test -bench=. ./app/...
|
||||
go test -run=NO_TESTS -bench=. ./lib/...
|
||||
go test -run=NO_TESTS -bench=. ./app/...
|
||||
|
||||
benchmark-pure:
|
||||
GOEXPERIMENT=synctest CGO_ENABLED=0 go test -bench=. ./lib/...
|
||||
CGO_ENABLED=0 go test -bench=. ./app/...
|
||||
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./lib/...
|
||||
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./app/...
|
||||
|
||||
vendor-update:
|
||||
go get -u ./lib/...
|
||||
go get -u ./app/...
|
||||
go mod tidy -compat=1.24
|
||||
go mod tidy -compat=1.26
|
||||
go mod vendor
|
||||
|
||||
app-local:
|
||||
@@ -524,7 +521,7 @@ install-qtc:
|
||||
|
||||
|
||||
golangci-lint: install-golangci-lint
|
||||
GOEXPERIMENT=synctest golangci-lint run
|
||||
golangci-lint run --build-tags 'synctest'
|
||||
|
||||
install-golangci-lint:
|
||||
which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION)
|
||||
|
||||
19
README.md
@@ -16,16 +16,21 @@
|
||||
<img src="docs/victoriametrics/logo.webp" width="300" alt="VictoriaMetrics logo">
|
||||
</picture>
|
||||
|
||||
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
|
||||
VictoriaMetrics is a fast, cost-effective, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
|
||||
|
||||
Here are some resources and information about VictoriaMetrics:
|
||||
|
||||
- Documentation: [docs.victoriametrics.com](https://docs.victoriametrics.com)
|
||||
- Case studies: [Grammarly, Roblox, Wix,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
|
||||
- Available: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), docker images [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
- Deployment types: [Single-node version](https://docs.victoriametrics.com/), [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/), and [Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/)
|
||||
- Changelog: [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics)
|
||||
- Community: [Slack](https://slack.victoriametrics.com/), [X (Twitter)](https://x.com/VictoriaMetrics), [LinkedIn](https://www.linkedin.com/company/victoriametrics/), [YouTube](https://www.youtube.com/@VictoriaMetrics)
|
||||
- **Case studies**: [Grammarly, Roblox, Wix, Spotify,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
|
||||
- **Available**: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), Docker images on [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
- **Deployment types**: [Single-node version](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) under [Apache License 2.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE).
|
||||
- **Getting started:** Read [key concepts](https://docs.victoriametrics.com/victoriametrics/keyconcepts/) and follow the
|
||||
[quick start guide](https://docs.victoriametrics.com/victoriametrics/quick-start/).
|
||||
- **Community**: [Slack](https://slack.victoriametrics.com/) (join via [Slack Inviter](https://slack.victoriametrics.com/)), [X (Twitter)](https://x.com/VictoriaMetrics), [YouTube](https://www.youtube.com/@VictoriaMetrics). See full list [here](https://docs.victoriametrics.com/victoriametrics/#community-and-contributions).
|
||||
- **Changelog**: The project evolves fast - check the [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/) and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics).
|
||||
- **Enterprise support:** [Contact us](mailto:info@victoriametrics.com) for commercial support with additional [enterprise features](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
- **Enterprise releases:** Enterprise and [long-term support releases (LTS)](https://docs.victoriametrics.com/victoriametrics/lts-releases/) are publicly available and can be evaluated for free
|
||||
using a [free trial license](https://victoriametrics.com/products/enterprise/trial/).
|
||||
- **Security:** We achieved [security certifications](https://victoriametrics.com/security/) for Database Software Development and Software-Based Monitoring Services.
|
||||
|
||||
Yes, we open-source both the single-node VictoriaMetrics and the cluster version.
|
||||
|
||||
|
||||
@@ -20,8 +20,7 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
|
||||
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
|
||||
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to all -remoteWrite.url.")
|
||||
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
|
||||
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
|
||||
"The path can point either to local file or to http url. "+
|
||||
|
||||
@@ -1080,7 +1080,7 @@ func (rwctx *remoteWriteCtx) tryPushTimeSeriesInternal(tss []prompb.TimeSeries)
|
||||
}()
|
||||
|
||||
if len(labelsGlobal) > 0 {
|
||||
// Make a copy of tss before adding extra labels in order to prevent
|
||||
// Make a copy of tss before adding extra labels to prevent
|
||||
// the added labels from affecting time series for other remoteWrite.url configs.
|
||||
rctx = getRelabelCtx()
|
||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||
|
||||
@@ -259,7 +259,7 @@ func compareGroups(t *testing.T, a, b *rule.Group) {
|
||||
for i, r := range a.Rules {
|
||||
got, want := r, b.Rules[i]
|
||||
if a.CreateID() != b.CreateID() {
|
||||
t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
|
||||
t.Fatalf("expected to have rule %d; got %d", want.ID(), got.ID())
|
||||
}
|
||||
if err := rule.CompareRules(t, want, got); err != nil {
|
||||
t.Fatalf("comparison error: %s", err)
|
||||
|
||||
@@ -11,7 +11,6 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
@@ -291,7 +290,7 @@ func GetTargets() map[TargetType][]Target {
|
||||
}
|
||||
|
||||
// Send sends alerts to all active notifiers
|
||||
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) *vmalertutil.ErrGroup {
|
||||
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) chan error {
|
||||
alertsToSend := make([]Alert, 0, len(alerts))
|
||||
lblss := make([][]prompb.Label, 0, len(alerts))
|
||||
// apply global relabel config first without modifying original alerts in alerts
|
||||
@@ -304,17 +303,18 @@ func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string
|
||||
lblss = append(lblss, lbls)
|
||||
}
|
||||
|
||||
errGr := new(vmalertutil.ErrGroup)
|
||||
wg := sync.WaitGroup{}
|
||||
activeNotifiers := getActiveNotifiers()
|
||||
errCh := make(chan error, len(activeNotifiers))
|
||||
defer close(errCh)
|
||||
for i := range activeNotifiers {
|
||||
nt := activeNotifiers[i]
|
||||
wg.Go(func() {
|
||||
if err := nt.Send(ctx, alertsToSend, lblss, notifierHeaders); err != nil {
|
||||
errGr.Add(fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err))
|
||||
errCh <- fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err)
|
||||
}
|
||||
})
|
||||
}
|
||||
wg.Wait()
|
||||
return errGr
|
||||
return errCh
|
||||
}
|
||||
|
||||
@@ -205,7 +205,9 @@ alert_relabel_configs:
|
||||
},
|
||||
}
|
||||
errG := Send(context.Background(), firingAlerts, nil)
|
||||
if errG.Err() != nil {
|
||||
t.Fatalf("unexpected error when sending alerts: %s", err)
|
||||
for err := range errG {
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error when sending alerts: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -818,7 +818,9 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
|
||||
expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])",
|
||||
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
|
||||
|
||||
res, _, err := q.Query(ctx, expr, ts)
|
||||
// query ALERTS_FOR_STATE at `ts-1s` instead of `ts` to avoid retrieving data written in the current run,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10335
|
||||
res, _, err := q.Query(ctx, expr, ts.Add(-1*time.Second))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
|
||||
}
|
||||
|
||||
106
app/vmalert/rule/alerting_synctest_test.go
Normal file
@@ -0,0 +1,106 @@
|
||||
//go:build synctest
|
||||
|
||||
package rule
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
)
|
||||
|
||||
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
|
||||
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
|
||||
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
|
||||
// wrap into synctest because of time manipulations
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
|
||||
ar := &AlertingRule{
|
||||
Name: "TestActiveAtPreservation",
|
||||
Labels: map[string]string{
|
||||
"test_query_in_label": `{{ "static_value" }}`,
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"description": "Alert active since {{ $activeAt }}",
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
q: fq,
|
||||
state: &ruleState{
|
||||
entries: make([]StateEntry, 10),
|
||||
},
|
||||
}
|
||||
|
||||
// Mock query result - return empty result to make suppress_for_mass_alert = false
|
||||
// (no need to add anything to fq for empty result)
|
||||
|
||||
// Add a metric that should trigger the alert
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
|
||||
|
||||
// First execution - creates new alert
|
||||
ts1 := time.Now()
|
||||
_, err := ar.exec(context.TODO(), ts1, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on first exec: %s", err)
|
||||
}
|
||||
|
||||
if len(ar.alerts) != 1 {
|
||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
||||
}
|
||||
|
||||
firstAlert := ar.GetAlerts()[0]
|
||||
// Verify first execution: activeAt should be ts1 and annotation should reflect it
|
||||
if !firstAlert.ActiveAt.Equal(ts1) {
|
||||
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
|
||||
}
|
||||
|
||||
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
|
||||
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
|
||||
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
|
||||
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Second execution - should preserve activeAt in annotation
|
||||
|
||||
// Ensure different timestamp with different seconds
|
||||
// sleep is non-blocking thanks to synctest
|
||||
time.Sleep(2 * time.Second)
|
||||
ts2 := time.Now()
|
||||
_, err = ar.exec(context.TODO(), ts2, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on second exec: %s", err)
|
||||
}
|
||||
|
||||
// Get the alert again (should be the same alert)
|
||||
if len(ar.alerts) != 1 {
|
||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
||||
}
|
||||
secondAlert := ar.GetAlerts()[0]
|
||||
|
||||
// Critical test: activeAt should still be ts1, not ts2
|
||||
if !secondAlert.ActiveAt.Equal(ts1) {
|
||||
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
|
||||
}
|
||||
|
||||
// Critical test: annotation should still contain ts1 time, not ts2
|
||||
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
|
||||
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Additional verification: annotation should NOT contain ts2 time
|
||||
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
|
||||
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
|
||||
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Verify query template in labels still works (this would fail if query templates were broken)
|
||||
if firstAlert.Labels["test_query_in_label"] != "static_value" {
|
||||
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -10,7 +10,6 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -1479,95 +1478,3 @@ func TestAlertingRule_QueryTemplateInLabels(t *testing.T) {
|
||||
t.Fatalf("expected 'suppress_for_mass_alert' label to be 'true' or 'false', got '%s'", suppressLabel)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
|
||||
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
|
||||
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
|
||||
// wrap into synctest because of time manipulations
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
|
||||
ar := &AlertingRule{
|
||||
Name: "TestActiveAtPreservation",
|
||||
Labels: map[string]string{
|
||||
"test_query_in_label": `{{ "static_value" }}`,
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"description": "Alert active since {{ $activeAt }}",
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
q: fq,
|
||||
state: &ruleState{
|
||||
entries: make([]StateEntry, 10),
|
||||
},
|
||||
}
|
||||
|
||||
// Mock query result - return empty result to make suppress_for_mass_alert = false
|
||||
// (no need to add anything to fq for empty result)
|
||||
|
||||
// Add a metric that should trigger the alert
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
|
||||
|
||||
// First execution - creates new alert
|
||||
ts1 := time.Now()
|
||||
_, err := ar.exec(context.TODO(), ts1, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on first exec: %s", err)
|
||||
}
|
||||
|
||||
if len(ar.alerts) != 1 {
|
||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
||||
}
|
||||
|
||||
firstAlert := ar.GetAlerts()[0]
|
||||
// Verify first execution: activeAt should be ts1 and annotation should reflect it
|
||||
if !firstAlert.ActiveAt.Equal(ts1) {
|
||||
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
|
||||
}
|
||||
|
||||
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
|
||||
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
|
||||
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
|
||||
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Second execution - should preserve activeAt in annotation
|
||||
|
||||
// Ensure different timestamp with different seconds
|
||||
// sleep is non-blocking thanks to synctest
|
||||
time.Sleep(2 * time.Second)
|
||||
ts2 := time.Now()
|
||||
_, err = ar.exec(context.TODO(), ts2, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on second exec: %s", err)
|
||||
}
|
||||
|
||||
// Get the alert again (should be the same alert)
|
||||
if len(ar.alerts) != 1 {
|
||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
||||
}
|
||||
secondAlert := ar.GetAlerts()[0]
|
||||
|
||||
// Critical test: activeAt should still be ts1, not ts2
|
||||
if !secondAlert.ActiveAt.Equal(ts1) {
|
||||
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
|
||||
}
|
||||
|
||||
// Critical test: annotation should still contain ts1 time, not ts2
|
||||
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
|
||||
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Additional verification: annotation should NOT contain ts2 time
|
||||
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
|
||||
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
|
||||
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Verify query template in labels still works (this would fail if query templates were broken)
|
||||
if firstAlert.Labels["test_query_in_label"] != "static_value" {
|
||||
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
@@ -374,7 +375,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
|
||||
g.infof("started")
|
||||
|
||||
eval := func(ctx context.Context, ts time.Time) {
|
||||
eval := func(ctx context.Context, ts time.Time) time.Time {
|
||||
g.metrics.iterationTotal.Inc()
|
||||
|
||||
start := time.Now()
|
||||
@@ -382,7 +383,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
if len(g.Rules) < 1 {
|
||||
g.metrics.iterationDuration.UpdateDuration(start)
|
||||
g.LastEvaluation = start
|
||||
return
|
||||
return ts
|
||||
}
|
||||
|
||||
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
|
||||
@@ -396,6 +397,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
}
|
||||
g.metrics.iterationDuration.UpdateDuration(start)
|
||||
g.LastEvaluation = start
|
||||
return ts
|
||||
}
|
||||
|
||||
evalCtx, cancel := context.WithCancel(ctx)
|
||||
@@ -404,7 +406,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
g.mu.Unlock()
|
||||
defer g.evalCancel()
|
||||
|
||||
eval(evalCtx, evalTS)
|
||||
realEvalTS := eval(evalCtx, evalTS)
|
||||
|
||||
t := time.NewTicker(g.Interval)
|
||||
defer t.Stop()
|
||||
@@ -412,7 +414,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
// restore the rules state after the first evaluation
|
||||
// so only active alerts can be restored.
|
||||
if rr != nil {
|
||||
err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
|
||||
err := g.restore(ctx, rr, realEvalTS, *remoteReadLookBack)
|
||||
if err != nil {
|
||||
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
|
||||
}
|
||||
@@ -755,6 +757,7 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
||||
return fmt.Errorf("rule %q: failed to execute: %w", r, err)
|
||||
}
|
||||
|
||||
var errG vmalertutil.ErrGroup
|
||||
if e.Rw != nil {
|
||||
pushToRW := func(tss []prompb.TimeSeries) error {
|
||||
var lastErr error
|
||||
@@ -766,20 +769,26 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
||||
return lastErr
|
||||
}
|
||||
if err := pushToRW(tss); err != nil {
|
||||
return err
|
||||
errG.Add(err)
|
||||
}
|
||||
}
|
||||
|
||||
ar, ok := r.(*AlertingRule)
|
||||
if !ok {
|
||||
return nil
|
||||
return errG.Err()
|
||||
}
|
||||
|
||||
alerts := ar.alertsToSend(resolveDuration, *resendDelay)
|
||||
if len(alerts) < 1 {
|
||||
return nil
|
||||
return errG.Err()
|
||||
}
|
||||
|
||||
errGr := notifier.Send(ctx, alerts, e.notifierHeaders)
|
||||
return errGr.Err()
|
||||
notifierErr := notifier.Send(ctx, alerts, e.notifierHeaders)
|
||||
for err := range notifierErr {
|
||||
if err != nil {
|
||||
errG.Add(fmt.Errorf("rule %q: notifier failure: %w", r, err))
|
||||
}
|
||||
}
|
||||
|
||||
return errG.Err()
|
||||
}
|
||||
|
||||
@@ -19,13 +19,13 @@ func CompareRules(t *testing.T, a, b Rule) error {
|
||||
case *AlertingRule:
|
||||
br, ok := b.(*AlertingRule)
|
||||
if !ok {
|
||||
return fmt.Errorf("rule %q supposed to be of type AlertingRule", b.ID())
|
||||
return fmt.Errorf("rule %d supposed to be of type AlertingRule", b.ID())
|
||||
}
|
||||
return compareAlertingRules(t, v, br)
|
||||
case *RecordingRule:
|
||||
br, ok := b.(*RecordingRule)
|
||||
if !ok {
|
||||
return fmt.Errorf("rule %q supposed to be of type RecordingRule", b.ID())
|
||||
return fmt.Errorf("rule %d supposed to be of type RecordingRule", b.ID())
|
||||
}
|
||||
return compareRecordingRules(t, v, br)
|
||||
default:
|
||||
|
||||
@@ -45,7 +45,7 @@ func (eg *ErrGroup) Error() string {
|
||||
return ""
|
||||
}
|
||||
var b strings.Builder
|
||||
fmt.Fprintf(&b, "errors(%d): ", len(eg.errs))
|
||||
fmt.Fprintf(&b, "errors(%d): \n", len(eg.errs))
|
||||
for i, err := range eg.errs {
|
||||
b.WriteString(err.Error())
|
||||
if i != len(eg.errs)-1 {
|
||||
|
||||
@@ -30,8 +30,8 @@ func TestErrGroup(t *testing.T) {
|
||||
}
|
||||
|
||||
f(nil, "")
|
||||
f([]error{errors.New("timeout")}, "errors(1): timeout")
|
||||
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): timeout\ndeadline")
|
||||
f([]error{errors.New("timeout")}, "errors(1): \ntimeout")
|
||||
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): \ntimeout\ndeadline")
|
||||
}
|
||||
|
||||
// TestErrGroupConcurrent is supposed to test concurrent
|
||||
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
@@ -55,6 +57,7 @@ func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
|
||||
return attempt, err // fail fast if not recoverable
|
||||
}
|
||||
attempt++
|
||||
retriesTotal.Inc()
|
||||
backoff := float64(b.minDuration) * math.Pow(b.factor, float64(i))
|
||||
dur := time.Duration(backoff)
|
||||
logger.Errorf("got error: %s on attempt: %d; will retry in %v", err, attempt, dur)
|
||||
@@ -74,3 +77,7 @@ func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
|
||||
}
|
||||
return attempt, fmt.Errorf("execution failed after %d retry attempts", b.retries)
|
||||
}
|
||||
|
||||
var (
|
||||
retriesTotal = metrics.NewCounter(`vmctl_backoff_retries_total`)
|
||||
)
|
||||
|
||||
@@ -14,6 +14,12 @@ const (
|
||||
globalSilent = "s"
|
||||
globalVerbose = "verbose"
|
||||
globalDisableProgressBar = "disable-progress-bar"
|
||||
|
||||
globalPushMetricsURL = "pushmetrics.url"
|
||||
globalPushMetricsInterval = "pushmetrics.interval"
|
||||
globalPushExtraLabels = "pushmetrics.extraLabel"
|
||||
globalPushHeaders = "pushmetrics.header"
|
||||
globalPushDisableCompression = "pushmetrics.disableCompression"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -33,6 +39,29 @@ var (
|
||||
Value: false,
|
||||
Usage: "Whether to disable progress bar during the import.",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: globalPushMetricsURL,
|
||||
Usage: "Optional URL to push metrics. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#push-metrics",
|
||||
},
|
||||
&cli.DurationFlag{
|
||||
Name: globalPushMetricsInterval,
|
||||
Value: 10 * time.Second,
|
||||
Usage: "Interval for pushing metrics to every -pushmetrics.url",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: globalPushExtraLabels,
|
||||
Usage: "Extra labels to add to pushed metrics. In case of collision, label value defined by flag will have priority. " +
|
||||
"Flag can be set multiple times, to add few additional labels. " +
|
||||
"For example, -pushmetrics.extraLabel='instance=\"foo\"' adds instance=\"foo\" label to all the metrics pushed to every -pushmetrics.url",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: globalPushHeaders,
|
||||
Usage: "Optional HTTP headers to add to pushed metrics. Flag can be set multiple times, to add few additional headers.",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: globalPushDisableCompression,
|
||||
Usage: "Whether to disable compression when pushing metrics.",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
"log"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
@@ -52,6 +54,7 @@ func (ip *influxProcessor) run(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
influxSeriesTotal.Add(len(series))
|
||||
bar := barpool.AddWithTemplate(fmt.Sprintf(barTpl, "Processing series"), len(series))
|
||||
if err := barpool.Start(); err != nil {
|
||||
return err
|
||||
@@ -67,9 +70,11 @@ func (ip *influxProcessor) run(ctx context.Context) error {
|
||||
wg.Go(func() {
|
||||
for s := range seriesCh {
|
||||
if err := ip.do(s); err != nil {
|
||||
influxErrorsTotal.Inc()
|
||||
errCh <- fmt.Errorf("request failed for %q.%q: %s", s.Measurement, s.Field, err)
|
||||
return
|
||||
}
|
||||
influxSeriesProcessed.Inc()
|
||||
bar.Increment()
|
||||
}
|
||||
})
|
||||
@@ -81,6 +86,7 @@ func (ip *influxProcessor) run(ctx context.Context) error {
|
||||
case infErr := <-errCh:
|
||||
return fmt.Errorf("influx error: %s", infErr)
|
||||
case vmErr := <-ip.im.Errors():
|
||||
influxErrorsTotal.Inc()
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, ip.isVerbose))
|
||||
case seriesCh <- s:
|
||||
}
|
||||
@@ -93,6 +99,7 @@ func (ip *influxProcessor) run(ctx context.Context) error {
|
||||
// drain import errors channel
|
||||
for vmErr := range ip.im.Errors() {
|
||||
if vmErr.Err != nil {
|
||||
influxErrorsTotal.Inc()
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, ip.isVerbose))
|
||||
}
|
||||
}
|
||||
@@ -167,3 +174,9 @@ func (ip *influxProcessor) do(s *influx.Series) error {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
influxSeriesTotal = metrics.NewCounter(`vmctl_influx_migration_series_total`)
|
||||
influxSeriesProcessed = metrics.NewCounter(`vmctl_influx_migration_series_processed`)
|
||||
influxErrorsTotal = metrics.NewCounter(`vmctl_influx_migration_errors_total`)
|
||||
)
|
||||
|
||||
@@ -4,6 +4,8 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
)
|
||||
|
||||
@@ -45,9 +47,16 @@ func (l *Limiter) Register(dataLen int) {
|
||||
t := timerpool.Get(d)
|
||||
<-t.C
|
||||
timerpool.Put(t)
|
||||
limiterThrottleEventsTotal.Inc()
|
||||
}
|
||||
l.budget += limit
|
||||
l.deadline = time.Now().Add(time.Second)
|
||||
}
|
||||
l.budget -= int64(dataLen)
|
||||
limiterBytesProcessed.Add(dataLen)
|
||||
}
|
||||
|
||||
var (
|
||||
limiterBytesProcessed = metrics.NewCounter(`vmctl_limiter_bytes_processed_total`)
|
||||
limiterThrottleEventsTotal = metrics.NewCounter(`vmctl_limiter_throttle_events_total`)
|
||||
)
|
||||
|
||||
@@ -2,6 +2,7 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
@@ -19,7 +20,9 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/native"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/remoteread"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/opentsdb"
|
||||
@@ -41,11 +44,20 @@ func main() {
|
||||
ctx, cancelCtx := context.WithCancel(context.Background())
|
||||
start := time.Now()
|
||||
beforeFn := func(c *cli.Context) error {
|
||||
flag.Parse()
|
||||
logger.Init()
|
||||
isSilent = c.Bool(globalSilent)
|
||||
if c.Bool(globalDisableProgressBar) {
|
||||
barpool.Disable(true)
|
||||
}
|
||||
netutil.EnableIPv6()
|
||||
pushmetrics.InitWith(&pushmetrics.Config{
|
||||
URLs: c.StringSlice(globalPushMetricsURL),
|
||||
Interval: c.Duration(globalPushMetricsInterval),
|
||||
ExtraLabels: c.StringSlice(globalPushExtraLabels),
|
||||
DisableCompression: c.Bool(globalPushDisableCompression),
|
||||
Headers: c.StringSlice(globalPushHeaders),
|
||||
})
|
||||
return nil
|
||||
}
|
||||
app := &cli.App{
|
||||
@@ -451,6 +463,7 @@ func main() {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
log.Printf("Total time: %v", time.Since(start))
|
||||
pushmetrics.StopAndPush()
|
||||
}
|
||||
|
||||
func initConfigVM(c *cli.Context) (vm.Config, error) {
|
||||
|
||||
@@ -8,6 +8,8 @@ import (
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/auth"
|
||||
)
|
||||
|
||||
@@ -36,12 +38,15 @@ type Response struct {
|
||||
|
||||
// Explore finds metric names by provided filter from api/v1/label/__name__/values
|
||||
func (c *Client) Explore(ctx context.Context, f Filter, tenantID string, start, end time.Time) ([]string, error) {
|
||||
startTime := time.Now()
|
||||
exploreRequestsTotal.Inc()
|
||||
url := fmt.Sprintf("%s/%s", c.Addr, nativeMetricNamesAddr)
|
||||
if tenantID != "" {
|
||||
url = fmt.Sprintf("%s/select/%s/prometheus/%s", c.Addr, tenantID, nativeMetricNamesAddr)
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
exploreRequestsErrorsTotal.Inc()
|
||||
return nil, fmt.Errorf("cannot create request to %q: %s", url, err)
|
||||
}
|
||||
|
||||
@@ -53,37 +58,53 @@ func (c *Client) Explore(ctx context.Context, f Filter, tenantID string, start,
|
||||
|
||||
resp, err := c.do(req, http.StatusOK)
|
||||
if err != nil {
|
||||
exploreRequestsErrorsTotal.Inc()
|
||||
exploreDuration.UpdateDuration(startTime)
|
||||
return nil, fmt.Errorf("series request failed: %s", err)
|
||||
}
|
||||
|
||||
var response Response
|
||||
if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
|
||||
exploreRequestsErrorsTotal.Inc()
|
||||
exploreDuration.UpdateDuration(startTime)
|
||||
return nil, fmt.Errorf("cannot decode series response: %s", err)
|
||||
}
|
||||
exploreDuration.UpdateDuration(startTime)
|
||||
return response.MetricNames, resp.Body.Close()
|
||||
}
|
||||
|
||||
// ImportPipe uses pipe reader in request to process data
|
||||
func (c *Client) ImportPipe(ctx context.Context, dstURL string, pr *io.PipeReader) error {
|
||||
startTime := time.Now()
|
||||
importRequestsTotal.Inc()
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, dstURL, pr)
|
||||
if err != nil {
|
||||
importRequestsErrorsTotal.Inc()
|
||||
return fmt.Errorf("cannot create import request to %q: %s", c.Addr, err)
|
||||
}
|
||||
|
||||
importResp, err := c.do(req, http.StatusNoContent)
|
||||
if err != nil {
|
||||
importRequestsErrorsTotal.Inc()
|
||||
importDuration.UpdateDuration(startTime)
|
||||
return fmt.Errorf("import request failed: %s", err)
|
||||
}
|
||||
if err := importResp.Body.Close(); err != nil {
|
||||
importRequestsErrorsTotal.Inc()
|
||||
importDuration.UpdateDuration(startTime)
|
||||
return fmt.Errorf("cannot close import response body: %s", err)
|
||||
}
|
||||
importDuration.UpdateDuration(startTime)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ExportPipe makes request by provided filter and return io.ReadCloser which can be used to get data
|
||||
func (c *Client) ExportPipe(ctx context.Context, url string, f Filter) (io.ReadCloser, error) {
|
||||
startTime := time.Now()
|
||||
exportRequestsTotal.Inc()
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
exportRequestsErrorsTotal.Inc()
|
||||
return nil, fmt.Errorf("cannot create request to %q: %s", c.Addr, err)
|
||||
}
|
||||
|
||||
@@ -102,8 +123,11 @@ func (c *Client) ExportPipe(ctx context.Context, url string, f Filter) (io.ReadC
|
||||
|
||||
resp, err := c.do(req, http.StatusOK)
|
||||
if err != nil {
|
||||
exportRequestsErrorsTotal.Inc()
|
||||
exportDuration.UpdateDuration(startTime)
|
||||
return nil, fmt.Errorf("export request failed: %w", err)
|
||||
}
|
||||
exportDuration.UpdateDuration(startTime)
|
||||
return resp.Body, nil
|
||||
}
|
||||
|
||||
@@ -162,3 +186,16 @@ func (c *Client) do(req *http.Request, expSC int) (*http.Response, error) {
|
||||
}
|
||||
return resp, err
|
||||
}
|
||||
|
||||
var (
|
||||
importRequestsTotal = metrics.NewCounter(`vmctl_vm_native_requests_total{type="import"}`)
|
||||
exportRequestsTotal = metrics.NewCounter(`vmctl_vm_native_requests_total{type="export"}`)
|
||||
exploreRequestsTotal = metrics.NewCounter(`vmctl_vm_native_requests_total{type="explore"}`)
|
||||
importRequestsErrorsTotal = metrics.NewCounter(`vmctl_vm_native_request_errors_total{type="import"}`)
|
||||
exportRequestsErrorsTotal = metrics.NewCounter(`vmctl_vm_native_request_errors_total{type="export"}`)
|
||||
exploreRequestsErrorsTotal = metrics.NewCounter(`vmctl_vm_native_request_errors_total{type="explore"}`)
|
||||
|
||||
importDuration = metrics.NewHistogram(`vmctl_vm_native_import_duration_seconds`)
|
||||
exportDuration = metrics.NewHistogram(`vmctl_vm_native_export_duration_seconds`)
|
||||
exploreDuration = metrics.NewHistogram(`vmctl_vm_native_explore_duration_seconds`)
|
||||
)
|
||||
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
vmetrics "github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/opentsdb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/cheggaaa/pb/v3"
|
||||
@@ -57,6 +59,7 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
|
||||
if !prompt(ctx, question) {
|
||||
return nil
|
||||
}
|
||||
|
||||
op.im.ResetStats()
|
||||
var startTime int64
|
||||
if op.oc.HardTS != 0 {
|
||||
@@ -84,6 +87,7 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
|
||||
seriesCh := make(chan queryObj, op.otsdbcc)
|
||||
errCh := make(chan error)
|
||||
// we're going to make serieslist * queryRanges queries, so we should represent that in the progress bar
|
||||
otsdbSeriesTotal.Add(len(serieslist) * queryRanges)
|
||||
bar := pb.StartNew(len(serieslist) * queryRanges)
|
||||
defer func(bar *pb.ProgressBar) {
|
||||
bar.Finish()
|
||||
@@ -93,9 +97,11 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
|
||||
wg.Go(func() {
|
||||
for s := range seriesCh {
|
||||
if err := op.do(s); err != nil {
|
||||
otsdbErrorsTotal.Inc()
|
||||
errCh <- fmt.Errorf("couldn't retrieve series for %s : %s", metric, err)
|
||||
return
|
||||
}
|
||||
otsdbSeriesProcessed.Inc()
|
||||
bar.Increment()
|
||||
}
|
||||
})
|
||||
@@ -115,6 +121,7 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
|
||||
case otsdbErr := <-errCh:
|
||||
return fmt.Errorf("opentsdb error: %s", otsdbErr)
|
||||
case vmErr := <-op.im.Errors():
|
||||
otsdbErrorsTotal.Inc()
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, op.isVerbose))
|
||||
case seriesCh <- queryObj{
|
||||
Tr: tr, StartTime: startTime,
|
||||
@@ -139,6 +146,7 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
|
||||
op.im.Close()
|
||||
for vmErr := range op.im.Errors() {
|
||||
if vmErr.Err != nil {
|
||||
otsdbErrorsTotal.Inc()
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, op.isVerbose))
|
||||
}
|
||||
}
|
||||
@@ -169,3 +177,9 @@ func (op *otsdbProcessor) do(s queryObj) error {
|
||||
}
|
||||
return op.im.Input(&ts)
|
||||
}
|
||||
|
||||
var (
|
||||
otsdbSeriesTotal = vmetrics.NewCounter(`vmctl_opentsdb_migration_series_total`)
|
||||
otsdbSeriesProcessed = vmetrics.NewCounter(`vmctl_opentsdb_migration_series_processed`)
|
||||
otsdbErrorsTotal = vmetrics.NewCounter(`vmctl_opentsdb_migration_errors_total`)
|
||||
)
|
||||
|
||||
@@ -109,7 +109,7 @@ func (c Client) FindMetrics(q string) ([]string, error) {
|
||||
return nil, fmt.Errorf("failed to send GET request to %q: %s", q, err)
|
||||
}
|
||||
if resp.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("bad return from OpenTSDB: %q: %v", resp.StatusCode, resp)
|
||||
return nil, fmt.Errorf("bad return from OpenTSDB: %d: %v", resp.StatusCode, resp)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
@@ -133,7 +133,7 @@ func (c Client) FindSeries(metric string) ([]Meta, error) {
|
||||
return nil, fmt.Errorf("failed to set GET request to %q: %s", q, err)
|
||||
}
|
||||
if resp.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("bad return from OpenTSDB: %q: %v", resp.StatusCode, resp)
|
||||
return nil, fmt.Errorf("bad return from OpenTSDB: %d: %v", resp.StatusCode, resp)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
|
||||
@@ -11,6 +11,8 @@ import (
|
||||
"github.com/prometheus/prometheus/tsdb"
|
||||
"github.com/prometheus/prometheus/tsdb/chunkenc"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
@@ -113,6 +115,7 @@ func (pp *prometheusProcessor) do(b tsdb.BlockReader) error {
|
||||
}
|
||||
|
||||
func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
|
||||
promBlocksTotal.Add(len(blocks))
|
||||
bar := barpool.AddWithTemplate(fmt.Sprintf(barTpl, "Processing blocks"), len(blocks))
|
||||
if err := barpool.Start(); err != nil {
|
||||
return err
|
||||
@@ -128,9 +131,11 @@ func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
|
||||
wg.Go(func() {
|
||||
for br := range blockReadersCh {
|
||||
if err := pp.do(br); err != nil {
|
||||
promErrorsTotal.Inc()
|
||||
errCh <- fmt.Errorf("read failed for block %q: %s", br.Meta().ULID, err)
|
||||
return
|
||||
}
|
||||
promBlocksProcessed.Inc()
|
||||
bar.Increment()
|
||||
}
|
||||
})
|
||||
@@ -143,6 +148,7 @@ func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
|
||||
return fmt.Errorf("prometheus error: %s", promErr)
|
||||
case vmErr := <-pp.im.Errors():
|
||||
close(blockReadersCh)
|
||||
promErrorsTotal.Inc()
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, pp.isVerbose))
|
||||
case blockReadersCh <- br:
|
||||
}
|
||||
@@ -156,6 +162,7 @@ func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
|
||||
// drain import errors channel
|
||||
for vmErr := range pp.im.Errors() {
|
||||
if vmErr.Err != nil {
|
||||
promErrorsTotal.Inc()
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, pp.isVerbose))
|
||||
}
|
||||
}
|
||||
@@ -165,3 +172,9 @@ func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
promBlocksTotal = metrics.NewCounter(`vmctl_prometheus_migration_blocks_total`)
|
||||
promBlocksProcessed = metrics.NewCounter(`vmctl_prometheus_migration_blocks_processed`)
|
||||
promErrorsTotal = metrics.NewCounter(`vmctl_prometheus_migration_errors_total`)
|
||||
)
|
||||
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/remoteread"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/stepper"
|
||||
@@ -51,6 +53,7 @@ func (rrp *remoteReadProcessor) run(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
remoteReadRangesTotal.Add(len(ranges))
|
||||
bar := barpool.AddWithTemplate(fmt.Sprintf(barTpl, "Processing ranges"), len(ranges))
|
||||
if err := barpool.Start(); err != nil {
|
||||
return err
|
||||
@@ -70,9 +73,11 @@ func (rrp *remoteReadProcessor) run(ctx context.Context) error {
|
||||
wg.Go(func() {
|
||||
for r := range rangeC {
|
||||
if err := rrp.do(ctx, r); err != nil {
|
||||
remoteReadErrorsTotal.Inc()
|
||||
errCh <- fmt.Errorf("request failed for: %s", err)
|
||||
return
|
||||
}
|
||||
remoteReadRangesProcessed.Inc()
|
||||
bar.Increment()
|
||||
}
|
||||
})
|
||||
@@ -83,6 +88,7 @@ func (rrp *remoteReadProcessor) run(ctx context.Context) error {
|
||||
case infErr := <-errCh:
|
||||
return fmt.Errorf("remote read error: %s", infErr)
|
||||
case vmErr := <-rrp.dst.Errors():
|
||||
remoteReadErrorsTotal.Inc()
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, rrp.isVerbose))
|
||||
case rangeC <- &remoteread.Filter{
|
||||
StartTimestampMs: r[0].UnixMilli(),
|
||||
@@ -98,6 +104,7 @@ func (rrp *remoteReadProcessor) run(ctx context.Context) error {
|
||||
// drain import errors channel
|
||||
for vmErr := range rrp.dst.Errors() {
|
||||
if vmErr.Err != nil {
|
||||
remoteReadErrorsTotal.Inc()
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, rrp.isVerbose))
|
||||
}
|
||||
}
|
||||
@@ -118,3 +125,9 @@ func (rrp *remoteReadProcessor) do(ctx context.Context, filter *remoteread.Filte
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
var (
|
||||
remoteReadRangesTotal = metrics.NewCounter(`vmctl_remote_read_migration_ranges_total`)
|
||||
remoteReadRangesProcessed = metrics.NewCounter(`vmctl_remote_read_migration_ranges_processed`)
|
||||
remoteReadErrorsTotal = metrics.NewCounter(`vmctl_remote_read_migration_errors_total`)
|
||||
)
|
||||
|
||||
@@ -12,6 +12,8 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/backoff"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/limiter"
|
||||
@@ -80,6 +82,12 @@ type Importer struct {
|
||||
|
||||
s *stats
|
||||
backoff *backoff.Backoff
|
||||
|
||||
importRequestsTotal *metrics.Counter
|
||||
importRequestsErrorsTotal *metrics.Counter
|
||||
importSamplesTotal *metrics.Counter
|
||||
importBytesTotal *metrics.Counter
|
||||
importDuration *metrics.Histogram
|
||||
}
|
||||
|
||||
// ResetStats resets im stats.
|
||||
@@ -147,6 +155,12 @@ func NewImporter(ctx context.Context, cfg Config) (*Importer, error) {
|
||||
input: make(chan *TimeSeries, cfg.Concurrency*4),
|
||||
errors: make(chan *ImportError, cfg.Concurrency),
|
||||
backoff: cfg.Backoff,
|
||||
|
||||
importRequestsTotal: metrics.GetOrCreateCounter(`vmctl_importer_requests_total`),
|
||||
importRequestsErrorsTotal: metrics.GetOrCreateCounter(`vmctl_importer_request_errors_total`),
|
||||
importSamplesTotal: metrics.GetOrCreateCounter(`vmctl_importer_samples_total`),
|
||||
importBytesTotal: metrics.GetOrCreateCounter(`vmctl_importer_bytes_total`),
|
||||
importDuration: metrics.GetOrCreateHistogram(`vmctl_importer_request_duration_seconds`),
|
||||
}
|
||||
if err := im.Ping(); err != nil {
|
||||
return nil, fmt.Errorf("ping to %q failed: %s", addr, err)
|
||||
@@ -311,9 +325,13 @@ func (im *Importer) Import(tsBatch []*TimeSeries) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
startTime := time.Now()
|
||||
im.importRequestsTotal.Inc()
|
||||
|
||||
pr, pw := io.Pipe()
|
||||
req, err := http.NewRequest(http.MethodPost, im.importPath, pr)
|
||||
if err != nil {
|
||||
im.importRequestsErrorsTotal.Inc()
|
||||
return fmt.Errorf("cannot create request to %q: %s", im.addr, err)
|
||||
}
|
||||
if im.user != "" {
|
||||
@@ -333,6 +351,7 @@ func (im *Importer) Import(tsBatch []*TimeSeries) error {
|
||||
if im.compress {
|
||||
zw, err := gzip.NewWriterLevel(w, 1)
|
||||
if err != nil {
|
||||
im.importRequestsErrorsTotal.Inc()
|
||||
return fmt.Errorf("unexpected error when creating gzip writer: %s", err)
|
||||
}
|
||||
w = zw
|
||||
@@ -344,29 +363,39 @@ func (im *Importer) Import(tsBatch []*TimeSeries) error {
|
||||
for _, ts := range tsBatch {
|
||||
n, err := ts.write(bw)
|
||||
if err != nil {
|
||||
im.importRequestsErrorsTotal.Inc()
|
||||
return fmt.Errorf("write err: %w", err)
|
||||
}
|
||||
totalBytes += n
|
||||
totalSamples += len(ts.Values)
|
||||
}
|
||||
if err := bw.Flush(); err != nil {
|
||||
im.importRequestsErrorsTotal.Inc()
|
||||
return err
|
||||
}
|
||||
if closer, ok := w.(io.Closer); ok {
|
||||
err := closer.Close()
|
||||
if err != nil {
|
||||
im.importRequestsErrorsTotal.Inc()
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := pw.Close(); err != nil {
|
||||
im.importRequestsErrorsTotal.Inc()
|
||||
return err
|
||||
}
|
||||
|
||||
requestErr := <-errCh
|
||||
if requestErr != nil {
|
||||
im.importRequestsErrorsTotal.Inc()
|
||||
im.importDuration.UpdateDuration(startTime)
|
||||
return fmt.Errorf("import request error for %q: %w", im.addr, requestErr)
|
||||
}
|
||||
|
||||
im.importSamplesTotal.Add(totalSamples)
|
||||
im.importBytesTotal.Add(totalBytes)
|
||||
im.importDuration.UpdateDuration(startTime)
|
||||
|
||||
im.s.Lock()
|
||||
im.s.bytes += uint64(totalBytes)
|
||||
im.s.samples += uint64(totalSamples)
|
||||
|
||||
@@ -9,6 +9,8 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/backoff"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/limiter"
|
||||
@@ -82,13 +84,19 @@ func (p *vmNativeProcessor) run(ctx context.Context) error {
|
||||
if !prompt(ctx, question) {
|
||||
return nil
|
||||
}
|
||||
migrationTenantsTotal.Set(uint64(len(tenants)))
|
||||
}
|
||||
|
||||
for _, tenantID := range tenants {
|
||||
err := p.runBackfilling(ctx, tenantID, ranges)
|
||||
if err != nil {
|
||||
migrationErrorsTotal.Inc()
|
||||
return fmt.Errorf("migration failed: %s", err)
|
||||
}
|
||||
|
||||
if p.interCluster {
|
||||
migrationTenantsProcessed.Inc()
|
||||
}
|
||||
}
|
||||
|
||||
log.Println("Import finished!")
|
||||
@@ -156,6 +164,7 @@ func (p *vmNativeProcessor) runSingle(ctx context.Context, f native.Filter, srcU
|
||||
p.s.bytes += uint64(written)
|
||||
p.s.requests++
|
||||
p.s.Unlock()
|
||||
migrationBytesTransferredTotal.AddInt64(written)
|
||||
|
||||
if err := pw.Close(); err != nil {
|
||||
return err
|
||||
@@ -199,7 +208,7 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
|
||||
|
||||
var foundSeriesMsg string
|
||||
var requestsToMake int
|
||||
var metrics = map[string][][]time.Time{
|
||||
var metricsMap = map[string][][]time.Time{
|
||||
"": ranges,
|
||||
}
|
||||
|
||||
@@ -211,11 +220,11 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
|
||||
|
||||
if !p.disablePerMetricRequests {
|
||||
format = fmt.Sprintf(nativeWithBackoffTpl, barPrefix)
|
||||
metrics, err = p.explore(ctx, p.src, tenantID, ranges)
|
||||
metricsMap, err = p.explore(ctx, p.src, tenantID, ranges)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to explore metric names: %s", err)
|
||||
}
|
||||
if len(metrics) == 0 {
|
||||
if len(metricsMap) == 0 {
|
||||
errMsg := "no metrics found"
|
||||
if tenantID != "" {
|
||||
errMsg = fmt.Sprintf("%s for tenant id: %s", errMsg, tenantID)
|
||||
@@ -223,10 +232,14 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
|
||||
log.Println(errMsg)
|
||||
return nil
|
||||
}
|
||||
for _, m := range metrics {
|
||||
for _, m := range metricsMap {
|
||||
requestsToMake += len(m)
|
||||
}
|
||||
foundSeriesMsg = fmt.Sprintf("Found %d unique metric names to import. Total import/export requests to make %d", len(metrics), requestsToMake)
|
||||
foundSeriesMsg = fmt.Sprintf("Found %d unique metric names to import. Total import/export requests to make %d", len(metricsMap), requestsToMake)
|
||||
|
||||
migrationMetricsTotal.Add(len(metricsMap))
|
||||
} else {
|
||||
requestsToMake = len(ranges)
|
||||
}
|
||||
|
||||
if !p.interCluster {
|
||||
@@ -240,6 +253,7 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
|
||||
log.Print(foundSeriesMsg)
|
||||
}
|
||||
|
||||
migrationRequestsPlanned.Add(requestsToMake)
|
||||
bar := barpool.NewSingleProgress(format, requestsToMake)
|
||||
bar.Start()
|
||||
defer bar.Finish()
|
||||
@@ -263,12 +277,13 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
|
||||
return
|
||||
}
|
||||
}
|
||||
migrationRequestsCompleted.Inc()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// any error breaks the import
|
||||
for mName, mRanges := range metrics {
|
||||
for mName, mRanges := range metricsMap {
|
||||
match, err := buildMatchWithFilter(p.filter.Match, mName)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to build filter %q for metric name %q: %s", p.filter.Match, mName, err)
|
||||
@@ -288,6 +303,9 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
|
||||
}:
|
||||
}
|
||||
}
|
||||
if !p.disablePerMetricRequests {
|
||||
migrationMetricsProcessed.Inc()
|
||||
}
|
||||
}
|
||||
|
||||
close(filterCh)
|
||||
@@ -396,3 +414,18 @@ func buildMatchWithFilter(filter string, metricName string) (string, error) {
|
||||
match := "{" + strings.Join(filters, " or ") + "}"
|
||||
return match, nil
|
||||
}
|
||||
|
||||
var (
|
||||
migrationMetricsTotal = metrics.NewCounter(`vmctl_vm_native_migration_metrics_total`)
|
||||
migrationMetricsProcessed = metrics.NewCounter(`vmctl_vm_native_migration_metrics_processed`)
|
||||
|
||||
migrationRequestsPlanned = metrics.NewCounter(`vmctl_vm_native_migration_requests_planned`)
|
||||
migrationRequestsCompleted = metrics.NewCounter(`vmctl_vm_native_migration_requests_completed`)
|
||||
|
||||
migrationErrorsTotal = metrics.NewCounter(`vmctl_vm_native_migration_errors_total`)
|
||||
|
||||
migrationTenantsTotal = metrics.NewCounter(`vmctl_vm_native_migration_tenants_total`)
|
||||
migrationTenantsProcessed = metrics.NewCounter(`vmctl_vm_native_migration_tenants_processed`)
|
||||
|
||||
migrationBytesTransferredTotal = metrics.NewCounter(`vmctl_vm_native_migration_bytes_transferred_total`)
|
||||
)
|
||||
|
||||
@@ -182,6 +182,7 @@ func (ctx *InsertCtx) WriteMetadata(mmpbs []prompb.MetricMetadata) error {
|
||||
mm.Type = mmpb.Type
|
||||
mm.Unit = bytesutil.ToUnsafeBytes(mmpb.Unit)
|
||||
}
|
||||
ctx.mms = mms
|
||||
|
||||
err := vmstorage.AddMetadataRows(mms)
|
||||
if err != nil {
|
||||
@@ -206,6 +207,7 @@ func (ctx *InsertCtx) WritePromMetadata(mmps []prometheus.Metadata) error {
|
||||
mm.Help = bytesutil.ToUnsafeBytes(mmpb.Help)
|
||||
mm.Type = mmpb.Type
|
||||
}
|
||||
ctx.mms = mms
|
||||
|
||||
err := vmstorage.AddMetadataRows(mms)
|
||||
if err != nil {
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
@@ -196,12 +197,17 @@ func newNextSeriesForSearchQuery(ec *evalConfig, sq *storage.SearchQuery, expr g
|
||||
pathExpression: safePathExpression(expr),
|
||||
}
|
||||
s.summarize(aggrAvg, ec.startTime, ec.endTime, ec.storageStep, 0)
|
||||
t := timerpool.Get(30 * time.Second)
|
||||
|
||||
// A negative or zero duration will cause timer.C to return immediately
|
||||
remainingTimeout := ec.deadline.Deadline() - fasttime.UnixTimestamp()
|
||||
t := timerpool.Get(time.Duration(remainingTimeout) * time.Second)
|
||||
defer timerpool.Put(t)
|
||||
|
||||
select {
|
||||
case seriesCh <- s:
|
||||
case <-t.C:
|
||||
logger.Errorf("resource leak when processing the %s (full query: %s); please report this error to VictoriaMetrics developers",
|
||||
logger.Errorf("reached timeout when processing the %s (full query: %s), it can be due to the amount of storageNodes configured in vmselect is more than vmselect’s available CPU count "+
|
||||
"or vmselect is heavy loaded. Consider adding resources or increasing `-search.maxQueryDuration` or `timeout` parameter in the query.",
|
||||
expr.AppendString(nil), ec.originalQuery)
|
||||
}
|
||||
return nil
|
||||
|
||||
@@ -82,7 +82,7 @@ func RenderHandler(startTime time.Time, w http.ResponseWriter, r *http.Request)
|
||||
if s := r.FormValue("maxDataPoints"); len(s) > 0 {
|
||||
n, err := strconv.ParseFloat(s, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse maxDataPoints=%q: %w", maxDataPoints, err)
|
||||
return fmt.Errorf("cannot parse maxDataPoints=%d: %w", maxDataPoints, err)
|
||||
}
|
||||
if n <= 0 {
|
||||
return fmt.Errorf("maxDataPoints must be greater than 0; got %f", n)
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
@@ -578,6 +579,7 @@ func mergeSortBlocks(dst *Result, sbh *sortBlocksHeap, dedupInterval int64) {
|
||||
return
|
||||
}
|
||||
heap.Init(sbh)
|
||||
var dedupSamples int
|
||||
for {
|
||||
sbs := sbh.sbs
|
||||
top := sbs[0]
|
||||
@@ -593,6 +595,7 @@ func mergeSortBlocks(dst *Result, sbh *sortBlocksHeap, dedupInterval int64) {
|
||||
if n := equalSamplesPrefix(top, sbNext); n > 0 && dedupInterval > 0 {
|
||||
// Skip n replicated samples at top if deduplication is enabled.
|
||||
top.NextIdx = topNextIdx + n
|
||||
dedupSamples += n
|
||||
} else {
|
||||
// Copy samples from top to dst with timestamps not exceeding tsNext.
|
||||
top.NextIdx = topNextIdx + binarySearchTimestamps(top.Timestamps[topNextIdx:], tsNext)
|
||||
@@ -607,8 +610,8 @@ func mergeSortBlocks(dst *Result, sbh *sortBlocksHeap, dedupInterval int64) {
|
||||
}
|
||||
}
|
||||
timestamps, values := storage.DeduplicateSamples(dst.Timestamps, dst.Values, dedupInterval)
|
||||
dedups := len(dst.Timestamps) - len(timestamps)
|
||||
dedupsDuringSelect.Add(dedups)
|
||||
dedupSamples += len(dst.Timestamps) - len(timestamps)
|
||||
dedupsDuringSelect.Add(dedupSamples)
|
||||
dst.Timestamps = timestamps
|
||||
dst.Values = values
|
||||
}
|
||||
@@ -634,7 +637,7 @@ func equalTimestampsPrefix(a, b []int64) int {
|
||||
|
||||
func equalValuesPrefix(a, b []float64) int {
|
||||
for i, v := range a {
|
||||
if i >= len(b) || v != b[i] {
|
||||
if i >= len(b) || math.Float64bits(v) != math.Float64bits(b[i]) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
package netstorage
|
||||
|
||||
import (
|
||||
"math"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
)
|
||||
|
||||
func TestMergeSortBlocks(t *testing.T) {
|
||||
@@ -194,3 +197,111 @@ func TestMergeSortBlocks(t *testing.T) {
|
||||
Values: []float64{7, 24, 26},
|
||||
})
|
||||
}
|
||||
|
||||
func TestEqualSamplesPrefix(t *testing.T) {
|
||||
f := func(a, b *sortBlock, expected int) {
|
||||
t.Helper()
|
||||
|
||||
actual := equalSamplesPrefix(a, b)
|
||||
if actual != expected {
|
||||
t.Fatalf("unexpected result: got %d, want %d", actual, expected)
|
||||
}
|
||||
}
|
||||
|
||||
// Empty blocks
|
||||
f(&sortBlock{}, &sortBlock{}, 0)
|
||||
|
||||
// Identical blocks
|
||||
f(&sortBlock{
|
||||
Timestamps: []int64{1, 2, 3, 4},
|
||||
Values: []float64{5, 6, 7, 8},
|
||||
}, &sortBlock{
|
||||
Timestamps: []int64{1, 2, 3, 4},
|
||||
Values: []float64{5, 6, 7, 8},
|
||||
}, 4)
|
||||
|
||||
// Non-zero NextIdx
|
||||
f(&sortBlock{
|
||||
Timestamps: []int64{1, 2, 3, 4},
|
||||
Values: []float64{5, 6, 7, 8},
|
||||
NextIdx: 2,
|
||||
}, &sortBlock{
|
||||
Timestamps: []int64{10, 20, 3, 4},
|
||||
Values: []float64{50, 60, 7, 8},
|
||||
NextIdx: 2,
|
||||
}, 2)
|
||||
|
||||
// Non-zero NextIdx with mismatch
|
||||
f(&sortBlock{
|
||||
Timestamps: []int64{1, 2, 3, 4},
|
||||
Values: []float64{5, 6, 7, 8},
|
||||
NextIdx: 1,
|
||||
}, &sortBlock{
|
||||
Timestamps: []int64{10, 2, 3, 4},
|
||||
Values: []float64{50, 6, 7, 80},
|
||||
NextIdx: 1,
|
||||
}, 2)
|
||||
|
||||
// Different lengths
|
||||
f(&sortBlock{
|
||||
Timestamps: []int64{1, 2, 3, 4},
|
||||
Values: []float64{5, 6, 7, 8},
|
||||
}, &sortBlock{
|
||||
Timestamps: []int64{1, 2, 3},
|
||||
Values: []float64{5, 6, 7},
|
||||
}, 3)
|
||||
|
||||
// Timestamps diverge
|
||||
f(&sortBlock{
|
||||
Timestamps: []int64{1, 2, 3, 4},
|
||||
Values: []float64{5, 6, 7, 8},
|
||||
}, &sortBlock{
|
||||
Timestamps: []int64{1, 2, 30, 4},
|
||||
Values: []float64{5, 6, 7, 8},
|
||||
}, 2)
|
||||
|
||||
// Values diverge
|
||||
f(&sortBlock{
|
||||
Timestamps: []int64{1, 2, 3, 4},
|
||||
Values: []float64{5, 6, 7, 8},
|
||||
}, &sortBlock{
|
||||
Timestamps: []int64{1, 2, 3, 4},
|
||||
Values: []float64{5, 60, 7, 8},
|
||||
}, 1)
|
||||
|
||||
// Zero matches
|
||||
f(&sortBlock{
|
||||
Timestamps: []int64{1, 2, 3, 4},
|
||||
Values: []float64{5, 6, 7, 8},
|
||||
}, &sortBlock{
|
||||
Timestamps: []int64{5, 6, 7, 8},
|
||||
Values: []float64{1, 2, 3, 4},
|
||||
}, 0)
|
||||
|
||||
// Compare staleness markers, matching
|
||||
f(&sortBlock{
|
||||
Timestamps: []int64{1, 2, 3, 4},
|
||||
Values: []float64{5, decimal.StaleNaN, 7, 8},
|
||||
}, &sortBlock{
|
||||
Timestamps: []int64{1, 2, 3, 4},
|
||||
Values: []float64{5, decimal.StaleNaN, 7, 8},
|
||||
}, 4)
|
||||
|
||||
// Special float values: +Inf, -Inf, 0, -0
|
||||
f(&sortBlock{
|
||||
Timestamps: []int64{1, 2, 3, 4},
|
||||
Values: []float64{math.Inf(1), math.Inf(-1), math.Copysign(0, +1), math.Copysign(0, -1)},
|
||||
}, &sortBlock{
|
||||
Timestamps: []int64{1, 2, 3, 4},
|
||||
Values: []float64{math.Inf(1), math.Inf(-1), math.Copysign(0, +1), math.Copysign(0, -1)},
|
||||
}, 4)
|
||||
|
||||
// Positive zero vs negative zero (bitwise different)
|
||||
f(&sortBlock{
|
||||
Timestamps: []int64{1, 2},
|
||||
Values: []float64{5, math.Copysign(0, +1)},
|
||||
}, &sortBlock{
|
||||
Timestamps: []int64{1, 2},
|
||||
Values: []float64{5, math.Copysign(0, -1)},
|
||||
}, 1)
|
||||
}
|
||||
|
||||
@@ -1713,6 +1713,7 @@ func evalRollupFuncNoCache(qt *querytracer.Tracer, ec *EvalConfig, funcName stri
|
||||
return nil, err
|
||||
}
|
||||
defer rml.Put(uint64(rollupMemorySize))
|
||||
qs.addMemoryUsage(rollupMemorySize)
|
||||
qt.Printf("the rollup evaluation needs an estimated %d bytes of RAM for %d series and %d points per series (summary %d points)",
|
||||
rollupMemorySize, timeseriesLen, pointsPerSeries, rollupPoints)
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ func Exec(qt *querytracer.Tracer, ec *EvalConfig, q string, isFirstPointOnly boo
|
||||
if querystats.Enabled() {
|
||||
startTime := time.Now()
|
||||
defer func() {
|
||||
querystats.RegisterQuery(q, ec.End-ec.Start, startTime)
|
||||
querystats.RegisterQuery(q, ec.End-ec.Start, startTime, ec.QueryStats.memoryUsage())
|
||||
ec.QueryStats.addExecutionTimeMsec(startTime)
|
||||
}()
|
||||
}
|
||||
|
||||
@@ -13,6 +13,8 @@ type QueryStats struct {
|
||||
ExecutionDuration atomic.Pointer[time.Duration]
|
||||
// SeriesFetched contains the number of series fetched from storage or cache.
|
||||
SeriesFetched atomic.Int64
|
||||
// MemoryUsage contains the estimated memory consumption of the query
|
||||
MemoryUsage atomic.Int64
|
||||
|
||||
at *auth.Token
|
||||
|
||||
@@ -53,3 +55,17 @@ func (qs *QueryStats) addExecutionTimeMsec(startTime time.Time) {
|
||||
d := time.Since(startTime)
|
||||
qs.ExecutionDuration.Store(&d)
|
||||
}
|
||||
|
||||
func (qs *QueryStats) addMemoryUsage(memoryUsage int64) {
|
||||
if qs == nil {
|
||||
return
|
||||
}
|
||||
qs.MemoryUsage.Store(memoryUsage)
|
||||
}
|
||||
|
||||
func (qs *QueryStats) memoryUsage() int64 {
|
||||
if qs == nil {
|
||||
return 0
|
||||
}
|
||||
return qs.MemoryUsage.Load()
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
|
||||
)
|
||||
@@ -15,7 +16,8 @@ import (
|
||||
var (
|
||||
lastQueriesCount = flag.Int("search.queryStats.lastQueriesCount", 20000, "Query stats for /api/v1/status/top_queries is tracked on this number of last queries. "+
|
||||
"Zero value disables query stats tracking")
|
||||
minQueryDuration = flag.Duration("search.queryStats.minQueryDuration", time.Millisecond, "The minimum duration for queries to track in query stats at /api/v1/status/top_queries. Queries with lower duration are ignored in query stats")
|
||||
minQueryDuration = flag.Duration("search.queryStats.minQueryDuration", time.Millisecond, "The minimum duration for queries to track in query stats at /api/v1/status/top_queries. Queries with lower duration are ignored in query stats")
|
||||
minQueryMemoryUsage = flagutil.NewBytes("search.queryStats.minQueryMemoryUsage", 1024, "The minimum memory bytes consumption for queries to track in query stats at /api/v1/status/top_queries. Queries with lower memory bytes consumption are ignored in query stats")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -31,9 +33,9 @@ func Enabled() bool {
|
||||
// RegisterQuery registers the query on the given timeRangeMsecs, which has been started at startTime.
|
||||
//
|
||||
// RegisterQuery must be called when the query is finished.
|
||||
func RegisterQuery(query string, timeRangeMsecs int64, startTime time.Time) {
|
||||
func RegisterQuery(query string, timeRangeMsecs int64, startTime time.Time, memoryUsage int64) {
|
||||
initOnce.Do(initQueryStats)
|
||||
qsTracker.registerQuery(query, timeRangeMsecs, startTime)
|
||||
qsTracker.registerQuery(query, timeRangeMsecs, startTime, memoryUsage)
|
||||
}
|
||||
|
||||
// WriteJSONQueryStats writes query stats to given writer in json format.
|
||||
@@ -54,6 +56,7 @@ type queryStatRecord struct {
|
||||
timeRangeSecs int64
|
||||
registerTime time.Time
|
||||
duration time.Duration
|
||||
memoryUsage int64
|
||||
}
|
||||
|
||||
type queryStatKey struct {
|
||||
@@ -66,8 +69,8 @@ func initQueryStats() {
|
||||
if recordsCount <= 0 {
|
||||
recordsCount = 1
|
||||
} else {
|
||||
logger.Infof("enabled query stats tracking at `/api/v1/status/top_queries` with -search.queryStats.lastQueriesCount=%d, -search.queryStats.minQueryDuration=%s",
|
||||
*lastQueriesCount, *minQueryDuration)
|
||||
logger.Infof("enabled query stats tracking at `/api/v1/status/top_queries` with -search.queryStats.lastQueriesCount=%d, -search.queryStats.minQueryDuration=%s, -search.queryStats.minQueryMemoryUsage=%s",
|
||||
*lastQueriesCount, *minQueryDuration, minQueryMemoryUsage)
|
||||
}
|
||||
qsTracker = &queryStatsTracker{
|
||||
a: make([]queryStatRecord, recordsCount),
|
||||
@@ -78,6 +81,7 @@ func (qst *queryStatsTracker) writeJSONQueryStats(w io.Writer, topN int, maxLife
|
||||
fmt.Fprintf(w, `{"topN":"%d","maxLifetime":"%s",`, topN, maxLifetime)
|
||||
fmt.Fprintf(w, `"search.queryStats.lastQueriesCount":%d,`, *lastQueriesCount)
|
||||
fmt.Fprintf(w, `"search.queryStats.minQueryDuration":"%s",`, *minQueryDuration)
|
||||
fmt.Fprintf(w, `"search.queryStats.minQueryMemoryUsage":"%s",`, minQueryMemoryUsage)
|
||||
fmt.Fprintf(w, `"topByCount":[`)
|
||||
topByCount := qst.getTopByCount(topN, maxLifetime)
|
||||
for i, r := range topByCount {
|
||||
@@ -102,15 +106,28 @@ func (qst *queryStatsTracker) writeJSONQueryStats(w io.Writer, topN int, maxLife
|
||||
fmt.Fprintf(w, `,`)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, `],"topByAvgMemoryUsage":[`)
|
||||
topByAvgMemoryConsumption := qst.getTopByAvgMemoryUsage(topN, maxLifetime)
|
||||
for i, r := range topByAvgMemoryConsumption {
|
||||
fmt.Fprintf(w, `{"query":%s,"timeRangeSeconds":%d,"avgMemoryBytes":%d,"count":%d}`, stringsutil.JSONString(r.query), r.timeRangeSecs, r.memoryUsage, r.count)
|
||||
if i+1 < len(topByAvgMemoryConsumption) {
|
||||
fmt.Fprintf(w, `,`)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, `]}`)
|
||||
}
|
||||
|
||||
func (qst *queryStatsTracker) registerQuery(query string, timeRangeMsecs int64, startTime time.Time) {
|
||||
func (qst *queryStatsTracker) registerQuery(query string, timeRangeMsecs int64, startTime time.Time, memoryUsage int64) {
|
||||
registerTime := time.Now()
|
||||
duration := registerTime.Sub(startTime)
|
||||
if duration < *minQueryDuration {
|
||||
return
|
||||
}
|
||||
if memoryUsage < int64(minQueryMemoryUsage.IntN()) {
|
||||
return
|
||||
}
|
||||
|
||||
qst.mu.Lock()
|
||||
defer qst.mu.Unlock()
|
||||
@@ -126,6 +143,7 @@ func (qst *queryStatsTracker) registerQuery(query string, timeRangeMsecs int64,
|
||||
r.timeRangeSecs = timeRangeMsecs / 1000
|
||||
r.registerTime = registerTime
|
||||
r.duration = duration
|
||||
r.memoryUsage = memoryUsage
|
||||
}
|
||||
|
||||
func (r *queryStatRecord) matches(currentTime time.Time, maxLifetime time.Duration) bool {
|
||||
@@ -257,3 +275,47 @@ func (qst *queryStatsTracker) getTopBySumDuration(topN int, maxLifetime time.Dur
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
type queryStatByMemory struct {
|
||||
query string
|
||||
timeRangeSecs int64
|
||||
memoryUsage int64
|
||||
count int
|
||||
}
|
||||
|
||||
func (qst *queryStatsTracker) getTopByAvgMemoryUsage(topN int, maxLifetime time.Duration) []queryStatByMemory {
|
||||
currentTime := time.Now()
|
||||
qst.mu.Lock()
|
||||
type countSum struct {
|
||||
count int
|
||||
sum int64
|
||||
}
|
||||
m := make(map[queryStatKey]countSum)
|
||||
for _, r := range qst.a {
|
||||
if r.matches(currentTime, maxLifetime) {
|
||||
k := r.key()
|
||||
ks := m[k]
|
||||
ks.count++
|
||||
ks.sum += r.memoryUsage
|
||||
m[k] = ks
|
||||
}
|
||||
}
|
||||
qst.mu.Unlock()
|
||||
|
||||
var a []queryStatByMemory
|
||||
for k, ks := range m {
|
||||
a = append(a, queryStatByMemory{
|
||||
query: k.query,
|
||||
timeRangeSecs: k.timeRangeSecs,
|
||||
memoryUsage: ks.sum / int64(ks.count),
|
||||
count: ks.count,
|
||||
})
|
||||
}
|
||||
sort.Slice(a, func(i, j int) bool {
|
||||
return a[i].memoryUsage > a[j].memoryUsage
|
||||
})
|
||||
if len(a) > topN {
|
||||
a = a[:topN]
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ aliases:
|
||||
- /MetricsQL.html
|
||||
- /metricsql/index.html
|
||||
- /metricsql/
|
||||
- /MetricsQL/
|
||||
---
|
||||
[VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) implements MetricsQL -
|
||||
query language inspired by [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/).
|
||||
209
app/vmselect/vmui/assets/index-C1hTBemk.js
Normal file
1
app/vmselect/vmui/assets/index-D7CzMv1O.css
Normal file
@@ -37,10 +37,10 @@
|
||||
<meta property="og:title" content="UI for VictoriaMetrics">
|
||||
<meta property="og:url" content="https://victoriametrics.com/">
|
||||
<meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data">
|
||||
<script type="module" crossorigin src="./assets/index-B6lol36n.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-EZef-S_8.js">
|
||||
<script type="module" crossorigin src="./assets/index-C1hTBemk.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-BR6Q0Fin.js">
|
||||
<link rel="stylesheet" crossorigin href="./assets/vendor-D1GxaB_c.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-VQRcNK83.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-D7CzMv1O.css">
|
||||
</head>
|
||||
<body>
|
||||
<noscript>You need to enable JavaScript to run this app.</noscript>
|
||||
|
||||
@@ -655,6 +655,7 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_entries{type="indexdb/metricID"}`, idbm.MetricIDCacheSize)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_entries{type="indexdb/date_metricID"}`, idbm.DateMetricIDCacheSize)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_entries{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheSize)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_entries{type="indexdb/tagFiltersLoops"}`, idbm.LoopsPerDateTagFilterCacheSize)
|
||||
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/indexBlocks"}`, tm.IndexBlocksCacheSizeBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/tsid"}`, m.TSIDCacheSizeBytes)
|
||||
@@ -670,6 +671,7 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="indexdb/dataBlocksSparse"}`, idbm.DataBlocksSparseCacheSizeBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="indexdb/indexBlocks"}`, idbm.IndexBlocksCacheSizeBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheSizeBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="indexdb/tagFiltersLoops"}`, idbm.LoopsPerDateTagFilterCacheSizeBytes)
|
||||
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="storage/indexBlocks"}`, tm.IndexBlocksCacheSizeMaxBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="storage/tsid"}`, m.TSIDCacheSizeMaxBytes)
|
||||
@@ -681,6 +683,7 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="indexdb/dataBlocksSparse"}`, idbm.DataBlocksSparseCacheSizeMaxBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="indexdb/indexBlocks"}`, idbm.IndexBlocksCacheSizeMaxBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheSizeMaxBytes)
|
||||
metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="indexdb/tagFiltersLoops"}`, idbm.LoopsPerDateTagFilterCacheSizeMaxBytes)
|
||||
|
||||
metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="storage/indexBlocks"}`, tm.IndexBlocksCacheRequests)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="storage/tsid"}`, m.TSIDCacheRequests)
|
||||
@@ -692,6 +695,7 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="indexdb/dataBlocksSparse"}`, idbm.DataBlocksSparseCacheRequests)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="indexdb/indexBlocks"}`, idbm.IndexBlocksCacheRequests)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheRequests)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="indexdb/tagFiltersLoops"}`, idbm.LoopsPerDateTagFilterCacheRequests)
|
||||
|
||||
metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="storage/indexBlocks"}`, tm.IndexBlocksCacheMisses)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="storage/tsid"}`, m.TSIDCacheMisses)
|
||||
@@ -703,6 +707,7 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="indexdb/dataBlocksSparse"}`, idbm.DataBlocksSparseCacheMisses)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="indexdb/indexBlocks"}`, idbm.IndexBlocksCacheMisses)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheMisses)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="indexdb/tagFiltersLoops"}`, idbm.LoopsPerDateTagFilterCacheMisses)
|
||||
|
||||
metrics.WriteCounterUint64(w, `vm_cache_resets_total{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheResets)
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM golang:1.25.6 AS build-web-stage
|
||||
FROM golang:1.26.0 AS build-web-stage
|
||||
COPY build /build
|
||||
|
||||
WORKDIR /build
|
||||
@@ -6,7 +6,7 @@ COPY web/ /build/
|
||||
RUN GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o web-amd64 github.com/VictoriMetrics/vmui/ && \
|
||||
GOOS=windows GOARCH=amd64 CGO_ENABLED=0 go build -o web-windows github.com/VictoriMetrics/vmui/
|
||||
|
||||
FROM alpine:3.22.2
|
||||
FROM alpine:3.23.3
|
||||
USER root
|
||||
|
||||
COPY --from=build-web-stage /build/web-amd64 /app/web
|
||||
|
||||
@@ -2,9 +2,6 @@
|
||||
copy-metricsql-docs:
|
||||
cp docs/victoriametrics/MetricsQL.md app/vmui/packages/vmui/src/assets/MetricsQL.md
|
||||
|
||||
vmui-package-base-image:
|
||||
docker build -t vmui-builder-image -f app/vmui/Dockerfile-build ./app/vmui
|
||||
|
||||
vmui-run-npm-command: vmui-package-base-image
|
||||
docker run --rm \
|
||||
--user $(shell id -u):$(shell id -g) \
|
||||
|
||||
567
app/vmui/packages/vmui/package-lock.json
generated
@@ -24,10 +24,10 @@
|
||||
"dayjs": "^1.11.19",
|
||||
"lodash.debounce": "^4.0.8",
|
||||
"marked": "^17.0.1",
|
||||
"preact": "^10.28.2",
|
||||
"preact": "^10.28.3",
|
||||
"qs": "^6.14.1",
|
||||
"react-input-mask": "^2.0.4",
|
||||
"react-router-dom": "^7.12.0",
|
||||
"react-router-dom": "^7.13.0",
|
||||
"uplot": "^1.6.32",
|
||||
"vite": "^7.3.1",
|
||||
"web-vitals": "^5.1.0"
|
||||
@@ -35,29 +35,29 @@
|
||||
"devDependencies": {
|
||||
"@eslint/eslintrc": "^3.3.3",
|
||||
"@eslint/js": "^9.39.2",
|
||||
"@preact/preset-vite": "^2.10.2",
|
||||
"@preact/preset-vite": "^2.10.3",
|
||||
"@testing-library/jest-dom": "^6.9.1",
|
||||
"@testing-library/preact": "^3.2.4",
|
||||
"@types/lodash.debounce": "^4.0.9",
|
||||
"@types/node": "^25.0.8",
|
||||
"@types/node": "^25.2.0",
|
||||
"@types/qs": "^6.14.0",
|
||||
"@types/react": "^19.2.8",
|
||||
"@types/react": "^19.2.10",
|
||||
"@types/react-input-mask": "^3.0.6",
|
||||
"@types/react-router-dom": "^5.3.3",
|
||||
"@typescript-eslint/eslint-plugin": "^8.53.0",
|
||||
"@typescript-eslint/parser": "^8.53.0",
|
||||
"@typescript-eslint/eslint-plugin": "^8.54.0",
|
||||
"@typescript-eslint/parser": "^8.54.0",
|
||||
"cross-env": "^10.1.0",
|
||||
"eslint": "^9.39.2",
|
||||
"eslint-plugin-react": "^7.37.5",
|
||||
"eslint-plugin-unused-imports": "^4.3.0",
|
||||
"globals": "^17.0.0",
|
||||
"globals": "^17.3.0",
|
||||
"http-proxy-middleware": "^3.0.5",
|
||||
"jsdom": "^27.4.0",
|
||||
"jsdom": "^28.0.0",
|
||||
"postcss": "^8.5.6",
|
||||
"rollup-plugin-visualizer": "^6.0.5",
|
||||
"sass-embedded": "^1.97.2",
|
||||
"sass-embedded": "^1.97.3",
|
||||
"typescript": "^5.9.3",
|
||||
"vitest": "^4.0.17"
|
||||
"vitest": "^4.0.18"
|
||||
},
|
||||
"browserslist": {
|
||||
"production": [
|
||||
|
||||
@@ -12,6 +12,7 @@ aliases:
|
||||
- /MetricsQL.html
|
||||
- /metricsql/index.html
|
||||
- /metricsql/
|
||||
- /MetricsQL/
|
||||
---
|
||||
[VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) implements MetricsQL -
|
||||
query language inspired by [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/).
|
||||
|
||||
@@ -6,6 +6,7 @@ import { QueryContextType } from "../../../types";
|
||||
import { AUTOCOMPLETE_LIMITS } from "../../../constants/queryAutocomplete";
|
||||
import { QueryEditorAutocompleteProps } from "./QueryEditor";
|
||||
import { getExprLastPart, getValueByContext, getContext } from "./autocompleteUtils";
|
||||
import { extractCurrentLabel, extractLabelMatchers, extractMetric, splitByCursor } from "./utils/parser";
|
||||
|
||||
const QueryEditorAutocomplete: FC<QueryEditorAutocompleteProps> = ({
|
||||
value,
|
||||
@@ -20,45 +21,39 @@ const QueryEditorAutocomplete: FC<QueryEditorAutocompleteProps> = ({
|
||||
const metricsqlFunctions = useGetMetricsQL(includeFunctions);
|
||||
|
||||
const values = useMemo(() => {
|
||||
if (caretPosition[0] !== caretPosition[1]) return { beforeCursor: value, afterCursor: "" };
|
||||
const beforeCursor = value.substring(0, caretPosition[0]);
|
||||
const afterCursor = value.substring(caretPosition[1]);
|
||||
return { beforeCursor, afterCursor };
|
||||
return splitByCursor(value, caretPosition);
|
||||
}, [value, caretPosition]);
|
||||
|
||||
const exprLastPart = useMemo(() => getExprLastPart(values.beforeCursor), [values]);
|
||||
const exprLastPart = useMemo(() => {
|
||||
return getExprLastPart(values.beforeCursor);
|
||||
}, [values.beforeCursor]);
|
||||
|
||||
const metric = useMemo(() => {
|
||||
const regex1 = /\w+\((?<metricName>[^)]+)\)\s+(by|without|on|ignoring)\s*\(\w*/gi;
|
||||
const matchAlt = [...exprLastPart.matchAll(regex1)];
|
||||
if (matchAlt.length > 0 && matchAlt[0].groups && matchAlt[0].groups.metricName) {
|
||||
return matchAlt[0].groups.metricName;
|
||||
}
|
||||
|
||||
const regex2 = /^\s*\b(?<metricName>[^{}(),\s]+)(?={|$)/g;
|
||||
const match = [...exprLastPart.matchAll(regex2)];
|
||||
if (match.length > 0 && match[0].groups && match[0].groups.metricName) {
|
||||
return match[0].groups.metricName;
|
||||
}
|
||||
|
||||
return "";
|
||||
return extractMetric(exprLastPart);
|
||||
}, [exprLastPart]);
|
||||
|
||||
const label = useMemo(() => {
|
||||
const regexp = /[a-z_:-][\w\-.:/]*\b(?=\s*(=|!=|=~|!~))/g;
|
||||
const match = exprLastPart.match(regexp);
|
||||
return match ? match[match.length - 1] : "";
|
||||
return extractCurrentLabel(exprLastPart);
|
||||
}, [exprLastPart]);
|
||||
|
||||
const context = useMemo(() => getContext(values.beforeCursor, metric, label), [values, metric, label]);
|
||||
const context = useMemo(() => {
|
||||
return getContext(values.beforeCursor, metric, label);
|
||||
}, [values.beforeCursor, metric, label]);
|
||||
|
||||
const valueByContext = useMemo(() => getValueByContext(values.beforeCursor), [values.beforeCursor]);
|
||||
const valueByContext = useMemo(() => {
|
||||
return getValueByContext(values.beforeCursor);
|
||||
}, [values.beforeCursor]);
|
||||
|
||||
const labelMatchers = useMemo(() => {
|
||||
return extractLabelMatchers(values.beforeCursor, label);
|
||||
}, [values.beforeCursor, label]);
|
||||
|
||||
const { metrics, labels, labelValues, loading } = useFetchQueryOptions({
|
||||
valueByContext,
|
||||
metric,
|
||||
label,
|
||||
context,
|
||||
labelMatchers,
|
||||
});
|
||||
|
||||
const options = useMemo(() => {
|
||||
@@ -72,18 +67,18 @@ const QueryEditorAutocomplete: FC<QueryEditorAutocompleteProps> = ({
|
||||
default:
|
||||
return [];
|
||||
}
|
||||
}, [context, metrics, labels, labelValues]);
|
||||
}, [context, metrics, labels, labelValues, metricsqlFunctions]);
|
||||
|
||||
const handleSelect = useCallback((insert: string) => {
|
||||
// Find the start and end of valueByContext in the query string
|
||||
const value = values.beforeCursor;
|
||||
const beforeCursor = values.beforeCursor;
|
||||
let valueAfterCursor = values.afterCursor;
|
||||
const startIndexOfValueByContext = value.lastIndexOf(valueByContext, caretPosition[0]);
|
||||
const startIndexOfValueByContext = beforeCursor.lastIndexOf(valueByContext, caretPosition[0]);
|
||||
const endIndexOfValueByContext = startIndexOfValueByContext + valueByContext.length;
|
||||
|
||||
// Split the original string into parts: before, during, and after valueByContext
|
||||
const beforeValueByContext = value.substring(0, startIndexOfValueByContext);
|
||||
const afterValueByContext = value.substring(endIndexOfValueByContext);
|
||||
const beforeValueByContext = beforeCursor.substring(0, startIndexOfValueByContext);
|
||||
const afterValueByContext = beforeCursor.substring(endIndexOfValueByContext);
|
||||
|
||||
// Add quotes around the value if the context is labelValue
|
||||
if (context === QueryContextType.labelValue) {
|
||||
@@ -104,7 +99,7 @@ const QueryEditorAutocomplete: FC<QueryEditorAutocompleteProps> = ({
|
||||
// Assemble the new value with the inserted text
|
||||
const newVal = `${beforeValueByContext}${insert}${afterValueByContext}${valueAfterCursor}`;
|
||||
onSelect(newVal, beforeValueByContext.length + insert.length);
|
||||
}, [values]);
|
||||
}, [values.beforeCursor, values.afterCursor, valueByContext, caretPosition, context, onSelect]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!anchorEl.current) {
|
||||
@@ -142,7 +137,7 @@ const QueryEditorAutocomplete: FC<QueryEditorAutocompleteProps> = ({
|
||||
|
||||
span.remove();
|
||||
marker.remove();
|
||||
}, [anchorEl, caretPosition, hasHelperText]);
|
||||
}, [anchorEl, caretPosition, hasHelperText, values.beforeCursor, values.afterCursor]);
|
||||
|
||||
return (
|
||||
<>
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import {
|
||||
splitByCursor,
|
||||
extractMetric,
|
||||
extractCurrentLabel,
|
||||
extractLabelMatchers,
|
||||
} from "./parser";
|
||||
|
||||
describe("splitByCursor", () => {
|
||||
it("splits by caret when selection is collapsed", () => {
|
||||
const res = splitByCursor("abcdef", [2, 2]);
|
||||
expect(res).toEqual({ beforeCursor: "ab", afterCursor: "cdef" });
|
||||
});
|
||||
|
||||
it("returns whole value as beforeCursor when selection is not collapsed", () => {
|
||||
const res = splitByCursor("abcdef", [1, 3]);
|
||||
expect(res).toEqual({ beforeCursor: "abcdef", afterCursor: "" });
|
||||
});
|
||||
|
||||
it("handles caret at 0", () => {
|
||||
const res = splitByCursor("abc", [0, 0]);
|
||||
expect(res).toEqual({ beforeCursor: "", afterCursor: "abc" });
|
||||
});
|
||||
|
||||
it("handles caret at end", () => {
|
||||
const res = splitByCursor("abc", [3, 3]);
|
||||
expect(res).toEqual({ beforeCursor: "abc", afterCursor: "" });
|
||||
});
|
||||
|
||||
it("treats reversed selection as non-collapsed (browser may return [end,start])", () => {
|
||||
const res = splitByCursor("abcdef", [4, 2]);
|
||||
expect(res).toEqual({ beforeCursor: "abcdef", afterCursor: "" });
|
||||
});
|
||||
});
|
||||
|
||||
describe("extractMetric", () => {
|
||||
it("extracts metric from plain selector", () => {
|
||||
expect(extractMetric("kube_pod_info{job=\"x\"}")).toBe("kube_pod_info");
|
||||
});
|
||||
|
||||
it("extracts metric from plain expr with leading spaces", () => {
|
||||
expect(extractMetric(" http_requests_total")).toBe("http_requests_total");
|
||||
});
|
||||
|
||||
it("extracts metric from expr with braces right after metric", () => {
|
||||
expect(extractMetric("foo_bar{a=\"b\"}")).toBe("foo_bar");
|
||||
});
|
||||
|
||||
it("extracts metric before grouping modifiers (by/without/on/ignoring)", () => {
|
||||
expect(extractMetric("sum(kube_pod_info) by (pod)")).toBe("kube_pod_info");
|
||||
expect(extractMetric("sum(kube_pod_info) without (pod)")).toBe("kube_pod_info");
|
||||
expect(extractMetric("sum(kube_pod_info) on (pod)")).toBe("kube_pod_info");
|
||||
expect(extractMetric("sum(kube_pod_info) ignoring (pod)")).toBe("kube_pod_info");
|
||||
});
|
||||
|
||||
it("returns empty string when no metric found", () => {
|
||||
expect(extractMetric("{job=\"x\"}")).toBe("");
|
||||
expect(extractMetric("")).toBe("");
|
||||
expect(extractMetric("()")).toBe("");
|
||||
});
|
||||
});
|
||||
|
||||
describe("extractCurrentLabel", () => {
|
||||
it("returns last label before operator", () => {
|
||||
expect(extractCurrentLabel("metric{job=\"foo\", instance=\"bar\"}")).toBe(
|
||||
"instance"
|
||||
);
|
||||
});
|
||||
|
||||
it("supports spaces around operator", () => {
|
||||
expect(extractCurrentLabel("metric{job=\"foo\", instance = \"bar\"}")).toBe(
|
||||
"instance"
|
||||
);
|
||||
});
|
||||
|
||||
it("supports regexp operators", () => {
|
||||
expect(extractCurrentLabel("metric{pod=~\"api-.*\",namespace=\"dev\"}")).toBe(
|
||||
"namespace"
|
||||
);
|
||||
});
|
||||
|
||||
it("supports label chars : - . /", () => {
|
||||
expect(extractCurrentLabel("m{foo-bar.baz/qux=\"1\"}")).toBe("foo-bar.baz/qux");
|
||||
});
|
||||
|
||||
it("returns empty string when no label pattern", () => {
|
||||
expect(extractCurrentLabel("metric{}").trim()).toBe("");
|
||||
expect(extractCurrentLabel("metric")).toBe("");
|
||||
});
|
||||
});
|
||||
|
||||
// Behavioural spec for extractLabelMatchers: which `label<op>"value"` fragments
// are returned from a selector and how the optional currentLabel filters them.
describe("extractLabelMatchers", () => {
  it("returns all matchers (quoted only)", () => {
    const query = "metric{job=\"foo\", instance=\"bar\"}";
    const matchers = extractLabelMatchers(query);
    expect(matchers).toEqual(["job=\"foo\"", "instance=\"bar\""]);
  });

  it("keeps original spacing", () => {
    // Whitespace around the operator is part of the returned matcher text.
    const query = "metric{ job = \"foo\" , instance = \"bar\" }";
    const matchers = extractLabelMatchers(query);
    expect(matchers).toEqual(["job = \"foo\"", "instance = \"bar\""]);
  });

  it("supports !=, =~, !~", () => {
    const query = "m{env!=\"prod\",pod=~\"api-.*\",zone!~\"eu-.*\"}";
    const expected = [
      "env!=\"prod\"",
      "pod=~\"api-.*\"",
      "zone!~\"eu-.*\"",
    ];
    expect(extractLabelMatchers(query)).toEqual(expected);
  });

  it("excludes only the specified currentLabel matcher (exact label, not prefix)", () => {
    const query = "m{job=\"foo\", instance=\"bar\", pod=~\"api-.*\"}";
    const expected = [
      "job=\"foo\"",
      "pod=~\"api-.*\"",
    ];
    expect(extractLabelMatchers(query, "instance")).toEqual(expected);
  });

  it("does not exclude other labels that share a prefix with currentLabel", () => {
    // "instance" and "insight" share the "ins" prefix; only "insight" is dropped.
    const query = "m{instance=\"bar\", insight=\"x\"}";
    const matchers = extractLabelMatchers(query, "insight");
    expect(matchers).toEqual(["instance=\"bar\""]);
  });

  it("excludes currentLabel matcher even with spaces around operator", () => {
    const query = "m{job=\"foo\", instance = \"bar\"}";
    const matchers = extractLabelMatchers(query, "instance");
    expect(matchers).toEqual(["job=\"foo\""]);
  });

  it("returns [] when no matchers", () => {
    for (const query of ["m{}", "m"]) {
      expect(extractLabelMatchers(query)).toEqual([]);
    }
  });

  it("does not include unclosed quotes", () => {
    // The trailing matcher has no closing quote, so it must be ignored.
    const query = "m{job=\"foo\", instance=\"ba";
    const matchers = extractLabelMatchers(query);
    expect(matchers).toEqual(["job=\"foo\""]);
  });
});
|
||||
@@ -0,0 +1,52 @@
|
||||
|
||||
/**
 * Splits the input text into the parts before and after the caret.
 *
 * @param value - full text of the input.
 * @param caret - `[start, end]` positions of the caret/selection.
 * @returns an object with `beforeCursor` and `afterCursor`. When `start` and
 * `end` differ, the whole `value` is returned as `beforeCursor` and
 * `afterCursor` is an empty string.
 */
export const splitByCursor = (
  value: string,
  caret: [number, number]
) => {
  const [start, end] = caret;
  const hasSelection = start !== end;

  if (hasSelection) {
    return { beforeCursor: value, afterCursor: "" };
  }

  const beforeCursor = value.substring(0, start);
  const afterCursor = value.substring(end);
  return { beforeCursor, afterCursor };
};
|
||||
|
||||
|
||||
/**
 * Extracts the metric name from a query expression.
 *
 * Two shapes are recognised, in this order:
 *  1. `fn(<metric>) by|without|on|ignoring (` - the text inside the function
 *     call's parentheses is returned;
 *  2. a bare name at the start of the expression that is followed by `{` or
 *     by the end of the string.
 *
 * @param expr - expression text to inspect.
 * @returns the metric name, or an empty string when neither shape matches.
 */
export const extractMetric = (expr: string): string => {
  // Shape 1: function call followed by a grouping/matching modifier.
  const modifierRe = /\w+\((?<metricName>[^)]+)\)\s+(by|without|on|ignoring)\s*\(\w*/gi;
  const insideCall = modifierRe.exec(expr)?.groups?.metricName;
  if (insideCall) {
    return insideCall;
  }

  // Shape 2: plain metric name at the beginning of the expression.
  const bareNameRe = /^\s*\b(?<metricName>[^{}(),\s]+)(?={|$)/g;
  const bareName = bareNameRe.exec(expr)?.groups?.metricName;
  return bareName || "";
};
|
||||
|
||||
/**
 * Returns the name of the last label in `expr` that is followed by a matching
 * operator (`=`, `!=`, `=~`, `!~`), i.e. the label whose value is being typed.
 *
 * Label names may start with an ASCII letter of either case, `_`, `:` or `-`.
 * Uppercase first letters are accepted on purpose: with a lowercase-only
 * first character a label such as `Job` would be reported as `ob`.
 *
 * @param expr - expression text (typically the part before the cursor).
 * @returns the label name, or an empty string when no `label<op>` is found.
 */
export const extractCurrentLabel = (expr: string): string => {
  const regexp = /[a-zA-Z_:-][\w\-.:/]*\b(?=\s*(=|!=|=~|!~))/g;
  const match = expr.match(regexp);
  return match ? match[match.length - 1] : "";
};

/**
 * Collects all complete label matchers (`label<op>"value"`) from `expr`.
 *
 * Only matchers with a closed double-quoted value are returned; the original
 * spacing around the operator is preserved in the returned strings.
 * The label-name pattern is kept identical to the one in
 * `extractCurrentLabel`, so a name returned there compares equal to the name
 * captured here.
 *
 * @param expr - expression text to scan.
 * @param currentLabel - optional label name; matchers whose label is exactly
 * this name are excluded from the result.
 * @returns matcher strings in the order they appear in `expr`.
 */
export const extractLabelMatchers = (
  expr: string,
  currentLabel?: string
): string[] => {
  const regexp = /([a-zA-Z_:-][\w\-.:/]*)\s*(?:=|!=|=~|!~)\s*"[^"]*"/g;

  const matches = [...expr.matchAll(regexp)];
  // m[1] = label name
  // m[0] = full matcher string

  if (!currentLabel) return matches.map(m => m[0]);

  return matches
    .filter(m => m[1] !== currentLabel)
    .map(m => m[0]);
};
|
||||
@@ -30,6 +30,7 @@ type FetchQueryArguments = {
|
||||
metric: string;
|
||||
label: string;
|
||||
context: QueryContextType
|
||||
labelMatchers: string[];
|
||||
}
|
||||
|
||||
const icons = {
|
||||
@@ -38,7 +39,7 @@ const icons = {
|
||||
[TypeData.labelValue]: <ValueIcon/>,
|
||||
};
|
||||
|
||||
export const useFetchQueryOptions = ({ valueByContext, metric, label, context }: FetchQueryArguments) => {
|
||||
export const useFetchQueryOptions = ({ valueByContext, metric, label, context, labelMatchers }: FetchQueryArguments) => {
|
||||
const { serverUrl } = useAppState();
|
||||
const { period: { start, end } } = useTimeState();
|
||||
const { autocompleteCache } = useQueryState();
|
||||
@@ -143,17 +144,19 @@ export const useFetchQueryOptions = ({ valueByContext, metric, label, context }:
|
||||
setLabels([]);
|
||||
|
||||
const metricEscaped = escapeDoubleQuotes(metric);
|
||||
const matchMetric = metric ? `__name__="${metricEscaped}"` : "";
|
||||
const matchValue = [matchMetric, ...labelMatchers].filter(Boolean).join(",");
|
||||
|
||||
fetchData({
|
||||
value,
|
||||
urlSuffix: "labels",
|
||||
setter: setLabels,
|
||||
type: TypeData.label,
|
||||
params: getQueryParams(metric ? { "match[]": `{__name__="${metricEscaped}"}` } : undefined)
|
||||
params: getQueryParams({ "match[]": `{${matchValue}}` })
|
||||
});
|
||||
|
||||
return () => abortControllerRef.current?.abort();
|
||||
}, [serverUrl, value, context, metric]);
|
||||
}, [serverUrl, value, context, metric, labelMatchers]);
|
||||
|
||||
// fetch labelValues
|
||||
useEffect(() => {
|
||||
@@ -166,7 +169,7 @@ export const useFetchQueryOptions = ({ valueByContext, metric, label, context }:
|
||||
const valueReEscaped = escapeDoubleQuotes(escapeRegexp(value));
|
||||
const matchMetric = metric ? `__name__="${metricEscaped}"` : "";
|
||||
const matchLabel = `${label}=~".*${valueReEscaped}.*"`;
|
||||
const matchValue = [matchMetric, matchLabel].filter(Boolean).join(",");
|
||||
const matchValue = [matchMetric, ...labelMatchers, matchLabel].filter(Boolean).join(",");
|
||||
|
||||
fetchData({
|
||||
value,
|
||||
@@ -177,7 +180,7 @@ export const useFetchQueryOptions = ({ valueByContext, metric, label, context }:
|
||||
});
|
||||
|
||||
return () => abortControllerRef.current?.abort();
|
||||
}, [serverUrl, value, context, metric, label]);
|
||||
}, [serverUrl, value, context, metric, label, labelMatchers]);
|
||||
|
||||
return {
|
||||
metrics,
|
||||
|
||||
@@ -26,8 +26,7 @@ const getQueryUrl = (row: TopQuery, timeRange: string) => {
|
||||
};
|
||||
|
||||
const processResponse = (data: TopQueriesData) => {
|
||||
const list = ["topByAvgDuration", "topByCount", "topBySumDuration"] as (keyof TopQueriesData)[];
|
||||
|
||||
const list = ["topByAvgDuration", "topByCount", "topBySumDuration", "topByAvgMemoryUsage"] as (keyof TopQueriesData)[];
|
||||
list.forEach(key => {
|
||||
const target = data[key] as TopQuery[];
|
||||
if (!Array.isArray(target)) return;
|
||||
|
||||
@@ -178,6 +178,17 @@ const TopQueries: FC = () => {
|
||||
{ key: "count" }
|
||||
]}
|
||||
/>
|
||||
<TopQueryPanel
|
||||
rows={data.topByAvgMemoryUsage}
|
||||
title={"Queries with most memory to execute"}
|
||||
columns={[
|
||||
{ key: "query" },
|
||||
{ key: "avgMemoryBytes", title: "avg memory usage, bytes" },
|
||||
{ key: "timeRange", sortBy: "timeRangeSeconds", title: "query time interval" },
|
||||
{ key: "count" }
|
||||
]}
|
||||
defaultOrderBy={"avgMemoryBytes"}
|
||||
/>
|
||||
</div>
|
||||
</>)}
|
||||
</div>
|
||||
|
||||
@@ -92,6 +92,7 @@ export interface TopQuery {
|
||||
query: string;
|
||||
timeRangeSeconds: number;
|
||||
sumDurationSeconds: number;
|
||||
avgMemoryBytes: number;
|
||||
timeRange: string;
|
||||
url?: string;
|
||||
}
|
||||
@@ -107,6 +108,7 @@ export interface TopQueriesData extends TopQueryStats {
|
||||
topByAvgDuration: TopQuery[];
|
||||
topByCount: TopQuery[];
|
||||
topBySumDuration: TopQuery[];
|
||||
topByAvgMemoryUsage: TopQuery[];
|
||||
error?: string;
|
||||
}
|
||||
|
||||
|
||||
@@ -10,25 +10,19 @@ const getProxy = (): Record<string, ProxyOptions> | undefined => {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const commonProxy: ProxyOptions = {
|
||||
target: "https://play.victoriametrics.com/select/0",
|
||||
changeOrigin: true,
|
||||
configure: (proxy) => {
|
||||
proxy.on("error", (err) => {
|
||||
console.error("[proxy error]", err.message);
|
||||
});
|
||||
},
|
||||
};
|
||||
|
||||
return {
|
||||
"^/(api|vmalert)/.*": {
|
||||
target: "https://play.victoriametrics.com/select/0/prometheus",
|
||||
changeOrigin: true,
|
||||
configure: (proxy) => {
|
||||
proxy.on("error", (err) => {
|
||||
console.error("[proxy error]", err.message);
|
||||
});
|
||||
},
|
||||
},
|
||||
"/prometheus/vmui/config.json": {
|
||||
target: "https://play.victoriametrics.com/select/0",
|
||||
changeOrigin: true,
|
||||
configure: (proxy) => {
|
||||
proxy.on("error", (err) => {
|
||||
console.error("[proxy error]", err.message);
|
||||
});
|
||||
},
|
||||
},
|
||||
"^/prometheus/(api|vmalert)/.*": { ...commonProxy },
|
||||
"/prometheus/vmui/config.json": { ...commonProxy },
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -58,6 +58,18 @@
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"enable": true,
|
||||
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(job)",
|
||||
"hide": false,
|
||||
"iconColor": "dark-yellow",
|
||||
"name": "restarts",
|
||||
"textFormat": "{{job}} restarted"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
@@ -18,6 +18,18 @@
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"enable": true,
|
||||
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(job)",
|
||||
"hide": false,
|
||||
"iconColor": "dark-yellow",
|
||||
"name": "restarts",
|
||||
"textFormat": "{{job}} restarted"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
@@ -6363,7 +6363,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows how many ongoing insertions (not API /write calls) on disk are taking place, where:\n* `max` - equal to number of CPUs;\n* `current` - current number of goroutines busy with inserting rows into underlying storage.\n\nEvery successful API /write call results into flush on disk. The `max` is an internal limit and can't be changed. It is always equal to the number of CPUs. \n\nWhen `current` hits `max` constantly, it means storage is overloaded and requires more CPU (see CPU usage) or disks with more IOPS (see disk writes and reads panels in Resource Usage row).",
|
||||
"description": "Shows the number of ongoing Cluster Native insert API operations sent to `vmstorage`, where:\n* `max` - the maximum number of calls that can be processed in parallel;\n* `current` - Shows the number of ongoing insert operations being processed by the storage.\n\nEach successful API call results in a flush to disk. The `max` value is controlled by the `-maxConcurrentInserts` flag set on the storage (2 * cgroup.AvailableCPUs() by default). \n\nIf `current` consistently reaches `max`, it indicates that the storage is overloaded and likely requires more CPU (see CPU usage) or disks with higher IOPS (see disk read/write panels in the Resource Usage row)",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -6500,7 +6500,7 @@
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Concurrent flushes on disk ($instance)",
|
||||
"title": "Concurrent inserts ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
@@ -11207,7 +11207,13 @@
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"links": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Source Code",
|
||||
"url": "https://l2s.victoriametrics.com/?app_version=${__field.labels.app_version}&location=${__field.labels.location}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
@@ -11262,12 +11268,12 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": true,
|
||||
"expr": "sum(rate(vm_log_messages_total{job=~\"$job\",instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (job, instance, level, location) > 0",
|
||||
"expr": "sum(rate(vm_log_messages_total{job=~\"$job\",instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (job, instance, level, app_version,location) > 0",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "5m",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "__auto",
|
||||
"legendFormat": "{{instance}} ({{job}}) - {{level}}: {{location}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
|
||||
@@ -43,6 +43,18 @@
|
||||
"name": "version",
|
||||
"textFormat": "{{version}}",
|
||||
"titleFormat": "Version change"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"enable": true,
|
||||
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(job)",
|
||||
"hide": false,
|
||||
"iconColor": "dark-yellow",
|
||||
"name": "restarts",
|
||||
"textFormat": "{{job}} restarted"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -1508,8 +1520,8 @@
|
||||
"links": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "asd",
|
||||
"url": "asd"
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk?viewPanel=154&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -8531,8 +8543,8 @@
|
||||
"links": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk?viewPanel=154&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
"title": "Source Code",
|
||||
"url": "https://l2s.victoriametrics.com/?app_version=${__field.labels.app_version}&location=${__field.labels.location}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -8589,11 +8601,11 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": true,
|
||||
"expr": "sum(rate(vm_log_messages_total{job=~\"$job\",instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (job, instance, level, location) > 0",
|
||||
"expr": "sum(rate(vm_log_messages_total{job=~\"$job\",instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (job, instance, level, app_version, location) > 0",
|
||||
"format": "time_series",
|
||||
"interval": "5m",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "__auto",
|
||||
"legendFormat": "{{instance}} ({{job}}) - {{level}}: {{location}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
|
||||
@@ -59,6 +59,18 @@
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"enable": true,
|
||||
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(job)",
|
||||
"hide": false,
|
||||
"iconColor": "dark-yellow",
|
||||
"name": "restarts",
|
||||
"textFormat": "{{job}} restarted"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
@@ -19,6 +19,18 @@
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"enable": true,
|
||||
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(job)",
|
||||
"hide": false,
|
||||
"iconColor": "dark-yellow",
|
||||
"name": "restarts",
|
||||
"textFormat": "{{job}} restarted"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
@@ -6364,7 +6364,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows how many ongoing insertions (not API /write calls) on disk are taking place, where:\n* `max` - equal to number of CPUs;\n* `current` - current number of goroutines busy with inserting rows into underlying storage.\n\nEvery successful API /write call results into flush on disk. The `max` is an internal limit and can't be changed. It is always equal to the number of CPUs. \n\nWhen `current` hits `max` constantly, it means storage is overloaded and requires more CPU (see CPU usage) or disks with more IOPS (see disk writes and reads panels in Resource Usage row).",
|
||||
"description": "Shows the number of ongoing Cluster Native insert API operations sent to `vmstorage`, where:\n* `max` - the maximum number of calls that can be processed in parallel;\n* `current` - Shows the number of ongoing insert operations being processed by the storage.\n\nEach successful API call results in a flush to disk. The `max` value is controlled by the `-maxConcurrentInserts` flag set on the storage (2 * cgroup.AvailableCPUs() by default). \n\nIf `current` consistently reaches `max`, it indicates that the storage is overloaded and likely requires more CPU (see CPU usage) or disks with higher IOPS (see disk read/write panels in the Resource Usage row)",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -6501,7 +6501,7 @@
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Concurrent flushes on disk ($instance)",
|
||||
"title": "Concurrent inserts ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
@@ -11208,7 +11208,13 @@
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"links": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Source Code",
|
||||
"url": "https://l2s.victoriametrics.com/?app_version=${__field.labels.app_version}&location=${__field.labels.location}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
@@ -11263,12 +11269,12 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": true,
|
||||
"expr": "sum(rate(vm_log_messages_total{job=~\"$job\",instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (job, instance, level, location) > 0",
|
||||
"expr": "sum(rate(vm_log_messages_total{job=~\"$job\",instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (job, instance, level, app_version,location) > 0",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "5m",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "__auto",
|
||||
"legendFormat": "{{instance}} ({{job}}) - {{level}}: {{location}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
|
||||
@@ -44,6 +44,18 @@
|
||||
"name": "version",
|
||||
"textFormat": "{{version}}",
|
||||
"titleFormat": "Version change"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"enable": true,
|
||||
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(job)",
|
||||
"hide": false,
|
||||
"iconColor": "dark-yellow",
|
||||
"name": "restarts",
|
||||
"textFormat": "{{job}} restarted"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -1509,8 +1521,8 @@
|
||||
"links": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "asd",
|
||||
"url": "asd"
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=154&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -8532,8 +8544,8 @@
|
||||
"links": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=154&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
"title": "Source Code",
|
||||
"url": "https://l2s.victoriametrics.com/?app_version=${__field.labels.app_version}&location=${__field.labels.location}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -8590,11 +8602,11 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": true,
|
||||
"expr": "sum(rate(vm_log_messages_total{job=~\"$job\",instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (job, instance, level, location) > 0",
|
||||
"expr": "sum(rate(vm_log_messages_total{job=~\"$job\",instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (job, instance, level, app_version, location) > 0",
|
||||
"format": "time_series",
|
||||
"interval": "5m",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "__auto",
|
||||
"legendFormat": "{{instance}} ({{job}}) - {{level}}: {{location}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
|
||||
@@ -1227,7 +1227,13 @@
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"links": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/G7Z9GzMGz_vm?viewPanel=162&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
@@ -8296,6 +8302,124 @@
|
||||
],
|
||||
"title": "Persistent queue Full ETA ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the rate of logging the messages by their level. Unexpected spike in rate is a good reason to check logs.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 100,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Source Code",
|
||||
"url": "https://l2s.victoriametrics.com/?app_version=${__field.labels.app_version}&location=${__field.labels.location}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 461
|
||||
},
|
||||
"id": 154,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true,
|
||||
"sortBy": "Last *",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": true,
|
||||
"expr": "sum(rate(vm_log_messages_total{job=~\"$job\",instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (job, instance, level, app_version, location) > 0",
|
||||
"format": "time_series",
|
||||
"interval": "5m",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}} - {{level}}: {{location}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Logging rate",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"title": "Drilldown",
|
||||
|
||||
@@ -53,6 +53,18 @@
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"enable": true,
|
||||
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(job)",
|
||||
"hide": false,
|
||||
"iconColor": "dark-yellow",
|
||||
"name": "restarts",
|
||||
"textFormat": "{{job}} restarted"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -236,7 +248,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "count(vmauth_user_concurrent_requests_capacity{job=~\"$job\", instance=~\"$instance\"})",
|
||||
"expr": "count(count by(username) (vmauth_user_concurrent_requests_capacity{job=~\"$job\", instance=~\"$instance\"}))",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
|
||||
@@ -1226,7 +1226,13 @@
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"links": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/G7Z9GzMGz?viewPanel=162&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
@@ -8295,6 +8301,124 @@
|
||||
],
|
||||
"title": "Persistent queue Full ETA ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the rate of logging the messages by their level. Unexpected spike in rate is a good reason to check logs.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 100,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Source Code",
|
||||
"url": "https://l2s.victoriametrics.com/?app_version=${__field.labels.app_version}&location=${__field.labels.location}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 461
|
||||
},
|
||||
"id": 154,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true,
|
||||
"sortBy": "Last *",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": true,
|
||||
"expr": "sum(rate(vm_log_messages_total{job=~\"$job\",instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (job, instance, level, app_version, location) > 0",
|
||||
"format": "time_series",
|
||||
"interval": "5m",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}} - {{level}}: {{location}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Logging rate",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"title": "Drilldown",
|
||||
|
||||
@@ -52,6 +52,18 @@
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"enable": true,
|
||||
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(job)",
|
||||
"hide": false,
|
||||
"iconColor": "dark-yellow",
|
||||
"name": "restarts",
|
||||
"textFormat": "{{job}} restarted"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -235,7 +247,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "count(vmauth_user_concurrent_requests_capacity{job=~\"$job\", instance=~\"$instance\"})",
|
||||
"expr": "count(count by(username) (vmauth_user_concurrent_requests_capacity{job=~\"$job\", instance=~\"$instance\"}))",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
DOCKER_REGISTRIES ?= docker.io quay.io
|
||||
DOCKER_NAMESPACE ?= victoriametrics
|
||||
|
||||
ROOT_IMAGE ?= alpine:3.23.2
|
||||
ROOT_IMAGE ?= alpine:3.23.3
|
||||
ROOT_IMAGE_SCRATCH ?= scratch
|
||||
CERTS_IMAGE := alpine:3.23.2
|
||||
CERTS_IMAGE := alpine:3.23.3
|
||||
|
||||
GO_BUILDER_IMAGE := golang:1.25.6
|
||||
GO_BUILDER_IMAGE := golang:1.26.0
|
||||
|
||||
BUILDER_IMAGE := local/builder:2.0.0-$(shell echo $(GO_BUILDER_IMAGE) | tr :/ __)-1
|
||||
BASE_IMAGE := local/base:1.1.4-$(shell echo $(ROOT_IMAGE) | tr :/ __)-$(shell echo $(CERTS_IMAGE) | tr :/ __)
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# and forwards the scraped metrics to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.134.0
|
||||
image: victoriametrics/vmagent:v1.135.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -37,14 +37,14 @@ services:
|
||||
# vmstorage shards. Each shard receives 1/N of all metrics sent to vminserts,
|
||||
# where N is number of vmstorages (2 in this case).
|
||||
vmstorage-1:
|
||||
image: victoriametrics/vmstorage:v1.134.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.135.0-cluster
|
||||
volumes:
|
||||
- strgdata-1:/storage
|
||||
command:
|
||||
- "--storageDataPath=/storage"
|
||||
restart: always
|
||||
vmstorage-2:
|
||||
image: victoriametrics/vmstorage:v1.134.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.135.0-cluster
|
||||
volumes:
|
||||
- strgdata-2:/storage
|
||||
command:
|
||||
@@ -54,7 +54,7 @@ services:
|
||||
# vminsert is ingestion frontend. It receives metrics pushed by vmagent,
|
||||
# pre-processes them and distributes them across configured vmstorage shards.
|
||||
vminsert-1:
|
||||
image: victoriametrics/vminsert:v1.134.0-cluster
|
||||
image: victoriametrics/vminsert:v1.135.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -63,7 +63,7 @@ services:
|
||||
- "--storageNode=vmstorage-2:8400"
|
||||
restart: always
|
||||
vminsert-2:
|
||||
image: victoriametrics/vminsert:v1.134.0-cluster
|
||||
image: victoriametrics/vminsert:v1.135.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -75,7 +75,7 @@ services:
|
||||
# vmselect is a query frontend. It serves read queries in MetricsQL or PromQL.
|
||||
# vmselect collects results from configured `--storageNode` shards.
|
||||
vmselect-1:
|
||||
image: victoriametrics/vmselect:v1.134.0-cluster
|
||||
image: victoriametrics/vmselect:v1.135.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -85,7 +85,7 @@ services:
|
||||
- "--vmalert.proxyURL=http://vmalert:8880"
|
||||
restart: always
|
||||
vmselect-2:
|
||||
image: victoriametrics/vmselect:v1.134.0-cluster
|
||||
image: victoriametrics/vmselect:v1.135.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -100,7 +100,7 @@ services:
|
||||
# read requests from Grafana, vmui, vmalert among vmselects.
|
||||
# It can be used as an authentication proxy.
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.134.0
|
||||
image: victoriametrics/vmauth:v1.135.0
|
||||
depends_on:
|
||||
- "vmselect-1"
|
||||
- "vmselect-2"
|
||||
@@ -114,7 +114,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.134.0
|
||||
image: victoriametrics/vmalert:v1.135.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# and forwards the scraped metrics to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.134.0
|
||||
image: victoriametrics/vmagent:v1.135.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
# VictoriaMetrics instance, a single process responsible for
|
||||
# storing metrics and serving read requests.
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.134.0
|
||||
image: victoriametrics/victoria-metrics:v1.135.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
- 8089:8089
|
||||
@@ -54,7 +54,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.134.0
|
||||
image: victoriametrics/vmalert:v1.135.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "alertmanager"
|
||||
|
||||
@@ -136,3 +136,16 @@ groups:
|
||||
description: "High rate of slow inserts on \"{{ $labels.instance }}\" may be a sign of resource exhaustion
|
||||
for the current load. It is likely more RAM is needed for optimal handling of the current number of active time series.
|
||||
See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183"
|
||||
|
||||
- alert: MetadataCacheUtilizationIsTooHigh
|
||||
expr: |
|
||||
vm_metrics_metadata_storage_size_bytes / vm_metrics_metadata_storage_max_size_bytes > 0.95
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Metadata cache capacity on {{ $labels.instance }} (job={{ $labels.job }}) is utilized for more than 95% for the last 15min"
|
||||
description: "Metadata cache stores meta information about ingested time series - see https://docs.victoriametrics.com/victoriametrics/#metrics-metadata.
|
||||
When cache is overutilized, the oldest entries will be dropped out automatically. It may result into incomplete
|
||||
response for /api/v1/metadata API calls. It doesn't impact regular queries or alerts. Cache size is controlled
|
||||
via -storage.maxMetadataStorageSize cmd-line flag."
|
||||
@@ -1,6 +1,6 @@
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.134.0
|
||||
image: victoriametrics/vmagent:v1.135.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -14,7 +14,7 @@ services:
|
||||
restart: always
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.134.0
|
||||
image: victoriametrics/victoria-metrics:v1.135.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
restart: always
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.134.0
|
||||
image: victoriametrics/vmalert:v1.135.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
- '--external.alert.source=explore?orgId=1&left=["now-1h","now","VictoriaMetrics",{"expr": },{"mode":"Metrics"},{"ui":[true,true,true,"none"]}]'
|
||||
restart: always
|
||||
vmanomaly:
|
||||
image: victoriametrics/vmanomaly:v1.28.5
|
||||
image: victoriametrics/vmanomaly:v1.28.7
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -14,6 +14,18 @@ aliases:
|
||||
---
|
||||
Please find the changelog for VictoriaMetrics Anomaly Detection below.
|
||||
|
||||
## v1.28.7
|
||||
Released: 2026-02-09
|
||||
|
||||
- UI: Updated [vmanomaly UI](https://docs.victoriametrics.com/anomaly-detection/ui/) from [v1.4.2](https://docs.victoriametrics.com/anomaly-detection/ui/#v142) to [v1.4.3](https://docs.victoriametrics.com/anomaly-detection/ui/#v143), see respective [release notes](https://docs.victoriametrics.com/anomaly-detection/ui/#v143) for details.
|
||||
|
||||
- BUGFIX: Resolved an issue with `Logs/Traces` datasource type in the vmanomaly UI where the `step` parameter from UI state wasn't properly passed to the backend, sometimes resulting in 422 errors when the backend expected a string (e.g. "10s") but received a number (e.g. 10) instead.
|
||||
|
||||
## v1.28.6
|
||||
Released: 2026-01-27
|
||||
|
||||
- IMPROVEMENT: Support additional backward-compatible [CLI arguments](https://docs.victoriametrics.com/anomaly-detection/quickstart/#command-line-arguments) formats, including key-value pairs (`key=value`, `-key=value`, `--key=value`), value shortcuts for boolean flag values (e.g. `dryRun=1`, `-dryRun true`, `--dryRun false`, `--dryRun 0`) and all respective combinations.
|
||||
|
||||
## v1.28.5
|
||||
Released: 2026-01-17
|
||||
|
||||
|
||||
@@ -419,7 +419,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.28.5
|
||||
image: victoriametrics/vmanomaly:v1.28.7
|
||||
# ...
|
||||
restart: always
|
||||
volumes:
|
||||
@@ -637,7 +637,7 @@ options:
|
||||
Here’s an example of using the config splitter to divide configurations based on the `extra_filters` argument from the reader section:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.28.5 && docker image tag victoriametrics/vmanomaly:v1.28.5 vmanomaly
|
||||
docker pull victoriametrics/vmanomaly:v1.28.7 && docker image tag victoriametrics/vmanomaly:v1.28.7 vmanomaly
|
||||
```
|
||||
|
||||
```sh
|
||||
|
||||
@@ -45,8 +45,8 @@ There are 2 types of compatibilitity to consider when migrating in stateful mode
|
||||
|
||||
| Group start | Group end | Compatibility | Notes |
|
||||
|---------|--------- |------------|-------|
|
||||
| [v1.28.5](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1285) | Latest* | Fully Compatible | Just a placeholder for new releases |
|
||||
| [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262) | [v1.28.5](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1285) | Fully Compatible | [v1.28.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1280) introduced [rolling](https://docs.victoriametrics.com/anomaly-detection/components/models/#rolling-models) model class drop in favor of [online](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models) models (`rolling_quantile` and `std` models), however, it does not impact compatibility, as artifacts were not produced by default for rolling models. Also, offline `mad` and `zscore` models are redirecting to their respective online counterparts since [v1.28.4](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1284). |
|
||||
| [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287) | Latest* | Fully Compatible | Just a placeholder for new releases |
|
||||
| [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262) | [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287) | Fully Compatible | [v1.28.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1280) introduced [rolling](https://docs.victoriametrics.com/anomaly-detection/components/models/#rolling-models) model class drop in favor of [online](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models) models (`rolling_quantile` and `std` models), however, it does not impact compatibility, as artifacts were not produced by default for rolling models. Also, offline `mad` and `zscore` models are redirecting to their respective online counterparts since [v1.28.4](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1284). |
|
||||
| [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) | [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270) | Partially Compatible* | [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) introduced `forecast_at` argument for base [univariate](https://docs.victoriametrics.com/anomaly-detection/components/models/#univariate-models) and `Prophet` [models](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet), however, itself remains backward-reversible from newer states like [v1.26.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262), [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270). (All models except `isolation_forest_multivariate` class will be dropped) |
|
||||
| [v1.25.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1251) | [v1.25.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1252) | Fully Compatible | In [v1.25.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1251) there was a change to `vmanomaly.db` metadata database format, so migrating from v1.24.0-v1.25.0 requires deletion of a state, see note above the table |
|
||||
| [v1.24.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1241) | [v1.25.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1250) | Partially Compatible* | In [v1.25.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1250) there were changes to **data dump layout** and to `online_quantile` and `isolation_forest_multivariate` [model](https://docs.victoriametrics.com/anomaly-detection/components/models/) states, so to migrate from v1.24.0-v1.24.1 it is recommended to drop the state |
|
||||
|
||||
@@ -24,6 +24,7 @@ The following options are available:
|
||||
|
||||
- [To run Docker image](#docker)
|
||||
- [To run in Kubernetes with Helm charts](#kubernetes-with-helm-charts)
|
||||
- [To run with VM Operator](#vm-operator)
|
||||
|
||||
> Anomaly detection models can be kept {{% available_from "v1.13.0" anomaly %}} **on host filesystem after `fit` stage** (instead of default in-memory option); This will drastically reduce RAM for larger configurations. Similar optimization {{% available_from "v1.16.0" anomaly %}} can be applied to data read from VictoriaMetrics TSDB. See instructions of how to enable it [here](https://docs.victoriametrics.com/anomaly-detection/faq/#on-disk-mode).
|
||||
|
||||
@@ -121,7 +122,7 @@ Below are the steps to get `vmanomaly` up and running inside a Docker container:
|
||||
1. Pull Docker image:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.28.5
|
||||
docker pull victoriametrics/vmanomaly:v1.28.7
|
||||
```
|
||||
|
||||
2. Create the license file with your license key.
|
||||
@@ -141,7 +142,7 @@ docker run -it \
|
||||
-v ./license:/license \
|
||||
-v ./config.yaml:/config.yaml \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.28.5 \
|
||||
victoriametrics/vmanomaly:v1.28.7 \
|
||||
/config.yaml \
|
||||
--licenseFile=/license \
|
||||
--loggerLevel=INFO \
|
||||
@@ -158,7 +159,7 @@ docker run -it \
|
||||
-e VMANOMALY_DATA_DUMPS_DIR=/tmp/vmanomaly/data \
|
||||
-e VMANOMALY_MODEL_DUMPS_DIR=/tmp/vmanomaly/models \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.28.5 \
|
||||
victoriametrics/vmanomaly:v1.28.7 \
|
||||
/config.yaml \
|
||||
--licenseFile=/license \
|
||||
--loggerLevel=INFO \
|
||||
@@ -171,7 +172,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.28.4
|
||||
image: victoriametrics/vmanomaly:v1.28.7
|
||||
# ...
|
||||
restart: always
|
||||
volumes:
|
||||
@@ -307,6 +308,84 @@ writer:
|
||||
{{% available_from "v1.26.0" anomaly %}} `vmanomaly`'s built-in web UI can be used for prototyping and interactive experimenting to produce vmanomaly's and vmalert's configuration files. Please refer to the [UI documentation](https://docs.victoriametrics.com/anomaly-detection/ui/) for detailed instructions and examples.
|
||||
|
||||

|
||||
> [!TIP]
|
||||
Public playgrounds with pre-configured `vmanomaly` instances and VictoriaMetrics/VictoriaLogs/VictoriaTraces datasources are available for interactive experimentation without the need to set up your own instance or obtain an enterprise license. You can find them in the [UI documentation](https://docs.victoriametrics.com/anomaly-detection/ui/#playgrounds), access them directly via the links - [metrics](https://play-vmanomaly.victoriametrics.com/metrics/), [logs](https://play-vmanomaly.victoriametrics.com/logs/), [traces](https://play-vmanomaly.victoriametrics.com/traces/) - or use the embedded versions in the collapsible blocks.
|
||||
|
||||
{{% collapse name="Playground on VictoriaMetrics Datasource" %}}
|
||||
|
||||
<div class="position-relative mb-3">
|
||||
<button
|
||||
type="button"
|
||||
class="btn btn-primary btn-sm position-absolute top-0 end-0 m-2"
|
||||
style="z-index: 2;"
|
||||
onclick="document.getElementById('vmui-playground-vmanomaly-metrics')?.requestFullscreen?.()"
|
||||
>
|
||||
Fullscreen
|
||||
</button>
|
||||
|
||||
<iframe
|
||||
id="vmui-playground-vmanomaly-metrics"
|
||||
title="VictoriaMetrics Anomaly Detection Playground (Metrics)"
|
||||
allow="fullscreen"
|
||||
loading="lazy"
|
||||
class="w-100 border rounded"
|
||||
style="height: 400px; background: white;"
|
||||
src="https://play-vmanomaly.victoriametrics.com/metrics/"
|
||||
></iframe>
|
||||
</div>
|
||||
|
||||
{{% /collapse %}}
|
||||
|
||||
{{% collapse name="Playground on VictoriaLogs Datasource" %}}
|
||||
|
||||
<div class="position-relative mb-3">
|
||||
<button
|
||||
type="button"
|
||||
class="btn btn-primary btn-sm position-absolute top-0 end-0 m-2"
|
||||
style="z-index: 2;"
|
||||
onclick="document.getElementById('vmui-playground-vmanomaly-logs')?.requestFullscreen?.()"
|
||||
>
|
||||
Fullscreen
|
||||
</button>
|
||||
|
||||
<iframe
|
||||
id="vmui-playground-vmanomaly-logs"
|
||||
title="VictoriaLogs Anomaly Detection Playground (Logs)"
|
||||
allow="fullscreen"
|
||||
loading="lazy"
|
||||
class="w-100 border rounded"
|
||||
style="height: 400px; background: white;"
|
||||
src="https://play-vmanomaly.victoriametrics.com/logs/"
|
||||
></iframe>
|
||||
</div>
|
||||
|
||||
{{% /collapse %}}
|
||||
|
||||
{{% collapse name="Playground on VictoriaTraces Datasource" %}}
|
||||
|
||||
<div class="position-relative mb-3">
|
||||
<button
|
||||
type="button"
|
||||
class="btn btn-primary btn-sm position-absolute top-0 end-0 m-2"
|
||||
style="z-index: 2;"
|
||||
onclick="document.getElementById('vmui-playground-vmanomaly-traces')?.requestFullscreen?.()"
|
||||
>
|
||||
Fullscreen
|
||||
</button>
|
||||
|
||||
<iframe
|
||||
id="vmui-playground-vmanomaly-traces"
|
||||
title="VictoriaTraces Anomaly Detection Playground (Traces)"
|
||||
allowfullscreen
|
||||
allow="fullscreen"
|
||||
loading="lazy"
|
||||
class="w-100 border rounded"
|
||||
style="height: 400px; background: white;"
|
||||
src="https://play-vmanomaly.victoriametrics.com/traces/"
|
||||
></iframe>
|
||||
</div>
|
||||
|
||||
{{% /collapse %}}
|
||||
|
||||
### Recommended steps
|
||||
|
||||
|
||||
@@ -37,6 +37,86 @@ server:
|
||||
|
||||
For impactful parameters please refer to [optimize resource usage](#optimize-resource-usage) section of this page.
|
||||
|
||||
## Playgrounds
|
||||
|
||||
To start exploring the UI, you can use the embedded demos below. They run on public playgrounds (VictoriaMetrics, VictoriaLogs and VictoriaTraces) with preconfigured queries and models:
|
||||
|
||||
{{% collapse name="Playground on VictoriaMetrics Datasource" %}}
|
||||
|
||||
<div class="position-relative mb-3">
|
||||
<button
|
||||
type="button"
|
||||
class="btn btn-primary btn-sm position-absolute top-0 end-0 m-2"
|
||||
style="z-index: 2;"
|
||||
onclick="document.getElementById('vmui-playground-vmanomaly-metrics')?.requestFullscreen?.()"
|
||||
>
|
||||
Fullscreen
|
||||
</button>
|
||||
|
||||
<iframe
|
||||
id="vmui-playground-vmanomaly-metrics"
|
||||
title="VictoriaMetrics Anomaly Detection Playground (Metrics)"
|
||||
allow="fullscreen"
|
||||
loading="lazy"
|
||||
class="w-100 border rounded"
|
||||
style="height: 400px; background: white;"
|
||||
src="https://play-vmanomaly.victoriametrics.com/metrics/"
|
||||
></iframe>
|
||||
</div>
|
||||
|
||||
{{% /collapse %}}
|
||||
|
||||
{{% collapse name="Playground on VictoriaLogs Datasource" %}}
|
||||
|
||||
<div class="position-relative mb-3">
|
||||
<button
|
||||
type="button"
|
||||
class="btn btn-primary btn-sm position-absolute top-0 end-0 m-2"
|
||||
style="z-index: 2;"
|
||||
onclick="document.getElementById('vmui-playground-vmanomaly-logs')?.requestFullscreen?.()"
|
||||
>
|
||||
Fullscreen
|
||||
</button>
|
||||
|
||||
<iframe
|
||||
id="vmui-playground-vmanomaly-logs"
|
||||
title="VictoriaMetrics Anomaly Detection Playground (Logs)"
|
||||
allow="fullscreen"
|
||||
loading="lazy"
|
||||
class="w-100 border rounded"
|
||||
style="height: 400px; background: white;"
|
||||
src="https://play-vmanomaly.victoriametrics.com/logs/"
|
||||
></iframe>
|
||||
</div>
|
||||
|
||||
{{% /collapse %}}
|
||||
|
||||
{{% collapse name="Playground on VictoriaTraces Datasource" %}}
|
||||
|
||||
<div class="position-relative mb-3">
|
||||
<button
|
||||
type="button"
|
||||
class="btn btn-primary btn-sm position-absolute top-0 end-0 m-2"
|
||||
style="z-index: 2;"
|
||||
onclick="document.getElementById('vmui-playground-vmanomaly-traces')?.requestFullscreen?.()"
|
||||
>
|
||||
Fullscreen
|
||||
</button>
|
||||
|
||||
<iframe
|
||||
id="vmui-playground-vmanomaly-traces"
|
||||
title="VictoriaMetrics Anomaly Detection Playground (Traces)"
|
||||
allowfullscreen
|
||||
allow="fullscreen"
|
||||
loading="lazy"
|
||||
class="w-100 border rounded"
|
||||
style="height: 400px; background: white;"
|
||||
src="https://play-vmanomaly.victoriametrics.com/traces/"
|
||||
></iframe>
|
||||
</div>
|
||||
|
||||
{{% /collapse %}}
|
||||
|
||||
## Authentication
|
||||
|
||||
{{% available_from "v1.27.0" anomaly %}} The vmanomaly UI supports proxying authentication headers from [v1.1.0](#v110) and onwards.
|
||||
@@ -421,6 +501,15 @@ If the **results** look good and the **model configuration should be deployed in
|
||||
|
||||
## Changelog
|
||||
|
||||
### v1.4.3
|
||||
Released: 2026-02-09
|
||||
|
||||
vmanomaly version: [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287)
|
||||
|
||||
- Update color palette in dark theme for more contrast and better visibility of anomalies and confidence intervals in the Visualization Panel.
|
||||
|
||||
- Align Model Panel content to improve layout for smaller screens and embedded documentation.
|
||||
|
||||
### v1.4.2
|
||||
Released: 2026-01-17
|
||||
|
||||
@@ -484,10 +573,9 @@ vmanomaly version: [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/
|
||||
|
||||
- IMPROVEMENT: datasource value is initialized from the server reader config (on the first UI initialization) if [mixed mode is used](#mixed-usage). Can be reset to the default value anytime by hitting the "Reset to Default" button next to the datasource field in the [Settings Panel](#settings-panel).
|
||||
|
||||
|
||||
### v1.0.0
|
||||
Released: 2025-10-02
|
||||
|
||||
vmanomaly version: [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1260)
|
||||
|
||||
Initial public release of the vmanomaly UI.
|
||||
Initial public release of the vmanomaly UI.
|
||||
|
||||
@@ -46,37 +46,38 @@ settings:
|
||||
# how and when to run the models is defined by schedulers
|
||||
# https://docs.victoriametrics.com/anomaly-detection/components/scheduler/
|
||||
schedulers:
|
||||
periodic_1d: # alias
|
||||
periodic_online: # alias
|
||||
class: 'periodic' # scheduler class
|
||||
infer_every: "30s"
|
||||
fit_every: "1d"
|
||||
fit_window: "24h"
|
||||
infer_every: "30s" # how often to produce anomaly scores for new data
|
||||
fit_every: "365d" # how often to re-fit the models; online models are effectively fit once, then updated with new data, so they don't require a re-fit
|
||||
fit_window: "3d" # how much historical data to use for fit stage
|
||||
start_from: "00:00" # start from the specified time, i.e. 00:00 in the given timezone; subsequent fits are scheduled every `fit_every` from that point
|
||||
tz: "Europe/Kyiv" # timezone to use for start_from
|
||||
periodic_1w:
|
||||
periodic_offline_1w:
|
||||
class: 'periodic'
|
||||
infer_every: "15m"
|
||||
fit_every: "1h"
|
||||
fit_window: "7d"
|
||||
fit_every: "24h"
|
||||
fit_window: "14d"
|
||||
# if no start_from is specified, jobs will start immediately after service starts
|
||||
|
||||
# what model types and with what hyperparams to run on your data
|
||||
# https://docs.victoriametrics.com/anomaly-detection/components/models/
|
||||
models:
|
||||
zscore: # we can set up alias for model
|
||||
class: 'zscore' # model class
|
||||
class: 'zscore_online' # model class
|
||||
z_threshold: 3.5
|
||||
provide_series: ['anomaly_score'] # what series to produce
|
||||
decay: 0.99 # weight for data points; the value should be in (0, 1], where 1 means giving equal weight to all data
|
||||
provide_series: ['anomaly_score', 'y', 'yhat', 'yhat_upper'] # what series to produce as output of the model
|
||||
queries: ['host_network_receive_errors'] # what queries to run particular model on
|
||||
schedulers: ['periodic_1d'] # will be attached to 1-day schedule, fit every 10m and infer every 30s
|
||||
schedulers: ['periodic_online'] # will be fit once, used for infer every 30s
|
||||
min_dev_from_expected: 0.0 # turned off. if |y - yhat| < min_dev_from_expected, anomaly score will be 0
|
||||
detection_direction: 'above_expected' # detect anomalies only when y > yhat, "peaks"
|
||||
clip_predictions: True # clip predictions to expected data range, i.e. [0, inf] for this query `host_network_receive_errors`
|
||||
prophet: # we can set up alias for model
|
||||
prophet_weekly: # we can set up alias for model
|
||||
class: 'prophet'
|
||||
provide_series: ['anomaly_score', 'yhat', 'yhat_lower', 'yhat_upper']
|
||||
provide_series: ['anomaly_score', 'y', 'yhat', 'yhat_lower', 'yhat_upper']
|
||||
queries: ['cpu_seconds_total']
|
||||
schedulers: ['periodic_1w'] # will be attached to 1-week schedule, fit every 1h and infer every 15m
|
||||
schedulers: ['periodic_offline_1w'] # will be attached to 1-week scheduler, re-fit every 24h and infer every 15m
|
||||
min_dev_from_expected: [0.01, 0.01] # minimum deviation from expected value to be even considered as anomaly
|
||||
anomaly_score_outside_data_range: 1.5 # override default anomaly score outside expected data range
|
||||
detection_direction: 'above_expected'
|
||||
@@ -111,13 +112,18 @@ reader:
|
||||
writer:
|
||||
datasource_url: "http://victoriametrics:8428/"
|
||||
# tenant_id: "0:0" # for VictoriaMetrics cluster, can support "multitenant"
|
||||
# https://docs.victoriametrics.com/anomaly-detection/components/writer/#metrics-formatting
|
||||
metric_format:
|
||||
__name__: $VAR
|
||||
for: $QUERY_KEY
|
||||
|
||||
|
||||
# enable self-monitoring in pull and/or push mode
|
||||
# https://docs.victoriametrics.com/anomaly-detection/components/monitoring/
|
||||
monitoring:
|
||||
pull: # Enable /metrics endpoint.
|
||||
addr: "0.0.0.0"
|
||||
port: 8490
|
||||
# pull: # Enable /metrics endpoint.
|
||||
# addr: "0.0.0.0"
|
||||
# port: 8490
|
||||
|
||||
push: # Enable pushing self-monitoring metrics
|
||||
url: "http://victoriametrics:8428"
|
||||
|
||||
@@ -1219,7 +1219,7 @@ monitoring:
|
||||
Let's pull the docker image for `vmanomaly`:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.28.5
|
||||
docker pull victoriametrics/vmanomaly:v1.28.7
|
||||
```
|
||||
|
||||
Now we can run the docker container putting as volumes both config and model file:
|
||||
@@ -1233,7 +1233,7 @@ docker run -it \
|
||||
-v $(PWD)/license:/license \
|
||||
-v $(PWD)/custom_model.py:/vmanomaly/model/custom.py \
|
||||
-v $(PWD)/custom.yaml:/config.yaml \
|
||||
victoriametrics/vmanomaly:v1.28.5 /config.yaml \
|
||||
victoriametrics/vmanomaly:v1.28.7 /config.yaml \
|
||||
--licenseFile=/license
|
||||
--watch
|
||||
```
|
||||
|
||||
@@ -12,13 +12,22 @@ aliases:
|
||||
- /anomaly-detection/components/reader.html
|
||||
---
|
||||
|
||||
VictoriaMetrics Anomaly Detection (`vmanomaly`) primarily uses [VmReader](#vm-reader) to ingest data. This reader focuses on fetching time-series data directly from VictoriaMetrics with the help of powerful [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) expressions for aggregating, filtering and grouping your data, ensuring seamless integration and efficient data handling.
|
||||
VictoriaMetrics Anomaly Detection (`vmanomaly`) takes Prometheus-compatible metrics as input, read either from [VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/) via [VmReader](#vm-reader) using [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) queries, or from [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) / [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) via [VLogsReader](#victorialogs-reader) using [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/) queries.
|
||||
|
||||
Future updates will introduce additional readers, expanding the range of data sources `vmanomaly` can work with.
|
||||
|
||||
## Playgrounds
|
||||
|
||||
To ease the development and testing of queries for `vmanomaly`'s input data, the following playgrounds can be used for experimenting with MetricsQL and LogsQL queries:
|
||||
|
||||
Please see the respective sections below for each reader:
|
||||
- [MetricsQL playground](#metricsql-playground) for `VmReader`
|
||||
- [LogsQL playground](#logsql-playground) for `VLogsReader`
|
||||
|
||||
## VM reader
|
||||
|
||||
{{% collapse name="Queries format migration (to v1.13.0+)" %}}
|
||||
|
||||
> There is a backward-compatible change {{% available_from "v1.13.0" anomaly %}} of [`queries`](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader) arg of [VmReader](#vm-reader). The new format allows specifying per-query parameters, like `step`, to reduce the amount of data read from VictoriaMetrics TSDB and to allow config flexibility. Please see [per-query parameters](#per-query-parameters) section for the details.
|
||||
|
||||
Old format like
|
||||
@@ -51,10 +60,11 @@ reader:
|
||||
tz: 'UTC' # by default, tz-free data is used throughout the model lifecycle
|
||||
# new query-level arguments will be added in backward-compatible way in future releases
|
||||
```
|
||||
{{% /collapse %}}
|
||||
|
||||
### Per-query parameters
|
||||
|
||||
There is change{{% available_from "v1.13.0" anomaly %}} of [`queries`](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader) arg format. Now each query alias supports the next (sub)fields, which *override reader-level parameters*, if set:
|
||||
There is a change {{% available_from "v1.13.0" anomaly %}} of [`queries`](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader) arg format. Now each query alias supports the following (sub)fields, which *override reader-level parameters*, if set:
|
||||
|
||||
- `expr` (string): MetricsQL/PromQL expression that defines an input for VmReader. As accepted by `/query_range?query=%s`. i.e. `avg(vm_blocks)`
|
||||
|
||||
@@ -418,7 +428,8 @@ Optional argument{{% available_from "v1.25.3" anomaly %}} allows specifying a ti
|
||||
</table>
|
||||
|
||||
<br>
|
||||
Config section example:
|
||||
|
||||
**Config section example**:
|
||||
|
||||
```yaml
|
||||
reader:
|
||||
@@ -441,6 +452,35 @@ reader:
|
||||
latency_offset: '1ms'
|
||||
```
|
||||
|
||||
### MetricsQL Playground
|
||||
|
||||
To experiment with MetricsQL queries for `VmReader`, you can use the [VictoriaMetrics MetricsQL Playground](https://play.victoriametrics.com/), which provides an interactive environment to test and visualize your queries against sample data. You can also access an embedded version of the playground below:
|
||||
|
||||
{{% collapse name="VictoriaMetrics Playground" %}}
|
||||
|
||||
<div class="position-relative mb-3">
|
||||
<button
|
||||
type="button"
|
||||
class="btn btn-primary btn-sm position-absolute top-0 end-0 m-2"
|
||||
style="z-index: 2;"
|
||||
onclick="document.getElementById('vmui-playground-metricsql')?.requestFullscreen?.()"
|
||||
>
|
||||
Fullscreen
|
||||
</button>
|
||||
|
||||
<iframe
|
||||
id="vmui-playground-metricsql"
|
||||
title="VictoriaMetrics MetricsQL Playground"
|
||||
allow="fullscreen"
|
||||
loading="lazy"
|
||||
class="w-100 border rounded"
|
||||
style="height: 400px; background: white;"
|
||||
src="https://play.victoriametrics.com/select/0/vmui/?#/?g0.range_input=24h&g0.end_input=2026-02-09T09%3A57%3A57&g0.relative_time=last_24_hours&g0.tab=0&g0.step_input=30m&g0.expr=sum%28%28rate%28node_cpu_seconds_total%7Bmode%21%3D%22idle%22%7D%5B5m%5D%29%29%29+by+%28service%29&legend_view=table&legend_hide_duplicates=true"
|
||||
></iframe>
|
||||
</div>
|
||||
|
||||
{{% /collapse %}}
|
||||
|
||||
### mTLS protection
|
||||
|
||||
`vmanomaly` supports [mutual TLS (mTLS)](https://en.wikipedia.org/wiki/Mutual_authentication){{% available_from "v1.16.3" anomaly %}} for secure communication across its components, including [VmReader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader), [VmWriter](https://docs.victoriametrics.com/anomaly-detection/components/writer/#vm-writer), and [Monitoring/Push](https://docs.victoriametrics.com/anomaly-detection/components/monitoring/#push-config-parameters). This allows for mutual authentication between the client and server when querying or writing data to [VictoriaMetrics Enterprise, configured for mTLS](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#mtls-protection).
|
||||
@@ -474,7 +514,6 @@ reader:
|
||||
# other config sections, like models, schedulers, writer, ...
|
||||
```
|
||||
|
||||
|
||||
### Healthcheck metrics
|
||||
|
||||
`VmReader` exposes [several healthchecks metrics](https://docs.victoriametrics.com/anomaly-detection/components/monitoring/#reader-behaviour-metrics).
|
||||
@@ -482,11 +521,11 @@ reader:
|
||||
|
||||
## VictoriaLogs reader
|
||||
|
||||
{{% available_from "v1.26.0" anomaly %}} `vmanomaly` adds support for reading data from [VictoriaLogs stats queries](https://docs.victoriametrics.com/victorialogs/querying/#querying-log-range-stats) endpoint with `VLogsReader`. This reader allows quering and analyzing log data stored in [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/), enabling anomaly detection on metrics generated from logs. **Querying [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) is supported with the same reader, as the endpoints for both are equivalent.**
|
||||
{{% available_from "v1.26.0" anomaly %}} `vmanomaly` can read data from [VictoriaLogs stats queries](https://docs.victoriametrics.com/victorialogs/querying/#querying-log-range-stats) endpoint with `VLogsReader`. This reader allows querying and analyzing log data stored in [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/), enabling anomaly detection on metrics generated from logs. **Querying [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) is supported with the same reader, as the endpoints for both are equivalent.**
|
||||
|
||||
Its queries should be expressed in a subset of [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/), which is similar to MetricsQL/PromQL but adapted for log data.
|
||||
Its queries should be expressed in [LogsQL*](https://docs.victoriametrics.com/victorialogs/logsql/) language that both VictoriaLogs and VictoriaTraces support, with the focus on using [stats pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe) functions to calculate metrics from logs.
|
||||
|
||||
> Please be aware that `VLogsReader` is designed to work with a `/select/stats_query_range` endpoint of [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/), so the `<query>` expressions must contain `stats` [pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe) (see [query-examples](#query-examples) section below). The calculated stats is converted into metrics with labels from `by(...)` clause of the `| stats by(...)` pipe, where `stats_func*` is any of the supported [stats function subset](#valid-stats-functions) of [available stats functions](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe-functions), while the `result_name*` is the name of the log field to store the result of the corresponding stats function. The `as` keyword is optional.
|
||||
> Please be aware that `VLogsReader` is designed to work with a `/select/stats_query_range` endpoint of [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/), so the `<query>` expressions must end with `stats` [pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe) (see [query-examples](#query-examples) section below). The calculated stats are converted into metrics with labels from `by(...)` clause of the `| stats by(...)` pipe, where `stats_func*` is any of the supported [stats function subset](#valid-stats-functions) of [available stats functions](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe-functions), while the `result_name*` is the name of the log field to store the result of the corresponding stats function. The `as` keyword is optional.
|
||||
|
||||
### Valid stats functions
|
||||
`VLogsReader` relies on [stats pipe functions](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe-functions) that return **numeric values**, which can be used for anomaly detection on timeseries (metrics). The future addition of similar stats functions in VictoriaLogs will be supported automatically, as long as they return **numeric values**.
|
||||
@@ -508,22 +547,110 @@ The supported stats functions currently include:
|
||||
|
||||
### Query Examples
|
||||
|
||||
> You can test your LogsQL queries with stats pipe functions using our [VictoriaLogs playground](https://play-vmlogs.victoriametrics.com/) or [VictoriaTraces playground](https://play-vtraces.victoriametrics.com/). Use either UI to access graphical results or the `/select/logsql/stats_query_range` endpoint to run your queries and see the raw results, e.g. as this [sample query](https://play-vmlogs.victoriametrics.com/select/logsql/stats_query_range?query=_time%3A5m%20%7C%20stats%20by%20%28_stream%29%20count%28%29%20as%20sample_row&step=1m).
|
||||
#### VictoriaLogs
|
||||
|
||||
Here are examples of simple valid LogsQL queries with stats pipe functions that can be used with `VLogsReader`.
|
||||
|
||||
The following query returns the average value for the duration field over logs matching the [filter](https://docs.victoriametrics.com/victorialogs/logsql/#filters) for `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word):
|
||||
Here are examples of simple [valid LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/) queries with stats pipe functions that can be used with `VLogsReader`.
|
||||
|
||||
1. Ingestion volume - good baseline time series, for detecting dropouts/spikes without depending on any schema.
|
||||
```shellhelp
|
||||
* | stats count() as logs
|
||||
```
|
||||
error | stats avg(duration) as avg_error_duration
|
||||
```
|
||||
|
||||
It is possible to calculate the average over fields with common prefix via `avg(prefix*)` syntax. For example, the following query calculates the number of logs with `foo` prefix having `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word):
|
||||
2. Ingestion rate (normalized) - good for detecting dropouts/spikes without depending on any schema, and also for detecting changes in log volume trends.
|
||||
```shellhelp
|
||||
* | stats rate() as logs_per_sec
|
||||
```
|
||||
|
||||
3. Per-stream rate - good for detecting dropouts/spikes on individual streams, and also for detecting changes in log volume trends on stream level.
|
||||
```shellhelp
|
||||
* | stats by (_stream) rate() as logs_per_sec
|
||||
```
|
||||
error | stats count(foo*) as foo_error_count
|
||||
|
||||
4. Active stream churn - good for detecting changes in the number of active streams. Catches "new sources exploded"/"sources disappeared" patterns.
|
||||
```shellhelp
|
||||
* | stats count_uniq(_stream) as active_streams
|
||||
```
|
||||
|
||||
5. Avg logs per stream - good for detecting changes in log volume trends on stream level, without depending on the number of streams (sources). Catches "new sources exploded"/"sources disappeared" patterns, as well as changes in log volume on stream level.
|
||||
```shellhelp
|
||||
* | stats count() as logs, count_uniq(_stream_id) as streams | math (logs / max(streams, 1)) as logs_per_stream
|
||||
```
|
||||
|
||||
6. Max message size - good for detecting changes in log message size patterns, which can be an indicator of changes in log structure or content.
|
||||
```shellhelp
|
||||
* | len(_msg) as msg_len | stats max(msg_len) as max_msg_len
|
||||
```
|
||||
|
||||
7. P90 words per message - good for detecting changes in the distribution of words per message, which can indicate changes in log content or structure.
|
||||
```shellhelp
|
||||
* | unpack_words as words drop_duplicates | json_array_len(words) as words_count | stats quantile(0.9, words_count) as p90_words_per_msg
|
||||
```
|
||||
|
||||
#### VictoriaTraces
|
||||
|
||||
> [!TIP]
|
||||
Almost identical to VictoriaLogs, but in VictoriaTraces you'll need to exclude index entries. Basically replace `*` with a filter, e.g. `{"resource_attr:service.name"!=""}` to select only spans with non-empty `service.name` resource attribute.
|
||||
|
||||
1. Ingestion volume - good baseline time series, for detecting dropouts/spikes without depending on any schema.
|
||||
```
|
||||
{"resource_attr:service.name"!=""} | stats count() as spans
|
||||
```
|
||||
or for the rate:
|
||||
```shellhelp
|
||||
{"resource_attr:service.name"!=""} | stats rate() as spans_per_sec
|
||||
```
|
||||
|
||||
2. Error volume - good for detecting dropouts/spikes in error spans, without depending on any schema. In VictoriaLogs, you can use a similar query with the `status_code` field, if it exists in your logs.
|
||||
```
|
||||
# spans with `status_code=2`, see: https://github.com/open-telemetry/opentelemetry-proto/blob/main/opentelemetry/proto/trace/v1/trace.proto#L323
|
||||
{"resource_attr:service.name"!=""} AND status_code := "2" | stats count() as error_spans
|
||||
```
|
||||
or for the rate:
|
||||
```shellhelp
|
||||
{"resource_attr:service.name"!=""} AND status_code := "2" | stats rate() as error_spans_per_sec
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
[The stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) in VictoriaTraces means "service_name & span_name" combination.
|
||||
|
||||
```shellhelp
|
||||
{"resource_attr:service.name"!=""} | stats count_uniq(_stream) as active_streams
|
||||
```
|
||||
|
||||
### LogsQL playground
|
||||
|
||||
To experiment with LogsQL queries for `VLogsReader`, you can use the [VictoriaLogs LogsQL Playground](https://play-vmlogs.victoriametrics.com/), which provides an interactive environment to test and visualize your queries against sample log data.
|
||||
|
||||
Similarly, [VictoriaTraces LogsQL Playground](https://play-vtraces.victoriametrics.com/) can be used for testing LogsQL queries against sample trace data.
|
||||
|
||||
You can also access the **embedded version of the playground below** (VictoriaLogs datasource):
|
||||
|
||||
{{% collapse name="VictoriaLogs LogsQL Playground" %}}
|
||||
|
||||
<div class="position-relative mb-3">
|
||||
<button
|
||||
type="button"
|
||||
class="btn btn-primary btn-sm position-absolute top-0 end-0 m-2"
|
||||
style="z-index: 2;"
|
||||
onclick="document.getElementById('vmui-playground-logsql')?.requestFullscreen?.()"
|
||||
>
|
||||
Fullscreen
|
||||
</button>
|
||||
|
||||
<iframe
|
||||
id="vmui-playground-logsql"
|
||||
title="VictoriaLogs LogsQL Playground"
|
||||
allow="fullscreen"
|
||||
loading="lazy"
|
||||
class="w-100 border rounded"
|
||||
style="height: 400px; background: white;"
|
||||
src="https://play-vmlogs.victoriametrics.com/select/vmui/?#/?query=*+%7C+stats+rate%28%29+as+logs_per_sec&g0.range_input=30m&g0.end_input=2026-02-09T10%3A01%3A26&g0.relative_time=last_30_minutes&graph_mode=stats&limit=100&bars_count=48"
|
||||
></iframe>
|
||||
</div>
|
||||
|
||||
{{% /collapse %}}
|
||||
|
||||
|
||||
### Config parameters
|
||||
|
||||
<table class="params">
|
||||
|
||||
@@ -10,9 +10,9 @@ sitemap:
|
||||
|
||||
- To use *vmanomaly*, part of the enterprise package, a license key is required. Obtain your key [here](https://victoriametrics.com/products/enterprise/trial/) for this tutorial or for enterprise use.
|
||||
- In the tutorial, we'll be using the following VictoriaMetrics components:
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.134.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.134.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.134.0)
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.135.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.135.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.135.0)
|
||||
- [Grafana](https://grafana.com/) (v10.2.1)
|
||||
- [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/)
|
||||
- [Node exporter](https://github.com/prometheus/node_exporter#node-exporter) (v1.7.0) and [Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) (v0.27.0)
|
||||
@@ -323,7 +323,7 @@ Let's wrap it all up together into the `docker-compose.yml` file.
|
||||
services:
|
||||
vmagent:
|
||||
container_name: vmagent
|
||||
image: victoriametrics/vmagent:v1.134.0
|
||||
image: victoriametrics/vmagent:v1.135.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -340,7 +340,7 @@ services:
|
||||
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.134.0
|
||||
image: victoriametrics/victoria-metrics:v1.135.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -373,7 +373,7 @@ services:
|
||||
|
||||
vmalert:
|
||||
container_name: vmalert
|
||||
image: victoriametrics/vmalert:v1.134.0
|
||||
image: victoriametrics/vmalert:v1.135.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -37,12 +37,6 @@ config:
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [__meta_kubernetes_node_name]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/$1/proxy/metrics
|
||||
- job_name: "kubernetes-nodes-cadvisor"
|
||||
scheme: https
|
||||
tls_config:
|
||||
@@ -51,15 +45,12 @@ config:
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
metrics_path: /metrics/cadvisor
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [__meta_kubernetes_node_name]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
|
||||
- source_labels: [__metrics_path__]
|
||||
target_label: metrics_path
|
||||
metric_relabel_configs:
|
||||
- action: replace
|
||||
source_labels: [pod]
|
||||
|
||||
@@ -37,12 +37,6 @@ server:
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [ __meta_kubernetes_node_name ]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/$1/proxy/metrics
|
||||
- job_name: "kubernetes-nodes-cadvisor"
|
||||
scheme: https
|
||||
tls_config:
|
||||
@@ -51,15 +45,12 @@ server:
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
metrics_path: /metrics/cadvisor
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [ __meta_kubernetes_node_name ]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
|
||||
- source_labels: [__metrics_path__]
|
||||
target_label: metrics_path
|
||||
metric_relabel_configs:
|
||||
- action: replace
|
||||
source_labels: [pod]
|
||||
@@ -78,4 +69,4 @@ server:
|
||||
source_labels: [id]
|
||||
regex: '^/system\.slice/(.+)\.service$'
|
||||
target_label: systemd_service_name
|
||||
replacement: '${1}'
|
||||
replacement: '${1}'
|
||||
|
||||
@@ -6,27 +6,26 @@ build:
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
VictoriaMetrics and VictoriaLogs support ingestion [metrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#sending-data-via-opentelemetry)
|
||||
and [logs](https://docs.victoriametrics.com/victorialogs/data-ingestion/opentelemetry/) in OpenTelemetry format.
|
||||
This guide covers examples of using [opentelemetry-collector](https://opentelemetry.io/docs/collector/) and direct pushing of metrics and logs from the Go application.
|
||||
|
||||
This guide walks you through deploying VictoriaMetrics and VictoriaLogs on Kubernetes, and collecting [metrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#sending-data-via-opentelemetry) and [logs](https://docs.victoriametrics.com/victorialogs/data-ingestion/opentelemetry/) from a Go application either directly or via the [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/).
|
||||
|
||||
## Pre-Requirements
|
||||
|
||||
* [kubernetes cluster](https://kubernetes.io/docs/tasks/tools/#kind)
|
||||
* [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl)
|
||||
* [helm](https://helm.sh/docs/intro/install/)
|
||||
- [Kubernetes cluster](https://kubernetes.io/docs/tasks/tools/#kind)
|
||||
- [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl)
|
||||
- [helm](https://helm.sh/docs/intro/install/)
|
||||
|
||||
## Installation
|
||||
|
||||
### VictoriaMetrics
|
||||
In order to collect metrics and logs, install the following components:
|
||||
|
||||
Install VictoriaMetrics helm repo:
|
||||
```sh
|
||||
helm repo add vm https://victoriametrics.github.io/helm-charts/
|
||||
helm repo update
|
||||
```
|
||||
- [VictoriaMetrics](#victoriametrics)
|
||||
- [VictoriaLogs](#victorialogs)
|
||||
|
||||
### VictoriaMetrics Installation {#victoriametrics}
|
||||
|
||||
Create a config file for the VictoriaMetrics chart. The following enables conversion of OpenTelemetry (OTEL) metric names into the Prometheus canonical format:
|
||||
|
||||
Add VictoriaMetrics chart values to convert OTEL metric names to Prometheus canonical format:
|
||||
```sh
|
||||
cat << EOF > vm-values.yaml
|
||||
server:
|
||||
@@ -35,19 +34,29 @@ server:
|
||||
EOF
|
||||
```
|
||||
|
||||
Install VictoriaMetrics single-server version:
|
||||
Install the VictoriaMetrics Helm repo:
|
||||
|
||||
```sh
|
||||
helm repo add vm https://victoriametrics.github.io/helm-charts/
|
||||
helm repo update
|
||||
```
|
||||
|
||||
Install the VictoriaMetrics single-server version:
|
||||
|
||||
```sh
|
||||
helm install victoria-metrics vm/victoria-metrics-single -f vm-values.yaml
|
||||
```
|
||||
|
||||
Verify it's up and running:
|
||||
|
||||
```sh
|
||||
kubectl get pods
|
||||
# NAME READY STATUS RESTARTS AGE
|
||||
# victoria-metrics-victoria-metrics-single-server-0 1/1 Running 0 3m1s
|
||||
```
|
||||
|
||||
VictoriaMetrics helm chart provides the following URL for writing data:
|
||||
The VictoriaMetrics Helm chart provides the following URL for writing data:
|
||||
|
||||
```text
|
||||
Write URL inside the kubernetes cluster:
|
||||
http://victoria-metrics-victoria-metrics-single-server.default.svc.cluster.local.:8428/<protocol-specific-write-endpoint>
|
||||
@@ -55,26 +64,30 @@ Write URL inside the kubernetes cluster:
|
||||
All supported write endpoints can be found at https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-time-series-data.
|
||||
```
|
||||
|
||||
For OpenTelemetry VictoriaMetrics write endpoint is:
|
||||
For OpenTelemetry, the VictoriaMetrics write endpoint is:
|
||||
|
||||
```text
|
||||
http://victoria-metrics-victoria-metrics-single-server.default.svc.cluster.local.:8428/opentelemetry/v1/metrics
|
||||
```
|
||||
|
||||
### VictoriaLogs
|
||||
### VictoriaLogs Installation {#victorialogs}
|
||||
|
||||
Install VictoriaLogs:
|
||||
|
||||
```sh
|
||||
helm install victoria-logs vm/victoria-logs-single
|
||||
```
|
||||
|
||||
Verify it's up and running:
|
||||
|
||||
```sh
|
||||
kubectl get pods
|
||||
# NAME READY STATUS RESTARTS AGE
|
||||
# victoria-logs-victoria-logs-single-server-0 1/1 Running 0 1m10s
|
||||
```
|
||||
|
||||
VictoriaLogs helm chart provides the following URL for writing data:
|
||||
The VictoriaLogs Helm chart provides the following URL for writing data:
|
||||
|
||||
```text
|
||||
Write URL inside the kubernetes cluster:
|
||||
http://victoria-logs-victoria-logs-single-server.default.svc.cluster.local.:9428/<protocol-specific-write-endpoint>
|
||||
@@ -82,23 +95,28 @@ Write URL inside the kubernetes cluster:
|
||||
All supported write endpoints can be found at https://docs.victoriametrics.com/victorialogs/data-ingestion/
|
||||
```
|
||||
|
||||
For OpenTelemetry VictoriaLogs write endpoint is:
|
||||
For OpenTelemetry, the VictoriaLogs write endpoint is:
|
||||
|
||||
```text
|
||||
http://victoria-logs-victoria-logs-single-server.default.svc.cluster.local.:9428/insert/opentelemetry/v1/logs
|
||||
```
|
||||
|
||||
## OpenTelemetry collector with VictoriaMetrics and VictoriaLogs
|
||||
## OpenTelemetry Collector with VictoriaMetrics and VictoriaLogs
|
||||
|
||||
The [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) can be configured to route incoming metrics and logs from applications to the VictoriaMetrics and VictoriaLogs services running in the Kubernetes cluster.
|
||||
|
||||

|
||||
{width="500"}
|
||||
|
||||
Add OpenTelemetry helm repo:
|
||||
Add the OpenTelemetry Collector Helm repo:
|
||||
|
||||
```sh
|
||||
helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
|
||||
helm repo update
|
||||
```
|
||||
|
||||
Add OpenTelemetry Collector values:
|
||||
Create a config file for the OpenTelemetry Collector:
|
||||
|
||||
```sh
|
||||
cat << EOF > otel-values.yaml
|
||||
mode: deployment
|
||||
@@ -145,122 +163,157 @@ config:
|
||||
EOF
|
||||
```
|
||||
|
||||
Install OpenTelemetry Collector helm chart:
|
||||
Install the OpenTelemetry Collector:
|
||||
|
||||
```sh
|
||||
helm upgrade -i otel open-telemetry/opentelemetry-collector -f otel-values.yaml
|
||||
```
|
||||
|
||||
Check if OpenTelemetry Collector pod is up and running:
|
||||
Check that the OpenTelemetry Collector pod is up and running:
|
||||
|
||||
```sh
|
||||
kubectl get pod
|
||||
kubectl get pods
|
||||
# NAME READY STATUS RESTARTS AGE
|
||||
# otel-opentelemetry-collector-7467bbb559-2pq2n 1/1 Running 0 23m
|
||||
```
|
||||
|
||||
Forward VictoriaMetrics port to local machine to explore metrics ingested by the collector:
|
||||
## Confirm that metrics and logs are being ingested
|
||||
|
||||
To confirm metrics are being ingested by the Collector, port forward the VictoriaMetrics service:
|
||||
|
||||
```sh
|
||||
kubectl port-forward svc/victoria-metrics-victoria-metrics-single-server 8428
|
||||
```
|
||||
|
||||
Visit [http://localhost:8428/vmui/#/?g0.expr=k8s_container_ready](http://localhost:8428/vmui/#/?g0.expr=k8s_container_ready) to check if metric `k8s_container_ready` is present.
|
||||
Check other available metrics by visiting [cardinality explorer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-explorer) page.
|
||||
Visit [http://localhost:8428/vmui/#/?g0.expr=k8s_container_ready&g0.tab=1](http://localhost:8428/vmui/#/?g0.expr=k8s_container_ready&g0.tab=1) to check if metric `k8s_container_ready` is present.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">VictoriaMetrics UI (VMUI) showing the <code>k8s_container_ready</code> metric</figcaption>
|
||||
|
||||
> [!NOTE] Tip
|
||||
> Use the [cardinality explorer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-explorer) to inspect all available metrics.
|
||||
|
||||
To confirm that logs are being ingested by the Collector, port forward the VictoriaLogs service with the following command:
|
||||
|
||||
Forward VictoriaLogs port to local machine to explore logs ingested by the collector:
|
||||
```sh
|
||||
kubectl port-forward svc/victoria-logs-victoria-logs-single-server 9428
|
||||
```
|
||||
|
||||
Visit [http://localhost:9428/select/vmui](http://localhost:9428/select/vmui) to check if logs ingested by collector are present.
|
||||
Visit [http://localhost:9428/select/vmui](http://localhost:9428/select/vmui) to check if logs ingested by the Collector are present.
|
||||
|
||||
The full version of possible configuration options for the collector can be found in [OpenTelemetry docs](https://opentelemetry.io/docs/collector/configuration/).
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">VMUI for VictoriaLogs showing ingested log entries</figcaption>
|
||||
|
||||
See the [OpenTelemetry docs](https://opentelemetry.io/docs/collector/configuration/) for all configuration options.
|
||||
|
||||
## Sending metrics and logs from Go application
|
||||
|
||||
Metrics and logs can be sent via OpenTelemetry instrumentation libraries. You can use any compatible OpenTelemetry
|
||||
instrumentation [clients](https://opentelemetry.io/docs/languages/).
|
||||
In our example, we'll create a WEB server in [Golang](https://go.dev/), instrument it with metrics and logs and configure
|
||||
it to send telemetry data to OpenTelemetry collector. The collector will then forward received data to
|
||||
VictoriaMetrics or VictoriaLogs.
|
||||
Metrics and logs can be sent via OpenTelemetry instrumentation libraries. You can use any compatible OpenTelemetry [instrumentation clients](https://opentelemetry.io/docs/languages/) in your application.
|
||||
|
||||
### Sending to OpenTelemetry collector
|
||||
In our example, we'll create a web server in [Go](https://go.dev/), with metrics and logs instrumented and sent via the OpenTelemetry Collector. The Collector then forwards the received data to either VictoriaMetrics or VictoriaLogs.
|
||||
|
||||
Create file `main.go` from [example](app.go-collector.example) that implements a dice roll WEB server instrumented with
|
||||
OpenTelemetry SDK and is configured to send data to OpenTelemetry collector at http://localhost:4318 address.
|
||||
See how to setup and run OpenTelemetry collector [here](#OpenTelemetry-collector-with-VictoriaMetrics-and-VictoriaLogs).
|
||||
### Sending to OpenTelemetry Collector
|
||||
|
||||
Download the [example code](app.go-collector.example) and rename it as `main.go`. The example code implements a dice roll web server that uses the OpenTelemetry SDK to send data to the OpenTelemetry Collector at `http://localhost:4318`.
|
||||
|
||||
> [!NOTE] Tip
|
||||
> See how to set up and run OpenTelemetry Collector [here](#opentelemetry-collector-with-victoriametrics-and-victorialogs).
|
||||
|
||||
First, port forward the OpenTelemetry Collector service in your cluster:
|
||||
|
||||
```sh
|
||||
kubectl port-forward svc/otel-opentelemetry-collector 4318
|
||||
```
|
||||
|
||||
Next, open a terminal in the same directory as the example code and execute the following commands:
|
||||
|
||||
In the same directory with the file create the `go.mod` file and execute following commands:
|
||||
```sh
|
||||
go mod init vm/otel
|
||||
go mod tidy
|
||||
```
|
||||
|
||||
Now try running the application:
|
||||
|
||||
```sh
|
||||
go run .
|
||||
```
|
||||
|
||||
By default, the application from example is listening at `http://localhost:8080`. Start sending requests
|
||||
to http://localhost:8080/rolldice endpoint to generate some metrics. The following command will send 20 requests:
|
||||
By default, the application in the example listens on `http://localhost:8080`. Start sending requests
|
||||
to the `http://localhost:8080/rolldice` endpoint to generate some metrics.
|
||||
|
||||
Run the following command to send 20 requests to the dice roll example application:
|
||||
|
||||
```sh
|
||||
for i in `seq 1 20`; do curl http://localhost:8080/rolldice; done
|
||||
```
|
||||
|
||||
After a few seconds you should start seeing metrics sent to VictoriaMetrics by visiting [http://localhost:8428/vmui/#/?g0.expr=dice_rolls_total](http://localhost:8428/vmui/#/?g0.expr=dice_rolls_total)
|
||||
After a few seconds, you should start seeing metrics sent to VictoriaMetrics by visiting [http://localhost:8428/vmui/#/?g0.expr=dice_rolls_total](http://localhost:8428/vmui/#/?g0.expr=dice_rolls_total)
|
||||
in your browser or by querying the metric `dice_rolls_total` in the UI.
|
||||

|
||||
|
||||
Logs should be available by visiting [http://localhost:9428/select/vmui](http://localhost:9428/select/vmui)
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">VMUI showing collected metrics for <code>dice_rolls_total</code></figcaption>
|
||||
|
||||
Logs should be available by visiting [http://localhost:9428/select/vmui](http://localhost:9428/select/vmui)
|
||||
using query `service.name: unknown_service:otel`.
|
||||

|
||||
|
||||
### Sending without OpenTelemetry collector
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">VMUI for VictoriaLogs showing collected logs for <code>service.name: unknown_service:otel</code></figcaption>
|
||||
|
||||
Metrics and logs can be ingested into VictoriaMetrics and VictoriaLogs directly via HTTP requests.
|
||||
Use any compatible OpenTelemetry instrumentation [clients](https://opentelemetry.io/docs/languages/).
|
||||
### Sending without OpenTelemetry Collector
|
||||
|
||||
You can send telemetry directly from your application to VictoriaMetrics and VictoriaLogs; the Collector is optional. You may use any OpenTelemetry [instrumentation client](https://opentelemetry.io/docs/languages/) to communicate directly with VictoriaMetrics and VictoriaLogs.
|
||||
|
||||

|
||||
{width="500"}
|
||||
|
||||
In our example, we'll create a WEB server in [Golang](https://go.dev/), instrument it with metrics and logs and configure
|
||||
it to send this telemetry data to VictoriaMetrics and VictoriaLogs.
|
||||
This time, we'll run a different web server, also in [Go](https://go.dev/) and instrumented with metrics and logs. This demo application sends telemetry data directly to VictoriaMetrics and VictoriaLogs services.
|
||||
|
||||
Download the [example code](app.go.example) and rename it as `main.go`. In the same directory, execute the following commands:
|
||||
|
||||
Create file `main.go` from [example](app.go.example). In the same directory with the file create the `go.mod` file and execute following commands:
|
||||
```sh
|
||||
go mod init vm/otel
|
||||
go mod tidy
|
||||
```
|
||||
|
||||
The example implements WEB server with two HTTP handlers: `/api/slow` and `/api/fast`. Start the application:
|
||||
The example implements a web server with two HTTP handlers: `/api/slow` and `/api/fast`. Start the application with:
|
||||
|
||||
```sh
|
||||
go run main.go
|
||||
2024/03/25 19:27:41 Starting web server...
|
||||
2024/03/25 19:27:41 web server started at localhost:8081.
|
||||
```
|
||||
|
||||
Make sure that VictoriaMetrics and VictoriaLogs are available locally at their default ports:
|
||||
Make sure that VictoriaMetrics and VictoriaLogs are available locally at their default ports. In a separate terminal, port forward the VictoriaMetrics and VictoriaLogs services:
|
||||
|
||||
```sh
|
||||
# port-forward victoriametrics to ingest metrics
|
||||
kubectl port-forward victoria-metrics-victoria-metrics-single-server-0 8428
|
||||
kubectl port-forward svc/victoria-metrics-victoria-metrics-single-server 8428
|
||||
# port-forward victorialogs to ingest logs
|
||||
kubectl port-forward victoria-logs-victoria-logs-single-server-0 9428
|
||||
kubectl port-forward svc/victoria-logs-victoria-logs-single-server 9428
|
||||
```
|
||||
|
||||
Visit application links [http://localhost:8081/api/fast](http://localhost:8081/api/fast) or [http://localhost:8081/api/slow](http://localhost:8081/api/slow)
|
||||
couple of times. The application will generate metrics and logs and will send them to VictoriaMetrics and VictoriaLogs.
|
||||
Generate a few HTTP requests to both routes so the application sends metrics and logs to VictoriaMetrics and VictoriaLogs.
|
||||
|
||||
After a few seconds you should start seeing metrics sent to VictoriaMetrics by visiting
|
||||
[http://localhost:8428/vmui/#/?g0.expr=http_requests_total](http://localhost:8428/vmui/#/?g0.expr=http_requests_total).
|
||||
```sh
|
||||
for i in `seq 1 20`; do curl http://localhost:8081/api/fast; done
|
||||
for i in `seq 1 5`; do curl http://localhost:8081/api/slow; done
|
||||
```
|
||||
|
||||

|
||||
After a few seconds, you should start seeing metrics sent to VictoriaMetrics by visiting [http://localhost:8428/vmui/#/?g0.expr=http_requests_total](http://localhost:8428/vmui/#/?g0.expr=http_requests_total).
|
||||
|
||||
Check other available metrics by visiting [cardinality explorer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-explorer) page.
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">VMUI showing metrics for <code>http_requests_total</code></figcaption>
|
||||
|
||||
> [!NOTE] Tip
|
||||
> Check other available metrics by visiting the [cardinality explorer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-explorer) page.
|
||||
|
||||
Logs should be available by visiting [http://localhost:9428/select/vmui](http://localhost:9428/select/vmui)
|
||||
using query `service.name: unknown_service:otel`.
|
||||
|
||||

|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">VMUI for VictoriaLogs showing logs for <code>service.name: unknown_service:otel</code></figcaption>
|
||||
|
||||
## Limitations
|
||||
|
||||
* VictoriaMetrics and VictoriaLogs do not support experimental JSON encoding [format](https://github.com/open-telemetry/opentelemetry-proto/blob/main/examples/metrics.json).
|
||||
* VictoriaMetrics supports only `AggregationTemporalityCumulative` type for [histogram](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#histogram) and [summary](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#summary-legacy). Either consider using cumulative temporality or try [`delta-to-cumulative processor`](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/deltatocumulativeprocessor) to make conversion to cumulative temporality in OTEL Collector.
|
||||
- VictoriaMetrics and VictoriaLogs do not support experimental JSON encoding [format](https://github.com/open-telemetry/opentelemetry-proto/blob/main/examples/metrics.json).
|
||||
- VictoriaMetrics supports only the `AggregationTemporalityCumulative` type for [histogram](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#histogram) and [summary](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#summary-legacy). Either consider using cumulative temporality or use the [`delta-to-cumulative processor`](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/deltatocumulativeprocessor) to convert to cumulative temporality in OpenTelemetry Collector.
|
||||
|
||||
@@ -28,7 +28,7 @@ import (
|
||||
"go.opentelemetry.io/otel/sdk/metric/metricdata"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
"go.opentelemetry.io/otel/sdk/trace"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.37.0"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
|
After Width: | Height: | Size: 412 KiB |
BIN
docs/guides/getting-started-with-opentelemetry/vmui-vlogs.webp
Normal file
|
After Width: | Height: | Size: 577 KiB |
@@ -249,27 +249,27 @@ services:
|
||||
- grafana_data:/var/lib/grafana/
|
||||
|
||||
vmsingle:
|
||||
image: victoriametrics/victoria-metrics:v1.134.0
|
||||
image: victoriametrics/victoria-metrics:v1.135.0
|
||||
command:
|
||||
- -httpListenAddr=0.0.0.0:8429
|
||||
|
||||
vmstorage:
|
||||
image: victoriametrics/vmstorage:v1.134.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.135.0-cluster
|
||||
|
||||
vminsert:
|
||||
image: victoriametrics/vminsert:v1.134.0-cluster
|
||||
image: victoriametrics/vminsert:v1.135.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8400
|
||||
- -httpListenAddr=0.0.0.0:8480
|
||||
|
||||
vmselect:
|
||||
image: victoriametrics/vmselect:v1.134.0-cluster
|
||||
image: victoriametrics/vmselect:v1.135.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8401
|
||||
- -httpListenAddr=0.0.0.0:8481
|
||||
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.134.0
|
||||
image: victoriametrics/vmagent:v1.135.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
command:
|
||||
@@ -278,7 +278,7 @@ services:
|
||||
- -remoteWrite.url=http://vmsingle:8429/api/v1/write
|
||||
|
||||
vmgateway-cluster:
|
||||
image: victoriametrics/vmgateway:v1.134.0-enterprise
|
||||
image: victoriametrics/vmgateway:v1.135.0-enterprise
|
||||
ports:
|
||||
- 8431:8431
|
||||
volumes:
|
||||
@@ -294,7 +294,7 @@ services:
|
||||
- -auth.oidcDiscoveryEndpoints=http://keycloak:8080/realms/master/.well-known/openid-configuration
|
||||
|
||||
vmgateway-single:
|
||||
image: victoriametrics/vmgateway:v1.134.0-enterprise
|
||||
image: victoriametrics/vmgateway:v1.135.0-enterprise
|
||||
ports:
|
||||
- 8432:8431
|
||||
volumes:
|
||||
@@ -405,7 +405,7 @@ Once iDP configuration is done, vmagent configuration needs to be updated to use
|
||||
|
||||
```yaml
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.134.0
|
||||
image: victoriametrics/vmagent:v1.135.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
- ./vmagent-client-secret:/etc/vmagent/oauth2-client-secret
|
||||
|
||||
@@ -187,12 +187,6 @@ scrape_configs:
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [__meta_kubernetes_node_name]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/$1/proxy/metrics
|
||||
- job_name: "kubernetes-nodes-cadvisor"
|
||||
scheme: https
|
||||
tls_config:
|
||||
@@ -201,15 +195,12 @@ scrape_configs:
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
metrics_path: /metrics/cadvisor
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [__meta_kubernetes_node_name]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
|
||||
- source_labels: [__metrics_path__]
|
||||
target_label: metrics_path
|
||||
metric_relabel_configs:
|
||||
- action: replace
|
||||
source_labels: [pod]
|
||||
|
||||
@@ -213,12 +213,6 @@ config:
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [__meta_kubernetes_node_name]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/$1/proxy/metrics
|
||||
- job_name: "kubernetes-nodes-cadvisor"
|
||||
scheme: https
|
||||
tls_config:
|
||||
@@ -227,15 +221,12 @@ config:
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
metrics_path: /metrics/cadvisor
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [__meta_kubernetes_node_name]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
|
||||
- source_labels: [__metrics_path__]
|
||||
target_label: metrics_path
|
||||
metric_relabel_configs:
|
||||
- action: replace
|
||||
source_labels: [pod]
|
||||
|
||||
@@ -6,172 +6,108 @@ build:
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
**This guide covers:**
|
||||
|
||||
* The setup of a [VictoriaMetrics Single](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) in [Kubernetes](https://kubernetes.io/) via Helm charts
|
||||
* How to scrape metrics from k8s components using service discovery
|
||||
* How to visualize stored data
|
||||
* How to store metrics in [VictoriaMetrics](https://victoriametrics.com) tsdb
|
||||
This guide walks you through deploying a [single-node version of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) on Kubernetes using Helm.
|
||||
|
||||
At the end of this guide, you will know:
|
||||
|
||||
- How to install VictoriaMetrics single node in Kubernetes.
|
||||
- How to scrape metrics from Kubernetes components using service discovery.
|
||||
- How to store metrics in [VictoriaMetrics](https://victoriametrics.com) time series database.
|
||||
- How to visualize stored data with Grafana.
|
||||
|
||||
**Precondition**
|
||||
|
||||
We will use:
|
||||
* [Kubernetes cluster 1.31.1-gke.1678000](https://cloud.google.com/kubernetes-engine)
|
||||
> We use GKE cluster from [GCP](https://cloud.google.com/) but this guide is also applied on any Kubernetes cluster. For example [Amazon EKS](https://aws.amazon.com/ru/eks/).
|
||||
* [Helm 3.14+](https://helm.sh/docs/intro/install)
|
||||
* [kubectl 1.31](https://kubernetes.io/docs/tasks/tools/install-kubectl)
|
||||
|
||||
- [Kubernetes cluster 1.34](https://cloud.google.com/kubernetes-engine)
|
||||
- [Helm 4.1.0+](https://helm.sh/docs/intro/install)
|
||||
- [kubectl 1.34.3](https://kubernetes.io/docs/tasks/tools/install-kubectl)
|
||||
|
||||
> We use a GKE cluster from [GCP](https://cloud.google.com/), but this guide also applies to any Kubernetes cluster. For example, [Amazon EKS](https://aws.amazon.com/eks/) or an on-premises cluster.
|
||||
|
||||

|
||||
|
||||
## 1. VictoriaMetrics Helm repository
|
||||
|
||||
You need to add the VictoriaMetrics Helm repository to install VictoriaMetrics components. We’re going to use [VictoriaMetrics Single](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/). You can do this by running the following command:
|
||||
|
||||
Run the following command to add the VictoriaMetrics Helm repository:
|
||||
|
||||
```shell
|
||||
helm repo add vm https://victoriametrics.github.io/helm-charts/
|
||||
```
|
||||
|
||||
|
||||
Update Helm repositories:
|
||||
|
||||
```shell
|
||||
helm repo update
|
||||
```
|
||||
|
||||
To verify that everything is set up correctly you may run this command:
|
||||
To verify that everything is set up correctly, you may run this command:
|
||||
|
||||
```shell
|
||||
helm search repo vm/
|
||||
```
|
||||
|
||||
The expected output is:
|
||||
You should get a list of charts similar to this:
|
||||
|
||||
```text
|
||||
NAME CHART VERSION APP VERSION DESCRIPTION
|
||||
vm/victoria-logs-single 0.9.3 v1.16.0 Victoria Logs Single version - high-performance...
|
||||
vm/victoria-metrics-agent 0.17.2 v1.113.0 Victoria Metrics Agent - collects metrics from ...
|
||||
vm/victoria-metrics-alert 0.15.0 v1.113.0 Victoria Metrics Alert - executes a list of giv...
|
||||
vm/victoria-metrics-anomaly 1.9.0 v1.21.0 Victoria Metrics Anomaly Detection - a service ...
|
||||
vm/victoria-metrics-auth 0.10.0 v1.113.0 Victoria Metrics Auth - is a simple auth proxy ...
|
||||
vm/victoria-metrics-cluster 0.19.2 v1.113.0 Victoria Metrics Cluster version - high-perform...
|
||||
vm/victoria-metrics-common 0.0.42 Victoria Metrics Common - contains shared templ...
|
||||
vm/victoria-metrics-distributed 0.9.0 v1.113.0 A Helm chart for Running VMCluster on Multiple ...
|
||||
vm/victoria-metrics-gateway 0.8.0 v1.113.0 Victoria Metrics Gateway - Auth & Rate-Limittin...
|
||||
vm/victoria-metrics-k8s-stack 0.39.0 v1.113.0 Kubernetes monitoring on VictoriaMetrics stack....
|
||||
vm/victoria-metrics-operator 0.43.0 v0.54.1 Victoria Metrics Operator
|
||||
vm/victoria-metrics-single 0.15.1 v1.113.0 Victoria Metrics Single version - high-performa...
|
||||
NAME CHART VERSION APP VERSION DESCRIPTION
|
||||
vm/victoria-metrics-single 0.29.0 v1.134.0 VictoriaMetrics Single version - high-performan...
|
||||
vm/victoria-metrics-agent 0.30.0 v1.134.0 VictoriaMetrics Agent - collects metrics from v...
|
||||
vm/victoria-metrics-alert 0.30.0 v1.134.0 VictoriaMetrics Alert - executes a list of give...
|
||||
vm/victoria-metrics-anomaly 1.12.9 v1.28.2 VictoriaMetrics Anomaly Detection - a service t...
|
||||
...(list continues)...
|
||||
```
|
||||
|
||||
## 2. Install [VictoriaMetrics single](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) from Helm Chart
|
||||
|
||||
## 2. Install [VictoriaMetrics Single](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) from Helm Chart
|
||||
Run this command in your terminal to install [VictoriaMetrics single node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) to the default [namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) in your cluster:
|
||||
|
||||
Run this command in your terminal:
|
||||
|
||||
```text
|
||||
```shell
|
||||
helm install vmsingle vm/victoria-metrics-single -f https://docs.victoriametrics.com/guides/examples/guide-vmsingle-values.yaml
|
||||
```
|
||||
|
||||
Here is full file content `guide-vmsingle-values.yaml`
|
||||
Below are the key sections in the chart values file [`guide-vmsingle-values.yaml`](https://docs.victoriametrics.com/guides/examples/guide-vmsingle-values.yaml):
|
||||
|
||||
```yaml
|
||||
server:
|
||||
scrape:
|
||||
enabled: true
|
||||
configMap: ""
|
||||
config:
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
scrape_configs:
|
||||
- job_name: victoriametrics
|
||||
static_configs:
|
||||
- targets: [ "localhost:8428" ]
|
||||
- job_name: "kubernetes-apiservers"
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
relabel_configs:
|
||||
- source_labels:
|
||||
[
|
||||
__meta_kubernetes_namespace,
|
||||
__meta_kubernetes_service_name,
|
||||
__meta_kubernetes_endpoint_port_name,
|
||||
]
|
||||
action: keep
|
||||
regex: default;kubernetes;https
|
||||
- job_name: "kubernetes-nodes"
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [ __meta_kubernetes_node_name ]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/$1/proxy/metrics
|
||||
- job_name: "kubernetes-nodes-cadvisor"
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [ __meta_kubernetes_node_name ]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
|
||||
metric_relabel_configs:
|
||||
- action: replace
|
||||
source_labels: [pod]
|
||||
regex: '(.+)'
|
||||
target_label: pod_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
source_labels: [container]
|
||||
regex: '(.+)'
|
||||
target_label: container_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
target_label: name
|
||||
replacement: k8s_stub
|
||||
- action: replace
|
||||
source_labels: [id]
|
||||
regex: '^/system\.slice/(.+)\.service$'
|
||||
target_label: systemd_service_name
|
||||
replacement: '${1}'
|
||||
```
|
||||
- With `scrape: enabled: true`, we enable metric autodiscovery for the Kubernetes cluster.
|
||||
|
||||
```yaml
|
||||
server:
|
||||
scrape:
|
||||
enabled: true
|
||||
...
|
||||
```
|
||||
|
||||
* By running `helm install vmsingle vm/victoria-metrics-single` we install [VictoriaMetrics Single](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) to default [namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) inside your cluster
|
||||
* By adding `scrape: enabled: true` we add and enable autodiscovery scraping from kubernetes cluster to [VictoriaMetrics Single](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/)
|
||||
* On line 67 from [https://docs.victoriametrics.com/guides/examples/guide-vmsingle-values.yaml](https://docs.victoriametrics.com/guides/examples/guide-vmsingle-values.yaml) we added `metric_relabel_configs` section that will help us to show Kubernetes metrics on Grafana dashboard.
|
||||
- The `metric_relabel_configs` section normalizes Kubernetes metrics labels so they are shown correctly in the Grafana dashboard later on.
|
||||
|
||||
```yaml
|
||||
...
|
||||
metric_relabel_configs:
|
||||
- action: replace
|
||||
source_labels: [pod]
|
||||
regex: '(.+)'
|
||||
target_label: pod_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
source_labels: [container]
|
||||
regex: '(.+)'
|
||||
target_label: container_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
target_label: name
|
||||
replacement: k8s_stub
|
||||
- action: replace
|
||||
source_labels: [id]
|
||||
regex: '^/system\.slice/(.+)\.service$'
|
||||
target_label: systemd_service_name
|
||||
replacement: '${1}'
|
||||
...
|
||||
```
|
||||
|
||||
As a result of the command you will see the following output:
|
||||
The `helm install vmsingle vm/victoria-metrics-single` command should result in the following output:
|
||||
|
||||
```text
|
||||
NAME: vmsingle
|
||||
LAST DEPLOYED: Fri Mar 21 11:50:39 2025
|
||||
LAST DEPLOYED: Wed Jan 28 13:04:36 2026
|
||||
NAMESPACE: default
|
||||
STATUS: deployed
|
||||
REVISION: 1
|
||||
DESCRIPTION: Install complete
|
||||
TEST SUITE: None
|
||||
NOTES:
|
||||
The VictoriaMetrics write api can be accessed via port 8428 on the following DNS name from within your cluster:
|
||||
@@ -182,12 +118,12 @@ Metrics Ingestion:
|
||||
export POD_NAME=$(kubectl get pods --namespace default -l "app=" -o jsonpath="{.items[0].metadata.name}")
|
||||
kubectl --namespace default port-forward $POD_NAME 8428
|
||||
|
||||
Write URL inside the kubernetes cluster:
|
||||
Write URL inside the Kubernetes cluster:
|
||||
http://vmsingle-victoria-metrics-single-server.default.svc.cluster.local.:8428/<protocol-specific-write-endpoint>
|
||||
|
||||
All supported write endpoints can be found at https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-time-series-data
|
||||
|
||||
E.g: for Prometheus:
|
||||
E.g., for Prometheus:
|
||||
http://vmsingle-victoria-metrics-single-server.default.svc.cluster.local.:8428/api/v1/write
|
||||
|
||||
Metrics Scrape:
|
||||
@@ -199,7 +135,7 @@ Metrics Scrape:
|
||||
Inside cluster:
|
||||
http://vmsingle-victoria-metrics-single-server.default.svc.cluster.local.:8428/targets
|
||||
Outside cluster:
|
||||
You need to port-forward service (see instructions above) and call
|
||||
You need to port-forward the service (see instructions above) and call
|
||||
http://<service-host-port>/targets
|
||||
|
||||
Read Data:
|
||||
@@ -207,38 +143,43 @@ Read Data:
|
||||
http://vmsingle-victoria-metrics-single-server.default.svc.cluster.local.:8428
|
||||
```
|
||||
|
||||
For us it’s important to remember the url for the datasource (copy lines from output).
|
||||
Take note of the Grafana datasource URL near the end of the output, as we'll use it in the next step. In the example above, this is the datasource URL:
|
||||
|
||||
Verify that VictoriaMetrics pod is up and running by executing the following command:
|
||||
```text
|
||||
The following URL can be used as the datasource URL in Grafana:
|
||||
http://vmsingle-victoria-metrics-single-server.default.svc.cluster.local.:8428
|
||||
|
||||
```
|
||||
|
||||
Verify that the VictoriaMetrics pod is up and running by executing the following command:
|
||||
|
||||
```shell
|
||||
kubectl get pods
|
||||
```
|
||||
|
||||
The expected output is:
|
||||
Wait until the STATUS is Running. The expected output is:
|
||||
|
||||
```text
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
vmsingle-victoria-metrics-single-server-0 1/1 Running 0 68s
|
||||
```
|
||||
|
||||
|
||||
## 3. Install and connect Grafana to VictoriaMetrics with Helm
|
||||
|
||||
Add the Grafana Helm repository.
|
||||
|
||||
Add the Grafana Helm repository.
|
||||
|
||||
```shell
|
||||
helm repo add grafana https://grafana.github.io/helm-charts
|
||||
helm repo add grafana-community https://grafana-community.github.io/helm-charts
|
||||
helm repo update
|
||||
```
|
||||
|
||||
> [!NOTE] Tip
|
||||
> See more information about the Grafana Helm chart on [Artifact Hub](https://artifacthub.io/packages/helm/grafana-community/grafana).
|
||||
|
||||
By installing the Chart with the release name `my-grafana`, you add the VictoriaMetrics datasource with official dashboard and kubernetes dashboard:
|
||||
Create a config file for the Grafana service. Ensure that the `url` value matches the Grafana datasource URL from the previous step:
|
||||
|
||||
```yaml
|
||||
cat <<EOF | helm install my-grafana grafana/grafana -f -
|
||||
cat <<EOF > grafana-single-values.yml
|
||||
datasources:
|
||||
datasources.yaml:
|
||||
apiVersion: 1
|
||||
@@ -246,7 +187,8 @@ cat <<EOF | helm install my-grafana grafana/grafana -f -
|
||||
- name: victoriametrics
|
||||
type: prometheus
|
||||
orgId: 1
|
||||
url: http://vmsingle-victoria-metrics-single-server.default.svc.cluster.local:8428
|
||||
# use the URL obtained from the VictoriaMetrics helm install output
|
||||
url: http://vmsingle-victoria-metrics-single-server.default.svc.cluster.local.:8428
|
||||
access: proxy
|
||||
isDefault: true
|
||||
updateIntervalSeconds: 10
|
||||
@@ -269,31 +211,71 @@ cat <<EOF | helm install my-grafana grafana/grafana -f -
|
||||
default:
|
||||
victoriametrics:
|
||||
gnetId: 10229
|
||||
revision: 22
|
||||
datasource: victoriametrics
|
||||
kubernetes:
|
||||
gnetId: 14205
|
||||
revision: 1
|
||||
datasource: victoriametrics
|
||||
EOF
|
||||
```
|
||||
|
||||
Run the following command to install Grafana with the release name `my-grafana`:
|
||||
|
||||
By running this command we:
|
||||
* Install Grafana from Helm repository.
|
||||
* Provision VictoriaMetrics datasource with the url from the output above which we copied before.
|
||||
* Add [this dashboard](https://grafana.com/grafana/dashboards/10229) for VictoriaMetrics.
|
||||
* Add [this dashboard](https://grafana.com/grafana/dashboards/14205) to see Kubernetes cluster metrics.
|
||||
```shell
|
||||
helm install my-grafana grafana-community/grafana -f grafana-single-values.yml
|
||||
```
|
||||
|
||||
By running this command, we:
|
||||
|
||||
- Install Grafana from the Helm repository.
|
||||
- Configure Grafana to use the VictoriaMetrics datasource URL.
|
||||
- Add two starter dashboards:
|
||||
- [Kubernetes Cluster Monitoring (via Prometheus)](https://grafana.com/grafana/dashboards/14205-kubernetes-cluster-monitoring-via-prometheus/) to show the Kubernetes Cluster metrics.
|
||||
- [VictoriaMetrics - single-node](https://grafana.com/grafana/dashboards/10229-victoriametrics-single-node/) for VictoriaMetrics telemetry ingestion monitoring.
|
||||
|
||||
Check the output log in your terminal. You should see the following output:
|
||||
|
||||
```text
|
||||
NAME: my-grafana
|
||||
LAST DEPLOYED: Wed Jan 28 13:12:51 2026
|
||||
NAMESPACE: default
|
||||
STATUS: deployed
|
||||
REVISION: 1
|
||||
DESCRIPTION: Install complete
|
||||
NOTES:
|
||||
1. Get your 'admin' user password by running:
|
||||
|
||||
kubectl get secret --namespace default my-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
|
||||
|
||||
|
||||
Check the output log in your terminal.
|
||||
To see the password for Grafana `admin` user use the following command:
|
||||
2. The Grafana server can be accessed via port 80 on the following DNS name from within your cluster:
|
||||
|
||||
my-grafana.default.svc.cluster.local
|
||||
|
||||
Get the Grafana URL to visit by running these commands in the same shell:
|
||||
export POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=my-grafana" -o jsonpath="{.items[0].metadata.name}")
|
||||
kubectl --namespace default port-forward $POD_NAME 3000
|
||||
|
||||
3. Login with the password from step 1 and the username: admin
|
||||
#################################################################################
|
||||
###### WARNING: Persistence is disabled!!! You will lose your data when #####
|
||||
###### the Grafana pod is terminated. #####
|
||||
#################################################################################
|
||||
```
|
||||
|
||||
To see the password for the Grafana `admin` user, use the command shown in the previous output:
|
||||
|
||||
```shell
|
||||
kubectl get secret --namespace default my-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
|
||||
```
|
||||
|
||||
Expose Grafana service on `127.0.0.1:3000`:
|
||||
Wait until the Grafana pod STATUS is Running (check with `kubectl get pods`):
|
||||
|
||||
```text
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
my-grafana-bc7796cf5-ffmln 1/1 Running 0 8m40s
|
||||
```
|
||||
|
||||
Expose the Grafana service on `127.0.0.1:3000` with:
|
||||
|
||||
```shell
|
||||
export POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=my-grafana" -o jsonpath="{.items[0].metadata.name}")
|
||||
@@ -301,25 +283,39 @@ export POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/nam
|
||||
kubectl --namespace default port-forward $POD_NAME 3000
|
||||
```
|
||||
|
||||
Now Grafana should be accessible on the `http://127.0.0.1:3000` address.
|
||||
Now Grafana should be accessible at `http://127.0.0.1:3000`.
|
||||
|
||||
## 4. View the dashboards in your browser
|
||||
|
||||
## 4. Check the obtained result in your browser
|
||||
To check that VictoriaMetrics has collected metrics from the Kubernetes cluster, open the browser to `http://127.0.0.1:3000/dashboards` and choose the `Kubernetes Cluster Monitoring (via Prometheus)` dashboard.
|
||||
|
||||
To check that VictoriaMetrics has collects metrics from the k8s cluster open in browser `http://127.0.0.1:3000/dashboards` and choose `Kubernetes Cluster Monitoring (via Prometheus)` dashboard. Use `admin` for login and `password` that you previously obtained from kubectl.
|
||||
Use `admin` as the username and the password you obtained earlier using `kubectl get secret ...`.
|
||||
|
||||

|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">List of pre-installed dashboards in Grafana</figcaption>
|
||||
|
||||
You will see something like this:
|
||||
You should see the metrics for your Kubernetes cluster:
|
||||
|
||||

|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Grafana dashboard showing the Kubernetes cluster metrics</figcaption>
|
||||
|
||||
VictoriaMetrics dashboard also available to use:
|
||||
The VictoriaMetrics dashboard shows metrics on telemetry ingestion and resource utilization:
|
||||
|
||||

|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Grafana dashboard for the VictoriaMetrics single-node service</figcaption>
|
||||
|
||||
## 5. Final thoughts
|
||||
|
||||
* We have set up TimeSeries Database for your k8s cluster.
|
||||
* Collected metrics from all running pods,nodes, … and store them in VictoriaMetrics database.
|
||||
* Visualize resources used in Kubernetes cluster by Grafana dashboards.
|
||||
- You now have a time series database for your Kubernetes cluster.
|
||||
- VictoriaMetrics continuously collects and stores metrics from all running pods and nodes.
|
||||
- Grafana dashboards give you a visual view of cluster resources.
|
||||
|
||||
Consider reading these resources to complete your setup:
|
||||
|
||||
- VictoriaMetrics
|
||||
- [Learn more about the single-node version](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/)
|
||||
- [Migrate existing metric data into VictoriaMetrics with vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/)
|
||||
- [Set up alerts](https://docs.victoriametrics.com/victoriametrics/vmalert/)
|
||||
- Grafana
|
||||
- [Enable persistent storage](https://grafana.com/docs/grafana/latest/setup-grafana/installation/helm/#enable-persistent-storage-recommended)
|
||||
- [Configure a private CA (certificate authority)](https://grafana.com/docs/grafana/latest/setup-grafana/installation/helm/#configure-a-private-ca-certificate-authority)
|
||||
|
||||
|
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 215 KiB |
|
Before Width: | Height: | Size: 66 KiB After Width: | Height: | Size: 603 KiB |
|
Before Width: | Height: | Size: 53 KiB After Width: | Height: | Size: 650 KiB |