Compare commits
81 Commits
v1.136.0
...
roaring-bi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1f1c619abb | ||
|
|
217d116c2c | ||
|
|
449d4ff1a1 | ||
|
|
6128134e84 | ||
|
|
d467faf739 | ||
|
|
673b2ca7db | ||
|
|
40ccf0c333 | ||
|
|
fe341a4204 | ||
|
|
83ebf00659 | ||
|
|
5e602726f5 | ||
|
|
a6200cc83d | ||
|
|
a5811d3c3b | ||
|
|
5962b47c31 | ||
|
|
9a4edc738a | ||
|
|
30d01e9cae | ||
|
|
6b46f3920c | ||
|
|
97b11146ee | ||
|
|
2ef74bd6ea | ||
|
|
845161e377 | ||
|
|
f176a6624a | ||
|
|
4d06e34b66 | ||
|
|
6d8ddcb9ed | ||
|
|
dd4167709a | ||
|
|
71e253e1f0 | ||
|
|
9e155ffd9e | ||
|
|
2e9e40dc75 | ||
|
|
10d4294f9b | ||
|
|
5e77771668 | ||
|
|
dda5545078 | ||
|
|
087efbc451 | ||
|
|
68e64536b1 | ||
|
|
6e3ce4d55c | ||
|
|
8d1b88f985 | ||
|
|
3d3c057d52 | ||
|
|
94622fef29 | ||
|
|
804d77ffc5 | ||
|
|
79b18e9742 | ||
|
|
3404a47a6d | ||
|
|
0b8205ef46 | ||
|
|
53514febdc | ||
|
|
8531d86da0 | ||
|
|
a47d32e129 | ||
|
|
df96f4d3ab | ||
|
|
84dc5453ad | ||
|
|
8093d98c0e | ||
|
|
809f9471df | ||
|
|
f9d6d2e428 | ||
|
|
32eac31416 | ||
|
|
4d4c1ff72e | ||
|
|
645ce2b6b3 | ||
|
|
89600bd229 | ||
|
|
9b3a60efee | ||
|
|
a8c5934d1b | ||
|
|
43544fdb63 | ||
|
|
7a4df5755a | ||
|
|
83bcbc43d1 | ||
|
|
79921cf434 | ||
|
|
40402fdac3 | ||
|
|
05943abc11 | ||
|
|
e66e71c87e | ||
|
|
7f682c4c76 | ||
|
|
4947cd7f14 | ||
|
|
5ea7314912 | ||
|
|
655f0e9c1d | ||
|
|
2ffd25a120 | ||
|
|
175fcf6676 | ||
|
|
c05516afbe | ||
|
|
6b12684e56 | ||
|
|
8f7c94f512 | ||
|
|
4a6259a9b2 | ||
|
|
d5b9d3e641 | ||
|
|
6863de2c0e | ||
|
|
51a3e4e27a | ||
|
|
d7046d6e19 | ||
|
|
7e6c03e9c6 | ||
|
|
5267f35104 | ||
|
|
172ff84299 | ||
|
|
a3f955dd84 | ||
|
|
19e7d986fe | ||
|
|
db2ad6f900 | ||
|
|
db1f3f4ab8 |
4
.github/workflows/test.yml
vendored
@@ -86,7 +86,7 @@ jobs:
|
||||
- run: go version
|
||||
|
||||
- name: Run tests
|
||||
run: GOGC=10 make ${{ matrix.scenario}}
|
||||
run: make ${{ matrix.scenario}}
|
||||
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v5
|
||||
@@ -95,7 +95,7 @@ jobs:
|
||||
|
||||
apptest:
|
||||
name: apptest
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: apptest
|
||||
|
||||
steps:
|
||||
- name: Code checkout
|
||||
|
||||
@@ -33,13 +33,13 @@ func PopulateTimeTpl(b []byte, tGlobal time.Time) []byte {
|
||||
}
|
||||
switch strings.TrimSpace(parts[0]) {
|
||||
case `TIME_S`:
|
||||
return []byte(fmt.Sprintf("%d", t.Unix()))
|
||||
return fmt.Appendf(nil, "%d", t.Unix())
|
||||
case `TIME_MSZ`:
|
||||
return []byte(fmt.Sprintf("%d", t.Unix()*1e3))
|
||||
return fmt.Appendf(nil, "%d", t.Unix()*1e3)
|
||||
case `TIME_MS`:
|
||||
return []byte(fmt.Sprintf("%d", timeToMillis(t)))
|
||||
return fmt.Appendf(nil, "%d", timeToMillis(t))
|
||||
case `TIME_NS`:
|
||||
return []byte(fmt.Sprintf("%d", t.UnixNano()))
|
||||
return fmt.Appendf(nil, "%d", t.UnixNano())
|
||||
default:
|
||||
log.Fatalf("unknown time pattern %s in %s", parts[0], repl)
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ func TestCalculateRetryDuration(t *testing.T) {
|
||||
f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) {
|
||||
t.Helper()
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
for range n {
|
||||
retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute)
|
||||
}
|
||||
|
||||
|
||||
@@ -51,9 +51,9 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
|
||||
|
||||
func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest {
|
||||
var wr prompb.WriteRequest
|
||||
for i := 0; i < seriesCount; i++ {
|
||||
for i := range seriesCount {
|
||||
var labels []prompb.Label
|
||||
for j := 0; j < labelsCount; j++ {
|
||||
for j := range labelsCount {
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: fmt.Sprintf("label_%d_%d", i, j),
|
||||
Value: fmt.Sprintf("value_%d_%d", i, j),
|
||||
|
||||
@@ -38,7 +38,7 @@ var (
|
||||
labelsGlobal []prompb.Label
|
||||
|
||||
remoteWriteRelabelConfigData atomic.Pointer[[]byte]
|
||||
remoteWriteURLRelabelConfigData atomic.Pointer[[]interface{}]
|
||||
remoteWriteURLRelabelConfigData atomic.Pointer[[]any]
|
||||
|
||||
relabelConfigReloads *metrics.Counter
|
||||
relabelConfigReloadErrors *metrics.Counter
|
||||
@@ -90,8 +90,8 @@ func WriteURLRelabelConfigData(w io.Writer) {
|
||||
return
|
||||
}
|
||||
type urlRelabelCfg struct {
|
||||
Url string `yaml:"url"`
|
||||
RelabelConfig interface{} `yaml:"relabel_config"`
|
||||
Url string `yaml:"url"`
|
||||
RelabelConfig any `yaml:"relabel_config"`
|
||||
}
|
||||
var cs []urlRelabelCfg
|
||||
for i, url := range *remoteWriteURLs {
|
||||
@@ -144,7 +144,7 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
|
||||
}
|
||||
|
||||
var urlRelabelCfgs []interface{}
|
||||
var urlRelabelCfgs []any
|
||||
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
|
||||
for i, path := range *relabelConfigPaths {
|
||||
if len(path) == 0 {
|
||||
@@ -157,7 +157,7 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
}
|
||||
rcs.perURL[i] = prc
|
||||
|
||||
var parsedCfg interface{}
|
||||
var parsedCfg any
|
||||
_ = yaml.Unmarshal(rawCfg, &parsedCfg)
|
||||
urlRelabelCfgs = append(urlRelabelCfgs, parsedCfg)
|
||||
}
|
||||
|
||||
@@ -28,12 +28,12 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
|
||||
itemsCount := 1_000 * bucketsCount
|
||||
m := make([]int, bucketsCount)
|
||||
var labels []prompb.Label
|
||||
for i := 0; i < itemsCount; i++ {
|
||||
for i := range itemsCount {
|
||||
labels = append(labels[:0], prompb.Label{
|
||||
Name: "__name__",
|
||||
Value: fmt.Sprintf("some_name_%d", i),
|
||||
})
|
||||
for j := 0; j < 10; j++ {
|
||||
for j := range 10 {
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: fmt.Sprintf("label_%d", j),
|
||||
Value: fmt.Sprintf("value_%d_%d", i, j),
|
||||
@@ -248,7 +248,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
||||
seriesCount := 100000
|
||||
// build 100000 series
|
||||
tssBlock := make([]prompb.TimeSeries, 0, seriesCount)
|
||||
for i := 0; i < seriesCount; i++ {
|
||||
for i := range seriesCount {
|
||||
tssBlock = append(tssBlock, prompb.TimeSeries{
|
||||
Labels: []prompb.Label{
|
||||
{
|
||||
@@ -269,7 +269,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
||||
// build active time series set
|
||||
nodes := make([]string, 0, remoteWriteCount)
|
||||
activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount)
|
||||
for i := 0; i < remoteWriteCount; i++ {
|
||||
for i := range remoteWriteCount {
|
||||
nodes = append(nodes, fmt.Sprintf("node%d", i))
|
||||
activeTimeSeriesByNodes[i] = make(map[string]struct{})
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@ func TestParseInputValue_Success(t *testing.T) {
|
||||
if len(outputExpected) != len(output) {
|
||||
t.Fatalf("unexpected output length; got %d; want %d", len(outputExpected), len(output))
|
||||
}
|
||||
for i := 0; i < len(outputExpected); i++ {
|
||||
for i := range outputExpected {
|
||||
if outputExpected[i].Omitted != output[i].Omitted {
|
||||
t.Fatalf("unexpected Omitted field in the output\ngot\n%v\nwant\n%v", output, outputExpected)
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"maps"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
@@ -12,6 +13,7 @@ import (
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
"syscall"
|
||||
@@ -348,9 +350,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
||||
for k := range alertEvalTimesMap {
|
||||
alertEvalTimes = append(alertEvalTimes, k)
|
||||
}
|
||||
sort.Slice(alertEvalTimes, func(i, j int) bool {
|
||||
return alertEvalTimes[i] < alertEvalTimes[j]
|
||||
})
|
||||
slices.Sort(alertEvalTimes)
|
||||
|
||||
// sort group eval order according to the given "group_eval_order".
|
||||
sort.Slice(testGroups, func(i, j int) bool {
|
||||
@@ -361,12 +361,8 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
||||
var groups []*rule.Group
|
||||
for _, group := range testGroups {
|
||||
mergedExternalLabels := make(map[string]string)
|
||||
for k, v := range tg.ExternalLabels {
|
||||
mergedExternalLabels[k] = v
|
||||
}
|
||||
for k, v := range externalLabels {
|
||||
mergedExternalLabels[k] = v
|
||||
}
|
||||
maps.Copy(mergedExternalLabels, tg.ExternalLabels)
|
||||
maps.Copy(mergedExternalLabels, externalLabels)
|
||||
ng := rule.NewGroup(group, q, time.Minute, mergedExternalLabels)
|
||||
ng.Init()
|
||||
groups = append(groups, ng)
|
||||
|
||||
@@ -2,6 +2,7 @@ package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage"
|
||||
@@ -80,12 +81,8 @@ func (t *Type) ValidateExpr(expr string) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot obtain labels from LogsQL expr: %q, err: %w", expr, err)
|
||||
}
|
||||
for i := range labels {
|
||||
// VictoriaLogs inserts `_time` field as a label in result when query with `stats by (_time:step)`,
|
||||
// making the result meaningless and may lead to cardinality issues.
|
||||
if labels[i] == "_time" {
|
||||
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
|
||||
}
|
||||
if slices.Contains(labels, "_time") {
|
||||
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unknown datasource type=%q", t.Name)
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"maps"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
@@ -91,9 +92,7 @@ func (c *Client) Clone() *Client {
|
||||
ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
|
||||
copy(ns.extraHeaders, c.extraHeaders)
|
||||
}
|
||||
for k, v := range c.extraParams {
|
||||
ns.extraParams[k] = v
|
||||
}
|
||||
maps.Copy(ns.extraParams, c.extraParams)
|
||||
|
||||
return ns
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ type promResponse struct {
|
||||
// Stats supported by VictoriaMetrics since v1.90
|
||||
Stats struct {
|
||||
SeriesFetched *string `json:"seriesFetched,omitempty"`
|
||||
} `json:"stats,omitempty"`
|
||||
} `json:"stats"`
|
||||
// IsPartial supported by VictoriaMetrics
|
||||
IsPartial *bool `json:"isPartial,omitempty"`
|
||||
}
|
||||
|
||||
@@ -134,7 +134,7 @@ func (ls Labels) String() string {
|
||||
func LabelCompare(a, b Labels) int {
|
||||
l := min(len(b), len(a))
|
||||
|
||||
for i := 0; i < l; i++ {
|
||||
for i := range l {
|
||||
if a[i].Name != b[i].Name {
|
||||
if a[i].Name < b[i].Name {
|
||||
return -1
|
||||
|
||||
@@ -13,7 +13,7 @@ func BenchmarkPromInstantUnmarshal(b *testing.B) {
|
||||
|
||||
// BenchmarkParsePrometheusResponse/Instant_std+fastjson-10 1760 668959 ns/op 280147 B/op 5781 allocs/op
|
||||
b.Run("Instant std+fastjson", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
for range b.N {
|
||||
var pi promInstant
|
||||
err = pi.Unmarshal(data)
|
||||
if err != nil {
|
||||
|
||||
@@ -69,7 +69,7 @@ func TestManagerUpdateConcurrent(t *testing.T) {
|
||||
for n := range workers {
|
||||
wg.Go(func() {
|
||||
r := rand.New(rand.NewSource(int64(n)))
|
||||
for i := 0; i < iterations; i++ {
|
||||
for range iterations {
|
||||
rnd := r.Intn(len(paths))
|
||||
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
|
||||
if err != nil { // update can fail and this is expected
|
||||
|
||||
@@ -216,7 +216,7 @@ consul_sd_configs:
|
||||
for n := range workers {
|
||||
wg.Go(func() {
|
||||
r := rand.New(rand.NewSource(int64(n)))
|
||||
for i := 0; i < iterations; i++ {
|
||||
for range iterations {
|
||||
rnd := r.Intn(len(paths))
|
||||
_ = cw.reload(paths[rnd]) // update can fail and this is expected
|
||||
_ = cw.notifiers()
|
||||
|
||||
@@ -113,7 +113,7 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
input: make(chan prompb.TimeSeries, cfg.MaxQueueSize),
|
||||
}
|
||||
|
||||
for i := 0; i < cc; i++ {
|
||||
for range cc {
|
||||
c.run(ctx)
|
||||
}
|
||||
return c, nil
|
||||
@@ -238,8 +238,10 @@ func (c *Client) flush(ctx context.Context, wr *prompb.WriteRequest) {
|
||||
defer func() {
|
||||
sendDuration.Add(time.Since(timeStart).Seconds())
|
||||
}()
|
||||
|
||||
attempts := 0
|
||||
L:
|
||||
for attempts := 0; ; attempts++ {
|
||||
for {
|
||||
err := c.send(ctx, b)
|
||||
if err != nil && (errors.Is(err, io.EOF) || netutil.IsTrivialNetworkError(err)) {
|
||||
// Something in the middle between client and destination might be closing
|
||||
@@ -281,6 +283,7 @@ L:
|
||||
time.Sleep(retryInterval)
|
||||
retryInterval *= 2
|
||||
|
||||
attempts++
|
||||
}
|
||||
|
||||
rwErrors.Inc()
|
||||
|
||||
@@ -44,7 +44,7 @@ func TestClient_Push(t *testing.T) {
|
||||
|
||||
r := rand.New(rand.NewSource(1))
|
||||
const rowsN = int(1e4)
|
||||
for i := 0; i < rowsN; i++ {
|
||||
for range rowsN {
|
||||
s := prompb.TimeSeries{
|
||||
Samples: []prompb.Sample{{
|
||||
Value: r.Float64(),
|
||||
@@ -102,7 +102,7 @@ func TestClient_run_maxBatchSizeDuringShutdown(t *testing.T) {
|
||||
}
|
||||
|
||||
// push time series to the client.
|
||||
for i := 0; i < pushCnt; i++ {
|
||||
for range pushCnt {
|
||||
if err = rwClient.Push(prompb.TimeSeries{}); err != nil {
|
||||
t.Fatalf("cannot time series to the client: %s", err)
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ func TestDebugClient_Push(t *testing.T) {
|
||||
|
||||
const rowsN = 100
|
||||
var sent int
|
||||
for i := 0; i < rowsN; i++ {
|
||||
for i := range rowsN {
|
||||
s := prompb.TimeSeries{
|
||||
Samples: []prompb.Sample{{
|
||||
Value: float64(i),
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"maps"
|
||||
"net/url"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -30,8 +31,8 @@ var (
|
||||
"0 means no limit.")
|
||||
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
||||
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier.")
|
||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier.")
|
||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
|
||||
"which by default is 4 times evaluationInterval of the parent group")
|
||||
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the 'time' parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+
|
||||
"Normally, should be equal to '-search.latencyOffset' (cmd-line flag configured for VictoriaMetrics single-node or vmselect). "+
|
||||
@@ -97,9 +98,7 @@ type groupMetrics struct {
|
||||
// set2 has priority over set1.
|
||||
func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[string]string {
|
||||
r := map[string]string{}
|
||||
for k, v := range set1 {
|
||||
r[k] = v
|
||||
}
|
||||
maps.Copy(r, set1)
|
||||
for k, v := range set2 {
|
||||
if prevV, ok := r[k]; ok {
|
||||
logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q",
|
||||
@@ -495,11 +494,8 @@ func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Dura
|
||||
}
|
||||
|
||||
// otherwise, return a random duration between [0..min(interval, maxDelay)] based on group ID
|
||||
interval := g.Interval
|
||||
if interval > maxDelay {
|
||||
// artificially limit interval, so groups with big intervals could start sooner.
|
||||
interval = maxDelay
|
||||
}
|
||||
// artificially limit interval, so groups with big intervals could start sooner.
|
||||
interval := min(g.Interval, maxDelay)
|
||||
var randSleep time.Duration
|
||||
randSleep = time.Duration(float64(interval) * (float64(g.GetID()) / (1 << 64)))
|
||||
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
|
||||
|
||||
@@ -405,7 +405,8 @@ func TestGroupStart(t *testing.T) {
|
||||
|
||||
var cur uint64
|
||||
prev := g.metrics.iterationTotal.Get()
|
||||
for i := 0; ; i++ {
|
||||
i := 0
|
||||
for {
|
||||
if i > 40 {
|
||||
t.Fatalf("group wasn't able to perform %d evaluations during %d eval intervals", n, i)
|
||||
}
|
||||
@@ -414,6 +415,7 @@ func TestGroupStart(t *testing.T) {
|
||||
return
|
||||
}
|
||||
time.Sleep(interval)
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -121,7 +121,7 @@ func (s *ruleState) add(e StateEntry) {
|
||||
func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) {
|
||||
var err error
|
||||
var tss []prompb.TimeSeries
|
||||
for i := 0; i < replayRuleRetryAttempts; i++ {
|
||||
for i := range replayRuleRetryAttempts {
|
||||
tss, err = r.execRange(context.Background(), start, end)
|
||||
if err == nil {
|
||||
break
|
||||
|
||||
@@ -40,7 +40,7 @@ func TestRule_state(t *testing.T) {
|
||||
}
|
||||
|
||||
var last time.Time
|
||||
for i := 0; i < stateEntriesN*2; i++ {
|
||||
for range stateEntriesN * 2 {
|
||||
last = time.Now()
|
||||
r.state.add(StateEntry{At: last})
|
||||
}
|
||||
@@ -68,7 +68,7 @@ func TestRule_stateConcurrent(_ *testing.T) {
|
||||
var wg sync.WaitGroup
|
||||
for range workers {
|
||||
wg.Go(func() {
|
||||
for i := 0; i < iterations; i++ {
|
||||
for range iterations {
|
||||
r.state.add(StateEntry{At: time.Now()})
|
||||
r.state.getAll()
|
||||
r.state.getLast()
|
||||
|
||||
@@ -42,7 +42,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
|
||||
|
||||
const writersN = 4
|
||||
payload := make(chan error, writersN)
|
||||
for i := 0; i < writersN; i++ {
|
||||
for range writersN {
|
||||
go func() {
|
||||
for err := range payload {
|
||||
eg.Add(err)
|
||||
@@ -51,7 +51,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
|
||||
}
|
||||
|
||||
const iterations = 500
|
||||
for i := 0; i < iterations; i++ {
|
||||
for i := range iterations {
|
||||
payload <- fmt.Errorf("error %d", i)
|
||||
if i%10 == 0 {
|
||||
_ = eg.Err()
|
||||
|
||||
@@ -65,10 +65,11 @@ type AuthConfig struct {
|
||||
type UserInfo struct {
|
||||
Name string `yaml:"name,omitempty"`
|
||||
|
||||
BearerToken string `yaml:"bearer_token,omitempty"`
|
||||
AuthToken string `yaml:"auth_token,omitempty"`
|
||||
Username string `yaml:"username,omitempty"`
|
||||
Password string `yaml:"password,omitempty"`
|
||||
BearerToken string `yaml:"bearer_token,omitempty"`
|
||||
JWT *JWTConfig `yaml:"jwt,omitempty"`
|
||||
AuthToken string `yaml:"auth_token,omitempty"`
|
||||
Username string `yaml:"username,omitempty"`
|
||||
Password string `yaml:"password,omitempty"`
|
||||
|
||||
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
|
||||
DiscoverBackendIPs *bool `yaml:"discover_backend_ips,omitempty"`
|
||||
@@ -588,7 +589,7 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
|
||||
|
||||
// Slow path - select other backend urls.
|
||||
n := atomicCounter.Add(1) - 1
|
||||
for i := uint32(0); i < uint32(len(bus)); i++ {
|
||||
for i := range uint32(len(bus)) {
|
||||
idx := (n + i) % uint32(len(bus))
|
||||
bu := bus[idx]
|
||||
if bu.isBroken() {
|
||||
@@ -799,6 +800,9 @@ var (
|
||||
// authUsers contains the currently loaded auth users
|
||||
authUsers atomic.Pointer[map[string]*UserInfo]
|
||||
|
||||
// jwt authentication cache
|
||||
jwtAuthCache atomic.Pointer[jwtCache]
|
||||
|
||||
authConfigWG sync.WaitGroup
|
||||
stopCh chan struct{}
|
||||
)
|
||||
@@ -838,6 +842,14 @@ func reloadAuthConfigData(data []byte) (bool, error) {
|
||||
return false, fmt.Errorf("failed to parse auth config: %w", err)
|
||||
}
|
||||
|
||||
jui, err := parseJWTUsers(ac)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to parse JWT users from auth config: %w", err)
|
||||
}
|
||||
jwtc := &jwtCache{
|
||||
users: jui,
|
||||
}
|
||||
|
||||
m, err := parseAuthConfigUsers(ac)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to parse users from auth config: %w", err)
|
||||
@@ -857,6 +869,7 @@ func reloadAuthConfigData(data []byte) (bool, error) {
|
||||
authConfig.Store(ac)
|
||||
authConfigData.Store(&data)
|
||||
authUsers.Store(&m)
|
||||
jwtAuthCache.Store(jwtc)
|
||||
|
||||
return true, nil
|
||||
}
|
||||
@@ -881,6 +894,9 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
||||
if ui.BearerToken != "" {
|
||||
return nil, fmt.Errorf("field bearer_token can't be specified for unauthorized_user section")
|
||||
}
|
||||
if ui.JWT != nil {
|
||||
return nil, fmt.Errorf("field jwt can't be specified for unauthorized_user section")
|
||||
}
|
||||
if ui.AuthToken != "" {
|
||||
return nil, fmt.Errorf("field auth_token can't be specified for unauthorized_user section")
|
||||
}
|
||||
@@ -927,10 +943,17 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
||||
}
|
||||
for i := range uis {
|
||||
ui := &uis[i]
|
||||
// users with jwt tokens are parsed by parseJWTUsers function.
|
||||
// the function also checks that users with jwt tokens do not have auth tokens, bearer tokens, usernames and passwords.
|
||||
if ui.JWT != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
ats, err := getAuthTokens(ui.AuthToken, ui.BearerToken, ui.Username, ui.Password)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, at := range ats {
|
||||
if uiOld := byAuthToken[at]; uiOld != nil {
|
||||
return nil, fmt.Errorf("duplicate auth token=%q found for username=%q, name=%q; the previous one is set for username=%q, name=%q",
|
||||
|
||||
@@ -378,7 +378,7 @@ users:
|
||||
RetryStatusCodes: []int{500, 501},
|
||||
LoadBalancingPolicy: "first_available",
|
||||
MergeQueryArgs: []string{"foo", "bar"},
|
||||
DropSrcPathPrefixParts: intp(1),
|
||||
DropSrcPathPrefixParts: new(1),
|
||||
DiscoverBackendIPs: &discoverBackendIPsTrue,
|
||||
},
|
||||
}, nil)
|
||||
@@ -621,6 +621,22 @@ unauthorized_user:
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
// skip user info with jwt, it is parsed by parseJWTUsers
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_prefix: http://aaa:343/bbb
|
||||
- jwt: {skip_verify: true}
|
||||
url_prefix: http://aaa:343/bbb
|
||||
`, map[string]*UserInfo{
|
||||
getHTTPAuthBasicToken("foo", "bar"): {
|
||||
Username: "foo",
|
||||
Password: "bar",
|
||||
URLPrefix: mustParseURL("http://aaa:343/bbb"),
|
||||
},
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func TestParseAuthConfigPassesTLSVerificationConfig(t *testing.T) {
|
||||
@@ -831,7 +847,7 @@ func TestBrokenBackend(t *testing.T) {
|
||||
bus[1].setBroken()
|
||||
|
||||
// broken backend should never return while there are healthy backends
|
||||
for i := 0; i < 1e3; i++ {
|
||||
for range int(1e3) {
|
||||
b := up.getBackendURL()
|
||||
if b.isBroken() {
|
||||
t.Fatalf("unexpected broken backend %q", b.url)
|
||||
@@ -963,10 +979,6 @@ func mustParseURLs(us []string) *URLPrefix {
|
||||
return up
|
||||
}
|
||||
|
||||
func intp(n int) *int {
|
||||
return &n
|
||||
}
|
||||
|
||||
func mustNewRegex(s string) *Regex {
|
||||
var re Regex
|
||||
if err := yaml.Unmarshal([]byte(s), &re); err != nil {
|
||||
|
||||
156
app/vmauth/jwt.go
Normal file
@@ -0,0 +1,156 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/jwt"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
type jwtCache struct {
|
||||
// users contain UserInfo`s from AuthConfig with JWTConfig set
|
||||
users []*UserInfo
|
||||
}
|
||||
|
||||
type JWTConfig struct {
|
||||
PublicKeys []string `yaml:"public_keys,omitempty"`
|
||||
PublicKeyFiles []string `yaml:"public_key_files,omitempty"`
|
||||
SkipVerify bool `yaml:"skip_verify,omitempty"`
|
||||
|
||||
verifierPool *jwt.VerifierPool
|
||||
}
|
||||
|
||||
func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, error) {
|
||||
jui := make([]*UserInfo, 0, len(ac.Users))
|
||||
for _, ui := range ac.Users {
|
||||
jwtToken := ui.JWT
|
||||
if jwtToken == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if ui.AuthToken != "" || ui.BearerToken != "" || ui.Username != "" || ui.Password != "" {
|
||||
return nil, fmt.Errorf("auth_token, bearer_token, username and password cannot be specified if jwt is set")
|
||||
}
|
||||
if len(jwtToken.PublicKeys) == 0 && len(jwtToken.PublicKeyFiles) == 0 && !jwtToken.SkipVerify {
|
||||
return nil, fmt.Errorf("jwt must contain at least a single public key, public_key_files or have skip_verify=true")
|
||||
}
|
||||
|
||||
if len(jwtToken.PublicKeys) > 0 || len(jwtToken.PublicKeyFiles) > 0 {
|
||||
keys := make([]any, 0, len(jwtToken.PublicKeys)+len(jwtToken.PublicKeyFiles))
|
||||
|
||||
for i := range jwtToken.PublicKeys {
|
||||
k, err := jwt.ParseKey([]byte(jwtToken.PublicKeys[i]))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
keys = append(keys, k)
|
||||
}
|
||||
|
||||
for _, filePath := range jwtToken.PublicKeyFiles {
|
||||
keyData, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read public key from file %q: %w", filePath, err)
|
||||
}
|
||||
k, err := jwt.ParseKey(keyData)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse public key from file %q: %w", filePath, err)
|
||||
}
|
||||
keys = append(keys, k)
|
||||
}
|
||||
|
||||
vp, err := jwt.NewVerifierPool(keys)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jwtToken.verifierPool = vp
|
||||
}
|
||||
|
||||
if err := ui.initURLs(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
metricLabels, err := ui.getMetricLabels()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
||||
}
|
||||
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
||||
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
|
||||
ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
|
||||
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
|
||||
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
|
||||
mcr := ui.getMaxConcurrentRequests()
|
||||
ui.concurrencyLimitCh = make(chan struct{}, mcr)
|
||||
ui.concurrencyLimitReached = ac.ms.GetOrCreateCounter(`vmauth_user_concurrent_requests_limit_reached_total` + metricLabels)
|
||||
_ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_capacity`+metricLabels, func() float64 {
|
||||
return float64(cap(ui.concurrencyLimitCh))
|
||||
})
|
||||
_ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_current`+metricLabels, func() float64 {
|
||||
return float64(len(ui.concurrencyLimitCh))
|
||||
})
|
||||
|
||||
rt, err := newRoundTripper(ui.TLSCAFile, ui.TLSCertFile, ui.TLSKeyFile, ui.TLSServerName, ui.TLSInsecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot initialize HTTP RoundTripper: %w", err)
|
||||
}
|
||||
ui.rt = rt
|
||||
|
||||
jui = append(jui, &ui)
|
||||
}
|
||||
|
||||
// the limitation will be lifted once claim based matching will be implemented
|
||||
if len(jui) > 1 {
|
||||
return nil, fmt.Errorf("multiple users with JWT tokens are not supported; found %d users", len(jui))
|
||||
}
|
||||
|
||||
return jui, nil
|
||||
}
|
||||
|
||||
func getUserInfoByJWTToken(ats []string) *UserInfo {
|
||||
js := *jwtAuthCache.Load()
|
||||
if len(js.users) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, at := range ats {
|
||||
if strings.Count(at, ".") != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
at, _ = strings.CutPrefix(at, `http_auth:`)
|
||||
|
||||
tkn, err := jwt.NewToken(at, true)
|
||||
if err != nil {
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("cannot parse jwt token: %s", err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if tkn.IsExpired(time.Now()) {
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("jwt token is expired")
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
for _, ui := range js.users {
|
||||
if ui.JWT.SkipVerify {
|
||||
return ui
|
||||
}
|
||||
|
||||
if err := ui.JWT.verifierPool.Verify(tkn); err != nil {
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("cannot verify jwt token: %s", err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
return ui
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
304
app/vmauth/jwt_test.go
Normal file
@@ -0,0 +1,304 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestJWTParseAuthConfigFailure(t *testing.T) {
|
||||
validRSAPublicKey := `-----BEGIN PUBLIC KEY-----
|
||||
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAiX7oPWKOWRQsGFEWvwZO
|
||||
mL2PYsdYUsu9nr0qtPCjxQHUJgLfT3rdKlvKpPFYv7ZmKnqTncg36Wz9uiYmWJ7e
|
||||
IB5Z+fko8kVIMzarCqVvpAJDzYF/pUii68xvuYoK3L9TIOAeyCXv+prwnr2IH+Mw
|
||||
9AONzWbRrYoO74XyTE9vMU5qmI/L1VPk+PR8lqPOSptLvzsfoaIk2ED4yK2nRB+6
|
||||
st+k4nccPqbErqHc8aiXnXfugfnr6b+NPFYUzKsDqkymGOokVijrI8B3jNw6c6Do
|
||||
zphk+D3wgLsXYHfMcZbXIMqffqm/aB8Qg88OpFOkQ3rd2p6R9+hacnZkfkn3Phiw
|
||||
yQIDAQAB
|
||||
-----END PUBLIC KEY-----
|
||||
`
|
||||
// ECDSA with the P-521 curve
|
||||
validECDSAPublicKey := `-----BEGIN PUBLIC KEY-----
|
||||
MIGbMBAGByqGSM49AgEGBSuBBAAjA4GGAAQAU9RmtkCRuYTKCyvLlDn5DtBZOHSe
|
||||
QTa5j9q/oQVpCKqcXVFrH5dgh0GL+P/ZhkeuowPzCZqntGf0+7wPt9OxSJcADVJm
|
||||
dv92m540MXss8zdHf5qtE0gsu2Ved0R7Z8a8QwGZ/1mYZ+kFGGbdQTlSvRqDySTq
|
||||
XOtclIk1uhc03oL9nOQ=
|
||||
-----END PUBLIC KEY-----
|
||||
`
|
||||
|
||||
f := func(s string, expErr string) {
|
||||
t.Helper()
|
||||
ac, err := parseAuthConfig([]byte(s))
|
||||
if err != nil {
|
||||
if expErr != err.Error() {
|
||||
t.Fatalf("unexpected error; got %q; want %q", err.Error(), expErr)
|
||||
}
|
||||
return
|
||||
}
|
||||
users, err := parseJWTUsers(ac)
|
||||
if err != nil {
|
||||
if expErr != err.Error() {
|
||||
t.Fatalf("unexpected error; got %q; want %q", err.Error(), expErr)
|
||||
}
|
||||
return
|
||||
}
|
||||
t.Fatalf("expecting non-nil error; got %v", users)
|
||||
}
|
||||
|
||||
// unauthorized_user cannot be used with jwt
|
||||
f(`
|
||||
unauthorized_user:
|
||||
jwt: {skip_verify: true}
|
||||
url_prefix: http://foo.bar
|
||||
`, `field jwt can't be specified for unauthorized_user section`)
|
||||
|
||||
// username and jwt in a single config
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
jwt: {skip_verify: true}
|
||||
url_prefix: http://foo.bar
|
||||
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
|
||||
// bearer_token and jwt in a single config
|
||||
f(`
|
||||
users:
|
||||
- bearer_token: foo
|
||||
jwt: {skip_verify: true}
|
||||
url_prefix: http://foo.bar
|
||||
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
|
||||
// auth_token and jwt in a single config
|
||||
f(`
|
||||
users:
|
||||
- auth_token: "Foo token"
|
||||
jwt: {skip_verify: true}
|
||||
url_prefix: http://foo.bar
|
||||
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
|
||||
|
||||
// jwt public_keys or skip_verify must be set, part 1
|
||||
f(`
|
||||
users:
|
||||
- jwt: {}
|
||||
url_prefix: http://foo.bar
|
||||
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
|
||||
|
||||
// jwt public_keys or skip_verify must be set, part 2
|
||||
f(`
|
||||
users:
|
||||
- jwt: {public_keys: null}
|
||||
url_prefix: http://foo.bar
|
||||
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
|
||||
|
||||
// jwt public_keys or skip_verify must be set, part 3
|
||||
f(`
|
||||
users:
|
||||
- jwt: {public_keys: []}
|
||||
url_prefix: http://foo.bar
|
||||
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
|
||||
|
||||
// jwt public_keys, public_key_files or skip_verify must be set
|
||||
f(`
|
||||
users:
|
||||
- jwt: {public_key_files: []}
|
||||
url_prefix: http://foo.bar
|
||||
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
|
||||
|
||||
// invalid public key, part 1
|
||||
f(`
|
||||
users:
|
||||
- jwt: {public_keys: [""]}
|
||||
url_prefix: http://foo.bar
|
||||
`, `failed to parse key "": failed to decode PEM block containing public key`)
|
||||
|
||||
// invalid public key, part 2
|
||||
f(`
|
||||
users:
|
||||
- jwt: {public_keys: ["invalid"]}
|
||||
url_prefix: http://foo.bar
|
||||
`, `failed to parse key "invalid": failed to decode PEM block containing public key`)
|
||||
|
||||
// invalid public key, part 3: valid keys followed by an invalid one
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
- %q
|
||||
- "invalid"
|
||||
url_prefix: http://foo.bar
|
||||
`, validRSAPublicKey, validECDSAPublicKey), `failed to parse key "invalid": failed to decode PEM block containing public key`)
|
||||
|
||||
// several jwt users
|
||||
// more than one user with a jwt section must be rejected
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, validRSAPublicKey, validECDSAPublicKey), `multiple users with JWT tokens are not supported; found 2 users`)
|
||||
|
||||
// public key file doesn't exist
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
public_key_files:
|
||||
- /path/to/nonexistent/file.pem
|
||||
url_prefix: http://foo.bar
|
||||
`, "cannot read public key from file \"/path/to/nonexistent/file.pem\": open /path/to/nonexistent/file.pem: no such file or directory")
|
||||
|
||||
// public key file invalid
|
||||
// the file exists, but it doesn't contain a PEM-encoded public key
|
||||
publicKeyFile := filepath.Join(t.TempDir(), "a_public_key.pem")
|
||||
if err := os.WriteFile(publicKeyFile, []byte(`invalidPEM`), 0o644); err != nil {
|
||||
t.Fatalf("failed to write public key file: %s", err)
|
||||
}
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
public_key_files:
|
||||
- `+publicKeyFile+`
|
||||
url_prefix: http://foo.bar
|
||||
`, "cannot parse public key from file \""+publicKeyFile+"\": failed to parse key \"invalidPEM\": failed to decode PEM block containing public key")
|
||||
}
|
||||
|
||||
func TestJWTParseAuthConfigSuccess(t *testing.T) {
|
||||
validRSAPublicKey := `-----BEGIN PUBLIC KEY-----
|
||||
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAiX7oPWKOWRQsGFEWvwZO
|
||||
mL2PYsdYUsu9nr0qtPCjxQHUJgLfT3rdKlvKpPFYv7ZmKnqTncg36Wz9uiYmWJ7e
|
||||
IB5Z+fko8kVIMzarCqVvpAJDzYF/pUii68xvuYoK3L9TIOAeyCXv+prwnr2IH+Mw
|
||||
9AONzWbRrYoO74XyTE9vMU5qmI/L1VPk+PR8lqPOSptLvzsfoaIk2ED4yK2nRB+6
|
||||
st+k4nccPqbErqHc8aiXnXfugfnr6b+NPFYUzKsDqkymGOokVijrI8B3jNw6c6Do
|
||||
zphk+D3wgLsXYHfMcZbXIMqffqm/aB8Qg88OpFOkQ3rd2p6R9+hacnZkfkn3Phiw
|
||||
yQIDAQAB
|
||||
-----END PUBLIC KEY-----
|
||||
`
|
||||
// ECDSA with the P-521 curve
|
||||
validECDSAPublicKey := `-----BEGIN PUBLIC KEY-----
|
||||
MIGbMBAGByqGSM49AgEGBSuBBAAjA4GGAAQAU9RmtkCRuYTKCyvLlDn5DtBZOHSe
|
||||
QTa5j9q/oQVpCKqcXVFrH5dgh0GL+P/ZhkeuowPzCZqntGf0+7wPt9OxSJcADVJm
|
||||
dv92m540MXss8zdHf5qtE0gsu2Ved0R7Z8a8QwGZ/1mYZ+kFGGbdQTlSvRqDySTq
|
||||
XOtclIk1uhc03oL9nOQ=
|
||||
-----END PUBLIC KEY-----
|
||||
`
|
||||
|
||||
f := func(s string) {
|
||||
t.Helper()
|
||||
ac, err := parseAuthConfig([]byte(s))
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
jui, err := parseJWTUsers(ac)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
for _, ui := range jui {
|
||||
if ui.JWT == nil {
|
||||
t.Fatalf("unexpected nil JWTConfig")
|
||||
}
|
||||
|
||||
if ui.JWT.SkipVerify {
|
||||
if ui.JWT.verifierPool != nil {
|
||||
t.Fatalf("unexpected non-nil verifier pool for skip_verify=true")
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if ui.JWT.verifierPool == nil {
|
||||
t.Fatalf("unexpected nil verifier pool for non-empty public keys")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, validRSAPublicKey))
|
||||
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, validECDSAPublicKey))
|
||||
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, validRSAPublicKey, validECDSAPublicKey))
|
||||
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
url_prefix: http://foo.bar
|
||||
`)
|
||||
|
||||
// combined with other auth methods
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_prefix: http://foo.bar
|
||||
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
url_prefix: http://foo.bar
|
||||
|
||||
- bearer_token: foo
|
||||
url_prefix: http://foo.bar
|
||||
`)
|
||||
|
||||
rsaKeyFile := filepath.Join(t.TempDir(), "rsa_public_key.pem")
|
||||
if err := os.WriteFile(rsaKeyFile, []byte(validRSAPublicKey), 0o644); err != nil {
|
||||
t.Fatalf("failed to write RSA key file: %s", err)
|
||||
}
|
||||
ecdsaKeyFile := filepath.Join(t.TempDir(), "ecdsa_public_key.pem")
|
||||
if err := os.WriteFile(ecdsaKeyFile, []byte(validECDSAPublicKey), 0o644); err != nil {
|
||||
t.Fatalf("failed to write ECDSA key file: %s", err)
|
||||
}
|
||||
|
||||
// Test single public key file
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_key_files:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, rsaKeyFile))
|
||||
|
||||
// Test multiple public key files
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_key_files:
|
||||
- %q
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, rsaKeyFile, ecdsaKeyFile))
|
||||
|
||||
// Test combined inline keys and files
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
public_key_files:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, validECDSAPublicKey, rsaKeyFile))
|
||||
}
|
||||
@@ -181,29 +181,32 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
ui := getUserInfoByAuthTokens(ats)
|
||||
if ui == nil {
|
||||
uu := authConfig.Load().UnauthorizedUser
|
||||
if uu != nil {
|
||||
processUserRequest(w, r, uu)
|
||||
return true
|
||||
}
|
||||
|
||||
invalidAuthTokenRequests.Inc()
|
||||
if *logInvalidAuthTokens {
|
||||
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
Err: err,
|
||||
StatusCode: http.StatusUnauthorized,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
} else {
|
||||
http.Error(w, "Unauthorized", http.StatusUnauthorized)
|
||||
}
|
||||
if ui := getUserInfoByAuthTokens(ats); ui != nil {
|
||||
processUserRequest(w, r, ui)
|
||||
return true
|
||||
}
|
||||
if ui := getUserInfoByJWTToken(ats); ui != nil {
|
||||
processUserRequest(w, r, ui)
|
||||
return true
|
||||
}
|
||||
|
||||
processUserRequest(w, r, ui)
|
||||
uu := authConfig.Load().UnauthorizedUser
|
||||
if uu != nil {
|
||||
processUserRequest(w, r, uu)
|
||||
return true
|
||||
}
|
||||
|
||||
invalidAuthTokenRequests.Inc()
|
||||
if *logInvalidAuthTokens {
|
||||
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
Err: err,
|
||||
StatusCode: http.StatusUnauthorized,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
} else {
|
||||
http.Error(w, "Unauthorized", http.StatusUnauthorized)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -368,7 +371,7 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
}
|
||||
|
||||
maxAttempts := up.getBackendsCount()
|
||||
for i := 0; i < maxAttempts; i++ {
|
||||
for range maxAttempts {
|
||||
bu := up.getBackendURL()
|
||||
if bu == nil {
|
||||
break
|
||||
@@ -401,7 +404,7 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
ui.backendErrors.Inc()
|
||||
}
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("all the %d backends for the user %q are unavailable", up.getBackendsCount(), ui.name()),
|
||||
Err: fmt.Errorf("all the %d backends for the user %q are unavailable for proxying the request - check previous WARN logs to see the exact error for each failed backend", up.getBackendsCount(), ui.name()),
|
||||
StatusCode: http.StatusBadGateway,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
|
||||
@@ -3,11 +3,20 @@ package main
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto"
|
||||
"crypto/rand"
|
||||
"crypto/rsa"
|
||||
"crypto/x509"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"encoding/pem"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
@@ -420,7 +429,7 @@ unauthorized_user:
|
||||
}
|
||||
responseExpected = `
|
||||
statusCode=502
|
||||
all the 2 backends for the user "" are unavailable`
|
||||
all the 2 backends for the user "" are unavailable for proxying the request - check previous WARN logs to see the exact error for each failed backend`
|
||||
f(cfgStr, requestURL, backendHandler, responseExpected)
|
||||
|
||||
// all the backend_urls are unavailable for authorized user
|
||||
@@ -438,7 +447,7 @@ users:
|
||||
}
|
||||
responseExpected = `
|
||||
statusCode=502
|
||||
all the 2 backends for the user "some-user" are unavailable`
|
||||
all the 2 backends for the user "some-user" are unavailable for proxying the request - check previous WARN logs to see the exact error for each failed backend`
|
||||
f(cfgStr, requestURL, backendHandler, responseExpected)
|
||||
|
||||
// zero discovered backend IPs
|
||||
@@ -460,7 +469,7 @@ unauthorized_user:
|
||||
}
|
||||
responseExpected = `
|
||||
statusCode=502
|
||||
all the 0 backends for the user "" are unavailable`
|
||||
all the 0 backends for the user "" are unavailable for proxying the request - check previous WARN logs to see the exact error for each failed backend`
|
||||
f(cfgStr, requestURL, backendHandler, responseExpected)
|
||||
netutil.Resolver = origResolver
|
||||
|
||||
@@ -477,7 +486,7 @@ unauthorized_user:
|
||||
}
|
||||
responseExpected = `
|
||||
statusCode=502
|
||||
all the 2 backends for the user "" are unavailable`
|
||||
all the 2 backends for the user "" are unavailable for proxying the request - check previous WARN logs to see the exact error for each failed backend`
|
||||
f(cfgStr, requestURL, backendHandler, responseExpected)
|
||||
if n := retries.Load(); n != 2 {
|
||||
t.Fatalf("unexpected number of retries; got %d; want 2", n)
|
||||
@@ -506,6 +515,218 @@ requested_url={BACKEND}/path2/foo/?de=fg`
|
||||
}
|
||||
}
|
||||
|
||||
func TestJWTRequestHandler(t *testing.T) {
|
||||
// Generate RSA key pair for testing
|
||||
privateKey, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot generate RSA key: %s", err)
|
||||
}
|
||||
|
||||
// Generate public key PEM
|
||||
publicKeyBytes, err := x509.MarshalPKIXPublicKey(&privateKey.PublicKey)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot marshal public key: %s", err)
|
||||
}
|
||||
publicKeyPEM := pem.EncodeToMemory(&pem.Block{
|
||||
Type: "PUBLIC KEY",
|
||||
Bytes: publicKeyBytes,
|
||||
})
|
||||
|
||||
genToken := func(t *testing.T, body map[string]any, valid bool) string {
|
||||
t.Helper()
|
||||
|
||||
headerJSON, err := json.Marshal(map[string]any{
|
||||
"alg": "RS256",
|
||||
"typ": "JWT",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("cannot marshal header: %s", err)
|
||||
}
|
||||
headerB64 := base64.RawURLEncoding.EncodeToString(headerJSON)
|
||||
|
||||
bodyJSON, err := json.Marshal(body)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot marshal body: %s", err)
|
||||
}
|
||||
bodyB64 := base64.RawURLEncoding.EncodeToString(bodyJSON)
|
||||
|
||||
payload := headerB64 + "." + bodyB64
|
||||
|
||||
var signatureB64 string
|
||||
if valid {
|
||||
// Create real RSA signature
|
||||
hash := crypto.SHA256
|
||||
h := hash.New()
|
||||
h.Write([]byte(payload))
|
||||
digest := h.Sum(nil)
|
||||
|
||||
signature, err := rsa.SignPKCS1v15(rand.Reader, privateKey, hash, digest)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot sign token: %s", err)
|
||||
}
|
||||
signatureB64 = base64.RawURLEncoding.EncodeToString(signature)
|
||||
} else {
|
||||
signatureB64 = base64.RawURLEncoding.EncodeToString([]byte("invalid_signature"))
|
||||
}
|
||||
|
||||
return payload + "." + signatureB64
|
||||
}
|
||||
genToken(t, nil, false)
|
||||
|
||||
f := func(cfgStr string, r *http.Request, responseExpected string) {
|
||||
t.Helper()
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if _, err := w.Write([]byte(r.RequestURI + "\n")); err != nil {
|
||||
panic(fmt.Errorf("cannot write response: %w", err))
|
||||
}
|
||||
if v := r.Header.Get(`extra_label`); v != "" {
|
||||
if _, err := w.Write([]byte(`extra_label=` + v + "\n")); err != nil {
|
||||
panic(fmt.Errorf("cannot write response: %w", err))
|
||||
}
|
||||
}
|
||||
if v := r.Header.Get(`extra_filters`); v != "" {
|
||||
if _, err := w.Write([]byte(`extra_filters=` + v + "\n")); err != nil {
|
||||
panic(fmt.Errorf("cannot write response: %w", err))
|
||||
}
|
||||
}
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
cfgStr = strings.ReplaceAll(cfgStr, "{BACKEND}", ts.URL)
|
||||
responseExpected = strings.ReplaceAll(responseExpected, "{BACKEND}", ts.URL)
|
||||
|
||||
cfgOrigP := authConfigData.Load()
|
||||
if _, err := reloadAuthConfigData([]byte(cfgStr)); err != nil {
|
||||
t.Fatalf("cannot load config data: %s", err)
|
||||
}
|
||||
defer func() {
|
||||
cfgOrig := []byte("unauthorized_user:\n url_prefix: http://foo/bar")
|
||||
if cfgOrigP != nil {
|
||||
cfgOrig = *cfgOrigP
|
||||
}
|
||||
_, err := reloadAuthConfigData(cfgOrig)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot load the original config: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
w := &fakeResponseWriter{}
|
||||
if !requestHandlerWithInternalRoutes(w, r) {
|
||||
t.Fatalf("unexpected false is returned from requestHandler")
|
||||
}
|
||||
|
||||
response := w.getResponse()
|
||||
response = strings.ReplaceAll(response, "\r\n", "\n")
|
||||
response = strings.TrimSpace(response)
|
||||
responseExpected = strings.TrimSpace(responseExpected)
|
||||
if response != responseExpected {
|
||||
t.Fatalf("unexpected response\ngot\n%s\nwant\n%s", response, responseExpected)
|
||||
}
|
||||
}
|
||||
|
||||
simpleCfgStr := fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: {BACKEND}/foo`, string(publicKeyPEM))
|
||||
noVMAccessClaimToken := genToken(t, nil, true)
|
||||
defaultVMAccessClaimToken := genToken(t, map[string]any{
|
||||
"exp": time.Now().Add(10 * time.Minute).Unix(),
|
||||
"vm_access": map[string]any{},
|
||||
}, true)
|
||||
expiredToken := genToken(t, map[string]any{
|
||||
"exp": 10,
|
||||
"vm_access": map[string]any{},
|
||||
}, true)
|
||||
invalidSignatureToken := genToken(t, map[string]any{
|
||||
"exp": time.Now().Add(10 * time.Minute).Unix(),
|
||||
"vm_access": map[string]any{},
|
||||
}, false)
|
||||
|
||||
// missing authorization
|
||||
request := httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
responseExpected := `
|
||||
statusCode=401
|
||||
Www-Authenticate: Basic realm="Restricted"
|
||||
missing 'Authorization' request header`
|
||||
f(simpleCfgStr, request, responseExpected)
|
||||
|
||||
// token without vm_access claim
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+noVMAccessClaimToken)
|
||||
responseExpected = `
|
||||
statusCode=401
|
||||
Unauthorized`
|
||||
f(simpleCfgStr, request, responseExpected)
|
||||
|
||||
// expired token
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+expiredToken)
|
||||
responseExpected = `
|
||||
statusCode=401
|
||||
Unauthorized`
|
||||
f(simpleCfgStr, request, responseExpected)
|
||||
|
||||
// invalid signature token
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+invalidSignatureToken)
|
||||
responseExpected = `
|
||||
statusCode=401
|
||||
Unauthorized`
|
||||
f(simpleCfgStr, request, responseExpected)
|
||||
|
||||
// invalid signature token and skip verify
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+invalidSignatureToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
/foo/abc`
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
url_prefix: {BACKEND}/foo`, request, responseExpected)
|
||||
|
||||
// token with default valid vm_access claim
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+defaultVMAccessClaimToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
/foo/abc`
|
||||
f(simpleCfgStr, request, responseExpected)
|
||||
|
||||
// jwt token used but no matching user with JWT token in config
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+defaultVMAccessClaimToken)
|
||||
responseExpected = `
|
||||
statusCode=401
|
||||
Unauthorized`
|
||||
f(`
|
||||
users:
|
||||
- password: a-password
|
||||
username: a-user
|
||||
url_prefix: {BACKEND}/foo`, request, responseExpected)
|
||||
|
||||
// auth with key from file
|
||||
publicKeyFile := filepath.Join(t.TempDir(), "a_public_key.pem")
|
||||
if err := os.WriteFile(publicKeyFile, []byte(publicKeyPEM), 0o644); err != nil {
|
||||
t.Fatalf("failed to write public key file: %s", err)
|
||||
}
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+defaultVMAccessClaimToken)
|
||||
responseExpected = `
|
||||
statusCode=200
|
||||
/foo/abc`
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_key_files:
|
||||
- %q
|
||||
url_prefix: {BACKEND}/foo`, string(publicKeyFile)), request, responseExpected)
|
||||
}
|
||||
|
||||
type fakeResponseWriter struct {
|
||||
h http.Header
|
||||
|
||||
@@ -832,7 +1053,7 @@ func TestBufferedBody_RetrySuccess(t *testing.T) {
|
||||
if !canRetry {
|
||||
t.Fatalf("canRetry() must return true before reading anything")
|
||||
}
|
||||
for i := 0; i < 5; i++ {
|
||||
for i := range 5 {
|
||||
data, err := io.ReadAll(rb)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error when reading all the data at iteration %d: %s", i, err)
|
||||
@@ -890,7 +1111,7 @@ func TestBufferedBody_RetrySuccessPartialRead(t *testing.T) {
|
||||
if !canRetry {
|
||||
t.Fatalf("canRetry must return true")
|
||||
}
|
||||
for i := 0; i < len(s); i++ {
|
||||
for i := range len(s) {
|
||||
buf := make([]byte, i)
|
||||
n, err := io.ReadFull(rb, buf)
|
||||
if err != nil {
|
||||
|
||||
@@ -174,7 +174,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
},
|
||||
RetryStatusCodes: []int{503, 501},
|
||||
LoadBalancingPolicy: "first_available",
|
||||
DropSrcPathPrefixParts: intp(2),
|
||||
DropSrcPathPrefixParts: new(2),
|
||||
}, "/a/b/c", "http://foo.bar/c", `bb: aaa`, `x: y`, []int{503, 501}, "first_available", 2)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar/federate"),
|
||||
@@ -219,13 +219,13 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
},
|
||||
RetryStatusCodes: []int{503, 500, 501},
|
||||
LoadBalancingPolicy: "first_available",
|
||||
DropSrcPathPrefixParts: intp(1),
|
||||
DropSrcPathPrefixParts: new(1),
|
||||
},
|
||||
{
|
||||
SrcPaths: getRegexs([]string{"/api/v1/write"}),
|
||||
URLPrefix: mustParseURL("http://vminsert/0/prometheus"),
|
||||
RetryStatusCodes: []int{},
|
||||
DropSrcPathPrefixParts: intp(0),
|
||||
DropSrcPathPrefixParts: new(0),
|
||||
},
|
||||
{
|
||||
SrcPaths: getRegexs([]string{"/metrics"}),
|
||||
@@ -242,7 +242,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
},
|
||||
},
|
||||
RetryStatusCodes: []int{502},
|
||||
DropSrcPathPrefixParts: intp(2),
|
||||
DropSrcPathPrefixParts: new(2),
|
||||
}
|
||||
f(ui, "http://host42/vmsingle/api/v1/query?query=up&db=foo", "http://vmselect/0/prometheus/api/v1/query?db=foo&query=up",
|
||||
"xx: aa\nyy: asdf", "qwe: rty", []int{503, 500, 501}, "first_available", 1)
|
||||
@@ -259,7 +259,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
SrcPaths: getRegexs([]string{"/api/v1/write"}),
|
||||
URLPrefix: mustParseURL("http://vminsert/0/prometheus"),
|
||||
RetryStatusCodes: []int{},
|
||||
DropSrcPathPrefixParts: intp(0),
|
||||
DropSrcPathPrefixParts: new(0),
|
||||
},
|
||||
{
|
||||
SrcPaths: getRegexs([]string{"/metrics/a/b"}),
|
||||
@@ -275,7 +275,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
},
|
||||
},
|
||||
RetryStatusCodes: []int{502},
|
||||
DropSrcPathPrefixParts: intp(2),
|
||||
DropSrcPathPrefixParts: new(2),
|
||||
}
|
||||
f(ui, "https://foo-host/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "", "", []int{}, "least_loaded", 0)
|
||||
f(ui, "https://foo-host/metrics/a/b", "http://metrics-server/b", "", "", []int{502}, "least_loaded", 2)
|
||||
|
||||
@@ -47,7 +47,7 @@ func New(retries int, factor float64, minDuration time.Duration) (*Backoff, erro
|
||||
// Retry process retries until all attempts are completed
|
||||
func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
|
||||
var attempt uint64
|
||||
for i := 0; i < b.retries; i++ {
|
||||
for i := range b.retries {
|
||||
err := cb()
|
||||
if err == nil {
|
||||
return attempt, nil
|
||||
|
||||
@@ -76,11 +76,11 @@ func (ts *TimeSeries) write(w io.Writer) (int, error) {
|
||||
|
||||
pointsCount := len(timestampsBatch)
|
||||
cw.printf(`},"timestamps":[`)
|
||||
for i := 0; i < pointsCount-1; i++ {
|
||||
for i := range pointsCount - 1 {
|
||||
cw.printf(`%d,`, timestampsBatch[i])
|
||||
}
|
||||
cw.printf(`%d],"values":[`, timestampsBatch[pointsCount-1])
|
||||
for i := 0; i < pointsCount-1; i++ {
|
||||
for i := range pointsCount - 1 {
|
||||
cw.printf(`%v,`, valuesBatch[i])
|
||||
}
|
||||
cw.printf("%v]}\n", valuesBatch[pointsCount-1])
|
||||
|
||||
@@ -262,7 +262,7 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
|
||||
errCh := make(chan error, p.cc)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i < p.cc; i++ {
|
||||
for range p.cc {
|
||||
wg.Go(func() {
|
||||
for f := range filterCh {
|
||||
if !p.disablePerMetricRequests {
|
||||
|
||||
@@ -142,7 +142,7 @@ type aggrStatePercentile struct {
|
||||
|
||||
func newAggrStatePercentile(pointsLen int, n float64) aggrState {
|
||||
hs := make([]*histogram.Fast, pointsLen)
|
||||
for i := 0; i < pointsLen; i++ {
|
||||
for i := range pointsLen {
|
||||
hs[i] = histogram.NewFast()
|
||||
}
|
||||
return &aggrStatePercentile{
|
||||
|
||||
@@ -50,7 +50,7 @@ func (ec *evalConfig) newTimestamps(step int64) []int64 {
|
||||
pointsLen := ec.pointsLen(step)
|
||||
timestamps := make([]int64, pointsLen)
|
||||
ts := ec.startTime
|
||||
for i := 0; i < pointsLen; i++ {
|
||||
for i := range pointsLen {
|
||||
timestamps[i] = ts
|
||||
ts += step
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ func naturalLess(a, b string) bool {
|
||||
}
|
||||
|
||||
func getNonNumPrefix(s string) (prefix string, tail string) {
|
||||
for i := 0; i < len(s); i++ {
|
||||
for i := range len(s) {
|
||||
ch := s[i]
|
||||
if ch >= '0' && ch <= '9' {
|
||||
return s[:i], s[i:]
|
||||
|
||||
@@ -209,7 +209,7 @@ func parseInterval(s string) (int64, error) {
|
||||
s = strings.TrimSpace(s)
|
||||
prefix := s
|
||||
var suffix string
|
||||
for i := 0; i < len(s); i++ {
|
||||
for i := range len(s) {
|
||||
ch := s[i]
|
||||
if ch != '-' && ch != '+' && ch != '.' && (ch < '0' || ch > '9') {
|
||||
prefix = s[:i]
|
||||
|
||||
@@ -1228,7 +1228,7 @@ func transformDelay(ec *evalConfig, fe *graphiteql.FuncExpr) (nextSeriesFunc, er
|
||||
stepsLocal = len(values)
|
||||
}
|
||||
copy(values[stepsLocal:], values[:len(values)-stepsLocal])
|
||||
for i := 0; i < stepsLocal; i++ {
|
||||
for i := range stepsLocal {
|
||||
values[i] = nan
|
||||
}
|
||||
}
|
||||
@@ -1740,7 +1740,7 @@ func transformGroup(ec *evalConfig, fe *graphiteql.FuncExpr) (nextSeriesFunc, er
|
||||
|
||||
func groupSeriesLists(ec *evalConfig, args []*graphiteql.ArgExpr, expr graphiteql.Expr) (nextSeriesFunc, error) {
|
||||
var nextSeriess []nextSeriesFunc
|
||||
for i := 0; i < len(args); i++ {
|
||||
for i := range args {
|
||||
nextSeries, err := evalSeriesList(ec, args, "seriesList", i)
|
||||
if err != nil {
|
||||
for _, f := range nextSeriess {
|
||||
@@ -3233,7 +3233,7 @@ func transformSeriesByTag(ec *evalConfig, fe *graphiteql.FuncExpr) (nextSeriesFu
|
||||
return nil, fmt.Errorf("at least one tagExpression must be passed to seriesByTag")
|
||||
}
|
||||
var tagExpressions []string
|
||||
for i := 0; i < len(args); i++ {
|
||||
for i := range args {
|
||||
te, err := getString(args, "tagExpressions", i)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -3633,7 +3633,7 @@ var graphiteToGolangRe = regexp.MustCompile(`\\(\d+)`)
|
||||
|
||||
func getNodes(args []*graphiteql.ArgExpr) ([]graphiteql.Expr, error) {
|
||||
var nodes []graphiteql.Expr
|
||||
for i := 0; i < len(args); i++ {
|
||||
for i := range args {
|
||||
expr := args[i].Expr
|
||||
switch expr.(type) {
|
||||
case *graphiteql.NumberExpr, *graphiteql.StringExpr:
|
||||
@@ -4052,7 +4052,7 @@ func formatPathsFromSeriesExpressions(seriesExpressions []string, sortPaths bool
|
||||
|
||||
func newNaNSeries(ec *evalConfig, step int64) *series {
|
||||
values := make([]float64, ec.pointsLen(step))
|
||||
for i := 0; i < len(values); i++ {
|
||||
for i := range values {
|
||||
values[i] = nan
|
||||
}
|
||||
return &series{
|
||||
@@ -5244,7 +5244,7 @@ func transformLinearRegression(ec *evalConfig, fe *graphiteql.FuncExpr) (nextSer
|
||||
|
||||
func linearRegressionForSeries(ec *evalConfig, fe *graphiteql.FuncExpr, ss, sourceSeries []*series) (nextSeriesFunc, error) {
|
||||
var resp []*series
|
||||
for i := 0; i < len(ss); i++ {
|
||||
for i := range ss {
|
||||
source := sourceSeries[i]
|
||||
s := ss[i]
|
||||
s.Tags["linearRegressions"] = fmt.Sprintf("%d, %d", ec.startTime/1e3, ec.endTime/1e3)
|
||||
@@ -5258,7 +5258,7 @@ func linearRegressionForSeries(ec *evalConfig, fe *graphiteql.FuncExpr, ss, sour
|
||||
continue
|
||||
}
|
||||
values := s.Values
|
||||
for j := 0; j < len(values); j++ {
|
||||
for j := range values {
|
||||
values[j] = offset + (float64(int(s.Timestamps[0])+j*int(s.step)))*factor
|
||||
}
|
||||
resp = append(resp, s)
|
||||
@@ -5370,7 +5370,7 @@ func holtWinterConfidenceBands(ec *evalConfig, fe *graphiteql.FuncExpr, args []*
|
||||
valuesLen := len(forecastValues)
|
||||
upperBand := make([]float64, 0, valuesLen)
|
||||
lowerBand := make([]float64, 0, valuesLen)
|
||||
for i := 0; i < valuesLen; i++ {
|
||||
for i := range valuesLen {
|
||||
forecastItem := forecastValues[i]
|
||||
deviationItem := deviationValues[i]
|
||||
if math.IsNaN(forecastItem) || math.IsNaN(deviationItem) {
|
||||
@@ -5464,7 +5464,7 @@ func transformHoltWintersAberration(ec *evalConfig, fe *graphiteql.FuncExpr) (ne
|
||||
return nil, fmt.Errorf("bug, len mismatch for series: %d and upperBand values: %d or lowerBand values: %d", len(values), len(upperBand), len(lowerBand))
|
||||
}
|
||||
aberration := make([]float64, 0, len(values))
|
||||
for i := 0; i < len(values); i++ {
|
||||
for i := range values {
|
||||
v := values[i]
|
||||
upperValue := upperBand[i]
|
||||
lowerValue := lowerBand[i]
|
||||
|
||||
@@ -280,7 +280,7 @@ func isMetricExprChar(ch byte) bool {
|
||||
}
|
||||
|
||||
func appendEscapedIdent(dst []byte, s string) []byte {
|
||||
for i := 0; i < len(s); i++ {
|
||||
for i := range len(s) {
|
||||
ch := s[i]
|
||||
if isIdentChar(ch) || isMetricExprChar(ch) {
|
||||
if i == 0 && !isFirstIdentChar(ch) {
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"math"
|
||||
"slices"
|
||||
"sort"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
@@ -491,10 +492,7 @@ func (pts *packedTimeseries) unpackTo(dst []*sortBlock, tbf *tmpBlocksFile, tr s
|
||||
}
|
||||
|
||||
// Prepare worker channels.
|
||||
workers := min(len(upws), gomaxprocs)
|
||||
if workers < 1 {
|
||||
workers = 1
|
||||
}
|
||||
workers := max(min(len(upws), gomaxprocs), 1)
|
||||
itemsPerWorker := (len(upws) + workers - 1) / workers
|
||||
workChs := make([]chan *unpackWork, workers)
|
||||
for i := range workChs {
|
||||
@@ -832,12 +830,7 @@ func GraphiteTags(qt *querytracer.Tracer, filter string, limit int, deadline sea
|
||||
}
|
||||
|
||||
func hasString(a []string, s string) bool {
|
||||
for _, x := range a {
|
||||
if x == s {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
return slices.Contains(a, s)
|
||||
}
|
||||
|
||||
// LabelValues returns label values matching the given labelName and sq until the given deadline.
|
||||
|
||||
@@ -10,14 +10,14 @@ func BenchmarkMergeSortBlocks(b *testing.B) {
|
||||
b.Run(fmt.Sprintf("replicationFactor-%d", replicationFactor), func(b *testing.B) {
|
||||
const samplesPerBlock = 8192
|
||||
var blocks []*sortBlock
|
||||
for j := 0; j < 10; j++ {
|
||||
for j := range 10 {
|
||||
timestamps := make([]int64, samplesPerBlock)
|
||||
values := make([]float64, samplesPerBlock)
|
||||
for i := range timestamps {
|
||||
timestamps[i] = int64(j*samplesPerBlock + i)
|
||||
values[i] = float64(j*samplesPerBlock + i)
|
||||
}
|
||||
for i := 0; i < replicationFactor; i++ {
|
||||
for range replicationFactor {
|
||||
blocks = append(blocks, &sortBlock{
|
||||
Timestamps: timestamps,
|
||||
Values: values,
|
||||
@@ -30,7 +30,7 @@ func BenchmarkMergeSortBlocks(b *testing.B) {
|
||||
b.Run("overlapped-blocks-bestcase", func(b *testing.B) {
|
||||
const samplesPerBlock = 8192
|
||||
var blocks []*sortBlock
|
||||
for j := 0; j < 10; j++ {
|
||||
for j := range 10 {
|
||||
timestamps := make([]int64, samplesPerBlock)
|
||||
values := make([]float64, samplesPerBlock)
|
||||
for i := range timestamps {
|
||||
@@ -45,7 +45,7 @@ func BenchmarkMergeSortBlocks(b *testing.B) {
|
||||
for j := 1; j < len(blocks); j++ {
|
||||
prev := blocks[j-1].Timestamps
|
||||
curr := blocks[j].Timestamps
|
||||
for i := 0; i < samplesPerBlock/2; i++ {
|
||||
for i := range samplesPerBlock / 2 {
|
||||
prev[i+samplesPerBlock/2], curr[i] = curr[i], prev[i+samplesPerBlock/2]
|
||||
}
|
||||
}
|
||||
@@ -54,7 +54,7 @@ func BenchmarkMergeSortBlocks(b *testing.B) {
|
||||
b.Run("overlapped-blocks-worstcase", func(b *testing.B) {
|
||||
const samplesPerBlock = 8192
|
||||
var blocks []*sortBlock
|
||||
for j := 0; j < 5; j++ {
|
||||
for j := range 5 {
|
||||
timestamps := make([]int64, samplesPerBlock)
|
||||
values := make([]float64, samplesPerBlock)
|
||||
for i := range timestamps {
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"math"
|
||||
"net/http"
|
||||
"runtime"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -1004,14 +1005,7 @@ func removeEmptyValuesAndTimeseries(tss []netstorage.Result) []netstorage.Result
|
||||
dst := tss[:0]
|
||||
for i := range tss {
|
||||
ts := &tss[i]
|
||||
hasNaNs := false
|
||||
for _, v := range ts.Values {
|
||||
if math.IsNaN(v) {
|
||||
hasNaNs = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasNaNs {
|
||||
if !slices.ContainsFunc(ts.Values, math.IsNaN) {
|
||||
// Fast path: nothing to remove.
|
||||
if len(ts.Values) > 0 {
|
||||
dst = append(dst, *ts)
|
||||
|
||||
@@ -742,7 +742,7 @@ func getRangeTopKTimeseries(tss []*timeseries, modifier *metricsql.ModifierExpr,
|
||||
|
||||
func reverseSeries(tss []*timeseries) {
|
||||
j := len(tss)
|
||||
for i := 0; i < len(tss)/2; i++ {
|
||||
for i := range len(tss) / 2 {
|
||||
j--
|
||||
tss[i], tss[j] = tss[j], tss[i]
|
||||
}
|
||||
@@ -983,7 +983,7 @@ func getPerPointIQRBounds(tss []*timeseries) ([]float64, []float64) {
|
||||
var qs []float64
|
||||
lower := make([]float64, pointsLen)
|
||||
upper := make([]float64, pointsLen)
|
||||
for i := 0; i < pointsLen; i++ {
|
||||
for i := range pointsLen {
|
||||
values = values[:0]
|
||||
for _, ts := range tss {
|
||||
v := ts.Values[i]
|
||||
|
||||
@@ -53,7 +53,7 @@ func TestIncrementalAggr(t *testing.T) {
|
||||
Values: valuesExpected,
|
||||
}}
|
||||
// run the test multiple times to make sure there are no side effects on concurrency
|
||||
for i := 0; i < 10; i++ {
|
||||
for i := range 10 {
|
||||
iafc := newIncrementalAggrFuncContext(ae, callbacks)
|
||||
tssSrcCopy := copyTimeseries(tssSrc)
|
||||
if err := testIncrementalParallelAggr(iafc, tssSrcCopy, tssExpected); err != nil {
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"math"
|
||||
"regexp"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -1935,14 +1936,7 @@ func dropStaleNaNs(funcName string, values []float64, timestamps []int64) ([]flo
|
||||
return values, timestamps
|
||||
}
|
||||
// Remove Prometheus staleness marks, so non-default rollup functions don't hit NaN values.
|
||||
hasStaleSamples := false
|
||||
for _, v := range values {
|
||||
if decimal.IsStaleNaN(v) {
|
||||
hasStaleSamples = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasStaleSamples {
|
||||
if !slices.ContainsFunc(values, decimal.IsStaleNaN) {
|
||||
// Fast path: values have no Prometheus staleness marks.
|
||||
return values, timestamps
|
||||
}
|
||||
|
||||
@@ -313,7 +313,7 @@ func escapeDots(s string) string {
|
||||
return s
|
||||
}
|
||||
result := make([]byte, 0, len(s)+2*dotsCount)
|
||||
for i := 0; i < len(s); i++ {
|
||||
for i := range len(s) {
|
||||
if s[i] == '.' && (i == 0 || s[i-1] != '\\') && (i+1 == len(s) || i+1 < len(s) && s[i+1] != '*' && s[i+1] != '+' && s[i+1] != '{') {
|
||||
// Escape a dot if the following conditions are met:
|
||||
// - if it isn't escaped already, i.e. if there is no `\` char before the dot.
|
||||
|
||||
@@ -67,7 +67,7 @@ func TestExecSuccess(t *testing.T) {
|
||||
Deadline: searchutil.NewDeadline(time.Now(), time.Minute, ""),
|
||||
RoundDigits: 100,
|
||||
}
|
||||
for i := 0; i < 5; i++ {
|
||||
for range 5 {
|
||||
result, err := Exec(nil, ec, q, false)
|
||||
if err != nil {
|
||||
t.Fatalf(`unexpected error when executing %q: %s`, q, err)
|
||||
@@ -9827,7 +9827,7 @@ func TestExecError(t *testing.T) {
|
||||
Deadline: searchutil.NewDeadline(time.Now(), time.Minute, ""),
|
||||
RoundDigits: 100,
|
||||
}
|
||||
for i := 0; i < 4; i++ {
|
||||
for range 4 {
|
||||
rv, err := Exec(nil, ec, q, false)
|
||||
if err == nil {
|
||||
t.Fatalf(`expecting non-nil error on %q`, q)
|
||||
|
||||
@@ -55,7 +55,7 @@ type parseCache struct {
|
||||
|
||||
func newParseCache() *parseCache {
|
||||
pc := new(parseCache)
|
||||
for i := 0; i < parseBucketCount; i++ {
|
||||
for i := range parseBucketCount {
|
||||
pc.buckets[i] = newParseBucket()
|
||||
}
|
||||
return pc
|
||||
@@ -75,7 +75,7 @@ func (pc *parseCache) get(q string) *parseCacheValue {
|
||||
|
||||
func (pc *parseCache) requests() uint64 {
|
||||
var n uint64
|
||||
for i := 0; i < parseBucketCount; i++ {
|
||||
for i := range parseBucketCount {
|
||||
n += pc.buckets[i].requests.Load()
|
||||
}
|
||||
return n
|
||||
@@ -83,7 +83,7 @@ func (pc *parseCache) requests() uint64 {
|
||||
|
||||
func (pc *parseCache) misses() uint64 {
|
||||
var n uint64
|
||||
for i := 0; i < parseBucketCount; i++ {
|
||||
for i := range parseBucketCount {
|
||||
n += pc.buckets[i].misses.Load()
|
||||
}
|
||||
return n
|
||||
@@ -91,7 +91,7 @@ func (pc *parseCache) misses() uint64 {
|
||||
|
||||
func (pc *parseCache) len() uint64 {
|
||||
var n uint64
|
||||
for i := 0; i < parseBucketCount; i++ {
|
||||
for i := range parseBucketCount {
|
||||
n += pc.buckets[i].len()
|
||||
}
|
||||
return n
|
||||
|
||||
@@ -17,7 +17,7 @@ func testGetParseCacheValue(q string) *parseCacheValue {
|
||||
|
||||
func testGenerateQueries(items int) []string {
|
||||
queries := make([]string, items)
|
||||
for i := 0; i < items; i++ {
|
||||
for i := range items {
|
||||
queries[i] = fmt.Sprintf(`node_time_seconds{instance="node%d", job="job%d"}`, i, i)
|
||||
}
|
||||
return queries
|
||||
@@ -102,7 +102,7 @@ func TestParseCacheBucketOverflow(t *testing.T) {
|
||||
v := testGetParseCacheValue(queries[0])
|
||||
|
||||
// Fill bucket
|
||||
for i := 0; i < parseBucketMaxLen; i++ {
|
||||
for i := range parseBucketMaxLen {
|
||||
b.put(queries[i], v)
|
||||
}
|
||||
expectedLen = uint64(parseBucketMaxLen)
|
||||
|
||||
@@ -15,7 +15,7 @@ func BenchmarkCachePutNoOverFlow(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
for i := 0; i < items; i++ {
|
||||
for i := range items {
|
||||
pc.put(queries[i], v)
|
||||
}
|
||||
}
|
||||
@@ -32,14 +32,14 @@ func BenchmarkCacheGetNoOverflow(b *testing.B) {
|
||||
queries := testGenerateQueries(items)
|
||||
v := testGetParseCacheValue(queries[0])
|
||||
|
||||
for i := 0; i < len(queries); i++ {
|
||||
for i := range queries {
|
||||
pc.put(queries[i], v)
|
||||
}
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
for i := 0; i < items; i++ {
|
||||
for i := range items {
|
||||
if v := pc.get(queries[i]); v == nil {
|
||||
b.Errorf("unexpected nil value obtained from cache for query: %s ", queries[i])
|
||||
}
|
||||
@@ -59,7 +59,7 @@ func BenchmarkCachePutGetNoOverflow(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
for i := 0; i < items; i++ {
|
||||
for i := range items {
|
||||
pc.put(queries[i], v)
|
||||
if res := pc.get(queries[i]); res == nil {
|
||||
b.Errorf("unexpected nil value obtained from cache for query: %s ", queries[i])
|
||||
@@ -79,7 +79,7 @@ func BenchmarkCachePutOverflow(b *testing.B) {
|
||||
queries := testGenerateQueries(items)
|
||||
v := testGetParseCacheValue(queries[0])
|
||||
|
||||
for i := 0; i < parseCacheMaxLen; i++ {
|
||||
for i := range parseCacheMaxLen {
|
||||
c.put(queries[i], v)
|
||||
}
|
||||
|
||||
@@ -105,7 +105,7 @@ func BenchmarkCachePutGetOverflow(b *testing.B) {
|
||||
queries := testGenerateQueries(items)
|
||||
v := testGetParseCacheValue(queries[0])
|
||||
|
||||
for i := 0; i < parseCacheMaxLen; i++ {
|
||||
for i := range parseCacheMaxLen {
|
||||
c.put(queries[i], v)
|
||||
}
|
||||
|
||||
@@ -141,8 +141,8 @@ var testSimpleQueries = []string{
|
||||
|
||||
func BenchmarkParsePromQLWithCacheSimple(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for j := 0; j < len(testSimpleQueries); j++ {
|
||||
for range b.N {
|
||||
for j := range testSimpleQueries {
|
||||
_, err := parsePromQLWithCache(testSimpleQueries[j])
|
||||
if err != nil {
|
||||
b.Errorf("unexpected error: %s", err)
|
||||
@@ -155,7 +155,7 @@ func BenchmarkParsePromQLWithCacheSimpleParallel(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
for i := 0; i < len(testSimpleQueries); i++ {
|
||||
for i := range testSimpleQueries {
|
||||
_, err := parsePromQLWithCache(testSimpleQueries[i])
|
||||
if err != nil {
|
||||
b.Errorf("unexpected error: %s", err)
|
||||
@@ -210,8 +210,8 @@ var testComplexQueries = []string{
|
||||
|
||||
func BenchmarkParsePromQLWithCacheComplex(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for j := 0; j < len(testComplexQueries); j++ {
|
||||
for range b.N {
|
||||
for j := range testComplexQueries {
|
||||
_, err := parsePromQLWithCache(testComplexQueries[j])
|
||||
if err != nil {
|
||||
b.Errorf("unexpected error: %s", err)
|
||||
@@ -224,7 +224,7 @@ func BenchmarkParsePromQLWithCacheComplexParallel(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
for i := 0; i < len(testComplexQueries); i++ {
|
||||
for i := range testComplexQueries {
|
||||
_, err := parsePromQLWithCache(testComplexQueries[i])
|
||||
if err != nil {
|
||||
b.Errorf("unexpected error: %s", err)
|
||||
|
||||
@@ -739,7 +739,7 @@ func (mi *rollupResultCacheMetainfo) Unmarshal(src []byte) error {
|
||||
entriesLen := int(encoding.UnmarshalUint32(src))
|
||||
src = src[4:]
|
||||
mi.entries = slicesutil.SetLength(mi.entries, entriesLen)
|
||||
for i := 0; i < entriesLen; i++ {
|
||||
for i := range entriesLen {
|
||||
tail, err := mi.entries[i].Unmarshal(src)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot unmarshal entry #%d: %w", i, err)
|
||||
|
||||
@@ -11,14 +11,14 @@ import (
|
||||
|
||||
func TestRollupResultCacheInitStop(t *testing.T) {
|
||||
t.Run("inmemory", func(_ *testing.T) {
|
||||
for i := 0; i < 5; i++ {
|
||||
for range 5 {
|
||||
InitRollupResultCache("")
|
||||
StopRollupResultCache()
|
||||
}
|
||||
})
|
||||
t.Run("file-based", func(_ *testing.T) {
|
||||
cacheFilePath := "test-rollup-result-cache"
|
||||
for i := 0; i < 3; i++ {
|
||||
for range 3 {
|
||||
InitRollupResultCache(cacheFilePath)
|
||||
StopRollupResultCache()
|
||||
}
|
||||
@@ -241,12 +241,12 @@ func TestRollupResultCache(t *testing.T) {
|
||||
t.Run("big-timeseries", func(t *testing.T) {
|
||||
ResetRollupResultCache()
|
||||
var tss []*timeseries
|
||||
for i := 0; i < 1000; i++ {
|
||||
for i := range 1000 {
|
||||
ts := &timeseries{
|
||||
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
|
||||
Values: []float64{1, 2, 3, 4, 5, 6},
|
||||
}
|
||||
ts.MetricName.MetricGroup = []byte(fmt.Sprintf("metric %d", i))
|
||||
ts.MetricName.MetricGroup = fmt.Appendf(nil, "metric %d", i)
|
||||
tss = append(tss, ts)
|
||||
}
|
||||
rollupResultCacheV.PutSeries(nil, ec, fe, window, tss)
|
||||
|
||||
@@ -240,7 +240,7 @@ func testRollupFunc(t *testing.T, funcName string, args []any, vExpected float64
|
||||
if rollupFuncsRemoveCounterResets[funcName] {
|
||||
removeCounterResets(rfa.values, rfa.timestamps, 0)
|
||||
}
|
||||
for i := 0; i < 5; i++ {
|
||||
for range 5 {
|
||||
v := rf(&rfa)
|
||||
if math.IsNaN(vExpected) {
|
||||
if !math.IsNaN(v) {
|
||||
@@ -1493,7 +1493,7 @@ func TestRollupBigNumberOfValues(t *testing.T) {
|
||||
rc.Timestamps = rc.getTimestamps()
|
||||
srcValues := make([]float64, srcValuesCount)
|
||||
srcTimestamps := make([]int64, srcValuesCount)
|
||||
for i := 0; i < srcValuesCount; i++ {
|
||||
for i := range int(srcValuesCount) {
|
||||
srcValues[i] = float64(i)
|
||||
srcTimestamps[i] = int64(i / 2)
|
||||
}
|
||||
|
||||
@@ -451,7 +451,7 @@ func transformBucketsLimit(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
sort.Slice(leGroup, func(i, j int) bool {
|
||||
return leGroup[i].le < leGroup[j].le
|
||||
})
|
||||
for n := 0; n < pointsCount; n++ {
|
||||
for n := range pointsCount {
|
||||
prevValue := float64(0)
|
||||
for i := range leGroup {
|
||||
xx := &leGroup[i]
|
||||
@@ -1192,7 +1192,7 @@ func transformInterpolate(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
}
|
||||
prevValue := nan
|
||||
var nextValue float64
|
||||
for i := 0; i < len(values); i++ {
|
||||
for i := range values {
|
||||
if !math.IsNaN(values[i]) {
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -62,7 +62,7 @@ var (
|
||||
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-limiter . "+
|
||||
"See also -storage.maxHourlySeries")
|
||||
|
||||
minFreeDiskSpaceBytes = flagutil.NewBytes("storage.minFreeDiskSpaceBytes", 10e6, "The minimum free disk space at -storageDataPath after which the storage stops accepting new data")
|
||||
minFreeDiskSpaceBytes = flagutil.NewBytes("storage.minFreeDiskSpaceBytes", 100e6, "The minimum free disk space at -storageDataPath after which the storage stops accepting new data")
|
||||
|
||||
cacheSizeStorageTSID = flagutil.NewBytes("storage.cacheSizeStorageTSID", 0, "Overrides max size for storage/tsid cache. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning")
|
||||
|
||||
@@ -7,6 +7,7 @@ import { AUTOCOMPLETE_LIMITS } from "../../../constants/queryAutocomplete";
|
||||
import { QueryEditorAutocompleteProps } from "./QueryEditor";
|
||||
import { getExprLastPart, getValueByContext, getContext } from "./autocompleteUtils";
|
||||
import { extractCurrentLabel, extractLabelMatchers, extractMetric, splitByCursor } from "./utils/parser";
|
||||
import { escapeLabelName } from "../../../utils/metric";
|
||||
|
||||
const QueryEditorAutocomplete: FC<QueryEditorAutocompleteProps> = ({
|
||||
value,
|
||||
@@ -90,6 +91,7 @@ const QueryEditorAutocomplete: FC<QueryEditorAutocompleteProps> = ({
|
||||
}
|
||||
|
||||
if (context === QueryContextType.label) {
|
||||
insert = escapeLabelName(insert);
|
||||
valueAfterCursor = valueAfterCursor.replace(/^[^\s=!,{}()"|+\-/*^]*/, "");
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { QueryUpdater } from "./types";
|
||||
import { escapeLabelName } from "../../utils/metric";
|
||||
|
||||
export const queryUpdater: QueryUpdater = {
|
||||
seriesCountByMetricName: ({ query }): string => {
|
||||
@@ -28,5 +29,5 @@ const getSeriesSelector = (label: string | null, value: string): string => {
|
||||
if (!label) {
|
||||
return "";
|
||||
}
|
||||
return "{" + label + "=" + JSON.stringify(value) + "}";
|
||||
return "{" + escapeLabelName(label) + "=" + JSON.stringify(value) + "}";
|
||||
};
|
||||
|
||||
@@ -18,6 +18,7 @@ import {
|
||||
TipHighNumberOfValues
|
||||
} from "./CardinalityTips";
|
||||
import useSearchParamsFromObject from "../../hooks/useSearchParamsFromObject";
|
||||
import { escapeLabelName } from "../../utils/metric";
|
||||
|
||||
const spinnerMessage = `Please wait while cardinality stats is calculated.
|
||||
This may take some time if the db contains big number of time series.`;
|
||||
@@ -36,12 +37,17 @@ const CardinalityPanel: FC = () => {
|
||||
const defaultState = getDefaultState(match, focusLabel);
|
||||
|
||||
const handleFilterClick = (key: string) => (query: string) => {
|
||||
const rawQuery = query;
|
||||
|
||||
const isLabelKey = ["labelValueCountByLabelName", "seriesCountByLabelName"].includes(key);
|
||||
if (isLabelKey) query = escapeLabelName(query);
|
||||
|
||||
const value = queryUpdater[key]({ query, focusLabel, match });
|
||||
const params: Record<string, string> = { match: value };
|
||||
if (key === "labelValueCountByLabelName" || key == "seriesCountByLabelName") {
|
||||
params.focusLabel = query;
|
||||
if (isLabelKey) {
|
||||
params.focusLabel = rawQuery;
|
||||
}
|
||||
if (key == "seriesCountByFocusLabelValue") {
|
||||
if (key === "seriesCountByFocusLabelValue") {
|
||||
params.focusLabel = "";
|
||||
}
|
||||
setSearchParamsFromKeys(params);
|
||||
|
||||
@@ -52,3 +52,7 @@ export const isHistogramData = (result: MetricBase[]) => {
|
||||
|
||||
return isHistogram && result.every(r => histogramLabels.some(l => l in r.metric));
|
||||
};
|
||||
|
||||
export const escapeLabelName = (s: string) => {
|
||||
return s.replace(/([\\./-])/g, "\\$1");
|
||||
};
|
||||
|
||||
@@ -19,7 +19,6 @@ func TestSingleMetricsMetadata(t *testing.T) {
|
||||
sut := tc.MustStartVmsingle("vmsingle", []string{
|
||||
"-storageDataPath=" + tc.Dir(),
|
||||
"-retentionPeriod=100y",
|
||||
"-enableMetadata",
|
||||
})
|
||||
// verify empty stats
|
||||
resp := sut.PrometheusAPIV1Metadata(t, "", 0, apptest.QueryOpts{})
|
||||
@@ -120,15 +119,12 @@ func TestClusterMetricsMetadata(t *testing.T) {
|
||||
|
||||
vminsert1 := tc.MustStartVminsert("vminsert1", []string{
|
||||
fmt.Sprintf("-storageNode=%s,%s", vmstorage1.VminsertAddr(), vmstorage2.VminsertAddr()),
|
||||
"-enableMetadata",
|
||||
})
|
||||
vminsert2 := tc.MustStartVminsert("vminsert-2", []string{
|
||||
fmt.Sprintf("-storageNode=%s,%s", vmstorage1.VminsertAddr(), vmstorage2.VminsertAddr()),
|
||||
"-enableMetadata",
|
||||
})
|
||||
vminsertGlobal := tc.MustStartVminsert("vminsert-global", []string{
|
||||
fmt.Sprintf("-storageNode=%s,%s", vminsert1.ClusternativeListenAddr(), vminsert2.ClusternativeListenAddr()),
|
||||
"-enableMetadata",
|
||||
})
|
||||
vmselect := tc.MustStartVmselect("vmselect", []string{
|
||||
fmt.Sprintf("-storageNode=%s,%s", vmstorage1.VmselectAddr(), vmstorage2.VmselectAddr()),
|
||||
|
||||
@@ -171,6 +171,26 @@ func TestClusterMultiTenantSelect(t *testing.T) {
|
||||
t.Errorf("unexpected response (-want, +got):\n%s", diff)
|
||||
}
|
||||
|
||||
// /api/v1/label/../value with extra_filters
|
||||
|
||||
wantVR := apptest.NewPrometheusAPIV1LabelValuesResponse(t,
|
||||
`{"data": [
|
||||
"5"
|
||||
]
|
||||
}`)
|
||||
wantSR.Sort()
|
||||
gotVR := vmselect.PrometheusAPIV1LabelValues(t, "vm_account_id", "foo", apptest.QueryOpts{
|
||||
Start: "2022-05-10T08:00:00.000Z",
|
||||
End: "2022-05-10T08:30:00.000Z",
|
||||
ExtraFilters: []string{`{vm_account_id="5"}`},
|
||||
Tenant: "multitenant",
|
||||
})
|
||||
gotSR.Sort()
|
||||
|
||||
if diff := cmp.Diff(wantVR, gotVR, cmpopts.IgnoreFields(apptest.PrometheusAPIV1LabelValuesResponse{}, "Status", "IsPartial")); diff != "" {
|
||||
t.Errorf("unexpected response (-want, +got):\n%s", diff)
|
||||
}
|
||||
|
||||
// Delete series from specific tenant
|
||||
vmselect.APIV1AdminTSDBDeleteSeries(t, "foo_bar", apptest.QueryOpts{
|
||||
Tenant: "5:15",
|
||||
|
||||
@@ -278,7 +278,7 @@ func validateStreamReadHeaders(t *testing.T, r *http.Request) bool {
|
||||
func GenerateRemoteReadSeries(start, end, numOfSeries, numOfSamples int64) []*prompb.TimeSeries {
|
||||
var ts []*prompb.TimeSeries
|
||||
j := 0
|
||||
for i := 0; i < int(numOfSeries); i++ {
|
||||
for i := range int(numOfSeries) {
|
||||
if i%3 == 0 {
|
||||
j++
|
||||
}
|
||||
|
||||
@@ -313,7 +313,7 @@ func TestSingleVMAgentDropOnOverload(t *testing.T) {
|
||||
|
||||
waitFor := func(f func() bool) {
|
||||
t.Helper()
|
||||
for i := 0; i < retries; i++ {
|
||||
for range retries {
|
||||
if f() {
|
||||
return
|
||||
}
|
||||
@@ -339,7 +339,7 @@ func TestSingleVMAgentDropOnOverload(t *testing.T) {
|
||||
|
||||
// Send 2 more requests, the first RW endpoint should receive everything, the second should add them to the queue
|
||||
// since worker is busy with the first request.
|
||||
for i := 0; i < 2; i++ {
|
||||
for i := range 2 {
|
||||
vmagent.APIV1ImportPrometheusNoWaitFlush(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
@@ -145,7 +145,7 @@ func (app *Vmagent) ReloadRelabelConfigs(t *testing.T) {
|
||||
}
|
||||
|
||||
var currTotal float64
|
||||
for i := 0; i < 30; i++ {
|
||||
for range 30 {
|
||||
currTotal = app.GetMetric(t, "vmagent_relabel_config_reloads_total")
|
||||
if currTotal > prevTotal {
|
||||
return
|
||||
|
||||
@@ -506,6 +506,24 @@
|
||||
"value": 200
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Alert"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Alert",
|
||||
"url": "/alerting/${ds:text}/${__value.text}/find"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -659,4 +677,4 @@
|
||||
"uid": "ehXxUsGSk",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
}
|
||||
@@ -91,8 +91,26 @@
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
@@ -103,17 +121,42 @@
|
||||
},
|
||||
"id": 24,
|
||||
"options": {
|
||||
"code": {
|
||||
"language": "plaintext",
|
||||
"showLineNumbers": false,
|
||||
"showMiniMap": false
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"percentChangeColorMode": "standard",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "/^short_version$/",
|
||||
"values": false
|
||||
},
|
||||
"content": "<div style=\"text-align: center;\">$version</div>",
|
||||
"mode": "markdown"
|
||||
"showPercentChange": false,
|
||||
"textMode": "value",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "vm_app_version{job=~\"$job\",instance=~\"$instance\"}",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"legendFormat": "{{short_version}}",
|
||||
"range": false,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Version",
|
||||
"type": "text"
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
|
||||
@@ -5129,7 +5129,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job)",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job) > 0",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
@@ -11153,7 +11153,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance)",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance) > 0",
|
||||
"legendFormat": "{{instance}} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
|
||||
@@ -5174,7 +5174,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job)",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job) > 0",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
@@ -7667,7 +7667,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance)",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance) > 0",
|
||||
"legendFormat": "{{instance}} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
|
||||
@@ -92,29 +92,72 @@
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 24,
|
||||
"options": {
|
||||
"code": {
|
||||
"language": "plaintext",
|
||||
"showLineNumbers": false,
|
||||
"showMiniMap": false
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"percentChangeColorMode": "standard",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "/^short_version$/",
|
||||
"values": false
|
||||
},
|
||||
"content": "<div style=\"text-align: center;\">$version</div>",
|
||||
"mode": "markdown"
|
||||
"showPercentChange": false,
|
||||
"textMode": "value",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "vm_app_version{job=~\"$job\",instance=~\"$instance\"}",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"legendFormat": "{{short_version}}",
|
||||
"range": false,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Version",
|
||||
"type": "text"
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
|
||||
@@ -5130,7 +5130,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job)",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job) > 0",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
@@ -11154,7 +11154,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance)",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance) > 0",
|
||||
"legendFormat": "{{instance}} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
|
||||
@@ -5175,7 +5175,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job)",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job) > 0",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
@@ -7668,7 +7668,7 @@
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance)",
|
||||
"expr": "sum(rate(process_major_pagefaults_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job,instance) > 0",
|
||||
"legendFormat": "{{instance}} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
|
||||
@@ -2698,6 +2698,109 @@
|
||||
],
|
||||
"title": "Restarts ($job)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "This metric helps identify slow-uploading clients. Elevated buffering percentiles together with a drop in incoming request rate, in the absence of CPU, memory, or upstream saturation, usually indicate slow client uploads (for example, a client on EDGE connection).\n\nSlow clients can hold concurrency slots for extended periods, causing vmauth to hit concurrency limits and reject requests even when upstream services have sufficient capacity. Increasing `-requestBufferSize` decouples client upload speed from request processing, reducing concurrency pressure and protecting vmauth and upstream services, at the cost of higher memory usage.\n\nThis metric is available only when `-requestBufferSize` is non-zero (default: 32KiB). The maximum time vmauth waits for the request body to be fully read (or for the buffer to be filled) is controlled by `-maxQueueDuration` (default: 10s).\n\nRead more in [request body buffering](https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering) documentation.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 46
|
||||
},
|
||||
"id": 39,
|
||||
"links": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Request body buffering",
|
||||
"url": "https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.3.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "max(vmauth_buffer_request_body_duration_seconds{job=~\"$job\", instance=~\"$instance\",quantile=\"0.99\"}) by (job)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Request body buffering duration P99",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"title": "Troubleshooting",
|
||||
|
||||
@@ -2697,6 +2697,109 @@
|
||||
],
|
||||
"title": "Restarts ($job)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "This metric helps identify slow-uploading clients. Elevated buffering percentiles together with a drop in incoming request rate, in the absence of CPU, memory, or upstream saturation, usually indicate slow client uploads (for example, a client on EDGE connection).\n\nSlow clients can hold concurrency slots for extended periods, causing vmauth to hit concurrency limits and reject requests even when upstream services have sufficient capacity. Increasing `-requestBufferSize` decouples client upload speed from request processing, reducing concurrency pressure and protecting vmauth and upstream services, at the cost of higher memory usage.\n\nThis metric is available only when `-requestBufferSize` is non-zero (default: 32KiB). The maximum time vmauth waits for the request body to be fully read (or for the buffer to be filled) is controlled by `-maxQueueDuration` (default: 10s).\n\nRead more in [request body buffering](https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering) documentation.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 46
|
||||
},
|
||||
"id": 39,
|
||||
"links": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Request body buffering",
|
||||
"url": "https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.3.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "max(vmauth_buffer_request_body_duration_seconds{job=~\"$job\", instance=~\"$instance\",quantile=\"0.99\"}) by (job)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Request body buffering duration P99",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"title": "Troubleshooting",
|
||||
|
||||
@@ -137,6 +137,11 @@ publish-via-docker-from-rc:
|
||||
$(foreach registry,$(DOCKER_REGISTRIES),\
|
||||
docker buildx imagetools create --tag $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-scratch $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(EXTRA_DOCKER_TAG_SUFFIX)-scratch; \
|
||||
)
|
||||
@if echo "$(PKG_TAG)" | grep -q "enterprise"; then \
|
||||
$(foreach registry,$(DOCKER_REGISTRIES),\
|
||||
docker buildx imagetools create --tag $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-fips $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(EXTRA_DOCKER_TAG_SUFFIX)-fips; \
|
||||
) \
|
||||
fi
|
||||
|
||||
publish-via-docker-latest:
|
||||
$(foreach registry,$(DOCKER_REGISTRIES),\
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# and forwards them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.135.0
|
||||
image: victoriametrics/vmagent:v1.136.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -37,14 +37,14 @@ services:
|
||||
# vmstorage shards. Each shard receives 1/N of all metrics sent to vminserts,
|
||||
# where N is number of vmstorages (2 in this case).
|
||||
vmstorage-1:
|
||||
image: victoriametrics/vmstorage:v1.135.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.136.0-cluster
|
||||
volumes:
|
||||
- strgdata-1:/storage
|
||||
command:
|
||||
- "--storageDataPath=/storage"
|
||||
restart: always
|
||||
vmstorage-2:
|
||||
image: victoriametrics/vmstorage:v1.135.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.136.0-cluster
|
||||
volumes:
|
||||
- strgdata-2:/storage
|
||||
command:
|
||||
@@ -54,7 +54,7 @@ services:
|
||||
# vminsert is ingestion frontend. It receives metrics pushed by vmagent,
|
||||
# pre-process them and distributes across configured vmstorage shards.
|
||||
vminsert-1:
|
||||
image: victoriametrics/vminsert:v1.135.0-cluster
|
||||
image: victoriametrics/vminsert:v1.136.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -63,7 +63,7 @@ services:
|
||||
- "--storageNode=vmstorage-2:8400"
|
||||
restart: always
|
||||
vminsert-2:
|
||||
image: victoriametrics/vminsert:v1.135.0-cluster
|
||||
image: victoriametrics/vminsert:v1.136.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -75,7 +75,7 @@ services:
|
||||
# vmselect is a query frontend. It serves read queries in MetricsQL or PromQL.
|
||||
# vmselect collects results from configured `--storageNode` shards.
|
||||
vmselect-1:
|
||||
image: victoriametrics/vmselect:v1.135.0-cluster
|
||||
image: victoriametrics/vmselect:v1.136.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -85,7 +85,7 @@ services:
|
||||
- "--vmalert.proxyURL=http://vmalert:8880"
|
||||
restart: always
|
||||
vmselect-2:
|
||||
image: victoriametrics/vmselect:v1.135.0-cluster
|
||||
image: victoriametrics/vmselect:v1.136.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -100,7 +100,7 @@ services:
|
||||
# read requests from Grafana, vmui, vmalert among vmselects.
|
||||
# It can be used as an authentication proxy.
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.135.0
|
||||
image: victoriametrics/vmauth:v1.136.0
|
||||
depends_on:
|
||||
- "vmselect-1"
|
||||
- "vmselect-2"
|
||||
@@ -114,7 +114,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.135.0
|
||||
image: victoriametrics/vmalert:v1.136.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# and forwards them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.135.0
|
||||
image: victoriametrics/vmagent:v1.136.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
# VictoriaMetrics instance, a single process responsible for
|
||||
# storing metrics and serving read requests.
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.135.0
|
||||
image: victoriametrics/victoria-metrics:v1.136.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
- 8089:8089
|
||||
@@ -54,7 +54,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.135.0
|
||||
image: victoriametrics/vmalert:v1.136.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "alertmanager"
|
||||
|
||||
@@ -27,7 +27,7 @@ groups:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=20&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=20&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} will run out of disk space in 3 days"
|
||||
description: "Taking into account current ingestion rate, free disk space will be enough only
|
||||
for {{ $value | humanizeDuration }} on instance {{ $labels.instance }}.\n
|
||||
@@ -51,7 +51,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=20&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=20&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} will become read-only in 3 days"
|
||||
description: "Taking into account current ingestion rate, free disk space and -storage.minFreeDiskSpaceBytes
|
||||
instance {{ $labels.instance }} will remain writable for {{ $value | humanizeDuration }}.\n
|
||||
@@ -68,7 +68,7 @@ groups:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=20&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=20&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} (job={{ $labels.job }}) will run out of disk space soon"
|
||||
description: "Disk utilisation on instance {{ $labels.instance }} is more than 80%.\n
|
||||
Having less than 20% of free disk space could cripple merge processes and overall performance.
|
||||
@@ -81,7 +81,7 @@ groups:
|
||||
severity: warning
|
||||
show_at: dashboard
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=52&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=52&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many errors served for {{ $labels.job }} path {{ $labels.path }} (instance {{ $labels.instance }})"
|
||||
description: "Requests to path {{ $labels.path }} are receiving errors.
|
||||
Please verify if clients are sending correct requests."
|
||||
@@ -100,7 +100,7 @@ groups:
|
||||
severity: warning
|
||||
show_at: dashboard
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=44&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=44&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many RPC errors for {{ $labels.job }} (instance {{ $labels.instance }})"
|
||||
description: "RPC errors are interconnection errors between cluster components.\n
|
||||
Possible reasons for errors are misconfiguration, overload, network blips or unreachable components."
|
||||
@@ -116,7 +116,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=102"
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=102"
|
||||
summary: "Churn rate is more than 10% for the last 15m"
|
||||
description: "VM constantly creates new time series.\n
|
||||
This effect is known as Churn Rate.\n
|
||||
@@ -132,7 +132,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=102"
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=102"
|
||||
summary: "Too high number of new series created over last 24h"
|
||||
description: "The number of created new time series over last 24h is 3 times higher than
|
||||
current number of active series.\n
|
||||
@@ -151,7 +151,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=108"
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=108"
|
||||
summary: "Percentage of slow inserts is more than 5% for the last 15m"
|
||||
description: "High rate of slow inserts may be a sign of resource exhaustion
|
||||
for the current load. It is likely more RAM is needed for optimal handling of the current number of active time series.
|
||||
@@ -164,7 +164,7 @@ groups:
|
||||
severity: warning
|
||||
show_at: dashboard
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=139&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/oS7Bi_0Wz?viewPanel=139&var-instance={{ $labels.instance }}"
|
||||
summary: "Connection between vminsert on {{ $labels.instance }} and vmstorage on {{ $labels.addr }} is saturated"
|
||||
description: "The connection between vminsert (instance {{ $labels.instance }}) and vmstorage (instance {{ $labels.addr }})
|
||||
is saturated by more than 90% and vminsert won't be able to keep up.\n
|
||||
|
||||
@@ -15,7 +15,7 @@ groups:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=49&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=49&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} is dropping data from persistent queue"
|
||||
description: "Vmagent dropped {{ $value | humanize1024 }} from persistent queue
|
||||
on instance {{ $labels.instance }} for the last 10m."
|
||||
@@ -26,7 +26,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=79&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=79&var-instance={{ $labels.instance }}"
|
||||
summary: "Vmagent is dropping data blocks that are rejected by remote storage"
|
||||
description: "Job \"{{ $labels.job }}\" on instance {{ $labels.instance }} drops the rejected by
|
||||
remote-write server data blocks. Check the logs to find the reason for rejects."
|
||||
@@ -37,7 +37,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=31&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=31&var-instance={{ $labels.instance }}"
|
||||
summary: "Vmagent fails to scrape one or more targets"
|
||||
description: "Job \"{{ $labels.job }}\" on instance {{ $labels.instance }} fails to scrape targets for last 15m"
|
||||
|
||||
@@ -61,7 +61,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=77&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=77&var-instance={{ $labels.instance }}"
|
||||
summary: "Vmagent responds with too many errors on data ingestion protocols"
|
||||
description: "Job \"{{ $labels.job }}\" on instance {{ $labels.instance }} responds with errors to write requests for last 15m."
|
||||
|
||||
@@ -71,7 +71,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=61&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=61&var-instance={{ $labels.instance }}"
|
||||
summary: "Job \"{{ $labels.job }}\" on instance {{ $labels.instance }} fails to push to remote storage"
|
||||
description: "Vmagent fails to push data via remote write protocol to destination \"{{ $labels.url }}\"\n
|
||||
Ensure that destination is up and reachable."
|
||||
@@ -87,7 +87,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=84&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=84&var-instance={{ $labels.instance }}"
|
||||
summary: "Remote write connection from \"{{ $labels.job }}\" (instance {{ $labels.instance }}) to {{ $labels.url }} is saturated"
|
||||
description: "The remote write connection between vmagent \"{{ $labels.job }}\" (instance {{ $labels.instance }}) and destination \"{{ $labels.url }}\"
|
||||
is saturated by more than 90% and vmagent won't be able to keep up.\n
|
||||
@@ -101,7 +101,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=98&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=98&var-instance={{ $labels.instance }}"
|
||||
summary: "Persistent queue writes for instance {{ $labels.instance }} are saturated"
|
||||
description: "Persistent queue writes for vmagent \"{{ $labels.job }}\" (instance {{ $labels.instance }})
|
||||
are saturated by more than 90% and vmagent won't be able to keep up with flushing data on disk.
|
||||
@@ -113,7 +113,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=99&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=99&var-instance={{ $labels.instance }}"
|
||||
summary: "Persistent queue reads for instance {{ $labels.instance }} are saturated"
|
||||
description: "Persistent queue reads for vmagent \"{{ $labels.job }}\" (instance {{ $labels.instance }})
|
||||
are saturated by more than 90% and vmagent won't be able to keep up with reading data from the disk.
|
||||
@@ -124,7 +124,7 @@ groups:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=88&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=88&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} reached 90% of the limit"
|
||||
description: "Max series limit set via -remoteWrite.maxHourlySeries flag is close to reaching the max value.
|
||||
Then samples for new time series will be dropped instead of sending them to remote storage systems."
|
||||
@@ -134,7 +134,7 @@ groups:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=90&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/G7Z9GzMGz?viewPanel=90&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} reached 90% of the limit"
|
||||
description: "Max series limit set via -remoteWrite.maxDailySeries flag is close to reaching the max value.
|
||||
Then samples for new time series will be dropped instead of sending them to remote storage systems."
|
||||
|
||||
@@ -23,7 +23,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=13&var-instance={{ $labels.instance }}&var-file={{ $labels.file }}&var-group={{ $labels.group }}"
|
||||
dashboard: "{{ $externalURL }}/d/LzldHAVnz?viewPanel=13&var-instance={{ $labels.instance }}&var-file={{ $labels.file }}&var-group={{ $labels.group }}"
|
||||
summary: "Alerting rules are failing for vmalert instance {{ $labels.instance }}"
|
||||
description: "Alerting rules execution is failing for \"{{ $labels.alertname }}\" from group \"{{ $labels.group }}\" in file \"{{ $labels.file }}\".
|
||||
Check vmalert's logs for detailed error message."
|
||||
@@ -34,7 +34,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=30&var-instance={{ $labels.instance }}&var-file={{ $labels.file }}&var-group={{ $labels.group }}"
|
||||
dashboard: "{{ $externalURL }}/d/LzldHAVnz?viewPanel=30&var-instance={{ $labels.instance }}&var-file={{ $labels.file }}&var-group={{ $labels.group }}"
|
||||
summary: "Recording rules are failing for vmalert instance {{ $labels.instance }}"
|
||||
description: "Recording rules execution is failing for \"{{ $labels.recording }}\" from group \"{{ $labels.group }}\" in file \"{{ $labels.file }}\".
|
||||
Check vmalert's logs for detailed error message."
|
||||
@@ -45,7 +45,7 @@ groups:
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=33&var-file={{ $labels.file }}&var-group={{ $labels.group }}"
|
||||
dashboard: "{{ $externalURL }}/d/LzldHAVnz?viewPanel=33&var-file={{ $labels.file }}&var-group={{ $labels.group }}"
|
||||
summary: "Recording rule {{ $labels.recording }} ({{ $labels.group }}) produces no data"
|
||||
description: "Recording rule \"{{ $labels.recording }}\" from group \"{{ $labels.group }}\" in file \"{{ $labels.file }}\"
|
||||
produces 0 samples over the last 30min. It might be caused by a misconfiguration
|
||||
|
||||
@@ -11,7 +11,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/nbuo5Mr4k?viewPanel=10&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/nbuo5Mr4k?viewPanel=10&var-instance={{ $labels.instance }}"
|
||||
summary: "vmauth ({{ $labels.instance }}) reached concurrent requests limit"
|
||||
description: "Possible solutions: increase -maxQueueDuration flag value, increase -maxConcurrentRequests flag value,
|
||||
deploy additional vmauth replicas, check requests latency at backend service.
|
||||
@@ -22,7 +22,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/nbuo5Mr4k?viewPanel=10&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/nbuo5Mr4k?viewPanel=10&var-instance={{ $labels.instance }}"
|
||||
summary: "vmauth ({{ $labels.instance }}) has reached concurrent requests limit for username {{ $labels.username }}"
|
||||
description: "Possible solutions: increase -maxQueueDuration flag value, increase -maxConcurrentPerUserRequests flag value,
|
||||
deploy additional vmauth replicas, check requests latency at backend service."
|
||||
@@ -32,7 +32,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/nbuo5Mr4k?viewPanel=10&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/nbuo5Mr4k?viewPanel=10&var-instance={{ $labels.instance }}"
|
||||
summary: "vmauth ({{ $labels.instance }}) has reached concurrent requests limit for unauthorized user"
|
||||
description: "Possible solutions: increase -maxQueueDuration flag value, increase -maxConcurrentPerUserRequests flag value,
|
||||
deploy additional vmauth replicas, check requests latency at backend service."
|
||||
@@ -42,7 +42,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/nbuo5Mr4k?viewPanel=37&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/nbuo5Mr4k?viewPanel=37&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many errors served for unauthorized user (instance {{ $labels.instance }})"
|
||||
description: "Requests from unauthorized user are receiving errors.
|
||||
Please check the vmauth logs to verify that the configuration is correct and clients are sending valid requests."
|
||||
@@ -52,7 +52,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/nbuo5Mr4k?viewPanel=37&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/nbuo5Mr4k?viewPanel=37&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many errors served for user {{ $labels.username }} (instance {{ $labels.instance }})"
|
||||
description: "Requests from user {{ $labels.username }} are receiving errors.
|
||||
Please check the vmauth logs to verify that the configuration is correct and clients are sending valid requests."
|
||||
|
||||
@@ -27,7 +27,7 @@ groups:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=53&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/wNf0q_kZk?viewPanel=53&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} will run out of disk space soon"
|
||||
description: "Taking into account current ingestion rate, free disk space will be enough only
|
||||
for {{ $value | humanizeDuration }} on instance {{ $labels.instance }}.\n
|
||||
@@ -51,7 +51,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=53&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/wNf0q_kZk?viewPanel=53&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} will become read-only in 3 days"
|
||||
description: "Taking into account current ingestion rate and free disk space
|
||||
instance {{ $labels.instance }} is writable for {{ $value | humanizeDuration }}.\n
|
||||
@@ -68,7 +68,7 @@ groups:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=53&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/wNf0q_kZk?viewPanel=53&var-instance={{ $labels.instance }}"
|
||||
summary: "Instance {{ $labels.instance }} (job={{ $labels.job }}) will run out of disk space soon"
|
||||
description: "Disk utilisation on instance {{ $labels.instance }} is more than 80%.\n
|
||||
Having less than 20% of free disk space could cripple merge processes and overall performance.
|
||||
@@ -80,7 +80,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=35&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/wNf0q_kZk?viewPanel=35&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many errors served for path {{ $labels.path }} (instance {{ $labels.instance }})"
|
||||
description: "Requests to path {{ $labels.path }} are receiving errors.
|
||||
Please verify if clients are sending correct requests."
|
||||
@@ -96,7 +96,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=66&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/wNf0q_kZk?viewPanel=66&var-instance={{ $labels.instance }}"
|
||||
summary: "Churn rate is more than 10% on \"{{ $labels.instance }}\" for the last 15m"
|
||||
description: "VM constantly creates new time series on \"{{ $labels.instance }}\".\n
|
||||
This effect is known as Churn Rate.\n
|
||||
@@ -112,7 +112,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=66&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/wNf0q_kZk?viewPanel=66&var-instance={{ $labels.instance }}"
|
||||
summary: "Too high number of new series on \"{{ $labels.instance }}\" created over last 24h"
|
||||
description: "The number of created new time series over last 24h is 3 times higher than
|
||||
current number of active series on \"{{ $labels.instance }}\".\n
|
||||
@@ -131,7 +131,7 @@ groups:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=68&var-instance={{ $labels.instance }}"
|
||||
dashboard: "{{ $externalURL }}/d/wNf0q_kZk?viewPanel=68&var-instance={{ $labels.instance }}"
|
||||
summary: "Percentage of slow inserts is more than 5% on \"{{ $labels.instance }}\" for the last 15m"
|
||||
description: "High rate of slow inserts on \"{{ $labels.instance }}\" may be a sign of resource exhaustion
|
||||
for the current load. It is likely more RAM is needed for optimal handling of the current number of active time series.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.135.0
|
||||
image: victoriametrics/vmagent:v1.136.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -14,7 +14,7 @@ services:
|
||||
restart: always
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.135.0
|
||||
image: victoriametrics/victoria-metrics:v1.136.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
restart: always
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.135.0
|
||||
image: victoriametrics/vmalert:v1.136.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
276
docs/Makefile
@@ -68,17 +68,22 @@ docs-images-to-webp: docs-image
|
||||
-exec sh -c 'cwebp -preset drawing -m 6 -o $$(echo {} | cut -f-1 -d.).webp {} && rm -rf {}' {} \;
|
||||
|
||||
docs-update-vmsingle-flags:
|
||||
(cd /tmp/vm-enterprise-single-node && make victoria-metrics)
|
||||
(cd /tmp/vm-opensource-single-node && make victoria-metrics)
|
||||
(cd /tmp/vm-enterprise-single-node && ./bin/victoria-metrics -help 2>&1) > /tmp/vm-enterprise-single-node/victoria_metrics_enterprise_flags_tmp.md
|
||||
(cd /tmp/vm-opensource-single-node && ./bin/victoria-metrics -help 2>&1) > /tmp/vm-opensource-single-node/victoria_metrics_common_flags_tmp.md
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/victoria_metrics_common_flags.md
|
||||
cat /tmp/vm-opensource-single-node/victoria_metrics_common_flags_tmp.md >> docs/victoriametrics/victoria_metrics_common_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/victoria_metrics_common_flags.md
|
||||
ifndef TAG
|
||||
$(error TAG must be provided to update flags in docs)
|
||||
endif
|
||||
git checkout $(TAG) && $(MAKE) victoria-metrics && \
|
||||
./bin/victoria-metrics -help > /tmp/victoria_metrics_common_flags_tmp.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/victoria_metrics_enterprise_flags.md
|
||||
diff /tmp/vm-enterprise-single-node/victoria_metrics_enterprise_flags_tmp.md /tmp/vm-opensource-single-node/victoria_metrics_common_flags_tmp.md |grep '^<' | sed 's/^< //' >> docs/victoriametrics/victoria_metrics_enterprise_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/victoria_metrics_enterprise_flags.md
|
||||
git checkout $(TAG)-enterprise && $(MAKE) victoria-metrics && \
|
||||
./bin/victoria-metrics -help > /tmp/victoria_metrics_enterprise_flags_tmp.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/victoria_metrics_common_flags.md && \
|
||||
cat /tmp/victoria_metrics_common_flags_tmp.md >> docs/victoriametrics/victoria_metrics_common_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/victoria_metrics_common_flags.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/victoria_metrics_enterprise_flags.md && \
|
||||
diff /tmp/victoria_metrics_enterprise_flags_tmp.md /tmp/victoria_metrics_common_flags_tmp.md |grep '^<' | sed 's/^< //' >> docs/victoriametrics/victoria_metrics_enterprise_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/victoria_metrics_enterprise_flags.md
|
||||
|
||||
# replace tabs in output with one space
|
||||
sed -i 's/\t/ /g' docs/victoriametrics/victoria_metrics_common_flags.md
|
||||
@@ -92,19 +97,21 @@ docs-update-vmsingle-flags:
|
||||
sed -i '/The maximum number of concurrent goroutines to work with files;/ s/(default [0-9]\+)/(default fsutil.getDefaultConcurrency())/' docs/victoriametrics/victoria_metrics_common_flags.md
|
||||
|
||||
docs-update-vmauth-flags:
|
||||
# ---- vmauth
|
||||
(cd /tmp/vm-enterprise-single-node && make vmauth)
|
||||
(cd /tmp/vm-opensource-single-node && make vmauth)
|
||||
(cd /tmp/vm-enterprise-single-node && ./bin/vmauth -help 2>&1) > /tmp/vm-enterprise-single-node/vmauth_enterprise_flags_tmp.md
|
||||
(cd /tmp/vm-opensource-single-node && ./bin/vmauth -help 2>&1) > /tmp/vm-opensource-single-node/vmauth_common_flags_tmp.md
|
||||
ifndef TAG
|
||||
$(error TAG must be provided to update flags in docs)
|
||||
endif
|
||||
git checkout $(TAG) && $(MAKE) vmauth && \
|
||||
./bin/vmauth -help > /tmp/vmauth_common_flags_tmp.md
|
||||
git checkout $(TAG)-enterprise && $(MAKE) vmauth && \
|
||||
./bin/vmauth -help > /tmp/vmauth_enterprise_flags_tmp.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmauth_common_flags.md
|
||||
cat /tmp/vm-opensource-single-node/vmauth_common_flags_tmp.md >> docs/victoriametrics/vmauth_common_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/vmauth_common_flags.md
|
||||
cat /tmp/vmauth_common_flags_tmp.md >> docs/victoriametrics/vmauth_common_flags.md
|
||||
printf '```\n' >> docs/victoriametrics/vmauth_common_flags.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmauth_enterprise_flags.md
|
||||
diff /tmp/vm-enterprise-single-node/vmauth_enterprise_flags_tmp.md /tmp/vm-opensource-single-node/vmauth_common_flags_tmp.md |grep '^<' | sed 's/^< //' >> docs/victoriametrics/vmauth_enterprise_flags.md
|
||||
printf -- '```' >> docs/victoriametrics/vmauth_enterprise_flags.md
|
||||
diff /tmp/vmauth_enterprise_flags_tmp.md /tmp/vmauth_common_flags_tmp.md |grep '^<' | sed 's/^< //' >> docs/victoriametrics/vmauth_enterprise_flags.md
|
||||
printf '```' >> docs/victoriametrics/vmauth_enterprise_flags.md
|
||||
|
||||
# replace tabs in output with one space
|
||||
sed -i 's/\t/ /g' docs/victoriametrics/vmauth_common_flags.md
|
||||
@@ -115,17 +122,22 @@ docs-update-vmauth-flags:
|
||||
sed -i '/The maximum number of concurrent goroutines to work with files;/ s/(default [0-9]\+)/(default fsutil.getDefaultConcurrency())/' docs/victoriametrics/vmauth_common_flags.md
|
||||
|
||||
docs-update-vmagent-flags:
|
||||
(cd /tmp/vm-enterprise-single-node && make vmagent)
|
||||
(cd /tmp/vm-opensource-single-node && make vmagent)
|
||||
(cd /tmp/vm-enterprise-single-node && ./bin/vmagent -help 2>&1) > /tmp/vm-enterprise-single-node/vmagent_enterprise_flags_tmp.md
|
||||
(cd /tmp/vm-opensource-single-node && ./bin/vmagent -help 2>&1) > /tmp/vm-opensource-single-node/vmagent_common_flags_tmp.md
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmagent_common_flags.md
|
||||
cat /tmp/vm-opensource-single-node/vmagent_common_flags_tmp.md >> docs/victoriametrics/vmagent_common_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/vmagent_common_flags.md
|
||||
ifndef TAG
|
||||
$(error TAG must be provided to update flags in docs)
|
||||
endif
|
||||
git checkout $(TAG) && $(MAKE) vmagent && \
|
||||
./bin/vmagent -help > /tmp/vmagent_common_flags_tmp.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmagent_enterprise_flags.md
|
||||
diff /tmp/vm-enterprise-single-node/vmagent_enterprise_flags_tmp.md /tmp/vm-opensource-single-node/vmagent_common_flags_tmp.md |grep '^<' | sed 's/^< //' >> docs/victoriametrics/vmagent_enterprise_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/vmagent_enterprise_flags.md
|
||||
git checkout $(TAG)-enterprise && $(MAKE) vmagent && \
|
||||
./bin/vmagent -help > /tmp/vmagent_enterprise_flags_tmp.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmagent_common_flags.md && \
|
||||
cat /tmp/vmagent_common_flags_tmp.md >> docs/victoriametrics/vmagent_common_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vmagent_common_flags.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmagent_enterprise_flags.md && \
|
||||
diff /tmp/vmagent_enterprise_flags_tmp.md /tmp/vmagent_common_flags_tmp.md |grep '^<' | sed 's/^< //' >> docs/victoriametrics/vmagent_enterprise_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vmagent_enterprise_flags.md
|
||||
|
||||
# replace tabs in output with one space
|
||||
sed -i 's/\t/ /g' docs/victoriametrics/vmagent_common_flags.md
|
||||
@@ -138,18 +150,22 @@ docs-update-vmagent-flags:
|
||||
sed -i '/The maximum number of concurrent goroutines to work with files;/ s/(default [0-9]\+)/(default fsutil.getDefaultConcurrency())/' docs/victoriametrics/vmagent_common_flags.md
|
||||
|
||||
docs-update-vmalert-flags:
|
||||
(cd /tmp/vm-enterprise-single-node && make vmalert)
|
||||
(cd /tmp/vm-opensource-single-node && make vmalert)
|
||||
(cd /tmp/vm-enterprise-single-node && ./bin/vmalert -help 2>&1) > /tmp/vm-enterprise-single-node/vmalert_enterprise_flags_tmp.md
|
||||
(cd /tmp/vm-opensource-single-node && ./bin/vmalert -help 2>&1) > /tmp/vm-opensource-single-node/vmalert_common_flags_tmp.md
|
||||
ifndef TAG
|
||||
$(error TAG must be provided to update flags in docs)
|
||||
endif
|
||||
git checkout $(TAG) && $(MAKE) vmalert && \
|
||||
./bin/vmalert -help > /tmp/vmalert_common_flags_tmp.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmalert_common_flags.md
|
||||
cat /tmp/vm-opensource-single-node/vmalert_common_flags_tmp.md >> docs/victoriametrics/vmalert_common_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/vmalert_common_flags.md
|
||||
git checkout $(TAG)-enterprise && $(MAKE) vmalert && \
|
||||
./bin/vmalert -help > /tmp/vmalert_enterprise_flags_tmp.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmalert_enterprise_flags.md
|
||||
diff /tmp/vm-enterprise-single-node/vmalert_enterprise_flags_tmp.md /tmp/vm-opensource-single-node/vmalert_common_flags_tmp.md |grep '^<' | sed 's/^< //' >> docs/victoriametrics/vmalert_enterprise_flags.md
|
||||
printf -- '```' >> docs/victoriametrics/vmalert_enterprise_flags.md
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmalert_common_flags.md && \
|
||||
cat /tmp/vmalert_common_flags_tmp.md >> docs/victoriametrics/vmalert_common_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vmalert_common_flags.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmalert_enterprise_flags.md && \
|
||||
diff /tmp/vmalert_enterprise_flags_tmp.md /tmp/vmalert_common_flags_tmp.md |grep '^<' | sed 's/^< //' >> docs/victoriametrics/vmalert_enterprise_flags.md && \
|
||||
printf '```' >> docs/victoriametrics/vmalert_enterprise_flags.md
|
||||
|
||||
# replace tabs in output with one space
|
||||
sed -i 's/\t/ /g' docs/victoriametrics/vmalert_common_flags.md
|
||||
@@ -161,18 +177,22 @@ docs-update-vmalert-flags:
|
||||
sed -i '/The maximum number of concurrent goroutines to work with files;/ s/(default [0-9]\+)/(default fsutil.getDefaultConcurrency())/' docs/victoriametrics/vmalert_common_flags.md
|
||||
|
||||
docs-update-vmselect-flags:
|
||||
(cd /tmp/vm-enterprise-cluster && make vmselect)
|
||||
(cd /tmp/vm-opensource-cluster && make vmselect)
|
||||
(cd /tmp/vm-enterprise-cluster && ./bin/vmselect -help 2>&1) > /tmp/vm-enterprise-cluster/vmselect_enterprise_flags_tmp.md
|
||||
(cd /tmp/vm-opensource-cluster && ./bin/vmselect -help 2>&1) > /tmp/vm-opensource-cluster/vmselect_common_flags_tmp.md
|
||||
ifndef TAG
|
||||
$(error TAG must be provided to update flags in docs)
|
||||
endif
|
||||
git checkout $(TAG)-cluster && $(MAKE) vmselect && \
|
||||
./bin/vmselect -help > /tmp/vmselect_common_flags_tmp.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmselect_common_flags.md
|
||||
cat /tmp/vm-opensource-cluster/vmselect_common_flags_tmp.md >> docs/victoriametrics/vmselect_common_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/vmselect_common_flags.md
|
||||
git checkout $(TAG)-enterprise-cluster && $(MAKE) vmselect && \
|
||||
./bin/vmselect -help > /tmp/vmselect_enterprise_flags_tmp.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmselect_enterprise_flags.md
|
||||
diff /tmp/vm-enterprise-cluster/vmselect_enterprise_flags_tmp.md /tmp/vm-opensource-cluster/vmselect_common_flags_tmp.md |grep '^<' | sed 's/^< //' >> docs/victoriametrics/vmselect_enterprise_flags.md
|
||||
printf -- '```' >> docs/victoriametrics/vmselect_enterprise_flags.md
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmselect_common_flags.md && \
|
||||
cat /tmp/vmselect_common_flags_tmp.md >> docs/victoriametrics/vmselect_common_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vmselect_common_flags.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmselect_enterprise_flags.md && \
|
||||
diff /tmp/vmselect_enterprise_flags_tmp.md /tmp/vmselect_common_flags_tmp.md |grep '^<' | sed 's/^< //' >> docs/victoriametrics/vmselect_enterprise_flags.md && \
|
||||
printf '```' >> docs/victoriametrics/vmselect_enterprise_flags.md
|
||||
|
||||
# replace tabs in output with one space
|
||||
sed -i 's/\t/ /g' docs/victoriametrics/vmselect_common_flags.md
|
||||
@@ -186,18 +206,22 @@ docs-update-vmselect-flags:
|
||||
sed -i '/The maximum number of concurrent goroutines to work with files;/ s/(default [0-9]\+)/(default fsutil.getDefaultConcurrency())/' docs/victoriametrics/vmselect_common_flags.md
|
||||
|
||||
docs-update-vminsert-flags:
|
||||
(cd /tmp/vm-enterprise-cluster && make vminsert)
|
||||
(cd /tmp/vm-opensource-cluster && make vminsert)
|
||||
(cd /tmp/vm-enterprise-cluster && ./bin/vminsert -help 2>&1) > /tmp/vm-enterprise-cluster/vminsert_enterprise_flags_tmp.md
|
||||
(cd /tmp/vm-opensource-cluster && ./bin/vminsert -help 2>&1) > /tmp/vm-opensource-cluster/vminsert_common_flags_tmp.md
|
||||
ifndef TAG
|
||||
$(error TAG must be provided to update flags in docs)
|
||||
endif
|
||||
git checkout $(TAG)-cluster && $(MAKE) vminsert && \
|
||||
./bin/vminsert -help > /tmp/vminsert_common_flags_tmp.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vminsert_common_flags.md
|
||||
cat /tmp/vm-opensource-cluster/vminsert_common_flags_tmp.md >> docs/victoriametrics/vminsert_common_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/vminsert_common_flags.md
|
||||
git checkout $(TAG)-enterprise-cluster && $(MAKE) vminsert && \
|
||||
./bin/vminsert -help > /tmp/vminsert_enterprise_flags_tmp.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vminsert_enterprise_flags.md
|
||||
diff /tmp/vm-enterprise-cluster/vminsert_enterprise_flags_tmp.md /tmp/vm-opensource-cluster/vminsert_common_flags_tmp.md |grep '^<' | sed 's/^< //' >> docs/victoriametrics/vminsert_enterprise_flags.md
|
||||
printf -- '```' >> docs/victoriametrics/vminsert_enterprise_flags.md
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vminsert_common_flags.md && \
|
||||
cat /tmp/vminsert_common_flags_tmp.md >> docs/victoriametrics/vminsert_common_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vminsert_common_flags.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vminsert_enterprise_flags.md && \
|
||||
diff /tmp/vminsert_enterprise_flags_tmp.md /tmp/vminsert_common_flags_tmp.md |grep '^<' | sed 's/^< //' >> docs/victoriametrics/vminsert_enterprise_flags.md && \
|
||||
printf '```' >> docs/victoriametrics/vminsert_enterprise_flags.md
|
||||
|
||||
# replace tabs in output with one space
|
||||
sed -i 's/\t/ /g' docs/victoriametrics/vminsert_common_flags.md
|
||||
@@ -218,18 +242,22 @@ docs-update-vminsert-flags:
|
||||
sed -i '/The maximum number of concurrent goroutines to work with files;/ s/(default [0-9]\+)/(default fsutil.getDefaultConcurrency())/' docs/victoriametrics/vminsert_common_flags.md
|
||||
|
||||
docs-update-vmstorage-flags:
|
||||
(cd /tmp/vm-enterprise-cluster && make vmstorage)
|
||||
(cd /tmp/vm-opensource-cluster && make vmstorage)
|
||||
(cd /tmp/vm-enterprise-cluster && ./bin/vmstorage -help 2>&1) > /tmp/vm-enterprise-cluster/vmstorage_enterprise_flags_tmp.md
|
||||
(cd /tmp/vm-opensource-cluster && ./bin/vmstorage -help 2>&1) > /tmp/vm-opensource-cluster/vmstorage_common_flags_tmp.md
|
||||
ifndef TAG
|
||||
$(error TAG must be provided to update flags in docs)
|
||||
endif
|
||||
git checkout $(TAG)-cluster && $(MAKE) vmstorage && \
|
||||
./bin/vmstorage -help > /tmp/vmstorage_common_flags_tmp.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmstorage_common_flags.md
|
||||
cat /tmp/vm-opensource-cluster/vmstorage_common_flags_tmp.md >> docs/victoriametrics/vmstorage_common_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/vmstorage_common_flags.md
|
||||
git checkout $(TAG)-enterprise-cluster && $(MAKE) vmstorage && \
|
||||
./bin/vmstorage -help > /tmp/vmstorage_enterprise_flags_tmp.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmstorage_enterprise_flags.md
|
||||
diff /tmp/vm-enterprise-cluster/vmstorage_enterprise_flags_tmp.md /tmp/vm-opensource-cluster/vmstorage_common_flags_tmp.md |grep '^<' | sed 's/^< //' >> docs/victoriametrics/vmstorage_enterprise_flags.md
|
||||
printf -- '```' >> docs/victoriametrics/vmstorage_enterprise_flags.md
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmstorage_common_flags.md && \
|
||||
cat /tmp/vmstorage_common_flags_tmp.md >> docs/victoriametrics/vmstorage_common_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vmstorage_common_flags.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmstorage_enterprise_flags.md && \
|
||||
diff /tmp/vmstorage_enterprise_flags_tmp.md /tmp/vmstorage_common_flags_tmp.md |grep '^<' | sed 's/^< //' >> docs/victoriametrics/vmstorage_enterprise_flags.md && \
|
||||
printf '```' >> docs/victoriametrics/vmstorage_enterprise_flags.md
|
||||
|
||||
# replace tabs in output with one space
|
||||
sed -i 's/\t/ /g' docs/victoriametrics/vmstorage_common_flags.md
|
||||
@@ -242,37 +270,40 @@ docs-update-vmstorage-flags:
|
||||
sed -i '/The maximum number of concurrent goroutines to work with files;/ s/(default [0-9]\+)/(default fsutil.getDefaultConcurrency())/' docs/victoriametrics/vmstorage_common_flags.md
|
||||
|
||||
docs-update-vmctl-flags:
|
||||
(cd /tmp/vm-opensource-single-node && make vmctl)
|
||||
(cd /tmp/vm-opensource-single-node && ./bin/vmctl -help 2>&1) > /tmp/vm-opensource-single-node/vmctl_flags_tmp.md
|
||||
(cd /tmp/vm-opensource-single-node && ./bin/vmctl opentsdb -help 2>&1) > /tmp/vm-opensource-single-node/vmctl_opentsdb_flags_tmp.md
|
||||
(cd /tmp/vm-opensource-single-node && ./bin/vmctl influx -help 2>&1) > /tmp/vm-opensource-single-node/vmctl_influx_flags_tmp.md
|
||||
(cd /tmp/vm-opensource-single-node && ./bin/vmctl remote-read -help 2>&1) > /tmp/vm-opensource-single-node/vmctl_remote-read_flags_tmp.md
|
||||
(cd /tmp/vm-opensource-single-node && ./bin/vmctl prometheus -help 2>&1) > /tmp/vm-opensource-single-node/vmctl_prometheus_flags_tmp.md
|
||||
(cd /tmp/vm-opensource-single-node && ./bin/vmctl vm-native -help 2>&1) > /tmp/vm-opensource-single-node/vmctl_vm-native_flags_tmp.md
|
||||
ifndef TAG
|
||||
$(error TAG must be provided to update flags in docs)
|
||||
endif
|
||||
git checkout $(TAG) && $(MAKE) vmctl && \
|
||||
./bin/vmctl -help > /tmp/vmctl_flags_tmp.md && \
|
||||
./bin/vmctl opentsdb -help > /tmp/vmctl_opentsdb_flags_tmp.md && \
|
||||
./bin/vmctl influx -help > /tmp/vmctl_influx_flags_tmp.md && \
|
||||
./bin/vmctl remote-read -help > /tmp/vmctl_remote-read_flags_tmp.md && \
|
||||
./bin/vmctl prometheus -help > /tmp/vmctl_prometheus_flags_tmp.md && \
|
||||
./bin/vmctl vm-native -help > /tmp/vmctl_vm-native_flags_tmp.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_flags.md
|
||||
cat /tmp/vm-opensource-single-node/vmctl_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/vmctl/vmctl_flags.md
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_flags.md && \
|
||||
cat /tmp/vmctl_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vmctl/vmctl_flags.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_opentsdb_flags.md
|
||||
cat /tmp/vm-opensource-single-node/vmctl_opentsdb_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_opentsdb_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/vmctl/vmctl_opentsdb_flags.md
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_opentsdb_flags.md && \
|
||||
cat /tmp/vmctl_opentsdb_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_opentsdb_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vmctl/vmctl_opentsdb_flags.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_influx_flags.md
|
||||
cat /tmp/vm-opensource-single-node/vmctl_influx_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_influx_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/vmctl/vmctl_influx_flags.md
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_influx_flags.md && \
|
||||
cat /tmp/vmctl_influx_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_influx_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vmctl/vmctl_influx_flags.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_remote-read_flags.md
|
||||
cat /tmp/vm-opensource-single-node/vmctl_remote-read_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_remote-read_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/vmctl/vmctl_remote-read_flags.md
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_remote-read_flags.md && \
|
||||
cat /tmp/vmctl_remote-read_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_remote-read_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vmctl/vmctl_remote-read_flags.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_prometheus_flags.md
|
||||
cat /tmp/vm-opensource-single-node/vmctl_prometheus_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_prometheus_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/vmctl/vmctl_prometheus_flags.md
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_prometheus_flags.md && \
|
||||
cat /tmp/vmctl_prometheus_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_prometheus_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vmctl/vmctl_prometheus_flags.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_vm-native_flags.md
|
||||
cat /tmp/vm-opensource-single-node/vmctl_vm-native_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_vm-native_flags.md
|
||||
printf -- '```\n' >> docs/victoriametrics/vmctl/vmctl_vm-native_flags.md
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_vm-native_flags.md && \
|
||||
cat /tmp/vmctl_vm-native_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_vm-native_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vmctl/vmctl_vm-native_flags.md
|
||||
|
||||
# remove Total time line from all vmctl flag files to reduce diffs noise
|
||||
sed -i '/Total time:/d' docs/victoriametrics/vmctl/vmctl_flags.md
|
||||
@@ -285,49 +316,30 @@ docs-update-vmctl-flags:
|
||||
# remove Version line and the actual version line from vmctl_flags.md to reduce diffs noise
|
||||
sed -i '/^VERSION:/,+1d' docs/victoriametrics/vmctl/vmctl_flags.md
|
||||
|
||||
# docs-update-flags updates flags in the documentation using the actual binaries compiled
|
||||
# from the latest enterprise-single-node and enterprise-cluster branches (hardcoded for now).
|
||||
# docs-update-flags updates flags in the documentation
|
||||
# using the actual binaries compiled from the provided $TAG.
|
||||
# The command also normalizes the output a bit.
|
||||
#
|
||||
# The command does not replace the need to manually update flags in the documentation when
|
||||
# new flags are added or existing flags are updated. It just helps to keep the documentation
|
||||
# in sync with the actual binaries.
|
||||
# There is no need to update flags manually while working on change in the code.
|
||||
# The flags will be synced when a new release tag is cut.
|
||||
#
|
||||
# It can be run from any branch.
|
||||
# All work happens inside temporary directories under /tmp.
|
||||
# The script checks out the required branch, builds the binaries, and updates the documentation.
|
||||
# The current Git repository is not touched.
|
||||
# The command can be run from any branch.
|
||||
# The script checks out the required $TAG, builds the binaries, and updates the documentation.
|
||||
docs-update-flags:
|
||||
ifndef TAG
|
||||
$(error TAG must be provided to update flags in docs)
|
||||
endif
|
||||
# Note for MacOS users:
|
||||
# You need to install gnu versions of sed and awk inorder fo inplace editing to work
|
||||
# You need to install gnu versions of sed and awk to enable in-place editing
|
||||
# Install using: brew install gnu-sed gawk
|
||||
# Add tools to PATH; see how in `brew info gnu-sed` and `brew info gawk`
|
||||
|
||||
git fetch enterprise
|
||||
git fetch opensource
|
||||
|
||||
rm -rf /tmp/vm-enterprise-cluster
|
||||
git worktree remove /tmp/vm-enterprise-cluster || true
|
||||
git worktree add /tmp/vm-enterprise-cluster enterprise/enterprise-cluster
|
||||
|
||||
rm -rf /tmp/vm-enterprise-single-node
|
||||
git worktree remove /tmp/vm-enterprise-single-node || true
|
||||
git worktree add /tmp/vm-enterprise-single-node enterprise/enterprise-single-node
|
||||
|
||||
|
||||
rm -rf /tmp/vm-opensource-cluster
|
||||
git worktree remove /tmp/vm-opensource-cluster || true
|
||||
git worktree add /tmp/vm-opensource-cluster opensource/cluster
|
||||
|
||||
rm -rf /tmp/vm-opensource-single-node
|
||||
git worktree remove /tmp/vm-opensource-single-node || true
|
||||
git worktree add /tmp/vm-opensource-single-node opensource/master
|
||||
|
||||
make docs-update-vmctl-flags
|
||||
make docs-update-vmsingle-flags
|
||||
make docs-update-vmalert-flags
|
||||
make docs-update-vmauth-flags
|
||||
make docs-update-vmagent-flags
|
||||
make docs-update-vmselect-flags
|
||||
make docs-update-vminsert-flags
|
||||
make docs-update-vmstorage-flags
|
||||
orig_branch=$$(git rev-parse --abbrev-ref HEAD); \
|
||||
$(MAKE) docs-update-vmctl-flags && git checkout "$$orig_branch" && \
|
||||
$(MAKE) docs-update-vmsingle-flags && git checkout "$$orig_branch" && \
|
||||
$(MAKE) docs-update-vmalert-flags && git checkout "$$orig_branch" && \
|
||||
$(MAKE) docs-update-vmauth-flags && git checkout "$$orig_branch" && \
|
||||
$(MAKE) docs-update-vmagent-flags && git checkout "$$orig_branch" && \
|
||||
$(MAKE) docs-update-vmselect-flags && git checkout "$$orig_branch" && \
|
||||
$(MAKE) docs-update-vminsert-flags && git checkout "$$orig_branch" && \
|
||||
$(MAKE) docs-update-vmstorage-flags && git checkout "$$orig_branch"
|
||||
@@ -136,21 +136,21 @@ models:
|
||||
|
||||
Here's what the default (backward-compatible) behavior looks like - anomalies will be tracked in `both` directions (`y > yhat` or `y < yhat`). This is useful when there is no domain expertise to filter the required direction.
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||
When set to `above_expected`, anomalies are tracked only when `y > yhat`.
|
||||
|
||||
*Example metrics*: Error rate, response time, page load time, number of failed transactions - metrics where *lower values are better*, so **higher** values are typically tracked.
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||
When set to `below_expected`, anomalies are tracked only when `y < yhat`.
|
||||
|
||||
*Example metrics*: Service Level Agreement (SLA) compliance, conversion rate, Customer Satisfaction Score (CSAT) - metrics where *higher values are better*, so **lower** values are typically tracked.
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||
Config with a split example:
|
||||
@@ -199,13 +199,13 @@ reader:
|
||||
|
||||
Visualizations below demonstrate this concept; the green zone defined as the `[yhat - min_dev_from_expected, yhat + min_dev_from_expected]` range excludes actual data points (`y`) from generating anomaly scores if they fall within that range.
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||

|
||||

|
||||
|
||||
Example config of how to use this param based on query results:
|
||||
|
||||
|
||||
|
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
|
Before Width: | Height: | Size: 29 KiB After Width: | Height: | Size: 29 KiB |
|
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
|
Before Width: | Height: | Size: 30 KiB After Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 30 KiB After Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
@@ -10,9 +10,9 @@ sitemap:
|
||||
|
||||
- To use *vmanomaly*, part of the enterprise package, a license key is required. Obtain your key [here](https://victoriametrics.com/products/enterprise/trial/) for this tutorial or for enterprise use.
|
||||
- In the tutorial, we'll be using the following VictoriaMetrics components:
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.135.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.135.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.135.0)
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.136.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.136.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.136.0)
|
||||
- [Grafana](https://grafana.com/) (v10.2.1)
|
||||
- [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/)
|
||||
- [Node exporter](https://github.com/prometheus/node_exporter#node-exporter) (v1.7.0) and [Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) (v0.27.0)
|
||||
@@ -323,7 +323,7 @@ Let's wrap it all up together into the `docker-compose.yml` file.
|
||||
services:
|
||||
vmagent:
|
||||
container_name: vmagent
|
||||
image: victoriametrics/vmagent:v1.135.0
|
||||
image: victoriametrics/vmagent:v1.136.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -340,7 +340,7 @@ services:
|
||||
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.135.0
|
||||
image: victoriametrics/victoria-metrics:v1.136.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -373,7 +373,7 @@ services:
|
||||
|
||||
vmalert:
|
||||
container_name: vmalert
|
||||
image: victoriametrics/vmalert:v1.135.0
|
||||
image: victoriametrics/vmalert:v1.136.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -6,32 +6,55 @@ build:
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
**The guide covers:**
|
||||
|
||||
* The setup of a [VM Operator](https://github.com/VictoriaMetrics/helm-charts/tree/master/charts/victoria-metrics-operator) via Helm in [Kubernetes](https://kubernetes.io/) with Helm charts.
|
||||
* The setup of a [VictoriaMetrics Cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) via [VM Operator](https://github.com/VictoriaMetrics/helm-charts/tree/master/charts/victoria-metrics-operator).
|
||||
* How to add CRD for a [VictoriaMetrics Cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) via [VM Operator](https://github.com/VictoriaMetrics/helm-charts/tree/master/charts/victoria-metrics-operator).
|
||||
* How to visualize stored data
|
||||
* How to store metrics in [VictoriaMetrics](https://victoriametrics.com)
|
||||
The [VictoriaMetrics Kubernetes Operator](https://docs.victoriametrics.com/operator/) simplifies deploying VictoriaMetrics Stack components on Kubernetes or OpenShift using declarative YAML [custom resources](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/).
|
||||
|
||||
By the end of this guide, you will be able to:
|
||||
|
||||
- Install and configure [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) using the Operator.
|
||||
- Scrape metrics from Kubernetes components.
|
||||
- Store metrics in [VictoriaMetrics](https://victoriametrics.com) time-series database.
|
||||
- Visualize metrics in Grafana.
|
||||
|
||||
**Preconditions**
|
||||
|
||||
* [Kubernetes cluster 1.20.9-gke.1001](https://cloud.google.com/kubernetes-engine). We use a GKE cluster from [GCP](https://cloud.google.com/) but this guide also applies to any Kubernetes cluster. For example, [Amazon EKS](https://aws.amazon.com/ru/eks/).
|
||||
* [Helm 3](https://helm.sh/docs/intro/install).
|
||||
* [kubectl 1.21+](https://kubernetes.io/docs/tasks/tools/install-kubectl).
|
||||
- A [Kubernetes GKE cluster 1.33](https://cloud.google.com/kubernetes-engine) or later
|
||||
- [Helm 4.1](https://helm.sh/docs/intro/install) or later
|
||||
- [kubectl 1.34](https://kubernetes.io/docs/tasks/tools/install-kubectl) or later
|
||||
|
||||
> [!NOTE] Tip
|
||||
> We use a GKE cluster from [GCP](https://cloud.google.com/), but this guide can also be applied to any Kubernetes cluster, such as [Amazon EKS](https://aws.amazon.com/ru/eks/) or an on-premises cluster.
|
||||
|
||||
## 1. VictoriaMetrics Helm repository
|
||||
|
||||
See how to work with a [VictoriaMetrics Helm repository in previous guide](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/#1-victoriametrics-helm-repository).
|
||||
To start, add the VictoriaMetrics Helm repository with the following commands:
|
||||
|
||||
```sh
|
||||
helm repo add vm https://victoriametrics.github.io/helm-charts/
|
||||
helm repo update
|
||||
```
|
||||
|
||||
To verify that everything is set up correctly, you may run this command:
|
||||
|
||||
```sh
|
||||
helm search repo vm/
|
||||
```
|
||||
|
||||
You should see a list similar to this:
|
||||
|
||||
```text
|
||||
NAME CHART VERSION APP VERSION DESCRIPTION
|
||||
vm/victoria-metrics-operator 0.58.1 v0.67.0 VictoriaMetrics Operator
|
||||
vm/victoria-metrics-operator-crds 0.7.0 v0.67.0 VictoriaMetrics Operator CRDs
|
||||
...(list continues)...
|
||||
```
|
||||
|
||||
## 2. Install the VM Operator from the Helm chart
|
||||
|
||||
|
||||
```sh
|
||||
helm install vmoperator vm/victoria-metrics-operator
|
||||
```
|
||||
|
||||
|
||||
The expected output is:
|
||||
|
||||
```sh
|
||||
@@ -51,12 +74,12 @@ See "Getting started guide for VM Operator" on https://docs.victoriametrics.com/
|
||||
|
||||
Run the following command to check that VM Operator is up and running:
|
||||
|
||||
|
||||
```sh
|
||||
kubectl --namespace default get pods -l "app.kubernetes.io/instance=vmoperator"
|
||||
kubectl get pods -l "app.kubernetes.io/instance=vmoperator"
|
||||
```
|
||||
|
||||
The expected output:
|
||||
Wait until `STATUS=Running` and `Ready=1/1`, like this:
|
||||
|
||||
```sh
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
vmoperator-victoria-metrics-operator-67cff44cd6-s47n6 1/1 Running 0 77s
|
||||
@@ -64,21 +87,24 @@ vmoperator-victoria-metrics-operator-67cff44cd6-s47n6 1/1 Running 0
|
||||
|
||||
## 3. Install VictoriaMetrics Cluster
|
||||
|
||||
> For this example we will use default value for `name: example-vmcluster-persistent`. Change it value up to your needs.
|
||||
> [!NOTE]
|
||||
> For this example, we use the default name for the cluster (`name: example-vmcluster-persistent`). Change the name to suit your needs.
|
||||
|
||||
Run the following command to install [VictoriaMetrics Cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) via [VM Operator](https://github.com/VictoriaMetrics/helm-charts/tree/master/charts/victoria-metrics-operator):
|
||||
First, create a YAML file to configure the deployment of the VictoriaMetrics cluster:
|
||||
|
||||
<p id="example-cluster-config"></p>
|
||||
|
||||
```sh
|
||||
cat << EOF | kubectl apply -f -
|
||||
cat << EOF > vmcluster-config.yml
|
||||
apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMCluster
|
||||
metadata:
|
||||
# define the name of the VM cluster
|
||||
name: example-vmcluster-persistent
|
||||
spec:
|
||||
# Add fields here
|
||||
# define retention period (i.e., 12 months)
|
||||
retentionPeriod: "12"
|
||||
# define the number of pods for each of the services in the VM cluster
|
||||
vmstorage:
|
||||
replicaCount: 2
|
||||
vmselect:
|
||||
@@ -88,25 +114,41 @@ spec:
|
||||
EOF
|
||||
```
|
||||
|
||||
The expected output:
|
||||
Let's break down the main elements of the config file:
|
||||
|
||||
| Field | Purpose | Example |
|
||||
| --------------------------- | ----------------- | ---------------------------- |
|
||||
| `metadata: name` | Cluster name | example-vmcluster-persistent |
|
||||
| `spec: retentionPeriod` | Metrics retention | "12" (months) |
|
||||
| `spec: vmstorage: replicaCount` | vmstorage replicas | 2 |
|
||||
| `spec: vmselect: replicaCount` | vmselect replicas | 2 |
|
||||
| `spec: vminsert: replicaCount` | vminsert replicas | 2 |
|
||||
|
||||
> [!NOTE] Tip
|
||||
> A VictoriaMetrics cluster runs [three services](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#architecture-overview): `vmstorage`, `vminsert`, and `vmselect`. You can independently customize the number of replicas for each service.
|
||||
|
||||
Once you have defined the name, retention period, and number of replicas for your cluster, run the following command to deploy the VictoriaMetrics cluster in the default namespace:
|
||||
|
||||
```sh
|
||||
kubectl apply -f vmcluster-config.yml
|
||||
```
|
||||
|
||||
The command should output something like this:
|
||||
|
||||
```text
|
||||
vmcluster.operator.victoriametrics.com/example-vmcluster-persistent created
|
||||
```
|
||||
|
||||
* By applying this CRD we install the [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) to the default [namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) of your k8s cluster with following params:
|
||||
* `retentionPeriod: "12"` defines the [retention](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#retention) to 12 months.
|
||||
* `replicaCount: 2` creates two replicas of vmselect, vminsert and vmstorage.
|
||||
|
||||
Please note that it may take some time for the pods to start. To check that the pods are started, run the following command:
|
||||
Pods may take some time to become ready. To check that the pods are started, run the following command:
|
||||
<p id="example-cluster-config"></p>
|
||||
|
||||
```sh
|
||||
kubectl get pods | grep vmcluster
|
||||
kubectl get pods -l managed-by=vm-operator
|
||||
```
|
||||
|
||||
The expected output:
|
||||
```sh
|
||||
The expected output is:
|
||||
|
||||
```text
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
vminsert-example-vmcluster-persistent-845849cb84-9vb6f 1/1 Running 0 5m15s
|
||||
vminsert-example-vmcluster-persistent-845849cb84-r7mmk 1/1 Running 0 5m15s
|
||||
@@ -116,43 +158,50 @@ vmstorage-example-vmcluster-persistent-0 1/1 Running 0
|
||||
vmstorage-example-vmcluster-persistent-1 1/1 Running 0 5m25s
|
||||
```
|
||||
|
||||
There is an extra command to get information about the cluster state:
|
||||
The VictoriaMetrics Operator adds an extra command to get information about the state of the cluster:
|
||||
|
||||
```sh
|
||||
kubectl get vmclusters
|
||||
```
|
||||
|
||||
The expected output:
|
||||
Output is typically:
|
||||
|
||||
```text
|
||||
NAME INSERT COUNT STORAGE COUNT SELECT COUNT AGE STATUS
|
||||
example-vmcluster-persistent 2 2 2 5m53s operational
|
||||
```
|
||||
|
||||
Internet traffic goes through the Kubernetes Load balancer which use the set of Pods targeted by a [Kubernetes Service](https://kubernetes.io/docs/concepts/services-networking/service/). The service in [VictoriaMetrics Cluster architecture](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#architecture-overview) which accepts the ingested data named `vminsert` and in Kubernetes it is a `vminsert ` service. So we need to use it for remote_write url.
|
||||
### Install vmagent
|
||||
|
||||
To get the name of `vminsert` services, please run the following command:
|
||||
In order to send metrics to the VictoriaMetrics database, we need a [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) service. This service scrapes metrics, applies relabeling, and forwards them to the `vminsert` service in the cluster.
|
||||
|
||||
First, we need to determine the URL for the `vminsert` service. Run the following command to obtain the name of the service:
|
||||
|
||||
```sh
|
||||
kubectl get svc | grep vminsert
|
||||
kubectl get svc -l app.kubernetes.io/name=vminsert
|
||||
```
|
||||
|
||||
The expected output:
|
||||
The expected output is:
|
||||
|
||||
```sh
|
||||
vminsert-example-vmcluster-persistent ClusterIP 10.107.47.136 <none> 8480/TCP 5m58s
|
||||
```text
|
||||
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||
vminsert-example-vmcluster-persistent ClusterIP 10.43.42.217 <none> 8480/TCP 2d
|
||||
```
|
||||
|
||||
To scrape metrics from Kubernetes with a VictoriaMetrics Cluster we will need to install [VMAgent](https://docs.victoriametrics.com/victoriametrics/vmagent/) with some additional configurations.
|
||||
Copy `vminsert-example-vmcluster-persistent` (or whatever user put into metadata.name field [https://docs.victoriametrics.com/guides/getting-started-with-vm-operator/#example-cluster-config](https://docs.victoriametrics.com/guides/getting-started-with-vm-operator/#example-cluster-config)) service name and add it to the `remoteWrite` URL from [quick-start example](https://github.com/VictoriaMetrics/operator/blob/master/docs/quick-start.md#vmagent).
|
||||
Here is an example of the full configuration that we need to apply:
|
||||
The write URL for the `vminsert` service takes the form of `http://<service-name>.<namespace>.svc.cluster.local:<port-number>`. In our example, the URL is:
|
||||
|
||||
```text
|
||||
http://vminsert-example-vmcluster-persistent.default.svc.cluster.local:8480
|
||||
```
|
||||
|
||||
Create a YAML file to configure vmagent. Ensure that `spec: remoteWrite: url` matches the `vminsert` service URL:
|
||||
|
||||
```sh
|
||||
cat <<EOF | kubectl apply -f -
|
||||
cat <<EOF > vmagent-config.yml
|
||||
apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMAgent
|
||||
metadata:
|
||||
# You may change the name of the vmagent service here
|
||||
name: example-vmagent
|
||||
spec:
|
||||
serviceScrapeNamespaceSelector: {}
|
||||
@@ -165,78 +214,156 @@ spec:
|
||||
staticScrapeNamespaceSelector: {}
|
||||
replicaCount: 1
|
||||
remoteWrite:
|
||||
# url must be "http://<service-name>.<namespace>.svc.cluster.local:<port-number>/insert/0/prometheus/api/v1/write"
|
||||
- url: "http://vminsert-example-vmcluster-persistent.default.svc.cluster.local:8480/insert/0/prometheus/api/v1/write"
|
||||
EOF
|
||||
```
|
||||
|
||||
Let's break down the main settings in the config:
|
||||
|
||||
The expected output:
|
||||
- `metadata: name` defines the name of the vmagent service (e.g., `example-vmagent`)
|
||||
- `spec: remoteWrite: url` defines the fully qualified URL for the `vminsert` service. Ensure the URL is correct and ends with `/insert/0/prometheus/api/v1/write`.
|
||||
|
||||
Install `vmagent` with:
|
||||
|
||||
```sh
|
||||
kubectl apply -f vmagent-config.yml
|
||||
```
|
||||
|
||||
You should get this message:
|
||||
|
||||
```text
|
||||
vmagent.operator.victoriametrics.com/example-vmagent created
|
||||
```
|
||||
|
||||
>`remoteWrite.url` for VMAgent consists of the following parts:
|
||||
> "service_name.VMCluster_namespace.svc.kubernetes_cluster_domain" that in our case will look like vminsert-example-vmcluster-persistent.default.svc.cluster.local
|
||||
|
||||
Verify that `VMAgent` is up and running by executing the following command:
|
||||
|
||||
Verify that `vmagent` is operational:
|
||||
|
||||
```sh
|
||||
kubectl get pods | grep vmagent
|
||||
kubectl get vmagent
|
||||
```
|
||||
|
||||
The expected output is:
|
||||
|
||||
```text
|
||||
vmagent-example-vmagent-7996844b5f-b5rzs 2/2 Running 0 9s
|
||||
NAME SHARDS COUNT REPLICA COUNT STATUS AGE
|
||||
example-vmagent 1 operational 21h
|
||||
```
|
||||
|
||||
> There are two containers for VMagent: the first one is a VMagent and the second one is a sidecar with a secret. VMagent use a secret with configuration which is mounted to the special sidecar. It observes the changes with configuration and send a signal to reload configuration for the VMagent.
|
||||
|
||||
Run the following command to make `VMAgent`'s port accessible from the local machine:
|
||||
|
||||
Run the following command to make the service port accessible from the local machine:
|
||||
|
||||
```sh
|
||||
kubectl port-forward svc/vmagent-example-vmagent 8429:8429
|
||||
```
|
||||
|
||||
The expected output is:
|
||||
The terminal should show the following. Keep the session open to access the forwarded connection:
|
||||
|
||||
```text
|
||||
Forwarding from 127.0.0.1:8429 -> 8429
|
||||
Forwarding from [::1]:8429 -> 8429
|
||||
```
|
||||
|
||||
To check that `VMAgent` collects metrics from the k8s cluster open in the browser `http://127.0.0.1:8429/targets`.
|
||||
You will see something like this:
|
||||
To check that `vmagent` is collecting metrics, browse to `http://127.0.0.1:8429/targets`. You will see something like this:
|
||||
|
||||

|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">vmagent's status for discovered targets</figcaption>
|
||||
|
||||
`VMAgent` connects to [kubernetes service discovery](https://kubernetes.io/docs/concepts/services-networking/service/) and gets targets which needs to be scraped. This service discovery is controlled by [VictoriaMetrics Operator](https://github.com/VictoriaMetrics/operator)
|
||||
Notice that only the VictoriaMetrics services are being targeted. By default, `vmagent` does not scrape Kubernetes cluster metrics. The next section explains how to enable scraping in Kubernetes.
|
||||
|
||||
### Enable Kubernetes metrics scraping {#kubernetes-scraping}
|
||||
|
||||
> [!NOTE] Tip
|
||||
> This step is optional. Skip to the next section if you do not want to collect metrics from the Kubernetes control plane and node components.
|
||||
|
||||
To enable metric collection from the Kubernetes system, we need to update `vmagent` configuration and set up various [Scrape CRDs](https://docs.victoriametrics.com/operator/resources/).
|
||||
|
||||
Update the `vmagent-config.yml` file as follows. Ensure that the `spec: remoteWrite: url:` value is still the same as in the previous step.
|
||||
|
||||
```sh
|
||||
cat <<EOF >vmagent-config.yml
|
||||
apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMAgent
|
||||
metadata:
|
||||
name: example-vmagent
|
||||
namespace: default
|
||||
spec:
|
||||
replicaCount: 1
|
||||
# Enable CRD-based scraping
|
||||
selectAllByDefault: true
|
||||
remoteWrite:
|
||||
# url takes the form of "http://<service-name>.<namespace>.svc.cluster.local:<port-number>/insert/0/prometheus/api/v1/write"
|
||||
- url: "http://vminsert-example-vmcluster-persistent.default.svc.cluster.local:8480/insert/0/prometheus/api/v1/write"
|
||||
EOF
|
||||
```
|
||||
|
||||
Update `vmagent`:
|
||||
|
||||
```sh
|
||||
kubectl apply -f vmagent-config.yml
|
||||
```
|
||||
|
||||
Download the [vmscrape-config.yml-example](vmscrape-config.yml-example) file and rename it to `vmscrape-config.yml`. This config sets up scrape CRDs for key Kubernetes components, including nodes, pods, APIs, and services.
|
||||
|
||||
Apply the scrape CRDs:
|
||||
|
||||
```sh
|
||||
kubectl apply -f vmscrape-config.yml
|
||||
```
|
||||
|
||||
The expected output is:
|
||||
|
||||
```text
|
||||
vmnodescrape.operator.victoriametrics.com/kubelet-cadvisor created
|
||||
vmnodescrape.operator.victoriametrics.com/kubelet-metrics created
|
||||
vmscrapeconfig.operator.victoriametrics.com/kubernetes-apiservers created
|
||||
vmscrapeconfig.operator.victoriametrics.com/kubernetes-pods created
|
||||
vmscrapeconfig.operator.victoriametrics.com/kubernetes-service-endpoints created
|
||||
```
|
||||
|
||||
Go back to the `vmagent` target page by browsing `http://127.0.0.1:8429/targets`. This time, you should find targets such as `nodeScrape/default/kubelet-cadvisor` and `nodeScrape/default/kubelet-metrics` with an up status:
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">You should find Kubernetes-specific targets now</figcaption>
|
||||
|
||||
## 4. Verifying VictoriaMetrics cluster
|
||||
|
||||
See [how to install and connect Grafana to VictoriaMetrics](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/#4-install-and-connect-grafana-to-victoriametrics-with-helm) but with one addition - we should get the name of `vmselect` service from the freshly installed VictoriaMetrics Cluster because it will now be different.
|
||||
|
||||
To get the new service name, please run the following command:
|
||||
The next step is to install Grafana to visualize collected metrics.
|
||||
|
||||
Add the Grafana Helm repository with:
|
||||
|
||||
```sh
|
||||
kubectl get svc | grep vmselect
|
||||
helm repo add grafana-community https://grafana-community.github.io/helm-charts
|
||||
helm repo update
|
||||
```
|
||||
|
||||
The expected output:
|
||||
Next, we need to determine the URL for the `vmselect` service. To get the service name, run the following command:
|
||||
|
||||
```sh
|
||||
vmselect-example-vmcluster-persistent ClusterIP None <none> 8481/TCP 7m
|
||||
kubectl get svc -l app.kubernetes.io/name=vmselect
|
||||
```
|
||||
|
||||
The final config will look like this:
|
||||
|
||||
You should get a message like this:
|
||||
|
||||
```sh
|
||||
cat <<EOF | helm install my-grafana grafana/grafana -f -
|
||||
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||
vmselect-example-vmcluster-persistent ClusterIP None <none> 8481/TCP 7m
|
||||
```
|
||||
|
||||
We'll need to supply a datasource URL for Grafana, which in VictoriaMetrics cluster takes the following form:
|
||||
|
||||
```text
|
||||
http://<service-name>.<namespace>.svc.cluster.local:<port-number>/select/0/prometheus/
|
||||
```
|
||||
|
||||
Thus, in our example, the URL is:
|
||||
|
||||
```text
|
||||
http://vmselect-example-vmcluster-persistent.default.svc.cluster.local:8481/select/0/prometheus/
|
||||
```
|
||||
|
||||
Create a values file for the Grafana Helm chart:
|
||||
|
||||
```sh
|
||||
cat << EOF > grafana-values.yml
|
||||
datasources:
|
||||
datasources.yaml:
|
||||
apiVersion: 1
|
||||
@@ -244,6 +371,7 @@ cat <<EOF | helm install my-grafana grafana/grafana -f -
|
||||
- name: victoriametrics
|
||||
type: prometheus
|
||||
orgId: 1
|
||||
# url takes the form of 'http://<vmselect-service-name>.<namespace>.svc.cluster.local:<port-number>/select/0/prometheus'
|
||||
url: http://vmselect-example-vmcluster-persistent.default.svc.cluster.local:8481/select/0/prometheus/
|
||||
access: proxy
|
||||
isDefault: true
|
||||
@@ -267,31 +395,108 @@ cat <<EOF | helm install my-grafana grafana/grafana -f -
|
||||
default:
|
||||
victoriametrics:
|
||||
gnetId: 11176
|
||||
revision: 18
|
||||
datasource: victoriametrics
|
||||
vmagent:
|
||||
gnetId: 12683
|
||||
revision: 7
|
||||
datasource: victoriametrics
|
||||
kubernetes:
|
||||
gnetId: 14205
|
||||
revision: 1
|
||||
datasource: victoriametrics
|
||||
EOF
|
||||
```
|
||||
|
||||
Let's break down the main parts of the config file:
|
||||
|
||||
- `datasources: datasources.yaml: datasources: url` defines the URL for the `vmselect` service. This endpoint is the datasource Grafana uses to query the metrics database.
|
||||
- `dashboards: default:` loads three starter dashboards to monitor the Kubernetes cluster, the VictoriaMetrics services, and the `vmagent` service.
|
||||
|
||||
Install Grafana into the Kubernetes cluster with the name `my-grafana` in the default namespace with the following command:
|
||||
|
||||
```sh
|
||||
helm install my-grafana grafana-community/grafana -f grafana-values.yml
|
||||
```
|
||||
|
||||
The output should look similar to this:
|
||||
|
||||
```text
|
||||
NAME: my-grafana
|
||||
LAST DEPLOYED: Fri Feb 6 19:00:15 2026
|
||||
NAMESPACE: default
|
||||
STATUS: deployed
|
||||
REVISION: 1
|
||||
DESCRIPTION: Install complete
|
||||
NOTES:
|
||||
1. Get your 'admin' user password by running:
|
||||
|
||||
kubectl get secret --namespace default my-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
|
||||
|
||||
|
||||
2. The Grafana server can be accessed via port 80 on the following DNS name from within your cluster:
|
||||
|
||||
my-grafana.default.svc.cluster.local
|
||||
|
||||
Get the Grafana URL to visit by running these commands in the same shell:
|
||||
export POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=my-grafana" -o jsonpath="{.items[0].metadata.name}")
|
||||
kubectl --namespace default port-forward $POD_NAME 3000
|
||||
|
||||
3. Login with the password from step 1 and the username: admin
|
||||
#################################################################################
|
||||
###### WARNING: Persistence is disabled!!! You will lose your data when #####
|
||||
###### the Grafana pod is terminated. #####
|
||||
#################################################################################
|
||||
```
|
||||
|
||||
Use the first command in the output to obtain the password for the `admin` user:
|
||||
|
||||
```sh
|
||||
kubectl get secret --namespace default my-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
|
||||
|
||||
```
|
||||
|
||||
The second part of the output shows how to port-forward the Grafana service to access it locally on `127.0.0.1:3000`:
|
||||
|
||||
```sh
|
||||
export pod_name=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=my-grafana" -o jsonpath="{.items[0].metadata.name}")
|
||||
|
||||
kubectl --namespace default port-forward $pod_name 3000
|
||||
```
|
||||
|
||||
## 5. Check the result you obtained in your browser
|
||||
|
||||
To check that [VictoriaMetrics](https://victoriametrics.com) collecting metrics from the k8s cluster open in your browser `http://127.0.0.1:3000/dashboards` and choose the `VictoriaMetrics - cluster` dashboard. Use `admin` for login and the `password` that you previously got from kubectl.
|
||||
To check that [VictoriaMetrics](https://victoriametrics.com) is collecting metrics from the Kubernetes cluster, open your browser to `http://127.0.0.1:3000/dashboards` and choose the `VictoriaMetrics - cluster` dashboard.
|
||||
|
||||

|
||||
Use `admin` for login and the `password` obtained with `kubectl get secret ...`.
|
||||
|
||||
The expected output is:
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">List of pre-installed dashboards in Grafana</figcaption>
|
||||
|
||||
The "VictoriaMetrics - cluster" dashboard shows activity of the VictoriaMetrics services.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Grafana dashboard showing metrics for the VictoriaMetrics cluster services</figcaption>
|
||||
|
||||
There is a separate dashboard for the `vmagent` service's activity. This shows the ingestion rate and resource utilization.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Grafana dashboard showing metrics for the vmagent service</figcaption>
|
||||
|
||||
If you [added the scrape configs](#kubernetes-scraping), the Kubernetes dashboard will be populated with metrics; otherwise, it will be empty.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Grafana dashboard showing Kubernetes cluster metrics</figcaption>
|
||||
|
||||

|
||||
|
||||
## 6. Summary
|
||||
|
||||
* We set up Kubernetes Operator for VictoriaMetrics with using CRD.
|
||||
* We collected metrics from all running services and stored them in the VictoriaMetrics database.
|
||||
- We set up a Kubernetes Operator for VictoriaMetrics using CRDs.
|
||||
- We collected metrics from all running services and stored them in the VictoriaMetrics database.
|
||||
- We installed Grafana to visualize metrics.
|
||||
|
||||
Consider reading these resources to complete your setup:
|
||||
|
||||
- [VictoriaMetrics Operator Quickstart](https://docs.victoriametrics.com/operator/quick-start/)
|
||||
- See [VictoriaMetrics K8s Stack](https://docs.victoriametrics.com/helm/victoria-metrics-k8s-stack/) for an all-in-one solution for Kubernetes monitoring
|
||||
- Grafana
|
||||
- [Enable persistent storage](https://grafana.com/docs/grafana/latest/setup-grafana/installation/helm/#enable-persistent-storage-recommended)
|
||||
- [Configure private TLS authority](https://grafana.com/docs/grafana/latest/setup-grafana/installation/helm/#configure-a-private-ca-certificate-authority)
|
||||
|
||||
|
||||
|
After Width: | Height: | Size: 645 KiB |
|
After Width: | Height: | Size: 447 KiB |
|
After Width: | Height: | Size: 474 KiB |
|
After Width: | Height: | Size: 793 KiB |
|
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 308 KiB |
|
Before Width: | Height: | Size: 119 KiB After Width: | Height: | Size: 917 KiB |
243
docs/guides/getting-started-with-vm-operator/vmscrape-config.yml-example
Executable file
@@ -0,0 +1,243 @@
|
||||
# vmnodescrape cadvisor
|
||||
apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMNodeScrape
|
||||
metadata:
|
||||
name: kubelet-cadvisor
|
||||
spec:
|
||||
scheme: https
|
||||
path: /metrics/cadvisor
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
# Override job label
|
||||
relabelConfigs:
|
||||
- targetLabel: job
|
||||
replacement: "kubernetes-nodes-cadvisor"
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
|
||||
# relabel metrics to work with Grafana dashboard
|
||||
metricRelabelConfigs:
|
||||
- action: replace
|
||||
sourceLabels: [pod]
|
||||
regex: "(.+)"
|
||||
targetLabel: pod_name
|
||||
replacement: "$1"
|
||||
- action: replace
|
||||
sourceLabels: [container]
|
||||
regex: "(.+)"
|
||||
targetLabel: container_name
|
||||
replacement: "$1"
|
||||
- action: replace
|
||||
targetLabel: name
|
||||
replacement: "k8s_stub"
|
||||
- action: replace
|
||||
sourceLabels: [id]
|
||||
regex: "^/system\\.slice/(.+)\\.service$"
|
||||
targetLabel: systemd_service_name
|
||||
replacement: "$1"
|
||||
|
||||
---
|
||||
|
||||
# vmnodescrape kubelet
|
||||
apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMNodeScrape
|
||||
metadata:
|
||||
name: kubelet-metrics
|
||||
spec:
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
# relabel metrics to work with Grafana dashboard
|
||||
relabelConfigs:
|
||||
- targetLabel: job
|
||||
replacement: "kubernetes-nodes"
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
|
||||
---
|
||||
|
||||
# vmscrapeconfig apiservers
|
||||
apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMScrapeConfig
|
||||
metadata:
|
||||
name: kubernetes-apiservers
|
||||
spec:
|
||||
kubernetesSDConfigs:
|
||||
- role: endpoints
|
||||
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecureSkipVerify: true
|
||||
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
relabelConfigs:
|
||||
# Keep only the kubernetes service on https port
|
||||
- sourceLabels:
|
||||
- __meta_kubernetes_namespace
|
||||
- __meta_kubernetes_service_name
|
||||
- __meta_kubernetes_endpoint_port_name
|
||||
action: keep
|
||||
regex: "default;kubernetes;https"
|
||||
|
||||
# relabel metrics to work with Grafana dashboard
|
||||
- targetLabel: job
|
||||
replacement: "kubernetes-apiservers"
|
||||
|
||||
---
|
||||
|
||||
# vmscrapeconfig pods
|
||||
apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMScrapeConfig
|
||||
metadata:
|
||||
name: kubernetes-pods
|
||||
spec:
|
||||
kubernetesSDConfigs:
|
||||
- role: pod
|
||||
|
||||
relabelConfigs:
|
||||
# Skip init containers
|
||||
- action: drop
|
||||
sourceLabels: [__meta_kubernetes_pod_container_init]
|
||||
regex: "true"
|
||||
|
||||
# Ensure port annotation matches container port
|
||||
- action: keep_if_equal
|
||||
sourceLabels:
|
||||
- __meta_kubernetes_pod_annotation_prometheus_io_port
|
||||
- __meta_kubernetes_pod_container_port_number
|
||||
|
||||
# Only pods with prometheus.io/scrape="true"
|
||||
- sourceLabels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
|
||||
action: keep
|
||||
regex: "true"
|
||||
|
||||
# Exclude pods marked as "slow"
|
||||
- sourceLabels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
|
||||
action: drop
|
||||
regex: "true"
|
||||
|
||||
# Scheme override
|
||||
- sourceLabels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
|
||||
action: replace
|
||||
targetLabel: __scheme__
|
||||
regex: "(https?)"
|
||||
|
||||
# Path override
|
||||
- sourceLabels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
targetLabel: __metrics_path__
|
||||
regex: "(.+)"
|
||||
|
||||
# Address override using prometheus.io/port
|
||||
- sourceLabels:
|
||||
- __address__
|
||||
- __meta_kubernetes_pod_annotation_prometheus_io_port
|
||||
action: replace
|
||||
targetLabel: __address__
|
||||
regex: "([^:]+)(?::\\d+)?;(\\d+)"
|
||||
replacement: "$1:$2"
|
||||
|
||||
# Copy pod labels
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_label_(.+)
|
||||
|
||||
# Use kubernetes_namespace, kubernetes_pod_name, kubernetes_node labels
|
||||
- sourceLabels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
targetLabel: kubernetes_namespace
|
||||
- sourceLabels: [__meta_kubernetes_pod_name]
|
||||
action: replace
|
||||
targetLabel: kubernetes_pod_name
|
||||
- sourceLabels: [__meta_kubernetes_pod_node_name]
|
||||
action: replace
|
||||
targetLabel: kubernetes_node
|
||||
|
||||
# Drop non-running pods
|
||||
- sourceLabels: [__meta_kubernetes_pod_phase]
|
||||
action: drop
|
||||
regex: "Pending|Succeeded|Failed|Completed"
|
||||
|
||||
# Override job label
|
||||
- targetLabel: job
|
||||
replacement: "kubernetes-pods"
|
||||
|
||||
---
|
||||
|
||||
# vmscrapeconfig service endpoints
|
||||
apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMScrapeConfig
|
||||
metadata:
|
||||
name: kubernetes-service-endpoints
|
||||
spec:
|
||||
kubernetesSDConfigs:
|
||||
- role: endpoints
|
||||
|
||||
relabelConfigs:
|
||||
# Skip init containers
|
||||
- action: drop
|
||||
sourceLabels: [__meta_kubernetes_pod_container_init]
|
||||
regex: "true"
|
||||
|
||||
# Ensure port annotation matches container port
|
||||
- action: keep_if_equal
|
||||
sourceLabels:
|
||||
- __meta_kubernetes_pod_annotation_prometheus_io_port
|
||||
- __meta_kubernetes_pod_container_port_number
|
||||
|
||||
# Only services with prometheus.io/scrape="true"
|
||||
- sourceLabels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
|
||||
action: keep
|
||||
regex: "true"
|
||||
|
||||
# Exclude "slow" services
|
||||
- sourceLabels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
|
||||
action: drop
|
||||
regex: "true"
|
||||
|
||||
# Scheme override
|
||||
- sourceLabels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
||||
action: replace
|
||||
targetLabel: __scheme__
|
||||
regex: "(https?)"
|
||||
|
||||
# Path override
|
||||
- sourceLabels: [__meta_kubernetes_service_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
targetLabel: __metrics_path__
|
||||
regex: "(.+)"
|
||||
|
||||
# Address override using prometheus.io/port
|
||||
- sourceLabels:
|
||||
- __address__
|
||||
- __meta_kubernetes_service_annotation_prometheus_io_port
|
||||
action: replace
|
||||
targetLabel: __address__
|
||||
regex: "([^:]+)(?::\\d+)?;(\\d+)"
|
||||
replacement: "$1:$2"
|
||||
|
||||
# Copy service labels
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_label_(.+)
|
||||
|
||||
# relabel metrics to work with Grafana dashboard
|
||||
- sourceLabels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
targetLabel: kubernetes_namespace
|
||||
- sourceLabels: [__meta_kubernetes_service_name]
|
||||
action: replace
|
||||
targetLabel: kubernetes_name
|
||||
- sourceLabels: [__meta_kubernetes_pod_node_name]
|
||||
action: replace
|
||||
targetLabel: kubernetes_node
|
||||
|
||||
# Override job label
|
||||
- targetLabel: job
|
||||
replacement: "kubernetes-service-endpoints"
|
||||
|
||||